fix(law_monitor): US 타입 필터 제거 + JP RDF 네임스페이스 수정

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-03-26 14:52:13 +09:00
parent c8e30b530b
commit 35062145ed

View File

@@ -241,7 +241,6 @@ def fetch_us_osha(last_check: dict) -> int:
from_date = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d")
resp = requests.get("https://www.federalregister.gov/api/v1/documents.json", params={
"conditions[agencies][]": "occupational-safety-and-health-administration",
"conditions[type][]": ["RULE", "PRORULE"],
"conditions[publication_date][gte]": from_date,
"per_page": 10,
"order": "newest",
@@ -296,15 +295,16 @@ def fetch_jp_mhlw(last_check: dict) -> int:
resp.raise_for_status()
root = ET.fromstring(resp.content)
safety_keywords = ["労働安全", "安全衛生", "労災", "化学物質", "石綿", "安全管理"]
ns = {"rss": "http://purl.org/rss/1.0", "dc": "http://purl.org/dc/elements/1.1/"}
safety_keywords = ["労働安全", "安全衛生", "労災", "化学物質", "石綿", "安全管理", "労働", "安全", "衛生"]
rss_ns = "http://purl.org/rss/1.0/"
count = 0
# RDF 1.0 형식: <item> 또는 {ns}item
items = root.findall("rss:item", ns) or root.iter("item")
# RDF 1.0 형식: {http://purl.org/rss/1.0/}item
items = root.findall(f"{{{rss_ns}}}item")
logger.info(f"JP RSS 항목: {len(items)}")
for item in items:
title = item.findtext("rss:title", "", ns) or item.findtext("title", "")
link = item.findtext("rss:link", "", ns) or item.findtext("link", "")
title = item.findtext(f"{{{rss_ns}}}title", "")
link = item.findtext(f"{{{rss_ns}}}link", "")
pub_date = item.findtext("pubDate", "")
# 안전위생 키워드 필터