feat(law_monitor): 외국 법령 지원 추가 — US OSHA, JP 厚労省(MLX 번역), EU-OSHA

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-03-26 13:54:14 +09:00
parent ec6074d9ee
commit 9dc0694035

View File

@@ -16,7 +16,7 @@ from datetime import datetime, timedelta
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from pkm_utils import setup_logger, load_credentials, run_applescript_inline, PROJECT_ROOT, DATA_DIR
from pkm_utils import setup_logger, load_credentials, run_applescript_inline, llm_generate, PROJECT_ROOT, DATA_DIR
logger = setup_logger("law_monitor")
@@ -182,7 +182,226 @@ def run():
logger.debug(f"변경 없음: {law_name}")
save_last_check(last_check)
logger.info(f"=== 법령 모니터링 완료 — {changes_found}건 변경 감지 ===")
# ─── 외국 법령 (빈도 체크 후 실행) ───
us_count = fetch_us_osha(last_check)
jp_count = fetch_jp_mhlw(last_check)
eu_count = fetch_eu_osha(last_check)
changes_found += us_count + jp_count + eu_count
save_last_check(last_check)
logger.info(f"=== 법령 모니터링 완료 — {changes_found}건 변경 감지 (한국+외국) ===")
# ═══════════════════════════════════════════════
# 외국 법령 모니터링
# ═══════════════════════════════════════════════
def _should_run(last_check: dict, key: str, interval_days: int) -> bool:
"""빈도 체크: 마지막 실행일로부터 interval_days 경과 여부"""
last_run = last_check.get(key, "")
if not last_run:
return True
try:
last_date = datetime.strptime(last_run, "%Y-%m-%d")
return (datetime.now() - last_date).days >= interval_days
except ValueError:
return True
def _import_foreign_to_devonthink(filepath: Path, title: str, country: str):
"""외국 법령 DEVONthink 임포트"""
country_map = {"US": "US", "JP": "JP", "EU": "EU"}
folder = country_map.get(country, country)
escaped_path = str(filepath).replace('"', '\\"')
escaped_title = title.replace('"', '\\"')[:60]
script = f'''
tell application id "DNtp"
set targetDB to missing value
repeat with db in databases
if name of db is "04_Industrial safety" then
set targetDB to db
exit repeat
end if
end repeat
if targetDB is not missing value then
set targetGroup to create location "/10_Legislation/Foreign/{folder}" in targetDB
set theRecord to import POSIX path "{escaped_path}" to targetGroup
set tags of theRecord to {{"#주제/산업안전/법령", "$유형/법령", "{country}"}}
add custom meta data "law_monitor" for "sourceChannel" to theRecord
add custom meta data "external" for "dataOrigin" to theRecord
add custom meta data (current date) for "lastAIProcess" to theRecord
end if
end tell
'''
try:
run_applescript_inline(script)
logger.info(f"DEVONthink 임포트 [{country}]: {escaped_title}")
except Exception as e:
logger.error(f"DEVONthink 임포트 실패 [{country}]: {e}")
def fetch_us_osha(last_check: dict) -> int:
"""US OSHA — Federal Register API (주 1회)"""
if not _should_run(last_check, "_us_osha_last", 7):
logger.debug("US OSHA: 이번 주 이미 실행됨, 건너뜀")
return 0
logger.info("=== US OSHA 확인 ===")
try:
from_date = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d")
resp = requests.get("https://www.federalregister.gov/api/v1/documents.json", params={
"conditions[agencies][]": "occupational-safety-and-health-administration",
"conditions[type][]": ["RULE", "PRORULE"],
"conditions[publication_date][gte]": from_date,
"per_page": 10,
"order": "newest",
}, timeout=30)
resp.raise_for_status()
data = resp.json()
results = data.get("results", [])
count = 0
for doc in results:
doc_id = doc.get("document_number", "")
title = doc.get("title", "")
pub_date = doc.get("publication_date", "")
abstract = doc.get("abstract", "")
doc_url = doc.get("html_url", "")
# 마크다운으로 저장
content = f"# {title}\n\n"
content += f"- **Document**: {doc_id}\n"
content += f"- **Date**: {pub_date}\n"
content += f"- **URL**: {doc_url}\n\n"
if abstract:
content += f"## Abstract\n\n{abstract}\n"
safe_title = "".join(c if c.isalnum() or c in " _-" else "_" for c in title)[:50]
filepath = LAWS_DIR / f"US_OSHA_{pub_date}_{safe_title}.md"
with open(filepath, "w", encoding="utf-8") as f:
f.write(content)
_import_foreign_to_devonthink(filepath, title, "US")
count += 1
last_check["_us_osha_last"] = datetime.now().strftime("%Y-%m-%d")
logger.info(f"US OSHA: {count}")
return count
except Exception as e:
logger.error(f"US OSHA 에러: {e}", exc_info=True)
return 0
def fetch_jp_mhlw(last_check: dict) -> int:
"""JP 厚生労働省 — RSS 파싱 + MLX 번역 (주 1회)"""
if not _should_run(last_check, "_jp_mhlw_last", 7):
logger.debug("JP 厚労省: 이번 주 이미 실행됨, 건너뜀")
return 0
logger.info("=== JP 厚生労働省 확인 ===")
try:
import xml.etree.ElementTree as ET
resp = requests.get("https://www.mhlw.go.jp/stf/rss/shinchaku.xml", timeout=30)
resp.raise_for_status()
root = ET.fromstring(resp.content)
safety_keywords = ["労働安全", "安全衛生", "労災", "化学物質", "石綿", "安全管理"]
count = 0
for item in root.iter("item"):
title = item.findtext("title", "")
link = item.findtext("link", "")
pub_date = item.findtext("pubDate", "")
# 안전위생 키워드 필터
if not any(kw in title for kw in safety_keywords):
continue
# MLX 35B로 한국어 번역
translated = ""
try:
translated = llm_generate(
f"다음 일본어 제목을 한국어로 번역해줘. 번역만 출력하고 다른 말은 하지 마.\n\n{title}"
)
# thinking 출력 제거 — 마지막 줄만 사용
lines = [l.strip() for l in translated.strip().split("\n") if l.strip()]
translated = lines[-1] if lines else title
except Exception:
translated = title
content = f"# {title}\n\n"
content += f"**한국어**: {translated}\n\n"
content += f"- **URL**: {link}\n"
content += f"- **Date**: {pub_date}\n"
safe_title = "".join(c if c.isalnum() or c in " _-" else "_" for c in title)[:40]
today = datetime.now().strftime("%Y%m%d")
filepath = LAWS_DIR / f"JP_{today}_{safe_title}.md"
with open(filepath, "w", encoding="utf-8") as f:
f.write(content)
_import_foreign_to_devonthink(filepath, f"{translated} ({title})", "JP")
count += 1
if count >= 10:
break
last_check["_jp_mhlw_last"] = datetime.now().strftime("%Y-%m-%d")
logger.info(f"JP 厚労省: {count}")
return count
except Exception as e:
logger.error(f"JP 厚労省 에러: {e}", exc_info=True)
return 0
def fetch_eu_osha(last_check: dict) -> int:
"""EU-OSHA — RSS 파싱 (월 1회)"""
if not _should_run(last_check, "_eu_osha_last", 30):
logger.debug("EU-OSHA: 이번 달 이미 실행됨, 건너뜀")
return 0
logger.info("=== EU-OSHA 확인 ===")
try:
import xml.etree.ElementTree as ET
resp = requests.get("https://osha.europa.eu/en/rss", timeout=30)
resp.raise_for_status()
root = ET.fromstring(resp.content)
count = 0
for item in root.iter("item"):
title = item.findtext("title", "")
link = item.findtext("link", "")
description = item.findtext("description", "")
pub_date = item.findtext("pubDate", "")
content = f"# {title}\n\n"
content += f"- **URL**: {link}\n"
content += f"- **Date**: {pub_date}\n\n"
if description:
content += f"## Summary\n\n{description}\n"
safe_title = "".join(c if c.isalnum() or c in " _-" else "_" for c in title)[:50]
today = datetime.now().strftime("%Y%m%d")
filepath = LAWS_DIR / f"EU_{today}_{safe_title}.md"
with open(filepath, "w", encoding="utf-8") as f:
f.write(content)
_import_foreign_to_devonthink(filepath, title, "EU")
count += 1
if count >= 5:
break
last_check["_eu_osha_last"] = datetime.now().strftime("%Y-%m-%d")
logger.info(f"EU-OSHA: {count}")
return count
except Exception as e:
logger.error(f"EU-OSHA 에러: {e}", exc_info=True)
return 0
if __name__ == "__main__":