From 9dc069403565cfaa4627780e1d75d16589e30ef4 Mon Sep 17 00:00:00 2001
From: Hyungi Ahn
Date: Thu, 26 Mar 2026 13:54:14 +0900
Subject: [PATCH] =?UTF-8?q?feat(law=5Fmonitor):=20=EC=99=B8=EA=B5=AD=20?=
 =?UTF-8?q?=EB=B2=95=EB=A0=B9=20=EC=A7=80=EC=9B=90=20=EC=B6=94=EA=B0=80=20?=
 =?UTF-8?q?=E2=80=94=20US=20OSHA,=20JP=20=E5=8E=9A=E5=8A=B4=E7=9C=81(MLX?=
 =?UTF-8?q?=20=EB=B2=88=EC=97=AD),=20EU-OSHA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Opus 4.6 (1M context)
---
 scripts/law_monitor.py | 223 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 221 insertions(+), 2 deletions(-)

diff --git a/scripts/law_monitor.py b/scripts/law_monitor.py
index 4c9e68b..e1de021 100644
--- a/scripts/law_monitor.py
+++ b/scripts/law_monitor.py
@@ -16,7 +16,7 @@ from datetime import datetime, timedelta
 from pathlib import Path
 
 sys.path.insert(0, str(Path(__file__).parent))
-from pkm_utils import setup_logger, load_credentials, run_applescript_inline, PROJECT_ROOT, DATA_DIR
+from pkm_utils import setup_logger, load_credentials, run_applescript_inline, llm_generate, PROJECT_ROOT, DATA_DIR
 
 logger = setup_logger("law_monitor")
 
@@ -182,7 +182,226 @@ def run():
             logger.debug(f"변경 없음: {law_name}")
 
     save_last_check(last_check)
-    logger.info(f"=== 법령 모니터링 완료 — {changes_found}건 변경 감지 ===")
+
+    # ─── Foreign legislation (each source gated by its own run interval) ───
+    us_count = fetch_us_osha(last_check)
+    jp_count = fetch_jp_mhlw(last_check)
+    eu_count = fetch_eu_osha(last_check)
+    changes_found += us_count + jp_count + eu_count
+
+    save_last_check(last_check)  # second checkpoint: persists the foreign-source timestamps written above
+    logger.info(f"=== 법령 모니터링 완료 — {changes_found}건 변경 감지 (한국+외국) ===")
+
+
+# ═══════════════════════════════════════════════
+# Foreign legislation monitoring
+# ═══════════════════════════════════════════════
+
+def _should_run(last_check: dict, key: str, interval_days: int) -> bool:
+    """Frequency gate: True when interval_days have elapsed since the date stored at key."""
+    last_run = last_check.get(key, "")
+    if not last_run:
+        return True  # never ran before
+    try:
+        last_date = datetime.strptime(last_run, "%Y-%m-%d")
+        return (datetime.now() - last_date).days >= interval_days
+    except ValueError:
+        return True  # corrupt timestamp — run rather than stall forever
+
+
+def _import_foreign_to_devonthink(filepath: Path, title: str, country: str):
+    """Import a saved foreign-law markdown file into DEVONthink under /10_Legislation/Foreign/<country>."""
+    # Country code doubles as the folder name; remap here if folders ever diverge.
+    folder = country
+    escaped_path = str(filepath).replace('"', '\\"')
+    short_title = title[:60]  # for logging only — the title is never embedded in the AppleScript
+    script = f'''
+    tell application id "DNtp"
+        set targetDB to missing value
+        repeat with db in databases
+            if name of db is "04_Industrial safety" then
+                set targetDB to db
+                exit repeat
+            end if
+        end repeat
+        if targetDB is not missing value then
+            set targetGroup to create location "/10_Legislation/Foreign/{folder}" in targetDB
+            set theRecord to import POSIX path "{escaped_path}" to targetGroup
+            set tags of theRecord to {{"#주제/산업안전/법령", "$유형/법령", "{country}"}}
+            add custom meta data "law_monitor" for "sourceChannel" to theRecord
+            add custom meta data "external" for "dataOrigin" to theRecord
+            add custom meta data (current date) for "lastAIProcess" to theRecord
+        end if
+    end tell
+    '''
+    try:
+        run_applescript_inline(script)
+        logger.info(f"DEVONthink 임포트 [{country}]: {short_title}")
+    except Exception as e:
+        logger.error(f"DEVONthink 임포트 실패 [{country}]: {e}")  # best-effort: the markdown file on disk is the source of truth
+
+
+def fetch_us_osha(last_check: dict) -> int:
+    """US OSHA — Federal Register API, run weekly. Returns the number of documents imported."""
+    if not _should_run(last_check, "_us_osha_last", 7):
+        logger.debug("US OSHA: 이번 주 이미 실행됨, 건너뜀")
+        return 0
+
+    logger.info("=== US OSHA 확인 ===")
+    try:
+        # Query from the last successful run (not a fixed 7-day window) so a late run cannot skip documents.
+        from_date = last_check.get("_us_osha_last") or (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d")
+        resp = requests.get("https://www.federalregister.gov/api/v1/documents.json", params={
+            "conditions[agencies][]": "occupational-safety-and-health-administration",
+            "conditions[type][]": ["RULE", "PRORULE"],
+            "conditions[publication_date][gte]": from_date,
+            "per_page": 10,
+            "order": "newest",
+        }, timeout=30)
+        resp.raise_for_status()
+        data = resp.json()
+        results = data.get("results", [])
+        count = 0
+
+        for doc in results:
+            doc_id = doc.get("document_number", "")
+            title = doc.get("title", "")
+            pub_date = doc.get("publication_date", "")
+            abstract = doc.get("abstract", "")
+            doc_url = doc.get("html_url", "")
+
+            # Persist as markdown
+            content = f"# {title}\n\n"
+            content += f"- **Document**: {doc_id}\n"
+            content += f"- **Date**: {pub_date}\n"
+            content += f"- **URL**: {doc_url}\n\n"
+            if abstract:
+                content += f"## Abstract\n\n{abstract}\n"
+
+            safe_title = "".join(c if c.isalnum() or c in " _-" else "_" for c in title)[:50]
+            filepath = LAWS_DIR / f"US_OSHA_{pub_date}_{safe_title}.md"
+            with open(filepath, "w", encoding="utf-8") as f:
+                f.write(content)
+
+            _import_foreign_to_devonthink(filepath, title, "US")
+            count += 1  # NOTE(review): gte window may re-fetch same-day documents on the boundary date
+
+        last_check["_us_osha_last"] = datetime.now().strftime("%Y-%m-%d")  # only marked done on full success, so failures retry next run
+        logger.info(f"US OSHA: {count}건")
+        return count
+
+    except Exception as e:
+        logger.error(f"US OSHA 에러: {e}", exc_info=True)
+        return 0
+
+
+def fetch_jp_mhlw(last_check: dict) -> int:
+    """JP MHLW — parse the news RSS, translate titles via local MLX, run weekly. Returns import count."""
+    if not _should_run(last_check, "_jp_mhlw_last", 7):
+        logger.debug("JP 厚労省: 이번 주 이미 실행됨, 건너뜀")
+        return 0
+
+    logger.info("=== JP 厚生労働省 확인 ===")
+    try:
+        import xml.etree.ElementTree as ET
+        resp = requests.get("https://www.mhlw.go.jp/stf/rss/shinchaku.xml", timeout=30)
+        resp.raise_for_status()
+        root = ET.fromstring(resp.content)
+
+        safety_keywords = ["労働安全", "安全衛生", "労災", "化学物質", "石綿", "安全管理"]
+        count = 0
+
+        for item in root.iter("item"):  # NOTE(review): misses namespaced <item> if the feed is RSS 1.0/RDF — confirm feed format
+            title = item.findtext("title", "")
+            link = item.findtext("link", "")
+            pub_date = item.findtext("pubDate", "")
+
+            # Keep only occupational-safety items
+            if not any(kw in title for kw in safety_keywords):
+                continue
+
+            # Translate the Japanese title to Korean via the local MLX model
+            translated = ""
+            try:
+                translated = llm_generate(
+                    f"다음 일본어 제목을 한국어로 번역해줘. 번역만 출력하고 다른 말은 하지 마.\n\n{title}"
+                )
+                # Strip any chain-of-thought preamble — keep the last non-empty line
+                lines = [ln.strip() for ln in translated.strip().split("\n") if ln.strip()]
+                translated = lines[-1] if lines else title
+            except Exception:
+                translated = title  # translation is best-effort; fall back to the original title
+
+            content = f"# {title}\n\n"
+            content += f"**한국어**: {translated}\n\n"
+            content += f"- **URL**: {link}\n"
+            content += f"- **Date**: {pub_date}\n"
+
+            safe_title = "".join(c if c.isalnum() or c in " _-" else "_" for c in title)[:40]
+            today = datetime.now().strftime("%Y%m%d")
+            filepath = LAWS_DIR / f"JP_{today}_{safe_title}.md"
+            with open(filepath, "w", encoding="utf-8") as f:
+                f.write(content)
+
+            _import_foreign_to_devonthink(filepath, f"{translated} ({title})", "JP")
+            count += 1  # NOTE(review): no per-item dedup — reruns may re-import entries still in the feed
+
+            if count >= 10:
+                break
+
+        last_check["_jp_mhlw_last"] = datetime.now().strftime("%Y-%m-%d")
+        logger.info(f"JP 厚労省: {count}건")
+        return count
+
+    except Exception as e:
+        logger.error(f"JP 厚労省 에러: {e}", exc_info=True)
+        return 0
+
+
+def fetch_eu_osha(last_check: dict) -> int:
+    """EU-OSHA — parse the agency RSS, run monthly. Returns the number of items imported."""
+    if not _should_run(last_check, "_eu_osha_last", 30):
+        logger.debug("EU-OSHA: 이번 달 이미 실행됨, 건너뜀")
+        return 0
+
+    logger.info("=== EU-OSHA 확인 ===")
+    try:
+        import xml.etree.ElementTree as ET
+        resp = requests.get("https://osha.europa.eu/en/rss", timeout=30)
+        resp.raise_for_status()
+        root = ET.fromstring(resp.content)
+
+        count = 0
+        for item in root.iter("item"):  # NOTE(review): same RSS 1.0/RDF namespace caveat as the JP feed — confirm
+            title = item.findtext("title", "")
+            link = item.findtext("link", "")
+            description = item.findtext("description", "")
+            pub_date = item.findtext("pubDate", "")
+
+            content = f"# {title}\n\n"
+            content += f"- **URL**: {link}\n"
+            content += f"- **Date**: {pub_date}\n\n"
+            if description:
+                content += f"## Summary\n\n{description}\n"
+
+            safe_title = "".join(c if c.isalnum() or c in " _-" else "_" for c in title)[:50]
+            today = datetime.now().strftime("%Y%m%d")
+            filepath = LAWS_DIR / f"EU_{today}_{safe_title}.md"
+            with open(filepath, "w", encoding="utf-8") as f:
+                f.write(content)
+
+            _import_foreign_to_devonthink(filepath, title, "EU")
+            count += 1  # NOTE(review): no per-item dedup — monthly reruns may re-import lingering entries
+
+            if count >= 5:
+                break
+
+        last_check["_eu_osha_last"] = datetime.now().strftime("%Y-%m-%d")
+        logger.info(f"EU-OSHA: {count}건")
+        return count
+
+    except Exception as e:
+        logger.error(f"EU-OSHA 에러: {e}", exc_info=True)
+        return 0
 
 
 if __name__ == "__main__":