"""B-1 PR① — fixture tests for the KR adapter's pure parsers (plan safety-library-1).

The fixtures are a live capture from law.go.kr taken on 2026-06-13 (OC
sanitized), stored under tests/fixtures/statute_kr/. The parsers are pure
functions, so neither httpx nor a DB is required — the tests run locally,
outside the container.
"""
import gzip
import sys
import xml.etree.ElementTree as ET
from pathlib import Path

# Put the sibling "app" directory on sys.path before the project imports
# below (presumably the ``workers`` package lives there — confirm).
sys.path.insert(0, str(Path(__file__).parent.parent / "app"))

from workers.statute_adapters import ChangeEvent  # noqa: E402
from workers.statute_adapters.kr import detect_change, parse_search_hit  # noqa: E402

FIX = Path(__file__).parent / "fixtures" / "statute_kr"


def _read(name: str) -> str:
    """Return the UTF-8 text of fixture *name*, gunzipping it when it ends in ``.gz``."""
    fixture_path = FIX / name
    if not name.endswith(".gz"):
        return fixture_path.read_text(encoding="utf-8")
    return gzip.decompress(fixture_path.read_bytes()).decode("utf-8")


def test_parse_search_hit_exact_match() -> None:
    """An exact law-name query returns a hit carrying the expected fields."""
    hit = parse_search_hit(_read("lawsearch_sanab.xml"), "산업안전보건법")
    assert hit is not None
    expected_fields = {
        "law_id": "001766",
        "mst": "283449",
        "promulgation_date": "20260219",
        "effective_date": "20260601",
        "status_code": "현행",
    }
    for field, value in expected_fields.items():
        assert hit[field] == value


def test_parse_search_hit_rejects_partial_name() -> None:
    """A query that is only a prefix of a law name must not match anything."""
    # The response has totalCnt 3; partial matches such as
    # '산업안전보건법 시행령' must be treated as non-matching.
    search_xml = _read("lawsearch_sanab.xml")
    assert parse_search_hit(search_xml, "산업안전보건") is None


def test_detect_change_same_watermark_is_silent() -> None:
    """No event is produced when the watermark equals the hit's current mst."""
    hit = parse_search_hit(_read("lawsearch_sanab.xml"), "산업안전보건법")
    event = detect_change(
        hit, "kr-law:001766", "산업안전보건법", watermark="283449"
    )
    assert event is None


def test_detect_change_new_mst_is_amend() -> None:
    """A watermark that differs from the hit's mst yields an "amend" event."""
    hit = parse_search_hit(_read("lawsearch_sanab.xml"), "산업안전보건법")
    event = detect_change(
        hit, "kr-law:001766", "산업안전보건법", watermark="283448"
    )
    assert isinstance(event, ChangeEvent)
    assert event.kind == "amend"
    assert event.new_version_key == "283449"
    assert event.effective_date == "20260601"


def test_detect_change_empty_watermark_is_amend() -> None:
    """A missing watermark is reported as an "amend" change."""
    # First poll (no watermark yet) is detected as a change — the expected
    # behaviour of observation mode before the PR② bootstrap.
    hit = parse_search_hit(_read("lawsearch_sanab.xml"), "산업안전보건법")
    event = detect_change(hit, "kr-law:001766", "산업안전보건법", watermark=None)
    assert event is not None
    assert event.kind == "amend"


def test_detect_change_repeal_keyword() -> None:
    """A hit whose revision_type is '폐지' yields a "repeal" event."""
    repealed_hit = {
        "mst": "9",
        "revision_type": "폐지",
        "promulgation_date": None,
        "effective_date": None,
        "law_id": "x",
        "status_code": None,
    }
    event = detect_change(repealed_hit, "kr-law:x", "x", watermark="1")
    assert event is not None
    assert event.kind == "repeal"


def test_lawservice_snapshot_semantics_rule() -> None:
    """Pin the R7-M3 verdict: one full-text call returns a complete snapshot.

    The XML of a single full-text call contains every article and every
    annex (the premise of the PR② payload contract).
    """
    tree_root = ET.fromstring(_read("lawservice_rule.xml.gz"))
    article_nodes = tree_root.findall(".//조문단위")
    annex_nodes = tree_root.findall(".//별표단위")
    assert len(article_nodes) >= 800, "산안기준규칙 조문 853 기대 — 전문 1콜 판정 근거"
    assert len(annex_nodes) == 23, "별표 23 전부 본문 XML 포함 = 스냅샷 의미론"
    # R7-M3 ②: annexes are identified through structured fields, so no
    # suffix-string parsing is needed.
    leading_annex = annex_nodes[0]
    assert leading_annex.findtext("별표번호") is not None
    assert leading_annex.findtext("별표가지번호") is not None


def test_lawservice_sanab_basic_info() -> None:
    """Check the law ID, article count and absence of annexes in the sanab fixture."""
    tree_root = ET.fromstring(_read("lawservice_sanab.xml.gz"))
    assert tree_root.findtext(".//법령ID") == "001766"
    article_nodes = tree_root.findall(".//조문단위")
    assert len(article_nodes) >= 200
    # A statute without annexes has zero annex units (the opposite case of
    # the snapshot semantics checked above).
    annex_nodes = tree_root.findall(".//별표단위")
    assert len(annex_nodes) == 0