"""B-2 KOSHA 사망사고 속보(callApiId=1040) — 순수 파서 fixture 테스트 (plan safety-library-1). fixture = 2026-06-13 data.go.kr 라이브 박제 (serviceKey 응답 본문 미포함 확인, tests/fixtures/kosha_fatal_response.json). _fatal_fields/_items 는 순수 함수라 DB/httpx 호출 없이 검증 — [[feedback_external_api_fixture_first]]. """ import json from datetime import date from pathlib import Path from workers.kosha_collector import _fatal_fields, _items FIXTURE = Path(__file__).parent / "fixtures" / "kosha_fatal_response.json" def _payload() -> dict: return json.loads(FIXTURE.read_text(encoding="utf-8")) def test_items_envelope_parse(): """body.items.item 봉투 파싱 — 재해사례와 동일 envelope.""" items = _items(_payload()) assert len(items) == 3 assert all({"arno", "keyword", "contents"} <= set(it) for it in items) def test_fatal_fields_basic_mapping(): item = _items(_payload())[0] f = _fatal_fields(item) assert f is not None assert f["arno"] == "20260611111536KIZXJ8" assert f["title"].startswith("[6/9, 부산 사상구]") # HTML 태그 + 이미지 서버 URL 노이즈 완전 제거 (검색/임베딩 본문 정화) assert "<" not in f["text"] assert "portal.kosha.or.kr" not in f["text"] assert "data-filename" not in f["text"] # 본문 텍스트는 보존 assert "(사망 2명)" in f["text"] assert "베란다 난간" in f["text"] # published_date = arno 접두 8자리(KST 등록일), reg_dt = 14자리 등록시각 원문 assert f["published_date"] == date(2026, 6, 11) assert f["reg_dt"] == "20260611111536" def test_fatal_fields_all_three_items_well_formed(): for item in _items(_payload()): f = _fatal_fields(item) assert f is not None assert f["published_date"] == date(2026, 6, 11) # 3건 모두 06-11 등록 assert f["reg_dt"] is not None assert f["text"] and "<" not in f["text"] def test_fatal_fields_skips_missing_required(): assert _fatal_fields({"arno": "20260611111536XX", "contents": "x"}) is None # keyword 부재 assert _fatal_fields({"keyword": "제목만", "contents": "x"}) is None # arno 부재 assert _fatal_fields({"arno": " ", "keyword": " ", "contents": "x"}) is None # 공백뿐 def test_fatal_fields_malformed_arno_date_is_fail_quiet(): # arno 접두가 8자리 날짜로 안 풀리면 published_date/reg_dt = None (보조 축이라 fail-quiet) f = _fatal_fields({"arno": "ABC123", "keyword": "제목", "contents": "
본문
"}) assert f is not None assert f["published_date"] is None assert f["reg_dt"] is None assert f["text"] == "본문"