diff --git a/app/api/search.py b/app/api/search.py index 65a415d..2cd43ed 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -33,6 +33,7 @@ from services.search.grounding_check import check as grounding_check from services.search.refusal_gate import RefusalDecision, decide as refusal_decide from services.search import query_rewriter from services.search.retrieval_service import AxisFilter +from services.search.result_decorate import compute_facets, decorate_version_status from services.search.search_pipeline import PipelineResult, run_search from services.search.synthesis_service import SynthesisResult, synthesize from services.search.verifier_service import VerifierResult, verify @@ -77,6 +78,9 @@ class SearchResult(BaseModel): material_type: str | None = None jurisdiction: str | None = None published_date: date | None = None + # 안전 자료실 C-1 후속: 법령 버전 상태(legal_meta.version_status) — wrapper 1회 decorate. + # law 결과만 채워짐(legal_meta 위성), 그 외/무매핑 law = None. D-1 버전 뱃지 선행. + version_status: str | None = None # ─── Phase 0.4: 디버그 응답 스키마 ───────────────────────── @@ -108,6 +112,9 @@ class SearchResponse(BaseModel): query: str mode: str debug: SearchDebug | None = None + # 안전 자료실 C-1 후속: facets=true 일 때만 채워짐(미요청=None, byte 불변). + # top-K 결과 내 분류 축 분포 라벨 {axis: {label: count}}. + facets: dict[str, dict[str, int]] | None = None def _to_debug_candidates(rows: list[SearchResult], n: int = 20) -> list[DebugCandidate]: @@ -218,6 +225,7 @@ async def search( None, description="안전 자료실 C-1: 관할 필터 (KR/US/EU/JP/GB/INT)"), year_from: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 하한 (NULL=created_at fallback)"), year_to: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 상한"), + facets: bool = Query(False, description="안전 자료실 C-1 후속: top-K 결과 분류 축 분포(material_type/jurisdiction/version_status)를 응답 facets 에 집계. 
async def decorate_version_status(
    session: "AsyncSession", results: list["SearchResult"]
) -> None:
    """Copy ``legal_meta.version_status`` onto law results, mutating them in place.

    Ids are gathered from results whose ``material_type`` is ``"law"`` and whose
    ``id`` is not ``None``. If there are none, the function returns before
    touching ``session``. Otherwise a single SELECT on ``legal_meta`` is issued
    for those ids, and every result whose ``id`` appears in the returned rows
    gets that row's ``version_status``. Results without a returned row keep
    whatever ``version_status`` they already had.

    Args:
        session: Async database session; only ``execute`` is awaited on it.
        results: Search results to decorate.
    """
    law_ids = []
    for item in results:
        if item.material_type == "law" and item.id is not None:
            law_ids.append(item.id)
    if len(law_ids) == 0:
        # Nothing to look up, so no query is issued.
        return

    query = text(
        "SELECT document_id, version_status FROM legal_meta "
        "WHERE document_id = ANY(:ids)"
    )
    fetched = await session.execute(query, {"ids": law_ids})

    status_by_id = {}
    for row in fetched:
        status_by_id[row.document_id] = row.version_status

    for item in results:
        if item.id in status_by_id:
            item.version_status = status_by_id[item.id]


def compute_facets(results: list["SearchResult"]) -> dict[str, dict[str, int]]:
    """Count how often each label occurs per classification axis in ``results``.

    The axes are ``material_type``, ``jurisdiction`` and ``version_status``.
    ``None`` values are not counted, and an axis with no counted value is left
    out of the returned mapping. Within an axis, labels are ordered as
    ``Counter.most_common()`` returns them.

    Args:
        results: Search results to summarise.

    Returns:
        ``{axis: {label: count}}`` containing only axes with at least one label.
    """
    readers = (
        ("material_type", lambda item: item.material_type),
        ("jurisdiction", lambda item: item.jurisdiction),
        # version_status is read defensively: a missing attribute counts as None.
        ("version_status", lambda item: getattr(item, "version_status", None)),
    )
    facets: dict[str, dict[str, int]] = {}
    for axis, read in readers:
        tally = Counter()
        for item in results:
            label = read(item)
            if label is not None:
                tally[label] += 1
        if tally:
            facets[axis] = dict(tally.most_common())
    return facets
class _R:
    """Lightweight stand-in for SearchResult: only the classification axes and ``id``."""

    def __init__(self, material_type=None, jurisdiction=None, version_status=None, id=1):
        # Plain attribute bag; every axis defaults to None and id defaults to 1.
        self.id = id
        self.version_status = version_status
        self.jurisdiction = jurisdiction
        self.material_type = material_type


def test_compute_facets_distribution():
    """Each axis reports per-label counts; None values are not counted."""
    rows = [
        _R(material_type="law", jurisdiction="KR", version_status="current"),
        _R(material_type="law", jurisdiction="KR", version_status="superseded"),
        _R(material_type="incident", jurisdiction="KR"),
        _R(material_type="paper"),
    ]
    facets = compute_facets(rows)
    expected_by_axis = {
        "material_type": {"law": 2, "incident": 1, "paper": 1},
        # The paper row has no jurisdiction, so only three rows are counted.
        "jurisdiction": {"KR": 3},
        # Rows whose version_status is None are left out.
        "version_status": {"current": 1, "superseded": 1},
    }
    for axis, expected in expected_by_axis.items():
        assert facets[axis] == expected


def test_compute_facets_empty_and_all_none():
    """No results, or results whose axes are all None, yield an empty mapping."""
    for rows in ([], [_R(), _R()]):
        assert compute_facets(rows) == {}


def test_compute_facets_excludes_empty_axes():
    """An axis with no non-None value does not appear in the output."""
    facets = compute_facets([_R(jurisdiction=code) for code in ("US", "EU")])
    assert facets == {"jurisdiction": {"US": 1, "EU": 1}}
    assert "material_type" not in facets


def test_decorate_version_status_skips_without_law():
    """With no law results the session is never used, so passing None is safe."""
    rows = [_R("incident", "KR"), _R("paper")]
    asyncio.run(decorate_version_status(None, rows))
    assert not any(row.version_status is not None for row in rows)