diff --git a/app/api/search.py b/app/api/search.py index 0ed58f1..75a3a33 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -638,7 +638,10 @@ async def ask( "elapsed_ms": verifier_result.elapsed_ms, } - # ── Re-gate: 6-tier completeness 결정 (Phase 3.5b 4차 리뷰 확정) ── + # ── Re-gate: 7-tier completeness 결정 (Phase 3.5 B2 — Tier 4 신규 삽입, 재번호) ── + # 기존 6-tier (3.5b 4차 리뷰) + Tier 4(g_strong + v_strong_numeric + low_conf → refuse). + # 호환성: defense_layers["re_gate"] 의 string literal 들은 기존 그대로 유지. + # 신규 "refuse(grounding+verifier_numeric)" 만 추가. completeness: Literal["full", "partial", "insufficient"] = "full" covered_aspects = classifier_result.covered_aspects or None missing_aspects = classifier_result.missing_aspects or None @@ -649,6 +652,12 @@ async def ask( v_strong = [f for f in grounding.strong_flags if f.startswith("verifier_")] v_medium = [f for f in grounding.weak_flags if f.startswith("verifier_") and "_medium:" in f] has_direct_negation = any("direct_negation" in f for f in v_strong) + # Phase 3.5 B2: verifier strong flags 중 numeric_conflict 만 카운트. + # promote(VERIFIER_NUMERIC_PROMOTE=1) 활성 시 critical numeric_conflict 가 strong 으로 승격되며 + # 여기 카운트에 잡힘. promote off 면 항상 0 → Tier 4 활성 안 됨 (기존 동작 유지). + v_strong_numeric = sum( + 1 for f in v_strong if f.startswith("verifier_numeric_conflict") + ) if len(g_strong) >= 2: # Tier 1: grounding strong 2+ → refuse @@ -671,13 +680,21 @@ async def ask( sr.refused = True sr.confidence = None defense_log["re_gate"] = "refuse(grounding+low_conf+weak_ev)" + elif g_strong and v_strong_numeric >= 1 and sr.confidence == "low": + # Tier 4 (B2 신규): grounding strong + verifier numeric_conflict strong + low conf → refuse. + # verifier strong 단독 refuse 금지 원칙 유지 — g_strong 교차 필수. 
+ completeness = "insufficient" + sr.answer = None + sr.refused = True + sr.confidence = None + defense_log["re_gate"] = "refuse(grounding+verifier_numeric)" elif g_strong or has_direct_negation: - # Tier 4: grounding strong 1 또는 verifier direct_negation 단독 → partial + # Tier 5 (기존 4): grounding strong 1 또는 verifier direct_negation 단독 → partial completeness = "partial" sr.confidence = "low" defense_log["re_gate"] = "partial(strong_or_negation)" elif v_medium: - # Tier 5: verifier medium 누적 → count 기반 confidence 하향 + # Tier 6 (기존 5): verifier medium 누적 → count 기반 confidence 하향 medium_count = len(v_medium) if medium_count >= 3: sr.confidence = "low" @@ -688,7 +705,7 @@ async def ask( else: defense_log["re_gate"] = f"medium_x{medium_count}(no_action)" elif grounding.weak_flags: - # Tier 6: weak → confidence 한 단계 하향 + # Tier 7 (기존 6): weak → confidence 한 단계 하향 if sr.confidence == "high": sr.confidence = "medium" defense_log["re_gate"] = "conf_lower(weak)" diff --git a/app/prompts/verifier.txt b/app/prompts/verifier.txt index 10b25f7..aa3fa8b 100644 --- a/app/prompts/verifier.txt +++ b/app/prompts/verifier.txt @@ -2,7 +2,7 @@ You are a grounding verifier. Given an answer and its evidence sources, check if ## Contradiction Types (IMPORTANT — severity depends on type) - **direct_negation** (CRITICAL): Answer directly contradicts evidence. Examples: evidence "의무" but answer "권고"; evidence "금지" but answer "허용"; negation reversal ("~해야 한다" vs "~할 필요 없다"). -- **numeric_conflict**: Answer states a number different from evidence. "50명" in evidence but "100명" in answer. Only flag if the same concept is referenced. +- **numeric_conflict**: Answer states a number different from evidence. "50명" in evidence but "100명" in answer. Only flag if the same concept is referenced. severity=critical when the number is the CORE answered quantity (amount/count/rate/date/duration that the query asked for); severity=minor when the number is peripheral (e.g., example/footnote). 
- **intent_core_mismatch**: Answer addresses a fundamentally different topic than the query asked about. - **nuance**: Answer overgeneralizes or adds qualifiers not in evidence (e.g., "모든" when evidence says "일부"). - **unsupported_claim**: Answer makes a factual claim with no basis in any evidence. @@ -10,7 +10,7 @@ You are a grounding verifier. Given an answer and its evidence sources, check if ## Rules 1. Compare each claim in the answer against the cited evidence. A claim with [n] citation should be checked against evidence [n]. 2. NOT a contradiction: Paraphrasing, summarizing, or restating the same fact in different words. Korean formal/informal style (합니다/한다) differences. -3. Numbers must match exactly after normalization (1,000 = 1000). +3. Numbers must match exactly after normalization (1,000 = 1000). Exception: when the evidence states a range (e.g., "100~200명"), an answer value that falls within that range counts as a match, not a numeric_conflict. 4. Legal/regulatory terms must preserve original meaning (의무 ≠ 권고, 금지 ≠ 제한, 허용 ≠ 금지). 5. Maximum 5 contradictions (most severe first: direct_negation > numeric_conflict > intent_core_mismatch > nuance > unsupported_claim). @@ -30,7 +30,8 @@ You are a grounding verifier. 
Given an answer and its evidence sources, check if severity mapping: - direct_negation → "critical" -- All others → "minor" +- numeric_conflict → "critical" if the number is the CORE answered quantity, else "minor" +- All other types → "minor" If no contradictions: {"contradictions": [], "verdict": "clean"} diff --git a/app/services/search/verifier_service.py b/app/services/search/verifier_service.py index 6dec9c8..3606cbf 100644 --- a/app/services/search/verifier_service.py +++ b/app/services/search/verifier_service.py @@ -17,6 +17,7 @@ from __future__ import annotations import asyncio +import os import time from dataclasses import dataclass, field from typing import TYPE_CHECKING, Literal @@ -37,10 +38,20 @@ CIRCUIT_RECOVERY_SEC = 60 _failure_count = 0 _circuit_open_until: float | None = None +# Phase 3.5 B2: numeric_conflict severity promote 실험. +# import time 평가 — env 변경 후 process restart 필수 (docker compose restart fastapi). +# default=0 (off). production 적용은 B3 FP 검증 통과 후만. +_NUMERIC_PROMOTE = os.getenv("VERIFIER_NUMERIC_PROMOTE", "0") == "1" + # severity 매핑 (프롬프트 "critical"/"minor" → 코드 strong/medium/weak) +# Tier 4 (B2): _NUMERIC_PROMOTE=1 일 때 numeric_conflict critical → strong 으로 격상. +# minor 는 medium 유지 (FP 위험 분리). 
_SEVERITY_MAP: dict[str, dict[str, Literal["strong", "medium", "weak"]]] = { "direct_negation": {"critical": "strong", "minor": "strong"}, - "numeric_conflict": {"critical": "medium", "minor": "medium"}, + "numeric_conflict": ( + {"critical": "strong", "minor": "medium"} if _NUMERIC_PROMOTE + else {"critical": "medium", "minor": "medium"} + ), "intent_core_mismatch": {"critical": "medium", "minor": "medium"}, "nuance": {"critical": "weak", "minor": "weak"}, "unsupported_claim": {"critical": "weak", "minor": "weak"}, diff --git a/credentials.env.example b/credentials.env.example index efcf7cf..aefa347 100644 --- a/credentials.env.example +++ b/credentials.env.example @@ -51,6 +51,12 @@ NYT_API_KEY= # ─── 국가법령정보센터 (법령 모니터링) ─── LAW_OC= +# ─── Phase 3.5 B2: verifier numeric_conflict promote 실험 ─── +# 0=off (기본, critical/minor 둘 다 medium), 1=on (critical → strong, minor 는 medium 유지). +# ⚠ env 변경 후 process restart 필수 (docker compose restart fastapi) — _SEVERITY_MAP 가 import time 평가됨. +# B3 FP 검증 (true FP < 20%) 통과 후만 production 적용. +VERIFIER_NUMERIC_PROMOTE=0 + # ─── Phase 3.5 fix2: eval runner shared secret ─── # /ask 엔드포인트의 X-Source=eval / X-Eval-Case-Id 헤더 신뢰 검증 토큰. # 비어있거나 클라이언트 X-Eval-Token 와 불일치 시 eval 헤더 거부 (warning log + source='document_server' 강등). diff --git a/tests/test_verifier_numeric_promote.py b/tests/test_verifier_numeric_promote.py new file mode 100644 index 0000000..e7c8443 --- /dev/null +++ b/tests/test_verifier_numeric_promote.py @@ -0,0 +1,58 @@ +"""Phase 3.5 B2: verifier _SEVERITY_MAP env flag 테스트. + +VERIFIER_NUMERIC_PROMOTE 환경변수에 따른 _SEVERITY_MAP 변화 검증. +모듈은 import time 에 env 평가하므로 reload 필요. 
+"""
+
+from __future__ import annotations
+
+import importlib
+import os
+import sys
+
+# tests/ -> project root -> app/
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "app"))
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _restore_verifier_module():
+    """Reload verifier_service after every test so the patched map cannot leak.
+
+    importlib.reload() mutates the shared module object, whereas monkeypatch
+    only restores os.environ. Without this teardown, whichever env value was
+    reloaded last stays baked into _SEVERITY_MAP for the rest of the session.
+    """
+    yield
+    # Autouse fixtures are set up before the test's own monkeypatch fixture and
+    # therefore torn down after it: the environment is already restored here.
+    from services.search import verifier_service
+    importlib.reload(verifier_service)
+
+
+def _reload_verifier(monkeypatch, value: str | None):
+    """Set or unset VERIFIER_NUMERIC_PROMOTE, then reload verifier_service.
+
+    The module reads the env var at import time, so a reload is needed for
+    _SEVERITY_MAP to be re-evaluated. Returns the reloaded module.
+    """
+    if value is None:
+        monkeypatch.delenv("VERIFIER_NUMERIC_PROMOTE", raising=False)
+    else:
+        monkeypatch.setenv("VERIFIER_NUMERIC_PROMOTE", value)
+    from services.search import verifier_service
+    importlib.reload(verifier_service)
+    return verifier_service
+
+
+def test_severity_map_off_default(monkeypatch):
+    """Env unset: numeric_conflict critical stays medium (pre-existing behavior)."""
+    vs = _reload_verifier(monkeypatch, None)
+    assert vs._SEVERITY_MAP["numeric_conflict"]["critical"] == "medium"
+    assert vs._SEVERITY_MAP["numeric_conflict"]["minor"] == "medium"
+    assert vs._NUMERIC_PROMOTE is False
+
+
+def test_severity_map_on_critical_promoted(monkeypatch):
+    """VERIFIER_NUMERIC_PROMOTE=1: only critical becomes strong; minor stays medium."""
+    vs = _reload_verifier(monkeypatch, "1")
+    assert vs._SEVERITY_MAP["numeric_conflict"]["critical"] == "strong"
+    assert vs._SEVERITY_MAP["numeric_conflict"]["minor"] == "medium"
+    assert vs._NUMERIC_PROMOTE is True
+
+
+def test_severity_map_off_explicit_zero(monkeypatch):
+    """VERIFIER_NUMERIC_PROMOTE=0 set explicitly: off, same as the default."""
+    vs = _reload_verifier(monkeypatch, "0")
+    assert vs._SEVERITY_MAP["numeric_conflict"]["critical"] == "medium"
+    assert vs._NUMERIC_PROMOTE is False
+
+
+def test_direct_negation_invariant(monkeypatch):
+    """direct_negation is always strong regardless of the env flag (safety invariant)."""
+    for value in [None, "0", "1"]:
+        vs = _reload_verifier(monkeypatch, value)
+        assert vs._SEVERITY_MAP["direct_negation"]["critical"] == "strong"
+        assert vs._SEVERITY_MAP["direct_negation"]["minor"] == "strong"