From ec45d9d31097076dc59aa2680c026be372250125 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Fri, 17 Apr 2026 08:01:40 +0900 Subject: [PATCH] =?UTF-8?q?feat(verifier):=20Phase=203.5=20B2=20=E2=80=94?= =?UTF-8?q?=20numeric=5Fconflict=20promote=20(env=20flag)=20+=20Tier=204?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VERIFIER_NUMERIC_PROMOTE 환경변수로 numeric_conflict severity 승격 실험. verifier_service.py: - _NUMERIC_PROMOTE = os.getenv('VERIFIER_NUMERIC_PROMOTE', '0') == '1' (import time 평가 — env 변경 시 process restart 필수) - _SEVERITY_MAP['numeric_conflict']: env=1 → critical=strong / minor=medium, env=0 (기본) → 둘 다 medium (기존 동작 유지) - direct_negation 은 env 무관 항상 strong (안전장치) verifier.txt: - numeric_conflict 정의에 critical/minor 분리 명시 (core quantity vs peripheral) - "Range values satisfy any answer within range" rule 추가 - severity mapping 갱신: numeric_conflict 분기 명시 search.py re-gate (Tier 1~7 재번호, B2 신규 Tier 4): - v_strong_numeric = sum(1 for f in v_strong if f.startswith('verifier_numeric_conflict')) - Tier 4 (신규): g_strong + v_strong_numeric >= 1 + low_conf → refuse re_gate value: 'refuse(grounding+verifier_numeric)' - 원칙 유지: verifier strong 단독 refuse 금지 — g_strong 교차 필수 - 호환성: 기존 re_gate string literals 그대로 유지, 신규 1개만 추가 credentials.env.example: VERIFIER_NUMERIC_PROMOTE=0 (off, B3 통과 후 production 전환) tests/test_verifier_numeric_promote.py: 4 케이스 (env off / on / explicit 0 / direct_negation invariant). monkeypatch.setenv + importlib.reload 패턴. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- app/api/search.py | 25 +++++++++-- app/prompts/verifier.txt | 7 +-- app/services/search/verifier_service.py | 13 +++++- credentials.env.example | 6 +++ tests/test_verifier_numeric_promote.py | 58 +++++++++++++++++++++++++ 5 files changed, 101 insertions(+), 8 deletions(-) create mode 100644 tests/test_verifier_numeric_promote.py diff --git a/app/api/search.py b/app/api/search.py index 0ed58f1..75a3a33 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -638,7 +638,10 @@ async def ask( "elapsed_ms": verifier_result.elapsed_ms, } - # ── Re-gate: 6-tier completeness 결정 (Phase 3.5b 4차 리뷰 확정) ── + # ── Re-gate: 7-tier completeness 결정 (Phase 3.5 B2 — Tier 4 신규 삽입, 재번호) ── + # 기존 6-tier (3.5b 4차 리뷰) + Tier 4(g_strong + v_strong_numeric + low_conf → refuse). + # 호환성: defense_layers["re_gate"] 의 string literal 들은 기존 그대로 유지. + # 신규 "refuse(grounding+verifier_numeric)" 만 추가. completeness: Literal["full", "partial", "insufficient"] = "full" covered_aspects = classifier_result.covered_aspects or None missing_aspects = classifier_result.missing_aspects or None @@ -649,6 +652,12 @@ async def ask( v_strong = [f for f in grounding.strong_flags if f.startswith("verifier_")] v_medium = [f for f in grounding.weak_flags if f.startswith("verifier_") and "_medium:" in f] has_direct_negation = any("direct_negation" in f for f in v_strong) + # Phase 3.5 B2: verifier strong flags 중 numeric_conflict 만 카운트. + # promote(VERIFIER_NUMERIC_PROMOTE=1) 활성 시 critical numeric_conflict 가 strong 으로 승격되며 + # 여기 카운트에 잡힘. promote off 면 항상 0 → Tier 4 활성 안 됨 (기존 동작 유지). 
+ v_strong_numeric = sum( + 1 for f in v_strong if f.startswith("verifier_numeric_conflict") + ) if len(g_strong) >= 2: # Tier 1: grounding strong 2+ → refuse @@ -671,13 +680,21 @@ async def ask( sr.refused = True sr.confidence = None defense_log["re_gate"] = "refuse(grounding+low_conf+weak_ev)" + elif g_strong and v_strong_numeric >= 1 and sr.confidence == "low": + # Tier 4 (B2 신규): grounding strong + verifier numeric_conflict strong + low conf → refuse. + # verifier strong 단독 refuse 금지 원칙 유지 — g_strong 교차 필수. + completeness = "insufficient" + sr.answer = None + sr.refused = True + sr.confidence = None + defense_log["re_gate"] = "refuse(grounding+verifier_numeric)" elif g_strong or has_direct_negation: - # Tier 4: grounding strong 1 또는 verifier direct_negation 단독 → partial + # Tier 5 (기존 4): grounding strong 1 또는 verifier direct_negation 단독 → partial completeness = "partial" sr.confidence = "low" defense_log["re_gate"] = "partial(strong_or_negation)" elif v_medium: - # Tier 5: verifier medium 누적 → count 기반 confidence 하향 + # Tier 6 (기존 5): verifier medium 누적 → count 기반 confidence 하향 medium_count = len(v_medium) if medium_count >= 3: sr.confidence = "low" @@ -688,7 +705,7 @@ async def ask( else: defense_log["re_gate"] = f"medium_x{medium_count}(no_action)" elif grounding.weak_flags: - # Tier 6: weak → confidence 한 단계 하향 + # Tier 7 (기존 6): weak → confidence 한 단계 하향 if sr.confidence == "high": sr.confidence = "medium" defense_log["re_gate"] = "conf_lower(weak)" diff --git a/app/prompts/verifier.txt b/app/prompts/verifier.txt index 10b25f7..aa3fa8b 100644 --- a/app/prompts/verifier.txt +++ b/app/prompts/verifier.txt @@ -2,7 +2,7 @@ You are a grounding verifier. Given an answer and its evidence sources, check if ## Contradiction Types (IMPORTANT — severity depends on type) - **direct_negation** (CRITICAL): Answer directly contradicts evidence. Examples: evidence "의무" but answer "권고"; evidence "금지" but answer "허용"; negation reversal ("~해야 한다" vs "~할 필요 없다"). 
-- **numeric_conflict**: Answer states a number different from evidence. "50명" in evidence but "100명" in answer. Only flag if the same concept is referenced. +- **numeric_conflict**: Answer states a number different from evidence. "50명" in evidence but "100명" in answer. Only flag if the same concept is referenced. severity=critical when the number is the CORE answered quantity (amount/count/rate/date/duration that the query asked for); severity=minor when the number is peripheral (e.g., example/footnote). - **intent_core_mismatch**: Answer addresses a fundamentally different topic than the query asked about. - **nuance**: Answer overgeneralizes or adds qualifiers not in evidence (e.g., "모든" when evidence says "일부"). - **unsupported_claim**: Answer makes a factual claim with no basis in any evidence. @@ -10,7 +10,7 @@ You are a grounding verifier. Given an answer and its evidence sources, check if ## Rules 1. Compare each claim in the answer against the cited evidence. A claim with [n] citation should be checked against evidence [n]. 2. NOT a contradiction: Paraphrasing, summarizing, or restating the same fact in different words. Korean formal/informal style (합니다/한다) differences. -3. Numbers must match exactly after normalization (1,000 = 1000). +3. Numbers must match exactly after normalization (1,000 = 1000). Exception: when the evidence states a range (e.g., "100~200명"), any answer value within that range counts as a match, not a numeric_conflict. 4. Legal/regulatory terms must preserve original meaning (의무 ≠ 권고, 금지 ≠ 제한, 허용 ≠ 금지). 5. Maximum 5 contradictions (most severe first: direct_negation > numeric_conflict > intent_core_mismatch > nuance > unsupported_claim). @@ -30,7 +30,8 @@ You are a grounding verifier. 
Given an answer and its evidence sources, check if severity mapping: - direct_negation → "critical" -- All others → "minor" +- numeric_conflict → "critical" if the number is the CORE answered quantity, else "minor" +- All other types → "minor" If no contradictions: {"contradictions": [], "verdict": "clean"} diff --git a/app/services/search/verifier_service.py b/app/services/search/verifier_service.py index 6dec9c8..3606cbf 100644 --- a/app/services/search/verifier_service.py +++ b/app/services/search/verifier_service.py @@ -17,6 +17,7 @@ from __future__ import annotations import asyncio +import os import time from dataclasses import dataclass, field from typing import TYPE_CHECKING, Literal @@ -37,10 +38,20 @@ CIRCUIT_RECOVERY_SEC = 60 _failure_count = 0 _circuit_open_until: float | None = None +# Phase 3.5 B2: numeric_conflict severity promote 실험. +# import time 평가 — env 변경 후 process restart 필수 (docker compose restart fastapi). +# default=0 (off). production 적용은 B3 FP 검증 통과 후만. +_NUMERIC_PROMOTE = os.getenv("VERIFIER_NUMERIC_PROMOTE", "0") == "1" + # severity 매핑 (프롬프트 "critical"/"minor" → 코드 strong/medium/weak) +# Tier 4 (B2): _NUMERIC_PROMOTE=1 일 때 numeric_conflict critical → strong 으로 격상. +# minor 는 medium 유지 (FP 위험 분리). 
_SEVERITY_MAP: dict[str, dict[str, Literal["strong", "medium", "weak"]]] = { "direct_negation": {"critical": "strong", "minor": "strong"}, - "numeric_conflict": {"critical": "medium", "minor": "medium"}, + "numeric_conflict": ( + {"critical": "strong", "minor": "medium"} if _NUMERIC_PROMOTE + else {"critical": "medium", "minor": "medium"} + ), "intent_core_mismatch": {"critical": "medium", "minor": "medium"}, "nuance": {"critical": "weak", "minor": "weak"}, "unsupported_claim": {"critical": "weak", "minor": "weak"}, diff --git a/credentials.env.example b/credentials.env.example index efcf7cf..aefa347 100644 --- a/credentials.env.example +++ b/credentials.env.example @@ -51,6 +51,12 @@ NYT_API_KEY= # ─── 국가법령정보센터 (법령 모니터링) ─── LAW_OC= +# ─── Phase 3.5 B2: verifier numeric_conflict promote 실험 ─── +# 0=off (기본, critical/minor 둘 다 medium), 1=on (critical → strong, minor 는 medium 유지). +# ⚠ env 변경 후 process restart 필수 (docker compose restart fastapi) — _SEVERITY_MAP 가 import time 평가됨. +# B3 FP 검증 (true FP < 20%) 통과 후만 production 적용. +VERIFIER_NUMERIC_PROMOTE=0 + # ─── Phase 3.5 fix2: eval runner shared secret ─── # /ask 엔드포인트의 X-Source=eval / X-Eval-Case-Id 헤더 신뢰 검증 토큰. # 비어있거나 클라이언트 X-Eval-Token 와 불일치 시 eval 헤더 거부 (warning log + source='document_server' 강등). diff --git a/tests/test_verifier_numeric_promote.py b/tests/test_verifier_numeric_promote.py new file mode 100644 index 0000000..e7c8443 --- /dev/null +++ b/tests/test_verifier_numeric_promote.py @@ -0,0 +1,58 @@ +"""Phase 3.5 B2: verifier _SEVERITY_MAP env flag 테스트. + +VERIFIER_NUMERIC_PROMOTE 환경변수에 따른 _SEVERITY_MAP 변화 검증. +모듈은 import time 에 env 평가하므로 reload 필요. 
+""" + +from __future__ import annotations + +import importlib +import os +import sys + +# tests/ → 프로젝트 루트 → app/ +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "app")) + +import pytest + + +def _reload_verifier(monkeypatch, value: str | None): + """env 설정 후 verifier_service 를 reload 하여 _SEVERITY_MAP 재평가.""" + if value is None: + monkeypatch.delenv("VERIFIER_NUMERIC_PROMOTE", raising=False) + else: + monkeypatch.setenv("VERIFIER_NUMERIC_PROMOTE", value) + from services.search import verifier_service + importlib.reload(verifier_service) + return verifier_service + + +def test_severity_map_off_default(monkeypatch): + """env 미설정 → numeric_conflict critical 은 medium (기존 동작).""" + vs = _reload_verifier(monkeypatch, None) + assert vs._SEVERITY_MAP["numeric_conflict"]["critical"] == "medium" + assert vs._SEVERITY_MAP["numeric_conflict"]["minor"] == "medium" + assert vs._NUMERIC_PROMOTE is False + + +def test_severity_map_on_critical_promoted(monkeypatch): + """VERIFIER_NUMERIC_PROMOTE=1 → critical 만 strong, minor 는 medium 유지.""" + vs = _reload_verifier(monkeypatch, "1") + assert vs._SEVERITY_MAP["numeric_conflict"]["critical"] == "strong" + assert vs._SEVERITY_MAP["numeric_conflict"]["minor"] == "medium" + assert vs._NUMERIC_PROMOTE is True + + +def test_severity_map_off_explicit_zero(monkeypatch): + """VERIFIER_NUMERIC_PROMOTE=0 명시 → off (default 와 동일).""" + vs = _reload_verifier(monkeypatch, "0") + assert vs._SEVERITY_MAP["numeric_conflict"]["critical"] == "medium" + assert vs._NUMERIC_PROMOTE is False + + +def test_direct_negation_invariant(monkeypatch): + """direct_negation 은 env 무관 항상 strong (불변 — 안전장치).""" + for value in [None, "0", "1"]: + vs = _reload_verifier(monkeypatch, value) + assert vs._SEVERITY_MAP["direct_negation"]["critical"] == "strong" + assert vs._SEVERITY_MAP["direct_negation"]["minor"] == "strong"