feat(ask): Phase 3.5a guardrails (classifier + refusal gate + grounding + partial)

신규 파일:
- classifier_service.py: exaone binary classifier (sufficient/insufficient)
  parallel with evidence, circuit breaker, timeout 5s
- refusal_gate.py: multi-signal fusion (score + classifier)
  AND 조건, conservative fallback 3-tier (classifier 부재 시)
- grounding_check.py: strong/weak flag 분리
  strong: fabricated_number + intent_misalignment(important keywords)
  weak: uncited_claim + low_overlap + intent_misalignment(generic)
  re-gate: 2+ strong → refuse, 1 strong → partial
- sentence_splitter.py: regex 기반 (Phase 3.5b KSS 업그레이드)
- classifier.txt: exaone Y+ prompt (calibration examples 포함)
- search_synthesis_partial.txt: partial answer 전용 프롬프트
- 102_ask_events.sql: /ask 관측 테이블 (completeness 3-분리 지표)
- queries.yaml: Phase 3.5 smoke test 평가셋 10개

수정 파일:
- search.py /ask: classifier parallel + refusal gate + grounding re-gate
  + defense_layers 로깅 + AskResponse completeness/aspects/confirmed_items
- config.yaml: classifier model 섹션 (exaone3.5:7.8b GPU Ollama)
- config.py: classifier optional 파싱
- AskAnswer.svelte: 4분기 렌더 (full/partial/insufficient/loading)
- ask.ts: Completeness + ConfirmedItem 타입

P1 실측: exaone ternary 불안정 → binary gate 축소.
partial은 grounding이 담당. 토론 9라운드 확정.

plan: quiet-meandering-nova.md

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 08:49:11 +09:00
parent 0eecf1afca
commit 06443947bf
13 changed files with 869 additions and 47 deletions
--- a/app/services/search/refusal_gate.py
+++ b/app/services/search/refusal_gate.py
@@ -0,0 +1,105 @@
+"""Refusal gate — multi-signal fusion (Phase 3.5a).
+
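+The score gate (deterministic) and the classifier verdict (semantic, binary)
+are evaluated independently and then combined.
+When the classifier is unavailable, a 3-tier conservative fallback is used.
+
+P1 measurements: exaone's ternary output was unstable, so the verdict was
+reduced to binary (sufficient/insufficient).
+The "full" vs "partial" distinction is handled by the grounding check
+(intent alignment).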
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Literal
+
+from core.utils import setup_logger
+
+if TYPE_CHECKING:
+    from .classifier_service import ClassifierResult
+
+logger = setup_logger("refusal_gate")
+
+# Placeholder thresholds — to be tuned against measured data in Phase 3.5b.
+# The score gate ANDs the two checks below, which guards against false
+# refusals: both must hold for the score gate to fail.
+# SCORE_MAX_REFUSE is compared with the best rerank score,
+# SCORE_AGG_REFUSE with the sum of the top-3 rerank scores (see decide()).
+SCORE_MAX_REFUSE = 0.25
+SCORE_AGG_REFUSE = 0.70
+
+# Conservative fallback tiers (used when no classifier verdict is available).
+# Best score below CONSERVATIVE_WEAK -> refuse; below CONSERVATIVE_MID ->
+# answer with confidence capped at "low"; otherwise capped at "medium".
+CONSERVATIVE_WEAK = 0.35
+CONSERVATIVE_MID = 0.55
+
+
+@dataclass(slots=True)
+class RefusalDecision:
+    """Outcome of the refusal gate, as returned by decide()."""
+
+    # True when the gate decided the question should be refused.
+    refused: bool
+    # Upper bound for the answer's confidence level; None = no cap.
+    confidence_cap: Literal["high", "medium", "low"] | None  # None = no cap
+    # Debugging aid: which signal/rule produced this decision (None if none fired).
+    rule_triggered: str | None  # debugging: which signal contributed to the decision?
+
+
+def decide(
+    rerank_scores: list[float],
+    classifier: ClassifierResult | None,
+) -> RefusalDecision:
+    """Combine the rerank-score gate and the binary classifier verdict.
+
+    Args:
+        rerank_scores: Rerank scores of the candidate evidence. An empty
+            list is treated as a best score of 0.0.
+        classifier: Classifier result, or None. A falsy result, or one whose
+            ``verdict`` is None, takes the conservative fallback path.
+
+    Returns:
+        RefusalDecision. ``refused=True`` means synthesis should be skipped;
+        ``confidence_cap`` is an upper bound to apply to the confidence of
+        the synthesized answer (None = no cap).
+    """
+    best = max(rerank_scores, default=0.0)
+    top3_sum = sum(sorted(rerank_scores, reverse=True)[:3])
+    # The score gate fails only when BOTH the best score and the top-3 sum
+    # are below their thresholds.
+    weak_scores = best < SCORE_MAX_REFUSE and top3_sum < SCORE_AGG_REFUSE
+
+    verdict = classifier.verdict if classifier else None
+
+    # ── No usable classifier verdict → 3-tier conservative fallback ──
+    if verdict is None:
+        if best < CONSERVATIVE_WEAK:
+            return RefusalDecision(
+                refused=True,
+                confidence_cap=None,
+                rule_triggered="conservative_refuse(no_classifier)",
+            )
+        if best < CONSERVATIVE_MID:
+            return RefusalDecision(
+                refused=False,
+                confidence_cap="low",
+                rule_triggered="conservative_low(no_classifier)",
+            )
+        return RefusalDecision(
+            refused=False,
+            confidence_cap="medium",
+            rule_triggered="conservative_medium(no_classifier)",
+        )
+
+    # ── Classifier verdict available (normal path) ──
+    if verdict == "insufficient":
+        # Evidence-quality override: per the original design note (agreed in
+        # discussion round 8), sufficiently good evidence may override an
+        # "insufficient" verdict. That check is NOT done here; it is expected
+        # to be handled by the caller.
+        logger.info(
+            "refusal gate: classifier=insufficient max=%.2f agg=%.2f",
+            best, top3_sum,
+        )
+        reason = "classifier_insufficient"
+    elif weak_scores:
+        logger.info(
+            "refusal gate: score_low max=%.2f agg=%.2f classifier=%s",
+            best, top3_sum, verdict,
+        )
+        reason = "score_low"
+    else:
+        # Any other verdict is treated as "sufficient" → proceed uncapped.
+        reason = None
+
+    return RefusalDecision(
+        refused=reason is not None,
+        confidence_cap=None,
+        rule_triggered=reason,
+    )