"""Refusal gate — multi-signal fusion (Phase 3.5a). Score gate (deterministic) + classifier verdict (semantic, binary) 를 독립 평가 후 합성. Classifier 부재 시 3-tier conservative fallback. P1 실측 결과: exaone ternary 불안정 → binary (sufficient/insufficient) 로 축소. "full" vs "partial" 구분은 grounding check (intent alignment) 가 담당. """ from __future__ import annotations from dataclasses import dataclass, field from typing import TYPE_CHECKING, Literal from core.utils import setup_logger if TYPE_CHECKING: from .classifier_service import ClassifierResult logger = setup_logger("refusal_gate") # Placeholder thresholds — Phase 3.5b 에서 실측 기반 tuning # AND 조건이라 false refusal 방어됨 (둘 다 만족해야 refuse) SCORE_MAX_REFUSE = 0.25 SCORE_AGG_REFUSE = 0.70 # Conservative fallback tiers (classifier 부재 시) CONSERVATIVE_WEAK = 0.35 CONSERVATIVE_MID = 0.55 @dataclass(slots=True) class RefusalDecision: refused: bool confidence_cap: Literal["high", "medium", "low"] | None # None = no cap rule_triggered: str | None # 디버깅: 어느 signal 이 결정에 기여? def decide( rerank_scores: list[float], classifier: ClassifierResult | None, ) -> RefusalDecision: """Multi-signal fusion. Binary classifier verdict 기반. Returns: RefusalDecision. refused=True 이면 synthesis skip. confidence_cap 은 synthesis 결과의 confidence 에 upper bound 적용. """ max_score = max(rerank_scores) if rerank_scores else 0.0 agg_top3 = sum(sorted(rerank_scores, reverse=True)[:3]) score_gate_fails = ( max_score < SCORE_MAX_REFUSE and agg_top3 < SCORE_AGG_REFUSE ) # ── Classifier 사용 가능 (정상 경로) ── if classifier and classifier.verdict is not None: if classifier.verdict == "insufficient": # Evidence quality override: classifier 가 insufficient 라 해도 # evidence 가 충분히 좋으면 override (토론 8라운드 합의) # (evidence quality 는 이 함수 밖에서 별도 체크 — caller 에서 처리) logger.info( "refusal gate: classifier=insufficient max=%.2f agg=%.2f", max_score, agg_top3, ) return RefusalDecision( refused=True, confidence_cap=None, rule_triggered="classifier_insufficient", ) if score_gate_fails: logger.info( "refusal gate: score_low max=%.2f agg=%.2f classifier=%s", max_score, agg_top3, classifier.verdict, ) return RefusalDecision( refused=True, confidence_cap=None, rule_triggered="score_low", ) # Classifier says sufficient → proceed return RefusalDecision( refused=False, confidence_cap=None, rule_triggered=None, ) # ── Classifier 부재 → 3-tier conservative ── if max_score < CONSERVATIVE_WEAK: return RefusalDecision( refused=True, confidence_cap=None, rule_triggered="conservative_refuse(no_classifier)", ) if max_score < CONSERVATIVE_MID: return RefusalDecision( refused=False, confidence_cap="low", rule_triggered="conservative_low(no_classifier)", ) return RefusalDecision( refused=False, confidence_cap="medium", rule_triggered="conservative_medium(no_classifier)", )