신규 파일:
- classifier_service.py: exaone binary classifier (sufficient/insufficient) parallel with evidence, circuit breaker, timeout 5s
- refusal_gate.py: multi-signal fusion (score + classifier) AND 조건, conservative fallback 3-tier (classifier 부재 시)
- grounding_check.py: strong/weak flag 분리
    strong: fabricated_number + intent_misalignment(important keywords)
    weak: uncited_claim + low_overlap + intent_misalignment(generic)
    re-gate: 2+ strong → refuse, 1 strong → partial
- sentence_splitter.py: regex 기반 (Phase 3.5b KSS 업그레이드)
- classifier.txt: exaone Y+ prompt (calibration examples 포함)
- search_synthesis_partial.txt: partial answer 전용 프롬프트
- 102_ask_events.sql: /ask 관측 테이블 (completeness 3-분리 지표)
- queries.yaml: Phase 3.5 smoke test 평가셋 10개

수정 파일:
- search.py /ask: classifier parallel + refusal gate + grounding re-gate + defense_layers 로깅 + AskResponse completeness/aspects/confirmed_items
- config.yaml: classifier model 섹션 (exaone3.5:7.8b GPU Ollama)
- config.py: classifier optional 파싱
- AskAnswer.svelte: 4분기 렌더 (full/partial/insufficient/loading)
- ask.ts: Completeness + ConfirmedItem 타입

P1 실측: exaone ternary 불안정 → binary gate 축소. partial은 grounding이 담당. 토론 9라운드 확정.

plan: quiet-meandering-nova.md

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
106 lines
3.6 KiB
Python
106 lines
3.6 KiB
Python
"""Refusal gate — multi-signal fusion (Phase 3.5a).

The score gate (deterministic) and the classifier verdict (semantic, binary)
are evaluated independently and then combined.
When the classifier is unavailable, a 3-tier conservative fallback is used.

P1 measurement result: the exaone ternary output was unstable, so it was
reduced to binary (sufficient/insufficient).
The "full" vs "partial" distinction is handled by the grounding check
(intent alignment).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import TYPE_CHECKING, Literal
|
|
|
|
from core.utils import setup_logger
|
|
|
|
if TYPE_CHECKING:
|
|
from .classifier_service import ClassifierResult
|
|
|
|
logger = setup_logger("refusal_gate")
|
|
|
|
# Placeholder thresholds — to be tuned from real measurements in Phase 3.5b.
# This is an AND condition, which guards against false refusals
# (both must be satisfied to refuse).
SCORE_MAX_REFUSE = 0.25
SCORE_AGG_REFUSE = 0.70

# Conservative fallback tiers (used when the classifier is unavailable).
CONSERVATIVE_WEAK = 0.35
CONSERVATIVE_MID = 0.55
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class RefusalDecision:
|
|
refused: bool
|
|
confidence_cap: Literal["high", "medium", "low"] | None # None = no cap
|
|
rule_triggered: str | None # 디버깅: 어느 signal 이 결정에 기여?
|
|
|
|
|
|
def decide(
    rerank_scores: list[float],
    classifier: ClassifierResult | None,
) -> RefusalDecision:
    """Fuse the rerank-score gate with the binary classifier verdict.

    Args:
        rerank_scores: Rerank scores of the candidates. May be empty, in
            which case both the maximum and the top-3 sum are 0.
        classifier: Classifier result, or None. A falsy result, or one whose
            ``verdict`` is None, selects the conservative fallback tiers.

    Returns:
        RefusalDecision. When ``refused`` is True, synthesis is skipped.
        ``confidence_cap`` is an upper bound applied to the confidence of
        the synthesis result.
    """
    max_score = max(rerank_scores) if rerank_scores else 0.0
    agg_top3 = sum(sorted(rerank_scores, reverse=True)[:3])

    # The score gate fails only when BOTH the best score and the sum of the
    # three best scores are below their thresholds.
    score_gate_fails = (
        max_score < SCORE_MAX_REFUSE and agg_top3 < SCORE_AGG_REFUSE
    )

    # ── Classifier available (normal path) ──
    if classifier and classifier.verdict is not None:
        if classifier.verdict == "insufficient":
            # Evidence-quality override: even when the classifier says
            # insufficient, sufficiently good evidence overrides it (agreed
            # in discussion round 8). That check lives outside this function
            # and is handled by the caller.
            logger.info(
                "refusal gate: classifier=insufficient max=%.2f agg=%.2f",
                max_score, agg_top3,
            )
            return RefusalDecision(
                refused=True,
                confidence_cap=None,
                rule_triggered="classifier_insufficient",
            )
        if score_gate_fails:
            # The verdict was not "insufficient", but the scores alone are
            # too low to proceed.
            logger.info(
                "refusal gate: score_low max=%.2f agg=%.2f classifier=%s",
                max_score, agg_top3, classifier.verdict,
            )
            return RefusalDecision(
                refused=True,
                confidence_cap=None,
                rule_triggered="score_low",
            )
        # Classifier says sufficient and the score gate passed → proceed.
        return RefusalDecision(
            refused=False,
            confidence_cap=None,
            rule_triggered=None,
        )

    # ── Classifier unavailable → 3-tier conservative fallback ──
    # Only the best score is consulted here; the weaker it is, the stricter
    # the outcome (refuse → cap "low" → cap "medium").
    if max_score < CONSERVATIVE_WEAK:
        return RefusalDecision(
            refused=True,
            confidence_cap=None,
            rule_triggered="conservative_refuse(no_classifier)",
        )
    if max_score < CONSERVATIVE_MID:
        return RefusalDecision(
            refused=False,
            confidence_cap="low",
            rule_triggered="conservative_low(no_classifier)",
        )
    return RefusalDecision(
        refused=False,
        confidence_cap="medium",
        rule_triggered="conservative_medium(no_classifier)",
    )
|