feat(ask): Phase 3.5a guardrails (classifier + refusal gate + grounding + partial)

신규 파일:
- classifier_service.py: exaone binary classifier (sufficient/insufficient) parallel with evidence, circuit breaker, timeout 5s
- refusal_gate.py: multi-signal fusion (score + classifier) AND 조건, conservative fallback 3-tier (classifier 부재 시)
- grounding_check.py: strong/weak flag 분리
  - strong: fabricated_number + intent_misalignment(important keywords)
  - weak: uncited_claim + low_overlap + intent_misalignment(generic)
  - re-gate: 2+ strong → refuse, 1 strong → partial
- sentence_splitter.py: regex 기반 (Phase 3.5b KSS 업그레이드)
- classifier.txt: exaone Y+ prompt (calibration examples 포함)
- search_synthesis_partial.txt: partial answer 전용 프롬프트
- 102_ask_events.sql: /ask 관측 테이블 (completeness 3-분리 지표)
- queries.yaml: Phase 3.5 smoke test 평가셋 10개

수정 파일:
- search.py /ask: classifier parallel + refusal gate + grounding re-gate + defense_layers 로깅 + AskResponse completeness/aspects/confirmed_items
- config.yaml: classifier model 섹션 (exaone3.5:7.8b GPU Ollama)
- config.py: classifier optional 파싱
- AskAnswer.svelte: 4분기 렌더 (full/partial/insufficient/loading)
- ask.ts: Completeness + ConfirmedItem 타입

P1 실측: exaone ternary 불안정 → binary gate 축소. partial은 grounding이 담당. 토론 9라운드 확정.

plan: quiet-meandering-nova.md

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 08:49:11 +09:00
parent 0eecf1afca
commit 06443947bf
13 changed files with 869 additions and 47 deletions
--- a/app/services/search/grounding_check.py
+++ b/app/services/search/grounding_check.py
@@ -0,0 +1,131 @@
+"""Grounding check — post-synthesis 검증 (Phase 3.5a).
+
+Strong/weak flag 분리:
+- **Strong** (→ partial 강등 or refuse): fabricated_number, intent_misalignment(important)
+- **Weak** (→ confidence lower only): uncited_claim, low_overlap, intent_misalignment(generic)
+
+Re-gate 로직 (Phase 3.5a 9라운드 토론 결과):
+- strong 1개 → partial 강등
+- strong 2개 이상 → refuse
+- weak → confidence "low" 만
+
+Intent alignment (rule-based):
+- query 의 핵심 명사가 answer 에 등장하는지 확인
+- "처벌" 같은 중요 키워드 누락은 strong
+- "주요", "관련" 같은 generic 은 무시
+"""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from core.utils import setup_logger
+
+if TYPE_CHECKING:
+    from .evidence_service import EvidenceItem
+
+logger = setup_logger("grounding")
+
+# Generic query words (e.g. "주요", "관련") that never count as an "important"
+# missing keyword during the query/answer intent-alignment check.
+GENERIC_TERMS = frozenset({
+    "주요", "관련", "내용", "정의", "기준", "방법", "설명", "개요",
+    "대한", "위한", "대해", "무엇", "어떤", "어떻게", "있는",
+    "하는", "되는", "이런", "그런", "이것", "그것",
+})
+
+
+@dataclass(slots=True)
+class GroundingResult:
+    """Flags raised by a grounding check, split by severity.
+
+    Per the module notes, one strong flag is meant to demote the answer to
+    partial, two or more to trigger a refusal, and weak flags to lower
+    confidence only. This class just carries the flags; it does not apply
+    that policy itself.
+    """
+
+    # Strong flags, formatted as "fabricated_number:<literal>" or
+    # "intent_misalignment:<terms>".
+    strong_flags: list[str]
+    # Weak flags, formatted as "uncited_claim:<prefix>", "low_overlap:<ratio>"
+    # or "intent_misalignment_generic:missing(<terms>)".
+    weak_flags: list[str]
+
+
+def _extract_number_literals(text: str) -> set[str]:
+    """Collect the numeric literals that appear in *text*.
+
+    The result contains, for every "number + unit" match, the stripped
+    match itself and its leading numeric part with thousands separators
+    removed, plus every digit run delimited by word boundaries.
+    """
+    literals: set[str] = set()
+    unit_pattern = r'\d[\d,.]*\s*[명인개%년월일조항호세]\w{0,2}'
+    for hit in re.finditer(unit_pattern, text):
+        literal = hit.group()
+        literals.add(literal.strip())
+        # Keep the bare number too, so "1,000명" can be compared as "1000".
+        leading = re.match(r'[\d,.]+', literal)
+        if leading is not None:
+            literals.add(leading.group().replace(',', ''))
+    # Digit runs that stand on their own (no unit attached).
+    literals.update(re.findall(r'\b\d+\b', text))
+    return literals
+
+
+def _extract_content_tokens(text: str) -> set[str]:
+    """Return the distinct content tokens of *text*.
+
+    A token is a run of two or more Hangul syllables or a run of three or
+    more ASCII letters; everything else acts as a separator.
+    """
+    token_pattern = r'[가-힣]{2,}|[a-zA-Z]{3,}'
+    return {hit.group() for hit in re.finditer(token_pattern, text)}
+
+
+def check(
+    query: str,
+    answer: str,
+    evidence: list[EvidenceItem],
+) -> GroundingResult:
+    """Validate a synthesized answer against its evidence and the query intent.
+
+    Args:
+        query: The user question.
+        answer: The synthesized answer; citations are expected as ``[n]``
+            markers inside the text.
+        evidence: Evidence items. Only their ``span_text`` attribute is read.
+
+    Returns:
+        A GroundingResult holding the strong and weak flag strings. Both
+        lists are empty when *answer* or *evidence* is empty.
+    """
+    strong: list[str] = []
+    weak: list[str] = []
+
+    if not answer or not evidence:
+        return GroundingResult([], [])
+
+    evidence_text = " ".join(e.span_text for e in evidence)
+
+    # ── Strong 1: fabricated number ──
+    # Citation markers such as "[1]" are references, not numeric claims, so
+    # they are removed before number extraction.
+    answer_nums = _extract_number_literals(re.sub(r'\[\d+\]', ' ', answer))
+    evidence_digits = {
+        re.sub(r'[^\d]', '', en)
+        for en in _extract_number_literals(evidence_text)
+    }
+    # The extractor yields several spellings of one number ("3명" and "3").
+    # Flag each digit sequence once so a single fabricated number cannot
+    # count as two strong flags. Longest spelling first keeps the unit in
+    # the reported flag; sorting also makes the output deterministic.
+    seen_digits: set[str] = set()
+    for num in sorted(answer_nums, key=lambda s: (-len(s), s)):
+        digits_only = re.sub(r'[^\d]', '', num)
+        if not digits_only or digits_only in seen_digits:
+            continue
+        seen_digits.add(digits_only)
+        if not any(digits_only in ed for ed in evidence_digits):
+            strong.append(f"fabricated_number:{num}")
+
+    # ── Strong/Weak 2: query-answer intent alignment ──
+    # NOTE(review): tokens are compared exactly, so Korean particle variants
+    # ("처벌은" vs "처벌이") count as missing — confirm this is intended.
+    query_content = _extract_content_tokens(query)
+    answer_content = _extract_content_tokens(answer)
+    if query_content:
+        # Sorted so the truncated term lists in the flags are reproducible.
+        missing_terms = sorted(query_content - answer_content)
+        important_missing = [
+            t for t in missing_terms if t not in GENERIC_TERMS
+        ]
+        if important_missing:
+            strong.append(
+                f"intent_misalignment:{','.join(important_missing[:3])}"
+            )
+        elif len(missing_terms) > len(query_content) * 0.5:
+            weak.append(
+                f"intent_misalignment_generic:"
+                f"missing({','.join(missing_terms[:5])})"
+            )
+
+    # ── Weak 1: uncited claim ──
+    # Sentences longer than 20 characters without a "[n]" citation marker.
+    sentences = re.split(r'(?<=[.!?。])\s+', answer)
+    for s in sentences:
+        if len(s.strip()) > 20 and not re.search(r'\[\d+\]', s):
+            weak.append(f"uncited_claim:{s[:40]}")
+
+    # ── Weak 2: token overlap ──
+    # Share of answer tokens that also occur in the evidence text.
+    evidence_tokens = _extract_content_tokens(evidence_text)
+    if answer_content:
+        overlap = len(answer_content & evidence_tokens) / len(answer_content)
+        if overlap < 0.4:
+            weak.append(f"low_overlap:{overlap:.2f}")
+
+    if strong or weak:
+        logger.info(
+            "grounding query=%r strong=%d weak=%d flags=%s",
+            query[:60],
+            len(strong),
+            len(weak),
+            ",".join(strong[:3] + weak[:3]),
+        )
+
+    return GroundingResult(strong, weak)