feat(ask): Phase 3.5a guardrails (classifier + refusal gate + grounding + partial)
신규 파일: - classifier_service.py: exaone binary classifier (sufficient/insufficient) parallel with evidence, circuit breaker, timeout 5s - refusal_gate.py: multi-signal fusion (score + classifier) AND 조건, conservative fallback 3-tier (classifier 부재 시) - grounding_check.py: strong/weak flag 분리 strong: fabricated_number + intent_misalignment(important keywords) weak: uncited_claim + low_overlap + intent_misalignment(generic) re-gate: 2+ strong → refuse, 1 strong → partial - sentence_splitter.py: regex 기반 (Phase 3.5b KSS 업그레이드) - classifier.txt: exaone Y+ prompt (calibration examples 포함) - search_synthesis_partial.txt: partial answer 전용 프롬프트 - 102_ask_events.sql: /ask 관측 테이블 (completeness 3-분리 지표) - queries.yaml: Phase 3.5 smoke test 평가셋 10개 수정 파일: - search.py /ask: classifier parallel + refusal gate + grounding re-gate + defense_layers 로깅 + AskResponse completeness/aspects/confirmed_items - config.yaml: classifier model 섹션 (exaone3.5:7.8b GPU Ollama) - config.py: classifier optional 파싱 - AskAnswer.svelte: 4분기 렌더 (full/partial/insufficient/loading) - ask.ts: Completeness + ConfirmedItem 타입 P1 실측: exaone ternary 불안정 → binary gate 축소. partial은 grounding이 담당. 토론 9라운드 확정. plan: quiet-meandering-nova.md Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
150
app/services/search/classifier_service.py
Normal file
150
app/services/search/classifier_service.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""Answerability classifier (Phase 3.5a).
|
||||
|
||||
exaone3.5:7.8b GPU Ollama 기반. MLX gate 밖 — evidence extraction 과 병렬 실행.
|
||||
|
||||
P1 실측 결과: ternary (full/partial/insufficient) 불안정 → **binary (sufficient/insufficient)**.
|
||||
"full" vs "partial" 구분은 grounding_check 의 intent alignment 이 담당.
|
||||
|
||||
Classifier verdict 는 "relevant evidence 가 있나" 의 binary 판단.
|
||||
covered_aspects / missing_aspects 는 로깅용으로 유지 (refusal gate 에서 사용 안 함).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Literal
|
||||
|
||||
from ai.client import AIClient, _load_prompt, parse_json_response
|
||||
from core.config import settings
|
||||
from core.utils import setup_logger
|
||||
|
||||
logger = setup_logger("classifier")
|
||||
|
||||
LLM_TIMEOUT_MS = 5000
|
||||
CIRCUIT_THRESHOLD = 5
|
||||
CIRCUIT_RECOVERY_SEC = 60
|
||||
|
||||
_failure_count = 0
|
||||
_circuit_open_until: float | None = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ClassifierResult:
|
||||
status: Literal["ok", "timeout", "error", "circuit_open", "skipped"]
|
||||
verdict: Literal["sufficient", "insufficient"] | None
|
||||
covered_aspects: list[str]
|
||||
missing_aspects: list[str]
|
||||
elapsed_ms: float
|
||||
|
||||
|
||||
try:
|
||||
CLASSIFIER_PROMPT = _load_prompt("classifier.txt")
|
||||
except FileNotFoundError:
|
||||
CLASSIFIER_PROMPT = ""
|
||||
logger.warning("classifier.txt not found — classifier will always skip")
|
||||
|
||||
|
||||
def _build_input(
|
||||
query: str,
|
||||
top_chunks: list[dict],
|
||||
rerank_scores: list[float],
|
||||
) -> str:
|
||||
"""Y+ input (content + scores with role separation)."""
|
||||
chunk_block = "\n".join(
|
||||
f"[{i+1}] title: {c.get('title','')}\n"
|
||||
f" section: {c.get('section','')}\n"
|
||||
f" snippet: {c.get('snippet','')}"
|
||||
for i, c in enumerate(top_chunks[:3])
|
||||
)
|
||||
scores_str = ", ".join(f"{s:.2f}" for s in rerank_scores[:3])
|
||||
return (
|
||||
CLASSIFIER_PROMPT
|
||||
.replace("{query}", query)
|
||||
.replace("{chunks}", chunk_block)
|
||||
.replace("{scores}", scores_str)
|
||||
)
|
||||
|
||||
|
||||
async def classify(
|
||||
query: str,
|
||||
top_chunks: list[dict],
|
||||
rerank_scores: list[float],
|
||||
) -> ClassifierResult:
|
||||
"""Always-on binary classifier. Parallel with evidence extraction.
|
||||
|
||||
Returns:
|
||||
ClassifierResult with verdict=sufficient|insufficient.
|
||||
Status "ok" 이 아니면 verdict=None (caller 가 fallback 처리).
|
||||
"""
|
||||
global _failure_count, _circuit_open_until
|
||||
t_start = time.perf_counter()
|
||||
|
||||
# Circuit breaker
|
||||
if _circuit_open_until and time.time() < _circuit_open_until:
|
||||
return ClassifierResult("circuit_open", None, [], [], 0.0)
|
||||
|
||||
if not CLASSIFIER_PROMPT:
|
||||
return ClassifierResult("skipped", None, [], [], 0.0)
|
||||
|
||||
if not hasattr(settings.ai, "classifier") or settings.ai.classifier is None:
|
||||
return ClassifierResult("skipped", None, [], [], 0.0)
|
||||
|
||||
prompt = _build_input(query, top_chunks, rerank_scores)
|
||||
client = AIClient()
|
||||
try:
|
||||
# ⚠ MLX gate 안 씀. Ollama(exaone) 는 concurrent OK.
|
||||
async with asyncio.timeout(LLM_TIMEOUT_MS / 1000):
|
||||
raw = await client._request(settings.ai.classifier, prompt)
|
||||
_failure_count = 0
|
||||
except asyncio.TimeoutError:
|
||||
_failure_count += 1
|
||||
if _failure_count >= CIRCUIT_THRESHOLD:
|
||||
_circuit_open_until = time.time() + CIRCUIT_RECOVERY_SEC
|
||||
logger.error(f"classifier circuit OPEN for {CIRCUIT_RECOVERY_SEC}s")
|
||||
logger.warning("classifier timeout")
|
||||
return ClassifierResult(
|
||||
"timeout", None, [], [],
|
||||
(time.perf_counter() - t_start) * 1000,
|
||||
)
|
||||
except Exception as e:
|
||||
_failure_count += 1
|
||||
if _failure_count >= CIRCUIT_THRESHOLD:
|
||||
_circuit_open_until = time.time() + CIRCUIT_RECOVERY_SEC
|
||||
logger.error(f"classifier circuit OPEN for {CIRCUIT_RECOVERY_SEC}s")
|
||||
logger.warning(f"classifier error: {e}")
|
||||
return ClassifierResult(
|
||||
"error", None, [], [],
|
||||
(time.perf_counter() - t_start) * 1000,
|
||||
)
|
||||
finally:
|
||||
await client.close()
|
||||
|
||||
elapsed_ms = (time.perf_counter() - t_start) * 1000
|
||||
parsed = parse_json_response(raw)
|
||||
if not isinstance(parsed, dict):
|
||||
logger.warning("classifier parse failed raw=%r", (raw or "")[:200])
|
||||
return ClassifierResult("error", None, [], [], elapsed_ms)
|
||||
|
||||
# ternary → binary 매핑
|
||||
raw_verdict = parsed.get("verdict", "")
|
||||
if raw_verdict == "insufficient":
|
||||
verdict: Literal["sufficient", "insufficient"] | None = "insufficient"
|
||||
elif raw_verdict in ("full", "partial", "sufficient"):
|
||||
verdict = "sufficient"
|
||||
else:
|
||||
verdict = None
|
||||
|
||||
covered = parsed.get("covered_aspects") or []
|
||||
missing = parsed.get("missing_aspects") or []
|
||||
if not isinstance(covered, list):
|
||||
covered = []
|
||||
if not isinstance(missing, list):
|
||||
missing = []
|
||||
|
||||
logger.info(
|
||||
"classifier ok query=%r verdict=%s (raw=%s) covered=%d missing=%d elapsed_ms=%.0f",
|
||||
query[:60], verdict, raw_verdict, len(covered), len(missing), elapsed_ms,
|
||||
)
|
||||
return ClassifierResult("ok", verdict, covered, missing, elapsed_ms)
|
||||
131
app/services/search/grounding_check.py
Normal file
131
app/services/search/grounding_check.py
Normal file
@@ -0,0 +1,131 @@
|
||||
"""Grounding check — post-synthesis 검증 (Phase 3.5a).
|
||||
|
||||
Strong/weak flag 분리:
|
||||
- **Strong** (→ partial 강등 or refuse): fabricated_number, intent_misalignment(important)
|
||||
- **Weak** (→ confidence lower only): uncited_claim, low_overlap, intent_misalignment(generic)
|
||||
|
||||
Re-gate 로직 (Phase 3.5a 9라운드 토론 결과):
|
||||
- strong 1개 → partial 강등
|
||||
- strong 2개 이상 → refuse
|
||||
- weak → confidence "low" 만
|
||||
|
||||
Intent alignment (rule-based):
|
||||
- query 의 핵심 명사가 answer 에 등장하는지 확인
|
||||
- "처벌" 같은 중요 키워드 누락은 strong
|
||||
- "주요", "관련" 같은 generic 은 무시
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from core.utils import setup_logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .evidence_service import EvidenceItem
|
||||
|
||||
logger = setup_logger("grounding")
|
||||
|
||||
# "주요", "관련" 등 intent alignment 에서 제외할 generic 단어
|
||||
GENERIC_TERMS = frozenset({
|
||||
"주요", "관련", "내용", "정의", "기준", "방법", "설명", "개요",
|
||||
"대한", "위한", "대해", "무엇", "어떤", "어떻게", "있는",
|
||||
"하는", "되는", "이런", "그런", "이것", "그것",
|
||||
})
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class GroundingResult:
|
||||
strong_flags: list[str]
|
||||
weak_flags: list[str]
|
||||
|
||||
|
||||
def _extract_number_literals(text: str) -> set[str]:
|
||||
"""숫자 + 단위 추출 + normalize."""
|
||||
raw = set(re.findall(r'\d[\d,.]*\s*[명인개%년월일조항호세]\w{0,2}', text))
|
||||
normalized = set()
|
||||
for r in raw:
|
||||
normalized.add(r.strip())
|
||||
num_only = re.match(r'[\d,.]+', r)
|
||||
if num_only:
|
||||
normalized.add(num_only.group().replace(',', ''))
|
||||
# 단독 숫자도 추출
|
||||
for d in re.findall(r'\b\d+\b', text):
|
||||
normalized.add(d)
|
||||
return normalized
|
||||
|
||||
|
||||
def _extract_content_tokens(text: str) -> set[str]:
|
||||
"""한국어 2자 이상 명사 + 영어 3자 이상 단어."""
|
||||
return set(re.findall(r'[가-힣]{2,}|[a-zA-Z]{3,}', text))
|
||||
|
||||
|
||||
def check(
|
||||
query: str,
|
||||
answer: str,
|
||||
evidence: list[EvidenceItem],
|
||||
) -> GroundingResult:
|
||||
"""답변 vs evidence grounding 검증 + query intent alignment."""
|
||||
strong: list[str] = []
|
||||
weak: list[str] = []
|
||||
|
||||
if not answer or not evidence:
|
||||
return GroundingResult([], [])
|
||||
|
||||
evidence_text = " ".join(e.span_text for e in evidence)
|
||||
|
||||
# ── Strong 1: fabricated number ──
|
||||
answer_nums = _extract_number_literals(answer)
|
||||
evidence_nums = _extract_number_literals(evidence_text)
|
||||
for num in answer_nums:
|
||||
digits_only = re.sub(r'[^\d]', '', num)
|
||||
if digits_only and not any(
|
||||
digits_only in re.sub(r'[^\d]', '', en) for en in evidence_nums
|
||||
):
|
||||
strong.append(f"fabricated_number:{num}")
|
||||
|
||||
# ── Strong/Weak 2: query-answer intent alignment ──
|
||||
query_content = _extract_content_tokens(query)
|
||||
answer_content = _extract_content_tokens(answer)
|
||||
if query_content:
|
||||
missing_terms = query_content - answer_content
|
||||
important_missing = [
|
||||
t for t in missing_terms
|
||||
if t not in GENERIC_TERMS and len(t) >= 2
|
||||
]
|
||||
if important_missing:
|
||||
strong.append(
|
||||
f"intent_misalignment:{','.join(important_missing[:3])}"
|
||||
)
|
||||
elif len(missing_terms) > len(query_content) * 0.5:
|
||||
weak.append(
|
||||
f"intent_misalignment_generic:"
|
||||
f"missing({','.join(list(missing_terms)[:5])})"
|
||||
)
|
||||
|
||||
# ── Weak 1: uncited claim ──
|
||||
sentences = re.split(r'(?<=[.!?。])\s+', answer)
|
||||
for s in sentences:
|
||||
if len(s.strip()) > 20 and not re.search(r'\[\d+\]', s):
|
||||
weak.append(f"uncited_claim:{s[:40]}")
|
||||
|
||||
# ── Weak 2: token overlap ──
|
||||
answer_tokens = _extract_content_tokens(answer)
|
||||
evidence_tokens = _extract_content_tokens(evidence_text)
|
||||
if answer_tokens:
|
||||
overlap = len(answer_tokens & evidence_tokens) / len(answer_tokens)
|
||||
if overlap < 0.4:
|
||||
weak.append(f"low_overlap:{overlap:.2f}")
|
||||
|
||||
if strong or weak:
|
||||
logger.info(
|
||||
"grounding query=%r strong=%d weak=%d flags=%s",
|
||||
query[:60],
|
||||
len(strong),
|
||||
len(weak),
|
||||
",".join(strong[:3] + weak[:3]),
|
||||
)
|
||||
|
||||
return GroundingResult(strong, weak)
|
||||
105
app/services/search/refusal_gate.py
Normal file
105
app/services/search/refusal_gate.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""Refusal gate — multi-signal fusion (Phase 3.5a).
|
||||
|
||||
Score gate (deterministic) + classifier verdict (semantic, binary) 를 독립 평가 후 합성.
|
||||
Classifier 부재 시 3-tier conservative fallback.
|
||||
|
||||
P1 실측 결과: exaone ternary 불안정 → binary (sufficient/insufficient) 로 축소.
|
||||
"full" vs "partial" 구분은 grounding check (intent alignment) 가 담당.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Literal
|
||||
|
||||
from core.utils import setup_logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .classifier_service import ClassifierResult
|
||||
|
||||
logger = setup_logger("refusal_gate")
|
||||
|
||||
# Placeholder thresholds — Phase 3.5b 에서 실측 기반 tuning
|
||||
# AND 조건이라 false refusal 방어됨 (둘 다 만족해야 refuse)
|
||||
SCORE_MAX_REFUSE = 0.25
|
||||
SCORE_AGG_REFUSE = 0.70
|
||||
|
||||
# Conservative fallback tiers (classifier 부재 시)
|
||||
CONSERVATIVE_WEAK = 0.35
|
||||
CONSERVATIVE_MID = 0.55
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class RefusalDecision:
|
||||
refused: bool
|
||||
confidence_cap: Literal["high", "medium", "low"] | None # None = no cap
|
||||
rule_triggered: str | None # 디버깅: 어느 signal 이 결정에 기여?
|
||||
|
||||
|
||||
def decide(
|
||||
rerank_scores: list[float],
|
||||
classifier: ClassifierResult | None,
|
||||
) -> RefusalDecision:
|
||||
"""Multi-signal fusion. Binary classifier verdict 기반.
|
||||
|
||||
Returns:
|
||||
RefusalDecision. refused=True 이면 synthesis skip.
|
||||
confidence_cap 은 synthesis 결과의 confidence 에 upper bound 적용.
|
||||
"""
|
||||
max_score = max(rerank_scores) if rerank_scores else 0.0
|
||||
agg_top3 = sum(sorted(rerank_scores, reverse=True)[:3])
|
||||
|
||||
score_gate_fails = (
|
||||
max_score < SCORE_MAX_REFUSE and agg_top3 < SCORE_AGG_REFUSE
|
||||
)
|
||||
|
||||
# ── Classifier 사용 가능 (정상 경로) ──
|
||||
if classifier and classifier.verdict is not None:
|
||||
if classifier.verdict == "insufficient":
|
||||
# Evidence quality override: classifier 가 insufficient 라 해도
|
||||
# evidence 가 충분히 좋으면 override (토론 8라운드 합의)
|
||||
# (evidence quality 는 이 함수 밖에서 별도 체크 — caller 에서 처리)
|
||||
logger.info(
|
||||
"refusal gate: classifier=insufficient max=%.2f agg=%.2f",
|
||||
max_score, agg_top3,
|
||||
)
|
||||
return RefusalDecision(
|
||||
refused=True,
|
||||
confidence_cap=None,
|
||||
rule_triggered="classifier_insufficient",
|
||||
)
|
||||
if score_gate_fails:
|
||||
logger.info(
|
||||
"refusal gate: score_low max=%.2f agg=%.2f classifier=%s",
|
||||
max_score, agg_top3, classifier.verdict,
|
||||
)
|
||||
return RefusalDecision(
|
||||
refused=True,
|
||||
confidence_cap=None,
|
||||
rule_triggered="score_low",
|
||||
)
|
||||
# Classifier says sufficient → proceed
|
||||
return RefusalDecision(
|
||||
refused=False,
|
||||
confidence_cap=None,
|
||||
rule_triggered=None,
|
||||
)
|
||||
|
||||
# ── Classifier 부재 → 3-tier conservative ──
|
||||
if max_score < CONSERVATIVE_WEAK:
|
||||
return RefusalDecision(
|
||||
refused=True,
|
||||
confidence_cap=None,
|
||||
rule_triggered="conservative_refuse(no_classifier)",
|
||||
)
|
||||
if max_score < CONSERVATIVE_MID:
|
||||
return RefusalDecision(
|
||||
refused=False,
|
||||
confidence_cap="low",
|
||||
rule_triggered="conservative_low(no_classifier)",
|
||||
)
|
||||
return RefusalDecision(
|
||||
refused=False,
|
||||
confidence_cap="medium",
|
||||
rule_triggered="conservative_medium(no_classifier)",
|
||||
)
|
||||
33
app/services/search/sentence_splitter.py
Normal file
33
app/services/search/sentence_splitter.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""문장 분할 (Phase 3.5a — regex 기반).
|
||||
|
||||
Phase 3.5b 에서 KSS 라이브러리 기반으로 업그레이드 예정.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
MIN_SENTENCE_CHARS = 15
|
||||
|
||||
|
||||
def split_sentences(text: str) -> list[str]:
|
||||
"""한국어/영어 혼합 텍스트를 문장 단위로 분할.
|
||||
|
||||
규칙:
|
||||
- 마침표/느낌표/물음표 + 공백/줄바꿈
|
||||
- 한국어 종결 어미 (다. 함. 음. 됨.) 패턴
|
||||
- MIN_SENTENCE_CHARS 미만은 이전 문장에 병합
|
||||
"""
|
||||
# 1차 분할: punctuation + whitespace
|
||||
raw = re.split(r'(?<=[.!?。])\s+|(?<=[다됨음함]\.)\s+|\n{2,}', text)
|
||||
|
||||
# 2차: 너무 짧은 것 병합
|
||||
merged: list[str] = []
|
||||
for part in raw:
|
||||
part = part.strip()
|
||||
if not part:
|
||||
continue
|
||||
if merged and len(part) < MIN_SENTENCE_CHARS:
|
||||
merged[-1] = merged[-1] + " " + part
|
||||
else:
|
||||
merged.append(part)
|
||||
|
||||
return merged if merged else [text.strip()] if text.strip() else []
|
||||
Reference in New Issue
Block a user