feat(ask): Phase 3.5b guardrails — verifier + telemetry + grounding 강화
Phase 3.5a(classifier+refusal gate+grounding) 위에 4개 Item 추가: Item 0: ask_events telemetry 배선 - AskEvent ORM 모델 + record_ask_event() — ask_events INSERT 완성 - defense_layers에 input_snapshot(query, chunks, answer) 저장 - refused/normal 두 경로 모두 telemetry 호출 Item 3: evidence 간 numeric conflict detection - 동일 단위 다른 숫자 → weak flag - "이상/이하/초과/미만" threshold 표현 → skip (FP 방지) Item 4: fabricated_number normalization 개선 - 단위 접미사 건/원 추가, 범위 표현(10~20%) 양쪽 추출 - bare number 2자리 이상만 (1자리 FP 제거) Item 1: exaone semantic verifier (판단권 잠금 배선) - verifier_service.py — 3s timeout, circuit breaker, severity 3단계 - direct_negation만 strong, numeric/intent→medium, 나머지→weak - verifier strong 단독 refuse 금지 — grounding과 교차 필수 - 6-tier re-gate (4라운드 리뷰 확정) - grounding strong 2+ OR max_score<0.2 → verifier skip Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -28,7 +28,8 @@ from services.search.grounding_check import check as grounding_check
|
|||||||
from services.search.refusal_gate import RefusalDecision, decide as refusal_decide
|
from services.search.refusal_gate import RefusalDecision, decide as refusal_decide
|
||||||
from services.search.search_pipeline import PipelineResult, run_search
|
from services.search.search_pipeline import PipelineResult, run_search
|
||||||
from services.search.synthesis_service import SynthesisResult, synthesize
|
from services.search.synthesis_service import SynthesisResult, synthesize
|
||||||
from services.search_telemetry import record_search_event
|
from services.search.verifier_service import VerifierResult, verify
|
||||||
|
from services.search_telemetry import record_ask_event, record_search_event
|
||||||
|
|
||||||
# logs/search.log + stdout 동시 출력 (Phase 0.4)
|
# logs/search.log + stdout 동시 출력 (Phase 0.4)
|
||||||
logger = setup_logger("search")
|
logger = setup_logger("search")
|
||||||
@@ -451,11 +452,29 @@ async def ask(
|
|||||||
q[:80], decision.rule_triggered,
|
q[:80], decision.rule_triggered,
|
||||||
max(all_rerank_scores) if all_rerank_scores else 0.0, total_ms,
|
max(all_rerank_scores) if all_rerank_scores else 0.0, total_ms,
|
||||||
)
|
)
|
||||||
# telemetry
|
# telemetry — search + ask_events 두 경로 동시
|
||||||
background_tasks.add_task(
|
background_tasks.add_task(
|
||||||
record_search_event, q, user.id, pr.results, "hybrid",
|
record_search_event, q, user.id, pr.results, "hybrid",
|
||||||
pr.confidence_signal, pr.analyzer_confidence,
|
pr.confidence_signal, pr.analyzer_confidence,
|
||||||
)
|
)
|
||||||
|
# input_snapshot (디버깅/재현용)
|
||||||
|
defense_log["input_snapshot"] = {
|
||||||
|
"query": q,
|
||||||
|
"top_chunks_preview": [
|
||||||
|
{"title": c.get("title", ""), "snippet": c.get("snippet", "")[:100]}
|
||||||
|
for c in top_chunks[:3]
|
||||||
|
],
|
||||||
|
"answer_preview": None,
|
||||||
|
}
|
||||||
|
background_tasks.add_task(
|
||||||
|
record_ask_event,
|
||||||
|
q, user.id, "insufficient", "skipped", None,
|
||||||
|
True, classifier_result.verdict,
|
||||||
|
max(all_rerank_scores) if all_rerank_scores else 0.0,
|
||||||
|
sum(sorted(all_rerank_scores, reverse=True)[:3]),
|
||||||
|
[], len(evidence), 0,
|
||||||
|
defense_log, int(total_ms),
|
||||||
|
)
|
||||||
debug_obj = None
|
debug_obj = None
|
||||||
if debug:
|
if debug:
|
||||||
debug_obj = AskDebug(
|
debug_obj = AskDebug(
|
||||||
@@ -491,36 +510,102 @@ async def ask(
|
|||||||
sr = await synthesize(q, evidence, debug=debug)
|
sr = await synthesize(q, evidence, debug=debug)
|
||||||
synth_ms = (time.perf_counter() - t_synth) * 1000
|
synth_ms = (time.perf_counter() - t_synth) * 1000
|
||||||
|
|
||||||
# 5. Grounding check (post-synthesis) + re-gate
|
# 5. Grounding check + Verifier (조건부 병렬) + re-gate (Phase 3.5b)
|
||||||
grounding = grounding_check(q, sr.answer or "", evidence)
|
grounding = grounding_check(q, sr.answer or "", evidence)
|
||||||
|
|
||||||
|
# verifier skip: grounding strong 2+ OR retrieval 자체가 망함
|
||||||
|
grounding_only_strong = [
|
||||||
|
f for f in grounding.strong_flags if not f.startswith("verifier_")
|
||||||
|
]
|
||||||
|
max_rerank = max(all_rerank_scores, default=0.0)
|
||||||
|
if len(grounding_only_strong) >= 2 or max_rerank < 0.2:
|
||||||
|
verifier_result = VerifierResult("skipped", [], 0.0)
|
||||||
|
else:
|
||||||
|
verifier_task = asyncio.create_task(
|
||||||
|
verify(q, sr.answer or "", evidence)
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
verifier_result = await asyncio.wait_for(verifier_task, timeout=4.0)
|
||||||
|
except (asyncio.TimeoutError, Exception):
|
||||||
|
verifier_result = VerifierResult("timeout", [], 0.0)
|
||||||
|
|
||||||
|
# Verifier contradictions → grounding flags 머지 (prefix 로 구분, severity 3단계)
|
||||||
|
for c in verifier_result.contradictions:
|
||||||
|
if c.severity == "strong":
|
||||||
|
grounding.strong_flags.append(f"verifier_{c.type}:{c.claim[:30]}")
|
||||||
|
elif c.severity == "medium":
|
||||||
|
grounding.weak_flags.append(f"verifier_{c.type}_medium:{c.claim[:30]}")
|
||||||
|
else:
|
||||||
|
grounding.weak_flags.append(f"verifier_{c.type}:{c.claim[:30]}")
|
||||||
|
|
||||||
defense_log["grounding"] = {
|
defense_log["grounding"] = {
|
||||||
"strong": grounding.strong_flags,
|
"strong": grounding.strong_flags,
|
||||||
"weak": grounding.weak_flags,
|
"weak": grounding.weak_flags,
|
||||||
}
|
}
|
||||||
|
defense_log["verifier"] = {
|
||||||
|
"status": verifier_result.status,
|
||||||
|
"contradictions_count": len(verifier_result.contradictions),
|
||||||
|
"strong_count": sum(1 for c in verifier_result.contradictions if c.severity == "strong"),
|
||||||
|
"medium_count": sum(1 for c in verifier_result.contradictions if c.severity == "medium"),
|
||||||
|
"elapsed_ms": verifier_result.elapsed_ms,
|
||||||
|
}
|
||||||
|
|
||||||
# Completeness 결정: grounding 기반 (classifier 는 binary gate 만)
|
# ── Re-gate: 6-tier completeness 결정 (Phase 3.5b 4차 리뷰 확정) ──
|
||||||
completeness: Literal["full", "partial", "insufficient"] = "full"
|
completeness: Literal["full", "partial", "insufficient"] = "full"
|
||||||
covered_aspects = classifier_result.covered_aspects or None
|
covered_aspects = classifier_result.covered_aspects or None
|
||||||
missing_aspects = classifier_result.missing_aspects or None
|
missing_aspects = classifier_result.missing_aspects or None
|
||||||
confirmed_items: list[ConfirmedItem] | None = None
|
confirmed_items: list[ConfirmedItem] | None = None
|
||||||
|
|
||||||
if len(grounding.strong_flags) >= 2:
|
# verifier/grounding strong 구분
|
||||||
# Re-gate: multiple strong → refuse
|
g_strong = [f for f in grounding.strong_flags if not f.startswith("verifier_")]
|
||||||
|
v_strong = [f for f in grounding.strong_flags if f.startswith("verifier_")]
|
||||||
|
v_medium = [f for f in grounding.weak_flags if f.startswith("verifier_") and "_medium:" in f]
|
||||||
|
has_direct_negation = any("direct_negation" in f for f in v_strong)
|
||||||
|
|
||||||
|
if len(g_strong) >= 2:
|
||||||
|
# Tier 1: grounding strong 2+ → refuse
|
||||||
completeness = "insufficient"
|
completeness = "insufficient"
|
||||||
sr.answer = None
|
sr.answer = None
|
||||||
sr.refused = True
|
sr.refused = True
|
||||||
sr.confidence = None
|
sr.confidence = None
|
||||||
defense_log["re_gate"] = "refuse(2+strong)"
|
defense_log["re_gate"] = "refuse(grounding_2+strong)"
|
||||||
elif grounding.strong_flags:
|
elif g_strong and has_direct_negation:
|
||||||
# Single strong → partial downgrade
|
# Tier 2: grounding strong + verifier direct_negation → refuse
|
||||||
|
completeness = "insufficient"
|
||||||
|
sr.answer = None
|
||||||
|
sr.refused = True
|
||||||
|
sr.confidence = None
|
||||||
|
defense_log["re_gate"] = "refuse(grounding+direct_negation)"
|
||||||
|
elif g_strong and sr.confidence == "low" and max_rerank < 0.25:
|
||||||
|
# Tier 3: grounding strong 1 + (low confidence AND weak evidence) → refuse
|
||||||
|
completeness = "insufficient"
|
||||||
|
sr.answer = None
|
||||||
|
sr.refused = True
|
||||||
|
sr.confidence = None
|
||||||
|
defense_log["re_gate"] = "refuse(grounding+low_conf+weak_ev)"
|
||||||
|
elif g_strong or has_direct_negation:
|
||||||
|
# Tier 4: grounding strong 1 또는 verifier direct_negation 단독 → partial
|
||||||
completeness = "partial"
|
completeness = "partial"
|
||||||
sr.confidence = "low"
|
sr.confidence = "low"
|
||||||
defense_log["re_gate"] = "partial(1strong)"
|
defense_log["re_gate"] = "partial(strong_or_negation)"
|
||||||
|
elif v_medium:
|
||||||
|
# Tier 5: verifier medium 누적 → count 기반 confidence 하향
|
||||||
|
medium_count = len(v_medium)
|
||||||
|
if medium_count >= 3:
|
||||||
|
sr.confidence = "low"
|
||||||
|
defense_log["re_gate"] = f"conf_low(medium_x{medium_count})"
|
||||||
|
elif medium_count == 2 and sr.confidence == "high":
|
||||||
|
sr.confidence = "medium"
|
||||||
|
defense_log["re_gate"] = "conf_cap_medium(medium_x2)"
|
||||||
|
else:
|
||||||
|
defense_log["re_gate"] = f"medium_x{medium_count}(no_action)"
|
||||||
elif grounding.weak_flags:
|
elif grounding.weak_flags:
|
||||||
# Weak → confidence lower only
|
# Tier 6: weak → confidence 한 단계 하향
|
||||||
if sr.confidence == "high":
|
if sr.confidence == "high":
|
||||||
sr.confidence = "medium"
|
sr.confidence = "medium"
|
||||||
defense_log["re_gate"] = "conf_lower(weak)"
|
defense_log["re_gate"] = "conf_lower(weak)"
|
||||||
|
else:
|
||||||
|
defense_log["re_gate"] = "clean"
|
||||||
|
|
||||||
# Confidence cap from refusal gate (classifier 부재 시 conservative)
|
# Confidence cap from refusal gate (classifier 부재 시 conservative)
|
||||||
if decision.confidence_cap and sr.confidence:
|
if decision.confidence_cap and sr.confidence:
|
||||||
@@ -554,11 +639,29 @@ async def ask(
|
|||||||
ev_ms, synth_ms, total_ms,
|
ev_ms, synth_ms, total_ms,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 7. telemetry
|
# 7. telemetry — search + ask_events 두 경로 동시
|
||||||
background_tasks.add_task(
|
background_tasks.add_task(
|
||||||
record_search_event, q, user.id, pr.results, "hybrid",
|
record_search_event, q, user.id, pr.results, "hybrid",
|
||||||
pr.confidence_signal, pr.analyzer_confidence,
|
pr.confidence_signal, pr.analyzer_confidence,
|
||||||
)
|
)
|
||||||
|
# input_snapshot (디버깅/재현용)
|
||||||
|
defense_log["input_snapshot"] = {
|
||||||
|
"query": q,
|
||||||
|
"top_chunks_preview": [
|
||||||
|
{"title": (r.title or "")[:50], "snippet": (r.snippet or "")[:100]}
|
||||||
|
for r in pr.results[:3]
|
||||||
|
],
|
||||||
|
"answer_preview": (sr.answer or "")[:200],
|
||||||
|
}
|
||||||
|
background_tasks.add_task(
|
||||||
|
record_ask_event,
|
||||||
|
q, user.id, completeness, sr.status, sr.confidence,
|
||||||
|
sr.refused, classifier_result.verdict,
|
||||||
|
max(all_rerank_scores) if all_rerank_scores else 0.0,
|
||||||
|
sum(sorted(all_rerank_scores, reverse=True)[:3]),
|
||||||
|
sr.hallucination_flags, len(evidence), len(citations),
|
||||||
|
defense_log, int(total_ms),
|
||||||
|
)
|
||||||
|
|
||||||
debug_obj = None
|
debug_obj = None
|
||||||
if debug:
|
if debug:
|
||||||
|
|||||||
@@ -26,6 +26,8 @@ class AIConfig(BaseModel):
|
|||||||
rerank: AIModelConfig
|
rerank: AIModelConfig
|
||||||
# Phase 3.5a: exaone classifier (optional — 없으면 score-only gate)
|
# Phase 3.5a: exaone classifier (optional — 없으면 score-only gate)
|
||||||
classifier: AIModelConfig | None = None
|
classifier: AIModelConfig | None = None
|
||||||
|
# Phase 3.5b: exaone verifier (optional — 없으면 grounding-only)
|
||||||
|
verifier: AIModelConfig | None = None
|
||||||
|
|
||||||
|
|
||||||
class Settings(BaseModel):
|
class Settings(BaseModel):
|
||||||
@@ -86,6 +88,11 @@ def load_settings() -> Settings:
|
|||||||
if "classifier" in ai_raw.get("models", {})
|
if "classifier" in ai_raw.get("models", {})
|
||||||
else None
|
else None
|
||||||
),
|
),
|
||||||
|
verifier=(
|
||||||
|
AIModelConfig(**ai_raw["models"]["verifier"])
|
||||||
|
if "verifier" in ai_raw.get("models", {})
|
||||||
|
else None
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
if "nas" in raw:
|
if "nas" in raw:
|
||||||
|
|||||||
@@ -114,6 +114,10 @@ async def _run_migrations(conn) -> None:
|
|||||||
for version, name, path in pending:
|
for version, name, path in pending:
|
||||||
sql = path.read_text(encoding="utf-8")
|
sql = path.read_text(encoding="utf-8")
|
||||||
_validate_sql_content(name, sql)
|
_validate_sql_content(name, sql)
|
||||||
|
if "schema_migrations" in sql.lower():
|
||||||
|
raise ValueError(
|
||||||
|
f"Migration {name} must not modify schema_migrations table"
|
||||||
|
)
|
||||||
logger.info(f"[migration] {name} 실행 중...")
|
logger.info(f"[migration] {name} 실행 중...")
|
||||||
# raw driver SQL 사용 — text() 의 :name bind parameter 해석으로
|
# raw driver SQL 사용 — text() 의 :name bind parameter 해석으로
|
||||||
# SQL 주석/literal 에 콜론이 들어가면 InvalidRequestError 발생.
|
# SQL 주석/literal 에 콜론이 들어가면 InvalidRequestError 발생.
|
||||||
|
|||||||
38
app/models/ask_event.py
Normal file
38
app/models/ask_event.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
"""ask_events 테이블 ORM — /ask 호출 관측 (Phase 3.5a migration 102, Phase 3.5b 배선)
|
||||||
|
|
||||||
|
threshold calibration + verifier FP 분석 + defense layer 디버깅 데이터.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from sqlalchemy import BigInteger, Boolean, DateTime, Float, ForeignKey, Integer, String, Text
|
||||||
|
from sqlalchemy.dialects.postgresql import JSONB
|
||||||
|
from sqlalchemy.orm import Mapped, mapped_column
|
||||||
|
|
||||||
|
from core.database import Base
|
||||||
|
|
||||||
|
|
||||||
|
class AskEvent(Base):
    """ORM mapping for one ``/ask`` call recorded in the ``ask_events`` table.

    Rows are used for threshold calibration, verifier false-positive analysis
    and defense-layer debugging.
    """

    __tablename__ = "ask_events"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    query: Mapped[str] = mapped_column(Text, nullable=False)
    user_id: Mapped[int | None] = mapped_column(
        BigInteger, ForeignKey("users.id", ondelete="SET NULL")
    )
    # full / partial / insufficient
    completeness: Mapped[str | None] = mapped_column(Text)
    synthesis_status: Mapped[str | None] = mapped_column(Text)
    # high / medium / low
    confidence: Mapped[str | None] = mapped_column(Text)
    refused: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
    # sufficient / insufficient
    classifier_verdict: Mapped[str | None] = mapped_column(Text)
    max_rerank_score: Mapped[float | None] = mapped_column(Float)
    aggregate_score: Mapped[float | None] = mapped_column(Float)
    hallucination_flags: Mapped[list[Any] | None] = mapped_column(JSONB, default=list)
    evidence_count: Mapped[int | None] = mapped_column(Integer)
    citation_count: Mapped[int | None] = mapped_column(Integer)
    defense_layers: Mapped[dict[str, Any] | None] = mapped_column(JSONB)
    total_ms: Mapped[int | None] = mapped_column(Integer)
    # The column is timezone-aware, so the Python-side default must be an
    # aware datetime too; a naive ``datetime.now()`` leaves the stored instant
    # dependent on how the driver/session interprets naive values.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now().astimezone(),
        nullable=False,
    )
|
||||||
41
app/prompts/verifier.txt
Normal file
41
app/prompts/verifier.txt
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
You are a grounding verifier. Given an answer and its evidence sources, check if the answer contradicts or fabricates information. Respond ONLY in JSON.
|
||||||
|
|
||||||
|
## Contradiction Types (IMPORTANT — severity depends on type)
|
||||||
|
- **direct_negation** (CRITICAL): Answer directly contradicts evidence. Examples: evidence "의무" but answer "권고"; evidence "금지" but answer "허용"; negation reversal ("~해야 한다" vs "~할 필요 없다").
|
||||||
|
- **numeric_conflict**: Answer states a number different from evidence. "50명" in evidence but "100명" in answer. Only flag if the same concept is referenced.
|
||||||
|
- **intent_core_mismatch**: Answer addresses a fundamentally different topic than the query asked about.
|
||||||
|
- **nuance**: Answer overgeneralizes or adds qualifiers not in evidence (e.g., "모든" when evidence says "일부").
|
||||||
|
- **unsupported_claim**: Answer makes a factual claim with no basis in any evidence.
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
1. Compare each claim in the answer against the cited evidence. A claim with [n] citation should be checked against evidence [n].
|
||||||
|
2. NOT a contradiction: Paraphrasing, summarizing, or restating the same fact in different words. Korean formal/informal style (합니다/한다) differences.
|
||||||
|
3. Numbers must match exactly after normalization (1,000 = 1000).
|
||||||
|
4. Legal/regulatory terms must preserve original meaning (의무 ≠ 권고, 금지 ≠ 제한, 허용 ≠ 금지).
|
||||||
|
5. Maximum 5 contradictions (most severe first: direct_negation > numeric_conflict > intent_core_mismatch > nuance > unsupported_claim).
|
||||||
|
|
||||||
|
## Output Schema
|
||||||
|
{
|
||||||
|
"contradictions": [
|
||||||
|
{
|
||||||
|
"type": "direct_negation" | "numeric_conflict" | "intent_core_mismatch" | "nuance" | "unsupported_claim",
|
||||||
|
"severity": "critical" | "minor",
|
||||||
|
"claim": "answer 내 해당 구절 (50자 이내)",
|
||||||
|
"evidence_ref": "대응 근거 내용 (50자 이내, [n] 포함)",
|
||||||
|
"explanation": "모순 이유 (한국어, 30자 이내)"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"verdict": "clean" | "minor_issues" | "major_issues"
|
||||||
|
}
|
||||||
|
|
||||||
|
severity mapping:
|
||||||
|
- direct_negation → "critical"
|
||||||
|
- All others → "minor"
|
||||||
|
|
||||||
|
If no contradictions: {"contradictions": [], "verdict": "clean"}
|
||||||
|
|
||||||
|
## Answer
|
||||||
|
{answer}
|
||||||
|
|
||||||
|
## Evidence
|
||||||
|
{numbered_evidence}
|
||||||
@@ -42,17 +42,32 @@ class GroundingResult:
|
|||||||
weak_flags: list[str]
|
weak_flags: list[str]
|
||||||
|
|
||||||
|
|
||||||
|
# Korean unit suffix characters accepted directly after a number; the string
# is interpolated into regex character classes.
_UNIT_CHARS = r'명인개%년월일조항호세건원'

# Threshold wording ("이상/이하/초과/미만"); numbers carrying it are skipped by
# the numeric-conflict check.
_THRESHOLD_SUFFIXES = re.compile(r'이상|이하|초과|미만')
|
||||||
|
|
||||||
|
|
||||||
def _extract_number_literals(text: str) -> set[str]:
    """Collect normalized numeric literals found in *text* (Phase 3.5b).

    The result contains:
    - every "number + unit suffix" match (whitespace-stripped), plus its
      leading numeric part with thousands separators removed;
    - both ends of a range expression such as ``10~20%`` paired with the
      range's unit;
    - bare numbers of two or more digits (single digits are left out because
      they produce too many false positives).
    """
    # 1. Number followed by a Korean unit suffix.
    literals = set(re.findall(rf'\d[\d,.]*\s*[{_UNIT_CHARS}]\w{{0,2}}', text))

    # 2. Range expressions: record each end together with the unit.
    range_matches = re.finditer(
        rf'(\d[\d,.]*)\s*[~\-–]\s*(\d[\d,.]*)\s*([{_UNIT_CHARS}])',
        text,
    )
    for match in range_matches:
        low, high, unit = match.groups()
        literals.update((low + unit, high + unit))

    # 3. Normalize: keep the stripped literal and its comma-free number part.
    result: set[str] = set()
    for literal in literals:
        result.add(literal.strip())
        leading = re.match(r'[\d,.]+', literal)
        if leading is not None:
            result.add(leading.group().replace(',', ''))

    # 4. Bare numbers, two digits or longer only.
    result.update(re.findall(r'\b(\d{2,})\b', text))
    return result
|
||||||
|
|
||||||
@@ -62,6 +77,73 @@ def _extract_content_tokens(text: str) -> set[str]:
|
|||||||
return set(re.findall(r'[가-힣]{2,}|[a-zA-Z]{3,}', text))
|
return set(re.findall(r'[가-힣]{2,}|[a-zA-Z]{3,}', text))
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_number_with_unit(literal: str) -> tuple[str, str] | None:
    """Split a numeric literal into ``(digits, unit)``.

    Commas are removed from the numeric part. Returns ``None`` when the
    literal does not start with a number followed by a known unit character.
    """
    match = re.match(rf'([\d,.]+)\s*([{_UNIT_CHARS}])', literal)
    if match is None:
        return None
    number_part, unit = match.groups()
    return (number_part.replace(',', ''), unit)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_evidence_numeric_conflicts(evidence: list["EvidenceItem"]) -> list[str]:
    """Detect numbers that disagree between evidence items (Phase 3.5b).

    Only active when at least two evidence items are given. For each unit,
    different numeric values coming from *different* evidence items produce
    one weak flag of the form ``evidence_numeric_conflict:<a><unit>_vs_<b><unit>``
    built from the two most frequently seen values.

    Numbers carrying a threshold suffix ("이상/이하/초과/미만") are skipped, and
    bare numbers without a unit are never compared (article numbers and the
    like would cause false positives).

    Args:
        evidence: Evidence items; only ``span_text`` is read.

    Returns:
        Weak-flag strings, possibly empty.
    """
    if len(evidence) < 2:
        return []

    # The optional tail lets a threshold suffix separated by whitespace
    # ("50명 이상") be captured as well; ``\w{0,4}`` alone stops at the space,
    # which made such thresholds look like plain numbers.
    number_re = re.compile(
        rf'\d[\d,.]*\s*[{_UNIT_CHARS}]\w{{0,4}}'
        rf'(?:\s*(?:{_THRESHOLD_SUFFIXES.pattern}))?'
    )

    # {evidence_idx: [(digits, unit, has_threshold), ...]}
    per_evidence: dict[int, list[tuple[str, str, bool]]] = {}
    for idx, ev in enumerate(evidence):
        entries: list[tuple[str, str, bool]] = []
        for raw in number_re.findall(ev.span_text):
            parsed = _parse_number_with_unit(raw)
            if not parsed:
                continue
            has_thr = bool(_THRESHOLD_SUFFIXES.search(raw))
            entries.append((parsed[0], parsed[1], has_thr))
        if entries:
            per_evidence[idx] = entries

    if len(per_evidence) < 2:
        return []

    # {unit: {digits: [evidence_idx, ...]}}
    unit_map: dict[str, dict[str, list[int]]] = {}
    for idx, entries in per_evidence.items():
        for digits, unit, has_thr in entries:
            if has_thr:
                continue  # threshold expressions are not comparable values
            sources = unit_map.setdefault(unit, {}).setdefault(digits, [])
            if idx not in sources:
                sources.append(idx)

    flags: list[str] = []
    for unit, digits_map in unit_map.items():
        if len(digits_map) < 2:
            continue
        # A conflict *between* evidence items needs at least two items
        # contributing values; several numbers inside one item do not count.
        involved = {i for idxs in digits_map.values() for i in idxs}
        if len(involved) < 2:
            continue
        # Report the two values backed by the most evidence items.
        top2 = sorted(digits_map, key=lambda d: len(digits_map[d]), reverse=True)[:2]
        flags.append(
            f"evidence_numeric_conflict:{top2[0]}{unit}_vs_{top2[1]}{unit}"
        )

    return flags
|
||||||
|
|
||||||
|
|
||||||
def check(
|
def check(
|
||||||
query: str,
|
query: str,
|
||||||
answer: str,
|
answer: str,
|
||||||
@@ -113,6 +195,10 @@ def check(
|
|||||||
if len(s.strip()) > 20 and not re.search(r'\[\d+\]', s):
|
if len(s.strip()) > 20 and not re.search(r'\[\d+\]', s):
|
||||||
weak.append(f"uncited_claim:{s[:40]}")
|
weak.append(f"uncited_claim:{s[:40]}")
|
||||||
|
|
||||||
|
# ── Weak: evidence 간 숫자 충돌 (Phase 3.5b) ──
|
||||||
|
conflicts = _check_evidence_numeric_conflicts(evidence)
|
||||||
|
weak.extend(conflicts)
|
||||||
|
|
||||||
# ── Weak 2: token overlap ──
|
# ── Weak 2: token overlap ──
|
||||||
answer_tokens = _extract_content_tokens(answer)
|
answer_tokens = _extract_content_tokens(answer)
|
||||||
evidence_tokens = _extract_content_tokens(evidence_text)
|
evidence_tokens = _extract_content_tokens(evidence_text)
|
||||||
|
|||||||
183
app/services/search/verifier_service.py
Normal file
183
app/services/search/verifier_service.py
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
"""Exaone semantic verifier (Phase 3.5b).
|
||||||
|
|
||||||
|
답변-근거 간 의미적 모순(contradiction) 감지. rule-based grounding_check 가 못 잡는
|
||||||
|
미묘한 모순 포착. classifier 와 동일 패턴: circuit breaker + timeout + fail open.
|
||||||
|
|
||||||
|
## Severity 3단계
|
||||||
|
- strong: direct_negation (완전 모순) → re-gate 교차 자격
|
||||||
|
- medium: numeric_conflict, intent_core_mismatch → confidence 하향 (누적 시 강제 low)
|
||||||
|
- weak: nuance, unsupported_claim → 로깅 + mild confidence 하향
|
||||||
|
|
||||||
|
## 핵심 원칙
|
||||||
|
- **Verifier strong 단독 refuse 금지** — grounding strong 과 교차해야 refuse
|
||||||
|
- **Timeout 3s** — 느리면 없는 게 낫다 (fail open)
|
||||||
|
- MLX gate 미사용 (GPU Ollama concurrent OK)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import TYPE_CHECKING, Literal
|
||||||
|
|
||||||
|
from ai.client import AIClient, _load_prompt, parse_json_response
|
||||||
|
from core.config import settings
|
||||||
|
from core.utils import setup_logger
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from .evidence_service import EvidenceItem
|
||||||
|
|
||||||
|
logger = setup_logger("verifier")
|
||||||
|
|
||||||
|
# Timeout for one verifier LLM request, in milliseconds.
LLM_TIMEOUT_MS = 3000
# Failure count at which the circuit opens.
CIRCUIT_THRESHOLD = 5
# How long the circuit stays open, in seconds.
CIRCUIT_RECOVERY_SEC = 60

# Circuit-breaker state (module-wide).
_failure_count = 0
_circuit_open_until: float | None = None

# Severity mapping: prompt-level "critical"/"minor" per contradiction type
# -> code-level strong / medium / weak.
_SEVERITY_MAP: dict[str, dict[str, Literal["strong", "medium", "weak"]]] = {
    "direct_negation": {
        "critical": "strong",
        "minor": "strong",
    },
    "numeric_conflict": {
        "critical": "medium",
        "minor": "medium",
    },
    "intent_core_mismatch": {
        "critical": "medium",
        "minor": "medium",
    },
    "nuance": {
        "critical": "weak",
        "minor": "weak",
    },
    "unsupported_claim": {
        "critical": "weak",
        "minor": "weak",
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class Contradiction:
|
||||||
|
"""개별 모순 발견."""
|
||||||
|
type: str # direct_negation / numeric_conflict / intent_core_mismatch / nuance / unsupported_claim
|
||||||
|
severity: Literal["strong", "medium", "weak"]
|
||||||
|
claim: str
|
||||||
|
evidence_ref: str
|
||||||
|
explanation: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class VerifierResult:
|
||||||
|
status: Literal["ok", "timeout", "error", "circuit_open", "skipped"]
|
||||||
|
contradictions: list[Contradiction]
|
||||||
|
elapsed_ms: float
|
||||||
|
|
||||||
|
|
||||||
|
# Load the prompt template once at import time. An empty template makes
# verify() return "skipped" instead of calling the model.
VERIFIER_PROMPT = ""
try:
    VERIFIER_PROMPT = _load_prompt("verifier.txt")
except FileNotFoundError:
    logger.warning("verifier.txt not found — verifier will always skip")
|
||||||
|
|
||||||
|
|
||||||
|
def _build_input(
    answer: str,
    evidence: list[EvidenceItem],
) -> str:
    """Render the verifier prompt from the answer and the evidence spans.

    Each evidence item becomes ``[n] <title>`` followed by its span text;
    items are separated by a blank line. The ``{answer}`` and
    ``{numbered_evidence}`` placeholders of ``VERIFIER_PROMPT`` are filled in
    a single pass, so placeholder-looking text inside the answer or the
    evidence is never substituted a second time.

    Args:
        answer: Synthesized answer text.
        evidence: Evidence items (``n``, ``title`` and ``span_text`` are read).

    Returns:
        The complete prompt string.
    """
    import re  # local import keeps this block self-contained

    spans = "\n\n".join(
        f"[{e.n}] {(e.title or '').strip()}\n{e.span_text}"
        for e in evidence
    )
    substitutions = {"{answer}": answer, "{numbered_evidence}": spans}
    # A function replacement inserts the text verbatim (no backslash escape
    # processing) and, unlike chained str.replace(), does not re-scan text
    # that was just inserted.
    return re.sub(
        r"\{answer\}|\{numbered_evidence\}",
        lambda m: substitutions[m.group(0)],
        VERIFIER_PROMPT,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _map_severity(ctype: str, raw_severity: str) -> Literal["strong", "medium", "weak"]:
    """Map a contradiction type and the model's raw severity to strong/medium/weak.

    The model output is untrusted: a non-string or unrecognised severity label
    is treated like ``"minor"`` (the same default a missing label gets), so
    the result is decided by the contradiction type instead of silently
    dropping to ``"weak"`` or raising ``TypeError`` on an unhashable value.
    Unknown contradiction types map to ``"weak"``.

    Args:
        ctype: Contradiction type reported by the model.
        raw_severity: Severity label reported by the model
            (``"critical"`` / ``"minor"`` expected).

    Returns:
        The code-level severity.
    """
    fallback: dict[str, Literal["strong", "medium", "weak"]] = {
        "critical": "weak",
        "minor": "weak",
    }
    type_map = _SEVERITY_MAP.get(ctype, fallback) if isinstance(ctype, str) else fallback

    label = raw_severity.strip().lower() if isinstance(raw_severity, str) else "minor"
    if label not in type_map:
        label = "minor"
    return type_map.get(label, "weak")
|
||||||
|
|
||||||
|
|
||||||
|
async def verify(
    query: str,
    answer: str,
    evidence: list[EvidenceItem],
) -> VerifierResult:
    """Semantically check the answer against its evidence with the verifier model.

    Fail-open: every non-"ok" status carries an empty contradiction list.

    Status values:
    - ``circuit_open``: the circuit breaker is currently open.
    - ``skipped``: no prompt template, no verifier model configured, or an
      empty answer / evidence list.
    - ``timeout`` / ``error``: the model request failed; the failure counter
      is incremented and the circuit opens at ``CIRCUIT_THRESHOLD``.
    - ``error`` is also returned when the response is not a JSON object.
    - ``ok``: response parsed; at most five contradictions are kept.

    Args:
        query: User query (used for logging only).
        answer: Synthesized answer text.
        evidence: Evidence items the answer was built from.

    Returns:
        The verifier outcome.
    """
    global _failure_count, _circuit_open_until
    t_start = time.perf_counter()

    if _circuit_open_until and time.time() < _circuit_open_until:
        return VerifierResult("circuit_open", [], 0.0)

    if not VERIFIER_PROMPT:
        return VerifierResult("skipped", [], 0.0)

    verifier_cfg = getattr(settings.ai, "verifier", None)
    if verifier_cfg is None:
        return VerifierResult("skipped", [], 0.0)

    if not answer or not evidence:
        return VerifierResult("skipped", [], 0.0)

    prompt = _build_input(answer, evidence)
    client = AIClient()
    try:
        async with asyncio.timeout(LLM_TIMEOUT_MS / 1000):
            raw = await client._request(verifier_cfg, prompt)
        _failure_count = 0
    except Exception as exc:
        # Timeouts and other request failures share the circuit-breaker
        # bookkeeping; only the status and the log line differ.
        timed_out = isinstance(exc, asyncio.TimeoutError)
        _failure_count += 1
        if _failure_count >= CIRCUIT_THRESHOLD:
            _circuit_open_until = time.time() + CIRCUIT_RECOVERY_SEC
            logger.error(f"verifier circuit OPEN for {CIRCUIT_RECOVERY_SEC}s")
        if timed_out:
            logger.warning("verifier timeout")
        else:
            logger.warning(f"verifier error: {exc}")
        return VerifierResult(
            "timeout" if timed_out else "error", [],
            (time.perf_counter() - t_start) * 1000,
        )
    finally:
        await client.close()

    elapsed_ms = (time.perf_counter() - t_start) * 1000
    parsed = parse_json_response(raw)
    if not isinstance(parsed, dict):
        logger.warning("verifier parse failed raw=%r", (raw or "")[:200])
        return VerifierResult("error", [], elapsed_ms)

    # Parse contradictions. Every field comes from model output, so each one
    # is validated before it is used as a dict key or turned into text.
    raw_items = parsed.get("contradictions") or []
    if not isinstance(raw_items, list):
        raw_items = []

    results: list[Contradiction] = []
    for item in raw_items[:5]:
        if not isinstance(item, dict):
            continue
        ctype = item.get("type", "")
        # A non-string type (e.g. a list) is unhashable and would raise on
        # the membership test; treat it like any other unknown type.
        if not isinstance(ctype, str) or ctype not in _SEVERITY_MAP:
            ctype = "unsupported_claim"
        raw_sev = item.get("severity", "minor")
        if not isinstance(raw_sev, str):
            raw_sev = "minor"
        severity = _map_severity(ctype, raw_sev)
        # ``or ""`` keeps a JSON null from becoming the literal text "None".
        claim = str(item.get("claim") or "")[:50]
        ev_ref = str(item.get("evidence_ref") or "")[:50]
        explanation = str(item.get("explanation") or "")[:30]
        results.append(Contradiction(ctype, severity, claim, ev_ref, explanation))

    logger.info(
        "verifier ok query=%r contradictions=%d strong=%d medium=%d elapsed_ms=%.0f",
        query[:60],
        len(results),
        sum(1 for c in results if c.severity == "strong"),
        sum(1 for c in results if c.severity == "medium"),
        elapsed_ms,
    )
    return VerifierResult("ok", results, elapsed_ms)
|
||||||
@@ -25,6 +25,7 @@ from typing import Any
|
|||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
||||||
from core.database import async_session
|
from core.database import async_session
|
||||||
|
from models.ask_event import AskEvent
|
||||||
from models.search_failure import SearchFailureLog
|
from models.search_failure import SearchFailureLog
|
||||||
|
|
||||||
logger = logging.getLogger("search_telemetry")
|
logger = logging.getLogger("search_telemetry")
|
||||||
@@ -306,3 +307,47 @@ async def record_search_event(
|
|||||||
failure_reason="low_confidence",
|
failure_reason="low_confidence",
|
||||||
context=base_ctx,
|
context=base_ctx,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── /ask 전용 telemetry (Phase 3.5b) ─────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def record_ask_event(
    query: str,
    user_id: int | None,
    completeness: str | None,
    synthesis_status: str | None,
    confidence: str | None,
    refused: bool,
    classifier_verdict: str | None,
    max_rerank_score: float,
    aggregate_score: float,
    hallucination_flags: list[str],
    evidence_count: int,
    citation_count: int,
    defense_layers: dict[str, Any],
    total_ms: int,
) -> None:
    """Insert one row into ``ask_events``.

    Meant to be called from a background task: a ``SQLAlchemyError`` raised
    by the insert is logged as a warning and not re-raised.
    """
    columns: dict[str, Any] = {
        "query": query,
        "user_id": user_id,
        "completeness": completeness,
        "synthesis_status": synthesis_status,
        "confidence": confidence,
        "refused": refused,
        "classifier_verdict": classifier_verdict,
        "max_rerank_score": max_rerank_score,
        "aggregate_score": aggregate_score,
        "hallucination_flags": hallucination_flags,
        "evidence_count": evidence_count,
        "citation_count": citation_count,
        "defense_layers": defense_layers,
        "total_ms": total_ms,
    }
    try:
        async with async_session() as session:
            session.add(AskEvent(**columns))
            await session.commit()
    except SQLAlchemyError as exc:
        logger.warning(f"ask_event insert failed: {exc}")
|
||||||
|
|||||||
@@ -1,18 +1,24 @@
|
|||||||
|
-- Phase 3.5a: observation table for /ask calls.
-- Used to measure the refusal rate, split metric 3 (full/partial/insufficient)
-- and debug the defense layers.

CREATE TABLE IF NOT EXISTS ask_events (
    id                  BIGSERIAL PRIMARY KEY,
    query               TEXT NOT NULL,
    user_id             BIGINT REFERENCES users(id),
    completeness        TEXT,                  -- full / partial / insufficient
    synthesis_status    TEXT,
    confidence          TEXT,
    refused             BOOLEAN DEFAULT false,
    classifier_verdict  TEXT,                  -- sufficient / insufficient / null (skipped)
    max_rerank_score    REAL,
    aggregate_score     REAL,
    hallucination_flags JSONB DEFAULT '[]',
    evidence_count      INT,
    citation_count      INT,
    defense_layers      JSONB,                 -- per-layer flag snapshot (score_gate, classifier, grounding)
    total_ms            INT,
    created_at          TIMESTAMPTZ DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_ask_events_created ON ask_events(created_at);
CREATE INDEX IF NOT EXISTS idx_ask_events_completeness ON ask_events(completeness);
|
||||||
|
|||||||
Reference in New Issue
Block a user