From b2306c3afdadd5e60878d16107355aa1aba44a2f Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Fri, 10 Apr 2026 09:49:56 +0900 Subject: [PATCH] =?UTF-8?q?feat(ask):=20Phase=203.5b=20guardrails=20?= =?UTF-8?q?=E2=80=94=20verifier=20+=20telemetry=20+=20grounding=20?= =?UTF-8?q?=EA=B0=95=ED=99=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 3.5a(classifier+refusal gate+grounding) 위에 4개 Item 추가: Item 0: ask_events telemetry 배선 - AskEvent ORM 모델 + record_ask_event() — ask_events INSERT 완성 - defense_layers에 input_snapshot(query, chunks, answer) 저장 - refused/normal 두 경로 모두 telemetry 호출 Item 3: evidence 간 numeric conflict detection - 동일 단위 다른 숫자 → weak flag - "이상/이하/초과/미만" threshold 표현 → skip (FP 방지) Item 4: fabricated_number normalization 개선 - 단위 접미사 건/원 추가, 범위 표현(10~20%) 양쪽 추출 - bare number 2자리 이상만 (1자리 FP 제거) Item 1: exaone semantic verifier (판단권 잠금 배선) - verifier_service.py — 3s timeout, circuit breaker, severity 3단계 - direct_negation만 strong, numeric/intent→medium, 나머지→weak - verifier strong 단독 refuse 금지 — grounding과 교차 필수 - 6-tier re-gate (4라운드 리뷰 확정) - grounding strong 2+ OR max_score<0.2 → verifier skip Co-Authored-By: Claude Opus 4.6 (1M context) --- app/api/search.py | 127 ++++++++++++++-- app/core/config.py | 7 + app/core/database.py | 4 + app/models/ask_event.py | 38 +++++ app/prompts/verifier.txt | 41 ++++++ app/services/search/grounding_check.py | 94 +++++++++++- app/services/search/verifier_service.py | 183 ++++++++++++++++++++++++ app/services/search_telemetry.py | 45 ++++++ migrations/102_ask_events.sql | 14 +- 9 files changed, 533 insertions(+), 20 deletions(-) create mode 100644 app/models/ask_event.py create mode 100644 app/prompts/verifier.txt create mode 100644 app/services/search/verifier_service.py diff --git a/app/api/search.py b/app/api/search.py index 330d6b2..bcfcde1 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -28,7 +28,8 @@ from services.search.grounding_check import check as 
grounding_check from services.search.refusal_gate import RefusalDecision, decide as refusal_decide from services.search.search_pipeline import PipelineResult, run_search from services.search.synthesis_service import SynthesisResult, synthesize -from services.search_telemetry import record_search_event +from services.search.verifier_service import VerifierResult, verify +from services.search_telemetry import record_ask_event, record_search_event # logs/search.log + stdout 동시 출력 (Phase 0.4) logger = setup_logger("search") @@ -451,11 +452,29 @@ async def ask( q[:80], decision.rule_triggered, max(all_rerank_scores) if all_rerank_scores else 0.0, total_ms, ) - # telemetry + # telemetry — search + ask_events 두 경로 동시 background_tasks.add_task( record_search_event, q, user.id, pr.results, "hybrid", pr.confidence_signal, pr.analyzer_confidence, ) + # input_snapshot (디버깅/재현용) + defense_log["input_snapshot"] = { + "query": q, + "top_chunks_preview": [ + {"title": c.get("title", ""), "snippet": c.get("snippet", "")[:100]} + for c in top_chunks[:3] + ], + "answer_preview": None, + } + background_tasks.add_task( + record_ask_event, + q, user.id, "insufficient", "skipped", None, + True, classifier_result.verdict, + max(all_rerank_scores) if all_rerank_scores else 0.0, + sum(sorted(all_rerank_scores, reverse=True)[:3]), + [], len(evidence), 0, + defense_log, int(total_ms), + ) debug_obj = None if debug: debug_obj = AskDebug( @@ -491,36 +510,102 @@ async def ask( sr = await synthesize(q, evidence, debug=debug) synth_ms = (time.perf_counter() - t_synth) * 1000 - # 5. Grounding check (post-synthesis) + re-gate + # 5. 
Grounding check + Verifier (조건부 병렬) + re-gate (Phase 3.5b) grounding = grounding_check(q, sr.answer or "", evidence) + + # verifier skip: grounding strong 2+ OR retrieval 자체가 망함 + grounding_only_strong = [ + f for f in grounding.strong_flags if not f.startswith("verifier_") + ] + max_rerank = max(all_rerank_scores, default=0.0) + if len(grounding_only_strong) >= 2 or max_rerank < 0.2: + verifier_result = VerifierResult("skipped", [], 0.0) + else: + verifier_task = asyncio.create_task( + verify(q, sr.answer or "", evidence) + ) + try: + verifier_result = await asyncio.wait_for(verifier_task, timeout=4.0) + except (asyncio.TimeoutError, Exception): + verifier_result = VerifierResult("timeout", [], 0.0) + + # Verifier contradictions → grounding flags 머지 (prefix 로 구분, severity 3단계) + for c in verifier_result.contradictions: + if c.severity == "strong": + grounding.strong_flags.append(f"verifier_{c.type}:{c.claim[:30]}") + elif c.severity == "medium": + grounding.weak_flags.append(f"verifier_{c.type}_medium:{c.claim[:30]}") + else: + grounding.weak_flags.append(f"verifier_{c.type}:{c.claim[:30]}") + defense_log["grounding"] = { "strong": grounding.strong_flags, "weak": grounding.weak_flags, } + defense_log["verifier"] = { + "status": verifier_result.status, + "contradictions_count": len(verifier_result.contradictions), + "strong_count": sum(1 for c in verifier_result.contradictions if c.severity == "strong"), + "medium_count": sum(1 for c in verifier_result.contradictions if c.severity == "medium"), + "elapsed_ms": verifier_result.elapsed_ms, + } - # Completeness 결정: grounding 기반 (classifier 는 binary gate 만) + # ── Re-gate: 6-tier completeness 결정 (Phase 3.5b 4차 리뷰 확정) ── completeness: Literal["full", "partial", "insufficient"] = "full" covered_aspects = classifier_result.covered_aspects or None missing_aspects = classifier_result.missing_aspects or None confirmed_items: list[ConfirmedItem] | None = None - if len(grounding.strong_flags) >= 2: - # Re-gate: multiple strong 
→ refuse + # verifier/grounding strong 구분 + g_strong = [f for f in grounding.strong_flags if not f.startswith("verifier_")] + v_strong = [f for f in grounding.strong_flags if f.startswith("verifier_")] + v_medium = [f for f in grounding.weak_flags if f.startswith("verifier_") and "_medium:" in f] + has_direct_negation = any("direct_negation" in f for f in v_strong) + + if len(g_strong) >= 2: + # Tier 1: grounding strong 2+ → refuse completeness = "insufficient" sr.answer = None sr.refused = True sr.confidence = None - defense_log["re_gate"] = "refuse(2+strong)" - elif grounding.strong_flags: - # Single strong → partial downgrade + defense_log["re_gate"] = "refuse(grounding_2+strong)" + elif g_strong and has_direct_negation: + # Tier 2: grounding strong + verifier direct_negation → refuse + completeness = "insufficient" + sr.answer = None + sr.refused = True + sr.confidence = None + defense_log["re_gate"] = "refuse(grounding+direct_negation)" + elif g_strong and sr.confidence == "low" and max_rerank < 0.25: + # Tier 3: grounding strong 1 + (low confidence AND weak evidence) → refuse + completeness = "insufficient" + sr.answer = None + sr.refused = True + sr.confidence = None + defense_log["re_gate"] = "refuse(grounding+low_conf+weak_ev)" + elif g_strong or has_direct_negation: + # Tier 4: grounding strong 1 또는 verifier direct_negation 단독 → partial completeness = "partial" sr.confidence = "low" - defense_log["re_gate"] = "partial(1strong)" + defense_log["re_gate"] = "partial(strong_or_negation)" + elif v_medium: + # Tier 5: verifier medium 누적 → count 기반 confidence 하향 + medium_count = len(v_medium) + if medium_count >= 3: + sr.confidence = "low" + defense_log["re_gate"] = f"conf_low(medium_x{medium_count})" + elif medium_count == 2 and sr.confidence == "high": + sr.confidence = "medium" + defense_log["re_gate"] = "conf_cap_medium(medium_x2)" + else: + defense_log["re_gate"] = f"medium_x{medium_count}(no_action)" elif grounding.weak_flags: - # Weak → confidence lower 
only + # Tier 6: weak → confidence 한 단계 하향 if sr.confidence == "high": sr.confidence = "medium" defense_log["re_gate"] = "conf_lower(weak)" + else: + defense_log["re_gate"] = "clean" # Confidence cap from refusal gate (classifier 부재 시 conservative) if decision.confidence_cap and sr.confidence: @@ -554,11 +639,29 @@ async def ask( ev_ms, synth_ms, total_ms, ) - # 7. telemetry + # 7. telemetry — search + ask_events 두 경로 동시 background_tasks.add_task( record_search_event, q, user.id, pr.results, "hybrid", pr.confidence_signal, pr.analyzer_confidence, ) + # input_snapshot (디버깅/재현용) + defense_log["input_snapshot"] = { + "query": q, + "top_chunks_preview": [ + {"title": (r.title or "")[:50], "snippet": (r.snippet or "")[:100]} + for r in pr.results[:3] + ], + "answer_preview": (sr.answer or "")[:200], + } + background_tasks.add_task( + record_ask_event, + q, user.id, completeness, sr.status, sr.confidence, + sr.refused, classifier_result.verdict, + max(all_rerank_scores) if all_rerank_scores else 0.0, + sum(sorted(all_rerank_scores, reverse=True)[:3]), + sr.hallucination_flags, len(evidence), len(citations), + defense_log, int(total_ms), + ) debug_obj = None if debug: diff --git a/app/core/config.py b/app/core/config.py index 36c95fe..3b2765d 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -26,6 +26,8 @@ class AIConfig(BaseModel): rerank: AIModelConfig # Phase 3.5a: exaone classifier (optional — 없으면 score-only gate) classifier: AIModelConfig | None = None + # Phase 3.5b: exaone verifier (optional — 없으면 grounding-only) + verifier: AIModelConfig | None = None class Settings(BaseModel): @@ -86,6 +88,11 @@ def load_settings() -> Settings: if "classifier" in ai_raw.get("models", {}) else None ), + verifier=( + AIModelConfig(**ai_raw["models"]["verifier"]) + if "verifier" in ai_raw.get("models", {}) + else None + ), ) if "nas" in raw: diff --git a/app/core/database.py b/app/core/database.py index e5f2938..9dca470 100644 --- a/app/core/database.py +++ 
b/app/core/database.py @@ -114,6 +114,10 @@ async def _run_migrations(conn) -> None: for version, name, path in pending: sql = path.read_text(encoding="utf-8") _validate_sql_content(name, sql) + if "schema_migrations" in sql.lower(): + raise ValueError( + f"Migration {name} must not modify schema_migrations table" + ) logger.info(f"[migration] {name} 실행 중...") # raw driver SQL 사용 — text() 의 :name bind parameter 해석으로 # SQL 주석/literal 에 콜론이 들어가면 InvalidRequestError 발생. diff --git a/app/models/ask_event.py b/app/models/ask_event.py new file mode 100644 index 0000000..ebcc84b --- /dev/null +++ b/app/models/ask_event.py @@ -0,0 +1,38 @@ +"""ask_events 테이블 ORM — /ask 호출 관측 (Phase 3.5a migration 102, Phase 3.5b 배선) + +threshold calibration + verifier FP 분석 + defense layer 디버깅 데이터. +""" + +from datetime import datetime +from typing import Any + +from sqlalchemy import BigInteger, Boolean, DateTime, Float, ForeignKey, Integer, String, Text +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column + +from core.database import Base + + +class AskEvent(Base): + __tablename__ = "ask_events" + + id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + query: Mapped[str] = mapped_column(Text, nullable=False) + user_id: Mapped[int | None] = mapped_column( + BigInteger, ForeignKey("users.id", ondelete="SET NULL") + ) + completeness: Mapped[str | None] = mapped_column(Text) # full / partial / insufficient + synthesis_status: Mapped[str | None] = mapped_column(Text) + confidence: Mapped[str | None] = mapped_column(Text) # high / medium / low + refused: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False) + classifier_verdict: Mapped[str | None] = mapped_column(Text) # sufficient / insufficient + max_rerank_score: Mapped[float | None] = mapped_column(Float) + aggregate_score: Mapped[float | None] = mapped_column(Float) + hallucination_flags: Mapped[list[Any] | None] = mapped_column(JSONB, default=list) + evidence_count: 
Mapped[int | None] = mapped_column(Integer) + citation_count: Mapped[int | None] = mapped_column(Integer) + defense_layers: Mapped[dict[str, Any] | None] = mapped_column(JSONB) + total_ms: Mapped[int | None] = mapped_column(Integer) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), default=datetime.now, nullable=False + ) diff --git a/app/prompts/verifier.txt b/app/prompts/verifier.txt new file mode 100644 index 0000000..10b25f7 --- /dev/null +++ b/app/prompts/verifier.txt @@ -0,0 +1,41 @@ +You are a grounding verifier. Given an answer and its evidence sources, check if the answer contradicts or fabricates information. Respond ONLY in JSON. + +## Contradiction Types (IMPORTANT — severity depends on type) +- **direct_negation** (CRITICAL): Answer directly contradicts evidence. Examples: evidence "의무" but answer "권고"; evidence "금지" but answer "허용"; negation reversal ("~해야 한다" vs "~할 필요 없다"). +- **numeric_conflict**: Answer states a number different from evidence. "50명" in evidence but "100명" in answer. Only flag if the same concept is referenced. +- **intent_core_mismatch**: Answer addresses a fundamentally different topic than the query asked about. +- **nuance**: Answer overgeneralizes or adds qualifiers not in evidence (e.g., "모든" when evidence says "일부"). +- **unsupported_claim**: Answer makes a factual claim with no basis in any evidence. + +## Rules +1. Compare each claim in the answer against the cited evidence. A claim with [n] citation should be checked against evidence [n]. +2. NOT a contradiction: Paraphrasing, summarizing, or restating the same fact in different words. Korean formal/informal style (합니다/한다) differences. +3. Numbers must match exactly after normalization (1,000 = 1000). +4. Legal/regulatory terms must preserve original meaning (의무 ≠ 권고, 금지 ≠ 제한, 허용 ≠ 금지). +5. Maximum 5 contradictions (most severe first: direct_negation > numeric_conflict > intent_core_mismatch > nuance > unsupported_claim). 
+ +## Output Schema +{ + "contradictions": [ + { + "type": "direct_negation" | "numeric_conflict" | "intent_core_mismatch" | "nuance" | "unsupported_claim", + "severity": "critical" | "minor", + "claim": "answer 내 해당 구절 (50자 이내)", + "evidence_ref": "대응 근거 내용 (50자 이내, [n] 포함)", + "explanation": "모순 이유 (한국어, 30자 이내)" + } + ], + "verdict": "clean" | "minor_issues" | "major_issues" +} + +severity mapping: +- direct_negation → "critical" +- All others → "minor" + +If no contradictions: {"contradictions": [], "verdict": "clean"} + +## Answer +{answer} + +## Evidence +{numbered_evidence} diff --git a/app/services/search/grounding_check.py b/app/services/search/grounding_check.py index 97a2bbf..c1a2ca0 100644 --- a/app/services/search/grounding_check.py +++ b/app/services/search/grounding_check.py @@ -42,17 +42,32 @@ class GroundingResult: weak_flags: list[str] +_UNIT_CHARS = r'명인개%년월일조항호세건원' + +# "이상/이하/초과/미만" — threshold 표현 (numeric conflict 에서 skip 대상) +_THRESHOLD_SUFFIXES = re.compile(r'이상|이하|초과|미만') + + def _extract_number_literals(text: str) -> set[str]: - """숫자 + 단위 추출 + normalize.""" - raw = set(re.findall(r'\d[\d,.]*\s*[명인개%년월일조항호세]\w{0,2}', text)) + """숫자 + 단위 추출 + normalize (Phase 3.5b 개선).""" + # 1. 숫자 + 한국어 단위 접미사 + raw = set(re.findall(rf'\d[\d,.]*\s*[{_UNIT_CHARS}]\w{{0,2}}', text)) + # 2. 범위 표현 (10~20%, 100-200명 등) — 양쪽 숫자 각각 추출 + for m in re.finditer( + rf'(\d[\d,.]*)\s*[~\-–]\s*(\d[\d,.]*)\s*([{_UNIT_CHARS}])', + text, + ): + raw.add(m.group(1) + m.group(3)) + raw.add(m.group(2) + m.group(3)) + # 3. normalize normalized = set() for r in raw: normalized.add(r.strip()) num_only = re.match(r'[\d,.]+', r) if num_only: normalized.add(num_only.group().replace(',', '')) - # 단독 숫자도 추출 - for d in re.findall(r'\b\d+\b', text): + # 4. 
단독 숫자 (2자리 이상만 — 1자리는 오탐 과다) + for d in re.findall(r'\b(\d{2,})\b', text): normalized.add(d) return normalized @@ -62,6 +77,73 @@ def _extract_content_tokens(text: str) -> set[str]: return set(re.findall(r'[가-힣]{2,}|[a-zA-Z]{3,}', text)) +def _parse_number_with_unit(literal: str) -> tuple[str, str] | None: + """숫자 리터럴에서 (digits_only, unit) 분리. 단위 없으면 None.""" + m = re.match(rf'([\d,.]+)\s*([{_UNIT_CHARS}])', literal) + if not m: + return None + digits = m.group(1).replace(',', '') + unit = m.group(2) + return (digits, unit) + + +def _check_evidence_numeric_conflicts(evidence: list["EvidenceItem"]) -> list[str]: + """evidence 간 숫자 충돌 감지 (Phase 3.5b). evidence >= 2 일 때만 활성. + + 동일 단위, 다른 숫자 → weak flag. "이상/이하/초과/미만" 포함 시 skip. + bare number 는 비교 안 함 (조항 번호 등 false positive 방지). + """ + if len(evidence) < 2: + return [] + + # 각 evidence 에서 단위 있는 숫자 + threshold 여부 추출 + # {evidence_idx: [(digits, unit, has_threshold), ...]} + per_evidence: dict[int, list[tuple[str, str, bool]]] = {} + for idx, ev in enumerate(evidence): + nums = re.findall( + rf'\d[\d,.]*\s*[{_UNIT_CHARS}]\w{{0,4}}', + ev.span_text, + ) + entries = [] + for raw in nums: + parsed = _parse_number_with_unit(raw) + if not parsed: + continue + has_thr = bool(_THRESHOLD_SUFFIXES.search(raw)) + entries.append((parsed[0], parsed[1], has_thr)) + if entries: + per_evidence[idx] = entries + + if len(per_evidence) < 2: + return [] + + # 단위별로 evidence 간 숫자 비교 + # {unit: {digits: [evidence_idx, ...]}} + unit_map: dict[str, dict[str, list[int]]] = {} + for idx, entries in per_evidence.items(): + for digits, unit, has_thr in entries: + if has_thr: + continue # threshold 표현은 skip + if unit not in unit_map: + unit_map[unit] = {} + if digits not in unit_map[unit]: + unit_map[unit][digits] = [] + if idx not in unit_map[unit][digits]: + unit_map[unit][digits].append(idx) + + flags: list[str] = [] + for unit, digits_map in unit_map.items(): + distinct_values = list(digits_map.keys()) + if len(distinct_values) >= 2: + # 가장 
많이 등장하는 2개 비교 + top2 = sorted(distinct_values, key=lambda d: len(digits_map[d]), reverse=True)[:2] + flags.append( + f"evidence_numeric_conflict:{top2[0]}{unit}_vs_{top2[1]}{unit}" + ) + + return flags + + def check( query: str, answer: str, @@ -113,6 +195,10 @@ def check( if len(s.strip()) > 20 and not re.search(r'\[\d+\]', s): weak.append(f"uncited_claim:{s[:40]}") + # ── Weak: evidence 간 숫자 충돌 (Phase 3.5b) ── + conflicts = _check_evidence_numeric_conflicts(evidence) + weak.extend(conflicts) + # ── Weak 2: token overlap ── answer_tokens = _extract_content_tokens(answer) evidence_tokens = _extract_content_tokens(evidence_text) diff --git a/app/services/search/verifier_service.py b/app/services/search/verifier_service.py new file mode 100644 index 0000000..6dec9c8 --- /dev/null +++ b/app/services/search/verifier_service.py @@ -0,0 +1,183 @@ +"""Exaone semantic verifier (Phase 3.5b). + +답변-근거 간 의미적 모순(contradiction) 감지. rule-based grounding_check 가 못 잡는 +미묘한 모순 포착. classifier 와 동일 패턴: circuit breaker + timeout + fail open. 
+ +## Severity 3단계 +- strong: direct_negation (완전 모순) → re-gate 교차 자격 +- medium: numeric_conflict, intent_core_mismatch → confidence 하향 (누적 시 강제 low) +- weak: nuance, unsupported_claim → 로깅 + mild confidence 하향 + +## 핵심 원칙 +- **Verifier strong 단독 refuse 금지** — grounding strong 과 교차해야 refuse +- **Timeout 3s** — 느리면 없는 게 낫다 (fail open) +- MLX gate 미사용 (GPU Ollama concurrent OK) +""" + +from __future__ import annotations + +import asyncio +import time +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Literal + +from ai.client import AIClient, _load_prompt, parse_json_response +from core.config import settings +from core.utils import setup_logger + +if TYPE_CHECKING: + from .evidence_service import EvidenceItem + +logger = setup_logger("verifier") + +LLM_TIMEOUT_MS = 3000 +CIRCUIT_THRESHOLD = 5 +CIRCUIT_RECOVERY_SEC = 60 + +_failure_count = 0 +_circuit_open_until: float | None = None + +# severity 매핑 (프롬프트 "critical"/"minor" → 코드 strong/medium/weak) +_SEVERITY_MAP: dict[str, dict[str, Literal["strong", "medium", "weak"]]] = { + "direct_negation": {"critical": "strong", "minor": "strong"}, + "numeric_conflict": {"critical": "medium", "minor": "medium"}, + "intent_core_mismatch": {"critical": "medium", "minor": "medium"}, + "nuance": {"critical": "weak", "minor": "weak"}, + "unsupported_claim": {"critical": "weak", "minor": "weak"}, +} + + +@dataclass(slots=True) +class Contradiction: + """개별 모순 발견.""" + type: str # direct_negation / numeric_conflict / intent_core_mismatch / nuance / unsupported_claim + severity: Literal["strong", "medium", "weak"] + claim: str + evidence_ref: str + explanation: str + + +@dataclass(slots=True) +class VerifierResult: + status: Literal["ok", "timeout", "error", "circuit_open", "skipped"] + contradictions: list[Contradiction] + elapsed_ms: float + + +try: + VERIFIER_PROMPT = _load_prompt("verifier.txt") +except FileNotFoundError: + VERIFIER_PROMPT = "" + logger.warning("verifier.txt not found — verifier will 
always skip") + + +def _build_input( + answer: str, + evidence: list[EvidenceItem], +) -> str: + """답변 + evidence spans → 프롬프트.""" + spans = "\n\n".join( + f"[{e.n}] {(e.title or '').strip()}\n{e.span_text}" + for e in evidence + ) + return ( + VERIFIER_PROMPT + .replace("{answer}", answer) + .replace("{numbered_evidence}", spans) + ) + + +def _map_severity(ctype: str, raw_severity: str) -> Literal["strong", "medium", "weak"]: + """type + raw severity → 코드 severity 3단계.""" + type_map = _SEVERITY_MAP.get(ctype, {"critical": "weak", "minor": "weak"}) + return type_map.get(raw_severity, "weak") + + +async def verify( + query: str, + answer: str, + evidence: list[EvidenceItem], +) -> VerifierResult: + """답변-근거 semantic 검증. Parallel with grounding_check. + + Returns: + VerifierResult. status "ok" 이 아니면 contradictions 빈 리스트 (fail open). + """ + global _failure_count, _circuit_open_until + t_start = time.perf_counter() + + if _circuit_open_until and time.time() < _circuit_open_until: + return VerifierResult("circuit_open", [], 0.0) + + if not VERIFIER_PROMPT: + return VerifierResult("skipped", [], 0.0) + + if not hasattr(settings.ai, "verifier") or settings.ai.verifier is None: + return VerifierResult("skipped", [], 0.0) + + if not answer or not evidence: + return VerifierResult("skipped", [], 0.0) + + prompt = _build_input(answer, evidence) + client = AIClient() + try: + async with asyncio.timeout(LLM_TIMEOUT_MS / 1000): + raw = await client._request(settings.ai.verifier, prompt) + _failure_count = 0 + except asyncio.TimeoutError: + _failure_count += 1 + if _failure_count >= CIRCUIT_THRESHOLD: + _circuit_open_until = time.time() + CIRCUIT_RECOVERY_SEC + logger.error(f"verifier circuit OPEN for {CIRCUIT_RECOVERY_SEC}s") + logger.warning("verifier timeout") + return VerifierResult( + "timeout", [], + (time.perf_counter() - t_start) * 1000, + ) + except Exception as e: + _failure_count += 1 + if _failure_count >= CIRCUIT_THRESHOLD: + _circuit_open_until = time.time() + 
CIRCUIT_RECOVERY_SEC + logger.error(f"verifier circuit OPEN for {CIRCUIT_RECOVERY_SEC}s") + logger.warning(f"verifier error: {e}") + return VerifierResult( + "error", [], + (time.perf_counter() - t_start) * 1000, + ) + finally: + await client.close() + + elapsed_ms = (time.perf_counter() - t_start) * 1000 + parsed = parse_json_response(raw) + if not isinstance(parsed, dict): + logger.warning("verifier parse failed raw=%r", (raw or "")[:200]) + return VerifierResult("error", [], elapsed_ms) + + # contradiction 파싱 + raw_items = parsed.get("contradictions") or [] + if not isinstance(raw_items, list): + raw_items = [] + + results: list[Contradiction] = [] + for item in raw_items[:5]: + if not isinstance(item, dict): + continue + ctype = item.get("type", "") + if ctype not in _SEVERITY_MAP: + ctype = "unsupported_claim" + raw_sev = item.get("severity", "minor") + severity = _map_severity(ctype, raw_sev) + claim = str(item.get("claim", ""))[:50] + ev_ref = str(item.get("evidence_ref", ""))[:50] + explanation = str(item.get("explanation", ""))[:30] + results.append(Contradiction(ctype, severity, claim, ev_ref, explanation)) + + logger.info( + "verifier ok query=%r contradictions=%d strong=%d medium=%d elapsed_ms=%.0f", + query[:60], + len(results), + sum(1 for c in results if c.severity == "strong"), + sum(1 for c in results if c.severity == "medium"), + elapsed_ms, + ) + return VerifierResult("ok", results, elapsed_ms) diff --git a/app/services/search_telemetry.py b/app/services/search_telemetry.py index 09c9b9c..2f22db7 100644 --- a/app/services/search_telemetry.py +++ b/app/services/search_telemetry.py @@ -25,6 +25,7 @@ from typing import Any from sqlalchemy.exc import SQLAlchemyError from core.database import async_session +from models.ask_event import AskEvent from models.search_failure import SearchFailureLog logger = logging.getLogger("search_telemetry") @@ -306,3 +307,47 @@ async def record_search_event( failure_reason="low_confidence", context=base_ctx, ) + + +# 
─── /ask 전용 telemetry (Phase 3.5b) ───────────────────── + + +async def record_ask_event( + query: str, + user_id: int | None, + completeness: str | None, + synthesis_status: str | None, + confidence: str | None, + refused: bool, + classifier_verdict: str | None, + max_rerank_score: float, + aggregate_score: float, + hallucination_flags: list[str], + evidence_count: int, + citation_count: int, + defense_layers: dict[str, Any], + total_ms: int, +) -> None: + """ask_events INSERT. background task에서 호출 — 에러 삼킴.""" + try: + async with async_session() as session: + row = AskEvent( + query=query, + user_id=user_id, + completeness=completeness, + synthesis_status=synthesis_status, + confidence=confidence, + refused=refused, + classifier_verdict=classifier_verdict, + max_rerank_score=max_rerank_score, + aggregate_score=aggregate_score, + hallucination_flags=hallucination_flags, + evidence_count=evidence_count, + citation_count=citation_count, + defense_layers=defense_layers, + total_ms=total_ms, + ) + session.add(row) + await session.commit() + except SQLAlchemyError as exc: + logger.warning(f"ask_event insert failed: {exc}") diff --git a/migrations/102_ask_events.sql b/migrations/102_ask_events.sql index 4c2e240..a3fbe7d 100644 --- a/migrations/102_ask_events.sql +++ b/migrations/102_ask_events.sql @@ -1,18 +1,24 @@ +-- Phase 3.5a: /ask 호출 관측 테이블 +-- refusal rate 측정, 지표 3 분리 (full/partial/insufficient), defense layer 디버깅 + CREATE TABLE IF NOT EXISTS ask_events ( id BIGSERIAL PRIMARY KEY, query TEXT NOT NULL, user_id BIGINT REFERENCES users(id), - completeness TEXT, + completeness TEXT, -- full / partial / insufficient synthesis_status TEXT, confidence TEXT, refused BOOLEAN DEFAULT false, - classifier_verdict TEXT, + classifier_verdict TEXT, -- sufficient / insufficient / null (skipped) max_rerank_score REAL, aggregate_score REAL, hallucination_flags JSONB DEFAULT '[]', evidence_count INT, citation_count INT, - defense_layers JSONB, + defense_layers JSONB, -- per-layer 
flag snapshot (score_gate, classifier, grounding, verifier, re_gate, input_snapshot) total_ms INT, created_at TIMESTAMPTZ DEFAULT now() -) +); + +CREATE INDEX IF NOT EXISTS idx_ask_events_created ON ask_events(created_at); +CREATE INDEX IF NOT EXISTS idx_ask_events_completeness ON ask_events(completeness);