feat(ask): Phase 3.5a guardrails (classifier + refusal gate + grounding + partial)

신규 파일:
- classifier_service.py: exaone binary classifier (sufficient/insufficient)
  parallel with evidence, circuit breaker, timeout 5s
- refusal_gate.py: multi-signal fusion (score + classifier)
  AND 조건, conservative fallback 3-tier (classifier 부재 시)
- grounding_check.py: strong/weak flag 분리
  strong: fabricated_number + intent_misalignment(important keywords)
  weak: uncited_claim + low_overlap + intent_misalignment(generic)
  re-gate: 2+ strong → refuse, 1 strong → partial
- sentence_splitter.py: regex 기반 (Phase 3.5b KSS 업그레이드)
- classifier.txt: exaone Y+ prompt (calibration examples 포함)
- search_synthesis_partial.txt: partial answer 전용 프롬프트
- 102_ask_events.sql: /ask 관측 테이블 (completeness 3-분리 지표)
- queries.yaml: Phase 3.5 smoke test 평가셋 10개

수정 파일:
- search.py /ask: classifier parallel + refusal gate + grounding re-gate
  + defense_layers 로깅 + AskResponse completeness/aspects/confirmed_items
- config.yaml: classifier model 섹션 (exaone3.5:7.8b GPU Ollama)
- config.py: classifier optional 파싱
- AskAnswer.svelte: 4분기 렌더 (full/partial/insufficient/loading)
- ask.ts: Completeness + ConfirmedItem 타입

P1 실측: exaone ternary 불안정 → binary gate 축소. partial은 grounding이 담당.
토론 9라운드 확정. plan: quiet-meandering-nova.md

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-10 08:49:11 +09:00
parent 0eecf1afca
commit 06443947bf
13 changed files with 869 additions and 47 deletions

View File

@@ -9,6 +9,7 @@
- `/ask` endpoint wrapper (Phase 3.3 에서 추가) - `/ask` endpoint wrapper (Phase 3.3 에서 추가)
""" """
import asyncio
import time import time
from typing import Annotated, Literal from typing import Annotated, Literal
@@ -20,8 +21,11 @@ from core.auth import get_current_user
from core.database import get_session from core.database import get_session
from core.utils import setup_logger from core.utils import setup_logger
from models.user import User from models.user import User
from services.search.classifier_service import ClassifierResult, classify
from services.search.evidence_service import EvidenceItem, extract_evidence from services.search.evidence_service import EvidenceItem, extract_evidence
from services.search.fusion_service import DEFAULT_FUSION from services.search.fusion_service import DEFAULT_FUSION
from services.search.grounding_check import check as grounding_check
from services.search.refusal_gate import RefusalDecision, decide as refusal_decide
from services.search.search_pipeline import PipelineResult, run_search from services.search.search_pipeline import PipelineResult, run_search
from services.search.synthesis_service import SynthesisResult, synthesize from services.search.synthesis_service import SynthesisResult, synthesize
from services.search_telemetry import record_search_event from services.search_telemetry import record_search_event
@@ -216,6 +220,14 @@ class Citation(BaseModel):
rerank_score: float rerank_score: float
class ConfirmedItem(BaseModel):
    """One per-aspect answer inside a partial `/ask` response."""

    # Label of the query aspect this item answers.
    aspect: str
    # Answer text for that aspect.
    text: str
    # Citation numbers backing the text.
    # NOTE(review): presumably the same [n] indices used by inline citations — confirm.
    citations: list[int]
class AskDebug(BaseModel): class AskDebug(BaseModel):
"""`/ask?debug=true` 응답 확장.""" """`/ask?debug=true` 응답 확장."""
@@ -230,10 +242,12 @@ class AskDebug(BaseModel):
synthesis_prompt_preview: str | None = None synthesis_prompt_preview: str | None = None
synthesis_raw_preview: str | None = None synthesis_raw_preview: str | None = None
hallucination_flags: list[str] = [] hallucination_flags: list[str] = []
# Phase 3.5a: per-layer defense 로깅
defense_layers: dict | None = None
class AskResponse(BaseModel): class AskResponse(BaseModel):
"""`/ask` 응답. `/search` 의 SearchResult 는 그대로 재사용.""" """`/ask` 응답. Phase 3.5a: completeness + aspects 추가."""
results: list[SearchResult] results: list[SearchResult]
ai_answer: str | None ai_answer: str | None
@@ -247,6 +261,11 @@ class AskResponse(BaseModel):
no_results_reason: str | None no_results_reason: str | None
query: str query: str
total: int total: int
# Phase 3.5a
completeness: Literal["full", "partial", "insufficient"] = "full"
covered_aspects: list[str] | None = None
missing_aspects: list[str] | None = None
confirmed_items: list[ConfirmedItem] | None = None
debug: AskDebug | None = None debug: AskDebug | None = None
@@ -355,72 +374,210 @@ async def ask(
limit: int = Query(10, ge=1, le=20, description="synthesis 입력 상한"), limit: int = Query(10, ge=1, le=20, description="synthesis 입력 상한"),
debug: bool = Query(False, description="evidence/synthesis 중간 상태 노출"), debug: bool = Query(False, description="evidence/synthesis 중간 상태 노출"),
): ):
"""근거 기반 AI 답변 (Phase 3.3). """근거 기반 AI 답변 (Phase 3.5a).
`/search` 와 동일한 검색 파이프라인을 거친 후 evidence extraction + Phase 3.3 기반 + classifier parallel + refusal gate + grounding re-gate.
grounded synthesis 를 추가한다. `mode`, `rerank`, `analyze` 는 품질 보장을 실패 경로에서도 `results` 는 항상 반환.
위해 강제 고정 (hybrid / True / True).
실패 경로(timeout/parse_failed/refused/...) 에서도 `results` 는 항상 반환.
""" """
t_total = time.perf_counter() t_total = time.perf_counter()
defense_log: dict = {} # per-layer flag snapshot
# 1. 검색 파이프라인 (run_search — /search 와 동일 로직, 단일 진실 소스) # 1. 검색 파이프라인
pr = await run_search( pr = await run_search(
session, session, q, mode="hybrid", limit=limit,
q, fusion=DEFAULT_FUSION, rerank=True, analyze=True,
mode="hybrid",
limit=limit,
fusion=DEFAULT_FUSION,
rerank=True,
analyze=True,
) )
# 2. Evidence extraction (rule + LLM span select, 1 batched call) # 2. Evidence + Classifier 병렬
t_ev = time.perf_counter() t_ev = time.perf_counter()
evidence, ev_skip = await extract_evidence(q, pr.results) evidence_task = asyncio.create_task(extract_evidence(q, pr.results))
# classifier input: top 3 chunks meta + rerank scores
top_chunks = [
{
"title": r.title or "",
"section": r.section_title or "",
"snippet": (r.snippet or "")[:200],
}
for r in pr.results[:3]
]
rerank_scores_top = [
r.rerank_score if r.rerank_score is not None else r.score
for r in pr.results[:3]
]
classifier_task = asyncio.create_task(
classify(q, top_chunks, rerank_scores_top)
)
evidence, ev_skip = await evidence_task
ev_ms = (time.perf_counter() - t_ev) * 1000 ev_ms = (time.perf_counter() - t_ev) * 1000
# 3. Grounded synthesis (gemma-4, 15s timeout, citation 검증) # classifier await (timeout 보호 — classifier_service 내부에도 있지만 여기서 이중 보호)
try:
classifier_result = await asyncio.wait_for(classifier_task, timeout=6.0)
except (asyncio.TimeoutError, Exception):
classifier_result = ClassifierResult("timeout", None, [], [], 0.0)
defense_log["classifier"] = {
"status": classifier_result.status,
"verdict": classifier_result.verdict,
"covered_aspects": classifier_result.covered_aspects,
"missing_aspects": classifier_result.missing_aspects,
"elapsed_ms": classifier_result.elapsed_ms,
}
# 3. Refusal gate (multi-signal fusion)
all_rerank_scores = [
e.rerank_score for e in evidence
] if evidence else rerank_scores_top
decision = refusal_decide(all_rerank_scores, classifier_result)
defense_log["score_gate"] = {
"max": max(all_rerank_scores) if all_rerank_scores else 0.0,
"agg_top3": sum(sorted(all_rerank_scores, reverse=True)[:3]),
}
defense_log["refusal"] = {
"refused": decision.refused,
"rule_triggered": decision.rule_triggered,
}
if decision.refused:
total_ms = (time.perf_counter() - t_total) * 1000
no_reason = "관련 근거를 찾지 못했습니다."
if not pr.results:
no_reason = "검색 결과가 없습니다."
logger.info(
"ask REFUSED query=%r rule=%s max_score=%.2f total=%.0f",
q[:80], decision.rule_triggered,
max(all_rerank_scores) if all_rerank_scores else 0.0, total_ms,
)
# telemetry
background_tasks.add_task(
record_search_event, q, user.id, pr.results, "hybrid",
pr.confidence_signal, pr.analyzer_confidence,
)
debug_obj = None
if debug:
debug_obj = AskDebug(
timing_ms={**pr.timing_ms, "evidence_ms": ev_ms, "ask_total_ms": total_ms},
search_notes=pr.notes,
confidence_signal=pr.confidence_signal,
evidence_candidate_count=len(evidence),
evidence_kept_count=len(evidence),
evidence_skip_reason=ev_skip,
synthesis_cache_hit=False,
hallucination_flags=[],
defense_layers=defense_log,
)
return AskResponse(
results=pr.results,
ai_answer=None,
citations=[],
synthesis_status="skipped",
synthesis_ms=0.0,
confidence=None,
refused=True,
no_results_reason=no_reason,
query=q,
total=len(pr.results),
completeness="insufficient",
covered_aspects=classifier_result.covered_aspects or None,
missing_aspects=classifier_result.missing_aspects or None,
debug=debug_obj,
)
# 4. Synthesis
t_synth = time.perf_counter() t_synth = time.perf_counter()
sr = await synthesize(q, evidence, debug=debug) sr = await synthesize(q, evidence, debug=debug)
synth_ms = (time.perf_counter() - t_synth) * 1000 synth_ms = (time.perf_counter() - t_synth) * 1000
# 5. Grounding check (post-synthesis) + re-gate
grounding = grounding_check(q, sr.answer or "", evidence)
defense_log["grounding"] = {
"strong": grounding.strong_flags,
"weak": grounding.weak_flags,
}
# Completeness 결정: grounding 기반 (classifier 는 binary gate 만)
completeness: Literal["full", "partial", "insufficient"] = "full"
covered_aspects = classifier_result.covered_aspects or None
missing_aspects = classifier_result.missing_aspects or None
confirmed_items: list[ConfirmedItem] | None = None
if len(grounding.strong_flags) >= 2:
# Re-gate: multiple strong → refuse
completeness = "insufficient"
sr.answer = None
sr.refused = True
sr.confidence = None
defense_log["re_gate"] = "refuse(2+strong)"
elif grounding.strong_flags:
# Single strong → partial downgrade
completeness = "partial"
sr.confidence = "low"
defense_log["re_gate"] = "partial(1strong)"
elif grounding.weak_flags:
# Weak → confidence lower only
if sr.confidence == "high":
sr.confidence = "medium"
defense_log["re_gate"] = "conf_lower(weak)"
# Confidence cap from refusal gate (classifier 부재 시 conservative)
if decision.confidence_cap and sr.confidence:
conf_rank = {"low": 0, "medium": 1, "high": 2}
if conf_rank.get(sr.confidence, 0) > conf_rank.get(decision.confidence_cap, 2):
sr.confidence = decision.confidence_cap
# Partial 이면 max confidence = medium
if completeness == "partial" and sr.confidence == "high":
sr.confidence = "medium"
sr.hallucination_flags.extend(
[f"strong:{f}" for f in grounding.strong_flags]
+ [f"weak:{f}" for f in grounding.weak_flags]
)
total_ms = (time.perf_counter() - t_total) * 1000 total_ms = (time.perf_counter() - t_total) * 1000
# 4. 응답 구성 # 6. 응답 구성
citations = _build_citations(evidence, sr.used_citations) citations = _build_citations(evidence, sr.used_citations)
no_reason = _map_no_results_reason(pr, evidence, ev_skip, sr) no_reason = _map_no_results_reason(pr, evidence, ev_skip, sr)
if completeness == "insufficient" and not no_reason:
no_reason = "답변 검증에서 복수 오류 감지"
logger.info( logger.info(
"ask query=%r results=%d evidence=%d cite=%d synth=%s conf=%s refused=%s ev_ms=%.0f synth_ms=%.0f total=%.0f", "ask query=%r results=%d evidence=%d cite=%d synth=%s conf=%s completeness=%s "
q[:80], "refused=%s grounding_strong=%d grounding_weak=%d ev_ms=%.0f synth_ms=%.0f total=%.0f",
len(pr.results), q[:80], len(pr.results), len(evidence), len(citations),
len(evidence), sr.status, sr.confidence or "-", completeness,
len(citations), sr.refused, len(grounding.strong_flags), len(grounding.weak_flags),
sr.status, ev_ms, synth_ms, total_ms,
sr.confidence or "-",
sr.refused,
ev_ms,
synth_ms,
total_ms,
) )
# 5. telemetry — 기존 record_search_event 재사용 (Phase 0.3 호환) # 7. telemetry
background_tasks.add_task( background_tasks.add_task(
record_search_event, record_search_event, q, user.id, pr.results, "hybrid",
q, pr.confidence_signal, pr.analyzer_confidence,
user.id,
pr.results,
"hybrid",
pr.confidence_signal,
pr.analyzer_confidence,
) )
debug_obj = ( debug_obj = None
_build_ask_debug(pr, evidence, ev_skip, sr, ev_ms, synth_ms, total_ms) if debug:
if debug timing = dict(pr.timing_ms)
else None timing["evidence_ms"] = ev_ms
timing["synthesis_ms"] = synth_ms
timing["ask_total_ms"] = total_ms
debug_obj = AskDebug(
timing_ms=timing,
search_notes=pr.notes,
query_analysis=pr.query_analysis,
confidence_signal=pr.confidence_signal,
evidence_candidate_count=len(evidence),
evidence_kept_count=len(evidence),
evidence_skip_reason=ev_skip,
synthesis_cache_hit=sr.cache_hit,
synthesis_raw_preview=sr.raw_preview,
hallucination_flags=sr.hallucination_flags,
defense_layers=defense_log,
) )
return AskResponse( return AskResponse(
@@ -434,5 +591,9 @@ async def ask(
no_results_reason=no_reason, no_results_reason=no_reason,
query=q, query=q,
total=len(pr.results), total=len(pr.results),
completeness=completeness,
covered_aspects=covered_aspects,
missing_aspects=missing_aspects,
confirmed_items=confirmed_items,
debug=debug_obj, debug=debug_obj,
) )

View File

@@ -24,6 +24,8 @@ class AIConfig(BaseModel):
embedding: AIModelConfig embedding: AIModelConfig
vision: AIModelConfig vision: AIModelConfig
rerank: AIModelConfig rerank: AIModelConfig
# Phase 3.5a: exaone classifier (optional — 없으면 score-only gate)
classifier: AIModelConfig | None = None
class Settings(BaseModel): class Settings(BaseModel):
@@ -79,6 +81,11 @@ def load_settings() -> Settings:
embedding=AIModelConfig(**ai_raw["models"]["embedding"]), embedding=AIModelConfig(**ai_raw["models"]["embedding"]),
vision=AIModelConfig(**ai_raw["models"]["vision"]), vision=AIModelConfig(**ai_raw["models"]["vision"]),
rerank=AIModelConfig(**ai_raw["models"]["rerank"]), rerank=AIModelConfig(**ai_raw["models"]["rerank"]),
classifier=(
AIModelConfig(**ai_raw["models"]["classifier"])
if "classifier" in ai_raw.get("models", {})
else None
),
) )
if "nas" in raw: if "nas" in raw:

View File

@@ -0,0 +1,33 @@
You are an answerability judge. Given a query and evidence chunks, determine if the evidence can answer the query. Respond ONLY in JSON.
## CALIBRATION (CRITICAL)
- verdict=full: evidence is SUFFICIENT to answer the CORE of the query. Missing minor details does NOT make it insufficient.
- verdict=partial: evidence covers SOME major aspects but CLEARLY MISSES others the user explicitly asked about.
- verdict=insufficient: evidence has NO relevant information for the query, or is completely off-topic.
Example: Query="제6장 주요 내용", Evidence covers 제6장 definition+scope → verdict=full (core is covered).
Example: Query="제6장 처벌 조항", Evidence covers 제6장 definition but NOT 처벌 → verdict=partial.
Example: Query="감귤 출하량", Evidence about 산업안전보건법 → verdict=insufficient.
## Rules
1. Your "verdict" must be based ONLY on whether the CONTENT semantically answers the query. Ignore retrieval scores for this field.
2. "covered_aspects": query aspects that evidence covers. Korean labels for Korean queries.
3. "missing_aspects": query aspects that evidence does NOT cover. Korean labels.
4. Keep aspects concise (2-5 words each), non-overlapping.
## Output Schema
{
"verdict": "full" | "partial" | "insufficient",
"covered_aspects": ["aspect1"],
"missing_aspects": ["aspect2"],
"confidence": "high" | "medium" | "low"
}
## Query
{query}
## Evidence chunks:
{chunks}
## Retrieval scores (for reference only, NOT for verdict):
[{scores}]

View File

@@ -0,0 +1,34 @@
You are a grounded answer synthesizer handling a PARTIAL answer case. Some aspects of the query CAN be answered, others CANNOT. Respond ONLY in JSON.
## Task
Answer ONLY the covered aspects. Do NOT attempt to answer missing aspects.
## Output Schema
{
"confirmed_items": [
{"aspect": "aspect label", "text": "1~2 sentence answer", "citations": [1, 2]}
],
"confidence": "medium" | "low",
"refused": false
}
## Rules
- Each confirmed_item: aspect label + 1~2 sentences + inline [n] citations
- ONLY use facts present in evidence. No outside knowledge, no guessing.
- Do NOT mention or address missing_aspects in your text.
- Korean query → Korean answer / English → English
- confidence: medium (2+ strong evidence matches) / low (1 or weak)
- Max total text: 400 chars across all items
- 모든 주장 문장 끝에 [n] 필수
## Covered aspects (answer these):
{covered_aspects}
## Missing aspects (do NOT answer these):
{missing_aspects}
## Query
{query}
## Evidence
{numbered_evidence}

View File

@@ -0,0 +1,150 @@
"""Answerability classifier (Phase 3.5a).
exaone3.5:7.8b GPU Ollama 기반. MLX gate 밖 — evidence extraction 과 병렬 실행.
P1 실측 결과: ternary (full/partial/insufficient) 불안정 → **binary (sufficient/insufficient)**.
"full" vs "partial" 구분은 grounding_check 의 intent alignment 이 담당.
Classifier verdict 는 "relevant evidence 가 있나" 의 binary 판단.
covered_aspects / missing_aspects 는 로깅용으로 유지 (refusal gate 에서 사용 안 함).
"""
from __future__ import annotations
import asyncio
import time
from dataclasses import dataclass
from typing import Literal
from ai.client import AIClient, _load_prompt, parse_json_response
from core.config import settings
from core.utils import setup_logger
logger = setup_logger("classifier")
# Upper bound for one classifier LLM call, in milliseconds.
LLM_TIMEOUT_MS = 5000
# Number of failed calls (since the last successful request) that opens the circuit.
CIRCUIT_THRESHOLD = 5
# How long the circuit stays open once tripped, in seconds.
CIRCUIT_RECOVERY_SEC = 60
# Module-level circuit-breaker state, mutated by classify().
_failure_count = 0
# Wall-clock timestamp (time.time()) until which calls are short-circuited; None = never opened.
_circuit_open_until: float | None = None
@dataclass(slots=True)
class ClassifierResult:
    """Outcome of one answerability classification attempt."""

    # How the call ended; anything other than "ok" carries verdict=None.
    status: Literal["ok", "timeout", "error", "circuit_open", "skipped"]
    # Binary verdict; None when the call failed or the model's verdict was unrecognized.
    verdict: Literal["sufficient", "insufficient"] | None
    # Aspect labels reported by the model (kept for logging / response display).
    covered_aspects: list[str]
    missing_aspects: list[str]
    # Time spent inside classify(), in milliseconds (0.0 on the short-circuit paths).
    elapsed_ms: float
# Load the prompt template once at import time. A missing file is tolerated:
# the template becomes "" and classify() then returns status "skipped".
try:
    CLASSIFIER_PROMPT = _load_prompt("classifier.txt")
except FileNotFoundError:
    CLASSIFIER_PROMPT = ""
    logger.warning("classifier.txt not found — classifier will always skip")
def _build_input(
    query: str,
    top_chunks: list[dict],
    rerank_scores: list[float],
) -> str:
    """Render the classifier prompt for one query.

    Fills the ``{query}``, ``{chunks}`` and ``{scores}`` placeholders of
    ``CLASSIFIER_PROMPT``. Only the first three chunks and scores are used.

    Args:
        query: Raw user query (untrusted text).
        top_chunks: Dicts with optional ``title`` / ``section`` / ``snippet`` keys.
        rerank_scores: Retrieval scores, rendered with two decimals.

    Returns:
        The prompt string with every placeholder substituted.
    """
    import re  # local import: this module has no file-level `re` import

    chunk_block = "\n".join(
        f"[{i+1}] title: {c.get('title','')}\n"
        f" section: {c.get('section','')}\n"
        f" snippet: {c.get('snippet','')}"
        for i, c in enumerate(top_chunks[:3])
    )
    scores_str = ", ".join(f"{s:.2f}" for s in rerank_scores[:3])

    values = {"query": query, "chunks": chunk_block, "scores": scores_str}
    # Substitute in ONE pass over the template. Chained str.replace() calls
    # would re-scan already-inserted text, so a query or snippet containing
    # "{chunks}" / "{scores}" would get evidence or scores spliced into it.
    # str.format() is not an option: the template contains literal JSON braces.
    return re.sub(
        r"\{(query|chunks|scores)\}",
        lambda m: values[m.group(1)],
        CLASSIFIER_PROMPT,
    )
def _record_failure() -> None:
    """Count one failed classifier call and open the circuit at the threshold."""
    global _failure_count, _circuit_open_until
    _failure_count += 1
    if _failure_count >= CIRCUIT_THRESHOLD:
        _circuit_open_until = time.time() + CIRCUIT_RECOVERY_SEC
        logger.error(f"classifier circuit OPEN for {CIRCUIT_RECOVERY_SEC}s")


def _clean_aspects(value: object) -> list[str]:
    """Keep only non-empty string labels from an LLM-provided aspect list."""
    if not isinstance(value, list):
        return []
    return [
        item.strip()
        for item in value
        if isinstance(item, str) and item.strip()
    ]


async def classify(
    query: str,
    top_chunks: list[dict],
    rerank_scores: list[float],
) -> ClassifierResult:
    """Run the binary answerability classifier for one query.

    Designed to run concurrently with evidence extraction. Never raises for
    LLM failures: every failure mode is reported through ``status``.

    Args:
        query: User query.
        top_chunks: Top retrieved chunks (title/section/snippet dicts).
        rerank_scores: Retrieval scores matching ``top_chunks``.

    Returns:
        ClassifierResult. ``verdict`` is "sufficient" / "insufficient" when the
        model answered with a recognized verdict, otherwise None (also when
        ``status`` is not "ok"); the caller applies its own fallback then.
    """
    global _failure_count

    t_start = time.perf_counter()

    # Circuit breaker: skip the call entirely while the circuit is open.
    if _circuit_open_until and time.time() < _circuit_open_until:
        return ClassifierResult("circuit_open", None, [], [], 0.0)

    # No prompt template or no configured model → classifier is disabled.
    if not CLASSIFIER_PROMPT:
        return ClassifierResult("skipped", None, [], [], 0.0)
    if getattr(settings.ai, "classifier", None) is None:
        return ClassifierResult("skipped", None, [], [], 0.0)

    prompt = _build_input(query, top_chunks, rerank_scores)
    client = AIClient()
    try:
        # Deliberately not behind the MLX gate: per the original note the
        # Ollama-hosted classifier model tolerates concurrent requests.
        async with asyncio.timeout(LLM_TIMEOUT_MS / 1000):
            raw = await client._request(settings.ai.classifier, prompt)
        _failure_count = 0
    except asyncio.TimeoutError:
        _record_failure()
        logger.warning("classifier timeout")
        return ClassifierResult(
            "timeout", None, [], [],
            (time.perf_counter() - t_start) * 1000,
        )
    except Exception as e:
        # Best-effort component: any client error degrades to status="error".
        _record_failure()
        logger.warning(f"classifier error: {e}")
        return ClassifierResult(
            "error", None, [], [],
            (time.perf_counter() - t_start) * 1000,
        )
    finally:
        await client.close()

    elapsed_ms = (time.perf_counter() - t_start) * 1000
    parsed = parse_json_response(raw)
    if not isinstance(parsed, dict):
        logger.warning("classifier parse failed raw=%r", (raw or "")[:200])
        return ClassifierResult("error", None, [], [], elapsed_ms)

    # Ternary → binary mapping. The prompt still asks for
    # full/partial/insufficient; only "insufficient" means refuse.
    raw_verdict = parsed.get("verdict", "")
    # Tolerate casing / stray whitespace in the model output ("Full", " partial").
    normalized = raw_verdict.strip().lower() if isinstance(raw_verdict, str) else ""
    verdict: Literal["sufficient", "insufficient"] | None
    if normalized == "insufficient":
        verdict = "insufficient"
    elif normalized in ("full", "partial", "sufficient"):
        verdict = "sufficient"
    else:
        verdict = None

    # The aspect lists end up in a `list[str]` response field, so drop any
    # non-string or empty items the model may have produced.
    covered = _clean_aspects(parsed.get("covered_aspects"))
    missing = _clean_aspects(parsed.get("missing_aspects"))

    logger.info(
        "classifier ok query=%r verdict=%s (raw=%s) covered=%d missing=%d elapsed_ms=%.0f",
        query[:60], verdict, raw_verdict, len(covered), len(missing), elapsed_ms,
    )
    return ClassifierResult("ok", verdict, covered, missing, elapsed_ms)

View File

@@ -0,0 +1,131 @@
"""Grounding check — post-synthesis 검증 (Phase 3.5a).
Strong/weak flag 분리:
- **Strong** (→ partial 강등 or refuse): fabricated_number, intent_misalignment(important)
- **Weak** (→ confidence lower only): uncited_claim, low_overlap, intent_misalignment(generic)
Re-gate 로직 (Phase 3.5a 9라운드 토론 결과):
- strong 1개 → partial 강등
- strong 2개 이상 → refuse
- weak → confidence 한 단계 하향 (high → medium; /ask 의 re-gate 에서 적용)
Intent alignment (rule-based):
- query 의 핵심 명사가 answer 에 등장하는지 확인
- "처벌" 같은 중요 키워드 누락은 strong
- "주요", "관련" 같은 generic 은 무시
"""
from __future__ import annotations
import re
from dataclasses import dataclass
from typing import TYPE_CHECKING
from core.utils import setup_logger
if TYPE_CHECKING:
from .evidence_service import EvidenceItem
logger = setup_logger("grounding")
# Generic query words (e.g. "주요", "관련") that are NOT treated as important
# terms by the intent-alignment check: missing one of them never raises a
# strong flag on its own.
GENERIC_TERMS = frozenset({
    "주요", "관련", "내용", "정의", "기준", "방법", "설명", "개요",
    "대한", "위한", "대해", "무엇", "어떤", "어떻게", "있는",
    "하는", "되는", "이런", "그런", "이것", "그것",
})
@dataclass(slots=True)
class GroundingResult:
    """Flags produced by the post-synthesis grounding check."""

    # Flags of the form "fabricated_number:…" / "intent_misalignment:…".
    strong_flags: list[str]
    # Flags of the form "uncited_claim:…" / "low_overlap:…" / "intent_misalignment_generic:…".
    weak_flags: list[str]
def _extract_number_literals(text: str) -> set[str]:
    """Collect number literals found in *text*.

    The result contains, for every "number + unit" occurrence, both the
    matched literal and its bare numeric prefix with commas removed, plus
    every stand-alone run of digits.
    """
    literals: set[str] = set()

    # number followed by a unit character (and up to two trailing word chars)
    for match in re.finditer(r'\d[\d,.]*\s*[명인개%년월일조항호세]\w{0,2}', text):
        with_unit = match.group()
        literals.add(with_unit.strip())
        numeric_prefix = re.match(r'[\d,.]+', with_unit)
        if numeric_prefix is not None:
            literals.add(numeric_prefix.group().replace(',', ''))

    # digit runs delimited by word boundaries
    literals.update(re.findall(r'\b\d+\b', text))
    return literals
def _extract_content_tokens(text: str) -> set[str]:
    """Return the distinct content tokens of *text*.

    A token is a run of two or more Hangul syllables, or a run of three or
    more ASCII letters.
    """
    token_pattern = r'[가-힣]{2,}|[a-zA-Z]{3,}'
    return {hit.group(0) for hit in re.finditer(token_pattern, text)}
def check(
    query: str,
    answer: str,
    evidence: list[EvidenceItem],
) -> GroundingResult:
    """Check a synthesized answer against its evidence and the query intent.

    Strong flags:
        - ``fabricated_number:<literal>`` — a number in the answer whose
          digits occur nowhere in the evidence numbers. At most one flag per
          distinct number, so a single fabricated figure counts once.
        - ``intent_misalignment:<terms>`` — non-generic query terms that do
          not appear anywhere in the answer text.
    Weak flags:
        - ``intent_misalignment_generic:missing(<terms>)``
        - ``uncited_claim:<sentence prefix>``
        - ``low_overlap:<ratio>``

    Args:
        query: User query.
        answer: Synthesized answer text (may contain ``[n]`` citation markers).
        evidence: Evidence items; only ``span_text`` is read.

    Returns:
        GroundingResult; both lists are empty when ``answer`` or ``evidence``
        is empty.
    """
    strong: list[str] = []
    weak: list[str] = []

    if not answer or not evidence:
        return GroundingResult([], [])

    evidence_text = " ".join(e.span_text for e in evidence)

    # ── Strong 1: fabricated number ──
    # "[n]" citation markers are references, not facts: drop them before
    # extracting numbers so they cannot be reported as fabricated.
    answer_for_numbers = re.sub(r'\[\d+\]', ' ', answer)
    evidence_digits = [
        re.sub(r'[^\d]', '', lit)
        for lit in _extract_number_literals(evidence_text)
    ]
    answer_literals = [
        (lit, re.sub(r'[^\d]', '', lit))
        for lit in _extract_number_literals(answer_for_numbers)
    ]
    # Longest digit string first (then the most descriptive literal), so
    # "25명" is reported rather than "25" and the output order is stable.
    answer_literals.sort(key=lambda pair: (-len(pair[1]), -len(pair[0]), pair[0]))
    checked_digits: set[str] = set()
    flagged_digits: list[str] = []
    for literal, digits in answer_literals:
        # The extractor yields several spellings of one number ("25명", "25");
        # evaluate each digit string once so one number yields one flag.
        if not digits or digits in checked_digits:
            continue
        checked_digits.add(digits)
        if any(digits in ev for ev in evidence_digits):
            continue
        # Fragments of an already-flagged literal ("3" / "5" out of "3.5%")
        # belong to the same fabricated number; do not count them again.
        if any(digits in prev for prev in flagged_digits):
            continue
        flagged_digits.append(digits)
        strong.append(f"fabricated_number:{literal}")

    # ── Strong/Weak 2: query-answer intent alignment ──
    query_terms = _extract_content_tokens(query)
    if query_terms:
        # Substring match on the answer text: Korean tokens carry particles
        # ("처벌을"), so exact token equality would report "처벌" as missing.
        answer_folded = answer.casefold()
        missing_terms = sorted(
            t for t in query_terms if t.casefold() not in answer_folded
        )
        important_missing = [t for t in missing_terms if t not in GENERIC_TERMS]
        if important_missing:
            strong.append(
                f"intent_misalignment:{','.join(important_missing[:3])}"
            )
        elif len(missing_terms) > len(query_terms) * 0.5:
            weak.append(
                f"intent_misalignment_generic:"
                f"missing({','.join(missing_terms[:5])})"
            )

    # ── Weak 1: uncited claim ──
    # Uses the original answer: the citation markers are what is being checked.
    sentences = re.split(r'(?<=[.!?。])\s+', answer)
    for s in sentences:
        if len(s.strip()) > 20 and not re.search(r'\[\d+\]', s):
            weak.append(f"uncited_claim:{s[:40]}")

    # ── Weak 2: token overlap ──
    answer_tokens = _extract_content_tokens(answer)
    evidence_tokens = _extract_content_tokens(evidence_text)
    if answer_tokens:
        overlap = len(answer_tokens & evidence_tokens) / len(answer_tokens)
        if overlap < 0.4:
            weak.append(f"low_overlap:{overlap:.2f}")

    if strong or weak:
        logger.info(
            "grounding query=%r strong=%d weak=%d flags=%s",
            query[:60],
            len(strong),
            len(weak),
            ",".join(strong[:3] + weak[:3]),
        )

    return GroundingResult(strong, weak)

View File

@@ -0,0 +1,105 @@
"""Refusal gate — multi-signal fusion (Phase 3.5a).
Score gate (deterministic) + classifier verdict (semantic, binary) 를 독립 평가 후 합성.
Classifier 부재 시 3-tier conservative fallback.
P1 실측 결과: exaone ternary 불안정 → binary (sufficient/insufficient) 로 축소.
"full" vs "partial" 구분은 grounding check (intent alignment) 가 담당.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Literal
from core.utils import setup_logger
if TYPE_CHECKING:
from .classifier_service import ClassifierResult
logger = setup_logger("refusal_gate")
# Placeholder thresholds — to be tuned from measurements in Phase 3.5b.
# The score gate is an AND of both conditions: it only fails when the max
# score AND the top-3 aggregate are both below their thresholds, which
# limits false refusals.
SCORE_MAX_REFUSE = 0.25
SCORE_AGG_REFUSE = 0.70
# Conservative fallback tiers, used when no classifier verdict is available.
CONSERVATIVE_WEAK = 0.35
CONSERVATIVE_MID = 0.55
@dataclass(slots=True)
class RefusalDecision:
    """Result of the refusal gate."""

    # True → the caller should skip synthesis and refuse.
    refused: bool
    # Upper bound for the synthesis confidence; None = no cap.
    confidence_cap: Literal["high", "medium", "low"] | None  # None = no cap
    # Debugging aid: which signal/rule produced this decision (None = none fired).
    rule_triggered: str | None
def decide(
    rerank_scores: list[float],
    classifier: ClassifierResult | None,
) -> RefusalDecision:
    """Fuse the score gate and the binary classifier verdict into one decision.

    With a classifier verdict: "insufficient" refuses; otherwise the score
    gate (max AND top-3 sum both below threshold) may still refuse.
    Without a verdict: a three-tier fallback on the max score refuses, caps
    confidence at "low", or caps it at "medium".

    Returns:
        RefusalDecision. ``refused=True`` means synthesis should be skipped;
        ``confidence_cap`` is an upper bound for the synthesis confidence.
    """
    if rerank_scores:
        top_score = max(rerank_scores)
    else:
        top_score = 0.0
    top3_sum = sum(sorted(rerank_scores, reverse=True)[:3])

    verdict = classifier.verdict if classifier else None

    # ── Normal path: a classifier verdict is available ──
    if verdict is not None:
        if verdict == "insufficient":
            # NOTE(review): the original comment says an evidence-quality
            # override of this verdict is handled by the caller — confirm
            # that the caller actually implements it.
            logger.info(
                "refusal gate: classifier=insufficient max=%.2f agg=%.2f",
                top_score, top3_sum,
            )
            return RefusalDecision(
                refused=True,
                confidence_cap=None,
                rule_triggered="classifier_insufficient",
            )

        below_both_thresholds = (
            top_score < SCORE_MAX_REFUSE and top3_sum < SCORE_AGG_REFUSE
        )
        if below_both_thresholds:
            logger.info(
                "refusal gate: score_low max=%.2f agg=%.2f classifier=%s",
                top_score, top3_sum, verdict,
            )
            return RefusalDecision(
                refused=True,
                confidence_cap=None,
                rule_triggered="score_low",
            )

        # Classifier says sufficient and scores are acceptable → proceed.
        return RefusalDecision(
            refused=False,
            confidence_cap=None,
            rule_triggered=None,
        )

    # ── No classifier verdict → three-tier conservative fallback ──
    if top_score < CONSERVATIVE_WEAK:
        return RefusalDecision(
            refused=True,
            confidence_cap=None,
            rule_triggered="conservative_refuse(no_classifier)",
        )

    cap: Literal["high", "medium", "low"]
    if top_score < CONSERVATIVE_MID:
        cap, rule = "low", "conservative_low(no_classifier)"
    else:
        cap, rule = "medium", "conservative_medium(no_classifier)"
    return RefusalDecision(
        refused=False,
        confidence_cap=cap,
        rule_triggered=rule,
    )

View File

@@ -0,0 +1,33 @@
"""문장 분할 (Phase 3.5a — regex 기반).
Phase 3.5b 에서 KSS 라이브러리 기반으로 업그레이드 예정.
"""
import re
MIN_SENTENCE_CHARS = 15


def split_sentences(text: str) -> list[str]:
    """Split mixed Korean/English text into sentences.

    Boundaries are whitespace following ``.``, ``!``, ``?`` or ``。``, and
    runs of two or more newlines. A fragment shorter than
    ``MIN_SENTENCE_CHARS`` is appended to the preceding sentence (the very
    first fragment is kept as-is because it has no predecessor).
    """
    boundary = r'(?<=[.!?。])\s+|(?<=[다됨음함]\.)\s+|\n{2,}'

    sentences: list[str] = []
    for fragment in re.split(boundary, text):
        piece = fragment.strip()
        if not piece:
            continue
        if sentences and len(piece) < MIN_SENTENCE_CHARS:
            # too short to stand alone → glue onto the previous sentence
            sentences[-1] = sentences[-1] + " " + piece
        else:
            sentences.append(piece)

    if sentences:
        return sentences
    # Fallback kept from the original: whole stripped text, or nothing.
    stripped = text.strip()
    return [stripped] if stripped else []

View File

@@ -35,6 +35,12 @@ ai:
rerank: rerank:
endpoint: "http://ollama:11434/api/rerank" endpoint: "http://ollama:11434/api/rerank"
model: "bge-reranker-v2-m3" model: "bge-reranker-v2-m3"
# Phase 3.5a: exaone answerability classifier (GPU Ollama, concurrent OK)
classifier:
endpoint: "http://ollama:11434/v1/chat/completions"
model: "exaone3.5:7.8b-instruct-q8_0"
max_tokens: 512
timeout: 10
nas: nas:
mount_path: "/documents" mount_path: "/documents"

View File

@@ -63,10 +63,14 @@
}; };
let tokens = $derived(data?.ai_answer ? splitAnswer(data.ai_answer) : []); let tokens = $derived(data?.ai_answer ? splitAnswer(data.ai_answer) : []);
let showAnswer = $derived( let showFullAnswer = $derived(
!!data && !!data.ai_answer && data.synthesis_status === 'completed' && !data.refused, !!data && !!data.ai_answer && data.completeness === 'full'
&& data.synthesis_status === 'completed' && !data.refused,
); );
let showWarning = $derived(!!data && !showAnswer); let showPartial = $derived(
!!data && data.completeness === 'partial' && !data.refused,
);
let showWarning = $derived(!!data && !showFullAnswer && !showPartial);
</script> </script>
<section class="bg-surface border border-default rounded-card p-5"> <section class="bg-surface border border-default rounded-card p-5">
@@ -107,7 +111,7 @@
근거 기반 답변 생성 중… 약 15초 소요 근거 기반 답변 생성 중… 약 15초 소요
</p> </p>
</div> </div>
{:else if showAnswer && data} {:else if showFullAnswer && data}
<div class="text-sm leading-7 text-text"> <div class="text-sm leading-7 text-text">
{#each tokens as tok} {#each tokens as tok}
{#if tok.type === 'cite'} {#if tok.type === 'cite'}
@@ -124,6 +128,67 @@
{/if} {/if}
{/each} {/each}
</div> </div>
{:else if showPartial && data}
<!-- Phase 3.5a: question-aligned partial structure -->
<div>
<Badge tone="warning" size="sm">일부 답변</Badge>
{#if data.ai_answer}
<div class="mt-3 text-sm leading-7 text-text">
{#each tokens as tok}
{#if tok.type === 'cite'}
<button
type="button"
class="inline-block align-baseline text-accent font-semibold hover:underline rounded px-0.5"
onclick={() => onCitationClick(tok.n)}
>{tok.raw}</button>
{:else}
<span>{tok.value}</span>
{/if}
{/each}
</div>
{:else if data.confirmed_items?.length}
<div class="mt-3">
<h4 class="text-xs font-semibold text-dim uppercase tracking-wider">✓ 답변 가능</h4>
<ul class="mt-2 space-y-2">
{#each data.confirmed_items as item}
<li class="text-sm text-text">
<strong class="text-accent">{item.aspect}:</strong>
<span>{item.text}</span>
{#each item.citations as n}
<button
type="button"
class="text-accent font-semibold hover:underline px-0.5"
onclick={() => onCitationClick(n)}
>[{n}]</button>
{/each}
</li>
{/each}
</ul>
</div>
{/if}
{#if data.missing_aspects?.length}
<div class="mt-4 border-t border-default pt-3">
<h4 class="text-xs font-semibold text-dim uppercase tracking-wider">✗ 답변 불가</h4>
<ul class="mt-2 space-y-1">
{#each data.missing_aspects as aspect}
<li class="text-sm text-dim">{aspect} <span class="text-[10px]">(근거 없음)</span></li>
{/each}
</ul>
</div>
{/if}
<div class="mt-4">
<Button
variant="secondary"
size="sm"
href={`/documents?q=${encodeURIComponent(data.query)}`}
>
검색 결과 확인하기
</Button>
</div>
</div>
{:else if showWarning && data} {:else if showWarning && data}
<EmptyState <EmptyState
icon={AlertTriangle} icon={AlertTriangle}

View File

@@ -50,6 +50,14 @@ export interface SearchResult {
rerank_score: number | null; rerank_score: number | null;
} }
export type Completeness = 'full' | 'partial' | 'insufficient';
export interface ConfirmedItem {
aspect: string;
text: string;
citations: number[];
}
export interface AskResponse { export interface AskResponse {
results: SearchResult[]; results: SearchResult[];
ai_answer: string | null; ai_answer: string | null;
@@ -61,4 +69,9 @@ export interface AskResponse {
no_results_reason: string | null; no_results_reason: string | null;
query: string; query: string;
total: number; total: number;
/** Phase 3.5a */
completeness: Completeness;
covered_aspects: string[] | null;
missing_aspects: string[] | null;
confirmed_items: ConfirmedItem[] | null;
} }

View File

@@ -0,0 +1,26 @@
-- Phase 3.5a: /ask 호출 관측 테이블
-- refusal rate 측정, 지표 3 분리 (full/partial/insufficient), defense layer 디버깅
CREATE TABLE IF NOT EXISTS ask_events (
id BIGSERIAL PRIMARY KEY,
query TEXT NOT NULL,
user_id BIGINT REFERENCES users(id),
completeness TEXT, -- full / partial / insufficient
synthesis_status TEXT,
confidence TEXT,
refused BOOLEAN DEFAULT false,
classifier_verdict TEXT, -- sufficient / insufficient / null (skipped)
max_rerank_score REAL,
aggregate_score REAL,
hallucination_flags JSONB DEFAULT '[]',
evidence_count INT,
citation_count INT,
defense_layers JSONB, -- per-layer flag snapshot (score_gate, classifier, grounding)
total_ms INT,
created_at TIMESTAMPTZ DEFAULT now()
);
CREATE INDEX IF NOT EXISTS idx_ask_events_created ON ask_events(created_at);
CREATE INDEX IF NOT EXISTS idx_ask_events_completeness ON ask_events(completeness);
INSERT INTO schema_migrations (version) VALUES (102);

View File

@@ -0,0 +1,58 @@
# Phase 3.5a Smoke Test Evaluation Set (10 queries)
# 목적: 구조 검증 (smoke test), 정밀 튜닝 아님
# Phase 3.5b 에서 30+ 쿼리로 확장 예정
queries:
# 정상 (4) — full answer expected
- q: "산업안전보건법 제6장 주요 내용"
expected_completeness: full
expected_refuse: false
notes: "prewarm #1. evidence 충분."
- q: "기계 사고 관련 법령"
expected_completeness: full
expected_refuse: false
notes: "prewarm #2. 법령 도메인."
- q: "유해화학물질을 다루는 회사가 지켜야 할 안전 의무"
expected_completeness: full
expected_refuse: false
notes: "prewarm #5. 긴 자연어 쿼리."
- q: "위험성평가 절차"
expected_completeness: full
expected_refuse: false
notes: "prewarm #12. 짧은 키워드 쿼리."
# no-result (2) — insufficient expected
- q: "xyzzy_nonexistent_query_12345"
expected_completeness: insufficient
expected_refuse: true
notes: "Phase 3 에서 이미 검증됨."
- q: "제주도 감귤 출하량 통계"
expected_completeness: insufficient
expected_refuse: true
notes: "corpus 에 확실히 없는 도메인."
# tricky mismatch (2) — classifier/grounding 핵심 케이스
- q: "산업안전보건법 제6장 처벌 조항"
expected_completeness: partial
expected_refuse: false
notes: "제6장 내용은 있지만 처벌(제10장 벌칙)은 없음. intent_misalignment 이 잡아야 함."
- q: "화학물질관리법과 산업안전보건법의 차이"
expected_completeness: partial
expected_refuse: false
notes: "복합 쿼리. 하나만 있을 수 있음."
# cross-domain (2)
- q: "Python async best practice"
expected_completeness: insufficient
expected_refuse: true
notes: "corpus 에 영어 프로그래밍 문서 적음."
- q: "EU AI Act"
expected_completeness: full
expected_refuse: false
notes: "news 도메인. prewarm #11."