Files
hyungi_document_server/app/services/search/classifier_service.py
T
hyungi 2d86683636 refactor(ai): AIClient PR-B — gate 누락 경로 봉인 + 공유 httpx + public classifier/verifier
코드리뷰 AIClient 정비 PR-B (#2 gate·#3 httpx·#4 public).

#2 gate 구조 (call-site 컨벤션 — gate 는 caller-managed, AIClient self-gate 금지):
  · classify_worker consumer call_triage: gate 없이 Mac mini 직타하던 것 → acquire_mlx_gate(BACKGROUND).
    (drain 경로 call_deep_or_defer 는 맥북 deep 슬롯이라 mini gate 무관, 미적용.)
  · verifier_service: gate 없이 _request(verifier) 하던 것 → acquire_mlx_gate(FOREGROUND) + call_verifier.
    classifier/evidence 와 동일 gate 공유로 thundering-herd(22-timeout 사고) 방어.
  ★재진입 안전 검증: AIClient 메서드 내부 self-gate 0(전부 call-site) + evidence/classifier 는 이미
   독립 gate 보유 + api/search 오케스트레이터 gate 미보유 → double-acquire 데드락 불가.

#4 public 메서드: call_classifier/call_verifier 추가 → classifier/verifier_service 의 private _request
  직접호출 봉인(egress 가드 일관 적용). gate 는 caller-managed 유지(call_primary 와 동일 계약).

#3 공유 httpx: 호출마다 AsyncClient 생성(30+ 사이트)을 _get_shared_http() 단일 풀로 — keep-alive
  재사용. 이벤트루프 바인딩이라 루프 변경(테스트) 시 재생성, close() 는 no-op.

py_compile PASS. (잔여 #4: query_analyzer/digest/backends 의 _request·_call_chat 직접호출은 gated 라
안전, 후속 sweep.)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 20:07:30 +09:00

157 lines
5.6 KiB
Python

"""Answerability classifier (Phase 3.5a).
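Runs on the Mac mini 26B MLX model (config.yaml ai.models.classifier — since PR #20 the triage/primary/classifier roles share one endpoint). The LLM call is made while holding the shared MLX gate (see classify()); it may be scheduled alongside evidence extraction, but the model call itself waits for the gate rather than running outside it.
P1 measurements: the ternary verdict (full/partial/insufficient) was unstable → **binary (sufficient/insufficient)**.
The "full" vs "partial" distinction is handled by grounding_check's intent alignment.
The classifier verdict is a binary judgment of "is there relevant evidence".
covered_aspects / missing_aspects are kept for logging only (not used by the refusal gate).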
"""
from __future__ import annotations

import asyncio
import re
import time
from dataclasses import dataclass
from typing import Literal

from ai.client import AIClient, _load_prompt, parse_json_response
from core.config import settings
from core.utils import setup_logger

from .llm_gate import Priority, acquire_mlx_gate
# Module-level logger for the answerability classifier.
logger = setup_logger("classifier")
# Upper bound on one classifier model call, in milliseconds
# (converted to seconds for asyncio.timeout in classify()).
LLM_TIMEOUT_MS = 30000
# Number of consecutive failed calls at which the circuit breaker opens.
CIRCUIT_THRESHOLD = 5
# How long, in seconds, the circuit stays open once it has tripped.
CIRCUIT_RECOVERY_SEC = 60
# Circuit-breaker state shared by every classify() call in this process.
# Reset to 0 after a successful model call, incremented on timeout/error.
_failure_count = 0
# time.time() deadline before which classify() short-circuits; None until
# the breaker trips for the first time.
_circuit_open_until: float | None = None
@dataclass(slots=True)
class ClassifierResult:
    """Outcome of a single classify() invocation.

    Fields are positional; classify() constructs instances as
    ``ClassifierResult(status, verdict, covered, missing, elapsed_ms)``.
    """

    # How the call ended: "ok" (response parsed), "timeout", "error"
    # (call raised or response was not a JSON object), "circuit_open"
    # (breaker is open, no call made), "skipped" (no prompt or no config).
    status: Literal["ok", "timeout", "error", "circuit_open", "skipped"]
    # Binary answerability verdict; None whenever status is not "ok", and
    # also when the model returned a verdict label that is not recognised.
    verdict: Literal["sufficient", "insufficient"] | None
    # Aspects the model reported as covered / missing (empty on failure).
    covered_aspects: list[str]
    missing_aspects: list[str]
    # Wall time spent in classify(), in milliseconds; 0.0 for the
    # short-circuit paths ("circuit_open", "skipped").
    elapsed_ms: float
# Load the prompt template once at import time. A missing template file is
# tolerated: the empty string makes classify() return status "skipped"
# instead of failing the import.
try:
    CLASSIFIER_PROMPT = _load_prompt("classifier.txt")
except FileNotFoundError:
    CLASSIFIER_PROMPT = ""
    logger.warning("classifier.txt not found — classifier will always skip")
# Matches exactly the three placeholders the classifier template may contain.
_PLACEHOLDER_RE = re.compile(r"\{(query|chunks|scores)\}")


def _build_input(
    query: str,
    top_chunks: list[dict],
    rerank_scores: list[float],
) -> str:
    """Render the classifier prompt (Y+ input: content + scores, role-separated).

    Only the first three chunks and the first three rerank scores are used.

    Args:
        query: The user query substituted for ``{query}``.
        top_chunks: Retrieval hits; each dict may carry ``title``,
            ``section`` and ``snippet`` (missing keys render as empty).
        rerank_scores: Reranker scores, rendered with two decimals and
            substituted for ``{scores}``.

    Returns:
        CLASSIFIER_PROMPT with ``{query}``, ``{chunks}`` and ``{scores}``
        filled in.
    """
    chunk_block = "\n".join(
        f"[{idx}] title: {chunk.get('title', '')}\n"
        f" section: {chunk.get('section', '')}\n"
        f" snippet: {chunk.get('snippet', '')}"
        for idx, chunk in enumerate(top_chunks[:3], start=1)
    )
    scores_str = ", ".join(f"{score:.2f}" for score in rerank_scores[:3])

    values = {"query": query, "chunks": chunk_block, "scores": scores_str}
    # Substitute in a single pass over the template. Chained str.replace()
    # calls would re-scan text that was just inserted, so a query or snippet
    # containing a literal "{chunks}" / "{scores}" would itself be expanded.
    return _PLACEHOLDER_RE.sub(
        lambda match: values[match.group(1)],
        CLASSIFIER_PROMPT,
    )
def _record_failure() -> None:
    """Count one failed classifier call; open the circuit at the threshold."""
    global _failure_count, _circuit_open_until
    _failure_count += 1
    if _failure_count >= CIRCUIT_THRESHOLD:
        _circuit_open_until = time.time() + CIRCUIT_RECOVERY_SEC
        logger.error("classifier circuit OPEN for %ss", CIRCUIT_RECOVERY_SEC)


def _to_binary_verdict(
    raw_verdict: object,
) -> Literal["sufficient", "insufficient"] | None:
    """Collapse the model's verdict label (ternary or binary) to binary.

    Matching ignores case and surrounding whitespace so that output such as
    "Sufficient" is not silently dropped. Anything unrecognised maps to None.
    """
    if not isinstance(raw_verdict, str):
        return None
    label = raw_verdict.strip().lower()
    if label == "insufficient":
        return "insufficient"
    if label in ("full", "partial", "sufficient"):
        return "sufficient"
    return None


async def classify(
    query: str,
    top_chunks: list[dict],
    rerank_scores: list[float],
) -> ClassifierResult:
    """Always-on binary answerability classifier.

    Intended to be launched alongside evidence extraction; the model call
    itself is made while holding the MLX gate.

    Args:
        query: The user query.
        top_chunks: Retrieval hits passed to the prompt builder.
        rerank_scores: Reranker scores passed to the prompt builder.

    Returns:
        ClassifierResult. When status is not "ok", verdict is None and the
        caller is expected to apply its own fallback. With status "ok" the
        verdict is "sufficient" or "insufficient", or None if the model
        returned a label that is not recognised.
    """
    global _failure_count
    t_start = time.perf_counter()

    # Circuit breaker: while open, answer immediately without a model call.
    if _circuit_open_until and time.time() < _circuit_open_until:
        return ClassifierResult("circuit_open", None, [], [], 0.0)
    # No prompt template or no classifier model configured -> nothing to run.
    if not CLASSIFIER_PROMPT:
        return ClassifierResult("skipped", None, [], [], 0.0)
    if getattr(settings.ai, "classifier", None) is None:
        return ClassifierResult("skipped", None, [], [], 0.0)

    prompt = _build_input(query, top_chunks, rerank_scores)
    client = AIClient()
    try:
        # 2026-05-17: since PR #20 the endpoint is the Mac mini 26B, so the
        # llm_gate Semaphore(1) is mandatory. Without the gate, classifier +
        # evidence + synthesis race on the single-inference MLX server and
        # almost all of them time out (measured: 8/10 fixture queries).
        # Standing rule: "every MLX primary call path must acquire the gate,
        # no exceptions".
        async with acquire_mlx_gate(Priority.FOREGROUND):
            # The timeout is nested inside the gate, so it bounds only the
            # model call, not the time spent waiting for the gate.
            async with asyncio.timeout(LLM_TIMEOUT_MS / 1000):
                raw = await client.call_classifier(prompt)
        _failure_count = 0
    except asyncio.TimeoutError:
        _record_failure()
        logger.warning("classifier timeout")
        return ClassifierResult(
            "timeout", None, [], [],
            (time.perf_counter() - t_start) * 1000,
        )
    except Exception as e:
        # Deliberately broad: the classifier is best-effort, so any failure
        # is reported as status "error" rather than propagated to the caller.
        _record_failure()
        logger.warning("classifier error: type=%s repr=%r", type(e).__name__, e)
        return ClassifierResult(
            "error", None, [], [],
            (time.perf_counter() - t_start) * 1000,
        )
    finally:
        await client.close()

    elapsed_ms = (time.perf_counter() - t_start) * 1000
    parsed = parse_json_response(raw)
    if not isinstance(parsed, dict):
        logger.warning("classifier parse failed raw=%r", (raw or "")[:200])
        return ClassifierResult("error", None, [], [], elapsed_ms)

    # Ternary -> binary mapping; the raw label is kept for the log line.
    raw_verdict = parsed.get("verdict", "")
    verdict = _to_binary_verdict(raw_verdict)

    # Aspect lists are informational; anything that is not a list is dropped.
    covered = parsed.get("covered_aspects") or []
    missing = parsed.get("missing_aspects") or []
    if not isinstance(covered, list):
        covered = []
    if not isinstance(missing, list):
        missing = []

    logger.info(
        "classifier ok query=%r verdict=%s (raw=%s) covered=%d missing=%d elapsed_ms=%.0f",
        query[:60], verdict, raw_verdict, len(covered), len(missing), elapsed_ms,
    )
    return ClassifierResult("ok", verdict, covered, missing, elapsed_ms)