a08b620894
DS-Mac-mini-26B-Priority-Gate-1 — 사용자-facing 7 + worker 3 = 10 site 의 `async with get_mlx_gate():` → `async with acquire_mlx_gate(Priority.*):` 교체. Foreground 6 (user-facing path): - app/services/search/evidence_service.py:315 (/ask evidence stage) - app/services/search/classifier_service.py:103 (/ask classifier stage) - app/services/search/synthesis_service.py:299 (/ask synthesis stage) - app/api/documents.py:1306 (수동 analyze API) - app/api/study_topics.py:1183 (subject note 동기 생성) - app/api/study_questions.py:1560 (study explanation 동기 API) Background 4 (worker queue / fire-and-forget): - app/services/search/query_analyzer.py:240 (V0 grep 확인: fire-and-forget only, search_pipeline.py:179 trigger_background_analysis 만, docstring rule "analyze() 동기 호출 금지" 부합 → BACKGROUND 확정) - app/workers/deep_summary_worker.py:110 (classify-escalate worker) - app/workers/study_explanation_worker.py:149 - app/workers/study_session_analysis_worker.py:237 Cleanup: - query_analyzer._get_llm_semaphore() 제거 — self-only, unused, signature 거짓말 (이제 get_mlx_gate 가 Semaphore 아닌 context manager 반환) 기존 get_mlx_gate() legacy wrapper 는 보존 (BACKGROUND 매핑). user-facing path 잔재 0 — closure gate grep 검증 통과 (별 commit 에서).
157 lines
5.6 KiB
Python
157 lines
5.6 KiB
Python
"""Answerability classifier (Phase 3.5a).
|
|
|
|
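Backed by the Mac mini 26B MLX endpoint (config.yaml ai.models.classifier — since PR #20 triage/primary/classifier share one endpoint). The MLX request runs inside the llm_gate at FOREGROUND priority, which is mandatory for every call to this endpoint. The classifier is still launched in parallel with evidence extraction; the gate arbitrates their concurrent access (concurrency safety is reviewed separately).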
|
|
|
|
P1 실측 결과: ternary (full/partial/insufficient) 불안정 → **binary (sufficient/insufficient)**.
|
|
"full" vs "partial" 구분은 grounding_check 의 intent alignment 이 담당.
|
|
|
|
Classifier verdict 는 "relevant evidence 가 있나" 의 binary 판단.
|
|
covered_aspects / missing_aspects 는 로깅용으로 유지 (refusal gate 에서 사용 안 함).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import time
|
|
from dataclasses import dataclass
|
|
from typing import Literal
|
|
|
|
from ai.client import AIClient, _load_prompt, parse_json_response
|
|
from core.config import settings
|
|
from core.utils import setup_logger
|
|
|
|
from .llm_gate import Priority, acquire_mlx_gate
|
|
|
|
# Module-level logger, registered under the name "classifier".
logger = setup_logger("classifier")
|
|
|
|
# Upper bound for a single classifier request, in milliseconds
# (converted to seconds where the request timeout is armed).
LLM_TIMEOUT_MS = 30_000

# Number of accumulated failures at which the circuit breaker opens,
# and how long (seconds) it then stays open.
CIRCUIT_THRESHOLD = 5
CIRCUIT_RECOVERY_SEC = 60

# Mutable circuit-breaker state shared by every classify() call in this process.
# _failure_count is reset to 0 after a successful request.
_failure_count = 0
# Wall-clock (time.time()) deadline until which calls are rejected; None = never opened.
_circuit_open_until: float | None = None
|
|
|
|
|
|
@dataclass(slots=True)
|
|
class ClassifierResult:
|
|
status: Literal["ok", "timeout", "error", "circuit_open", "skipped"]
|
|
verdict: Literal["sufficient", "insufficient"] | None
|
|
covered_aspects: list[str]
|
|
missing_aspects: list[str]
|
|
elapsed_ms: float
|
|
|
|
|
|
# Load the prompt template once at import time. An empty template is the
# "prompt unavailable" marker: classify() returns status "skipped" when it is falsy.
CLASSIFIER_PROMPT = ""
try:
    CLASSIFIER_PROMPT = _load_prompt("classifier.txt")
except FileNotFoundError:
    logger.warning("classifier.txt not found — classifier will always skip")
|
|
|
|
|
|
def _build_input(
    query: str,
    top_chunks: list[dict],
    rerank_scores: list[float],
) -> str:
    """Render the classifier prompt for one query.

    Fills the ``{query}``, ``{chunks}`` and ``{scores}`` placeholders of
    ``CLASSIFIER_PROMPT``. Only the first three chunks and the first three
    rerank scores are used.

    Substitution is done in a single pass over the template, so placeholder
    text that happens to occur inside the query or inside a chunk snippet is
    kept literally instead of being expanded by a later replacement step.

    Args:
        query: The user query.
        top_chunks: Retrieved chunks; each is read via ``.get`` for the keys
            ``title``, ``section`` and ``snippet`` (missing keys render empty).
        rerank_scores: Scores rendered with two decimals, comma-separated.

    Returns:
        The prompt text with all three placeholders filled in.
    """
    import re  # function-scope import: the module does not import re at top level

    chunk_block = "\n".join(
        f"[{rank}] title: {chunk.get('title','')}\n"
        f" section: {chunk.get('section','')}\n"
        f" snippet: {chunk.get('snippet','')}"
        for rank, chunk in enumerate(top_chunks[:3], start=1)
    )
    scores_str = ", ".join(f"{score:.2f}" for score in rerank_scores[:3])

    substitutions = {
        "query": query,
        "chunks": chunk_block,
        "scores": scores_str,
    }
    # A callable replacement is used so backslashes in the values are not
    # interpreted as regex escapes.
    return re.sub(
        r"\{(query|chunks|scores)\}",
        lambda match: substitutions[match.group(1)],
        CLASSIFIER_PROMPT,
    )
|
|
|
|
|
|
async def classify(
    query: str,
    top_chunks: list[dict],
    rerank_scores: list[float],
) -> ClassifierResult:
    """Ask the classifier model whether the chunks hold relevant evidence.

    The model's verdict is collapsed to a binary value: ``"insufficient"``
    is kept, while ``"full"``, ``"partial"`` and ``"sufficient"`` all map to
    ``"sufficient"``.

    Args:
        query: The user query.
        top_chunks: Retrieved chunks passed on to the prompt builder.
        rerank_scores: Rerank scores passed on to the prompt builder.

    Returns:
        A ``ClassifierResult``. ``verdict`` is ``None`` for every status other
        than ``"ok"``; it is also ``None`` with status ``"ok"`` when the model
        returns an unrecognised verdict value, so callers need a fallback for
        a missing verdict in either case.
    """
    global _failure_count, _circuit_open_until
    started = time.perf_counter()

    def _elapsed_ms() -> float:
        return (time.perf_counter() - started) * 1000

    def _short_circuit(status) -> ClassifierResult:
        # Exits taken before any request is made report zero elapsed time.
        return ClassifierResult(status, None, [], [], 0.0)

    # Circuit breaker: reject calls while the recovery window is still running.
    if _circuit_open_until and time.time() < _circuit_open_until:
        return _short_circuit("circuit_open")

    # No prompt template loaded.
    if not CLASSIFIER_PROMPT:
        return _short_circuit("skipped")

    # No classifier model configured.
    if getattr(settings.ai, "classifier", None) is None:
        return _short_circuit("skipped")

    prompt = _build_input(query, top_chunks, rerank_scores)
    client = AIClient()
    try:
        # 2026-05-17: since PR #20 this endpoint is the Mac mini 26B, so the
        # llm_gate is mandatory. Without it classifier + evidence + synthesis
        # race on the single-inference MLX server and nearly all of them time
        # out (measured: 8/10 fixture queries). Standing rule: every MLX
        # primary call path must acquire the gate, no exceptions.
        # The timeout is entered after the gate, so it bounds only the request.
        async with (
            acquire_mlx_gate(Priority.FOREGROUND),
            asyncio.timeout(LLM_TIMEOUT_MS / 1000),
        ):
            raw = await client._request(settings.ai.classifier, prompt)
        _failure_count = 0
    except Exception as exc:
        # Timeouts and all other errors share the same breaker bookkeeping;
        # they differ only in the log line and the reported status.
        _failure_count += 1
        if _failure_count >= CIRCUIT_THRESHOLD:
            _circuit_open_until = time.time() + CIRCUIT_RECOVERY_SEC
            logger.error(f"classifier circuit OPEN for {CIRCUIT_RECOVERY_SEC}s")
        if isinstance(exc, asyncio.TimeoutError):
            logger.warning("classifier timeout")
            failed_status = "timeout"
        else:
            logger.warning(
                "classifier error: type=%s repr=%r", type(exc).__name__, exc
            )
            failed_status = "error"
        return ClassifierResult(failed_status, None, [], [], _elapsed_ms())
    finally:
        await client.close()

    elapsed_ms = _elapsed_ms()
    parsed = parse_json_response(raw)
    if not isinstance(parsed, dict):
        logger.warning("classifier parse failed raw=%r", (raw or "")[:200])
        return ClassifierResult("error", None, [], [], elapsed_ms)

    # Ternary -> binary mapping.
    raw_verdict = parsed.get("verdict", "")
    verdict: Literal["sufficient", "insufficient"] | None
    if raw_verdict in ("full", "partial", "sufficient"):
        verdict = "sufficient"
    elif raw_verdict == "insufficient":
        verdict = "insufficient"
    else:
        verdict = None

    def _aspect_list(key: str) -> list:
        # Anything that is missing, empty, or not a list becomes [].
        value = parsed.get(key)
        return value if value and isinstance(value, list) else []

    covered = _aspect_list("covered_aspects")
    missing = _aspect_list("missing_aspects")

    logger.info(
        "classifier ok query=%r verdict=%s (raw=%s) covered=%d missing=%d elapsed_ms=%.0f",
        query[:60], verdict, raw_verdict, len(covered), len(missing), elapsed_ms,
    )
    return ClassifierResult("ok", verdict, covered, missing, elapsed_ms)
|