e8da53490c
PR-3 의 결과 화면 [AI 해설 보기] 실시간 호출이 클릭 시 8~30초 대기. 풀이 직후
백그라운드 batch 로 미리 생성해 캐시 hit. 환각 가드는 PR-3 보다 강화 — envelope
JSON {answer_choice, explanation_md, confidence} + answer_choice == correct_choice
검증 + evidence 의무.
processing_queue 가 documents.id FK 라 study_questions 에 직접 재사용 불가 →
별도 study_question_jobs 테이블 + 별도 consumer.
Backend:
- migrations/231 — study_question_jobs CREATE TABLE (13컬럼, kind 권장값
'explanation' / 'session_summary' 예약, status pending/processing/completed/
failed/skipped, max_attempts=2)
- migrations/232 — partial unique idx (qid, kind) WHERE status IN
(pending, processing) — active 행 중복 차단, terminal 이력 누적 허용
- models/study_question_job — ORM + enqueue_study_question_job() 헬퍼
(on_conflict_do_nothing 멱등)
- prompts/study_explanation_envelope.txt — envelope 형식 프롬프트
(answer_choice 1~4 강제, confidence high/medium/low)
- workers/study_explanation_worker — terminal status 분기:
· evidence 둘 다 빈 리스트 → job/question 모두 skipped (LLM 호출 X)
· answer_choice != correct_choice → guard_fail / failed (재시도 X)
· timeout/parse → 재시도 후보 (max_attempts=2)
· catch-all except → unknown 명시 + retryable 분기
· question.ai_explanation_status='ready' 이미 박혀있으면 즉시 completed
· confidence 는 job.payload 에 보존 (운영 분석)
- workers/study_queue_consumer — APScheduler 1분 주기, BATCH_SIZE=1, MLX gate
Semaphore(1) 공유. STALE_MINUTES=10 자체 복구
- main.py — scheduler.add_job(consume_study_queue, ..., id='study_queue_consumer')
- services/study/explanation_enqueue — finalize + GET fallback 공유 헬퍼:
filter_needs_explanation (study_questions status + 최신 job error_code 필터,
guard_fail/evidence_missing 인 마지막 job 은 자동 재enqueue 제외) +
enqueue_explanation_for_qids (max_count cap)
- session_finalize — 끝에서 wrong/unsure qid prefetch enqueue (best-effort,
실패해도 finalize 자체 안 깨짐)
- api/study_topics get_quiz_session — done 세션에서 backfill enqueue (max=30,
non-blocking, debug 로그)
대상 조건: ai_explanation_status IN ('none', 'failed') OR ai_explanation IS NULL.
stale / skipped / pending / ready 는 자동 enqueue 대상 X. stale 재생성은 PR-3
명시 [다시 생성] 또는 후속 Phase 에서.
Plan: ~/.claude/plans/nifty-sparking-spindle.md (Phase 4-A)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
216 lines
8.4 KiB
Python
216 lines
8.4 KiB
Python
"""Phase 4-A 풀이 prefetch worker — wrong/unsure 문제의 AI 풀이를 batch 로 미리 생성.
|
|
|
|
Plan: ~/.claude/plans/nifty-sparking-spindle.md
|
|
study_question_jobs (kind='explanation') row 1건을 받아 처리:
|
|
1. RAG 근거 수집 (PR-3 의 explanation_rag.py 재사용)
|
|
2. evidence 둘 다 비어있으면 LLM 호출 X → status='skipped'
|
|
3. MLX primary 호출 (gate Semaphore(1) 공유) → envelope JSON
|
|
4. 환각 가드 — answer_choice == question.correct_choice 검증
|
|
5. 통과 시 study_questions.ai_explanation 캐시 박기
|
|
|
|
terminal status (completed/failed/skipped) 는 completed_at 항상 기록.
|
|
재시도 정책 — guard_fail/evidence_missing 은 final, 그 외 (llm_timeout/parse_fail/unknown)
|
|
는 attempts < max_attempts 면 pending 으로 복귀.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from ai.client import AIClient, parse_json_response
|
|
from models.study_question import StudyQuestion
|
|
from models.study_question_job import StudyQuestionJob
|
|
from services.search.llm_gate import get_mlx_gate
|
|
from services.study.explanation_rag import (
|
|
gather_explanation_context,
|
|
render_evidence_block,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# PR-3 LLM_TIMEOUT_S 와 동일 안전 마진 (26B 평균 ~10s, gate 직렬화 고려)
|
|
LLM_TIMEOUT_S = 30.0
|
|
|
|
_ENVELOPE_PROMPT_FILE = "study_explanation_envelope.txt"
|
|
_envelope_template_cache: str | None = None
|
|
|
|
|
|
def _load_envelope_prompt() -> str:
|
|
global _envelope_template_cache
|
|
if _envelope_template_cache is None:
|
|
prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
|
|
_envelope_template_cache = (
|
|
prompts_dir / _ENVELOPE_PROMPT_FILE
|
|
).read_text(encoding="utf-8")
|
|
return _envelope_template_cache
|
|
|
|
|
|
def _render_envelope_prompt(q: StudyQuestion, doc_block: str, q_block: str) -> str:
|
|
return (
|
|
_load_envelope_prompt()
|
|
.replace("{question_text}", q.question_text or "")
|
|
.replace("{choice_1}", q.choice_1 or "")
|
|
.replace("{choice_2}", q.choice_2 or "")
|
|
.replace("{choice_3}", q.choice_3 or "")
|
|
.replace("{choice_4}", q.choice_4 or "")
|
|
.replace("{correct_choice}", str(q.correct_choice))
|
|
.replace("{documents_evidence_block}", doc_block)
|
|
.replace("{questions_evidence_block}", q_block)
|
|
)
|
|
|
|
|
|
async def run_explanation_job(session: AsyncSession, job: StudyQuestionJob) -> None:
|
|
"""Phase 4-A: study_question_jobs row 1건 처리. caller 가 commit 책임.
|
|
|
|
job.status 는 호출 전 'pending' 가정. 종료 시 completed/failed/skipped/pending(재시도)
|
|
중 하나.
|
|
"""
|
|
now = lambda: datetime.now(timezone.utc) # noqa: E731
|
|
|
|
# attempt + processing 단정
|
|
job.attempts += 1
|
|
job.status = "processing"
|
|
job.started_at = now()
|
|
await session.flush()
|
|
|
|
try:
|
|
question = await session.get(StudyQuestion, job.study_question_id)
|
|
if question is None or question.deleted_at is not None:
|
|
# 삭제된 문제 — job 도 skipped 로 종결.
|
|
job.error_code = "evidence_missing"
|
|
job.error_message = "question deleted or missing"
|
|
job.status = "skipped"
|
|
job.completed_at = now()
|
|
return
|
|
|
|
# race-safe — PR-3 실시간 호출이 이미 ready 박았으면 즉시 종결.
|
|
if question.ai_explanation_status == "ready":
|
|
job.status = "completed"
|
|
job.completed_at = now()
|
|
return
|
|
|
|
# 1. RAG 근거 수집
|
|
ctx = await gather_explanation_context(session, job.user_id, question)
|
|
if not ctx.documents and not ctx.questions:
|
|
# evidence 없음 — LLM 호출 X. job/question 둘 다 skipped 통일.
|
|
job.error_code = "evidence_missing"
|
|
job.error_message = "no document/question evidence in this topic"
|
|
job.status = "skipped"
|
|
job.completed_at = now()
|
|
question.ai_explanation_status = "skipped"
|
|
question.updated_at = now()
|
|
return
|
|
|
|
# 2. 프롬프트 + MLX primary
|
|
doc_block = render_evidence_block(ctx.documents)
|
|
q_block = render_evidence_block(ctx.questions)
|
|
prompt = _render_envelope_prompt(question, doc_block, q_block)
|
|
|
|
ai_client = AIClient()
|
|
try:
|
|
async with get_mlx_gate():
|
|
async with asyncio.timeout(LLM_TIMEOUT_S):
|
|
raw_text = await ai_client.call_primary(prompt)
|
|
primary_name = (
|
|
ai_client.ai.primary.model
|
|
if hasattr(ai_client.ai, "primary") and hasattr(ai_client.ai.primary, "model")
|
|
else "primary"
|
|
)
|
|
finally:
|
|
await ai_client.close()
|
|
|
|
if not raw_text or not raw_text.strip():
|
|
# 빈 응답도 timeout 류로 처리 — 재시도 후보.
|
|
job.error_code = "llm_timeout"
|
|
job.error_message = "empty response from primary"
|
|
return
|
|
|
|
# 3. envelope 파싱
|
|
envelope = parse_json_response(raw_text)
|
|
if envelope is None or not isinstance(envelope, dict):
|
|
job.error_code = "parse_fail"
|
|
job.error_message = "envelope JSON parse failed"
|
|
return
|
|
|
|
answer_choice = envelope.get("answer_choice")
|
|
explanation_md = envelope.get("explanation_md") or ""
|
|
confidence = envelope.get("confidence")
|
|
|
|
if not isinstance(answer_choice, int) or answer_choice not in (1, 2, 3, 4):
|
|
job.error_code = "parse_fail"
|
|
job.error_message = f"invalid answer_choice: {answer_choice!r}"
|
|
return
|
|
if not explanation_md.strip():
|
|
job.error_code = "parse_fail"
|
|
job.error_message = "empty explanation_md"
|
|
return
|
|
|
|
# 4. 환각 가드 — 정답 번호 일치
|
|
if answer_choice != question.correct_choice:
|
|
job.error_code = "guard_fail"
|
|
job.error_message = (
|
|
f"answer_choice={answer_choice} != correct_choice={question.correct_choice}"
|
|
)
|
|
job.status = "failed"
|
|
job.completed_at = now()
|
|
question.ai_explanation_status = "failed"
|
|
question.updated_at = now()
|
|
return
|
|
|
|
# 5. 성공 — confidence 는 1차 통과 (Phase 4-B 임계 결정).
|
|
# 운영 분석 자산으로 payload 에 confidence 보존.
|
|
job_payload = dict(job.payload or {})
|
|
job_payload["confidence"] = confidence
|
|
job.payload = job_payload
|
|
|
|
question.ai_explanation = explanation_md
|
|
question.ai_explanation_status = "ready"
|
|
question.ai_explanation_generated_at = now()
|
|
question.ai_explanation_model = f"mlx:{primary_name}"
|
|
question.updated_at = question.ai_explanation_generated_at
|
|
|
|
job.status = "completed"
|
|
job.completed_at = now()
|
|
return
|
|
|
|
except (asyncio.TimeoutError, httpx.HTTPError) as e:
|
|
job.error_code = "llm_timeout"
|
|
job.error_message = f"{type(e).__name__}: {e}"
|
|
logger.warning(
|
|
"study_explanation_job_timeout job_id=%s qid=%s: %s",
|
|
job.id, job.study_question_id, e,
|
|
)
|
|
except (json.JSONDecodeError, ValueError) as e:
|
|
job.error_code = "parse_fail"
|
|
job.error_message = f"{type(e).__name__}: {e}"
|
|
logger.warning(
|
|
"study_explanation_job_parse_fail job_id=%s qid=%s: %s",
|
|
job.id, job.study_question_id, e,
|
|
)
|
|
except Exception as e:
|
|
# 예상 못한 예외 — error_code 미세팅 시 finally 가 None 을 retryable 로 보면 무한 루프.
|
|
# 명시적으로 'unknown' 박아 재시도 정책 안에 들어가게.
|
|
job.error_code = "unknown"
|
|
job.error_message = f"{type(e).__name__}: {e}"
|
|
logger.exception(
|
|
"study_explanation_job_unknown_fail job_id=%s qid=%s",
|
|
job.id, job.study_question_id,
|
|
)
|
|
finally:
|
|
# 재시도 분기 — guard_fail/evidence_missing 은 위 try 에서 이미 단정 종결.
|
|
# 여기 도달 케이스는 llm_timeout / parse_fail / unknown.
|
|
if job.status == "processing":
|
|
retryable = job.error_code in ("llm_timeout", "parse_fail", "unknown")
|
|
if retryable and job.attempts < job.max_attempts:
|
|
job.status = "pending" # 다음 cycle 재시도
|
|
else:
|
|
job.status = "failed"
|
|
job.completed_at = now()
|