"""Phase 4-A explanation prefetch worker.

Pre-generates, in batch, the AI explanations for questions the user got wrong
or marked as unsure.

Plan: ~/.claude/plans/nifty-sparking-spindle.md

Processes a single study_question_jobs row (kind='explanation'):
  1. Gather RAG evidence (reuses explanation_rag.py from PR-3).
  2. If both evidence sets are empty, skip the LLM call -> status='skipped'.
  3. Call the MLX primary model (sharing the Semaphore(1) gate) -> envelope JSON.
  4. Hallucination guard: verify answer_choice == question.correct_choice.
  5. On success, cache the result in study_questions.ai_explanation.

Every terminal status (completed/failed/skipped) records completed_at.
Retry policy: guard_fail/evidence_missing are final; everything else
(llm_timeout/parse_fail/unknown) returns to pending while
attempts < max_attempts.
"""

from __future__ import annotations

import asyncio
import json
import logging
import re
from datetime import datetime, timezone
from pathlib import Path

import httpx
from sqlalchemy.ext.asyncio import AsyncSession

from ai.client import AIClient, parse_json_response
from models.study_question import StudyQuestion
from models.study_question_job import StudyQuestionJob
from services.search.llm_gate import get_mlx_gate
from services.study.explanation_rag import (
    gather_explanation_context,
    render_evidence_block,
)

logger = logging.getLogger(__name__)

# Same safety margin as PR-3's LLM_TIMEOUT_S (26B averages ~10s; allows for
# gate serialization).
LLM_TIMEOUT_S = 30.0

# Hard cap for explanation_md. Production samples of 793/838/866 chars led to
# starting at 1200 (800 is too tight for certification-exam explanations that
# bundle formulas, wrong-answer notes and key concepts). To be tuned after the
# first production run.
EXPLANATION_MAX_CHARS = 1200
# When capping, search for a sentence boundary (newline / period) within the
# last N characters.
_BOUNDARY_LOOKBACK = 200
# Suffix appended to a truncated explanation; its length counts against the cap.
_ELLIPSIS = "…"
# Boundary markers in priority order: paragraph break, line break, English
# sentence end, then Korean sentence endings.
_BOUNDARY_MARKERS = ("\n\n", "\n", ". ", "다.", "요.", "다 ", "요 ")

_ENVELOPE_PROMPT_FILE = "study_explanation_envelope.txt"
_envelope_template_cache: str | None = None

# Placeholders recognised in the envelope prompt template. They are substituted
# in a single pass so that placeholder-looking text inside a substituted value
# is never expanded a second time.
_PLACEHOLDER_RE = re.compile(
    r"\{(question_text|choice_[1-4]|correct_choice"
    r"|documents_evidence_block|questions_evidence_block)\}"
)


def _cap_explanation_md(text: str, max_chars: int = EXPLANATION_MAX_CHARS) -> str:
    """Cap *text* at ``max_chars`` characters, ellipsis included.

    Text that already fits (or is empty/None) is returned unchanged. Otherwise
    the text is cut, preferably right after a sentence boundary found within
    the last ``_BOUNDARY_LOOKBACK`` characters of the allowed span, trailing
    whitespace is stripped, and "…" is appended.

    Room for the ellipsis is reserved up front, so the result never exceeds
    ``max_chars`` (for ``max_chars`` < 1 the result is just the ellipsis).
    Callers can therefore detect truncation by comparing lengths.

    Being capped is not treated as a failure: the caller still stores the
    explanation as ready, because preserving the study value matters more.
    """
    if not text or len(text) <= max_chars:
        return text

    # Reserve space for the ellipsis so the cap is a true upper bound.
    budget = max(0, max_chars - len(_ELLIPSIS))
    head = text[:budget]
    lookback_start = max(0, budget - _BOUNDARY_LOOKBACK)

    # 1) Prefer the highest-priority boundary marker inside the lookback window.
    for marker in _BOUNDARY_MARKERS:
        pos = head.rfind(marker, lookback_start)
        if pos > 0:
            return head[: pos + len(marker)].rstrip() + _ELLIPSIS

    # 2) No boundary found: plain cut.
    return head.rstrip() + _ELLIPSIS


def _load_envelope_prompt() -> str:
    """Return the envelope prompt template, reading it from disk only once.

    The file is ``<parent of this file's directory>/prompts/<_ENVELOPE_PROMPT_FILE>``;
    its content is cached in the module-level ``_envelope_template_cache``.
    """
    global _envelope_template_cache
    if _envelope_template_cache is None:
        prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
        _envelope_template_cache = (
            prompts_dir / _ENVELOPE_PROMPT_FILE
        ).read_text(encoding="utf-8")
    return _envelope_template_cache


def _render_envelope_prompt(q: StudyQuestion, doc_block: str, q_block: str) -> str:
    """Fill the envelope prompt template with the question and evidence blocks.

    Substitution is done in one pass over the template, so a value that happens
    to contain a placeholder literal (e.g. question text containing
    "{choice_2}") is inserted verbatim instead of being expanded again.
    Missing question fields are rendered as empty strings.
    """
    values = {
        "question_text": q.question_text or "",
        "choice_1": q.choice_1 or "",
        "choice_2": q.choice_2 or "",
        "choice_3": q.choice_3 or "",
        "choice_4": q.choice_4 or "",
        "correct_choice": str(q.correct_choice),
        "documents_evidence_block": doc_block,
        "questions_evidence_block": q_block,
    }
    return _PLACEHOLDER_RE.sub(
        lambda match: values[match.group(1)], _load_envelope_prompt()
    )


async def run_explanation_job(session: AsyncSession, job: StudyQuestionJob) -> None:
    """Process one study_question_jobs row (Phase 4-A).

    The caller is responsible for committing. ``job.status`` is assumed to be
    'pending' on entry; on exit it is one of completed / failed / skipped /
    pending (retry).

    Outcomes:
      * question missing or soft-deleted      -> job skipped (evidence_missing)
      * explanation already 'ready'           -> job completed, nothing generated
      * no document/question evidence         -> job and question skipped
      * answer_choice != correct_choice       -> job and question failed (guard_fail)
      * valid envelope                        -> explanation cached, job completed
      * empty response / timeout / HTTP error -> llm_timeout (retryable)
      * malformed envelope                    -> parse_fail (retryable)
      * any other exception                   -> unknown (retryable)

    Retryable errors put the job back to 'pending' while
    ``job.attempts < job.max_attempts``; otherwise the job becomes 'failed'.
    Exceptions derived from ``Exception`` are recorded on the job rather than
    propagated.
    """

    def now() -> datetime:
        return datetime.now(timezone.utc)

    # Count the attempt and mark the row as in progress.
    job.attempts += 1
    job.status = "processing"
    job.started_at = now()
    await session.flush()

    try:
        question = await session.get(StudyQuestion, job.study_question_id)
        if question is None or question.deleted_at is not None:
            # Deleted question: close the job as skipped too.
            job.error_code = "evidence_missing"
            job.error_message = "question deleted or missing"
            job.status = "skipped"
            job.completed_at = now()
            return

        # Race-safe: if the PR-3 real-time path already stored a ready
        # explanation, finish immediately.
        if question.ai_explanation_status == "ready":
            job.status = "completed"
            job.completed_at = now()
            return

        # 1. Gather RAG evidence.
        ctx = await gather_explanation_context(session, job.user_id, question)
        if not ctx.documents and not ctx.questions:
            # No evidence: skip the LLM call; job and question both 'skipped'.
            job.error_code = "evidence_missing"
            job.error_message = "no document/question evidence in this topic"
            job.status = "skipped"
            job.completed_at = now()
            question.ai_explanation_status = "skipped"
            question.updated_at = now()
            return

        # 2. Build the prompt and call the MLX primary model.
        doc_block = render_evidence_block(ctx.documents)
        q_block = render_evidence_block(ctx.questions)
        prompt = _render_envelope_prompt(question, doc_block, q_block)

        ai_client = AIClient()
        try:
            # The timeout is entered inside the gate, so it covers only the
            # model call, not the time spent waiting for the gate.
            async with get_mlx_gate():
                async with asyncio.timeout(LLM_TIMEOUT_S):
                    raw_text = await ai_client.call_primary(prompt)
            # Fall back to the literal "primary" when the client exposes no
            # model name.
            primary = getattr(ai_client.ai, "primary", None)
            primary_name = getattr(primary, "model", "primary")
        finally:
            await ai_client.close()

        if not raw_text or not raw_text.strip():
            # An empty response is treated like a timeout: retry candidate.
            job.error_code = "llm_timeout"
            job.error_message = "empty response from primary"
            return

        # 3. Parse the envelope.
        # On parse failure the first 1000 chars of the raw response are stored
        # in payload.debug_raw_preview for production analysis.
        def _save_raw_preview(reason: str) -> None:
            details = dict(job.payload or {})
            details["debug_raw_preview"] = (raw_text or "")[:1000]
            details["parse_fail_reason"] = reason
            # Assign a fresh dict rather than mutating job.payload in place.
            job.payload = details

        envelope = parse_json_response(raw_text)
        if not isinstance(envelope, dict):
            # Covers both None and any non-dict JSON value.
            job.error_code = "parse_fail"
            job.error_message = "envelope JSON parse failed"
            _save_raw_preview("not_dict")
            return

        answer_choice = envelope.get("answer_choice")
        explanation_md = envelope.get("explanation_md") or ""
        confidence = envelope.get("confidence")

        # bool is a subclass of int (True == 1), so JSON `true` must be
        # rejected explicitly or it could slip through the guard below.
        if (
            isinstance(answer_choice, bool)
            or not isinstance(answer_choice, int)
            or answer_choice not in (1, 2, 3, 4)
        ):
            job.error_code = "parse_fail"
            job.error_message = f"invalid answer_choice: {answer_choice!r}"
            _save_raw_preview("invalid_answer_choice")
            return

        if not isinstance(explanation_md, str):
            # A list/dict/number here is a malformed envelope, not an
            # unexpected crash: classify it as parse_fail and keep the preview.
            job.error_code = "parse_fail"
            job.error_message = (
                f"invalid explanation_md type: {type(explanation_md).__name__}"
            )
            _save_raw_preview("invalid_explanation_md")
            return

        if not explanation_md.strip():
            job.error_code = "parse_fail"
            job.error_message = "empty explanation_md"
            _save_raw_preview("empty_explanation_md")
            return

        # 4. Hallucination guard: the model's answer must match the key.
        if answer_choice != question.correct_choice:
            job.error_code = "guard_fail"
            job.error_message = (
                f"answer_choice={answer_choice} != correct_choice={question.correct_choice}"
            )
            job.status = "failed"
            job.completed_at = now()
            question.ai_explanation_status = "failed"
            question.updated_at = now()
            return

        # 5. Success. Confidence is accepted as-is for now (threshold to be
        # decided in Phase 4-B). Over-long explanations are cut at a sentence
        # boundary and still stored as ready.
        original_len = len(explanation_md)
        capped_md = _cap_explanation_md(explanation_md)

        # Keep confidence and cap statistics in the payload for analysis.
        job_payload = dict(job.payload or {})
        job_payload["confidence"] = confidence
        job_payload["explanation_len_original"] = original_len
        job_payload["explanation_len_saved"] = len(capped_md)
        # Compare content, not length, so truncation is always reported.
        job_payload["explanation_capped"] = capped_md != explanation_md
        job.payload = job_payload

        generated_at = now()
        question.ai_explanation = capped_md
        question.ai_explanation_status = "ready"
        question.ai_explanation_generated_at = generated_at
        question.ai_explanation_model = f"mlx:{primary_name}"
        question.updated_at = generated_at

        job.status = "completed"
        job.completed_at = now()
        return

    except (asyncio.TimeoutError, httpx.HTTPError) as e:
        job.error_code = "llm_timeout"
        job.error_message = f"{type(e).__name__}: {e}"
        logger.warning(
            "study_explanation_job_timeout job_id=%s qid=%s: %s",
            job.id,
            job.study_question_id,
            e,
        )
    except (json.JSONDecodeError, ValueError) as e:
        job.error_code = "parse_fail"
        job.error_message = f"{type(e).__name__}: {e}"
        logger.warning(
            "study_explanation_job_parse_fail job_id=%s qid=%s: %s",
            job.id,
            job.study_question_id,
            e,
        )
    except Exception as e:
        # Unexpected exception: set error_code explicitly to 'unknown' so the
        # job falls under the bounded retry policy in the finally block
        # instead of being left with error_code unset.
        job.error_code = "unknown"
        job.error_message = f"{type(e).__name__}: {e}"
        logger.exception(
            "study_explanation_job_unknown_fail job_id=%s qid=%s",
            job.id,
            job.study_question_id,
        )
    finally:
        # Retry decision. guard_fail / evidence_missing were already closed
        # with a terminal status inside the try block; only jobs still marked
        # 'processing' (llm_timeout / parse_fail / unknown) are handled here.
        if job.status == "processing":
            retryable = job.error_code in ("llm_timeout", "parse_fail", "unknown")
            if retryable and job.attempts < job.max_attempts:
                job.status = "pending"  # retried on the next cycle
            else:
                job.status = "failed"
                job.completed_at = now()