5125f82d4a
GPU = RAG context provider, Mac mini = LLM 가공 공장.
GPU 측 변경:
- app/api/internal_study.py: GET /internal/study/explanation-context/{qid}
Bearer auth, gather_explanation_context + _render_envelope_prompt 재호출.
204=evidence missing, 410=deleted/ready.
- app/workers/study_queue_consumer.py: settings.study_explanation_enabled
false 시 explanation 분기 skip (status/attempts 미변경, pending 유지 → Mac mini 흡수).
- app/core/config.py: study_explanation_enabled + internal_worker_token 2 setting.
- app/main.py: internal_study_router include (prefix /internal/study).
- docker-compose.yml: fastapi ports → 100.110.63.63:8000 Tailscale bind,
STUDY_EXPLANATION_ENABLED + INTERNAL_WORKER_TOKEN env 추가.
Mac mini 측: ~/derived-worker/ (별도 push 0, 어제 작성).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
76 lines
2.8 KiB
Python
76 lines
2.8 KiB
Python
"""PR-MacMini-Derived-Worker-1 internal endpoint.
|
|
|
|
Mac mini derived-worker 가 study explanation 가공을 위해 호출.
|
|
GPU = RAG context provider (LLM generation X), Mac mini = LLM 가공 공장.
|
|
Bearer token 보호 (settings.internal_worker_token).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
|
|
from fastapi import APIRouter, Depends, Header, HTTPException, Path, Response, status
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from core.config import settings
|
|
from core.database import async_session
|
|
from models.study_question import StudyQuestion
|
|
from services.study.explanation_rag import gather_explanation_context, render_evidence_block
|
|
from workers.study_explanation_worker import _render_envelope_prompt
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
# Router on which this module's endpoints are registered via ``@router.get``.
router = APIRouter()
|
|
|
|
|
|
def _verify_token(authorization: str | None = Header(default=None)) -> None:
    """Validate the ``Authorization: Bearer <token>`` header of an internal call.

    Used as a FastAPI dependency: it returns nothing on success and rejects
    the request with an ``HTTPException`` otherwise.

    Args:
        authorization: Raw ``Authorization`` request header, or ``None`` when
            the header is absent.

    Raises:
        HTTPException: 503 when ``settings.internal_worker_token`` is empty or
            unset, 401 when the header is missing or is not a Bearer
            credential, 403 when the presented token does not match.
    """
    # Function-scope import so this block does not depend on the file's
    # top-of-file import list being changed.
    import hmac

    expected = settings.internal_worker_token
    if not expected:
        raise HTTPException(status_code=503, detail="internal_worker_token not configured")
    if not authorization or not authorization.lower().startswith("bearer "):
        raise HTTPException(status_code=401, detail="missing Bearer token")

    # Drop the 7-character "bearer " scheme prefix checked above.
    token = authorization[7:].strip()

    # Compare in constant time so response latency does not reveal how many
    # leading characters of the secret a guess got right. Both sides are
    # encoded to bytes because compare_digest rejects non-ASCII ``str``.
    # NOTE(review): assumes the configured token is a plain ``str`` — confirm.
    if not hmac.compare_digest(token.encode("utf-8"), expected.encode("utf-8")):
        raise HTTPException(status_code=403, detail="invalid token")
|
|
|
|
|
|
async def _session() -> AsyncSession:
    """Yield one database session to the dependent endpoint.

    The session is obtained from ``async_session()`` and its context manager
    is exited once the consumer of this generator is done with it.
    """
    session_scope = async_session()
    async with session_scope as db_session:
        yield db_session
|
|
|
|
|
|
@router.get("/explanation-context/{question_id}")
async def get_explanation_context(
    question_id: int = Path(..., ge=1),
    _auth: None = Depends(_verify_token),
    session: AsyncSession = Depends(_session),
):
    """Return the rendered explanation prompt and evidence counts for a question.

    Responses:
        200: JSON object with ``question_id``, ``question_correct_choice``,
            ``rendered_prompt`` and ``evidence_summary`` (document and
            question counts).
        204: The gathered context has neither documents nor questions.
        410: The question does not exist, is soft-deleted, or its
            ``ai_explanation_status`` is already ``"ready"``.
    """
    row = await session.get(StudyQuestion, question_id)

    gone = row is None or row.deleted_at is not None
    if gone:
        raise HTTPException(status_code=410, detail="question deleted or missing")
    if row.ai_explanation_status == "ready":
        raise HTTPException(status_code=410, detail="explanation already ready")

    rag = await gather_explanation_context(session, row.user_id, row)
    evidence_summary = {
        "documents_count": len(rag.documents),
        "questions_count": len(rag.questions),
    }
    # Nothing to build a prompt from: answer with an empty 204.
    if not any(evidence_summary.values()):
        return Response(status_code=204)

    # Arguments are evaluated left to right: documents are rendered first,
    # then questions.
    prompt = _render_envelope_prompt(
        row,
        render_evidence_block(rag.documents),
        render_evidence_block(rag.questions),
    )

    logger.info(
        "internal_study_context qid=%s docs=%s questions=%s prompt_len=%s",
        question_id,
        evidence_summary["documents_count"],
        evidence_summary["questions_count"],
        len(prompt),
    )

    return {
        "question_id": row.id,
        "question_correct_choice": row.correct_choice,
        "rendered_prompt": prompt,
        "evidence_summary": evidence_summary,
    }
|