diff --git a/app/api/study_questions.py b/app/api/study_questions.py index e308f4c..fda1981 100644 --- a/app/api/study_questions.py +++ b/app/api/study_questions.py @@ -188,13 +188,18 @@ class ReviewQuestionListResponse(BaseModel): class AttemptCreate(BaseModel): - selected_choice: int = Field(ge=1, le=4) + """PR-9: selected_choice (1~4) 또는 is_unsure 둘 중 하나 필수. + is_unsure=true 면 selected_choice 무시 + outcome='unsure' 로 박힘. + """ + selected_choice: int | None = Field(default=None, ge=1, le=4) + is_unsure: bool = False class AttemptResponse(BaseModel): is_correct: bool - selected_choice: int + selected_choice: int | None correct_choice: int + outcome: str # PR-9: correct | wrong | unsure explanation: str | None stats: QuestionAttemptStats @@ -830,18 +835,34 @@ async def submit_attempt( user: Annotated[User, Depends(get_current_user)], session: Annotated[AsyncSession, Depends(get_session)], ): - """답 제출. is_correct 판정 + attempt 1행 insert + 누적 통계 + 정답·해설 노출.""" + """답 제출. PR-9: is_unsure=true 면 outcome='unsure' + selected_choice=NULL. + 그 외엔 selected_choice 와 correct_choice 비교 → outcome='correct'/'wrong'. 
+ """ q = await session.get(StudyQuestion, question_id) q = _verify_question_ownership(q, user) - is_correct = body.selected_choice == q.correct_choice + if body.is_unsure: + selected = None + is_correct = False + outcome = "unsure" + elif body.selected_choice is None: + raise HTTPException( + status_code=422, + detail="selected_choice (1~4) 또는 is_unsure=true 가 필요합니다", + ) + else: + selected = body.selected_choice + is_correct = selected == q.correct_choice + outcome = "correct" if is_correct else "wrong" + attempt = StudyQuestionAttempt( user_id=user.id, study_question_id=q.id, study_topic_id=q.study_topic_id, - selected_choice=body.selected_choice, + selected_choice=selected, correct_choice=q.correct_choice, is_correct=is_correct, + outcome=outcome, ) session.add(attempt) await session.commit() @@ -849,8 +870,9 @@ async def submit_attempt( stats = await _attempt_stats(session, user.id, question_id) return AttemptResponse( is_correct=is_correct, - selected_choice=body.selected_choice, + selected_choice=selected, correct_choice=q.correct_choice, + outcome=outcome, explanation=q.explanation, stats=stats, ) diff --git a/app/api/study_topics.py b/app/api/study_topics.py index 1c90bae..1bec529 100644 --- a/app/api/study_topics.py +++ b/app/api/study_topics.py @@ -15,8 +15,10 @@ - polymorphic 단일 study_topic_items 테이블은 만들지 않는다 (영구 금지). 
""" +import asyncio import logging from datetime import datetime, timezone +from pathlib import Path as _Path from typing import Annotated, Any from fastapi import APIRouter, Depends, HTTPException, Query @@ -25,13 +27,21 @@ from sqlalchemy import and_, delete, func, select, text as sql_text, update from sqlalchemy.exc import IntegrityError from sqlalchemy.ext.asyncio import AsyncSession +from ai.client import AIClient, strip_thinking from core.auth import get_current_user from core.database import get_session from core.library import LIBRARY_PREFIX, normalize_library_path from models.document import Document from models.study_session import StudySession from models.study_topic import StudyTopic, StudyTopicDocument +from models.study_topic_subject_note import StudyTopicSubjectNote from models.user import User +from services.search.llm_gate import get_mlx_gate +from services.study.subject_note_rag import ( + SubjectNoteContext, + gather_subject_note_context, + render_evidence_block, +) logger = logging.getLogger(__name__) router = APIRouter() @@ -979,3 +989,224 @@ async def detach_session_from_topic( sess.study_topic_id = None sess.updated_at = datetime.now(timezone.utc) await session.commit() + + +# ─── PR-9: 분야 설명 (study_topic_subject_notes) ─── + +SUBJECT_NOTE_TIMEOUT_S = 30.0 +_SUBJECT_NOTE_PROMPT_PATH = "study_subject_note.txt" +_subject_note_prompt_cache: str | None = None + + +def _load_subject_note_prompt() -> str: + global _subject_note_prompt_cache + if _subject_note_prompt_cache is None: + prompts_dir = _Path(__file__).resolve().parent.parent / "prompts" + _subject_note_prompt_cache = (prompts_dir / _SUBJECT_NOTE_PROMPT_PATH).read_text(encoding="utf-8") + return _subject_note_prompt_cache + + +def _render_subject_note_prompt(subject: str, scope: str, doc_block: str, q_block: str) -> str: + template = _load_subject_note_prompt() + return ( + template + .replace("{subject}", subject) + .replace("{scope}", scope or "(미지정)") + 
class SubjectNoteRequest(BaseModel):
    """Request body for generating (or regenerating) a subject note."""

    subject: str = Field(min_length=1, max_length=120)
    scope: str = Field(default="", max_length=200)
    # When true, a note that is already "ready" is generated again.
    regenerate: bool = False


class SubjectNoteEvidence(BaseModel):
    """One piece of evidence returned alongside a note."""

    source_type: str
    source_id: int
    title: str
    snippet: str


class SubjectNoteResponse(BaseModel):
    """API representation of a subject note and its generation state."""

    subject: str
    scope: str
    content: str | None
    status: str  # ready | failed | none | stale | pending
    generated_at: datetime | None
    model: str | None
    evidence: list[SubjectNoteEvidence] = []
    from_cache: bool = False
    can_regenerate: bool = True


def _note_cache_response(note: StudyTopicSubjectNote) -> SubjectNoteResponse:
    """Build the response for a note served straight from the stored row.

    Evidence is not stored on the row, so the cached response carries none.
    """
    stored = {
        "subject": note.subject,
        "scope": note.scope,
        "content": note.content,
        "status": note.status,
        "generated_at": note.generated_at,
        "model": note.model,
    }
    return SubjectNoteResponse(
        **stored,
        evidence=[],
        from_cache=True,
        can_regenerate=True,
    )
async def _find_subject_note(
    session: AsyncSession,
    user_id: int,
    topic_id: int,
    subject: str,
    scope: str,
) -> StudyTopicSubjectNote | None:
    """Return the note row for (user, topic, subject, scope), or None."""
    stmt = select(StudyTopicSubjectNote).where(
        StudyTopicSubjectNote.user_id == user_id,
        StudyTopicSubjectNote.study_topic_id == topic_id,
        StudyTopicSubjectNote.subject == subject,
        StudyTopicSubjectNote.scope == scope,
    )
    return (await session.execute(stmt)).scalar_one_or_none()


def _subject_note_pending_conflict() -> HTTPException:
    """Build the 409 returned while another request is generating the note."""
    return HTTPException(
        status_code=409,
        detail={"status": "pending", "detail": "이미 생성 중입니다"},
    )


async def _release_pending_subject_note(session: AsyncSession, note_id: int) -> None:
    """Best-effort: flip a note that is still 'pending' to 'failed'.

    Called when generation aborts unexpectedly, so the row does not stay
    'pending' and answer 409 to every later request. Errors are logged, not
    raised, because the caller is already propagating the original failure.
    """
    try:
        await session.rollback()
        await session.execute(
            update(StudyTopicSubjectNote)
            .where(
                StudyTopicSubjectNote.id == note_id,
                StudyTopicSubjectNote.status == "pending",
            )
            .values(status="failed", updated_at=datetime.now(timezone.utc))
        )
        await session.commit()
    except Exception:
        logger.exception("subject_note_release_pending_failed note=%s", note_id)


@router.get("/{topic_id}/subject-notes", response_model=SubjectNoteResponse)
async def get_subject_note(
    topic_id: int,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
    subject: str = Query(..., min_length=1, max_length=120),
    scope: str = Query("", max_length=200),
):
    """Look up the cached note; respond with status='none' and content=null if absent."""
    topic = await session.get(StudyTopic, topic_id)
    _verify_topic_ownership(topic, user)
    note = await _find_subject_note(session, user.id, topic_id, subject, scope)
    if note is None:
        return SubjectNoteResponse(
            subject=subject, scope=scope, content=None, status="none",
            generated_at=None, model=None, evidence=[], from_cache=True, can_regenerate=True,
        )
    return _note_cache_response(note)


@router.post("/{topic_id}/subject-notes/generate", response_model=SubjectNoteResponse)
async def generate_subject_note(
    topic_id: int,
    body: SubjectNoteRequest,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Generate a subject note with the AI model and cache it.

    Flow:
      * ``regenerate=false`` and status ``ready`` -> return the cached note.
      * status ``pending`` -> 409.
      * otherwise -> atomically move the row to ``pending``, gather evidence,
        call the model and store the result.

    A failed generation sets status ``failed`` and keeps the previous content.
    If generation aborts with an unexpected error, the row is released from
    ``pending`` before the error propagates.

    Raises:
        HTTPException: 409 while another generation is in progress; 404 if
            the note row disappeared during generation.
    """
    topic = await session.get(StudyTopic, topic_id)
    _verify_topic_ownership(topic, user)
    # Read the id up front: after commit/rollback ORM attributes may be expired
    # and cannot be lazily reloaded on an async session.
    user_id = user.id

    note = await _find_subject_note(session, user_id, topic_id, body.subject, body.scope)
    created = False
    if note is None:
        note = StudyTopicSubjectNote(
            user_id=user_id,
            study_topic_id=topic_id,
            subject=body.subject,
            scope=body.scope,
            status="none",
        )
        session.add(note)
        try:
            await session.flush()
            note_id = note.id
            await session.commit()
            created = True
        except IntegrityError:
            # Presumably a concurrent request inserted the same key first
            # (this relies on a unique constraint that is not visible here).
            await session.rollback()
            note = await _find_subject_note(session, user_id, topic_id, body.subject, body.scope)
            if note is None:
                raise

    if not created:
        if note.status == "pending":
            raise _subject_note_pending_conflict()
        if not body.regenerate and note.status == "ready":
            return _note_cache_response(note)
        # none / failed / stale, or an explicit regenerate -> generate again.
        note_id = note.id

    # Race-safe transition to 'pending': only one concurrent request wins.
    lock = await session.execute(
        update(StudyTopicSubjectNote)
        .where(
            StudyTopicSubjectNote.id == note_id,
            StudyTopicSubjectNote.status != "pending",
        )
        .values(status="pending", updated_at=datetime.now(timezone.utc))
        .returning(StudyTopicSubjectNote.id)
    )
    if lock.scalar_one_or_none() is None:
        raise _subject_note_pending_conflict()
    await session.commit()

    try:
        return await _generate_and_store_subject_note(session, user_id, topic_id, note_id, body)
    except BaseException:
        # Includes cancellation. Without this the row would stay 'pending'.
        await _release_pending_subject_note(session, note_id)
        raise


async def _generate_and_store_subject_note(
    session: AsyncSession,
    user_id: int,
    topic_id: int,
    note_id: int,
    body: SubjectNoteRequest,
) -> SubjectNoteResponse:
    """Gather evidence, call the model and persist the outcome on the note row."""
    try:
        ctx = await gather_subject_note_context(session, user_id, topic_id, body.subject, body.scope)
    except Exception as e:
        logger.warning("subject_note_rag_failed: %s: %s", type(e).__name__, e)
        # Evidence is optional, but a failed query can leave the session in
        # an aborted transaction; nothing is pending here, so reset it.
        await session.rollback()
        ctx = SubjectNoteContext(documents=[], questions=[])

    doc_block = render_evidence_block(ctx.documents)
    q_block = render_evidence_block(ctx.questions)
    prompt = _render_subject_note_prompt(body.subject, body.scope, doc_block, q_block)

    ai_client = AIClient()
    raw_text: str | None = None
    try:
        async with get_mlx_gate():
            # The timeout bounds the model call only, not the wait for the gate.
            async with asyncio.timeout(SUBJECT_NOTE_TIMEOUT_S):
                raw_text = await ai_client.call_primary(prompt)
    except asyncio.TimeoutError:
        logger.warning("subject_note_mlx_timeout topic=%s subject=%s", topic_id, body.subject)
    except Exception:
        logger.exception("subject_note_mlx_failed topic=%s subject=%s", topic_id, body.subject)
    finally:
        await ai_client.close()

    note = await session.get(StudyTopicSubjectNote, note_id)
    if note is None:
        # The row can vanish while the model runs (e.g. the topic was deleted).
        raise HTTPException(status_code=404, detail="분야 설명을 찾을 수 없습니다")

    now = datetime.now(timezone.utc)
    # Text that is empty after removing the thinking section is a failure too.
    cleaned = strip_thinking(raw_text).strip() if raw_text else ""
    if cleaned:
        primary_name = getattr(ai_client.ai.primary, "model", "primary")
        note.content = cleaned
        note.status = "ready"
        note.generated_at = now
        note.model = f"mlx:{primary_name}"
    else:
        # Keep the previous content/model so the user still sees the last note.
        note.status = "failed"
    note.updated_at = now

    # Build the response before committing so no attribute is read afterwards.
    response = SubjectNoteResponse(
        subject=note.subject, scope=note.scope, content=note.content,
        status=note.status, generated_at=note.generated_at, model=note.model,
        evidence=[e.to_dict() for e in ctx.all],
        from_cache=False, can_regenerate=True,
    )
    await session.commit()
    return response
"""ORM model for study_topic_subject_notes (PR-9): cached subject notes.

Rows are meant to be unique per (user, study_topic, subject, scope). The
content is generated by the AI on demand and cached. It is requested when the
user clicks an "unsure" card on the results screen.

status: none/pending/ready/failed/stale (same pattern as PR-3).
"""

from datetime import datetime

from sqlalchemy import BigInteger, DateTime, ForeignKey, String, Text
from sqlalchemy.orm import Mapped, mapped_column

from core.database import Base


class StudyTopicSubjectNote(Base):
    """One cached AI-generated note for a subject/scope within a study topic."""

    __tablename__ = "study_topic_subject_notes"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # Owner and topic; the row is removed when either parent is deleted.
    user_id: Mapped[int] = mapped_column(
        BigInteger, ForeignKey("users.id", ondelete="CASCADE"), nullable=False
    )
    study_topic_id: Mapped[int] = mapped_column(
        BigInteger, ForeignKey("study_topics.id", ondelete="CASCADE"), nullable=False
    )
    # Lookup key; an empty scope means "no scope given".
    # NOTE(review): no UniqueConstraint is declared on this model. Confirm
    # the migration enforces uniqueness on (user, topic, subject, scope).
    subject: Mapped[str] = mapped_column(String(120), nullable=False)
    scope: Mapped[str] = mapped_column(String(200), nullable=False, default="")
    # Generated text; NULL until a generation has succeeded.
    content: Mapped[str | None] = mapped_column(Text)
    status: Mapped[str] = mapped_column(String(20), default="none", nullable=False)
    generated_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    # Identifier of the model that produced ``content``.
    model: Mapped[str | None] = mapped_column(String(120))
    # NOTE(review): datetime.now yields a naive local timestamp while the
    # columns are timezone-aware. Confirm this matches the project convention.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=datetime.now, nullable=False
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=datetime.now, onupdate=datetime.now, nullable=False
    )
default=datetime.now, nullable=False + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), default=datetime.now, onupdate=datetime.now, nullable=False + ) diff --git a/app/prompts/study_subject_note.txt b/app/prompts/study_subject_note.txt new file mode 100644 index 0000000..a4b33b9 --- /dev/null +++ b/app/prompts/study_subject_note.txt @@ -0,0 +1,28 @@ +당신은 한국 기사시험(가스기사·산업안전기사 등) 학습 보조 AI 입니다. +사용자가 모르겠다고 표시한 문제의 분야에 대한 학습 자료를 작성합니다. + +【분야】 +과목: {subject} +범위: {scope} + +【참고 자료 — 우선순위】 + +▼ 자료 (1순위: 자료실 매핑 문서) +{documents_evidence_block} + +▼ 같은 분야의 다른 문제·해설 (2순위: 보조 근거) +{questions_evidence_block} + +【지침】 +1. 분야 핵심 개념을 200~500자로 정리. +2. 자주 등장하는 공식·표준값·법령 조항이 자료에 있으면 인용 ([자료: 제목]). +3. 학습 노트 형태 — 이 분야 처음 접하는 사용자가 "큰 그림"을 잡을 수 있게. +4. 정답을 단정하지 말고 개념 위주로 (특정 문제 풀이가 아닌 분야 설명). +5. **할루시네이션 방지 (절대 규칙)**: + - 자료에 없는 수치(예: "0.5 MPa", "10 mg/L")·공식·표준 번호(예: "KS B 6750", "ASME Section VIII")·법령 조항은 새로 만들어내지 않는다. + - 자료에서 확인되지 않는 내용은 "자료에서 확인되지 않음" 으로 명시한다. + - "보통 ~이다", "일반적으로 ~이다" 같은 모호한 단정도 자료 근거가 없으면 사용하지 않는다. +6. 한국어. 마크다운(굵게·리스트) 사용 가능. +7. 메타 설명·인사 없이 학습 자료만 출력. + +【학습 자료】 diff --git a/app/services/study/subject_note_rag.py b/app/services/study/subject_note_rag.py new file mode 100644 index 0000000..00aa9ce --- /dev/null +++ b/app/services/study/subject_note_rag.py @@ -0,0 +1,257 @@ +"""study_topic_subject_notes 분야 설명 RAG (PR-9). + +PR-3 explanation_rag.py 와 비슷한 패턴 — 매핑 documents 청크 + 같은 토픽 같은 subject 다른 문제·해설 → bge-reranker 로 top-K 줄임. + +쿼리는 "subject + scope" 키워드 기반 (단일 문제가 아니라 분야 키워드). 
+""" + +from __future__ import annotations + +import asyncio +import logging +from dataclasses import dataclass + +import httpx +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from ai.client import AIClient +from models.chunk import DocumentChunk +from models.document import Document +from models.study_question import StudyQuestion +from models.study_topic import StudyTopicDocument + +logger = logging.getLogger(__name__) + +# top-K +DOC_TOPK = 5 +Q_TOPK = 3 +MAX_RERANK_INPUT = 60 +DOC_SNIPPET_LEN = 400 +Q_SNIPPET_LEN = 300 +MAX_QUESTION_CANDIDATES = 30 +RERANK_TIMEOUT_S = 5.0 + + +@dataclass +class EvidenceItem: + source_type: str + source_id: int + title: str + snippet: str + + def to_dict(self) -> dict: + return { + "source_type": self.source_type, + "source_id": self.source_id, + "title": self.title, + "snippet": self.snippet, + } + + +@dataclass +class SubjectNoteContext: + documents: list[EvidenceItem] + questions: list[EvidenceItem] + + @property + def all(self) -> list[EvidenceItem]: + return [*self.documents, *self.questions] + + +def _truncate(text: str, n: int) -> str: + if not text: + return "" + s = text.strip() + return s if len(s) <= n else s[:n].rstrip() + "…" + + +def _build_query(subject: str, scope: str) -> str: + parts = [subject] + if scope: + parts.append(scope) + parts.append("학습 자료") + return " ".join(parts) + + +async def _rerank(client: AIClient, query: str, texts: list[str], top_k: int) -> list[int]: + if not texts: + return [] + if len(texts) > MAX_RERANK_INPUT: + texts = texts[:MAX_RERANK_INPUT] + try: + async with asyncio.timeout(RERANK_TIMEOUT_S): + results = await client.rerank(query, texts) + idxs = [r["index"] for r in results if 0 <= r.get("index", -1) < len(texts)] + return idxs[:top_k] + except (asyncio.TimeoutError, httpx.HTTPError) as e: + logger.warning("subject_note_rerank_fallback: %s: %s", type(e).__name__, e) + return list(range(min(top_k, len(texts)))) + + +async def 
async def _gather_document_evidence(
    session: AsyncSession,
    user_id: int,
    study_topic_id: int,
    query: str,
    client: AIClient,
) -> list[EvidenceItem]:
    """Collect up to ``DOC_TOPK`` evidence items from documents mapped to the topic.

    Candidates are the chunks with ``chunk_index < 4`` of every mapped,
    non-deleted document, plus each document's ``ai_summary``. They are
    reranked against *query* and at most one item per document is kept.

    Returns:
        Evidence items in rank order; empty when the topic has no usable
        documents or text.
    """
    doc_ids = list(
        (
            await session.execute(
                select(StudyTopicDocument.document_id).where(
                    StudyTopicDocument.study_topic_id == study_topic_id,
                    StudyTopicDocument.user_id == user_id,
                )
            )
        ).scalars().all()
    )
    if not doc_ids:
        return []

    doc_meta_rows = (
        await session.execute(
            select(Document.id, Document.title, Document.ai_summary).where(
                Document.id.in_(doc_ids),
                Document.deleted_at.is_(None),
            )
        )
    ).all()
    doc_meta: dict[int, tuple[str | None, str | None]] = {
        r.id: (r.title, r.ai_summary) for r in doc_meta_rows
    }
    if not doc_meta:
        return []

    chunk_rows = (
        await session.execute(
            select(DocumentChunk.doc_id, DocumentChunk.chunk_index, DocumentChunk.text)
            .where(
                DocumentChunk.doc_id.in_(list(doc_meta)),
                DocumentChunk.chunk_index < 4,
            )
            .order_by(DocumentChunk.doc_id, DocumentChunk.chunk_index)
        )
    ).all()

    # (document id, text) pairs: leading chunks first, then the summaries.
    candidates: list[tuple[int, str]] = [(r.doc_id, r.text) for r in chunk_rows if r.text]
    candidates.extend(
        (did, summary) for did, (_title, summary) in doc_meta.items() if summary
    )
    if not candidates:
        return []

    texts = [_truncate(t, 800) for _, t in candidates]
    # Ask for the whole ranking and cut to DOC_TOPK only after collapsing to
    # one hit per document. Cutting first lets several chunks of the same
    # document use up the top slots and crowd out other documents.
    ranked = await _rerank(client, query, texts, len(texts))

    seen_doc_ids: set[int] = set()
    out: list[EvidenceItem] = []
    for i in ranked:
        did, text = candidates[i]
        if did in seen_doc_ids:
            continue
        seen_doc_ids.add(did)
        title = doc_meta.get(did, (None, None))[0] or f"문서 #{did}"
        out.append(EvidenceItem(
            source_type="document",
            source_id=did,
            title=title,
            snippet=_truncate(text, DOC_SNIPPET_LEN),
        ))
        if len(out) >= DOC_TOPK:
            break
    return out
async def _gather_question_evidence(
    session: AsyncSession,
    user_id: int,
    study_topic_id: int,
    subject: str,
    scope: str,
    query: str,
    client: AIClient,
) -> list[EvidenceItem]:
    """Collect up to ``Q_TOPK`` questions (with explanations) from the same topic.

    Questions of the same *subject* are preferred. If there are none, the
    newest questions of the whole topic are used instead. *scope* is accepted
    for symmetry with the caller but does not filter the candidates; it
    influences the result only through *query*.
    """
    base_q = select(StudyQuestion).where(
        StudyQuestion.user_id == user_id,
        StudyQuestion.study_topic_id == study_topic_id,
        StudyQuestion.deleted_at.is_(None),
    )
    rows = (
        await session.execute(
            base_q.where(StudyQuestion.subject == subject)
            .order_by(StudyQuestion.created_at.desc())
            .limit(MAX_QUESTION_CANDIDATES)
        )
    ).scalars().all()
    if not rows:
        # No question with this subject: fall back to the whole topic.
        rows = (
            await session.execute(
                base_q.order_by(StudyQuestion.created_at.desc())
                .limit(MAX_QUESTION_CANDIDATES)
            )
        ).scalars().all()
    if not rows:
        return []

    candidates_text: list[str] = []
    for q in rows:
        parts = [q.question_text or ""]
        if q.explanation:
            parts.append(q.explanation)
        # Only a finished AI explanation counts as evidence.
        if q.ai_explanation and q.ai_explanation_status == "ready":
            parts.append(q.ai_explanation)
        candidates_text.append(" | ".join(parts))

    top_idxs = await _rerank(client, query, candidates_text, Q_TOPK)
    out: list[EvidenceItem] = []
    for i in top_idxs:
        q = rows[i]
        title_head = _truncate(q.question_text or "", 40)
        out.append(EvidenceItem(
            source_type="question",
            source_id=q.id,
            title=f"Q{q.id}: {title_head}",
            snippet=_truncate(candidates_text[i], Q_SNIPPET_LEN),
        ))
    return out


async def gather_subject_note_context(
    session: AsyncSession,
    user_id: int,
    study_topic_id: int,
    subject: str,
    scope: str,
) -> SubjectNoteContext:
    """Gather the RAG evidence for one (subject, scope) pair.

    The two lookups share *session* and therefore run one after the other.
    SQLAlchemy does not allow concurrent operations on a single
    ``AsyncSession``, so running them with ``asyncio.gather`` can fail with
    an ``InvalidRequestError`` and leave the caller with no evidence at all.

    Returns:
        The document and question evidence. The AI client created here is
        always closed before returning.
    """
    client = AIClient()
    query = _build_query(subject, scope)
    try:
        docs = await _gather_document_evidence(session, user_id, study_topic_id, query, client)
        questions = await _gather_question_evidence(
            session, user_id, study_topic_id, subject, scope, query, client
        )
        return SubjectNoteContext(documents=docs, questions=questions)
    finally:
        await client.close()
def render_evidence_block(items: list[EvidenceItem]) -> str:
    """Render evidence items as a bullet list for the prompt.

    Each item becomes exactly one line: ``- [자료: <title>] <snippet>`` for
    documents and ``- [<title>] <snippet>`` for anything else. Runs of
    whitespace, including newlines, inside titles and snippets are collapsed
    to single spaces so a multi-line snippet cannot spill out of its bullet.

    Returns:
        The joined lines, or "(없음)" when *items* is empty or None.
    """
    if not items:
        return "(없음)"

    def _one_line(value) -> str:
        return " ".join(str(value or "").split())

    lines = []
    for it in items:
        title = _one_line(it.title)
        label = f"자료: {title}" if it.source_type == "document" else title
        lines.append(f"- [{label}] {_one_line(it.snippet)}")
    return "\n".join(lines)
기본은 과목별 {optTarget}문제씩 무작위 균등 추출. 한 과목이 부족하면 가용한 만큼만 출제됩니다. + 풀이 중 정답·해설은 표시하지 않으며, 다 풀면 결과 화면에서 카테고리별로 한 번에 확인합니다.