diff --git a/app/api/study_sessions.py b/app/api/study_sessions.py new file mode 100644 index 0000000..4ac9eaa --- /dev/null +++ b/app/api/study_sessions.py @@ -0,0 +1,901 @@ +"""학습 세션 API — Phase 1 MVP (자격증 + 어학 일반화) + +iPad 손글씨 필사 / 모바일 암기노트 / 모바일 퀴즈 가 같은 study_sessions 데이터를 +공유. 본 모듈은 Phase 1 = iPad 필사 세션 + DB/API 일반화 까지만 다룬다. + +핵심: + - study_type 'certification' | 'language' 분기. metadata jsonb 가 도메인별 자유 메타. + - 단일 *_document_id 컬럼 ❌. 모든 미디어 연결은 study_session_assets 로 통일. + - documents 본체는 절대 삭제하지 않음 (assets 연결만 해제). + - ownership 검증: study_sessions.user_id == current_user.id (필수). + documents 는 single-user 시스템이라 컬럼 부재 — 미래 multi-user 대비 + `getattr(doc, 'user_id', None)` 로 부드럽게 검증 (값 있으면 비교, 없으면 통과). + - 409 중복: UNIQUE(study_session_id, document_id, asset_type, role) 위반. + +Phase 2~4 미사용 필드 (review_state / quiz / ocr_text / ai_summary / prompt 등) 는 +스키마에만 존재, 자동 로직 없음. 별도 PR 에서 활성. +""" + +import asyncio +import logging +from datetime import datetime, timezone +from pathlib import Path +from typing import Annotated, Any + +from fastapi import ( + APIRouter, + Depends, + Form, + HTTPException, + Query, + Request, + UploadFile, +) +from pydantic import BaseModel, Field +from sqlalchemy import and_, delete, func, select +from sqlalchemy.exc import IntegrityError +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload +from starlette.requests import ClientDisconnect + +from core.auth import get_current_user +from core.config import settings +from core.database import get_session +from core.utils import file_hash +from models.document import Document +from models.queue import enqueue_stage +from models.study_session import StudySession, StudySessionAsset +from models.user import User + +logger = logging.getLogger(__name__) +router = APIRouter() + + +# ─── Enum 검증 상수 ─── + +VALID_STUDY_TYPES: set[str] = {"certification", "language"} +VALID_MODES: set[str] = { + "copy", "trace", "blank-repeat", + "dictation", "shadowing", + "quiz", 
def _verify_session_ownership(
    sess: StudySession | None, user: User
) -> StudySession:
    """Return ``sess`` when it exists and belongs to ``user``.

    A missing session and a session owned by a different user are reported
    with the same 404 so the response does not reveal whether the id exists.

    Raises:
        HTTPException: 404 when ``sess`` is ``None`` or its ``user_id``
            differs from ``user.id``.
    """
    owned_by_caller = sess is not None and sess.user_id == user.id
    if owned_by_caller:
        return sess
    raise HTTPException(status_code=404, detail="학습 세션을 찾을 수 없습니다")
class StudySessionAssetResponse(BaseModel):
    """Response schema for one study-session asset link.

    Instances are built from ``StudySessionAsset`` ORM rows through
    ``model_validate``, which requires attribute-based loading.
    """

    # Pydantic v2 configuration. The rest of this module already uses the v2
    # API (model_validate / model_fields_set); the nested ``class Config``
    # form is deprecated there. A plain dict is accepted for ``model_config``,
    # so no additional import is needed.
    model_config = {"from_attributes": True}

    id: int
    document_id: int
    asset_type: str
    role: str | None
    sort_order: int
    created_at: datetime
None = None + next_review_at: datetime | None = None + + +class StudySessionResponse(BaseModel): + id: int + user_id: int + study_type: str + certification: str | None + language_code: str | None + learning_level: str | None + subject: str | None + topic: str | None + source_text: str | None + source_page: int | None + mode: str + prompt_question: str | None + expected_answer: str | None + metadata: dict[str, Any] | None = Field(default=None) + target_count: int | None + repetition_count: int + canvas_width: int | None + canvas_height: int | None + schema_version: int + strokes_json: dict[str, Any] | None + ocr_text: str | None + user_corrected_text: str | None + ai_summary: str | None + review_state: str | None + next_review_at: datetime | None + last_quiz_at: datetime | None + correct_count: int + incorrect_count: int + assets: list[StudySessionAssetResponse] + created_at: datetime + updated_at: datetime + + +class StudySessionListResponse(BaseModel): + items: list[StudySessionResponse] + total: int + limit: int + offset: int + + +def _to_session_response(sess: StudySession) -> StudySessionResponse: + return StudySessionResponse( + id=sess.id, + user_id=sess.user_id, + study_type=sess.study_type, + certification=sess.certification, + language_code=sess.language_code, + learning_level=sess.learning_level, + subject=sess.subject, + topic=sess.topic, + source_text=sess.source_text, + source_page=sess.source_page, + mode=sess.mode, + prompt_question=sess.prompt_question, + expected_answer=sess.expected_answer, + metadata=sess.metadata_json, + target_count=sess.target_count, + repetition_count=sess.repetition_count, + canvas_width=sess.canvas_width, + canvas_height=sess.canvas_height, + schema_version=sess.schema_version, + strokes_json=sess.strokes_json, + ocr_text=sess.ocr_text, + user_corrected_text=sess.user_corrected_text, + ai_summary=sess.ai_summary, + review_state=sess.review_state, + next_review_at=sess.next_review_at, + last_quiz_at=sess.last_quiz_at, + 
@router.post("/", response_model=StudySessionResponse, status_code=201)
async def create_study_session(
    body: StudySessionCreate,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Create a new study session owned by the current user.

    Certification example: study_type='certification', certification='산업안전기사',
        subject='산업안전보건법', topic='안전보건관리책임자의 직무', mode='copy'
    Language example: study_type='language', language_code='ja', learning_level='JLPT N3',
        subject='漢字', topic='安全', source_text='安全',
        metadata={'reading':'あんぜん','meaning':'안전','unit_type':'kanji'}

    Returns:
        The created session, with an empty ``assets`` list.

    Raises:
        HTTPException: 422 when ``study_type`` or ``mode`` is not an accepted
            value (raised by ``_validate_create_payload``).
    """
    _validate_create_payload(body)

    sess = StudySession(
        user_id=user.id,
        study_type=body.study_type,
        certification=body.certification,
        language_code=body.language_code,
        learning_level=body.learning_level,
        subject=body.subject,
        topic=body.topic,
        source_text=body.source_text,
        source_page=body.source_page,
        mode=body.mode,
        prompt_question=body.prompt_question,
        expected_answer=body.expected_answer,
        metadata_json=body.metadata,
        target_count=body.target_count,
        canvas_width=body.canvas_width,
        canvas_height=body.canvas_height,
        strokes_json=body.strokes_json,
        # Initialise the collection while the object is still transient.
        # Assigning it after the row is persistent makes the ORM fetch the
        # previous collection first, which is implicit IO under AsyncSession.
        assets=[],
    )
    session.add(sess)
    # commit() flushes pending objects itself; a separate flush() is not needed.
    await session.commit()

    return _to_session_response(sess)
response_model=StudySessionListResponse) +async def list_study_sessions( + user: Annotated[User, Depends(get_current_user)], + session: Annotated[AsyncSession, Depends(get_session)], + study_type: str | None = Query(None), + certification: str | None = Query(None), + language_code: str | None = Query(None), + learning_level: str | None = Query(None), + subject: str | None = Query(None), + topic: str | None = Query(None), + review_state: str | None = Query(None), + document_id: int | None = Query(None, description="이 문서가 연결된 세션만"), + asset_type: str | None = Query(None, description="이 asset_type 보유 세션만"), + mode: str | None = Query(None), + due_before: datetime | None = Query(None, description="next_review_at <= due_before"), + order: str = Query("created_at"), + limit: int = Query(50, ge=1, le=200), + offset: int = Query(0, ge=0), +): + """학습 세션 목록 — Phase 1 부터 모든 filter 수용 (Phase 3/4 활성 대비).""" + if study_type is not None and study_type not in VALID_STUDY_TYPES: + raise HTTPException(status_code=422, detail="study_type 값이 올바르지 않습니다") + if review_state is not None and review_state not in VALID_REVIEW_STATES: + raise HTTPException(status_code=422, detail="review_state 값이 올바르지 않습니다") + if asset_type is not None and asset_type not in VALID_ASSET_TYPES: + raise HTTPException(status_code=422, detail="asset_type 값이 올바르지 않습니다") + if mode is not None and mode not in VALID_MODES: + raise HTTPException(status_code=422, detail="mode 값이 올바르지 않습니다") + if order not in VALID_ORDERS: + raise HTTPException(status_code=422, detail="order 값이 올바르지 않습니다") + + base = select(StudySession).where(StudySession.user_id == user.id) + if study_type is not None: + base = base.where(StudySession.study_type == study_type) + if certification is not None: + base = base.where(StudySession.certification == certification) + if language_code is not None: + base = base.where(StudySession.language_code == language_code) + if learning_level is not None: + base = base.where(StudySession.learning_level == 
learning_level) + if subject is not None: + base = base.where(StudySession.subject == subject) + if topic is not None: + base = base.where(StudySession.topic == topic) + if review_state is not None: + base = base.where(StudySession.review_state == review_state) + if mode is not None: + base = base.where(StudySession.mode == mode) + if due_before is not None: + base = base.where(StudySession.next_review_at <= due_before) + + # assets join filter — EXISTS 서브쿼리 + if document_id is not None or asset_type is not None: + asset_conditions = [StudySessionAsset.study_session_id == StudySession.id] + if document_id is not None: + asset_conditions.append(StudySessionAsset.document_id == document_id) + if asset_type is not None: + asset_conditions.append(StudySessionAsset.asset_type == asset_type) + base = base.where( + select(StudySessionAsset.id) + .where(and_(*asset_conditions)) + .exists() + ) + + count_query = select(func.count()).select_from(base.subquery()) + total = (await session.execute(count_query)).scalar() or 0 + + if order == "next_review_at": + ordered = base.order_by(StudySession.next_review_at.asc().nullslast(), StudySession.id.desc()) + elif order == "last_quiz_at": + ordered = base.order_by(StudySession.last_quiz_at.desc().nullslast(), StudySession.id.desc()) + else: + ordered = base.order_by(StudySession.created_at.desc(), StudySession.id.desc()) + + ordered = ( + ordered.options(selectinload(StudySession.assets)) + .offset(offset) + .limit(limit) + ) + rows = (await session.execute(ordered)).scalars().all() + + return StudySessionListResponse( + items=[_to_session_response(s) for s in rows], + total=total, + limit=limit, + offset=offset, + ) + + +@router.get("/groups") +async def get_study_groups( + user: Annotated[User, Depends(get_current_user)], + session: Annotated[AsyncSession, Depends(get_session)], +): + """도메인별 그룹 카운트 (Phase 3 모바일 카드 메뉴 대비, Phase 1 부터 endpoint 제공). 
+ + 응답: {by_type: {certification: {...}, language: {...}}} + """ + # certification 그룹: certification → subject → topic + cert_query = ( + select( + StudySession.certification, + StudySession.subject, + StudySession.topic, + func.count().label("session_count"), + func.count().filter(StudySession.review_state == "weak").label("weak_count"), + func.count() + .filter( + and_( + StudySession.next_review_at.is_not(None), + StudySession.next_review_at <= datetime.now(timezone.utc), + ) + ) + .label("due_count"), + ) + .where( + StudySession.user_id == user.id, + StudySession.study_type == "certification", + ) + .group_by(StudySession.certification, StudySession.subject, StudySession.topic) + ) + cert_rows = (await session.execute(cert_query)).all() + + # language 그룹: language_code → learning_level → subject → topic + assets 보유 여부 + lang_query = ( + select( + StudySession.language_code, + StudySession.learning_level, + StudySession.subject, + StudySession.topic, + func.count().label("session_count"), + func.count().filter(StudySession.review_state == "weak").label("weak_count"), + func.count() + .filter( + and_( + StudySession.next_review_at.is_not(None), + StudySession.next_review_at <= datetime.now(timezone.utc), + ) + ) + .label("due_count"), + ) + .where( + StudySession.user_id == user.id, + StudySession.study_type == "language", + ) + .group_by( + StudySession.language_code, + StudySession.learning_level, + StudySession.subject, + StudySession.topic, + ) + ) + lang_rows = (await session.execute(lang_query)).all() + + # 어학 그룹의 has_audio / has_video — 별도 카운트 (assets 와 join) + media_query = ( + select( + StudySession.language_code, + StudySession.learning_level, + StudySession.subject, + StudySession.topic, + StudySessionAsset.asset_type, + func.count().label("c"), + ) + .join(StudySessionAsset, StudySessionAsset.study_session_id == StudySession.id) + .where( + StudySession.user_id == user.id, + StudySession.study_type == "language", + 
StudySessionAsset.asset_type.in_(["audio", "video"]), + ) + .group_by( + StudySession.language_code, + StudySession.learning_level, + StudySession.subject, + StudySession.topic, + StudySessionAsset.asset_type, + ) + ) + media_rows = (await session.execute(media_query)).all() + media_map: dict[tuple, dict[str, int]] = {} + for r in media_rows: + key = (r.language_code, r.learning_level, r.subject, r.topic) + media_map.setdefault(key, {"audio": 0, "video": 0})[r.asset_type] = r.c + + # certification 트리 빌드 + cert_groups: dict[str | None, dict[str | None, dict[str | None, dict]]] = {} + for r in cert_rows: + cert_groups.setdefault(r.certification, {}).setdefault(r.subject, {})[r.topic] = { + "session_count": r.session_count, + "weak_count": r.weak_count, + "due_count": r.due_count, + } + + cert_out = [] + for cert_name, subjects in cert_groups.items(): + subj_list = [] + sess_total = weak_total = due_total = 0 + for subj_name, topics in subjects.items(): + topic_list = [] + s_count = w_count = d_count = 0 + for topic_name, stats in topics.items(): + topic_list.append({ + "topic": topic_name, + "session_count": stats["session_count"], + "weak_count": stats["weak_count"], + "due_count": stats["due_count"], + }) + s_count += stats["session_count"] + w_count += stats["weak_count"] + d_count += stats["due_count"] + subj_list.append({ + "subject": subj_name, + "topics": topic_list, + "session_count": s_count, + "weak_count": w_count, + "due_count": d_count, + }) + sess_total += s_count + weak_total += w_count + due_total += d_count + cert_out.append({ + "certification": cert_name, + "subjects": subj_list, + "session_count": sess_total, + "weak_count": weak_total, + "due_count": due_total, + }) + + # language 트리 빌드 + lang_groups: dict[str | None, dict[str | None, dict[str | None, dict[str | None, dict]]]] = {} + for r in lang_rows: + media = media_map.get( + (r.language_code, r.learning_level, r.subject, r.topic), + {"audio": 0, "video": 0}, + ) + ( + lang_groups + 
@router.patch("/{session_id}", response_model=StudySessionResponse)
async def update_study_session(
    session_id: int,
    body: StudySessionUpdate,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Partially update a study session owned by the current user.

    Only fields that the client explicitly sent (``body.model_fields_set``)
    are applied, so an omitted field is left untouched while an explicit
    ``null`` clears a nullable column.

    Returns:
        The updated session including its assets.

    Raises:
        HTTPException: 404 when the session does not exist or belongs to
            another user; 422 when ``mode`` or ``review_state`` is not an
            accepted value, or when ``repetition_count`` is sent as ``null``.
    """
    sess = await session.get(
        StudySession, session_id, options=[selectinload(StudySession.assets)]
    )
    sess = _verify_session_ownership(sess, user)

    fields_set = body.model_fields_set

    # repetition_count is declared NOT NULL on the model. Without this guard an
    # explicit JSON null would only fail at commit time as a database
    # integrity error, surfacing as a 500 instead of a validation error.
    if "repetition_count" in fields_set and body.repetition_count is None:
        raise HTTPException(
            status_code=422, detail="repetition_count 값이 올바르지 않습니다"
        )

    # Enum-like fields are validated before being applied.
    if "mode" in fields_set:
        if body.mode not in VALID_MODES:
            raise HTTPException(status_code=422, detail="mode 값이 올바르지 않습니다")
        sess.mode = body.mode
    if "review_state" in fields_set:
        # VALID_REVIEW_STATES contains None, so an explicit null resets the state.
        if body.review_state not in VALID_REVIEW_STATES:
            raise HTTPException(status_code=422, detail="review_state 값이 올바르지 않습니다")
        sess.review_state = body.review_state

    # Fields copied verbatim from the payload (same attribute name on the model).
    simple_fields = {
        "certification", "language_code", "learning_level", "subject", "topic",
        "source_text", "source_page", "prompt_question", "expected_answer",
        "target_count", "repetition_count",
        "canvas_width", "canvas_height", "strokes_json",
        "ocr_text", "user_corrected_text", "next_review_at",
    }
    for fname in simple_fields & fields_set:
        setattr(sess, fname, getattr(body, fname))

    # The payload calls it "metadata"; the ORM attribute is metadata_json.
    if "metadata" in fields_set:
        sess.metadata_json = body.metadata

    sess.updated_at = datetime.now(timezone.utc)
    await session.commit()
    # Reload the collection explicitly so building the response does not need
    # a lazy load.
    await session.refresh(sess, attribute_names=["assets"])
    return _to_session_response(sess)
+ """ + if body.asset_type not in VALID_ASSET_TYPES: + raise HTTPException( + status_code=422, + detail=f"asset_type 은 {sorted(VALID_ASSET_TYPES)} 중 하나여야 합니다", + ) + if body.role not in VALID_ROLES: + raise HTTPException( + status_code=422, + detail=f"role 은 {sorted(r for r in VALID_ROLES if r is not None)} 중 하나 또는 NULL 이어야 합니다", + ) + + sess = await session.get(StudySession, session_id) + sess = _verify_session_ownership(sess, user) + + doc = await session.get(Document, body.document_id) + _verify_document_ownership(doc, user) + + # 사전 SELECT 로 중복 검사 + DB UNIQUE 제약 둘 다 — race condition 안전. + existing = await session.execute( + select(StudySessionAsset).where( + StudySessionAsset.study_session_id == session_id, + StudySessionAsset.document_id == body.document_id, + StudySessionAsset.asset_type == body.asset_type, + StudySessionAsset.role.is_(body.role) if body.role is None + else StudySessionAsset.role == body.role, + ) + ) + if existing.scalar_one_or_none() is not None: + raise HTTPException( + status_code=409, + detail={ + "error_code": "asset_already_linked", + "message": "해당 문서가 이미 같은 asset_type/role 로 연결되어 있습니다", + }, + ) + + asset = StudySessionAsset( + study_session_id=session_id, + document_id=body.document_id, + asset_type=body.asset_type, + role=body.role, + sort_order=body.sort_order, + ) + session.add(asset) + try: + await session.commit() + except IntegrityError: + await session.rollback() + # UNIQUE 위반 — 위 사전 SELECT 와 race 했을 가능성. 동일 메시지로 응답. 
@router.post("/{session_id}/snapshot", response_model=StudySessionAssetResponse, status_code=201)
async def upload_handwriting_snapshot(
    session_id: int,
    request: Request,
    file: UploadFile,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
    sort_order: int = Form(0),
):
    """Store a canvas PNG, register it as a document and link it to the session.

    The upload is streamed to a ``.uploading`` staging file in the NAS Inbox,
    renamed onto its final name, and then a ``Document`` row, an ``extract``
    queue entry and a ``handwriting_png`` asset (role ``answer``) are written
    in one transaction. Several snapshots may accumulate on one session: each
    upload creates a new document id, so the asset UNIQUE constraint is not hit.

    Returns:
        The created asset link.

    Raises:
        HTTPException: 404 when the session is missing or not owned by the
            caller; 400 (``invalid_input`` / ``empty_file``) for a bad file
            name, a non-``.png`` extension or an empty body; 413
            (``body_too_large``) when the declared or streamed size exceeds the
            limit; 499 (``network_abort``) when the client disconnects; 408
            (``upload_timeout``); 500 (``internal``) for unexpected storage
            errors.
    """
    sess = await session.get(StudySession, session_id)
    sess = _verify_session_ownership(sess, user)

    if not file.filename:
        raise _upload_error(400, "invalid_input", "파일명이 필요합니다")
    # Keep only the final path component so client-supplied directories are dropped.
    safe_name = Path(file.filename).name
    if not safe_name or safe_name.startswith("."):
        raise _upload_error(400, "invalid_input", "유효하지 않은 파일명")
    ext = Path(safe_name).suffix.lower()
    if ext != ".png":
        raise _upload_error(
            400, "invalid_input", "snapshot 은 PNG 파일만 지원합니다",
        )

    max_bytes = settings.upload.max_bytes
    slack_ratio = settings.upload.content_length_slack_ratio
    chunk_size = settings.upload.stream_chunk_bytes

    # Early rejection based on the declared Content-Length. Only int() sits in
    # the try so the 413 is never raised from inside the ValueError handler's
    # scope; an unparsable header is ignored and the streamed size check below
    # still applies.
    cl_header = request.headers.get("content-length")
    if cl_header:
        try:
            declared_length = int(cl_header)
        except ValueError:
            declared_length = None
        if declared_length is not None and declared_length > int(max_bytes * slack_ratio):
            raise _upload_error(413, "body_too_large", "파일이 너무 큽니다")

    # Resolve the NAS Inbox location and make sure the target stays inside it.
    mount_root = Path(settings.nas_mount_path)
    inbox_dir = mount_root / "PKM" / "Inbox"
    inbox_dir.mkdir(parents=True, exist_ok=True)
    inbox_root = inbox_dir.resolve()
    target = (inbox_dir / safe_name).resolve()
    # Component-wise containment check; a plain string prefix comparison would
    # also accept sibling directories such as ".../Inbox2".
    if not target.is_relative_to(inbox_root):
        raise _upload_error(400, "invalid_input", "잘못된 파일 경로")

    # Avoid clobbering an existing file or an upload still in progress.
    counter = 1
    stem, suffix = target.stem, target.suffix
    staging = target.with_name(target.name + ".uploading")
    while target.exists() or staging.exists():
        target = inbox_root / f"{stem}_{counter}{suffix}"
        staging = target.with_name(target.name + ".uploading")
        counter += 1

    # Stream to the staging file while enforcing the cumulative size limit.
    written = 0
    try:
        with staging.open("wb") as f:
            while chunk := await file.read(chunk_size):
                written += len(chunk)
                if written > max_bytes:
                    raise _upload_error(413, "body_too_large", "파일이 너무 큽니다")
                f.write(chunk)
        if written == 0:
            raise _upload_error(400, "empty_file", "빈 파일은 업로드할 수 없습니다")
    except ClientDisconnect:
        staging.unlink(missing_ok=True)
        logger.info("snapshot aborted by client: %s (written=%d)", safe_name, written)
        raise _upload_error(499, "network_abort", "업로드가 취소되었습니다")
    except asyncio.TimeoutError:
        staging.unlink(missing_ok=True)
        logger.warning("snapshot timeout: %s (written=%d)", safe_name, written)
        raise _upload_error(408, "upload_timeout", "업로드 시간 초과")
    except HTTPException:
        # Our own size / empty-file errors: clean up and pass them through unchanged.
        staging.unlink(missing_ok=True)
        raise
    except Exception:
        staging.unlink(missing_ok=True)
        logger.exception("snapshot internal error: %s (written=%d)", safe_name, written)
        raise _upload_error(500, "internal", "업로드 처리 중 오류가 발생했습니다")

    # Atomic rename onto the final path.
    try:
        staging.replace(target)
    except OSError:
        staging.unlink(missing_ok=True)
        logger.exception("snapshot rename failed: %s -> %s", staging, target)
        raise _upload_error(500, "internal", "파일 저장 후 정리 중 오류가 발생했습니다")

    # Compose user_tags and the title from the study session metadata.
    domain_tag = sess.certification or sess.language_code or "general"
    user_tags = ["handwriting", domain_tag]
    if sess.subject:
        user_tags.append(sess.subject)

    title = f"필기 — {sess.topic or sess.subject or 'study session'} #{session_id}"

    # From here on the file exists at its final path, so every failure —
    # including path/hash computation — must remove it. Document, the
    # 'extract' queue entry and the asset link share a single transaction.
    try:
        # target is a resolved path, so compare against the resolved mount root.
        rel_path = str(target.relative_to(mount_root.resolve()))
        fhash = file_hash(target)

        doc = Document(
            file_path=rel_path,
            file_hash=fhash,
            file_format="png",
            file_size=written,
            file_type="immutable",
            title=title,
            user_tags=user_tags,
        )
        session.add(doc)
        # Flush to obtain doc.id for the queue entry and the asset link.
        await session.flush()

        await enqueue_stage(session, doc.id, "extract")

        asset = StudySessionAsset(
            study_session_id=session_id,
            document_id=doc.id,
            asset_type="handwriting_png",
            role="answer",
            sort_order=sort_order,
        )
        session.add(asset)

        await session.commit()
        await session.refresh(asset)
    except Exception:
        # The file is a separate resource from the DB transaction: remove it explicitly.
        target.unlink(missing_ok=True)
        raise

    return StudySessionAssetResponse.model_validate(asset)
class StudySession(Base):
    """ORM mapping for the ``study_sessions`` table.

    One row is one study session owned by a user. ``study_type`` selects the
    domain, ``metadata_json`` carries free-form per-domain data, and linked
    media are reached through ``assets`` rather than per-media id columns.
    """

    __tablename__ = "study_sessions"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # Owning user; the foreign key is declared with ON DELETE CASCADE.
    user_id: Mapped[int] = mapped_column(
        BigInteger, ForeignKey("users.id", ondelete="CASCADE"), nullable=False
    )

    # Domain switch: 'certification' | 'language'
    study_type: Mapped[str] = mapped_column(
        String(30), default="certification", nullable=False
    )

    # Certification / language metadata
    certification: Mapped[str | None] = mapped_column(String(120))
    language_code: Mapped[str | None] = mapped_column(String(20))
    learning_level: Mapped[str | None] = mapped_column(String(80))

    # Common subject / topic
    subject: Mapped[str | None] = mapped_column(String(120))
    topic: Mapped[str | None] = mapped_column(String(200))

    # Source text snapshot (only the excerpt text, kept separately from any
    # source_scan asset)
    source_text: Mapped[str | None] = mapped_column(Text)
    source_page: Mapped[int | None] = mapped_column(Integer)

    # Study mode: 'copy'/'trace'/'blank-repeat'/'dictation'/'shadowing'/'quiz'/'flashcard'
    mode: Mapped[str] = mapped_column(String(30), default="copy", nullable=False)
    prompt_question: Mapped[str | None] = mapped_column(Text)
    expected_answer: Mapped[str | None] = mapped_column(Text)

    # Free-form per-domain metadata (language: reading/meaning; certification:
    # law_article, etc.). The database column is named "metadata".
    # NOTE(review): the attribute is presumably called metadata_json because
    # ``metadata`` is reserved on declarative classes — confirm.
    metadata_json: Mapped[dict[str, Any] | None] = mapped_column(
        "metadata", JSONB
    )

    # Repetition counters (auxiliary)
    target_count: Mapped[int | None] = mapped_column(Integer)
    repetition_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)

    # Handwriting data (original) — the core of Phase 1
    strokes_json: Mapped[dict[str, Any] | None] = mapped_column(JSONB)
    canvas_width: Mapped[int | None] = mapped_column(Integer)
    canvas_height: Mapped[int | None] = mapped_column(Integer)
    schema_version: Mapped[int] = mapped_column(Integer, default=1, nullable=False)

    # Text derived from the handwriting — filled in Phase 2 (NULL in Phase 1)
    ocr_text: Mapped[str | None] = mapped_column(Text)
    user_corrected_text: Mapped[str | None] = mapped_column(Text)
    ai_summary: Mapped[str | None] = mapped_column(Text)

    # SRS / quiz statistics — activated in Phase 4, NULL in Phase 1
    review_state: Mapped[str | None] = mapped_column(String(20))
    next_review_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    last_quiz_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    correct_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
    incorrect_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)

    # NOTE(review): datetime.now without a tz argument yields naive local
    # datetimes for these timezone-aware columns, while the PATCH handler
    # writes datetime.now(timezone.utc) — confirm this mix is intended.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=datetime.now, nullable=False
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=datetime.now, onupdate=datetime.now, nullable=False
    )

    # Linked assets — removed together with the session (matches the
    # ON DELETE CASCADE foreign key on study_session_assets); returned ordered
    # by sort_order.
    assets: Mapped[list["StudySessionAsset"]] = relationship(
        back_populates="session",
        cascade="all, delete-orphan",
        order_by="StudySessionAsset.sort_order",
    )
+ UniqueConstraint( + "study_session_id", "document_id", "asset_type", "role", + name="study_session_assets_session_id_document_id_asset_type_rol_key", + ), + ) + + id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + study_session_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey("study_sessions.id", ondelete="CASCADE"), nullable=False + ) + document_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False + ) + + # 'source_scan' | 'handwriting_png' | 'audio' | 'video' | 'transcript' | 'reference' + asset_type: Mapped[str] = mapped_column(String(30), nullable=False) + # 'prompt' | 'answer' | 'pronunciation' | 'lecture' | 'listening_source' + # | 'shadowing_source' | 'reference' + role: Mapped[str | None] = mapped_column(String(40)) + sort_order: Mapped[int] = mapped_column(Integer, default=0, nullable=False) + + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), default=datetime.now, nullable=False + ) + + session: Mapped["StudySession"] = relationship(back_populates="assets") diff --git a/migrations/164_study_sessions.sql b/migrations/164_study_sessions.sql new file mode 100644 index 0000000..6e5a8a0 --- /dev/null +++ b/migrations/164_study_sessions.sql @@ -0,0 +1,137 @@ +-- 164_study_sessions.sql +-- iPad 손글씨 학습 세션 + 모바일 암기노트/퀴즈 — Phase 1 MVP +-- plan: ~/.claude/plans/scalable-chasing-stonebraker.md +-- +-- 목적: 자격증(산업안전기사 등) + 어학(일본어/한자 우선) 두 도메인을 모두 받는 일반 학습 세션. +-- iPad write 모드(필사) / 모바일 review 모드(암기노트) / quiz 모드(SRS) 가 같은 데이터를 공유. 
+--
+-- Core principles:
+--   - study_sessions: study metadata + raw stroke JSON + free-form per-domain metadata
+--   - study_session_assets: links scans / handwriting PNGs / audio / video / subtitles stored in documents (no single *_id columns)
+--   - document originals are never deleted (only the asset links cascade)
+--   - fields unused in Phase 1 (review_state / quiz / ocr / ai_summary / prompt) are nullable; no automatic logic
+--
+-- Index strategy:
+--   - certification / language card groups: partial indexes (one per study_type)
+--   - SRS review queue: only rows with review_state IS NOT NULL
+--   - asset joins: by study_session_id, document_id and asset_type
+
+CREATE TABLE IF NOT EXISTS study_sessions (
+    id BIGSERIAL PRIMARY KEY,
+    user_id BIGINT NOT NULL REFERENCES users(id) ON DELETE CASCADE,
+
+    -- Domain switch
+    study_type VARCHAR(30) NOT NULL DEFAULT 'certification',
+    -- 'certification' | 'language' (room to add e.g. 'general' in Phase 2+)
+
+    -- Certification metadata
+    certification VARCHAR(120),   -- e.g. "산업안전기사" (Industrial Safety Engineer), when study_type='certification'
+    -- Language metadata
+    language_code VARCHAR(20),    -- 'ja' | 'en' | 'zh' (when study_type='language')
+    -- Shared: learning level (domain-independent)
+    learning_level VARCHAR(80),   -- e.g. "JLPT N3", "TOEIC 750", "산업안전기사 1차" (Industrial Safety Engineer, first-stage exam)
+
+    -- Shared: subject / topic
+    subject VARCHAR(120),         -- e.g. "산업안전보건법" (Occupational Safety and Health Act) / "漢字"
+    topic VARCHAR(200),           -- e.g. "안전보건관리책임자의 직무" (duties of the safety and health manager) / "安全"
+
+    -- Snapshot of the source text (only the excerpt is kept, independently of any source_scan asset)
+    source_text TEXT,             -- e.g. "安全" / an excerpt of a statute's text
+    source_page INTEGER,
+
+    -- Study mode
+    mode VARCHAR(30) NOT NULL DEFAULT 'copy',
+    -- shared: 'copy' (transcription) / 'trace' (tracing) / 'blank-repeat' (repetitive writing drill)
+    -- language: 'dictation' / 'shadowing'
+    -- Phase 2+: 'quiz'/'flashcard'
+    prompt_question TEXT,         -- Phase 2: AI-generated counter-question
+    expected_answer TEXT,         -- Phase 2: expected answer
+
+    -- Free-form per-domain metadata
+    -- language example: {"reading":"あんぜん","meaning":"안전","romaji":"anzen","furigana":"...",
+    --                    "example_sentence":"安全第一です。","grammar_point":"...","unit_type":"kanji"}
+    --   unit_type: 'kanji' | 'vocabulary' | 'sentence' | 'listening' | 'shadowing'
+    -- certification example: {"law_article":"산업안전보건법 제15조","formula":"...","keywords":[...]}
+    metadata JSONB,
+
+    -- Repetition counters (auxiliary)
+    target_count INTEGER,
+    repetition_count INTEGER NOT NULL DEFAULT 0,
+
+    -- Raw handwriting data: the core of Phase 1
+    strokes_json JSONB,           -- perfect-freehand input points + style
+    canvas_width INTEGER,
+    canvas_height INTEGER,
+    schema_version INTEGER NOT NULL DEFAULT 1,
+
+    -- Text derived from the handwriting: filled in Phase 2 (NULL in Phase 1)
+    ocr_text TEXT,
+    user_corrected_text TEXT,
+    ai_summary TEXT,              -- for the mobile card view (synced by the classify worker)
+
+    -- SRS / quiz statistics: activated in Phase 4, NULL in Phase 1
+    review_state VARCHAR(20),     -- 'new' | 'learning' | 'weak' | 'mastered'
+    next_review_at TIMESTAMPTZ,
+    last_quiz_at TIMESTAMPTZ,
+    correct_count INTEGER NOT NULL DEFAULT 0,
+    incorrect_count INTEGER NOT NULL DEFAULT 0,
+
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- Recent sessions per user and domain (list shared by certification / language)
+CREATE INDEX IF NOT EXISTS idx_study_sessions_type_user_created
+    ON study_sessions (user_id, study_type, created_at DESC);
+
+-- Certification card groups (Phase 3)
+CREATE INDEX IF NOT EXISTS idx_study_sessions_cert
+    ON study_sessions (user_id, certification, subject, topic)
+    WHERE study_type = 'certification';
+
+-- Language card groups (Phase 3)
+CREATE INDEX IF NOT EXISTS idx_study_sessions_lang
+    ON study_sessions (user_id, language_code, learning_level, subject, topic)
+    WHERE study_type = 'language';
+
+-- SRS review queue (Phase 4): only rows that have a review_state
+CREATE INDEX IF NOT EXISTS idx_study_sessions_review
+    ON study_sessions (user_id, review_state, next_review_at)
+    WHERE review_state IS NOT NULL;
+
+-- Quiz statistics (Phase 4): only rows that have a last_quiz_at
+CREATE INDEX IF NOT EXISTS idx_study_sessions_quiz_stats
+    ON study_sessions (user_id, study_type, last_quiz_at)
+    WHERE last_quiz_at IS NOT NULL;
+
+
+CREATE TABLE IF NOT EXISTS study_session_assets (
+    id BIGSERIAL PRIMARY KEY,
+    study_session_id BIGINT NOT NULL REFERENCES study_sessions(id) ON DELETE CASCADE,
+    document_id BIGINT NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
+
+    -- asset_type: 'source_scan' | 'handwriting_png' | 'audio' | 'video' | 'transcript' | 'reference'
+    asset_type VARCHAR(30) NOT NULL,
+    -- role: 'prompt' | 'answer' | 'pronunciation' | 'lecture' | 'listening_source'
+    --       | 'shadowing_source' | 'reference'
+    role VARCHAR(40),
+    sort_order INTEGER NOT NULL DEFAULT 0,
+
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+
+    -- Duplicate (session, document, asset_type, role) rows are rejected -> basis for the 409 of POST /assets.
+    -- NULL roles compare as distinct (Postgres default; intended). Named explicitly to match the ORM's UniqueConstraint.
+    CONSTRAINT study_session_assets_session_id_document_id_asset_type_rol_key UNIQUE (study_session_id, document_id, asset_type, role)
+);
+
+-- Ordered asset lookup per session
+CREATE INDEX IF NOT EXISTS idx_session_assets_session
+    ON study_session_assets (study_session_id, sort_order);
+
+-- Reverse lookup: which sessions a document is linked to (filter ?document_id=...)
+CREATE INDEX IF NOT EXISTS idx_session_assets_document
+    ON study_session_assets (document_id);
+
+-- Sessions that have a given asset_type (filter ?asset_type=audio)
+CREATE INDEX IF NOT EXISTS idx_session_assets_type
+    ON study_session_assets (study_session_id, asset_type);