diff --git a/app/api/study_topics.py b/app/api/study_topics.py
index be7115b..5d51ad6 100644
--- a/app/api/study_topics.py
+++ b/app/api/study_topics.py
@@ -1238,6 +1238,10 @@ class QuizSessionStartRequest(BaseModel):
     wrong_only: bool = False
     abandon_existing: bool = False  # true 면 기존 in_progress 세션을 abandoned 로 마감 후 새로
     quiz_mode: QuizMode = QuizMode.random
+    # Phase 1-E: bucket + stage-ratio selection. Setting stage bypasses _select_questions_for_topic
+    # in favor of select_questions_for_quiz; size is the number of questions (subject is ignored).
+    stage: str | None = Field(default=None, pattern="^(intro|learning|pre_exam)$")
+    size: int | None = Field(default=None, ge=1, le=200)
 
 
 class QuizSessionSummary(BaseModel):
@@ -1490,20 +1494,38 @@ async def start_quiz_session(
         existing.updated_at = datetime.now(timezone.utc)
         await session.flush()
 
-    # 신규 출제. PR-12-B: random 모드 → subject bucket 단위 type spacing 적용.
-    apply_spacing = body.quiz_mode == QuizMode.random
-    qids, distribution = await _select_questions_for_topic(
-        session,
-        user,
-        topic_id,
-        subject=body.subject,
-        scope=body.scope,
-        target_per_subject=body.target_per_subject,
-        wrong_only=body.wrong_only,
-        apply_spacing=apply_spacing,
-    )
-    if not qids:
-        raise HTTPException(status_code=400, detail="출제 가능한 문제가 없습니다")
+    # Phase 1-E: with an explicit stage, select by bucket + ratio (single quiz entry point vision).
+    # Without a stage, keep the legacy subject bucket + spacing path (PR-12-B compatible).
+    if body.stage is not None:
+        from services.study.quiz_selection import select_questions_for_quiz
+        size = body.size or 100
+        try:
+            qids, distribution = await select_questions_for_quiz(
+                session,
+                user_id=user.id,
+                study_topic_id=topic_id,
+                stage=body.stage,
+                size=size,
+            )
+        except ValueError as e:
+            raise HTTPException(status_code=400, detail=str(e)) from e
+        if not qids:
+            raise HTTPException(status_code=400, detail="출제 가능한 문제가 없습니다")
+    else:
+        # Legacy PR-12-B path: subject bucket + type spacing.
+        apply_spacing = body.quiz_mode == QuizMode.random
+        qids, distribution = await _select_questions_for_topic(
+            session,
+            user,
+            topic_id,
+            subject=body.subject,
+            scope=body.scope,
+            target_per_subject=body.target_per_subject,
+            wrong_only=body.wrong_only,
+            apply_spacing=apply_spacing,
+        )
+        if not qids:
+            raise HTTPException(status_code=400, detail="출제 가능한 문제가 없습니다")
 
     new_session = StudyQuizSession(
         user_id=user.id,
diff --git a/app/services/study/quiz_selection.py b/app/services/study/quiz_selection.py
new file mode 100644
index 0000000..c990611
--- /dev/null
+++ b/app/services/study/quiz_selection.py
@@ -0,0 +1,215 @@
+"""Quiz question selection: bucket + per-stage ratio based (Phase 1-E).
+
+Vision: a single quiz entry point plus option toggles. Only the stage is chosen
+explicitly by the user (intro / learning / pre_exam). Automatic stage detection,
+EMA, and performance-based ratio tuning are out of scope for Phase 1.
+
+Buckets:
+  - unattempted: no progress row, or last_outcome IS NULL
+  - wrong_or_unsure: pattern_state IN (regressed, chronic_wrong, unsure)
+  - due_review: due_at <= now() AND review_stage < 4
+  - regressed: pattern_state = 'regressed'
+  - frequent: study_questions.related_repeat_grade IN FREQUENT_GRADES
+  - random: every candidate question of the topic (backfill)
+
+Target ratios per stage (random backfill when a bucket runs short):
+  - intro: unattempted 55, wrong_or_unsure 30, frequent 15
+  - learning: unattempted 30, wrong_or_unsure 40, due_review 20, frequent 10
+  - pre_exam: wrong_or_unsure 30, due_review 20, frequent 20, random 20, regressed 10
+
+Bucket priority (higher-priority buckets draw first; later buckets skip qids
+that were already picked):
+  - pre_exam: due_review > wrong_or_unsure > regressed > frequent > random
+  - learning: due_review > wrong_or_unsure > unattempted > frequent
+  - intro: unattempted > wrong_or_unsure > frequent
+
+Ratio validation:
+  - with enough candidates, within +/-10% of the target ratio
+  - with too few candidates, random backfill is allowed
+  - the final size must be met exactly (otherwise fail)
+  - no duplicate qids
+"""
+
+from __future__ import annotations
+
+import random
+from collections import OrderedDict
+from datetime import datetime, timezone
+
+from sqlalchemy import and_, or_, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from models.study_question import StudyQuestion
+from models.study_question_progress import StudyQuestionProgress
+
+STAGE_INTRO = "intro"
+STAGE_LEARNING = "learning"
+STAGE_PRE_EXAM = "pre_exam"
+
+# Per-stage mapping of bucket -> target percentage. Insertion order is the draw
+# priority. Each stage sums to 100; shortfalls are backfilled at random.
+STAGE_BUCKET_RATIOS: dict[str, OrderedDict[str, int]] = {
+    STAGE_INTRO: OrderedDict([
+        ("unattempted", 55),
+        ("wrong_or_unsure", 30),
+        ("frequent", 15),
+    ]),
+    STAGE_LEARNING: OrderedDict([
+        ("due_review", 20),
+        ("wrong_or_unsure", 40),
+        ("unattempted", 30),
+        ("frequent", 10),
+    ]),
+    STAGE_PRE_EXAM: OrderedDict([
+        ("due_review", 20),
+        ("wrong_or_unsure", 30),
+        ("regressed", 10),
+        ("frequent", 20),
+        ("random", 20),
+    ]),
+}
+
+WRONG_OR_UNSURE_PATTERNS = ("regressed", "chronic_wrong", "unsure")
+FREQUENT_GRADES = ("단골", "잘 나오는 반복 출제")
+
+
+async def select_questions_for_quiz(
+    session: AsyncSession,
+    *,
+    user_id: int,
+    study_topic_id: int,
+    stage: str,
+    size: int,
+) -> tuple[list[int], dict[str, int]]:
+    """Select ``size`` question ids for a quiz using the stage's bucket ratios.
+
+    Args:
+        session: Async DB session used for the two read queries.
+        user_id: Owner of the questions and progress rows.
+        study_topic_id: Topic whose questions are eligible.
+        stage: One of the keys of STAGE_BUCKET_RATIOS.
+        size: Number of questions to return; must be > 0.
+
+    Returns:
+        ``(qids, distribution)``. ``qids`` holds exactly ``size`` unique ids in
+        shuffled order. ``distribution`` maps each bucket name (plus
+        ``"random_backfill"`` when used) to the number of ids drawn from it.
+        Returns ``([], {})`` when the topic has no candidate questions.
+
+    Raises:
+        ValueError: Unknown stage, non-positive size, or fewer than ``size``
+            candidate questions in the topic.
+    """
+    if stage not in STAGE_BUCKET_RATIOS:
+        raise ValueError(f"unknown stage: {stage!r}")
+    if size <= 0:
+        raise ValueError("size must be > 0")
+
+    ratios = STAGE_BUCKET_RATIOS[stage]
+    now = datetime.now(timezone.utc)
+
+    # 1. Fetch the topic's non-deleted, active question pool in a single query.
+    candidate_rows = (
+        await session.execute(
+            select(
+                StudyQuestion.id,
+                StudyQuestion.related_repeat_grade,
+            )
+            .where(
+                StudyQuestion.user_id == user_id,
+                StudyQuestion.study_topic_id == study_topic_id,
+                StudyQuestion.deleted_at.is_(None),
+                StudyQuestion.is_active.is_(True),
+            )
+        )
+    ).all()
+    if not candidate_rows:
+        return [], {}
+    all_qids = {r.id for r in candidate_rows}
+    frequent_qids = {r.id for r in candidate_rows if r.related_repeat_grade in FREQUENT_GRADES}
+
+    # 2. Bulk-fetch progress rows (qid -> progress).
+    progress_rows = (
+        await session.execute(
+            select(StudyQuestionProgress).where(
+                StudyQuestionProgress.user_id == user_id,
+                StudyQuestionProgress.study_topic_id == study_topic_id,
+                StudyQuestionProgress.study_question_id.in_(all_qids),
+            )
+        )
+    ).scalars().all()
+    progress_by_qid: dict[int, StudyQuestionProgress] = {
+        p.study_question_id: p for p in progress_rows
+    }
+
+    # 3. Build the candidate set of each bucket.
+    unattempted: set[int] = set()
+    wrong_or_unsure: set[int] = set()
+    due_review: set[int] = set()
+    regressed: set[int] = set()
+    for qid in all_qids:
+        p = progress_by_qid.get(qid)
+        if p is None or p.last_outcome is None:
+            unattempted.add(qid)
+            continue
+        if p.pattern_state in WRONG_OR_UNSURE_PATTERNS:
+            wrong_or_unsure.add(qid)
+        if p.due_at is not None and p.due_at <= now and (p.review_stage or 0) < 4:
+            due_review.add(qid)
+        if p.pattern_state == "regressed":
+            regressed.add(qid)
+
+    bucket_pools: dict[str, set[int]] = {
+        "unattempted": unattempted,
+        "wrong_or_unsure": wrong_or_unsure,
+        "due_review": due_review,
+        "regressed": regressed,
+        "frequent": frequent_qids,
+        "random": all_qids,
+    }
+
+    # 4. Draw from each bucket in priority order (dict order), skipping qids that
+    #    an earlier bucket already took.
+    selected: list[int] = []
+    selected_set: set[int] = set()
+    distribution: dict[str, int] = {}
+
+    for bucket_name, pct in ratios.items():
+        # Per-bucket rounding can add up to more than ``size`` (e.g. intro with
+        # size=10 rounds to 6 + 3 + 2 = 11), so cap each draw at the remaining
+        # quota to keep the "exactly size" contract.
+        target = min(round(size * pct / 100), size - len(selected))
+        if target <= 0:
+            distribution[bucket_name] = 0
+            continue
+        pool = list(bucket_pools[bucket_name] - selected_set)
+        if not pool:
+            distribution[bucket_name] = 0
+            continue
+        random.shuffle(pool)
+        picked = pool[:target]
+        selected.extend(picked)
+        selected_set.update(picked)
+        distribution[bucket_name] = len(picked)
+
+    # 5. Backfill any shortfall at random from the not-yet-picked pool.
+    if len(selected) < size:
+        backfill_pool = list(all_qids - selected_set)
+        random.shuffle(backfill_pool)
+        need = size - len(selected)
+        backfill = backfill_pool[:need]
+        selected.extend(backfill)
+        selected_set.update(backfill)
+        if backfill:
+            distribution["random_backfill"] = len(backfill)
+
+    if len(selected) < size:
+        raise ValueError(
+            f"not enough questions: requested={size} available={len(selected)} "
+            f"(topic_questions={len(all_qids)})"
+        )
+
+    # 6. Shuffle the final order so questions are not grouped by bucket.
+    random.shuffle(selected)
+
+    return selected, distribution