"""풀이 선별 알고리즘 — bucket + stage 비율 기반 (Phase 1-E). vision: 단일 풀이 진입점 + 옵션 토글. stage 만 사용자 명시 (입문/학습 중/시험 직전). 자동 단계 감지 / EMA / 사용자 성과 기반 비율 조정 = Phase 1 범위 밖. bucket: - unattempted: progress 없거나 last_outcome IS NULL - wrong_or_unsure: pattern_state IN (regressed, chronic_wrong, unsure) - due_review: due_at <= now() AND review_stage < 4 - regressed: pattern_state = 'regressed' - frequent: study_questions.related_repeat_grade IN (단골, 잘 나오는 반복 출제) - random: 전체 ready 문제 (backfill) stage 별 비율 (목표 — 후보 부족 시 random backfill): - intro: unattempted 55, wrong_or_unsure 30, frequent 15 - learning: unattempted 30, wrong_or_unsure 40, due_review 20, frequent 10 - pre_exam: wrong_or_unsure 30, due_review 20, frequent 20, random 20, regressed 10 bucket 우선순위 (높은 우선순위에서 먼저 추출 → 다음 bucket 은 이미 뽑힌 qid 제외): - pre_exam: due_review > chronic_wrong/unsure > regressed > frequent > random - learning: due_review > wrong_or_unsure > unattempted > frequent - intro: unattempted > wrong_or_unsure > frequent 비율 검증: - 후보 충분 시 목표 비율 ±10% 이내 - 후보 부족 시 random backfill 허용 - 최종 size 충족 (못 채우면 fail) - 동일 qid 중복 없음 """ from __future__ import annotations import random from collections import OrderedDict from datetime import datetime, timezone from sqlalchemy import and_, or_, select from sqlalchemy.ext.asyncio import AsyncSession from models.study_question import StudyQuestion from models.study_question_progress import StudyQuestionProgress STAGE_INTRO = "intro" STAGE_LEARNING = "learning" STAGE_PRE_EXAM = "pre_exam" # stage 별 (bucket → 비율 %). 우선순위는 dict 순서가 곧 우선순위 (Python 3.7+ insertion order). # 합 = 100. 후보 부족 시 random 으로 backfill. 
STAGE_BUCKET_RATIOS: dict[str, OrderedDict[str, int]] = {
    STAGE_INTRO: OrderedDict([
        ("unattempted", 55),
        ("wrong_or_unsure", 30),
        ("frequent", 15),
    ]),
    STAGE_LEARNING: OrderedDict([
        ("due_review", 20),
        ("wrong_or_unsure", 40),
        ("unattempted", 30),
        ("frequent", 10),
    ]),
    STAGE_PRE_EXAM: OrderedDict([
        ("due_review", 20),
        ("wrong_or_unsure", 30),
        ("regressed", 10),
        ("frequent", 20),
        ("random", 20),
    ]),
}

# pattern_state values that put a question into the "wrong_or_unsure" bucket.
WRONG_OR_UNSURE_PATTERNS = ("regressed", "chronic_wrong", "unsure")
# related_repeat_grade values that put a question into the "frequent" bucket.
FREQUENT_GRADES = ("단골", "잘 나오는 반복 출제")


async def select_questions_for_quiz(
    session: AsyncSession,
    *,
    user_id: int,
    study_topic_id: int,
    stage: str,
    size: int,
) -> tuple[list[int], dict[str, int]]:
    """Select ``size`` question ids for a quiz using the stage's bucket ratios.

    Buckets are visited in the order they appear in
    ``STAGE_BUCKET_RATIOS[stage]``; each bucket contributes up to its share
    of ``size`` and never re-uses an id that an earlier bucket already took.
    Any shortfall is filled at random from the remaining questions of the
    topic, and the final list is shuffled with ``random.shuffle``.

    Args:
        session: Async SQLAlchemy session used for the two lookup queries.
        user_id: Owner of the questions and of the progress rows.
        study_topic_id: Topic whose questions are eligible.
        stage: One of the keys of ``STAGE_BUCKET_RATIOS``.
        size: Number of question ids to return; must be positive.

    Returns:
        ``(qids, distribution)``. ``qids`` holds exactly ``size`` distinct
        ids. ``distribution`` maps each bucket of the stage to the number of
        ids drawn from it, plus ``"random_backfill"`` when backfill was
        needed. When the topic has no non-deleted, active question at all,
        ``([], {})`` is returned instead of raising.

    Raises:
        ValueError: If ``stage`` is unknown, ``size`` is not positive, or the
            topic has some questions but fewer than ``size``.
    """
    if stage not in STAGE_BUCKET_RATIOS:
        raise ValueError(f"unknown stage: {stage!r}")
    if size <= 0:
        raise ValueError("size must be > 0")

    ratios = STAGE_BUCKET_RATIOS[stage]
    now = datetime.now(timezone.utc)

    # 1. Load the topic's non-deleted, active questions in a single query.
    candidate_rows = (
        await session.execute(
            select(
                StudyQuestion.id,
                StudyQuestion.related_repeat_grade,
            )
            .where(
                StudyQuestion.user_id == user_id,
                StudyQuestion.study_topic_id == study_topic_id,
                StudyQuestion.deleted_at.is_(None),
                StudyQuestion.is_active.is_(True),
            )
        )
    ).all()
    if not candidate_rows:
        return [], {}

    all_qids = {r.id for r in candidate_rows}
    frequent_qids = {
        r.id for r in candidate_rows if r.related_repeat_grade in FREQUENT_GRADES
    }

    # 2. Fetch the matching progress rows in bulk (qid -> progress).
    progress_rows = (
        await session.execute(
            select(StudyQuestionProgress).where(
                StudyQuestionProgress.user_id == user_id,
                StudyQuestionProgress.study_topic_id == study_topic_id,
                # in_() is documented as taking a list of literals, so hand it
                # one instead of relying on how a set gets coerced.
                StudyQuestionProgress.study_question_id.in_(list(all_qids)),
            )
        )
    ).scalars().all()
    progress_by_qid: dict[int, StudyQuestionProgress] = {
        p.study_question_id: p for p in progress_rows
    }

    # 3. Build the candidate set of every bucket.
    unattempted: set[int] = set()
    wrong_or_unsure: set[int] = set()
    due_review: set[int] = set()
    regressed: set[int] = set()
    for qid in all_qids:
        p = progress_by_qid.get(qid)
        if p is None or p.last_outcome is None:
            # Never answered: belongs to "unattempted" only.
            unattempted.add(qid)
            continue
        if p.pattern_state in WRONG_OR_UNSURE_PATTERNS:
            wrong_or_unsure.add(qid)
        # NOTE(review): `now` is timezone-aware, so this comparison raises
        # TypeError if due_at comes back naive — confirm the column keeps tz.
        if p.due_at is not None and p.due_at <= now and (p.review_stage or 0) < 4:
            due_review.add(qid)
        if p.pattern_state == "regressed":
            regressed.add(qid)

    bucket_pools: dict[str, set[int]] = {
        "unattempted": unattempted,
        "wrong_or_unsure": wrong_or_unsure,
        "due_review": due_review,
        "regressed": regressed,
        "frequent": frequent_qids,
        "random": all_qids,
    }

    # 4. Draw from each bucket in priority order (dict order = priority),
    #    skipping ids that an earlier bucket already took.
    selected: list[int] = []
    selected_set: set[int] = set()
    distribution: dict[str, int] = {}
    for bucket_name, pct in ratios.items():
        # Targets are rounded per bucket, so their sum can exceed `size`
        # (intro, size=10 -> 6 + 3 + 2 = 11). Capping at the slots still open
        # keeps the result at exactly `size` ids.
        remaining = size - len(selected)
        target = min(round(size * pct / 100), remaining)
        if target <= 0:
            distribution[bucket_name] = 0
            continue
        pool = list(bucket_pools[bucket_name] - selected_set)
        if not pool:
            distribution[bucket_name] = 0
            continue
        random.shuffle(pool)
        picked = pool[:target]
        selected.extend(picked)
        selected_set.update(picked)
        distribution[bucket_name] = len(picked)

    # 5. Fill any shortfall at random from the questions not yet selected.
    if len(selected) < size:
        backfill_pool = list(all_qids - selected_set)
        random.shuffle(backfill_pool)
        need = size - len(selected)
        backfill = backfill_pool[:need]
        selected.extend(backfill)
        selected_set.update(backfill)
        if backfill:
            distribution["random_backfill"] = len(backfill)

    if len(selected) < size:
        raise ValueError(
            f"not enough questions: requested={size} available={len(selected)} "
            f"(topic_questions={len(all_qids)})"
        )

    # 6. Shuffle so the presentation order does not reveal the bucket order.
    random.shuffle(selected)
    return selected, distribution