9094b2dbc5
vision 의 단일 풀이 진입점 — stage (intro/learning/pre_exam) + size 옵션으로 같은 endpoint 가 다른 분포의 문제 출제. services/study/quiz_selection.py: - bucket: unattempted / wrong_or_unsure / due_review / regressed / frequent / random - stage 별 비율: - intro: unattempted 55, wrong_or_unsure 30, frequent 15 - learning: due_review 20, wrong_or_unsure 40, unattempted 30, frequent 10 - pre_exam: due_review 20, wrong_or_unsure 30, regressed 10, frequent 20, random 20 - bucket 우선순위 (dict 순서) — 다음 bucket 은 이미 뽑힌 qid 제외 - 후보 부족 시 random backfill, 그래도 부족 시 ValueError api/study_topics.py: - QuizSessionStartRequest 에 stage / size 옵션 추가 - stage 명시 시 select_questions_for_quiz 사용 - stage 미명시 시 기존 PR-12-B 경로 (subject bucket + spacing) 호환 유지 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
196 lines
6.8 KiB
Python
196 lines
6.8 KiB
Python
"""풀이 선별 알고리즘 — bucket + stage 비율 기반 (Phase 1-E).
|
|
|
|
vision: 단일 풀이 진입점 + 옵션 토글. stage 만 사용자 명시 (입문/학습 중/시험 직전).
|
|
자동 단계 감지 / EMA / 사용자 성과 기반 비율 조정 = Phase 1 범위 밖.
|
|
|
|
bucket:
|
|
- unattempted: progress 없거나 last_outcome IS NULL
|
|
- wrong_or_unsure: pattern_state IN (regressed, chronic_wrong, unsure)
|
|
- due_review: due_at <= now() AND review_stage < 4
|
|
- regressed: pattern_state = 'regressed'
|
|
- frequent: study_questions.related_repeat_grade IN (단골, 잘 나오는 반복 출제)
|
|
- random: 전체 ready 문제 (backfill)
|
|
|
|
stage 별 비율 (목표 — 후보 부족 시 random backfill):
|
|
- intro: unattempted 55, wrong_or_unsure 30, frequent 15
|
|
- learning: unattempted 30, wrong_or_unsure 40, due_review 20, frequent 10
|
|
- pre_exam: wrong_or_unsure 30, due_review 20, frequent 20, random 20, regressed 10
|
|
|
|
bucket 우선순위 (높은 우선순위에서 먼저 추출 → 다음 bucket 은 이미 뽑힌 qid 제외):
|
|
- pre_exam: due_review > wrong_or_unsure > regressed > frequent > random
|
|
- learning: due_review > wrong_or_unsure > unattempted > frequent
|
|
- intro: unattempted > wrong_or_unsure > frequent
|
|
|
|
비율 검증:
|
|
- 후보 충분 시 목표 비율 ±10% 이내
|
|
- 후보 부족 시 random backfill 허용
|
|
- 최종 size 충족 (못 채우면 fail)
|
|
- 동일 qid 중복 없음
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import random
|
|
from collections import OrderedDict
|
|
from datetime import datetime, timezone
|
|
|
|
from sqlalchemy import and_, or_, select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from models.study_question import StudyQuestion
|
|
from models.study_question_progress import StudyQuestionProgress
|
|
|
|
# Stage identifiers accepted by the quiz-selection entry point.
STAGE_INTRO = "intro"
STAGE_LEARNING = "learning"
STAGE_PRE_EXAM = "pre_exam"

# Per-stage mapping of bucket name -> target share in percent (each stage sums to 100).
# Insertion order doubles as bucket priority: earlier buckets are drawn from first.
# When a bucket cannot supply its share, the shortfall is backfilled at random.
STAGE_BUCKET_RATIOS: dict[str, OrderedDict[str, int]] = {
    STAGE_INTRO: OrderedDict(
        unattempted=55,
        wrong_or_unsure=30,
        frequent=15,
    ),
    STAGE_LEARNING: OrderedDict(
        due_review=20,
        wrong_or_unsure=40,
        unattempted=30,
        frequent=10,
    ),
    STAGE_PRE_EXAM: OrderedDict(
        due_review=20,
        wrong_or_unsure=30,
        regressed=10,
        frequent=20,
        random=20,
    ),
}

# pattern_state values that place a question in the "wrong_or_unsure" bucket.
WRONG_OR_UNSURE_PATTERNS = ("regressed", "chronic_wrong", "unsure")
# related_repeat_grade values that place a question in the "frequent" bucket.
FREQUENT_GRADES = ("단골", "잘 나오는 반복 출제")
|
|
|
|
|
|
async def select_questions_for_quiz(
    session: AsyncSession,
    *,
    user_id: int,
    study_topic_id: int,
    stage: str,
    size: int,
) -> tuple[list[int], dict[str, int]]:
    """Pick ``size`` question ids for a quiz using the stage's bucket ratios.

    Buckets are visited in the order they appear in ``STAGE_BUCKET_RATIOS[stage]``
    (that order is the priority). Each bucket contributes up to its rounded share
    of ``size``; ids already picked by an earlier bucket are excluded. Any
    remaining slots are filled at random from the rest of the topic's questions.

    Args:
        session: Async SQLAlchemy session used for the two read queries.
        user_id: Owner of the questions and of the progress rows.
        study_topic_id: Topic whose questions are eligible.
        stage: One of the keys of ``STAGE_BUCKET_RATIOS``.
        size: Number of question ids to return; must be positive.

    Returns:
        ``(qids, distribution)``. ``qids`` is a shuffled list of exactly ``size``
        unique question ids. ``distribution`` maps each bucket name of the stage
        to how many ids it contributed, plus ``"random_backfill"`` when backfill
        was needed. If the topic has no eligible questions at all, ``([], {})``
        is returned instead of raising.

    Raises:
        ValueError: If ``stage`` is unknown, ``size`` is not positive, or the
            topic has some questions but fewer than ``size``.
    """
    if stage not in STAGE_BUCKET_RATIOS:
        raise ValueError(f"unknown stage: {stage!r}")
    if size <= 0:
        raise ValueError("size must be > 0")

    ratios = STAGE_BUCKET_RATIOS[stage]
    now = datetime.now(timezone.utc)

    # 1. Load the topic's active, non-deleted questions in a single query.
    candidate_rows = (
        await session.execute(
            select(
                StudyQuestion.id,
                StudyQuestion.related_repeat_grade,
            ).where(
                StudyQuestion.user_id == user_id,
                StudyQuestion.study_topic_id == study_topic_id,
                StudyQuestion.deleted_at.is_(None),
                StudyQuestion.is_active.is_(True),
            )
        )
    ).all()
    if not candidate_rows:
        # Nothing to choose from: callers receive an empty result, not an error.
        return [], {}
    all_qids = {r.id for r in candidate_rows}
    frequent_qids = {
        r.id for r in candidate_rows if r.related_repeat_grade in FREQUENT_GRADES
    }

    # 2. Load the progress rows for those questions and index them by question id.
    progress_rows = (
        await session.execute(
            select(StudyQuestionProgress).where(
                StudyQuestionProgress.user_id == user_id,
                StudyQuestionProgress.study_topic_id == study_topic_id,
                StudyQuestionProgress.study_question_id.in_(all_qids),
            )
        )
    ).scalars().all()
    progress_by_qid: dict[int, StudyQuestionProgress] = {
        p.study_question_id: p for p in progress_rows
    }

    # 3. Classify every question into the progress-based buckets.
    #    A question may belong to several of them (e.g. "regressed" questions are
    #    also "wrong_or_unsure"); "unattempted" is exclusive of the others.
    unattempted: set[int] = set()
    wrong_or_unsure: set[int] = set()
    due_review: set[int] = set()
    regressed: set[int] = set()
    for qid in all_qids:
        p = progress_by_qid.get(qid)
        if p is None or p.last_outcome is None:
            unattempted.add(qid)
            continue
        if p.pattern_state in WRONG_OR_UNSURE_PATTERNS:
            wrong_or_unsure.add(qid)
        # NOTE(review): assumes due_at is timezone-aware; comparing a naive
        # datetime with the aware `now` would raise TypeError — confirm the column type.
        if p.due_at is not None and p.due_at <= now and (p.review_stage or 0) < 4:
            due_review.add(qid)
        if p.pattern_state == "regressed":
            regressed.add(qid)

    bucket_pools: dict[str, set[int]] = {
        "unattempted": unattempted,
        "wrong_or_unsure": wrong_or_unsure,
        "due_review": due_review,
        "regressed": regressed,
        "frequent": frequent_qids,
        "random": all_qids,
    }

    # 4. Draw from each bucket in priority order, skipping ids already picked.
    selected: list[int] = []
    selected_set: set[int] = set()
    distribution: dict[str, int] = {}

    for bucket_name, pct in ratios.items():
        # Shares are rounded independently, so their sum can exceed `size`
        # (e.g. intro, size=5 -> 3 + 2 + 1). Cap at the slots still open so the
        # result never contains more than `size` ids.
        remaining = size - len(selected)
        target = min(round(size * pct / 100), remaining)
        pool = list(bucket_pools[bucket_name] - selected_set)
        if target <= 0 or not pool:
            distribution[bucket_name] = 0
            continue
        picked = random.sample(pool, min(target, len(pool)))
        selected.extend(picked)
        selected_set.update(picked)
        distribution[bucket_name] = len(picked)

    # 5. Fill any shortfall at random from the questions not picked yet.
    need = size - len(selected)
    if need > 0:
        backfill_pool = list(all_qids - selected_set)
        backfill = random.sample(backfill_pool, min(need, len(backfill_pool)))
        selected.extend(backfill)
        selected_set.update(backfill)
        if backfill:
            distribution["random_backfill"] = len(backfill)

    if len(selected) < size:
        raise ValueError(
            f"not enough questions: requested={size} available={len(selected)} "
            f"(topic_questions={len(all_qids)})"
        )

    # 6. Shuffle so the presentation order does not reveal the bucket order.
    random.shuffle(selected)

    return selected, distribution
|