From 19f544fb5ef850fbe05d45d161b236070b2af038 Mon Sep 17 00:00:00 2001 From: hyungi Date: Sun, 7 Jun 2026 08:08:55 +0900 Subject: [PATCH] =?UTF-8?q?feat(study):=20=EA=B3=B5=EB=B6=80=20=EC=95=94?= =?UTF-8?q?=EA=B8=B0=EB=85=B8=ED=8A=B8=20Phase=201=20=E2=80=94=20=EC=A0=95?= =?UTF-8?q?=EC=A0=95/=EC=82=AD=EC=A0=9C=20=ED=9B=85=20+=20needs=5Freview?= =?UTF-8?q?=20=ED=81=90=20+=20=EC=95=8C=EB=9E=8C=20=EC=9E=AC=EB=A3=8C=20(H?= =?UTF-8?q?R/A)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 추출 파이프라인(287~298, 별 커밋) 위 HR/A. 신규 마이그레이션 0 (DDL은 295~298 재사용). - HR 정정/삭제 훅: PATCH 본문 수정 → 파생 study_memo_cards needs_review=auto(source_changed), soft-DELETE → source_deleted. flag_cards_for_source 헬퍼(임시 플래그, 최종정리는 워커 supersede). - HR needs_review: PATCH set/clear(flagged_by='user' 서버강제) + GET /study-questions/needs-review 목록·count(부분인덱스 술어 일치, 동적 {id} 라우트보다 먼저 등록해 int 파싱 충돌 회피). - A 알람 재료: study_topics.focused_at 공부중 토글 + study_reminder cron(09/13/19 KST, due 술어 quiz_selection SQL 재현·시간슬롯 truncate 멱등·LLM 0) + GET /api/study-reminders/latest(없으면 204). - 테스트: 가드/정규화 18/18 (정량=evidence 원문·cue/cloze 누출·dedup·배치). 검증: 앱 부팅 import+mapper OK · 가드 18/18 PASS. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- app/api/study_questions.py | 102 +++++++++++++++++ app/api/study_reminders.py | 54 +++++++++ app/api/study_topics.py | 10 ++ app/main.py | 5 + app/models/study_reminder.py | 37 ++++++ app/models/study_topic.py | 4 + app/workers/study_reminder.py | 92 +++++++++++++++ tests/test_study_memo_card_guards.py | 161 +++++++++++++++++++++++++++ 8 files changed, 465 insertions(+) create mode 100644 app/api/study_reminders.py create mode 100644 app/models/study_reminder.py create mode 100644 app/workers/study_reminder.py create mode 100644 tests/test_study_memo_card_guards.py diff --git a/app/api/study_questions.py b/app/api/study_questions.py index ab5a4c6..aeaff5a 100644 --- a/app/api/study_questions.py +++ b/app/api/study_questions.py @@ -26,6 +26,7 @@ from core.auth import get_current_user from core.config import settings from core.database import get_session from models.study_question import StudyQuestion, StudyQuestionAttempt +from models.study_memo_card import flag_cards_for_source from models.study_question_image import StudyQuestionImage from models.study_quiz_session import StudyQuizSession from models.study_topic import StudyTopic @@ -93,6 +94,8 @@ class StudyQuestionUpdate(BaseModel): explanation: str | None = None source_note: str | None = None is_active: bool | None = None + # 공부 암기노트: 검수 대기 플래그 set/clear (서버가 flagged_by='user' 강제) + needs_review: bool | None = None class QuestionAttemptStats(BaseModel): @@ -136,6 +139,10 @@ class StudyQuestionResponse(BaseModel): ai_explanation_model: str | None = None # PR-8: 첨부 이미지 images: list[StudyQuestionImageItem] = [] + # 공부 암기노트: 검수 대기 플래그 + needs_review: bool = False + flagged_at: datetime | None = None + flagged_by: str | None = None created_at: datetime updated_at: datetime stats: QuestionAttemptStats @@ -558,6 +565,9 @@ async def create_question_in_topic( ai_explanation_generated_at=q.ai_explanation_generated_at, ai_explanation_model=q.ai_explanation_model, 
images=await _images_for_question(session, q.id), + needs_review=q.needs_review, + flagged_at=q.flagged_at, + flagged_by=q.flagged_by, created_at=q.created_at, updated_at=q.updated_at, stats=stats, @@ -728,6 +738,73 @@ async def review_questions_for_topic( # ─── 단건 엔드포인트 ─── +class NeedsReviewItem(BaseModel): + """검수 대기 큐 항목 (공부 암기노트).""" + id: int + study_topic_id: int + question_text: str + flagged_at: datetime | None = None + flagged_by: str | None = None + + +# 주의: 아래 두 static 라우트는 /study-questions/{question_id} (동적, int) 보다 먼저 +# 정의해야 한다. 뒤에 두면 'needs-review' 가 question_id 로 파싱돼 422. +@router.get("/study-questions/needs-review", response_model=list[NeedsReviewItem]) +async def list_needs_review_questions( + user: Annotated[User, Depends(get_current_user)], + session: Annotated[AsyncSession, Depends(get_session)], +): + """검수 대기(needs_review=true) 문제 목록 — 전 토픽 횡단. + 부분 인덱스(WHERE deleted_at IS NULL AND needs_review)와 WHERE 술어 일치.""" + rows = ( + await session.execute( + select( + StudyQuestion.id, + StudyQuestion.study_topic_id, + StudyQuestion.question_text, + StudyQuestion.flagged_at, + StudyQuestion.flagged_by, + ) + .where( + StudyQuestion.user_id == user.id, + StudyQuestion.deleted_at.is_(None), + StudyQuestion.needs_review, + ) + .order_by(StudyQuestion.flagged_at.asc().nulls_last()) + ) + ).all() + return [ + NeedsReviewItem( + id=r.id, + study_topic_id=r.study_topic_id, + question_text=_truncate(r.question_text, 120), + flagged_at=r.flagged_at, + flagged_by=r.flagged_by, + ) + for r in rows + ] + + +@router.get("/study-questions/needs-review/count") +async def count_needs_review_questions( + user: Annotated[User, Depends(get_current_user)], + session: Annotated[AsyncSession, Depends(get_session)], +): + """검수 대기 건수 (결과화면 '수정 대기 N' 배지용).""" + n = ( + await session.execute( + select(func.count()) + .select_from(StudyQuestion) + .where( + StudyQuestion.user_id == user.id, + StudyQuestion.deleted_at.is_(None), + StudyQuestion.needs_review, + ) + ) + 
).scalar_one() + return {"count": n} + + @router.get("/study-questions/{question_id}", response_model=StudyQuestionResponse) async def get_question( question_id: int, @@ -758,6 +835,9 @@ async def get_question( ai_explanation_generated_at=q.ai_explanation_generated_at, ai_explanation_model=q.ai_explanation_model, images=await _images_for_question(session, q.id), + needs_review=q.needs_review, + flagged_at=q.flagged_at, + flagged_by=q.flagged_by, created_at=q.created_at, updated_at=q.updated_at, stats=stats, @@ -809,6 +889,22 @@ async def update_question( if RELATED_STALE_TRIGGER & fields_set and q.related_computed_at is not None: q.related_computed_at = None + # 공부 암기노트: needs_review 검수 플래그 set/clear (사용자 액션 → flagged_by='user'). + if "needs_review" in fields_set: + q.needs_review = bool(body.needs_review) + if q.needs_review: + q.flagged_by = "user" + q.flagged_at = datetime.now(timezone.utc) + else: + q.flagged_by = None + q.flagged_at = None + + # 공부 암기노트: 본문 핵심 필드 변경 시 파생 암기카드를 검토 대기로 마킹(source_changed). + # 카드는 '구' ai_explanation 에서 추출됐으므로 정정 후 stale 가능 — 즉시 가시화 플래그. + # 최종 stale 정리는 card_extract 워커의 supersede 가 책임(새 버전 추출 시 구버전 retire). + if AI_STALE_TRIGGER & fields_set: + await flag_cards_for_source(session, source_question_id=q.id, reason="source_changed") + q.updated_at = datetime.now(timezone.utc) await session.commit() @@ -834,6 +930,9 @@ async def update_question( ai_explanation_generated_at=q.ai_explanation_generated_at, ai_explanation_model=q.ai_explanation_model, images=await _images_for_question(session, q.id), + needs_review=q.needs_review, + flagged_at=q.flagged_at, + flagged_by=q.flagged_by, created_at=q.created_at, updated_at=q.updated_at, stats=stats, @@ -867,6 +966,9 @@ async def soft_delete_question( ) .values(related_computed_at=None) ) + # 공부 암기노트: 소스 문제 삭제 시 파생 암기카드를 검토 대기로 마킹(source_deleted). + # study_questions 는 soft-delete 만이라 카드 FK CASCADE 는 미발동 — 이 훅이 실 경로. 
+ await flag_cards_for_source(session, source_question_id=q.id, reason="source_deleted") await session.commit() diff --git a/app/api/study_reminders.py b/app/api/study_reminders.py new file mode 100644 index 0000000..1b4a3d0 --- /dev/null +++ b/app/api/study_reminders.py @@ -0,0 +1,54 @@ +"""study_reminders API — 알람 재료 조회 (공부 암기노트 Phase 1, A 워크스트림). + +GET /latest = 가장 최근 발화된 알람 1건(현재 due 스냅샷). 없으면 204. +종일 오프라인 후 과거 슬롯(09/13시)은 유실 = 의도("현재 due만"). push 채널·디바이스 UX 는 P3. +별 라우터(prefix=/api/study-reminders)로 /study-topics·/study-questions 경로와 충돌 회피. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Annotated + +from fastapi import APIRouter, Depends, Response +from pydantic import BaseModel +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from core.auth import get_current_user +from core.database import get_session +from models.study_reminder import StudyReminder +from models.user import User + +router = APIRouter() + + +class ReminderResponse(BaseModel): + id: int + due_count: int | None = None + focus_topic_names: list | None = None + fired_at: datetime + + +@router.get("/latest", response_model=ReminderResponse) +async def latest_reminder( + user: Annotated[User, Depends(get_current_user)], + session: Annotated[AsyncSession, Depends(get_session)], +): + """현재 due 요약 1건. 
없으면 204 No Content.""" + row = ( + await session.execute( + select(StudyReminder) + .where(StudyReminder.user_id == user.id) + .order_by(StudyReminder.fired_at.desc()) + .limit(1) + ) + ).scalar_one_or_none() + if row is None: + return Response(status_code=204) + return ReminderResponse( + id=row.id, + due_count=row.due_count, + focus_topic_names=row.focus_topic_names, + fired_at=row.fired_at, + ) diff --git a/app/api/study_topics.py b/app/api/study_topics.py index 89b8aef..f56f56a 100644 --- a/app/api/study_topics.py +++ b/app/api/study_topics.py @@ -82,6 +82,8 @@ class StudyTopicUpdate(BaseModel): # PR-6: 시험 메타 exam_round_size: int | None = Field(default=None, ge=1, le=300) exam_subjects: list[str] | None = None + # 공부 암기노트: 공부중 토글 (true=focused_at=now, false=clear) + focused: bool | None = None class StudyTopicResponse(BaseModel): @@ -99,6 +101,8 @@ class StudyTopicResponse(BaseModel): # PR-6: 시험 메타 exam_round_size: int | None = None exam_subjects: list[str] = [] + # 공부 암기노트: 공부중 태그 상태 + focused: bool = False created_at: datetime updated_at: datetime @@ -193,6 +197,8 @@ class StudyTopicMeta(BaseModel): # PR-6: 시험 메타 exam_round_size: int | None = None exam_subjects: list[str] = [] + # 공부 암기노트: 공부중 태그 상태 + focused: bool = False created_at: datetime updated_at: datetime @@ -679,6 +685,9 @@ async def update_study_topic( topic.exam_round_size = body.exam_round_size if "exam_subjects" in fields_set and body.exam_subjects is not None: topic.exam_subjects = body.exam_subjects + # 공부 암기노트: 공부중 태그 토글 (focused_at IS NOT NULL = reminder/세션 대상) + if "focused" in fields_set: + topic.focused_at = datetime.now(timezone.utc) if body.focused else None topic.updated_at = datetime.now(timezone.utc) try: @@ -721,6 +730,7 @@ async def update_study_topic( question_count=int(qc), exam_round_size=topic.exam_round_size, exam_subjects=topic.exam_subjects or [], + focused=topic.focused_at is not None, created_at=topic.created_at, updated_at=topic.updated_at, ) diff --git a/app/main.py 
b/app/main.py index c4861f2..4159145 100644 --- a/app/main.py +++ b/app/main.py @@ -27,6 +27,7 @@ from api.study_question_progress import router as study_question_progress_router from api.study_questions import router as study_questions_router from api.study_sessions import router as study_sessions_router from api.study_topics import router as study_topics_router +from api.study_reminders import router as study_reminders_router from api.video import router as video_router from core.config import settings from core.database import async_session, engine, init_db @@ -56,6 +57,7 @@ async def lifespan(app: FastAPI): from workers.study_session_queue_consumer import consume_study_session_queue from workers.study_memo_card_jobs_consumer import consume_study_memo_card_queue from workers.study_card_enqueue import run as study_card_enqueue_run + from workers.study_reminder import run as study_reminder_run from workers.study_question_embed_worker import ( refresh_stale_related as study_q_related_refresh, run as study_q_embed_run, @@ -111,6 +113,8 @@ async def lifespan(app: FastAPI): scheduler.add_job(daily_digest_run, CronTrigger(hour=20, timezone=KST), id="daily_digest") scheduler.add_job(global_digest_run, CronTrigger(hour=4, minute=0, timezone=KST), id="global_digest") scheduler.add_job(morning_briefing_run, CronTrigger(hour=5, minute=10, timezone=KST), id="morning_briefing") + # 공부 암기노트 Phase 1: 공부중 토픽 due 요약 알람 재료 (09/13/19 KST). LLM 0. 
+ scheduler.add_job(study_reminder_run, CronTrigger(hour="9,13,19", timezone=KST), id="study_reminder") scheduler.add_job(news_collector_run, "interval", hours=6, id="news_collector") scheduler.start() @@ -162,6 +166,7 @@ app.include_router(study_sessions_router, prefix="/api/study-sessions", tags=["s app.include_router(study_topics_router, prefix="/api/study-topics", tags=["study-topics"]) # study_questions: 라우터 안에서 /study-topics/{id}/questions 와 /study-questions/{id} 두 줄기를 모두 정의하므로 prefix=/api 로 등록 app.include_router(study_questions_router, prefix="/api", tags=["study-questions"]) +app.include_router(study_reminders_router, prefix="/api/study-reminders", tags=["study-reminders"]) # Phase 1: 학습 진행 상태 (review-complete + review-queue). prefix=/api/study-topics 안에 정의됨. app.include_router(study_question_progress_router, prefix="/api", tags=["study-progress"]) diff --git a/app/models/study_reminder.py b/app/models/study_reminder.py new file mode 100644 index 0000000..7b73fe1 --- /dev/null +++ b/app/models/study_reminder.py @@ -0,0 +1,37 @@ +"""study_reminders ORM — 알람 재료 append-only (공부 암기노트 Phase 1). + +study_reminder cron(09/13/19 KST)이 focus 토픽 due 요약을 1행 INSERT, GET /api/study-reminders/latest +가 읽는다. UPDATE/DELETE 없음. fired_at 은 시간 슬롯으로 truncate 해서 UNIQUE(user, fired_at) +멱등(on_conflict_do_nothing)을 성립시킨다(raw now() 마이크로초면 멱등 무효). +study_topic_id 는 nullable(전체 집계 행은 NULL) + ON DELETE SET NULL(이력 보존). 
+""" + +from __future__ import annotations + +from datetime import datetime + +from sqlalchemy import BigInteger, DateTime, ForeignKey, Integer +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column + +from core.database import Base + + +class StudyReminder(Base): + __tablename__ = "study_reminders" + + id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + user_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey("users.id", ondelete="CASCADE"), nullable=False + ) + study_topic_id: Mapped[int | None] = mapped_column( + BigInteger, ForeignKey("study_topics.id", ondelete="SET NULL") + ) + due_count: Mapped[int | None] = mapped_column(Integer) + focus_topic_names: Mapped[list | None] = mapped_column(JSONB) + fired_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), default=datetime.now, nullable=False + ) + + # active partial unique 없음 — UNIQUE(user_id, fired_at) 는 migration 298 inline constraint. diff --git a/app/models/study_topic.py b/app/models/study_topic.py index 6f5777e..fa28e64 100644 --- a/app/models/study_topic.py +++ b/app/models/study_topic.py @@ -45,6 +45,10 @@ class StudyTopic(Base): exam_round_size: Mapped[int | None] = mapped_column(Integer) exam_subjects: Mapped[list] = mapped_column(JSONB, nullable=False, default=list) + # 공부 암기노트 Phase 1: 공부중 태그 (DDL=migration 295). + # focused_at IS NOT NULL = 포커스 중 (reminder/세션-prep 대상). + focused_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=datetime.now, nullable=False ) diff --git a/app/workers/study_reminder.py b/app/workers/study_reminder.py new file mode 100644 index 0000000..f746db2 --- /dev/null +++ b/app/workers/study_reminder.py @@ -0,0 +1,92 @@ +"""study_reminder — focus 토픽 due 요약 cron (공부 암기노트 Phase 1, A 워크스트림). 
+ +09/13/19 KST 발화(main.py CronTrigger). '공부중'(focused_at IS NOT NULL) 토픽별 복습 due +건수를 집계해 study_reminders 에 append. LLM 0 (순수 집계 → GPU 분석 측). + +due 술어는 quiz_selection.py:141 의 due_review 와 동일하게 SQL 로 재현: + due_at IS NOT NULL AND due_at <= now AND (review_stage IS NULL OR review_stage < 4) + (= Python `(review_stage or 0) < 4` 와 NULL 의미 동일). +quiz_selection 은 단일 토픽 ORM 순회라 import 불가 → 재현 + 측정 등가성 게이트(테스트). + +fired_at 은 시간 슬롯(분/초 절삭)으로 박아 UNIQUE(user, fired_at) on_conflict_do_nothing 멱등. +due 0 이면 row 미생성(noise 방지). 놓친 시각은 그냥 skip(stale 복구 미적용 — 시각 민감). +""" + +from __future__ import annotations + +import logging +from collections import defaultdict +from datetime import datetime, timezone + +from sqlalchemy import func, or_, select +from sqlalchemy.dialects.postgresql import insert as pg_insert + +from core.database import async_session +from models.study_question_progress import StudyQuestionProgress +from models.study_reminder import StudyReminder +from models.study_topic import StudyTopic +from models.user import User # noqa: F401 (mapper 초기화 defensive) + +logger = logging.getLogger("study_reminder") + + +async def run() -> None: + """APScheduler cron 진입점. 
focus 토픽 due 집계 → study_reminders append.""" + now = datetime.now(timezone.utc) + slot = now.replace(minute=0, second=0, microsecond=0) # 시간 슬롯 truncate (멱등 키) + + async with async_session() as session: + topics = ( + await session.execute( + select(StudyTopic.id, StudyTopic.user_id, StudyTopic.name) + .where( + StudyTopic.focused_at.is_not(None), + StudyTopic.deleted_at.is_(None), + ) + ) + ).all() + if not topics: + return + + by_user: dict[int, dict] = defaultdict(lambda: {"due": 0, "names": []}) + for t in topics: + due = ( + await session.execute( + select(func.count()) + .select_from(StudyQuestionProgress) + .where( + StudyQuestionProgress.user_id == t.user_id, + StudyQuestionProgress.study_topic_id == t.id, + StudyQuestionProgress.due_at.is_not(None), + StudyQuestionProgress.due_at <= now, + or_( + StudyQuestionProgress.review_stage.is_(None), + StudyQuestionProgress.review_stage < 4, + ), + ) + ) + ).scalar_one() + by_user[t.user_id]["due"] += due + by_user[t.user_id]["names"].append( + {"topic_id": t.id, "name": t.name, "due": due} + ) + + inserted = 0 + for uid, agg in by_user.items(): + if agg["due"] <= 0: + continue # due 0 → reminder 미생성 + result = await session.execute( + pg_insert(StudyReminder) + .values( + user_id=uid, + study_topic_id=None, + due_count=agg["due"], + focus_topic_names=agg["names"], + fired_at=slot, + ) + .on_conflict_do_nothing(index_elements=["user_id", "fired_at"]) + ) + inserted += result.rowcount or 0 + await session.commit() + if inserted: + logger.info("study_reminder fired slot=%s users=%d", slot.isoformat(), inserted) diff --git a/tests/test_study_memo_card_guards.py b/tests/test_study_memo_card_guards.py new file mode 100644 index 0000000..4d9be29 --- /dev/null +++ b/tests/test_study_memo_card_guards.py @@ -0,0 +1,161 @@ +"""공부 암기노트 Phase 1 — 정규화 + 카드 가드 단위 테스트 (W-3 / G-3). + +card_normalize / study_memo_card_guards 는 stdlib 만 의존(DB/MLX 없음). +정량 토큰 정규화·dedup·근거(정량=evidence 원문)·누출·배치 dedup 동작(분기)을 검증. 
+정량 기대값은 hard gate 로 두지 않고 동작만 assert (메모리 규칙). +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(ROOT / "app")) + +from services.study import card_normalize as cn # noqa: E402 +from services.study.study_memo_card_guards import ( # noqa: E402 + guard_card, + guard_cards, +) + + +# ─── 정규화 (G-3) ─── + +def test_normalize_num_unit_space_removed(): + assert cn.normalize_token("0.5 MPa") == "0.5MPa" + assert cn.normalize_token("100 ℃") == "100℃" + + +def test_normalize_thousands_separator_removed(): + assert cn.normalize_token("1,000kg") == "1000kg" + assert cn.normalize_token("5,000 kPa") == "5000kPa" + + +def test_normalize_no_unit_conversion(): + # 단위 환산 절대 금지 — 원문 표기 보존. + assert cn.normalize_token("1000mm") == "1000mm" + assert "m" in cn.normalize_token("1000mm") + + +def test_normalize_decimal_comma_protected(): + # 천단위가 아닌 소수 콤마(3자리 그룹 아님)는 보존. + assert cn.normalize_token("3,14") == "3,14" + + +def test_is_quantitative(): + assert cn.is_quantitative("0.5MPa") is True + assert cn.is_quantitative("0종 장소") is True # 숫자 0 포함 + assert cn.is_quantitative("안전간극") is False + + +def test_dedup_hash_stable_and_scoped(): + # 공백 차이는 정규화로 동일 hash. + assert cn.compute_dedup_hash(7, "cloze", "0.5 MPa") == cn.compute_dedup_hash(7, "cloze", "0.5MPa") + # format 다르면 다른 hash. + assert cn.compute_dedup_hash(7, "cloze", "0.5MPa") != cn.compute_dedup_hash(7, "qa", "0.5MPa") + # source 다르면 다른 hash. 
+ assert cn.compute_dedup_hash(7, "qa", "x") != cn.compute_dedup_hash(8, "qa", "x") + + +def test_leak_detection(): + assert cn.is_cue_leak("정답은 0.5MPa 이다", "0.5 MPa") is True + assert cn.is_cue_leak("설계압력은 얼마인가", "0.5 MPa") is False + assert cn.is_cloze_self_leak("설계압력 [____] 즉 0.5 MPa 이다", "0.5MPa") is True + assert cn.is_cloze_self_leak("설계압력은 [____] 이상이다", "0.5MPa") is False + + +def test_evidence_match_normalized(): + refs = [{"snippet": "최고압력 0.5 MPa 이상", "source_id": 1, "source_type": "document"}] + assert len(cn.matching_evidence("0.5MPa", refs)) == 1 + assert cn.matching_evidence("9.9MPa", refs) == [] + + +# ─── 카드 가드 (W-3) ─── + +EVID = [{"snippet": "내압 방폭구조의 안전간극은 0.5 MPa 기준", "source_id": 1, "source_type": "document"}] +EXPL = "내압 방폭구조는 안전간극을 통해 화염 온도를 낮춘다. 0종 장소는 항상 존재하는 장소다." + + +def _g(card, evid=EVID, expl=EXPL): + return guard_card(card, source_question_id=1, ai_explanation=expl, evidence_refs=evid) + + +def test_guard_valid_qa_via_explanation(): + # 비정량 fact 가 ai_explanation 에 등장 → 통과 (evidence 불필요). + g = _g({"format": "qa", "cue": "내압 방폭구조의 화염온도를 낮추는 것은?", "fact": "안전간극"}) + assert g is not None and g.format == "qa" and g.dedup_hash + + +def test_guard_valid_cloze_quant_in_evidence(): + # 정량 토큰이 evidence 원문에 등장 → 통과 + 매칭 evidence 기록. + g = _g({ + "format": "cloze", + "cue": "안전간극 기준 압력", + "fact": "0.5MPa", + "cloze_text": "안전간극은 [____] 기준이다", + }) + assert g is not None and g.format == "cloze" + assert len(g.matched_evidence) == 1 + + +def test_guard_drop_quant_not_in_evidence(): + # 정량 토큰이 evidence 에 없으면 drop (할루시네이션 차단). 
+ g = _g({"format": "cloze", "cue": "압력", "fact": "9.9MPa", "cloze_text": "압력은 [____]"}) + assert g is None + + +def test_guard_drop_cue_leak(): + g = _g({"format": "qa", "cue": "안전간극이 정답이다", "fact": "안전간극"}) + assert g is None + + +def test_guard_drop_cloze_self_leak(): + g = _g({ + "format": "cloze", + "cue": "압력 기준", + "fact": "0.5MPa", + "cloze_text": "기준은 [____] 즉 0.5 MPa 이다", + }) + assert g is None + + +def test_guard_drop_invalid_format_or_empty(): + assert _g({"format": "ox", "cue": "a", "fact": "안전간극"}) is None + assert _g({"format": "qa", "cue": "", "fact": "안전간극"}) is None + assert _g({"format": "qa", "cue": "a", "fact": ""}) is None + + +def test_guard_drop_cloze_without_blank(): + g = _g({"format": "cloze", "cue": "압력", "fact": "0.5MPa", "cloze_text": "빈칸 없는 문장"}) + assert g is None + + +def test_guard_drop_hallucinated_concept(): + # 비정량이지만 explanation/evidence 어디에도 없으면 drop. + g = _g({"format": "qa", "cue": "무엇?", "fact": "존재하지않는개념용어XYZ"}) + assert g is None + + +def test_guard_cards_batch_dedup(): + # 같은 (qid, format, 정답) 2장 → dedup_hash 동일 → 1장만. + cards = [ + {"format": "qa", "cue": "화염온도를 낮추는 것은?", "fact": "안전간극"}, + {"format": "qa", "cue": "내압 방폭의 핵심 원리는?", "fact": "안전간극"}, + ] + out = guard_cards(cards, source_question_id=1, ai_explanation=EXPL, evidence_refs=EVID) + assert len(out) == 1 + + +def test_guard_cards_all_dropped_returns_empty(): + cards = [{"format": "qa", "cue": "x", "fact": "할루시네이션없는근거XYZ"}] + out = guard_cards(cards, source_question_id=1, ai_explanation=EXPL, evidence_refs=EVID) + assert out == [] + + +_TESTS = [v for k, v in dict(globals()).items() if k.startswith("test_")] + +if __name__ == "__main__": + for t in _TESTS: + t() + print(f"OK ({len(_TESTS)} tests)")