@router.get("/concepts/{doc_id}")
async def get_concept_detail(
    doc_id: int,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
    topic_id: int = DEFAULT_TOPIC_ID,
):
    """Return the concept-reader payload for one concept document.

    Delegates to ``cc.concept_detail`` with the authenticated user's id, the
    topic id and the document id, and returns its result unchanged.

    Raises:
        HTTPException: 404 ("concept not found") when the service returns
            ``None``.
    """
    payload = await cc.concept_detail(session, user.id, topic_id, doc_id)
    if payload is not None:
        return payload
    # The service signals "no such concept for this topic/user" with None.
    raise HTTPException(status_code=404, detail="concept not found")
# Single-concept lookup: one document row (including md_content) filtered by
# id, soft-delete flag and a LIKE pattern on user_tags, plus a read flag for
# the user and a LEFT JOIN onto that user's concept progress row.
_CONCEPT_ONE_SQL = text(
    """
    SELECT d.id AS doc_id, d.title AS title, d.md_content AS md_content,
           split_part(replace(d.user_tags::text, '"', ''), '/', 3) AS subject,
           (d.md_content LIKE '%★★★%') AS f3,
           (d.md_content LIKE '%★★%') AS f2,
           EXISTS (
               SELECT 1 FROM document_reads r
               WHERE r.document_id = d.id AND r.user_id = :uid
           ) AS is_read,
           p.review_stage AS review_stage,
           p.due_at AS due_at
    FROM documents d
    LEFT JOIN study_concept_progress p ON p.concept_doc_id = d.id AND p.user_id = :uid
    WHERE d.id = :doc_id AND d.deleted_at IS NULL AND d.user_tags::text LIKE :like
    """
)


async def concept_detail(
    session: AsyncSession, user_id: int, topic_id: int, doc_id: int
) -> dict | None:
    """Build the concept-reader payload for one concept document.

    Combines the parsed markdown structure, resolved related-concept links,
    the read / review columns selected by ``_CONCEPT_ONE_SQL``, and the
    previous / next document ids within the same subject (ordered by title,
    then doc id).

    Returns:
        The payload dict, or ``None`` when the topic name lookup yields
        nothing or no document row matches.
    """
    name = await _topic_name(session, topic_id)
    if not name:
        return None

    params = {"uid": user_id, "doc_id": doc_id, "like": f"%@library/{name}/%"}
    result = await session.execute(_CONCEPT_ONE_SQL, params)
    row = result.mappings().first()
    if row is None:
        return None

    parsed = parse_concept(row["md_content"] or "")

    # The topic's concept rows serve both backlink resolution and prev/next.
    idx = await _concept_rows(session, user_id, name)
    resolved = resolve_related(
        parsed["related"],
        [(r["doc_id"], r["title"], r["subject"]) for r in idx],
    )

    # Siblings = rows of the same subject, ordered by (title, doc_id).
    subject = row["subject"]
    siblings = sorted(
        ((r["doc_id"], r["title"]) for r in idx if r["subject"] == subject),
        key=lambda pair: (pair[1] or "", pair[0]),
    )
    ids = [sibling_id for sibling_id, _ in siblings]
    try:
        pos = ids.index(doc_id)
    except ValueError:
        pos = None
    prev_id = ids[pos - 1] if pos is not None and pos > 0 else None
    next_id = ids[pos + 1] if pos is not None and pos < len(ids) - 1 else None

    # Frequency tier from the star markers detected by the SQL LIKE columns.
    if row["f3"]:
        freq = 3
    elif row["f2"]:
        freq = 2
    else:
        freq = 1

    return {
        "doc_id": row["doc_id"],
        "db_title": row["title"],
        "title": parsed["title"] or row["title"],
        "subject": row["subject"],
        "freq": freq,
        "summary": parsed["summary"],
        "body": parsed["body"],
        "bincheol": parsed["bincheol"],
        "related": resolved,
        "is_read": row["is_read"],
        "review_stage": row["review_stage"],
        "due_at": row["due_at"],
        "prev_id": prev_id,
        "next_id": next_id,
    }
import re

# Concept-note markdown parser + related-concept backlink resolver.
#
# Expected note skeleton (per the module's stated invariant):
#   # {H1 title}                  first heading, display title
#   > **한 줄 요약**: {summary}    blockquote with a fixed label
#   ## {free label} ...           zero or more body sections, optional trailing ★
#   ## 빈출 포인트                 frequently-tested points (bullet list)
#   ## 관련 개념                   related concepts (bullet list)
#
# Only the two anchor headings are matched by name; every other H2 is a body
# section handled by position. Content inside ``` fences is never interpreted
# as headings or list items.

_FENCE = re.compile(r"^\s*```")
_H1 = re.compile(r"^#\s+(.+?)\s*$")
_H2 = re.compile(r"^##\s+(.+?)\s*$")  # "###" does not match: "##" must be followed by whitespace
_SUMMARY = re.compile(r"^>\s*\*\*한 줄 요약\*\*:\s*(.+)$")
_STAR_SUFFIX = re.compile(r"\s*★+\s*$")
_TRAIL_STARS = re.compile(r"★+\s*$")
_BINCHEOL_ITEM = re.compile(r"^\s*-\s+(★*)\s*(.+)$")
_RELATED_ITEM = re.compile(r"^\s*-\s+(.+)$")
_PAREN = re.compile(r"\s*\(.*$")  # from the first "(" to end of line (clarifier hint)
_NUM_PREFIX = re.compile(r"^\d+_")
_STRIP_SYM = re.compile(r"[\s_·,./()\-]")

_ANCHOR_BINCHEOL = "빈출 포인트"
_ANCHOR_RELATED = "관련 개념"


def parse_concept(md: str) -> dict:
    """Parse a concept note's markdown into its structural parts.

    Args:
        md: Raw markdown text; ``None`` or empty is treated as an empty note.

    Returns:
        A dict with keys:
          - ``title``: text of the first H1 outside a code fence, or ``None``.
          - ``summary``: text after the ``**한 줄 요약**:`` blockquote label,
            or ``None``.
          - ``body``: list of ``{"label", "stars", "md"}`` for each non-anchor
            H2 section (``stars`` = number of trailing ★ on the heading).
          - ``bincheol``: list of ``{"tier", "text"}`` bullet items from the
            "빈출 포인트" section (``tier`` = number of leading ★).
          - ``related``: list of ``{"raw", "phrase", "hint"}`` bullet items
            from the "관련 개념" section; ``hint`` is the trailing
            parenthesised part, ``phrase`` the text before it.
    """
    lines = (md or "").split("\n")
    title: str | None = None
    summary: str | None = None
    body: list[dict] = []
    bincheol_lines: list[str] = []
    related_lines: list[str] = []

    in_fence = False
    zone = "pre"  # pre | body | bincheol | related
    body_cur: dict | None = None

    def emit(line: str, *, fenced: bool = False) -> None:
        """Route a content line to the buffer of the current zone."""
        if body_cur is not None:
            # Body sections keep fenced content verbatim (diagrams, code).
            body_cur["_lines"].append(line)
        elif fenced:
            # Fenced lines must not be mistaken for "- item" bullets in the
            # bincheol / related zones, so they are dropped there.
            return
        elif zone == "bincheol":
            bincheol_lines.append(line)
        elif zone == "related":
            related_lines.append(line)
        # Anything before the first section (pre zone) is discarded.

    for ln in lines:
        if _FENCE.match(ln):
            in_fence = not in_fence
            emit(ln, fenced=True)
            continue
        if in_fence:
            emit(ln, fenced=True)
            continue

        if title is None:
            m = _H1.match(ln)
            if m:
                title = m.group(1).strip()
                continue
        if summary is None:
            m = _SUMMARY.match(ln)
            if m:
                summary = m.group(1).strip()
                continue

        heading = _H2.match(ln)
        if heading is None:
            emit(ln)
            continue

        raw_label = heading.group(1).strip()
        star_m = _TRAIL_STARS.search(raw_label)
        stars = len(star_m.group(0).strip()) if star_m else 0
        label = _STAR_SUFFIX.sub("", raw_label).strip()
        if label == _ANCHOR_BINCHEOL:
            zone = "bincheol"
            body_cur = None
        elif label == _ANCHOR_RELATED:
            zone = "related"
            body_cur = None
        else:
            body_cur = {"label": label, "stars": stars, "_lines": []}
            body.append(body_cur)
            zone = "body"

    body_out = []
    for section in body:
        section_md = "\n".join(section["_lines"]).strip()
        if section_md or section["label"]:
            body_out.append(
                {"label": section["label"], "stars": section["stars"], "md": section_md}
            )

    bincheol = []
    for ln in bincheol_lines:
        m = _BINCHEOL_ITEM.match(ln)
        if m:
            bincheol.append({"tier": len(m.group(1)), "text": m.group(2).strip()})

    related = []
    for ln in related_lines:
        m = _RELATED_ITEM.match(ln)
        if not m:
            continue
        raw = m.group(1).strip()
        phrase = _PAREN.sub("", raw).strip()
        hint = raw[len(phrase):].strip() if len(raw) > len(phrase) else ""
        if phrase:
            related.append({"raw": raw, "phrase": phrase, "hint": hint})

    return {
        "title": title,
        "summary": summary,
        "body": body_out,
        "bincheol": bincheol,
        "related": related,
    }


def _normalize(s: str) -> str:
    """Normalize text for matching: drop a leading ``NN_`` prefix, lowercase,
    then remove whitespace and the separator symbols in ``_STRIP_SYM``."""
    s = _NUM_PREFIX.sub("", s or "")
    s = s.lower()
    return _STRIP_SYM.sub("", s)


def resolve_related(related: list[dict], title_index: list[tuple]) -> list[dict]:
    """Resolve related-concept phrases to concept documents.

    Args:
        related: Items with at least ``"phrase"`` and ``"hint"`` keys (as
            produced by ``parse_concept``).
        title_index: ``[(doc_id, title, subject), ...]`` of candidate docs.

    Returns:
        One ``{"phrase", "hint", "doc_id", "title"}`` dict per input item, in
        input order. Unresolved items have ``doc_id`` and ``title`` ``None``.

    Matching, on normalized text and only for phrases of at least 2
    normalized characters:
      1. exact match (the first entry in ``title_index`` wins on duplicates);
      2. otherwise a one-directional substring fallback: the normalized title
         must be contained in the normalized phrase. The reverse direction is
         deliberately not used so a short phrase cannot swallow a longer
         title. Candidates are ranked by smallest length difference, then by
         lowest ``doc_id``.
    """
    title_by_id: dict = {}
    norm_exact: dict[str, int] = {}
    norm_list: list[tuple[str, int, str]] = []
    for doc_id, title, _subject in title_index:
        # First title seen per doc id, so exact hits need no rescan later.
        title_by_id.setdefault(doc_id, title)
        norm = _normalize(title)
        if norm:
            norm_exact.setdefault(norm, doc_id)
            norm_list.append((norm, doc_id, title))

    out = []
    for item in related:
        phrase_norm = _normalize(item["phrase"])
        hit_id = None
        hit_title = None
        if len(phrase_norm) >= 2:
            if phrase_norm in norm_exact:
                hit_id = norm_exact[phrase_norm]
            else:
                cands = [
                    (abs(len(norm) - len(phrase_norm)), cand_id, cand_title)
                    for norm, cand_id, cand_title in norm_list
                    if len(norm) >= 2 and norm in phrase_norm
                ]
                if cands:
                    _, hit_id, hit_title = min(cands, key=lambda c: (c[0], c[1]))
        if hit_id is not None and hit_title is None:
            hit_title = title_by_id.get(hit_id)
        out.append(
            {
                "phrase": item["phrase"],
                "hint": item["hint"],
                "doc_id": hit_id,
                "title": hit_title,
            }
        )
    return out
  • {#each Array(c.freq) as _}★{/each} - {c.title} + {c.title} {c.reason} + {#if mode === 'recall'} + 각 섹션을 떠올린 뒤 확인하세요 + {/if} + + + + {#if concept.body.length > 0} +
    + {#each concept.body as sec, i (i)} +
    +
    +

    {sec.label}

    + {#if sec.stars > 0} + {#each Array(sec.stars) as _}★{/each} + {/if} +
    + {#if shown(i)} +
    + +
    + {:else} + + {/if} +
    + {/each} +
    + {/if} + + + {#if concept.bincheol.length > 0} +
    +

    + 빈출 포인트 +

    + +
    + {/if} + + + {#if concept.related.length > 0} +
    +

    관련 개념

    +
    + {#each concept.related as rel} + {#if rel.doc_id} + + {rel.phrase} + + {:else} + + {rel.phrase} + + {/if} + {/each} +
    +
    + {/if} + + +
    + {#if concept.prev_id} + + {/if} +
    + + {#if concept.next_id} + + {/if} +
    + {/if} +