Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6d447f9cba | |||
| f38ec177d7 | |||
| da4a2e81c3 | |||
| 966a4315c8 | |||
| 3c42b7b97a | |||
| 91ce54c1cd | |||
| 9ec0a184a0 | |||
| a22b2c7647 | |||
| c44692fddc | |||
| 7487739aec | |||
| a8d3af2b62 | |||
| 51a7c96b56 | |||
| eb83d41ba5 | |||
| 62794b3857 | |||
| 8cdfe6006d | |||
| 3fb613916a | |||
| 0c7211e24b |
+337
-2
@@ -28,7 +28,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from starlette.requests import ClientDisconnect
|
||||
|
||||
from ai.client import AIClient, _load_prompt, parse_json_response
|
||||
from core.auth import get_current_user
|
||||
from core.auth import get_current_user, get_egress_class
|
||||
from core.config import settings
|
||||
from core.database import async_session, get_session
|
||||
from core.utils import file_hash
|
||||
@@ -742,11 +742,31 @@ async def get_document(
|
||||
doc_id: int,
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
egress_class: Annotated[str, Depends(get_egress_class)],
|
||||
):
|
||||
"""문서 단건 조회. 본문(extracted_text)·canonical markdown 동봉."""
|
||||
"""문서 단건 조회. 본문(extracted_text)·canonical markdown 동봉.
|
||||
|
||||
cloud egress(갭2): egress=cloud 토큰(예: Claude/MCP)은 search 와 동일한 cloud-eligibility
|
||||
게이트를 통과한 문서만 열람 가능 — id 직접 fetch 로 비공개/인프라/개인/restricted 문서를
|
||||
우회 열람하는 경로를 차단한다. 부적격은 404(존재 자체 비노출). local 토큰=게이트 미발동(무회귀).
|
||||
"""
|
||||
from sqlalchemy import text as sql_text
|
||||
from services.search.retrieval_service import cloud_eligible_doc_sql
|
||||
|
||||
doc = await session.get(Document, doc_id)
|
||||
if not doc or doc.deleted_at is not None:
|
||||
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
|
||||
if egress_class == "cloud":
|
||||
eligible = (
|
||||
await session.execute(
|
||||
sql_text(
|
||||
"SELECT 1 FROM documents WHERE id = :doc_id AND deleted_at IS NULL"
|
||||
+ cloud_eligible_doc_sql("")
|
||||
).bindparams(doc_id=doc_id)
|
||||
)
|
||||
).first()
|
||||
if eligible is None:
|
||||
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
|
||||
return DocumentDetailResponse.model_validate(doc)
|
||||
|
||||
|
||||
@@ -1028,6 +1048,19 @@ async def get_document_image_raw(
|
||||
DocumentImage.image_key == image_key,
|
||||
)
|
||||
)
|
||||
if img is None:
|
||||
# clause-KB: 절-문서는 부모 표준 이미지를 공유(md_content=부모 슬라이스) → parent_id 폴백.
|
||||
from sqlalchemy import text as sql_text
|
||||
_par = (await session.execute(
|
||||
sql_text("SELECT parent_id FROM documents WHERE id = :id").bindparams(id=doc_id)
|
||||
)).scalar()
|
||||
if _par is not None:
|
||||
img = await session.scalar(
|
||||
select(DocumentImage).where(
|
||||
DocumentImage.document_id == _par,
|
||||
DocumentImage.image_key == image_key,
|
||||
)
|
||||
)
|
||||
if img is None:
|
||||
raise HTTPException(status_code=404, detail="이미지를 찾을 수 없습니다")
|
||||
|
||||
@@ -1801,3 +1834,305 @@ async def analyze_document(
|
||||
error_code=error_code,
|
||||
source=source,
|
||||
)
|
||||
|
||||
|
||||
# ─── ASME 절-지식베이스: 유기적 책 네비 (clause-KB, doc_kind='clause' 자식 문서 기반) ───
|
||||
class ClauseTocItem(BaseModel):
|
||||
id: int
|
||||
clause_code: str | None = None
|
||||
clause_part: str | None = None
|
||||
clause_order: int | None = None
|
||||
title: str | None = None
|
||||
|
||||
|
||||
class ClauseBookResponse(BaseModel):
|
||||
parent_id: int
|
||||
parent_title: str | None = None
|
||||
clauses: list[ClauseTocItem]
|
||||
|
||||
|
||||
@router.get("/{doc_id}/clauses", response_model=ClauseBookResponse)
|
||||
async def get_document_clauses(
|
||||
doc_id: int,
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
):
|
||||
"""부모 표준 doc 의 절-문서 목차(유기적 책 TOC). doc_kind='clause' 자식을 clause_order 순 반환.
|
||||
|
||||
절-문서는 in_corpus=false + doc_kind='clause'(검색 제외)라 일반 목록/검색엔 안 뜨지만,
|
||||
이 책-내 네비는 부모 표준에서 자식 절로 진입하는 전용 경로다(ASME 2025판=한 권의 책).
|
||||
"""
|
||||
from sqlalchemy import text as sql_text
|
||||
|
||||
parent = await session.get(Document, doc_id)
|
||||
if not parent or parent.deleted_at is not None:
|
||||
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
|
||||
rows = (
|
||||
await session.execute(
|
||||
sql_text(
|
||||
"""
|
||||
SELECT id, clause_code, clause_part, clause_order, title
|
||||
FROM documents
|
||||
WHERE parent_id = :pid AND doc_kind = 'clause' AND deleted_at IS NULL
|
||||
ORDER BY clause_order
|
||||
"""
|
||||
).bindparams(pid=doc_id)
|
||||
)
|
||||
).mappings().all()
|
||||
return ClauseBookResponse(
|
||||
parent_id=doc_id,
|
||||
parent_title=parent.title,
|
||||
clauses=[ClauseTocItem(**dict(r)) for r in rows],
|
||||
)
|
||||
|
||||
|
||||
class BacklinkRef(BaseModel):
|
||||
code: str
|
||||
doc_id: int | None = None # 해소된 절-문서(같은 부모) — dangling 이면 None
|
||||
title: str | None = None
|
||||
anchor: str | None = None
|
||||
ctx: str | None = None
|
||||
|
||||
|
||||
class BacklinksResponse(BaseModel):
|
||||
doc_id: int
|
||||
clause_code: str | None = None
|
||||
parent_id: int | None = None
|
||||
prev: ClauseTocItem | None = None
|
||||
next: ClauseTocItem | None = None
|
||||
forward: list[BacklinkRef] # 이 절이 참조하는 절들
|
||||
back: list[BacklinkRef] # 이 절을 참조하는 절들
|
||||
|
||||
|
||||
@router.get("/{doc_id}/backlinks", response_model=BacklinksResponse)
|
||||
async def get_document_backlinks(
|
||||
doc_id: int,
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
):
|
||||
"""절-문서의 양방향 백링크 + 같은 부모 내 이전/다음 절(유기적 책 흐름)."""
|
||||
from sqlalchemy import text as sql_text
|
||||
|
||||
doc = await session.get(Document, doc_id)
|
||||
if not doc or doc.deleted_at is not None:
|
||||
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
|
||||
|
||||
_meta = (await session.execute(sql_text(
|
||||
"SELECT parent_id, clause_code, clause_order FROM documents WHERE id = :id"
|
||||
).bindparams(id=doc_id))).mappings().first()
|
||||
_parent_id = _meta["parent_id"] if _meta else None
|
||||
_clause_code = _meta["clause_code"] if _meta else None
|
||||
_clause_order = _meta["clause_order"] if _meta else None
|
||||
forward = (
|
||||
await session.execute(
|
||||
sql_text(
|
||||
"""
|
||||
SELECT cl.dst_code AS code, cl.dst_doc_id AS doc_id, cl.anchor, cl.ctx, d.title
|
||||
FROM clause_links cl
|
||||
LEFT JOIN documents d ON d.id = cl.dst_doc_id
|
||||
WHERE cl.src_doc_id = :id
|
||||
ORDER BY cl.char_off NULLS LAST
|
||||
LIMIT 300
|
||||
"""
|
||||
).bindparams(id=doc_id)
|
||||
)
|
||||
).mappings().all()
|
||||
back = (
|
||||
await session.execute(
|
||||
sql_text(
|
||||
"""
|
||||
SELECT s.clause_code AS code, cl.src_doc_id AS doc_id, s.title, cl.ctx
|
||||
FROM clause_links cl
|
||||
JOIN documents s ON s.id = cl.src_doc_id
|
||||
WHERE cl.dst_doc_id = :id
|
||||
ORDER BY s.clause_order NULLS LAST
|
||||
LIMIT 300
|
||||
"""
|
||||
).bindparams(id=doc_id)
|
||||
)
|
||||
).mappings().all()
|
||||
|
||||
prev = nxt = None
|
||||
if _parent_id is not None and _clause_order is not None:
|
||||
prow = (
|
||||
await session.execute(
|
||||
sql_text(
|
||||
"""
|
||||
SELECT id, clause_code, clause_part, clause_order, title FROM documents
|
||||
WHERE parent_id = :pid AND doc_kind='clause' AND deleted_at IS NULL
|
||||
AND clause_order < :ord
|
||||
ORDER BY clause_order DESC LIMIT 1
|
||||
"""
|
||||
).bindparams(pid=_parent_id, ord=_clause_order)
|
||||
)
|
||||
).mappings().first()
|
||||
nrow = (
|
||||
await session.execute(
|
||||
sql_text(
|
||||
"""
|
||||
SELECT id, clause_code, clause_part, clause_order, title FROM documents
|
||||
WHERE parent_id = :pid AND doc_kind='clause' AND deleted_at IS NULL
|
||||
AND clause_order > :ord
|
||||
ORDER BY clause_order ASC LIMIT 1
|
||||
"""
|
||||
).bindparams(pid=_parent_id, ord=_clause_order)
|
||||
)
|
||||
).mappings().first()
|
||||
prev = ClauseTocItem(**dict(prow)) if prow else None
|
||||
nxt = ClauseTocItem(**dict(nrow)) if nrow else None
|
||||
|
||||
return BacklinksResponse(
|
||||
doc_id=doc_id,
|
||||
clause_code=_clause_code,
|
||||
parent_id=_parent_id,
|
||||
prev=prev,
|
||||
next=nxt,
|
||||
forward=[BacklinkRef(**dict(r)) for r in forward],
|
||||
back=[BacklinkRef(**dict(r)) for r in back],
|
||||
)
|
||||
|
||||
|
||||
# ─── 관련 문서 (유사도, on-demand pgvector KNN — 저부하·무저장) ───
|
||||
class RelatedItem(BaseModel):
|
||||
id: int
|
||||
title: str | None = None
|
||||
ai_domain: str | None = None
|
||||
material_type: str | None = None
|
||||
year: int | None = None
|
||||
sim: float | None = None
|
||||
|
||||
|
||||
class RelatedResponse(BaseModel):
|
||||
doc_id: int
|
||||
related: list[RelatedItem]
|
||||
|
||||
|
||||
@router.get("/{doc_id}/related", response_model=RelatedResponse)
|
||||
async def get_related_documents(
|
||||
doc_id: int,
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
limit: int = 8,
|
||||
same_type: bool = True,
|
||||
):
|
||||
"""문서-레벨 임베딩 코사인 최근접 = '관련 문서'. on-demand(저장/배치 없음).
|
||||
|
||||
인용그래프가 부적합한 코퍼스(업계 기술기사=인용망 부재)의 대안 연결 레이어.
|
||||
same_type=true면 같은 material_type 내, false면 전 코퍼스. doc_kind='clause'(절-문서)는 제외.
|
||||
"""
|
||||
from sqlalchemy import text as sql_text
|
||||
|
||||
lim = max(1, min(limit, 30))
|
||||
type_clause = "AND d.material_type = src.material_type" if same_type else ""
|
||||
rows = (
|
||||
await session.execute(
|
||||
sql_text(
|
||||
f"""
|
||||
WITH src AS (
|
||||
SELECT embedding, material_type FROM documents WHERE id = :id
|
||||
)
|
||||
SELECT d.id, d.title, d.ai_domain, d.material_type, d.facet_year AS year,
|
||||
round((1 - (d.embedding <=> (SELECT embedding FROM src)))::numeric, 3) AS sim
|
||||
FROM documents d, src
|
||||
WHERE d.doc_kind = 'standard' AND d.deleted_at IS NULL
|
||||
AND d.id <> :id AND d.embedding IS NOT NULL
|
||||
AND (SELECT embedding FROM src) IS NOT NULL
|
||||
{type_clause}
|
||||
ORDER BY d.embedding <=> (SELECT embedding FROM src)
|
||||
LIMIT :lim
|
||||
"""
|
||||
).bindparams(id=doc_id, lim=lim)
|
||||
)
|
||||
).mappings().all()
|
||||
return RelatedResponse(
|
||||
doc_id=doc_id,
|
||||
related=[RelatedItem(**{k: r[k] for k in ("id", "title", "ai_domain", "material_type", "year")}, sim=float(r["sim"]) if r["sim"] is not None else None) for r in rows],
|
||||
)
|
||||
|
||||
|
||||
# ─── 절 공부도구 (노트/형광펜/암기카드) — clause_study ───
|
||||
class StudyItem(BaseModel):
|
||||
id: int
|
||||
kind: str
|
||||
payload: dict = {}
|
||||
created_at: datetime | None = None
|
||||
|
||||
|
||||
class StudyListResponse(BaseModel):
|
||||
doc_id: int
|
||||
items: list[StudyItem]
|
||||
|
||||
|
||||
class StudyCreate(BaseModel):
|
||||
kind: str # note | highlight | card
|
||||
payload: dict = {}
|
||||
|
||||
|
||||
def _parse_payload(p):
|
||||
import json
|
||||
if isinstance(p, str):
|
||||
try:
|
||||
return json.loads(p)
|
||||
except Exception:
|
||||
return {}
|
||||
return p or {}
|
||||
|
||||
|
||||
@router.get("/{doc_id}/study", response_model=StudyListResponse)
|
||||
async def list_study(
|
||||
doc_id: int,
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
):
|
||||
"""절-문서의 공부도구 항목(노트/형광펜/암기카드) 목록."""
|
||||
from sqlalchemy import text as sql_text
|
||||
rows = (
|
||||
await session.execute(
|
||||
sql_text("SELECT id, kind, payload, created_at FROM clause_study "
|
||||
"WHERE doc_id = :id ORDER BY created_at DESC").bindparams(id=doc_id)
|
||||
)
|
||||
).mappings().all()
|
||||
return StudyListResponse(
|
||||
doc_id=doc_id,
|
||||
items=[StudyItem(id=r["id"], kind=r["kind"], payload=_parse_payload(r["payload"]),
|
||||
created_at=r["created_at"]) for r in rows],
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{doc_id}/study", response_model=StudyItem, status_code=201)
|
||||
async def add_study(
|
||||
doc_id: int,
|
||||
body: StudyCreate,
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
):
|
||||
"""노트/형광펜/암기카드 1건 추가."""
|
||||
import json
|
||||
from sqlalchemy import text as sql_text
|
||||
if body.kind not in ("note", "highlight", "card"):
|
||||
raise HTTPException(status_code=400, detail="kind 는 note/highlight/card")
|
||||
row = (
|
||||
await session.execute(
|
||||
sql_text("INSERT INTO clause_study(doc_id, kind, payload) "
|
||||
"VALUES (:d, :k, cast(:p AS jsonb)) RETURNING id, kind, payload, created_at")
|
||||
.bindparams(d=doc_id, k=body.kind, p=json.dumps(body.payload, ensure_ascii=False))
|
||||
)
|
||||
).mappings().first()
|
||||
await session.commit()
|
||||
return StudyItem(id=row["id"], kind=row["kind"], payload=_parse_payload(row["payload"]),
|
||||
created_at=row["created_at"])
|
||||
|
||||
|
||||
@router.delete("/{doc_id}/study/{study_id}", status_code=204)
|
||||
async def delete_study(
|
||||
doc_id: int,
|
||||
study_id: int,
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
):
|
||||
from sqlalchemy import text as sql_text
|
||||
await session.execute(
|
||||
sql_text("DELETE FROM clause_study WHERE id = :s AND doc_id = :d")
|
||||
.bindparams(s=study_id, d=doc_id)
|
||||
)
|
||||
await session.commit()
|
||||
|
||||
+7
-1
@@ -15,7 +15,7 @@ from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from core.auth import get_current_user
|
||||
from core.auth import get_current_user, get_egress_class
|
||||
from core.database import get_session
|
||||
from core.utils import setup_logger
|
||||
from models.user import User
|
||||
@@ -139,6 +139,7 @@ def _build_search_debug(pr: PipelineResult) -> SearchDebug:
|
||||
async def search(
|
||||
q: str,
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
egress_class: Annotated[str, Depends(get_egress_class)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
background_tasks: BackgroundTasks,
|
||||
mode: str = Query("hybrid", pattern="^(fts|trgm|vector|hybrid)$"),
|
||||
@@ -211,6 +212,8 @@ async def search(
|
||||
None, description="안전 자료실 C-1: 관할 필터 (KR/US/EU/JP/GB/INT)"),
|
||||
year_from: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 하한 (NULL=created_at fallback)"),
|
||||
year_to: int | None = Query(None, ge=1900, le=2100, description="published_date 연도 상한"),
|
||||
domain_bucket: str | None = Query(None, description="377: domain_bucket 스코프 CSV (Safety,Engineering,Law,Philosophy,Programming,General,News). domain_bucket = ANY"),
|
||||
exclude_bucket: str | None = Query(None, description="377: domain_bucket 제외 CSV (예: News). 지식질의 시 News 기본제외용"),
|
||||
facets: bool = Query(False, description="안전 자료실 C-1 후속: top-K 결과 분류 축 분포(material_type/jurisdiction/version_status)를 응답 facets 에 집계. 미지정=계산/노출 0"),
|
||||
):
|
||||
"""문서 검색 — FTS + ILIKE + 벡터 결합 (Phase 3.1 이후 run_search wrapper)"""
|
||||
@@ -221,6 +224,9 @@ async def search(
|
||||
jurisdiction=jurisdiction,
|
||||
year_from=year_from,
|
||||
year_to=year_to,
|
||||
domain_buckets=[b.strip() for b in domain_bucket.split(",") if b.strip()] if domain_bucket else None,
|
||||
exclude_buckets=[b.strip() for b in exclude_bucket.split(",") if b.strip()] if exclude_bucket else None,
|
||||
cloud_egress=(egress_class == "cloud"),
|
||||
)
|
||||
pr = await run_search(
|
||||
session,
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
"""study_concepts API — 이론공부 홈(오늘의 개념 · 진도 · 회독 SR). prefix = /api/study.
|
||||
|
||||
문제풀이 표면 무접촉. 개념문서(가스기사 태그) 읽기 집계 + 회독 SR write 만. 단일 토픽(가스기사=4).
|
||||
경로: GET /curriculum · GET /today-concepts · POST /concepts/{doc_id}/read.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from core.auth import get_current_user
|
||||
from core.database import get_session
|
||||
from models.user import User
|
||||
from services.study import concept_curriculum as cc
|
||||
from services.study import concept_links as cl
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# 가스기사 단일 토픽 운영(현행). 다토픽 확장 시 쿼리 파라미터로 승격.
|
||||
DEFAULT_TOPIC_ID = 4
|
||||
|
||||
|
||||
@router.get("/curriculum")
|
||||
async def get_curriculum(
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
topic_id: int = DEFAULT_TOPIC_ID,
|
||||
):
|
||||
"""과목별 회독 진도 + 개념/문항 복습 due 요약."""
|
||||
return await cc.curriculum(session, user.id, topic_id)
|
||||
|
||||
|
||||
@router.get("/today-concepts")
|
||||
async def get_today_concepts(
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
topic_id: int = DEFAULT_TOPIC_ID,
|
||||
limit: int = 6,
|
||||
):
|
||||
"""오늘 공부할 개념(재복습 → 미독 빈출순)."""
|
||||
return await cc.today_concepts(session, user.id, topic_id, limit)
|
||||
|
||||
|
||||
@router.get("/concepts/weakness-map")
|
||||
async def get_weakness_map(
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
topic_id: int = DEFAULT_TOPIC_ID,
|
||||
limit: int = 12,
|
||||
):
|
||||
"""개념 약점 지도 — 링크된 기출 정답률로 약점 개념(정답률<60%) 우선(이론↔문제)."""
|
||||
name = await cc._topic_name(session, topic_id)
|
||||
if not name:
|
||||
return {"weak": [], "weak_total": 0, "evaluated_total": 0}
|
||||
return await cl.weakness_map(session, user.id, name, limit)
|
||||
|
||||
|
||||
@router.get("/concepts/{doc_id}")
|
||||
async def get_concept_detail(
|
||||
doc_id: int,
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
topic_id: int = DEFAULT_TOPIC_ID,
|
||||
):
|
||||
"""개념 리더 재료 — 구조 파싱(요약/본문/빈출/관련) + 백링크 해소 + 회독/SR + 이전/다음."""
|
||||
detail = await cc.concept_detail(session, user.id, topic_id, doc_id)
|
||||
if detail is None:
|
||||
raise HTTPException(status_code=404, detail="concept not found")
|
||||
return detail
|
||||
|
||||
|
||||
@router.get("/concepts/{doc_id}/questions")
|
||||
async def get_concept_questions(
|
||||
doc_id: int,
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
limit: int = 20,
|
||||
):
|
||||
"""개념 관련 기출 + 내 정답률 (이론↔문제 브리지)."""
|
||||
return await cl.related_questions(session, user.id, doc_id, limit)
|
||||
|
||||
|
||||
@router.post("/concepts/{doc_id}/read")
|
||||
async def post_concept_read(
|
||||
doc_id: int,
|
||||
user: Annotated[User, Depends(get_current_user)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
topic_id: int = DEFAULT_TOPIC_ID,
|
||||
):
|
||||
"""개념 회독 처리 → 회독 플래그 + SR 입고/전진."""
|
||||
return await cc.mark_read(session, user.id, topic_id, doc_id)
|
||||
+11
-2
@@ -31,11 +31,11 @@ def hash_password(password: str) -> str:
|
||||
return bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode()
|
||||
|
||||
|
||||
def create_access_token(subject: str, expires_minutes: int | None = None) -> str:
|
||||
def create_access_token(subject: str, expires_minutes: int | None = None, egress: str = "local") -> str:
|
||||
minutes = expires_minutes if expires_minutes is not None else ACCESS_TOKEN_EXPIRE_MINUTES
|
||||
now = datetime.now(timezone.utc)
|
||||
expire = now + timedelta(minutes=minutes)
|
||||
payload = {"sub": subject, "exp": expire, "iat": int(now.timestamp()), "type": "access"}
|
||||
payload = {"sub": subject, "exp": expire, "iat": int(now.timestamp()), "type": "access", "egress": egress}
|
||||
return jwt.encode(payload, settings.jwt_secret, algorithm=ALGORITHM)
|
||||
|
||||
|
||||
@@ -100,6 +100,15 @@ def verify_totp(code: str, secret: str | None = None) -> bool:
|
||||
return totp.verify(code)
|
||||
|
||||
|
||||
async def get_egress_class(
|
||||
credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)],
|
||||
) -> str:
|
||||
"""토큰 egress claim -> 'cloud'|'local' (갭2 cloud-egress allowlist). claim 부재=local
|
||||
(비파괴; 기존 토큰=신뢰/로컬). 쿼리파라미터 아님 -> 호출자가 끌 수 없음(우회 차단)."""
|
||||
payload = decode_token(credentials.credentials)
|
||||
return (payload or {}).get("egress", "local")
|
||||
|
||||
|
||||
async def get_current_user(
|
||||
credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)],
|
||||
session: Annotated[AsyncSession, Depends(get_session)],
|
||||
|
||||
@@ -33,6 +33,7 @@ from api.study_sessions import router as study_sessions_router
|
||||
from api.study_topics import router as study_topics_router
|
||||
from api.study_reminders import router as study_reminders_router
|
||||
from api.study_cards import router as study_cards_router
|
||||
from api.study_concepts import router as study_concepts_router
|
||||
from api.video import router as video_router
|
||||
from core.config import settings
|
||||
from core.database import async_session, engine, init_db
|
||||
@@ -249,6 +250,8 @@ app.include_router(study_reminders_router, prefix="/api/study-reminders", tags=[
|
||||
app.include_router(study_cards_router, prefix="/api/study-cards", tags=["study-cards"])
|
||||
# Phase 1: 학습 진행 상태 (review-complete + review-queue). prefix=/api/study-topics 안에 정의됨.
|
||||
app.include_router(study_question_progress_router, prefix="/api", tags=["study-progress"])
|
||||
# 이론공부 홈: 오늘의 개념·진도·회독 SR (개념문서 소비 표면, 문제풀이 무접촉).
|
||||
app.include_router(study_concepts_router, prefix="/api/study", tags=["study-theory"])
|
||||
|
||||
# TODO: Phase 5에서 추가
|
||||
# app.include_router(tasks.router, prefix="/api/tasks", tags=["tasks"])
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
"""study_concept_progress — 사용자 × 개념문서 단위 간격반복(SR) 진행 (이론공부 홈).
|
||||
|
||||
문제 SR(study_question_progress)의 개념(이론)판. '개념문서' = documents 한 건(가스기사 태그).
|
||||
회독(첫 read) → 복습 큐 진입, 이후 회독마다 sr_schedule 산술(1·3·7·14·졸업) 공용 전진.
|
||||
concept_doc_id 는 documents.id 를 가리키나 FK 미설정 — hot 테이블(documents) 락 회피(clause_study 선례).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import BigInteger, DateTime, ForeignKey, SmallInteger, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from core.database import Base
|
||||
|
||||
|
||||
class StudyConceptProgress(Base):
|
||||
__tablename__ = "study_concept_progress"
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"user_id", "concept_doc_id", name="uq_concept_progress_user_doc"
|
||||
),
|
||||
)
|
||||
|
||||
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
|
||||
user_id: Mapped[int] = mapped_column(
|
||||
BigInteger, ForeignKey("users.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
study_topic_id: Mapped[int] = mapped_column(
|
||||
BigInteger, ForeignKey("study_topics.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
# documents.id 참조 — FK 없음(락 회피). 개념문서 삭제 시 고아 행은 read 집계에서 자연 제외.
|
||||
concept_doc_id: Mapped[int] = mapped_column(BigInteger, nullable=False)
|
||||
|
||||
# 복습 큐 (sr_schedule 공용): stage 0~3 = 1·3·7·14일, 4 = 졸업(due_at NULL)
|
||||
review_stage: Mapped[int | None] = mapped_column(SmallInteger)
|
||||
due_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
|
||||
last_read_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), default=datetime.now, nullable=False
|
||||
)
|
||||
updated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), default=datetime.now, onupdate=datetime.now, nullable=False
|
||||
)
|
||||
@@ -76,10 +76,15 @@ class AxisFilter:
|
||||
jurisdiction: str | None = None
|
||||
year_from: int | None = None
|
||||
year_to: int | None = None
|
||||
domain_buckets: list[str] | None = None # 377: domain_bucket = ANY (도메인 스코프)
|
||||
exclude_buckets: list[str] | None = None # 377: domain_bucket <> ALL (예: News 제외)
|
||||
cloud_egress: bool = False # 갭2: 클라우드 소비자 cloud-eligibility allowlist 강제(토큰 claim 유래)
|
||||
|
||||
def active(self) -> bool:
|
||||
return bool(self.material_types or self.jurisdiction
|
||||
or self.year_from is not None or self.year_to is not None)
|
||||
or self.year_from is not None or self.year_to is not None
|
||||
or self.domain_buckets or self.exclude_buckets
|
||||
or self.cloud_egress)
|
||||
|
||||
|
||||
def _axis_sql(alias: str, af: "AxisFilter | None", params: dict) -> str:
|
||||
@@ -104,6 +109,22 @@ def _axis_sql(alias: str, af: "AxisFilter | None", params: dict) -> str:
|
||||
if af.year_to is not None:
|
||||
cl.append(f"COALESCE({p}published_date, {p}created_at::date) <= make_date(:af_yt, 12, 31)")
|
||||
params["af_yt"] = af.year_to
|
||||
if af.domain_buckets:
|
||||
cl.append(f"{p}domain_bucket = ANY(:af_db)")
|
||||
params["af_db"] = af.domain_buckets
|
||||
if af.exclude_buckets:
|
||||
cl.append(f"{p}domain_bucket <> ALL(:af_xdb)")
|
||||
params["af_xdb"] = af.exclude_buckets
|
||||
if af.cloud_egress:
|
||||
# 갭2 클라우드 egress allowlist(default-deny). restricted 는 _license_sql 별도 차단.
|
||||
cl.append(
|
||||
f"({p}data_origin = 'external' OR ("
|
||||
f"{p}data_origin = 'work' "
|
||||
f"AND {p}domain_bucket IN ('Engineering','Safety','Law') "
|
||||
f"AND ({p}source_channel IS NULL OR {p}source_channel::text NOT IN ('voice','chat','memo')) "
|
||||
f"AND {p}category::text IS DISTINCT FROM 'memo' "
|
||||
f"AND ({p}user_note IS NULL OR {p}user_note = '')))"
|
||||
)
|
||||
return " AND " + " AND ".join(cl)
|
||||
|
||||
|
||||
@@ -121,7 +142,21 @@ def _license_sql(alias: str) -> str:
|
||||
술어 정의 = license_filter.restricted_exclude_sql 공유(digest/briefing/study 풀이와 단일 source).
|
||||
"""
|
||||
from services.search.license_filter import restricted_exclude_sql
|
||||
return " AND " + restricted_exclude_sql(alias)
|
||||
_p = (alias + ".") if alias else ""
|
||||
# ASME clause-KB(379): clause docs (doc_kind='clause') = read/nav/backlink only, excluded from retrieval/digest legs.
|
||||
return " AND " + restricted_exclude_sql(alias) + f" AND {_p}doc_kind = 'standard'"
|
||||
|
||||
|
||||
def cloud_eligible_doc_sql(alias: str = "") -> str:
|
||||
"""단일 문서가 cloud 소비자(예: Claude/MCP)에게 노출 가능한가 = search retrieval 과
|
||||
동일한 egress allowlist(갭2) + license 제한(B-4) 결합 술어. fetch_document(cloud) 가
|
||||
search 와 byte-동일 게이트를 공유하도록 단일 source([[feedback_structural_integrity_over_path_discipline]]).
|
||||
|
||||
cloud_egress·license leg 모두 bind 파라미터 없는 리터럴 술어라 호출측 추가 params 불요.
|
||||
주의: _license_sql 은 소유자 단건 다운로드엔 미적용(a안)이지만, cloud 노출은 구매 전자책
|
||||
verbatim 누출을 막아야 하므로 여기선 항상 적용 = search 와 동일(local 토큰은 이 게이트 미발동).
|
||||
반환 ' AND (egress allowlist) AND (license)' (alias='' = 컬럼 직접 참조). default-deny."""
|
||||
return _axis_sql(alias, AxisFilter(cloud_egress=True), {}) + _license_sql(alias)
|
||||
|
||||
|
||||
# 2단계 gate (R2-B1) — SQL string interpolation 직전 final allowlist.
|
||||
|
||||
@@ -0,0 +1,284 @@
|
||||
"""concept_curriculum — 이론공부 홈 재료 (오늘의 개념 · 진도 · 회독 SR).
|
||||
|
||||
개념문서 = documents (user_tags = @library/{topic}/{과목}/... , 가스기사). is_read = 회독,
|
||||
md_content 의 ★ 개수 = 빈출 tier(★★★=3 / ★★=2 / else 1). 회독 SR = study_concept_progress
|
||||
+ sr_schedule(문제 SR 공용 산술). 읽기 전용 집계 + mark_read(회독+SR 입고)만 write. LLM 0.
|
||||
|
||||
문제풀이 표면 무접촉 — 여기서 읽는 study_question_progress 는 '문항 due 카운트'만(홈 표시용).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import func, or_, select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from models.document_read import DocumentRead
|
||||
from models.study_concept_progress import StudyConceptProgress
|
||||
from models.study_question_progress import StudyQuestionProgress
|
||||
from models.study_topic import StudyTopic
|
||||
from services.study.concept_parser import parse_concept, resolve_related
|
||||
from services.study.sr_schedule import advance, first_due
|
||||
|
||||
# 개념 행 조회 — 태그로 개념문서 필터 + 회독 진행 LEFT JOIN. md_content 는 전송 안 하고
|
||||
# ★ 유무만 서버측 boolean 으로(홈이 자주 호출돼도 페이로드 최소).
|
||||
# is_read = document_reads(회독 정본, is_read 컬럼 아님) EXISTS. library unread 와 동일 기준.
|
||||
_CONCEPT_ROWS_SQL = text(
|
||||
"""
|
||||
SELECT d.id AS doc_id,
|
||||
d.title AS title,
|
||||
EXISTS (
|
||||
SELECT 1 FROM document_reads r
|
||||
WHERE r.document_id = d.id AND r.user_id = :uid
|
||||
) AS is_read,
|
||||
(d.md_content LIKE '%★★★%') AS f3,
|
||||
(d.md_content LIKE '%★★%') AS f2,
|
||||
split_part(replace(d.user_tags::text, '"', ''), '/', 3) AS subject,
|
||||
p.review_stage AS review_stage,
|
||||
p.due_at AS due_at,
|
||||
p.last_read_at AS last_read_at
|
||||
FROM documents d
|
||||
LEFT JOIN study_concept_progress p
|
||||
ON p.concept_doc_id = d.id AND p.user_id = :uid
|
||||
WHERE d.user_tags::text LIKE :like
|
||||
AND d.deleted_at IS NULL
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
async def _topic_name(session: AsyncSession, topic_id: int) -> str | None:
|
||||
return (
|
||||
await session.execute(select(StudyTopic.name).where(StudyTopic.id == topic_id))
|
||||
).scalar_one_or_none()
|
||||
|
||||
|
||||
async def _concept_rows(session: AsyncSession, user_id: int, topic_name: str):
|
||||
like = f"%@library/{topic_name}/%"
|
||||
return (
|
||||
await session.execute(_CONCEPT_ROWS_SQL, {"uid": user_id, "like": like})
|
||||
).mappings().all()
|
||||
|
||||
|
||||
def _freq(row) -> int:
|
||||
if row["f3"]:
|
||||
return 3
|
||||
if row["f2"]:
|
||||
return 2
|
||||
return 1
|
||||
|
||||
|
||||
def _is_due(row, now: datetime) -> bool:
|
||||
return (
|
||||
row["due_at"] is not None
|
||||
and row["due_at"] <= now
|
||||
and (row["review_stage"] or 0) < 4
|
||||
)
|
||||
|
||||
|
||||
def _item(row) -> dict:
|
||||
return {
|
||||
"doc_id": row["doc_id"],
|
||||
"title": row["title"],
|
||||
"subject": row["subject"],
|
||||
"freq": _freq(row),
|
||||
"review_stage": row["review_stage"],
|
||||
"due_at": row["due_at"],
|
||||
}
|
||||
|
||||
|
||||
async def _question_due_count(session: AsyncSession, user_id: int, topic_id: int, now: datetime) -> int:
|
||||
"""문항 복습 due (기존 study_question_progress 엔진 재사용, 홈 표시용)."""
|
||||
return (
|
||||
await session.execute(
|
||||
select(func.count())
|
||||
.select_from(StudyQuestionProgress)
|
||||
.where(
|
||||
StudyQuestionProgress.user_id == user_id,
|
||||
StudyQuestionProgress.study_topic_id == topic_id,
|
||||
StudyQuestionProgress.due_at.is_not(None),
|
||||
StudyQuestionProgress.due_at <= now,
|
||||
or_(
|
||||
StudyQuestionProgress.review_stage.is_(None),
|
||||
StudyQuestionProgress.review_stage < 4,
|
||||
),
|
||||
)
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
|
||||
async def curriculum(session: AsyncSession, user_id: int, topic_id: int) -> dict:
|
||||
"""과목별 회독 진도 + 개념/문항 복습 due 요약 (진도 대시보드)."""
|
||||
name = await _topic_name(session, topic_id)
|
||||
rows = await _concept_rows(session, user_id, name) if name else []
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
subj: dict[str, dict] = {}
|
||||
for r in rows:
|
||||
s = subj.setdefault(r["subject"], {"subject": r["subject"], "total": 0, "read": 0})
|
||||
s["total"] += 1
|
||||
if r["is_read"]:
|
||||
s["read"] += 1
|
||||
|
||||
total = len(rows)
|
||||
read = sum(1 for r in rows if r["is_read"])
|
||||
concept_due = sum(1 for r in rows if _is_due(r, now))
|
||||
question_due = await _question_due_count(session, user_id, topic_id, now)
|
||||
|
||||
return {
|
||||
"topic_id": topic_id,
|
||||
"topic_name": name,
|
||||
"subjects": sorted(subj.values(), key=lambda x: x["subject"]),
|
||||
"total": total,
|
||||
"read": read,
|
||||
"concept_due": concept_due,
|
||||
"question_due": question_due,
|
||||
}
|
||||
|
||||
|
||||
async def today_concepts(
|
||||
session: AsyncSession, user_id: int, topic_id: int, limit: int = 6
|
||||
) -> dict:
|
||||
"""오늘 공부할 개념 = 재복습(SR due) 먼저 → 미독(빈출 우선). 졸업/재복습대기 제외."""
|
||||
name = await _topic_name(session, topic_id)
|
||||
rows = await _concept_rows(session, user_id, name) if name else []
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
due = [r for r in rows if _is_due(r, now)]
|
||||
due.sort(key=lambda r: r["due_at"])
|
||||
|
||||
# 미독 & 아직 SR 큐 진입 전(due_at NULL) → 빈출 높은 순
|
||||
unread = [r for r in rows if not r["is_read"] and r["due_at"] is None]
|
||||
unread.sort(key=lambda r: (-_freq(r), r["subject"], r["title"]))
|
||||
|
||||
picked = [{**_item(r), "reason": "재복습"} for r in due]
|
||||
picked += [{**_item(r), "reason": "신규"} for r in unread]
|
||||
|
||||
return {
|
||||
"concepts": picked[:limit],
|
||||
"due_total": len(due),
|
||||
"unread_total": len(unread),
|
||||
}
|
||||
|
||||
|
||||
async def mark_read(
|
||||
session: AsyncSession, user_id: int, topic_id: int, doc_id: int, now: datetime | None = None
|
||||
) -> dict:
|
||||
"""개념 회독 처리 = document_reads(+1) + 회독 SR 입고/전진.
|
||||
|
||||
회독 정본 = document_reads(append-only), documents.is_read 컬럼 아님(library unread 와 정합).
|
||||
첫 회독 → first_due(stage 0, 내일). 이후 회독은 'due 도래(due_at<=now)' 때만 correct 로 전진
|
||||
(이른 재열람/다중클릭 과전진 방지). stage 4 졸업 후엔 due_at NULL 이라 전진 없음.
|
||||
"""
|
||||
now = now or datetime.now(timezone.utc)
|
||||
|
||||
# 회독 로그 append (+1) — 사용자 명시 회독. 자동 아님(엔드포인트 = 명시 POST).
|
||||
session.add(DocumentRead(user_id=user_id, document_id=doc_id, read_at=now))
|
||||
|
||||
prog = (
|
||||
await session.execute(
|
||||
select(StudyConceptProgress).where(
|
||||
StudyConceptProgress.user_id == user_id,
|
||||
StudyConceptProgress.concept_doc_id == doc_id,
|
||||
)
|
||||
)
|
||||
).scalar_one_or_none()
|
||||
|
||||
if prog is None:
|
||||
stage, due = first_due(now)
|
||||
prog = StudyConceptProgress(
|
||||
user_id=user_id,
|
||||
study_topic_id=topic_id,
|
||||
concept_doc_id=doc_id,
|
||||
review_stage=stage,
|
||||
due_at=due,
|
||||
last_read_at=now,
|
||||
)
|
||||
session.add(prog)
|
||||
else:
|
||||
# due 도래 시에만 전진 — 미래 due(재열람 이른 클릭)는 stage 불변, last_read_at 만 갱신.
|
||||
if prog.due_at is not None and prog.due_at <= now:
|
||||
res = advance(prog.review_stage, "correct", now)
|
||||
if res is not None:
|
||||
prog.review_stage, prog.due_at = res
|
||||
prog.last_read_at = now
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(prog)
|
||||
return {"ok": True, "review_stage": prog.review_stage, "due_at": prog.due_at}
|
||||
|
||||
|
||||
_CONCEPT_ONE_SQL = text(
|
||||
"""
|
||||
SELECT d.id AS doc_id, d.title AS title, d.md_content AS md_content,
|
||||
split_part(replace(d.user_tags::text, '"', ''), '/', 3) AS subject,
|
||||
(d.md_content LIKE '%★★★%') AS f3,
|
||||
(d.md_content LIKE '%★★%') AS f2,
|
||||
EXISTS (
|
||||
SELECT 1 FROM document_reads r
|
||||
WHERE r.document_id = d.id AND r.user_id = :uid
|
||||
) AS is_read,
|
||||
p.review_stage AS review_stage,
|
||||
p.due_at AS due_at
|
||||
FROM documents d
|
||||
LEFT JOIN study_concept_progress p ON p.concept_doc_id = d.id AND p.user_id = :uid
|
||||
WHERE d.id = :doc_id AND d.deleted_at IS NULL AND d.user_tags::text LIKE :like
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
async def concept_detail(
|
||||
session: AsyncSession, user_id: int, topic_id: int, doc_id: int
|
||||
) -> dict | None:
|
||||
"""개념 리더 재료 — md 구조 파싱 + 관련개념 백링크 해소 + 회독/SR 상태 + 같은 과목 이전/다음."""
|
||||
name = await _topic_name(session, topic_id)
|
||||
if not name:
|
||||
return None
|
||||
like = f"%@library/{name}/%"
|
||||
row = (
|
||||
await session.execute(
|
||||
_CONCEPT_ONE_SQL, {"uid": user_id, "doc_id": doc_id, "like": like}
|
||||
)
|
||||
).mappings().first()
|
||||
if row is None:
|
||||
return None
|
||||
|
||||
parsed = parse_concept(row["md_content"] or "")
|
||||
|
||||
# 백링크 해소 + 이전/다음 = 같은 토픽 개념 title 인덱스(회독 rows 재사용)
|
||||
idx = await _concept_rows(session, user_id, name)
|
||||
title_index = [(r["doc_id"], r["title"], r["subject"]) for r in idx]
|
||||
resolved = resolve_related(parsed["related"], title_index)
|
||||
|
||||
# 이전/다음 = 같은 과목, title 순
|
||||
same = sorted(
|
||||
[(r["doc_id"], r["title"]) for r in idx if r["subject"] == row["subject"]],
|
||||
key=lambda x: (x[1] or "", x[0]),
|
||||
)
|
||||
ids = [d for d, _ in same]
|
||||
prev_id = next_id = None
|
||||
if doc_id in ids:
|
||||
pos = ids.index(doc_id)
|
||||
if pos > 0:
|
||||
prev_id = ids[pos - 1]
|
||||
if pos < len(ids) - 1:
|
||||
next_id = ids[pos + 1]
|
||||
|
||||
freq = 3 if row["f3"] else (2 if row["f2"] else 1)
|
||||
|
||||
return {
|
||||
"doc_id": row["doc_id"],
|
||||
"db_title": row["title"],
|
||||
"title": parsed["title"] or row["title"],
|
||||
"subject": row["subject"],
|
||||
"freq": freq,
|
||||
"summary": parsed["summary"],
|
||||
"body": parsed["body"],
|
||||
"bincheol": parsed["bincheol"],
|
||||
"related": resolved,
|
||||
"is_read": row["is_read"],
|
||||
"review_stage": row["review_stage"],
|
||||
"due_at": row["due_at"],
|
||||
"prev_id": prev_id,
|
||||
"next_id": next_id,
|
||||
}
|
||||
@@ -0,0 +1,139 @@
|
||||
"""concept_links — 이론↔문제 브리지 롤업 (Stage B).
|
||||
|
||||
study_concept_links(개념 doc ↔ 기출문항, 임베딩 코사인) + study_question_progress(내 풀이상태)를
|
||||
조인해 (a) 개념별 관련 기출 + 내 정답률(related_questions), (b) 개념 약점 지도(weakness_map) 산출.
|
||||
읽기 전용 집계 · LLM 0. 링크 적재는 scripts/concept_links_backfill.sql(임베딩) 배치.
|
||||
정답률 = 링크된 문항 중 progress.last_outcome 기준(attempted=풀이이력 보유, correct=최근정답).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
_ACCURACY_WEAK_PCT = 60 # 정답률 < 60% = 약점(attempted>0 일 때만)
|
||||
|
||||
_AGG_SQL = text(
|
||||
"""
|
||||
SELECT count(*) AS linked,
|
||||
count(pr.study_question_id) FILTER (WHERE pr.last_outcome IS NOT NULL) AS attempted,
|
||||
count(*) FILTER (WHERE pr.last_outcome = 'correct') AS correct
|
||||
FROM study_concept_links l
|
||||
LEFT JOIN study_question_progress pr
|
||||
ON pr.study_question_id = l.question_id AND pr.user_id = :uid
|
||||
WHERE l.concept_doc_id = :doc_id AND l.link_source = 'embedding'
|
||||
"""
|
||||
)
|
||||
|
||||
_QROWS_SQL = text(
|
||||
"""
|
||||
SELECT q.id AS id, q.subject AS subject, q.exam_round AS exam_round,
|
||||
q.exam_question_number AS qnum, l.score AS score,
|
||||
pr.last_outcome AS last_outcome, pr.review_stage AS review_stage
|
||||
FROM study_concept_links l
|
||||
JOIN study_questions q ON q.id = l.question_id AND q.deleted_at IS NULL AND q.is_active
|
||||
LEFT JOIN study_question_progress pr
|
||||
ON pr.study_question_id = q.id AND pr.user_id = :uid
|
||||
WHERE l.concept_doc_id = :doc_id AND l.link_source = 'embedding'
|
||||
ORDER BY l.score DESC
|
||||
LIMIT :limit
|
||||
"""
|
||||
)
|
||||
|
||||
_WEAKNESS_SQL = text(
|
||||
"""
|
||||
SELECT d.id AS doc_id, d.title AS title,
|
||||
split_part(replace(d.user_tags::text, '"', ''), '/', 3) AS subject,
|
||||
count(l.id) AS linked,
|
||||
count(pr.study_question_id) FILTER (WHERE pr.last_outcome IS NOT NULL) AS attempted,
|
||||
count(*) FILTER (WHERE pr.last_outcome = 'correct') AS correct
|
||||
FROM documents d
|
||||
JOIN study_concept_links l ON l.concept_doc_id = d.id AND l.link_source = 'embedding'
|
||||
LEFT JOIN study_question_progress pr
|
||||
ON pr.study_question_id = l.question_id AND pr.user_id = :uid
|
||||
WHERE d.user_tags::text LIKE :like AND d.deleted_at IS NULL
|
||||
GROUP BY d.id, d.title, subject
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
async def related_questions(
|
||||
session: AsyncSession, user_id: int, doc_id: int, limit: int = 20
|
||||
) -> dict:
|
||||
"""개념 doc 의 관련 기출 + 내 정답률(전체 링크 기준 집계 + 상위 N 표시용)."""
|
||||
agg = (
|
||||
await session.execute(_AGG_SQL, {"uid": user_id, "doc_id": doc_id})
|
||||
).mappings().first()
|
||||
rows = (
|
||||
await session.execute(
|
||||
_QROWS_SQL, {"uid": user_id, "doc_id": doc_id, "limit": limit}
|
||||
)
|
||||
).mappings().all()
|
||||
|
||||
linked = (agg["linked"] if agg else 0) or 0
|
||||
attempted = (agg["attempted"] if agg else 0) or 0
|
||||
correct = (agg["correct"] if agg else 0) or 0
|
||||
accuracy = round(100 * correct / attempted) if attempted else None
|
||||
|
||||
return {
|
||||
"linked": linked,
|
||||
"attempted": attempted,
|
||||
"correct": correct,
|
||||
"accuracy": accuracy,
|
||||
"questions": [
|
||||
{
|
||||
"id": r["id"],
|
||||
"subject": r["subject"],
|
||||
"exam_round": r["exam_round"],
|
||||
"qnum": r["qnum"],
|
||||
"score": round(r["score"], 3) if r["score"] is not None else None,
|
||||
"last_outcome": r["last_outcome"],
|
||||
"review_stage": r["review_stage"],
|
||||
}
|
||||
for r in rows
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
async def weakness_map(
|
||||
session: AsyncSession, user_id: int, topic_name: str, limit: int = 12
|
||||
) -> dict:
|
||||
"""개념 약점 지도 — 링크된 기출 정답률로 개념 채색. 약점(attempted>0·정답률<60%) 우선 정렬."""
|
||||
like = f"%@library/{topic_name}/%"
|
||||
rows = (
|
||||
await session.execute(_WEAKNESS_SQL, {"uid": user_id, "like": like})
|
||||
).mappings().all()
|
||||
|
||||
concepts = []
|
||||
for r in rows:
|
||||
attempted = r["attempted"] or 0
|
||||
correct = r["correct"] or 0
|
||||
accuracy = round(100 * correct / attempted) if attempted else None
|
||||
if accuracy is None:
|
||||
state = "unattempted"
|
||||
elif accuracy < _ACCURACY_WEAK_PCT:
|
||||
state = "weak"
|
||||
else:
|
||||
state = "ok"
|
||||
concepts.append(
|
||||
{
|
||||
"doc_id": r["doc_id"],
|
||||
"title": r["title"],
|
||||
"subject": r["subject"],
|
||||
"linked": r["linked"] or 0,
|
||||
"attempted": attempted,
|
||||
"accuracy": accuracy,
|
||||
"state": state,
|
||||
}
|
||||
)
|
||||
|
||||
# 약점 우선(정답률 오름차순) → 미평가는 뒤로. 홈 위젯용 상위 N.
|
||||
weak = sorted(
|
||||
[c for c in concepts if c["state"] == "weak"],
|
||||
key=lambda c: (c["accuracy"], -c["attempted"], c["doc_id"]),
|
||||
)
|
||||
return {
|
||||
"weak": weak[:limit],
|
||||
"weak_total": len(weak),
|
||||
"evaluated_total": sum(1 for c in concepts if c["state"] != "unattempted"),
|
||||
}
|
||||
@@ -0,0 +1,175 @@
|
||||
"""concept_parser — 개념노트 markdown 구조 파서 + 관련개념 백링크 해소 (이론 리더용).
|
||||
|
||||
정찰 실측 불변식(273/273): 개념노트는 고정 골격을 100% 따름 —
|
||||
# {H1 제목} (첫 줄, DB title 과 다른 표시용 제목)
|
||||
> **한 줄 요약**: {요약} (blockquote, 라벨 고정)
|
||||
## {본문 라벨} ... (BODY, 자유 라벨 H2 0~N, 트레일 ★ 가능)
|
||||
## 빈출 포인트 (항상, 관련개념 직전)
|
||||
## 관련 개념 (항상, 문서 최종 섹션)
|
||||
|
||||
코드펜스(``` ASCII 도식) 내부의 ##/- 는 무시. 헤딩 트레일 ★ 는 스트립(라벨 정규화).
|
||||
'빈출 포인트'/'관련 개념' 앵커만 이름으로 잡고 나머지 BODY 는 순서·위치로 처리(라벨 화이트리스트 금지).
|
||||
순수 함수 · LLM 0.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
_FENCE = re.compile(r"^\s*```")
|
||||
_H1 = re.compile(r"^#\s+(.+?)\s*$")
|
||||
_H2 = re.compile(r"^##\s+(.+?)\s*$") # ### 는 매칭 안 됨(## 뒤 \s 요구)
|
||||
_SUMMARY = re.compile(r"^>\s*\*\*한 줄 요약\*\*:\s*(.+)$")
|
||||
_STAR_SUFFIX = re.compile(r"\s*★+\s*$")
|
||||
_TRAIL_STARS = re.compile(r"★+\s*$")
|
||||
_BINCHEOL_ITEM = re.compile(r"^\s*-\s+(★*)\s*(.+)$")
|
||||
_RELATED_ITEM = re.compile(r"^\s*-\s+(.+)$")
|
||||
_PAREN = re.compile(r"\s*\(.*$") # 괄호부터 끝(clarifier 힌트 절단)
|
||||
_NUM_PREFIX = re.compile(r"^\d+_")
|
||||
_STRIP_SYM = re.compile(r"[\s_·,./()\-]")
|
||||
|
||||
_ANCHOR_BINCHEOL = "빈출 포인트"
|
||||
_ANCHOR_RELATED = "관련 개념"
|
||||
|
||||
|
||||
def parse_concept(md: str) -> dict:
|
||||
"""개념노트 md → {title, summary, body[{label,stars,md}], bincheol[{tier,text}], related[{raw,phrase,hint}]}."""
|
||||
lines = (md or "").split("\n")
|
||||
title: str | None = None
|
||||
summary: str | None = None
|
||||
body: list[dict] = []
|
||||
bincheol_lines: list[str] = []
|
||||
related_lines: list[str] = []
|
||||
|
||||
in_fence = False
|
||||
zone = "pre" # pre | body | bincheol | related
|
||||
body_cur: dict | None = None
|
||||
|
||||
def emit(line: str) -> None:
|
||||
if body_cur is not None:
|
||||
body_cur["_lines"].append(line)
|
||||
elif zone == "bincheol":
|
||||
bincheol_lines.append(line)
|
||||
elif zone == "related":
|
||||
related_lines.append(line)
|
||||
# pre-zone 내용(요약 앞 잡음)은 버림
|
||||
|
||||
for ln in lines:
|
||||
if _FENCE.match(ln):
|
||||
in_fence = not in_fence
|
||||
emit(ln)
|
||||
continue
|
||||
if in_fence:
|
||||
emit(ln)
|
||||
continue
|
||||
|
||||
if title is None:
|
||||
m = _H1.match(ln)
|
||||
if m:
|
||||
title = m.group(1).strip()
|
||||
continue
|
||||
if summary is None:
|
||||
m = _SUMMARY.match(ln)
|
||||
if m:
|
||||
summary = m.group(1).strip()
|
||||
continue
|
||||
|
||||
m2 = _H2.match(ln)
|
||||
if m2:
|
||||
raw_label = m2.group(1).strip()
|
||||
star_m = _TRAIL_STARS.search(raw_label)
|
||||
stars = len(star_m.group(0).strip()) if star_m else 0
|
||||
label = _STAR_SUFFIX.sub("", raw_label).strip()
|
||||
if label == _ANCHOR_BINCHEOL:
|
||||
zone = "bincheol"
|
||||
body_cur = None
|
||||
continue
|
||||
if label == _ANCHOR_RELATED:
|
||||
zone = "related"
|
||||
body_cur = None
|
||||
continue
|
||||
body_cur = {"label": label, "stars": stars, "_lines": []}
|
||||
body.append(body_cur)
|
||||
zone = "body"
|
||||
continue
|
||||
|
||||
emit(ln)
|
||||
|
||||
body_out = []
|
||||
for s in body:
|
||||
text = "\n".join(s["_lines"]).strip()
|
||||
if text or s["label"]:
|
||||
body_out.append({"label": s["label"], "stars": s["stars"], "md": text})
|
||||
|
||||
bincheol = []
|
||||
for ln in bincheol_lines:
|
||||
m = _BINCHEOL_ITEM.match(ln)
|
||||
if m:
|
||||
bincheol.append({"tier": len(m.group(1)), "text": m.group(2).strip()})
|
||||
|
||||
related = []
|
||||
for ln in related_lines:
|
||||
m = _RELATED_ITEM.match(ln)
|
||||
if m:
|
||||
raw = m.group(1).strip()
|
||||
phrase = _PAREN.sub("", raw).strip()
|
||||
hint = raw[len(phrase):].strip() if len(raw) > len(phrase) else ""
|
||||
if phrase:
|
||||
related.append({"raw": raw, "phrase": phrase, "hint": hint})
|
||||
|
||||
return {
|
||||
"title": title,
|
||||
"summary": summary,
|
||||
"body": body_out,
|
||||
"bincheol": bincheol,
|
||||
"related": related,
|
||||
}
|
||||
|
||||
|
||||
def _normalize(s: str) -> str:
|
||||
"""해소용 정규화: NN_ 접두 제거 → 소문자 → 공백/기호 제거. 영문은 lowercase 유지."""
|
||||
s = _NUM_PREFIX.sub("", s or "")
|
||||
s = s.lower()
|
||||
s = _STRIP_SYM.sub("", s)
|
||||
return s
|
||||
|
||||
|
||||
def resolve_related(related: list[dict], title_index: list[tuple]) -> list[dict]:
|
||||
"""관련개념 구절 → 개념 doc 해소. title_index = [(doc_id, title, subject), ...].
|
||||
|
||||
다단 fallback(정찰 ~79%): 정규화 exact → 양방향 substring(≥2자 가드) → 미해소=dangling(doc_id None).
|
||||
"""
|
||||
norm_exact: dict[str, int] = {}
|
||||
norm_list: list[tuple[str, int, str]] = []
|
||||
for did, ttl, _subj in title_index:
|
||||
n = _normalize(ttl)
|
||||
if n:
|
||||
norm_exact.setdefault(n, did)
|
||||
norm_list.append((n, did, ttl))
|
||||
|
||||
out = []
|
||||
for it in related:
|
||||
pn = _normalize(it["phrase"])
|
||||
did: int | None = None
|
||||
rtitle: str | None = None
|
||||
if pn and len(pn) >= 2:
|
||||
if pn in norm_exact:
|
||||
did = norm_exact[pn]
|
||||
else:
|
||||
# substring 폴백: title-norm ⊆ phrase-norm 방향만(짧은 phrase 가 더 큰 title 을
|
||||
# 삼키는 오결선 방지, 예: '염산'→'염산나트륨' X) + 길이차 최소(가장 구체적) +
|
||||
# doc_id tiebreak(순서 무관 결정성). 후보 없으면 dangling(doc_id None).
|
||||
cands = [
|
||||
(abs(len(n) - len(pn)), cand, ttl)
|
||||
for n, cand, ttl in norm_list
|
||||
if len(n) >= 2 and n in pn
|
||||
]
|
||||
if cands:
|
||||
cands.sort(key=lambda c: (c[0], c[1]))
|
||||
_, did, rtitle = cands[0]
|
||||
if did is not None and rtitle is None:
|
||||
rtitle = next((t for d, t, _ in title_index if d == did), None)
|
||||
out.append(
|
||||
{"phrase": it["phrase"], "hint": it["hint"], "doc_id": did, "title": rtitle}
|
||||
)
|
||||
return out
|
||||
@@ -0,0 +1,45 @@
|
||||
<script>
|
||||
// 관련 문서 (유사도) — 문서 레벨 임베딩 KNN. 자기완결: docId 받아 /related 조회.
|
||||
import { onMount } from 'svelte';
|
||||
import { api } from '$lib/api';
|
||||
|
||||
let { documentId } = $props();
|
||||
let items = $state([]);
|
||||
let loaded = $state(false);
|
||||
|
||||
const KIND = { law: '법령', guide: '지침', paper: '논문', standard: '표준', incident: '사례' };
|
||||
|
||||
onMount(async () => {
|
||||
try {
|
||||
const r = await api(`/documents/${documentId}/related?limit=6`);
|
||||
items = r?.related ?? [];
|
||||
} catch (e) { /* silent */ }
|
||||
finally { loaded = true; }
|
||||
});
|
||||
</script>
|
||||
|
||||
{#if items.length}
|
||||
<div class="rel">
|
||||
<div class="lab">관련 문서</div>
|
||||
{#each items as it (it.id)}
|
||||
<a class="ri" href={`/documents/${it.id}`}>
|
||||
<span class="rt">{it.title}</span>
|
||||
<span class="rm">
|
||||
{#if it.material_type && KIND[it.material_type]}<span class="kind">{KIND[it.material_type]}</span>{/if}
|
||||
<span class="rs">{Math.round((it.sim ?? 0) * 100)}</span>
|
||||
</span>
|
||||
</a>
|
||||
{/each}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<style>
|
||||
.rel { background: var(--surface); border: 1px solid var(--border); border-radius: 14px; padding: 13px; }
|
||||
.lab { font-size: 10.5px; font-weight: 700; color: var(--text-dim); letter-spacing: .4px; margin-bottom: 8px; }
|
||||
.ri { display: flex; align-items: baseline; gap: 8px; padding: 5px 6px; border-radius: 7px; text-decoration: none; }
|
||||
.ri:hover { background: var(--surface-hover, #ecf0e8); }
|
||||
.rt { flex: 1; font-size: 12px; line-height: 1.4; color: var(--text); overflow: hidden; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; }
|
||||
.rm { flex-shrink: 0; display: flex; align-items: center; gap: 5px; }
|
||||
.kind { font-size: 9px; font-weight: 700; color: var(--accent-hover, #3d7256); background: #e3efe2; border: 1px solid #cfe3cd; border-radius: 4px; padding: 0 4px; }
|
||||
.rs { font-size: 10.5px; font-family: ui-monospace, Menlo, monospace; color: var(--faint, #9aa090); }
|
||||
</style>
|
||||
@@ -0,0 +1,33 @@
|
||||
<script>
|
||||
// 시안 B — 글로벌 네비 슬림 아이콘 레일 (분류 사이드바 접힘 상태). 앱 토큰 사용.
|
||||
import { page } from '$app/stores';
|
||||
import { Home, FolderTree, Newspaper, StickyNote, Hash, GraduationCap, MessageCircle, Inbox, CalendarCheck } from 'lucide-svelte';
|
||||
|
||||
const items = [
|
||||
{ href: '/', icon: Home, label: '홈', exact: true },
|
||||
{ href: '/library', icon: FolderTree, label: '문서' },
|
||||
{ href: '/news', icon: Newspaper, label: '뉴스' },
|
||||
{ href: '/memos', icon: StickyNote, label: '메모' },
|
||||
{ href: '/clause', icon: Hash, label: '절' },
|
||||
{ href: '/events', icon: CalendarCheck, label: '일정' },
|
||||
{ href: '/study', icon: GraduationCap, label: '공부' },
|
||||
{ href: '/chat', icon: MessageCircle, label: '이드' },
|
||||
{ href: '/inbox', icon: Inbox, label: '편지함' },
|
||||
];
|
||||
let path = $derived($page.url.pathname);
|
||||
const active = (it) => (it.exact ? path === it.href : path.startsWith(it.href));
|
||||
</script>
|
||||
|
||||
<nav class="flex flex-col items-center gap-1 py-2 h-full overflow-y-auto bg-sidebar">
|
||||
{#each items as it (it.href)}
|
||||
{@const Icon = it.icon}
|
||||
<a
|
||||
href={it.href}
|
||||
title={it.label}
|
||||
class="flex flex-col items-center justify-center gap-0.5 w-12 h-[46px] rounded-lg text-dim hover:bg-surface-hover hover:text-accent transition-colors {active(it) ? 'bg-surface-active text-accent font-semibold' : ''}"
|
||||
>
|
||||
<Icon size={17} strokeWidth={1.75} />
|
||||
<span class="text-[8.5px] leading-none tracking-tight">{it.label}</span>
|
||||
</a>
|
||||
{/each}
|
||||
</nav>
|
||||
@@ -11,6 +11,7 @@
|
||||
import { queueOverview } from '$lib/stores/queueOverview';
|
||||
import { MACHINE_STATE_LABEL, machineChipClass } from '$lib/utils/queueDisplay';
|
||||
import Sidebar from '$lib/components/Sidebar.svelte';
|
||||
import SlimRail from '$lib/components/SlimRail.svelte';
|
||||
import SystemStatusDot from '$lib/components/SystemStatusDot.svelte';
|
||||
import QueueDrawer from '$lib/components/QueueDrawer.svelte';
|
||||
import QuickMemoButton from '$lib/components/QuickMemoButton.svelte';
|
||||
@@ -21,7 +22,7 @@
|
||||
const PUBLIC_PATHS = ['/login', '/setup', '/__styleguide'];
|
||||
const NO_CHROME_PATHS = ['/login', '/setup', '/__styleguide'];
|
||||
// /news = 풀스크린 브리핑 → 데스크탑 상시 사이드바 없음
|
||||
const NO_SIDEBAR_PATHS = ['/news'];
|
||||
const NO_SIDEBAR_PATHS = ['/news', '/book']; // /book = 책 몰입(글로벌 분류 트리 숨김, 상단 네비 유지)
|
||||
|
||||
// toast 의미 토큰 매핑 (A-8 B3)
|
||||
const TOAST_CLASS = {
|
||||
@@ -198,8 +199,8 @@
|
||||
<!-- 메인: 데스크탑 상시 사이드바 + 콘텐츠 -->
|
||||
<div class="flex-1 min-h-0 flex">
|
||||
{#if showSidebar}
|
||||
<aside class="hidden lg:block shrink-0 overflow-hidden transition-[width] duration-200 ease-out {sidebarCollapsed ? 'w-0 border-r-0' : 'w-sidebar border-r border-default'}">
|
||||
<Sidebar />
|
||||
<aside class="hidden lg:block shrink-0 overflow-hidden transition-[width] duration-200 ease-out {sidebarCollapsed ? 'w-14 border-r border-default' : 'w-sidebar border-r border-default'}">
|
||||
{#if sidebarCollapsed}<SlimRail />{:else}<Sidebar />{/if}
|
||||
</aside>
|
||||
{/if}
|
||||
<main class="flex-1 min-w-0 overflow-auto">
|
||||
|
||||
@@ -0,0 +1,281 @@
|
||||
<script>
|
||||
// ASME/법령 절-KB — 코드북·공부 리더 (r2). parent 표준/법령을 한 권의 책처럼.
|
||||
// 좌 인덱스(Part/章→절/조) · 중 본문(MarkdownDoc=공식·표·이미지) · breadcrumb·이전다음·양방향 백링크.
|
||||
import { onMount, tick } from 'svelte';
|
||||
import { page } from '$app/stores';
|
||||
import { goto } from '$app/navigation';
|
||||
import { api } from '$lib/api';
|
||||
import MarkdownDoc from '$lib/components/MarkdownDoc.svelte';
|
||||
|
||||
let parentId = $state(null);
|
||||
let parentTitle = $state('');
|
||||
let clauses = $state([]);
|
||||
let selectedId = $state(null);
|
||||
let clauseDoc = $state(null);
|
||||
let links = $state(null);
|
||||
let expanded = $state({});
|
||||
let loading = $state(false);
|
||||
let q = $state('');
|
||||
|
||||
// 공부도구 (노트/형광펜/암기카드) — clause_study
|
||||
let studyItems = $state([]);
|
||||
let studyOpen = $state(false);
|
||||
let noteDraft = $state('');
|
||||
const KLABEL = { note: '노트', highlight: '형광펜', card: '암기카드' };
|
||||
async function loadStudy(id) {
|
||||
try { const r = await api(`/documents/${id}/study`); studyItems = r?.items ?? []; }
|
||||
catch { studyItems = []; }
|
||||
}
|
||||
async function addStudy(kind, payload) {
|
||||
if (!selectedId) return;
|
||||
try { await api(`/documents/${selectedId}/study`, { method: 'POST', body: JSON.stringify({ kind, payload }) }); await loadStudy(selectedId); }
|
||||
catch (e) { console.warn(e); }
|
||||
}
|
||||
function selText() { return (typeof window !== 'undefined' && window.getSelection ? window.getSelection().toString() : '').trim(); }
|
||||
function addNote() { const t = noteDraft.trim(); if (!t) return; addStudy('note', { text: t }); noteDraft = ''; }
|
||||
function addHighlight() { const s = selText(); if (!s) { studyOpen = true; alert('본문에서 형광펜 칠할 부분을 먼저 드래그하세요'); return; } addStudy('highlight', { text: s }); studyOpen = true; }
|
||||
function addCard() {
|
||||
const s = selText();
|
||||
const code = links?.clause_code ?? selMeta?.clause_code ?? '';
|
||||
addStudy('card', { cue: `${code} ${strip(clauseDoc?.title, code)}`.trim(), fact: s || (clauseDoc?.md_content ?? clauseDoc?.extracted_text ?? '').replace(/[#*>]/g, '').slice(0, 280).trim() });
|
||||
studyOpen = true;
|
||||
}
|
||||
async function delStudy(id) {
|
||||
try { await api(`/documents/${selectedId}/study/${id}`, { method: 'DELETE' }); await loadStudy(selectedId); } catch {}
|
||||
}
|
||||
|
||||
let parts = $derived.by(() => {
|
||||
const out = [], idx = {};
|
||||
for (const c of clauses) {
|
||||
const p = c.clause_part || '·';
|
||||
if (!(p in idx)) { idx[p] = out.length; out.push({ part: p, items: [] }); }
|
||||
out[idx[p]].items.push(c);
|
||||
}
|
||||
return out;
|
||||
});
|
||||
let visibleParts = $derived.by(() => {
|
||||
const term = q.trim().toLowerCase();
|
||||
if (!term) return parts;
|
||||
return parts.map(g => ({ part: g.part, items: g.items.filter(c =>
|
||||
(c.clause_code || '').toLowerCase().includes(term) || (c.title || '').toLowerCase().includes(term)) }))
|
||||
.filter(g => g.items.length);
|
||||
});
|
||||
let selMeta = $derived(clauses.find((c) => c.id === selectedId) || null);
|
||||
const strip = (t, c) => (t || '').replace(c || '', '').replace(/^[(\s)]+|[(\s)]+$/g, '').trim();
|
||||
|
||||
async function loadBook() {
|
||||
const r = await api(`/documents/${parentId}/clauses`);
|
||||
parentTitle = r?.parent_title ?? '';
|
||||
clauses = r?.clauses ?? [];
|
||||
const e = {};
|
||||
for (const c of clauses) e[c.clause_part || '·'] = true;
|
||||
expanded = e;
|
||||
}
|
||||
async function loadClause(id) {
|
||||
if (!id) return;
|
||||
loading = true;
|
||||
selectedId = id;
|
||||
try {
|
||||
const [d, l] = await Promise.all([api(`/documents/${id}`), api(`/documents/${id}/backlinks`)]);
|
||||
clauseDoc = d; links = l;
|
||||
loadStudy(id);
|
||||
const sel = clauses.find((c) => c.id === id);
|
||||
if (sel) expanded = { ...expanded, [sel.clause_part || '·']: true };
|
||||
goto(`/book/${parentId}?c=${id}`, { replaceState: true, keepFocus: true, noScroll: true });
|
||||
await tick(); window.scrollTo({ top: 0 });
|
||||
} finally { loading = false; }
|
||||
}
|
||||
onMount(async () => {
|
||||
parentId = Number($page.params.id);
|
||||
await loadBook();
|
||||
const c = Number($page.url.searchParams.get('c'));
|
||||
await loadClause(c && clauses.find((x) => x.id === c) ? c : clauses[0]?.id);
|
||||
});
|
||||
</script>
|
||||
|
||||
<div class="book">
|
||||
<!-- top bar -->
|
||||
<div class="bar">
|
||||
<span class="brand">절-KB</span>
|
||||
<span class="crumbs">{parentTitle} {#if selMeta}<b class="sep">›</b> {selMeta.clause_part} <b class="sep">›</b> <b>{links?.clause_code ?? selMeta.clause_code}</b>{/if}</span>
|
||||
<div class="search"><input placeholder="절·조 번호 또는 키워드" bind:value={q} /></div>
|
||||
<div class="tools"><span class="tool on">읽기</span><span class="tool">형광펜</span><span class="tool">노트</span><span class="tool">암기카드</span></div>
|
||||
</div>
|
||||
|
||||
<div class="main">
|
||||
<!-- left index -->
|
||||
<aside class="idx">
|
||||
<a class="btitle" href={`/documents/${parentId}`}>{parentTitle || '표준'}</a>
|
||||
<div class="bmeta">절 {clauses.length} · 한 권의 책처럼 탐색</div>
|
||||
{#each visibleParts as g (g.part)}
|
||||
<div class="parttab" role="button" tabindex="0" onclick={() => (expanded = { ...expanded, [g.part]: !expanded[g.part] })}>
|
||||
<span class="bar2"></span><span class="pname">{g.part}</span><span class="ct">{g.items.length}</span>
|
||||
</div>
|
||||
{#if expanded[g.part] || q.trim()}
|
||||
{#each g.items as c (c.id)}
|
||||
<div class="ci" class:on={c.id === selectedId} role="button" tabindex="0" onclick={() => loadClause(c.id)}>
|
||||
<span class="no">{c.clause_code}</span><span class="tt">{strip(c.title, c.clause_code)}</span>
|
||||
</div>
|
||||
{/each}
|
||||
{/if}
|
||||
{/each}
|
||||
</aside>
|
||||
|
||||
<!-- reader -->
|
||||
<section class="read">
|
||||
<div class="col">
|
||||
{#if clauseDoc}
|
||||
<div class="studybar">
|
||||
<button class="sbtn" title="선택 형광펜" onclick={addHighlight}>▰</button>
|
||||
<button class="sbtn" class:on={studyOpen} title="노트/공부" onclick={() => (studyOpen = !studyOpen)}>✎</button>
|
||||
<button class="sbtn" title="암기카드 추가" onclick={addCard}>+</button>
|
||||
{#if studyItems.length}<span class="scount">{studyItems.length}</span>{/if}
|
||||
</div>
|
||||
<div class="kicker"><span class="pth">{selMeta?.clause_part}</span></div>
|
||||
<div class="h-no">{links?.clause_code ?? selMeta?.clause_code}</div>
|
||||
<h1 class="h-title">{strip(clauseDoc.title, links?.clause_code ?? '')}</h1>
|
||||
|
||||
<div class="flow">
|
||||
<button class="fl" disabled={!links?.prev} onclick={() => loadClause(links?.prev?.id)}>← {links?.prev?.clause_code ?? ''}</button>
|
||||
<button class="fl next" disabled={!links?.next} onclick={() => loadClause(links?.next?.id)}>{links?.next?.clause_code ?? ''} →</button>
|
||||
</div>
|
||||
|
||||
{#key clauseDoc.id}
|
||||
<div class="docbody">
|
||||
<MarkdownDoc documentId={clauseDoc.id} mdContent={clauseDoc.md_content ?? clauseDoc.extracted_text} mdStatus={null} class="prose prose-base max-w-none" />
|
||||
</div>
|
||||
{/key}
|
||||
|
||||
{#if links && (links.forward.length || links.back.length)}
|
||||
<section class="conn">
|
||||
{#if links.forward.length}
|
||||
<div><h4>이 절이 참조 <span>{links.forward.length}</span></h4>
|
||||
<div class="chiprow">{#each links.forward as f}
|
||||
{#if f.doc_id}<button class="ref" onclick={() => loadClause(f.doc_id)}>{f.code}</button>
|
||||
{:else}<span class="ref dg" title="외부/미분해">{f.code}</span>{/if}
|
||||
{/each}</div></div>
|
||||
{/if}
|
||||
{#if links.back.length}
|
||||
<div><h4>이 절을 참조 <span>{links.back.length}</span></h4>
|
||||
<div class="chiprow">{#each links.back as b}<button class="ref" onclick={() => loadClause(b.doc_id)}>{b.code}</button>{/each}</div></div>
|
||||
{/if}
|
||||
</section>
|
||||
{/if}
|
||||
|
||||
{#if studyOpen}
|
||||
<section class="study">
|
||||
<div class="slab">공부 — 노트 · 형광펜 · 암기카드{#if studyItems.length} <span>{studyItems.length}</span>{/if}</div>
|
||||
<div class="noteadd">
|
||||
<textarea bind:value={noteDraft} placeholder="이 절에 노트…" rows="2"></textarea>
|
||||
<button class="nbtn" onclick={addNote}>노트 저장</button>
|
||||
</div>
|
||||
{#if studyItems.length}
|
||||
<ul class="slist">
|
||||
{#each studyItems as it (it.id)}
|
||||
<li class="sitem">
|
||||
<span class="skind k-{it.kind}">{KLABEL[it.kind] ?? it.kind}</span>
|
||||
<span class="stext">{it.payload?.text ?? it.payload?.cue ?? ''}</span>
|
||||
<button class="sdel" title="삭제" onclick={() => delStudy(it.id)}>×</button>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
{:else}
|
||||
<p class="shint">본문을 드래그한 뒤 형광펜(▰)/암기카드(+), 또는 위에 노트를 적으세요.</p>
|
||||
{/if}
|
||||
</section>
|
||||
{/if}
|
||||
|
||||
<div class="pager">
|
||||
<button class="pg" disabled={!links?.prev} onclick={() => loadClause(links?.prev?.id)}>
|
||||
<div class="d">← 이전</div><div class="t"><span class="pno">{links?.prev?.clause_code ?? '—'}</span> {strip(links?.prev?.title, links?.prev?.clause_code)}</div></button>
|
||||
<button class="pg next" disabled={!links?.next} onclick={() => loadClause(links?.next?.id)}>
|
||||
<div class="d">다음 →</div><div class="t"><span class="pno">{links?.next?.clause_code ?? '—'}</span> {strip(links?.next?.title, links?.next?.clause_code)}</div></button>
|
||||
</div>
|
||||
{:else}
|
||||
<p class="empty">{loading ? '불러오는 중…' : '왼쪽에서 절을 선택하세요'}</p>
|
||||
{/if}
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
:global(body) { background: var(--bg); }
|
||||
.book { --paper:#fbfcf9; --serif:"Iowan Old Style","Palatino Linotype","Noto Serif KR",Georgia,serif;
|
||||
display:flex; flex-direction:column; min-height:100vh; }
|
||||
.bar { display:flex; align-items:center; gap:14px; height:50px; padding:0 18px; background:var(--paper); border-bottom:1px solid var(--border); }
|
||||
.brand { font-weight:700; font-size:13.5px; color:var(--text); }
|
||||
.crumbs { color:var(--text-dim); font-size:12.5px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; max-width:46%; }
|
||||
.crumbs b { color:var(--text); font-weight:600; } .crumbs .sep { color:#c8d6c0; margin:0 5px; }
|
||||
.search { margin-left:auto; }
|
||||
.search input { width:280px; background:var(--surface); border:1px solid var(--border); border-radius:9px; padding:7px 12px; font-size:13px; color:var(--text); outline:none; }
|
||||
.search input:focus { border-color:var(--accent); }
|
||||
.tools { display:flex; gap:2px; }
|
||||
.tool { font-size:12px; color:var(--text-dim); padding:6px 10px; border-radius:8px; border:1px solid transparent; cursor:pointer; }
|
||||
.tool:hover { background:var(--surface); } .tool.on { background:#ecf0e8; border-color:var(--border); color:var(--accent-hover); font-weight:600; }
|
||||
|
||||
.main { display:flex; align-items:flex-start; flex:1; }
|
||||
.idx { width:264px; flex-shrink:0; align-self:stretch; border-right:1px solid var(--border);
|
||||
background:linear-gradient(180deg,#f6f8f3,#f1f4ec); padding:16px 10px 30px 16px; position:sticky; top:0; max-height:100vh; overflow:auto; }
|
||||
.btitle { display:block; font-family:var(--serif); font-size:15.5px; font-weight:600; color:var(--text); text-decoration:none; line-height:1.32; }
|
||||
.btitle:hover { text-decoration:underline; }
|
||||
.bmeta { font-size:11px; color:#9aa090; margin:3px 0 14px; }
|
||||
.parttab { display:flex; align-items:center; gap:8px; margin:11px 0 4px; padding:3px 4px; border-radius:6px; cursor:pointer;
|
||||
font-size:11px; font-weight:700; letter-spacing:.5px; color:var(--text-dim); text-transform:uppercase; }
|
||||
.parttab:hover { background:#fff; } .parttab .bar2 { width:3px; height:12px; border-radius:2px; background:var(--domain-engineering); }
|
||||
.parttab .pname { flex:1; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; } .parttab .ct { color:#9aa090; font-weight:600; letter-spacing:0; }
|
||||
.ci { display:flex; gap:9px; align-items:baseline; padding:4px 9px; border-radius:7px; cursor:pointer; line-height:1.4; }
|
||||
.ci .no { font-family:ui-monospace,Menlo,monospace; font-size:11px; color:var(--accent); font-weight:600; min-width:52px; white-space:nowrap; }
|
||||
.ci .tt { font-size:12.5px; color:var(--text-dim); overflow:hidden; text-overflow:ellipsis; }
|
||||
.ci:hover { background:#fff; }
|
||||
.ci.on { background:#fff; box-shadow:inset 3px 0 0 var(--accent), 0 1px 2px rgba(35,41,31,.05); }
|
||||
.ci.on .no { color:var(--accent-hover); font-weight:700; } .ci.on .tt { color:var(--text); font-weight:600; }
|
||||
|
||||
.read { flex:1; min-width:0; padding:34px 40px 80px; }
|
||||
.col { max-width:680px; margin:0 auto; position:relative; }
|
||||
.studybar { position:absolute; right:-30px; top:4px; display:flex; flex-direction:column; gap:6px; }
|
||||
.sbtn { width:34px; height:34px; border-radius:9px; border:1px solid var(--border); background:var(--paper); color:var(--text-dim); font-size:13px; cursor:pointer; }
|
||||
.sbtn:hover { background:var(--surface); color:var(--accent-hover); }
|
||||
.kicker { margin-bottom:5px; } .kicker .pth { font-size:11.5px; color:#9aa090; font-weight:600; letter-spacing:.3px; }
|
||||
.h-no { font-family:ui-monospace,Menlo,monospace; font-size:13px; color:var(--accent); font-weight:700; letter-spacing:.5px; }
|
||||
.h-title { font-family:var(--serif); font-size:26px; line-height:1.24; font-weight:600; margin:2px 0 14px; letter-spacing:-.2px; color:var(--text); }
|
||||
.flow { display:flex; justify-content:space-between; gap:8px; margin-bottom:18px; }
|
||||
.flow .fl { font-size:11.5px; color:var(--text-dim); background:var(--surface); border:1px solid var(--border); border-radius:8px; padding:5px 11px; cursor:pointer; }
|
||||
.flow .fl:hover:not(:disabled) { background:#ecf0e8; } .flow .fl:disabled { opacity:.35; cursor:default; }
|
||||
.docbody { font-size:15.5px; }
|
||||
.docbody :global(.prose) { color:#2a3024; line-height:1.78; }
|
||||
.docbody :global(.prose h1), .docbody :global(.prose h2), .docbody :global(.prose h3) { font-family:var(--serif); }
|
||||
.docbody :global(a) { color:var(--accent-hover); }
|
||||
.conn { margin-top:34px; padding-top:18px; border-top:1px solid var(--border); display:grid; grid-template-columns:1fr 1fr; gap:22px; }
|
||||
.conn h4 { font-size:11px; font-weight:700; color:var(--text-dim); letter-spacing:.4px; margin:0 0 9px; } .conn h4 span { color:#9aa090; font-weight:500; }
|
||||
.chiprow { display:flex; flex-wrap:wrap; gap:5px; }
|
||||
.ref { font-family:ui-monospace,Menlo,monospace; font-size:11.5px; font-weight:600; color:var(--accent-hover); background:#eef4ec; border:1px solid #d9e6d8; border-radius:6px; padding:2px 8px; cursor:pointer; }
|
||||
.ref:hover { background:#e2efe0; } .ref.dg { color:#9aa090; background:var(--surface); border-color:var(--border); cursor:default; }
|
||||
.pager { display:flex; gap:10px; margin-top:30px; }
|
||||
.pg { flex:1; text-align:left; border:1px solid var(--border); border-radius:11px; padding:11px 14px; background:var(--paper); cursor:pointer; }
|
||||
.pg.next { text-align:right; } .pg:hover:not(:disabled) { border-color:#cfd7c6; background:#fff; } .pg:disabled { opacity:.4; cursor:default; }
|
||||
.pg .d { font-size:10.5px; color:#9aa090; } .pg .t { font-size:12.5px; color:var(--text-dim); font-weight:600; margin-top:1px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
|
||||
.pg .pno { font-family:ui-monospace,Menlo,monospace; color:var(--accent); }
|
||||
.empty { color:#9aa090; text-align:center; padding:80px 0; }
|
||||
.sbtn.on { background:#ecf0e8; color:var(--accent-hover,#3d7256); border-color:var(--border); }
|
||||
.scount { font-size:9px; font-weight:700; color:#fff; background:var(--accent,#4f8a6b); border-radius:8px; padding:1px 5px; text-align:center; }
|
||||
.study { margin-top:24px; padding:14px; border:1px solid var(--border); border-radius:12px; background:var(--surface); }
|
||||
.slab { font-size:11px; font-weight:700; color:var(--text-dim); letter-spacing:.3px; margin-bottom:9px; }
|
||||
.slab span { color:var(--accent-hover,#3d7256); }
|
||||
.noteadd { display:flex; gap:8px; align-items:flex-end; margin-bottom:10px; }
|
||||
.noteadd textarea { flex:1; resize:vertical; border:1px solid var(--border); border-radius:8px; padding:7px 9px; font-size:12.5px; font-family:inherit; color:var(--text); background:var(--paper,#fbfcf9); outline:none; }
|
||||
.noteadd textarea:focus { border-color:var(--accent); }
|
||||
.nbtn { flex-shrink:0; font-size:12px; color:#fff; background:var(--accent,#4f8a6b); border:0; border-radius:8px; padding:8px 12px; cursor:pointer; }
|
||||
.nbtn:hover { background:var(--accent-hover,#3d7256); }
|
||||
.slist { list-style:none; margin:0; padding:0; display:flex; flex-direction:column; gap:5px; }
|
||||
.sitem { display:flex; align-items:baseline; gap:8px; padding:6px 8px; border-radius:8px; background:var(--paper,#fbfcf9); border:1px solid var(--border); }
|
||||
.skind { flex-shrink:0; font-size:9.5px; font-weight:700; border-radius:4px; padding:1px 6px; }
|
||||
.k-note { color:#3d7256; background:#e3efe2; border:1px solid #cfe3cd; }
|
||||
.k-highlight { color:#8a6306; background:#faf3e2; border:1px solid #ecdca3; }
|
||||
.k-card { color:#1d4ed8; background:#eef4fc; border:1px solid #d7e4f7; }
|
||||
.stext { flex:1; font-size:12px; line-height:1.5; color:var(--text); white-space:pre-wrap; word-break:break-word; }
|
||||
.sdel { flex-shrink:0; background:none; border:0; color:var(--faint,#9aa090); cursor:pointer; font-size:14px; }
|
||||
.sdel:hover { color:var(--error,#c0392b); }
|
||||
.shint { font-size:11.5px; color:var(--faint,#9aa090); margin:0; }
|
||||
@media(max-width:820px){ .idx{display:none} .read{padding:24px 18px} .conn{grid-template-columns:1fr} .studybar{position:static;flex-direction:row} .crumbs{max-width:30%} .search input{width:150px} }
|
||||
</style>
|
||||
@@ -16,6 +16,7 @@
|
||||
import Skeleton from '$lib/components/ui/Skeleton.svelte';
|
||||
import HandwriteCanvas from '$lib/components/HandwriteCanvas.svelte';
|
||||
import MarkdownDoc from '$lib/components/MarkdownDoc.svelte';
|
||||
import RelatedDocs from '$lib/components/RelatedDocs.svelte';
|
||||
import { renderDocMarkdown } from '$lib/utils/docMarkdown';
|
||||
import MarkdownStatusBadge from '$lib/components/MarkdownStatusBadge.svelte';
|
||||
import NoteEditor from '$lib/components/editors/NoteEditor.svelte';
|
||||
@@ -321,6 +322,7 @@
|
||||
<!-- ════ 우 슬림 레일 (시안 카드 스타일) ════ -->
|
||||
{#snippet rail()}
|
||||
<div style="display:flex;flex-direction:column;gap:11px;font-size:14px;">
|
||||
<RelatedDocs documentId={doc.id} />
|
||||
{#if doc.ai_tldr || doc.ai_summary}
|
||||
<div style="background:#f4f7f1;border:1px solid #dde3d6;border-radius:14px;padding:13px;">
|
||||
<div style="font-size:10.5px;font-weight:700;color:#697061;letter-spacing:.4px;margin-bottom:7px;">TL;DR</div>
|
||||
|
||||
@@ -1,13 +1,58 @@
|
||||
<script>
|
||||
// /study — 학습 hub.
|
||||
// 주제로 보기(퀴즈·복습·통계) / 자료 학습 / 필사 세션 / 암기카드 검수.
|
||||
// /study — 학습 hub + 데일리 랜딩('오늘의 공부' 대시보드).
|
||||
// 상단 = 이론 홈(진도·오늘의 개념·복습 due, 재노출 트리거). 하단 = 기존 모드 진입.
|
||||
import { onMount } from 'svelte';
|
||||
import { api } from '$lib/api';
|
||||
import { BookOpen, PenLine, GraduationCap, FolderKanban, Layers, Repeat, Flag, Inbox, Activity } from 'lucide-svelte';
|
||||
import { addToast } from '$lib/stores/toast';
|
||||
import { BookOpen, PenLine, GraduationCap, FolderKanban, Layers, Repeat, Flag, Inbox, Activity, CalendarCheck, Target } from 'lucide-svelte';
|
||||
|
||||
let cardReviewCount = $state(0);
|
||||
let questionFlagCount = $state(0);
|
||||
|
||||
// 오늘의 공부 (이론 홈)
|
||||
let curriculum = $state(null);
|
||||
let todayConcepts = $state([]);
|
||||
let weakConcepts = $state([]); // 약점 개념(관련 기출 정답률 낮음)
|
||||
let dashLoading = $state(true);
|
||||
|
||||
let readPct = $derived(
|
||||
curriculum && curriculum.total ? Math.round((curriculum.read / curriculum.total) * 100) : 0
|
||||
);
|
||||
|
||||
async function loadDashboard() {
|
||||
dashLoading = true;
|
||||
try {
|
||||
const [cur, today] = await Promise.all([
|
||||
api('/study/curriculum'),
|
||||
api('/study/today-concepts?limit=6'),
|
||||
]);
|
||||
curriculum = cur;
|
||||
todayConcepts = today?.concepts ?? [];
|
||||
} catch {
|
||||
// 코어 대시보드 실패해도 허브 나머지는 동작 (조용히)
|
||||
} finally {
|
||||
dashLoading = false;
|
||||
}
|
||||
// 약점 개념 = 비차단(신규 엔드포인트 실패해도 코어 대시보드 블랙아웃 방지)
|
||||
try {
|
||||
const weak = await api('/study/concepts/weakness-map?limit=5');
|
||||
weakConcepts = weak?.weak ?? [];
|
||||
} catch {}
|
||||
}
|
||||
|
||||
async function markRead(doc) {
|
||||
try {
|
||||
await api(`/study/concepts/${doc.doc_id}/read`, { method: 'POST' });
|
||||
todayConcepts = todayConcepts.filter((c) => c.doc_id !== doc.doc_id);
|
||||
addToast('success', `회독: ${doc.title}`);
|
||||
loadDashboard(); // 진도 갱신
|
||||
} catch {
|
||||
addToast('error', '회독 처리 실패');
|
||||
}
|
||||
}
|
||||
|
||||
onMount(async () => {
|
||||
loadDashboard();
|
||||
try {
|
||||
const r = await api('/study-cards/needs-review/count');
|
||||
cardReviewCount = r?.count ?? 0;
|
||||
@@ -27,6 +72,80 @@
|
||||
<p class="text-sm text-dim mt-1">주제별 퀴즈·복습(SRS)·통계 / 학습 자료 회독 / 손글씨 필사 세션.</p>
|
||||
</header>
|
||||
|
||||
<!-- 오늘의 공부 (이론 홈 대시보드 = 데일리 트리거) -->
|
||||
<section class="mb-5 rounded-lg border border-default bg-surface p-4 md:p-5">
|
||||
<div class="flex items-center gap-2 mb-3">
|
||||
<CalendarCheck size={18} class="text-accent" />
|
||||
<h2 class="text-base font-semibold text-text">오늘의 공부</h2>
|
||||
{#if curriculum}
|
||||
<span class="ml-auto text-xs text-dim">이론 회독 <span class="text-text font-medium">{curriculum.read}</span> / {curriculum.total} ({readPct}%)</span>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if dashLoading}
|
||||
<p class="text-xs text-dim">불러오는 중…</p>
|
||||
{:else}
|
||||
{#if curriculum}
|
||||
<div class="h-2 rounded-full bg-bg overflow-hidden mb-3">
|
||||
<div class="h-full bg-accent" style="width: {readPct}%"></div>
|
||||
</div>
|
||||
<div class="flex flex-wrap gap-x-4 gap-y-1 mb-4 text-xs text-dim">
|
||||
{#each curriculum.subjects as s}
|
||||
<span>{s.subject} <span class="text-text">{s.read}/{s.total}</span></span>
|
||||
{/each}
|
||||
</div>
|
||||
|
||||
<div class="flex flex-wrap gap-2 mb-4">
|
||||
<a
|
||||
href="/study/topics/{curriculum.topic_id}/review-queue"
|
||||
class="flex items-center gap-1.5 rounded border border-default px-3 py-1.5 text-xs text-dim hover:border-accent hover:text-text transition-colors"
|
||||
>
|
||||
<Repeat size={13} /> 문항 복습 <span class="font-semibold text-text">{curriculum.question_due}</span>
|
||||
</a>
|
||||
<span class="flex items-center gap-1.5 rounded border border-default px-3 py-1.5 text-xs text-dim">
|
||||
<BookOpen size={13} /> 개념 재복습 <span class="font-semibold text-text">{curriculum.concept_due}</span>
|
||||
</span>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<div class="text-xs text-dim mb-2">오늘의 개념</div>
|
||||
{#if todayConcepts.length === 0}
|
||||
<p class="text-xs text-dim">오늘 볼 개념이 없습니다. 잘 하고 있어요.</p>
|
||||
{:else}
|
||||
<ul class="space-y-1.5">
|
||||
{#each todayConcepts as c (c.doc_id)}
|
||||
<li class="flex items-center gap-2 rounded border border-default px-3 py-2">
|
||||
<span class="text-accent shrink-0 text-xs" title="빈출">{#each Array(c.freq) as _}★{/each}</span>
|
||||
<a href="/study/read/{c.doc_id}" class="text-sm text-text hover:text-accent truncate flex-1">{c.title}</a>
|
||||
<span class="shrink-0 text-[10px] rounded-full px-2 py-0.5 {c.reason === '재복습' ? 'bg-accent/15 text-accent' : 'bg-surface border border-default text-dim'}">{c.reason}</span>
|
||||
<button
|
||||
type="button"
|
||||
onclick={() => markRead(c)}
|
||||
class="shrink-0 text-xs rounded border border-default px-2 py-1 text-dim hover:border-accent hover:text-accent transition-colors"
|
||||
>읽음</button>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
{/if}
|
||||
|
||||
{#if weakConcepts.length > 0}
|
||||
<div class="mt-4 pt-3 border-t border-default">
|
||||
<div class="text-xs text-dim mb-2 flex items-center gap-1.5">
|
||||
<Target size={13} class="text-error" /> 약점 개념 <span class="text-faint">(관련 기출 정답률 낮음)</span>
|
||||
</div>
|
||||
<div class="flex flex-wrap gap-2">
|
||||
{#each weakConcepts as w (w.doc_id)}
|
||||
<a href="/study/read/{w.doc_id}"
|
||||
class="text-xs rounded-full border border-error/40 bg-error/10 text-error px-3 py-1 hover:bg-error/20 transition-colors">
|
||||
{w.title.replace(/^\d+_/, '')} <span class="font-semibold">{w.accuracy}%</span>
|
||||
</a>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
{/if}
|
||||
</section>
|
||||
|
||||
<a
|
||||
href="/study/topics"
|
||||
class="block mb-3 p-5 rounded-lg border border-default bg-surface hover:border-accent hover:bg-accent/5 transition-colors"
|
||||
@@ -126,7 +245,8 @@
|
||||
<div class="mt-6 p-4 rounded-lg border border-dashed border-default/60 text-xs text-dim">
|
||||
<div class="font-medium text-dim mb-1">예정</div>
|
||||
<ul class="list-disc list-inside space-y-0.5">
|
||||
<li>애플워치 빠른복습 + 공부 알람(push)</li>
|
||||
<li>개념 학습 리더 (가리고 떠올리기 · 빈출★ · 관련개념 백링크)</li>
|
||||
<li>이론↔문제 연결 (개념별 정답률 · 약점 개념 지도)</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -0,0 +1,254 @@
|
||||
<script>
|
||||
/**
|
||||
* /study/read/[docId] — 개념 학습 리더.
|
||||
* 개념노트(가스기사 documents)를 구조(요약/본문/빈출★/관련개념)로 렌더 +
|
||||
* '떠올리기' 능동 회상 토글 + 회독 SR(POST read) + 관련개념 백링크 + 이전/다음.
|
||||
* 본문 렌더 = MarkdownDoc(KaTeX + docimg 내장). 서버 파싱 = /api/study/concepts/{id}.
|
||||
*/
|
||||
import { page } from '$app/stores';
|
||||
import { api } from '$lib/api';
|
||||
import { addToast } from '$lib/stores/toast';
|
||||
import { renderMathMarkdownInline } from '$lib/utils/mathMarkdown';
|
||||
import MarkdownDoc from '$lib/components/MarkdownDoc.svelte';
|
||||
import Button from '$lib/components/ui/Button.svelte';
|
||||
import EmptyState from '$lib/components/ui/EmptyState.svelte';
|
||||
import Skeleton from '$lib/components/ui/Skeleton.svelte';
|
||||
import { BookOpen, ArrowLeft, Eye, EyeOff, Check, ChevronLeft, ChevronRight, FileQuestion } from 'lucide-svelte';
|
||||
|
||||
let docId = $derived($page.params.docId);
|
||||
|
||||
let concept = $state(null);
|
||||
let relatedQ = $state(null); // 관련 기출(이론↔문제, 비차단)
|
||||
let loading = $state(true);
|
||||
let notFound = $state(false);
|
||||
let mode = $state('read'); // 'read' | 'recall'(떠올리기)
|
||||
let revealed = $state({}); // {sectionIndex: true}
|
||||
let marking = $state(false);
|
||||
|
||||
const STAGE_LABEL = { 0: '복습 시작', 1: '복습 1단계', 2: '복습 2단계', 3: '복습 3단계', 4: '학습 완료' };
|
||||
const OUTCOME_MARK = { correct: '○', wrong: '✕', unsure: '?' };
|
||||
const OUTCOME_CLASS = { correct: 'text-success', wrong: 'text-error', unsure: 'text-warning' };
|
||||
const outcomeMark = (o) => OUTCOME_MARK[o] ?? '–';
|
||||
const outcomeClass = (o) => OUTCOME_CLASS[o] ?? 'text-faint';
|
||||
|
||||
async function load() {
|
||||
const reqId = docId; // in-flight 가드: 백링크 연타 시 stale 응답 무시
|
||||
loading = true;
|
||||
notFound = false;
|
||||
concept = null;
|
||||
relatedQ = null;
|
||||
revealed = {};
|
||||
mode = 'read';
|
||||
try {
|
||||
const data = await api(`/study/concepts/${reqId}`);
|
||||
if (reqId !== docId) return; // 그새 다른 개념으로 이동 → 폐기
|
||||
concept = data;
|
||||
} catch (e) {
|
||||
if (reqId !== docId) return;
|
||||
if (e?.status === 404) notFound = true;
|
||||
else addToast('error', '개념을 불러오지 못했습니다');
|
||||
return; // 본문 실패 → 관련기출 스킵
|
||||
} finally {
|
||||
if (reqId === docId) loading = false;
|
||||
}
|
||||
// 관련 기출(비차단 — 실패해도 본문 표시엔 영향 없음)
|
||||
try {
|
||||
const rq = await api(`/study/concepts/${reqId}/questions?limit=6`);
|
||||
if (reqId === docId) relatedQ = rq;
|
||||
} catch {}
|
||||
}
|
||||
|
||||
// $effect 가 마운트 1회 + docId 변경(백링크/이전·다음) 재로드를 모두 커버 (onMount 불필요)
|
||||
$effect(() => {
|
||||
void docId;
|
||||
load();
|
||||
});
|
||||
|
||||
function toggleMode() {
|
||||
mode = mode === 'read' ? 'recall' : 'read';
|
||||
revealed = {};
|
||||
}
|
||||
function reveal(i) {
|
||||
revealed = { ...revealed, [i]: true };
|
||||
}
|
||||
function shown(i) {
|
||||
return mode === 'read' || revealed[i];
|
||||
}
|
||||
|
||||
async function markRead() {
|
||||
marking = true;
|
||||
try {
|
||||
const r = await api(`/study/concepts/${docId}/read`, { method: 'POST' });
|
||||
if (concept) {
|
||||
concept.is_read = true;
|
||||
concept.review_stage = r?.review_stage ?? concept.review_stage;
|
||||
concept.due_at = r?.due_at ?? concept.due_at;
|
||||
}
|
||||
addToast('success', '회독 완료 — 다음 복습에 다시 나옵니다');
|
||||
} catch {
|
||||
addToast('error', '회독 처리 실패');
|
||||
} finally {
|
||||
marking = false;
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<svelte:head><title>{concept?.title ?? '개념'} — 공부</title></svelte:head>
|
||||
|
||||
<div class="p-4 md:p-6 max-w-3xl mx-auto">
|
||||
<!-- 상단 네비 -->
|
||||
<div class="flex items-center gap-2 text-xs md:text-sm mb-4 min-w-0">
|
||||
<a href="/study" class="text-dim hover:text-text flex items-center gap-1 shrink-0">
|
||||
<ArrowLeft size={14} /> 공부
|
||||
</a>
|
||||
{#if concept?.subject}
|
||||
<span class="text-faint shrink-0">/</span>
|
||||
<span class="text-dim truncate">{concept.subject}</span>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if loading}
|
||||
<Skeleton h="h-10" rounded="card" />
|
||||
<div class="mt-3 space-y-2">
|
||||
{#each Array(4) as _}<Skeleton h="h-24" rounded="card" />{/each}
|
||||
</div>
|
||||
{:else if notFound}
|
||||
<EmptyState icon={BookOpen} title="개념을 찾을 수 없습니다" description="삭제되었거나 잘못된 주소입니다." />
|
||||
{:else if concept}
|
||||
<!-- 제목 + 빈출 tier -->
|
||||
<header class="mb-3">
|
||||
<div class="flex items-start gap-2">
|
||||
<h1 class="text-xl md:text-2xl font-semibold text-text flex-1">{concept.title}</h1>
|
||||
<span class="text-accent text-sm shrink-0 mt-1" title="빈출도">
|
||||
{#each Array(concept.freq) as _}★{/each}
|
||||
</span>
|
||||
</div>
|
||||
{#if concept.is_read || (concept.review_stage !== null && concept.review_stage !== undefined)}
|
||||
<div class="mt-1 text-xs text-dim">
|
||||
{#if concept.review_stage !== null && concept.review_stage !== undefined}
|
||||
{STAGE_LABEL[concept.review_stage] ?? '복습 중'}
|
||||
{:else}회독함{/if}
|
||||
</div>
|
||||
{/if}
|
||||
</header>
|
||||
|
||||
<!-- 한 줄 요약 (고정 표시) -->
|
||||
{#if concept.summary}
|
||||
<div class="mb-4 rounded-lg border-l-4 border-accent bg-accent/10 px-4 py-3 markdown-body text-sm text-text">
|
||||
{@html renderMathMarkdownInline(concept.summary)}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- 모드 토글 -->
|
||||
<div class="flex items-center gap-2 mb-4">
|
||||
<Button variant={mode === 'recall' ? 'primary' : 'secondary'} size="sm" icon={mode === 'recall' ? EyeOff : Eye} onclick={toggleMode}>
|
||||
{mode === 'recall' ? '떠올리기 모드' : '읽기 모드'}
|
||||
</Button>
|
||||
{#if mode === 'recall'}
|
||||
<span class="text-xs text-dim">각 섹션을 떠올린 뒤 확인하세요</span>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<!-- 본문 섹션 -->
|
||||
{#if concept.body.length > 0}
|
||||
<div class="space-y-3 mb-5">
|
||||
{#each concept.body as sec, i (i)}
|
||||
<section class="rounded-lg border border-default bg-surface overflow-hidden">
|
||||
<div class="flex items-center gap-2 px-4 py-2.5 border-b border-default bg-surface-hover">
|
||||
<h2 class="text-sm font-semibold text-text flex-1">{sec.label}</h2>
|
||||
{#if sec.stars > 0}
|
||||
<span class="text-accent text-xs shrink-0">{#each Array(sec.stars) as _}★{/each}</span>
|
||||
{/if}
|
||||
</div>
|
||||
{#if shown(i)}
|
||||
<div class="px-4 py-3">
|
||||
<MarkdownDoc documentId={concept.doc_id} mdContent={sec.md} mdStatus={null}
|
||||
class="markdown-body max-w-none text-text" />
|
||||
</div>
|
||||
{:else}
|
||||
<button type="button" onclick={() => reveal(i)}
|
||||
class="w-full px-4 py-6 text-center text-sm text-dim hover:text-accent hover:bg-accent/5 transition-colors">
|
||||
<Eye size={16} class="inline mr-1" /> 떠올린 뒤 확인
|
||||
</button>
|
||||
{/if}
|
||||
</section>
|
||||
{/each}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- 빈출 포인트 -->
|
||||
{#if concept.bincheol.length > 0}
|
||||
<section class="mb-5 rounded-lg border border-default bg-surface p-4">
|
||||
<h2 class="text-sm font-semibold text-text mb-2 flex items-center gap-1.5">
|
||||
<span class="text-accent">★</span> 빈출 포인트
|
||||
</h2>
|
||||
<ul class="space-y-1.5">
|
||||
{#each concept.bincheol as item}
|
||||
<li class="flex gap-2 text-sm text-text">
|
||||
<span class="text-accent shrink-0 text-xs mt-0.5">{#each Array(item.tier || 1) as _}★{/each}</span>
|
||||
<span class="markdown-body flex-1">{@html renderMathMarkdownInline(item.text)}</span>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
</section>
|
||||
{/if}
|
||||
|
||||
<!-- 관련 개념 (백링크) -->
|
||||
{#if concept.related.length > 0}
|
||||
<section class="mb-5">
|
||||
<h2 class="text-xs text-dim mb-2">관련 개념</h2>
|
||||
<div class="flex flex-wrap gap-2">
|
||||
{#each concept.related as rel}
|
||||
{#if rel.doc_id}
|
||||
<a href="/study/read/{rel.doc_id}"
|
||||
class="text-xs rounded-full border border-accent/40 bg-accent/10 text-accent px-3 py-1 hover:bg-accent/20 transition-colors">
|
||||
{rel.phrase}
|
||||
</a>
|
||||
{:else}
|
||||
<span class="text-xs rounded-full border border-default bg-surface text-faint px-3 py-1" title="아직 없는 개념">
|
||||
{rel.phrase}
|
||||
</span>
|
||||
{/if}
|
||||
{/each}
|
||||
</div>
|
||||
</section>
|
||||
{/if}
|
||||
|
||||
<!-- 관련 기출 (이론↔문제 브리지) -->
|
||||
{#if relatedQ && relatedQ.linked > 0}
|
||||
<section class="mb-5 rounded-lg border border-default bg-surface p-4">
|
||||
<h2 class="text-sm font-semibold text-text mb-2 flex items-center gap-1.5">
|
||||
<FileQuestion size={15} class="text-accent" /> 관련 기출
|
||||
<span class="ml-1 text-xs font-normal text-dim">
|
||||
{relatedQ.linked}문항{#if relatedQ.accuracy !== null} · 정답률 <span class="{relatedQ.accuracy < 60 ? 'text-error' : 'text-text'} font-medium">{relatedQ.accuracy}%</span>{:else} · 아직 안 풂{/if}
|
||||
</span>
|
||||
</h2>
|
||||
<ul class="space-y-0.5">
|
||||
{#each relatedQ.questions as q (q.id)}
|
||||
<li>
|
||||
<a href="/study/topics/4/questions/{q.id}"
|
||||
class="flex items-center gap-2 text-xs py-1 text-dim hover:text-accent transition-colors">
|
||||
<span class="{outcomeClass(q.last_outcome)} shrink-0 w-4 text-center font-bold">{outcomeMark(q.last_outcome)}</span>
|
||||
<span class="truncate">{q.subject ?? '기출'}{#if q.exam_round} · {q.exam_round}{/if}</span>
|
||||
</a>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
</section>
|
||||
{/if}
|
||||
|
||||
<!-- 액션바 -->
|
||||
<div class="flex items-center gap-2 border-t border-default pt-4 mt-2">
|
||||
{#if concept.prev_id}
|
||||
<Button variant="ghost" size="sm" icon={ChevronLeft} href="/study/read/{concept.prev_id}">이전</Button>
|
||||
{/if}
|
||||
<div class="flex-1"></div>
|
||||
<Button variant="primary" size="sm" icon={Check} onclick={markRead} loading={marking}>
|
||||
{concept.is_read ? '다시 회독' : '회독 완료'}
|
||||
</Button>
|
||||
{#if concept.next_id}
|
||||
<Button variant="secondary" size="sm" icon={ChevronRight} href="/study/read/{concept.next_id}">다음 개념</Button>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
@@ -0,0 +1,37 @@
|
||||
-- 379_asme_clause_kb.sql
|
||||
-- ASME 절-지식베이스: 절 = 개별 documents 행(parent_id) + 절↔절 백링크 + 태깅 (additive, idempotent)
|
||||
-- 검색 무접촉: 절 doc 은 embedding NULL(벡터 제외) + doc_kind='clause'(retrieval doc-leg 필터로 제외).
|
||||
|
||||
ALTER TABLE documents
|
||||
ADD COLUMN IF NOT EXISTS parent_id bigint REFERENCES documents(id),
|
||||
ADD COLUMN IF NOT EXISTS doc_kind text NOT NULL DEFAULT 'standard',
|
||||
ADD COLUMN IF NOT EXISTS clause_code text,
|
||||
ADD COLUMN IF NOT EXISTS clause_part text,
|
||||
ADD COLUMN IF NOT EXISTS clause_order int;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_documents_parent_id ON documents(parent_id) WHERE parent_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_documents_doc_kind ON documents(doc_kind);
|
||||
CREATE INDEX IF NOT EXISTS idx_documents_clause_code ON documents(clause_code) WHERE clause_code IS NOT NULL;
|
||||
|
||||
-- 절↔절 백링크 (dangling 허용: dst_doc_id nullable)
|
||||
CREATE TABLE IF NOT EXISTS clause_links (
|
||||
id bigserial PRIMARY KEY,
|
||||
src_doc_id bigint NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
||||
dst_code text NOT NULL,
|
||||
dst_doc_id bigint REFERENCES documents(id) ON DELETE SET NULL,
|
||||
anchor text,
|
||||
ctx text,
|
||||
char_off int
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_clause_links_src ON clause_links(src_doc_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_clause_links_dst ON clause_links(dst_doc_id) WHERE dst_doc_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_clause_links_dstcode ON clause_links(dst_code);
|
||||
|
||||
-- 태깅 (Part 자동 + 주제)
|
||||
CREATE TABLE IF NOT EXISTS document_tags (
|
||||
doc_id bigint NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
||||
tag text NOT NULL,
|
||||
tag_kind text NOT NULL DEFAULT 'topic',
|
||||
PRIMARY KEY (doc_id, tag)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_document_tags_tag ON document_tags(tag);
|
||||
@@ -0,0 +1,9 @@
|
||||
-- 380_clause_study.sql — 절-문서 공부도구(노트/형광펜/암기카드) 저장. FK 없음(documents 락 회피).
|
||||
CREATE TABLE IF NOT EXISTS clause_study (
|
||||
id bigserial PRIMARY KEY,
|
||||
doc_id bigint NOT NULL,
|
||||
kind text NOT NULL, -- 'note' | 'highlight' | 'card'
|
||||
payload jsonb NOT NULL DEFAULT '{}',
|
||||
created_at timestamptz NOT NULL DEFAULT now()
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_clause_study_doc ON clause_study(doc_id, kind);
|
||||
@@ -0,0 +1,16 @@
|
||||
-- 381_study_concept_progress.sql — 이론 개념(문서) 간격반복(SR) 진행. 이론공부 홈 트리거.
|
||||
-- concept_doc_id 는 documents.id 를 가리키나 FK 미설정(hot 테이블 락 회피, clause_study 380 선례).
|
||||
-- SR 산술은 study_question_progress 와 동일(sr_schedule 공용): stage 0→1→2→3(1·3·7·14일)→4 졸업.
|
||||
CREATE TABLE IF NOT EXISTS study_concept_progress (
|
||||
id bigserial PRIMARY KEY,
|
||||
user_id bigint NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
||||
study_topic_id bigint NOT NULL REFERENCES study_topics(id) ON DELETE CASCADE,
|
||||
concept_doc_id bigint NOT NULL,
|
||||
review_stage smallint,
|
||||
due_at timestamptz,
|
||||
last_read_at timestamptz,
|
||||
created_at timestamptz NOT NULL DEFAULT now(),
|
||||
updated_at timestamptz NOT NULL DEFAULT now(),
|
||||
CONSTRAINT uq_concept_progress_user_doc UNIQUE (user_id, concept_doc_id)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_concept_progress_due ON study_concept_progress(user_id, due_at) WHERE due_at IS NOT NULL;
|
||||
@@ -0,0 +1,15 @@
|
||||
-- 382_study_concept_links.sql — 개념문서 ↔ 기출문항 링크 (이론↔문제 브리지, Stage B).
|
||||
-- concept_doc_id=documents.id, question_id=study_questions.id — FK 없음(hot 테이블 락 회피, 선례).
|
||||
-- link_source: 'embedding'(bge-m3 코사인 top-k, 주력) | 'ref'(해설 .md 참조, 후속 enrichment).
|
||||
-- score=코사인 유사도(0~1). UNIQUE(doc,question,source) — source별 공존 허용(재튜닝=source 전삭제 후 재삽입).
|
||||
CREATE TABLE IF NOT EXISTS study_concept_links (
|
||||
id bigserial PRIMARY KEY,
|
||||
concept_doc_id bigint NOT NULL,
|
||||
question_id bigint NOT NULL,
|
||||
link_source text NOT NULL,
|
||||
score double precision,
|
||||
created_at timestamptz NOT NULL DEFAULT now(),
|
||||
CONSTRAINT uq_concept_link UNIQUE (concept_doc_id, question_id, link_source)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_concept_links_doc ON study_concept_links(concept_doc_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_concept_links_q ON study_concept_links(question_id);
|
||||
@@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ASME clause-KB backlinks: resolve clause-id mentions in each clause doc -> clause_links.
|
||||
dst resolved to the clause doc of the same parent (top-level code); sub-code mention -> anchor;
|
||||
unresolved (cross-standard / material spec not split) -> dangling (dst_doc_id NULL).
|
||||
Idempotent per parent. Usage: python3 asme_backlinks_persist.py <parent_id> [--commit]
|
||||
"""
|
||||
import asyncio, os, re, sys
|
||||
|
||||
MENTION_RE = re.compile(r'(?<![A-Za-z0-9])([A-Z]{1,4}-\d+(?:\.\d+)*[A-Za-z]?)(?![A-Za-z0-9])')
|
||||
def top(code): return re.match(r'^[A-Z]{1,4}-\d+', code).group(0)
|
||||
|
||||
async def main():
|
||||
parent = int(sys.argv[1]); commit = '--commit' in sys.argv
|
||||
import asyncpg
|
||||
conn = await asyncpg.connect(os.environ['DATABASE_URL'].replace('+asyncpg', ''))
|
||||
docs = await conn.fetch("SELECT id, clause_code, md_content FROM documents "
|
||||
"WHERE parent_id=$1 AND doc_kind='clause' ORDER BY clause_order", parent)
|
||||
code2id = {d['clause_code']: d['id'] for d in docs}
|
||||
edges = [] # (src_id, dst_code, dst_doc_id, anchor, ctx, char_off)
|
||||
resolved = dangling = 0
|
||||
for d in docs:
|
||||
body = d['md_content']; src_top = d['clause_code']
|
||||
seen = set()
|
||||
for m in MENTION_RE.finditer(body):
|
||||
code = m.group(1); t = top(code)
|
||||
if t == src_top: continue # self-reference
|
||||
if (d['id'], code) in seen: continue # dedup per (src,dst_code)
|
||||
seen.add((d['id'], code))
|
||||
dst_id = code2id.get(t) # resolve to same-parent clause doc
|
||||
anchor = code.lower().replace('.', '-') if code != t else None
|
||||
off = m.start()
|
||||
ctx = re.sub(r'\s+', ' ', body[max(0, off-50):off+50]).strip()
|
||||
edges.append((d['id'], code, dst_id, anchor, ctx, off))
|
||||
if dst_id: resolved += 1
|
||||
else: dangling += 1
|
||||
print(f"parent={parent} clause_docs={len(docs)} edges={len(edges)} resolved={resolved} dangling={dangling}")
|
||||
# top referenced clauses
|
||||
from collections import Counter
|
||||
tgt = Counter(top(e[1]) for e in edges if e[2])
|
||||
print("most-referenced:", tgt.most_common(8))
|
||||
if not commit:
|
||||
print("DRY-RUN. pass --commit to persist."); await conn.close(); return
|
||||
async with conn.transaction():
|
||||
ids = [d['id'] for d in docs]
|
||||
await conn.execute("DELETE FROM clause_links WHERE src_doc_id = ANY($1::bigint[])", ids)
|
||||
await conn.executemany(
|
||||
"INSERT INTO clause_links(src_doc_id,dst_code,dst_doc_id,anchor,ctx,char_off) "
|
||||
"VALUES ($1,$2,$3,$4,$5,$6)", edges)
|
||||
n = await conn.fetchval("SELECT count(*) FROM clause_links WHERE src_doc_id = ANY($1::bigint[])", ids)
|
||||
print(f"COMMITTED: {n} clause_links for parent {parent}")
|
||||
await conn.close()
|
||||
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ASME clause-KB persist (v2: over-CAP pagination). Split a parent standard into per-clause
|
||||
documents (A-granularity); over-CAP clause bodies are paginated into readable page-docs.
|
||||
Idempotent per parent. doc_kind='clause', embedding NULL (search-excluded), parent_id=<parent>.
|
||||
Usage: python3 asme_clause_persist.py <parent_id> [--commit]
|
||||
"""
|
||||
import asyncio, os, re, sys, hashlib, statistics
|
||||
|
||||
CAP = 12000; PAGE_TOK = 11000
|
||||
EN, KO = 0.217, 0.529
|
||||
LINE_RE = re.compile(r'^([ \t#>*]{0,8})([A-Z]{2,4}-\d+(?:\.\d+)*[A-Za-z]?)(.*)$')
|
||||
MENTION_RE = re.compile(r'(?<![A-Za-z0-9])([A-Z]{1,4}-\d+(?:\.\d+)*[A-Za-z]?)(?![A-Za-z0-9])')
|
||||
EXACT_TOP = re.compile(r'^[A-Z]{2,4}-\d+$')
|
||||
TITLE_AFTER = re.compile(r'^[\s.]*[A-Z(]')
|
||||
REF_LEAD = re.compile(r'^[\s.]*(and|or|to|of|in|on|the|as|is|are|shall|through|per|see|with|'
|
||||
r'for|by|that|which|such|또는|및|등|의|은|는|에|을|를|과|와)\b', re.I)
|
||||
|
||||
def tok(s):
|
||||
ko = sum(1 for c in s if '가' <= c <= '힣'); return int((len(s)-ko)*EN + ko*KO)
|
||||
|
||||
def clean_title(rest):
|
||||
t = re.sub(r'<sup>ð</sup>\s*\**\d*\**\s*<sup>Þ</sup>', '', rest)
|
||||
t = re.sub(r'ð\**\d*\**Þ', '', t)
|
||||
t = t.replace('**', '').replace('#', '')
|
||||
return re.sub(r'\s+', ' ', t).strip(' *:—-')
|
||||
|
||||
def is_header(markup, rest):
|
||||
if '#' in markup or '*' in markup: return True
|
||||
rs = rest.strip()
|
||||
if rs == '': return True
|
||||
if REF_LEAD.match(rest): return False
|
||||
if rs[0] in ',;.)': return False
|
||||
if '가' <= rs[0] <= '힣': return False
|
||||
if rs[0].islower(): return False
|
||||
return bool(TITLE_AFTER.match(rs))
|
||||
|
||||
def paginate(body):
|
||||
"""split an over-CAP body into <=MAX_PAGES line-aligned pages of ~PAGE_TOK tokens."""
|
||||
pages, cur, ct = [], [], 0
|
||||
for ln in body.split('\n'):
|
||||
lt = tok(ln) + 1
|
||||
if ct + lt > PAGE_TOK and cur:
|
||||
pages.append('\n'.join(cur)); cur, ct = [ln], lt
|
||||
else:
|
||||
cur.append(ln); ct += lt
|
||||
if cur: pages.append('\n'.join(cur))
|
||||
return pages
|
||||
|
||||
def build_clauses(text):
|
||||
lines = text.split('\n'); off = []; a = 0
|
||||
for ln in lines: off.append(a); a += len(ln) + 1
|
||||
bounds = []; seen = set()
|
||||
for i, ln in enumerate(lines):
|
||||
m = LINE_RE.match(ln)
|
||||
if not m: continue
|
||||
markup, code, rest = m.group(1), m.group(2), m.group(3)
|
||||
if not EXACT_TOP.match(code): continue
|
||||
if not is_header(markup, rest): continue
|
||||
if code in seen: continue
|
||||
seen.add(code); bounds.append((off[i], code, clean_title(rest)))
|
||||
raw = []
|
||||
for idx, (start, code, title) in enumerate(bounds):
|
||||
end = bounds[idx+1][0] if idx+1 < len(bounds) else len(text)
|
||||
body = text[start:end]
|
||||
part = re.match(r'^[A-Z]{2,4}', code).group(0)
|
||||
links = sorted(set(re.match(r'^[A-Z]{1,4}-\d+', mm).group(0)
|
||||
for mm in MENTION_RE.findall(body)) - {code})
|
||||
raw.append(dict(code=code, part=part, title=(code + (' ' + title if title else '')),
|
||||
body=body, tok=tok(body), links=links))
|
||||
# expand over-CAP into pages; assign running clause_order
|
||||
final, order = [], 0
|
||||
for c in raw:
|
||||
if c['tok'] <= CAP:
|
||||
final.append({**c, 'order': order}); order += 1; continue
|
||||
pages = paginate(c['body'])
|
||||
for pi, pb in enumerate(pages):
|
||||
code = c['code'] if pi == 0 else f"{c['code']}·p{pi+1}"
|
||||
title = c['title'] if pi == 0 else f"{c['title']} (페이지 {pi+1}/{len(pages)})"
|
||||
final.append(dict(code=code, part=c['part'], order=order, title=title,
|
||||
body=pb, tok=tok(pb), links=c['links'] if pi == 0 else []))
|
||||
order += 1
|
||||
return final
|
||||
|
||||
async def main():
|
||||
parent = int(sys.argv[1]); commit = '--commit' in sys.argv
|
||||
import asyncpg
|
||||
conn = await asyncpg.connect(os.environ['DATABASE_URL'].replace('+asyncpg', ''))
|
||||
row = await conn.fetchrow("SELECT md_content, ai_domain, data_origin FROM documents WHERE id=$1", parent)
|
||||
if not row: print(f"parent {parent} not found"); return
|
||||
clauses = build_clauses(row['md_content'])
|
||||
toks = [c['tok'] for c in clauses]
|
||||
over = [c for c in clauses if c['tok'] > CAP]
|
||||
print(f"parent={parent} clause_docs={len(clauses)} median_tok={int(statistics.median(toks))} "
|
||||
f"max_tok={max(toks)} over_cap_remaining={len(over)}")
|
||||
if over: print("still over-CAP:", [f"{c['code']}:{c['tok']}t" for c in over])
|
||||
if not commit:
|
||||
print("DRY-RUN. pass --commit to persist."); await conn.close(); return
|
||||
async with conn.transaction():
|
||||
deld = await conn.execute("DELETE FROM documents WHERE parent_id=$1 AND doc_kind='clause'", parent)
|
||||
print("deleted prior:", deld)
|
||||
for c in clauses:
|
||||
fh = hashlib.sha256(f"{parent}:{c['code']}:{c['body']}".encode()).hexdigest()
|
||||
cid = await conn.fetchval("""
|
||||
INSERT INTO documents
|
||||
(file_format, file_hash, title, md_content, parent_id, doc_kind,
|
||||
clause_code, clause_part, clause_order, ai_domain, data_origin,
|
||||
md_status, review_status, conversion_status, preview_status)
|
||||
VALUES ('md',$1,$2,$3,$4,'clause',$5,$6,$7,$8,$9,'success','approved','none','none')
|
||||
RETURNING id
|
||||
""", fh, c['title'], c['body'], parent, c['code'], c['part'], c['order'],
|
||||
row['ai_domain'], row['data_origin'] or 'external')
|
||||
await conn.execute("INSERT INTO document_tags(doc_id,tag,tag_kind) VALUES ($1,$2,'part') "
|
||||
"ON CONFLICT DO NOTHING", cid, c['part'])
|
||||
n = await conn.fetchval("SELECT count(*) FROM documents WHERE parent_id=$1 AND doc_kind='clause'", parent)
|
||||
print(f"COMMITTED: {n} clause docs for parent {parent}")
|
||||
await conn.close()
|
||||
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,23 @@
|
||||
-- concept_links_backfill.sql — 개념↔문항 임베딩 링크 재생성 (Stage B, 멱등·재실행 안전).
|
||||
-- 정찰 확정: bge-m3 1024d 코사인, per-concept top-k=10, threshold 0.62 → ~2362링크·284/289개념·964문항.
|
||||
-- 재튜닝 시 DELETE(embedding 소스만) 후 재삽입 = ref 링크(후속) 불변. 개념 doc = 가스기사 태그.
|
||||
DELETE FROM study_concept_links WHERE link_source = 'embedding';
|
||||
INSERT INTO study_concept_links (concept_doc_id, question_id, link_source, score)
|
||||
WITH cd AS (
|
||||
SELECT id, embedding FROM documents
|
||||
WHERE user_tags::text LIKE '%@library/가스기사/%'
|
||||
AND deleted_at IS NULL AND embedding IS NOT NULL
|
||||
),
|
||||
ranked AS (
|
||||
SELECT cd.id AS concept_doc_id, q.id AS question_id,
|
||||
1 - (q.embedding <=> cd.embedding) AS score,
|
||||
row_number() OVER (PARTITION BY cd.id ORDER BY q.embedding <=> cd.embedding) AS rn
|
||||
FROM cd
|
||||
JOIN study_questions q
|
||||
ON q.study_topic_id = 4 AND q.embedding IS NOT NULL
|
||||
AND q.deleted_at IS NULL AND q.is_active
|
||||
)
|
||||
SELECT concept_doc_id, question_id, 'embedding', score
|
||||
FROM ranked
|
||||
WHERE rn <= 10 AND score >= 0.62
|
||||
ON CONFLICT (concept_doc_id, question_id, link_source) DO NOTHING;
|
||||
@@ -0,0 +1,100 @@
|
||||
#!/usr/bin/env python3
|
||||
"""기술지침(KOSHA guide) 절-KB persist: 번호섹션(# 1. 목적 / ## 4.1) 단위 분해 + 제본.
|
||||
ASME/법령과 동일 clause-KB 모델(doc_kind='clause', parent_id=지침, 검색제외, /book 리더 공용).
|
||||
Usage: python3 guide_clause_persist.py <id|all> [--commit]
|
||||
"""
|
||||
import asyncio, os, re, sys, hashlib, statistics
|
||||
|
||||
CAP = 12000; PAGE_TOK = 11000
|
||||
EN, KO = 0.217, 0.529
|
||||
# 번호섹션 헤더: '# 1. 목 적', '## 4.1 누출...' (번호 1~3자리=연도(4자리) 배제)
|
||||
ART_RE = re.compile(r'^#{1,6}\s*(\d{1,3}(?:\.\d{1,3})*)\.?\s+(\S.*)$')
|
||||
TOP_RE = re.compile(r'^\d{1,3}$')
|
||||
# 외부 표준/법규 참조(대부분 dangling): ASME B16.5 · KS B 1501 · 규칙 제N조
|
||||
EXT_RE = re.compile(r'(ASME\s+[A-Z][0-9.]+|KS\s+[A-Z]\s*[0-9]+|ISO\s+[0-9]+|제\d+조)')
|
||||
|
||||
def tok(s):
|
||||
ko = sum(1 for c in s if '가' <= c <= '힣'); return int((len(s)-ko)*EN + ko*KO)
|
||||
|
||||
def build_sections(text):
|
||||
lines = text.split('\n'); off = []; a = 0
|
||||
for ln in lines: off.append(a); a += len(ln) + 1
|
||||
bounds = []; seen = set()
|
||||
for i, ln in enumerate(lines):
|
||||
m = ART_RE.match(ln)
|
||||
if not m: continue
|
||||
code, name = m.group(1), m.group(2).strip()
|
||||
if not TOP_RE.match(code): continue # top-level 번호섹션만 경계
|
||||
if code in seen: continue
|
||||
if len(name) < 1: continue
|
||||
seen.add(code); bounds.append((off[i], code, name))
|
||||
out = []
|
||||
for idx, (start, code, name) in enumerate(bounds):
|
||||
end = bounds[idx+1][0] if idx+1 < len(bounds) else len(text)
|
||||
body = text[start:end].strip()
|
||||
ext = sorted(set(EXT_RE.findall(body)))[:8]
|
||||
out.append(dict(code=code, part='본문', order=0, title=f"{code}. {name}"[:120],
|
||||
body=body, tok=tok(body), links=[], ext=ext))
|
||||
# over-CAP 페이지네이션 + 순번
|
||||
final, order = [], 0
|
||||
for c in out:
|
||||
if c['tok'] <= CAP:
|
||||
final.append({**c, 'order': order}); order += 1; continue
|
||||
pages, cur, ct = [], [], 0
|
||||
for ln in c['body'].split('\n'):
|
||||
lt = tok(ln)+1
|
||||
if ct+lt > PAGE_TOK and cur: pages.append('\n'.join(cur)); cur=[ln]; ct=lt
|
||||
else: cur.append(ln); ct+=lt
|
||||
if cur: pages.append('\n'.join(cur))
|
||||
for pi, pb in enumerate(pages):
|
||||
final.append(dict(code=c['code'] if pi==0 else f"{c['code']}·p{pi+1}", part='본문',
|
||||
order=order, title=c['title'] if pi==0 else f"{c['title']} (p{pi+1})",
|
||||
body=pb, tok=tok(pb), links=[], ext=[]))
|
||||
order += 1
|
||||
return final
|
||||
|
||||
async def process_one(conn, gid, commit, verbose=True):
|
||||
row = await conn.fetchrow("SELECT title, md_content, ai_domain, data_origin FROM documents WHERE id=$1", gid)
|
||||
if not row: return ('notfound', 0)
|
||||
if not row['md_content']: return ('nullmd', 0)
|
||||
secs = build_sections(row['md_content'])
|
||||
if len(secs) < 2: return ('few', len(secs)) # 섹션 2 미만 = 번호구조 아님
|
||||
toks = [c['tok'] for c in secs]
|
||||
if verbose:
|
||||
print(f"guide={gid} «{(row['title'] or '')[:40]}» 섹션={len(secs)} median={int(statistics.median(toks))} max={max(toks)}")
|
||||
print(" 샘플:", [c['title'][:26] for c in secs[:7]])
|
||||
if not commit: return ('dry', len(secs))
|
||||
async with conn.transaction():
|
||||
await conn.execute("DELETE FROM clause_links WHERE src_doc_id IN (SELECT id FROM documents WHERE parent_id=$1 AND doc_kind='clause')", gid)
|
||||
await conn.execute("DELETE FROM documents WHERE parent_id=$1 AND doc_kind='clause'", gid)
|
||||
for c in secs:
|
||||
fh = hashlib.sha256(f"{gid}:{c['code']}:{c['body']}".encode()).hexdigest()
|
||||
cid = await conn.fetchval("""
|
||||
INSERT INTO documents (file_format,file_hash,title,md_content,parent_id,doc_kind,
|
||||
clause_code,clause_part,clause_order,ai_domain,data_origin,
|
||||
md_status,review_status,conversion_status,preview_status)
|
||||
VALUES ('md',$1,$2,$3,$4,'clause',$5,$6,$7,$8,$9,'success','approved','none','none') RETURNING id
|
||||
""", fh, c['title'], c['body'], gid, c['code'], c['part'], c['order'], row['ai_domain'], row['data_origin'] or 'external')
|
||||
await conn.execute("INSERT INTO document_tags(doc_id,tag,tag_kind) VALUES ($1,'기술지침','kind') ON CONFLICT DO NOTHING", cid)
|
||||
n = await conn.fetchval("SELECT count(*) FROM documents WHERE parent_id=$1 AND doc_kind='clause'", gid)
|
||||
print(f" COMMITTED: {n} 섹션 for guide {gid}")
|
||||
return ('committed', len(secs))
|
||||
|
||||
async def main():
|
||||
import asyncpg
|
||||
arg = sys.argv[1]; commit = '--commit' in sys.argv
|
||||
conn = await asyncpg.connect(os.environ['DATABASE_URL'].replace('+asyncpg', ''))
|
||||
if arg == 'all':
|
||||
gs = await conn.fetch("SELECT id FROM documents WHERE material_type='guide' AND doc_kind='standard' "
|
||||
"AND deleted_at IS NULL AND md_content IS NOT NULL ORDER BY id")
|
||||
agg = {}; tot = 0
|
||||
for i, r in enumerate(gs):
|
||||
st, n = await process_one(conn, r['id'], commit, verbose=False)
|
||||
agg[st] = agg.get(st, 0)+1; tot += n if st in ('dry','committed') else 0
|
||||
if commit and (i+1) % 40 == 0: print(f" …{i+1}/{len(gs)} (누적섹션 {tot})")
|
||||
print(f"BATCH {'COMMIT' if commit else 'DRY'} guides={len(gs)} status={agg} 총섹션={tot}")
|
||||
else:
|
||||
await process_one(conn, int(arg), commit, verbose=True)
|
||||
await conn.close()
|
||||
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
"""법령 조-KB persist: 법령을 조(條) 단위 개별 문서로 분해 + 조↔조 백링크 + 장(章) 태그.
|
||||
ASME clause-KB와 동일 모델(doc_kind='clause', parent_id=법령, embedding NULL, 검색제외).
|
||||
법령 추출 노이즈(조 앞 ### 메타 반복) 트림. Usage: python3 law_clause_persist.py <law_id> [--commit]
|
||||
"""
|
||||
import asyncio, os, re, sys, hashlib, statistics
|
||||
|
||||
CAP = 12000; PAGE_TOK = 11000
|
||||
EN, KO = 0.217, 0.529
|
||||
# 조 헤더: '### 제3조의2(가스안전관리...) 본문'
|
||||
ART_RE = re.compile(r'^#{0,6}\s*(제\d+조(?:의\d+)?)\s*\(([^)]*)\)\s*(.*)$')
|
||||
CHAP_RE = re.compile(r'^#{1,6}\s*(제\d+장(?:의\d+)?)\s*(.*)$') # 장 = part
|
||||
# 같은-법 조 멘션(백링크)
|
||||
MENTION_RE = re.compile(r'제\d+조(?:의\d+)?')
|
||||
# 타법 참조: 「법명」 ... 제N조
|
||||
EXTLAW_RE = re.compile(r'「([^」]+)」')
|
||||
|
||||
def tok(s):
|
||||
ko = sum(1 for c in s if '가' <= c <= '힣'); return int((len(s)-ko)*EN + ko*KO)
|
||||
def art_code(c): return c # '제3조의2'
|
||||
|
||||
def build_articles(text):
|
||||
lines = text.split('\n'); off = []; a = 0
|
||||
for ln in lines: off.append(a); a += len(ln) + 1
|
||||
arts = [] # (line_idx, code, name, part)
|
||||
cur_part = None
|
||||
for i, ln in enumerate(lines):
|
||||
ch = CHAP_RE.match(ln)
|
||||
if ch and not ART_RE.match(ln):
|
||||
cur_part = (ch.group(1) + (' ' + ch.group(2).strip() if ch.group(2).strip() else '')).strip()
|
||||
continue
|
||||
m = ART_RE.match(ln)
|
||||
if m:
|
||||
arts.append((i, m.group(1), m.group(2).strip(), cur_part))
|
||||
# 본문 슬라이스 + 다음 조 앞 메타 노이즈 트림
|
||||
out = []
|
||||
for idx, (li, code, name, part) in enumerate(arts):
|
||||
end_li = arts[idx+1][0] if idx+1 < len(arts) else len(lines)
|
||||
body_lines = lines[li:end_li]
|
||||
# 트림: 끝에서부터 '### {짧은 메타}' (조번호/조문/날짜/제목, [개정] 제N조 아님) 제거
|
||||
while len(body_lines) > 1:
|
||||
last = body_lines[-1].strip()
|
||||
if last == '':
|
||||
body_lines.pop(); continue
|
||||
mh = re.match(r'^#{1,6}\s+(.*)$', last)
|
||||
if mh:
|
||||
c = mh.group(1).strip()
|
||||
if not c.startswith('[') and not c.startswith('제') and (
|
||||
c in ('조문', 'N') or re.fullmatch(r'\d+', c) or re.fullmatch(r'\d{8}', c) or len(c) <= 30):
|
||||
body_lines.pop(); continue
|
||||
break
|
||||
body = '\n'.join(body_lines).strip()
|
||||
links = sorted(set(MENTION_RE.findall(body)) - {code})
|
||||
ext = sorted(set(EXTLAW_RE.findall(body)))[:6]
|
||||
out.append(dict(code=code, part=part or '본칙', order=0,
|
||||
title=f"{code}({name})" if name else code,
|
||||
body=body, tok=tok(body), links=links, ext=ext))
|
||||
# 페이지네이션(over-CAP) + 순번
|
||||
final, order = [], 0
|
||||
for c in out:
|
||||
if c['tok'] <= CAP:
|
||||
final.append({**c, 'order': order}); order += 1; continue
|
||||
# 11K 토큰 라인 단위 분할
|
||||
pages, cur, ct = [], [], 0
|
||||
for ln in c['body'].split('\n'):
|
||||
lt = tok(ln)+1
|
||||
if ct+lt > PAGE_TOK and cur: pages.append('\n'.join(cur)); cur=[ln]; ct=lt
|
||||
else: cur.append(ln); ct+=lt
|
||||
if cur: pages.append('\n'.join(cur))
|
||||
for pi, pb in enumerate(pages):
|
||||
final.append(dict(code=c['code'] if pi==0 else f"{c['code']}·p{pi+1}", part=c['part'],
|
||||
order=order, title=c['title'] if pi==0 else f"{c['title']} (p{pi+1}/{len(pages)})",
|
||||
body=pb, tok=tok(pb), links=c['links'] if pi==0 else [], ext=[]))
|
||||
order += 1
|
||||
return final
|
||||
|
||||
async def process_one(conn, law, commit, verbose=True):
|
||||
row = await conn.fetchrow("SELECT title, coalesce(md_content, extracted_text) AS md_content, ai_domain, data_origin FROM documents WHERE id=$1", law)
|
||||
if not row: return ('notfound', 0, 0)
|
||||
if not row['md_content']: return ('nullmd', 0, 0)
|
||||
arts = build_articles(row['md_content'])
|
||||
if not arts: return ('noart', 0, 0)
|
||||
toks = [c['tok'] for c in arts]
|
||||
nlink = sum(len(c['links']) for c in arts)
|
||||
if verbose:
|
||||
parts = {}
|
||||
for c in arts: parts[c['part']] = parts.get(c['part'], 0)+1
|
||||
print(f"law={law} «{(row['title'] or '')[:34]}» 조문={len(arts)} median={int(statistics.median(toks))} "
|
||||
f"max={max(toks)} 장={len(parts)} 백링크={nlink}")
|
||||
print(" 샘플:", [c['title'][:22] for c in arts[:6]])
|
||||
if not commit:
|
||||
return ('dry', len(arts), nlink)
|
||||
async with conn.transaction():
|
||||
await conn.execute(
|
||||
"DELETE FROM clause_links WHERE src_doc_id IN (SELECT id FROM documents WHERE parent_id=$1 AND doc_kind='clause')", law)
|
||||
await conn.execute("DELETE FROM documents WHERE parent_id=$1 AND doc_kind='clause'", law)
|
||||
code2id = {}
|
||||
for c in arts:
|
||||
fh = hashlib.sha256(f"{law}:{c['code']}:{c['body']}".encode()).hexdigest()
|
||||
cid = await conn.fetchval("""
|
||||
INSERT INTO documents (file_format,file_hash,title,md_content,parent_id,doc_kind,
|
||||
clause_code,clause_part,clause_order,ai_domain,data_origin,
|
||||
md_status,review_status,conversion_status,preview_status)
|
||||
VALUES ('md',$1,$2,$3,$4,'clause',$5,$6,$7,$8,$9,'success','approved','none','none') RETURNING id
|
||||
""", fh, c['title'], c['body'], law, c['code'], c['part'], c['order'],
|
||||
row['ai_domain'], row['data_origin'] or 'external')
|
||||
code2id[c['code']] = cid
|
||||
await conn.execute("INSERT INTO document_tags(doc_id,tag,tag_kind) VALUES ($1,$2,'chapter') ON CONFLICT DO NOTHING", cid, c['part'])
|
||||
# 조↔조 백링크 (같은 법 내부; 타법 참조는 dangling)
|
||||
edges = []
|
||||
for c in arts:
|
||||
src = code2id[c['code']]
|
||||
for dst in c['links']:
|
||||
edges.append((src, dst, code2id.get(dst), None, None, None))
|
||||
if edges:
|
||||
await conn.executemany(
|
||||
"INSERT INTO clause_links(src_doc_id,dst_code,dst_doc_id,anchor,ctx,char_off) VALUES ($1,$2,$3,$4,$5,$6)", edges)
|
||||
n = await conn.fetchval("SELECT count(*) FROM documents WHERE parent_id=$1 AND doc_kind='clause'", law)
|
||||
print(f" COMMITTED: {n} 조문 + {len(edges)} 백링크 for law {law}")
|
||||
return ('committed', n, len(edges))
|
||||
|
||||
|
||||
async def main():
|
||||
import asyncpg
|
||||
arg = sys.argv[1]; commit = '--commit' in sys.argv
|
||||
conn = await asyncpg.connect(os.environ['DATABASE_URL'].replace('+asyncpg', ''))
|
||||
if arg == 'all':
|
||||
laws = await conn.fetch("SELECT lm.document_id AS id FROM legal_meta lm "
|
||||
"JOIN documents d ON d.id=lm.document_id "
|
||||
"WHERE lm.law_doc_kind='primary' AND lm.version_status='current' "
|
||||
"AND coalesce(d.md_content, d.extracted_text) IS NOT NULL "
|
||||
"ORDER BY lm.document_id")
|
||||
agg = {}; tot_art = tot_link = 0; zero = []
|
||||
for i, r in enumerate(laws):
|
||||
st, na, nl = await process_one(conn, r['id'], commit, verbose=False)
|
||||
agg[st] = agg.get(st, 0) + 1
|
||||
tot_art += na; tot_link += nl
|
||||
if st == 'noart': zero.append(r['id'])
|
||||
if commit and (i + 1) % 30 == 0: print(f" …{i+1}/{len(laws)} (누적 조 {tot_art})")
|
||||
print(f"BATCH {'COMMIT' if commit else 'DRY'} laws={len(laws)} status={agg} 총조문={tot_art} 총백링크={tot_link}")
|
||||
if zero: print(f" 0-조(추출구조 이질) {len(zero)}건: {zero[:20]}")
|
||||
else:
|
||||
await process_one(conn, int(arg), commit, verbose=True)
|
||||
await conn.close()
|
||||
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env python3
|
||||
"""논문 인용그래프 가능성 측정(read-only) — 본문 DOI로 코퍼스내 인용 엣지 추정.
|
||||
own_doi = 헤더(앞 2500자) 첫 DOI / cited = References 이후(또는 전체) DOI. owner 맵 → 엣지.
|
||||
"""
|
||||
import asyncio, os, re, sys
|
||||
|
||||
DOI_RE = re.compile(r'10\.\d{4,9}/[^\s"<>)\]\},;]+')
|
||||
REF_RE = re.compile(r'(references|참고문헌|bibliography|reference\s*list)', re.I)
|
||||
|
||||
def norm(d): return d.rstrip('.').lower()
|
||||
|
||||
async def main():
|
||||
import asyncpg
|
||||
conn = await asyncpg.connect(os.environ['DATABASE_URL'].replace('+asyncpg', ''))
|
||||
rows = await conn.fetch("SELECT id, title, coalesce(md_content, extracted_text) AS txt FROM documents "
|
||||
"WHERE material_type='paper' AND doc_kind='standard' AND deleted_at IS NULL "
|
||||
"AND coalesce(md_content, extracted_text) IS NOT NULL")
|
||||
owner = {} # doi -> paper id (헤더 DOI = 그 논문 소유)
|
||||
cited = {} # paper id -> set(cited doi)
|
||||
n_own = n_refsec = 0
|
||||
for r in rows:
|
||||
txt = r['txt']
|
||||
head = txt[:2500]
|
||||
hdois = [norm(d) for d in DOI_RE.findall(head)]
|
||||
if hdois:
|
||||
owner.setdefault(hdois[0], r['id']); n_own += 1
|
||||
m = REF_RE.search(txt)
|
||||
body = txt[m.start():] if m else ''
|
||||
if m: n_refsec += 1
|
||||
cds = set(norm(d) for d in DOI_RE.findall(body))
|
||||
if cds: cited[r['id']] = cds
|
||||
# 엣지: paper -> owner(cited doi)
|
||||
edges = []
|
||||
for pid, cds in cited.items():
|
||||
for d in cds:
|
||||
o = owner.get(d)
|
||||
if o and o != pid: edges.append((pid, o, d))
|
||||
cited_papers = set(e[0] for e in edges)
|
||||
target_papers = set(e[1] for e in edges)
|
||||
print(f"papers={len(rows)} 헤더DOI보유={n_own} References보유={n_refsec} owner_map={len(owner)}")
|
||||
print(f"인용엣지(코퍼스내)={len(edges)} 인용하는논문={len(cited_papers)} 피인용논문={len(target_papers)}")
|
||||
# 피인용 top
|
||||
from collections import Counter
|
||||
top = Counter(e[1] for e in edges).most_common(6)
|
||||
if top:
|
||||
idmap = {r['id']: r['title'] for r in rows}
|
||||
print("피인용 top:")
|
||||
for pid, c in top: print(f" {c}회 ← {(idmap.get(pid) or '')[:48]}")
|
||||
await conn.close()
|
||||
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""OpenAlex 고신뢰 매치율 측정 — References 보유 논문(학술 추정) 표본."""
|
||||
import asyncio, os, re
|
||||
|
||||
def toks(s):
|
||||
return set(re.findall(r'[a-z0-9]+', (s or '').lower()))
|
||||
def sim(a, b):
|
||||
ta, tb = toks(a), toks(b)
|
||||
if not ta or not tb: return 0.0
|
||||
return len(ta & tb) / len(ta | tb)
|
||||
|
||||
async def main():
|
||||
import asyncpg, httpx
|
||||
conn = await asyncpg.connect(os.environ['DATABASE_URL'].replace('+asyncpg', ''))
|
||||
rows = await conn.fetch("SELECT id, title FROM documents WHERE material_type='paper' "
|
||||
"AND doc_kind='standard' AND deleted_at IS NULL AND title IS NOT NULL "
|
||||
"AND coalesce(md_content,extracted_text) ~* 'references|참고문헌' "
|
||||
"ORDER BY id LIMIT 40")
|
||||
hi = mid = lo = 0; hits = []
|
||||
async with httpx.AsyncClient(timeout=20) as client:
|
||||
for r in rows:
|
||||
title = re.sub(r'\s+', ' ', r['title']).strip()
|
||||
try:
|
||||
resp = await client.get("https://api.openalex.org/works",
|
||||
params={"search": title[:200], "per_page": 1, "mailto": "hyun49196@gmail.com"})
|
||||
res = (resp.json().get("results") or [])
|
||||
if not res: lo += 1; continue
|
||||
s = sim(title, res[0].get("title"))
|
||||
if s >= 0.6: hi += 1; hits.append((s, title[:40], (res[0].get('title') or '')[:40], res[0].get('cited_by_count'), len(res[0].get('referenced_works') or [])))
|
||||
elif s >= 0.4: mid += 1
|
||||
else: lo += 1
|
||||
except Exception: lo += 1
|
||||
print(f"표본={len(rows)} 고신뢰(≥0.6)={hi} 중간(0.4~0.6)={mid} 저신뢰/무매치={lo}")
|
||||
print("고신뢰 매치 샘플:")
|
||||
for s, a, b, cb, rf in hits[:8]:
|
||||
print(f" sim={s:.2f} cited={cb} refs={rf} | {a} ≈ {b}")
|
||||
await conn.close()
|
||||
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python3
|
||||
"""OpenAlex 보강 타당성 테스트 — 소수 논문 제목으로 매칭/메타 확인 (외부 API)."""
|
||||
import asyncio, os, re
|
||||
|
||||
async def main():
|
||||
import asyncpg, httpx
|
||||
conn = await asyncpg.connect(os.environ['DATABASE_URL'].replace('+asyncpg', ''))
|
||||
rows = await conn.fetch("SELECT id, title FROM documents WHERE material_type='paper' "
|
||||
"AND doc_kind='standard' AND deleted_at IS NULL AND title IS NOT NULL "
|
||||
"AND length(title) > 15 ORDER BY id LIMIT 6")
|
||||
async with httpx.AsyncClient(timeout=20) as client:
|
||||
for r in rows:
|
||||
title = re.sub(r'\s+', ' ', r['title']).strip()
|
||||
try:
|
||||
resp = await client.get("https://api.openalex.org/works",
|
||||
params={"search": title[:200], "per_page": 1, "mailto": "hyun49196@gmail.com"})
|
||||
js = resp.json()
|
||||
res = (js.get("results") or [])
|
||||
if not res:
|
||||
print(f"[{r['id']}] NO MATCH | {title[:50]}"); continue
|
||||
w = res[0]
|
||||
oid = (w.get("id") or "").split("/")[-1]
|
||||
print(f"[{r['id']}] {title[:46]}")
|
||||
print(f" → OA {oid} | {(w.get('title') or '')[:46]} | {w.get('publication_year')} | "
|
||||
f"cited_by={w.get('cited_by_count')} | refs={len(w.get('referenced_works') or [])} | doi={w.get('doi')}")
|
||||
except Exception as e:
|
||||
print(f"[{r['id']}] ERROR {type(e).__name__}: {e}")
|
||||
await conn.close()
|
||||
|
||||
asyncio.run(main())
|
||||
Reference in New Issue
Block a user