feat(docs): 관련 문서(유사도 KNN) 엔드포인트+패널 + 법령/지침 splitter

This commit is contained in:
hyungi
2026-06-30 06:10:11 +00:00
parent c44692fddc
commit a22b2c7647
7 changed files with 432 additions and 0 deletions
+58
View File
@@ -1990,3 +1990,61 @@ async def get_document_backlinks(
forward=[BacklinkRef(**dict(r)) for r in forward],
back=[BacklinkRef(**dict(r)) for r in back],
)
# ─── 관련 문서 (유사도, on-demand pgvector KNN — 저부하·무저장) ───
class RelatedItem(BaseModel):
    # One entry of the "related documents" list built by get_related_documents.
    # Every field except `id` is optional and defaults to None.
    id: int  # `documents.id` of the related document
    title: str | None = None  # `documents.title`
    ai_domain: str | None = None  # `documents.ai_domain`
    material_type: str | None = None  # `documents.material_type`
    year: int | None = None  # filled from the `facet_year` column (aliased to `year` in the query)
    sim: float | None = None  # 1 - embedding distance to the source document, rounded to 3 decimals
class RelatedResponse(BaseModel):
    # Response body of GET /{doc_id}/related.
    doc_id: int  # the document id that was requested (echoed back unchanged)
    related: list[RelatedItem]  # nearest neighbours in query order (closest embedding first)
@router.get("/{doc_id}/related", response_model=RelatedResponse)
async def get_related_documents(
    doc_id: int,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
    limit: int = 8,
    same_type: bool = True,
):
    """Return the documents whose embedding is closest to that of ``doc_id``.

    "Related documents" are the cosine nearest neighbours over document-level
    embeddings, computed on demand (nothing is stored, no batch job). This is
    an alternative linking layer for corpora where a citation graph does not
    fit (industry technical articles have no citation network).

    Args:
        doc_id: Id of the source document.
        user: Resolved through the ``get_current_user`` dependency; not read
            in the body.
        session: Async database session used to run the query.
        limit: Requested number of neighbours; clamped into the range 1..30.
        same_type: When true, only documents with the same ``material_type``
            as the source are considered; when false, the whole corpus is.

    Returns:
        A ``RelatedResponse`` echoing ``doc_id`` with the neighbours ordered
        from closest to farthest. Only rows with ``doc_kind = 'standard'``
        (so clause documents are excluded), not soft-deleted, with a non-null
        embedding and different from the source are candidates. The list is
        empty when the source row is missing or has no embedding.
    """
    from sqlalchemy import text as sql_text

    # Keep the page size within 1..30 regardless of what the caller asked for.
    capped_limit = min(max(limit, 1), 30)

    # Optional extra predicate; a fixed string, never built from user input.
    if same_type:
        material_filter = "AND d.material_type = src.material_type"
    else:
        material_filter = ""

    knn_sql = f"""
                WITH src AS (
                    SELECT embedding, material_type FROM documents WHERE id = :id
                )
                SELECT d.id, d.title, d.ai_domain, d.material_type, d.facet_year AS year,
                       round((1 - (d.embedding <=> (SELECT embedding FROM src)))::numeric, 3) AS sim
                FROM documents d, src
                WHERE d.doc_kind = 'standard' AND d.deleted_at IS NULL
                  AND d.id <> :id AND d.embedding IS NOT NULL
                  AND (SELECT embedding FROM src) IS NOT NULL
                  {material_filter}
                ORDER BY d.embedding <=> (SELECT embedding FROM src)
                LIMIT :lim
                """
    statement = sql_text(knn_sql).bindparams(id=doc_id, lim=capped_limit)
    result = await session.execute(statement)

    copied_columns = ("id", "title", "ai_domain", "material_type", "year")
    neighbours = []
    for record in result.mappings().all():
        payload = {column: record[column] for column in copied_columns}
        # `sim` comes back as a SQL numeric; convert to float, keep NULL as None.
        raw_sim = record["sim"]
        payload["sim"] = None if raw_sim is None else float(raw_sim)
        neighbours.append(RelatedItem(**payload))

    return RelatedResponse(doc_id=doc_id, related=neighbours)