feat(library): Phase 2A facet 탐색 기반 — 컬럼 + API + 필터
documents 테이블에 facet_company/topic/year/doctype 4개 축 추가. facet_values 사전 테이블 + CRUD API. facet-counts 집계 API (교차 필터링 지원). 문서 목록 API에 facet 필터 파라미터 추가. DocumentResponse/DocumentUpdate 스키마에 facet 필드 포함. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+23
-1
@@ -54,6 +54,10 @@ class DocumentResponse(BaseModel):
|
||||
source_channel: str | None
|
||||
data_origin: str | None
|
||||
doc_purpose: str | None
|
||||
facet_company: str | None = None
|
||||
facet_topic: str | None = None
|
||||
facet_year: int | None = None
|
||||
facet_doctype: str | None = None
|
||||
extracted_at: datetime | None
|
||||
ai_processed_at: datetime | None
|
||||
embedded_at: datetime | None
|
||||
@@ -84,6 +88,10 @@ class DocumentUpdate(BaseModel):
|
||||
data_origin: str | None = None
|
||||
doc_purpose: str | None = None
|
||||
pinned: bool | None = None
|
||||
facet_company: str | None = None
|
||||
facet_topic: str | None = None
|
||||
facet_year: int | None = None
|
||||
facet_doctype: str | None = None
|
||||
|
||||
|
||||
# ─── 스키마 (트리) ───
|
||||
@@ -207,8 +215,12 @@ async def list_library_documents(
|
||||
sort: str = Query("updated_desc"),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(20, ge=1, le=100),
|
||||
facet_company: str | None = None,
|
||||
facet_topic: str | None = None,
|
||||
facet_year: int | None = None,
|
||||
facet_doctype: str | None = None,
|
||||
):
|
||||
"""자료실 문서 목록 (prefix match, title 검색, 정렬)"""
|
||||
"""자료실 문서 목록 (prefix match, title 검색, facet 필터, 정렬)"""
|
||||
from sqlalchemy import text as sql_text
|
||||
|
||||
from core.library import LIBRARY_PREFIX, normalize_library_path
|
||||
@@ -248,6 +260,16 @@ async def list_library_documents(
|
||||
if q:
|
||||
query = query.where(Document.title.ilike(f"%{q}%"))
|
||||
|
||||
# facet 필터
|
||||
if facet_company:
|
||||
query = query.where(Document.facet_company == facet_company)
|
||||
if facet_topic:
|
||||
query = query.where(Document.facet_topic == facet_topic)
|
||||
if facet_year:
|
||||
query = query.where(Document.facet_year == facet_year)
|
||||
if facet_doctype:
|
||||
query = query.where(Document.facet_doctype == facet_doctype)
|
||||
|
||||
# 전체 건수
|
||||
count_query = select(func.count()).select_from(query.subquery())
|
||||
total = (await session.execute(count_query)).scalar()
|
||||
|
||||
@@ -14,8 +14,11 @@ from core.database import get_session
|
||||
from core.library import LIBRARY_PREFIX, MAX_DEPTH, normalize_library_path
|
||||
from models.category import LibraryCategory
|
||||
from models.document import Document
|
||||
from models.facet_value import FacetValue
|
||||
from models.user import User
|
||||
|
||||
FACET_TYPES = ("company", "topic", "doctype")  # year needs no dictionary table
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@@ -332,3 +335,193 @@ async def get_library_tree(
|
||||
return nodes
|
||||
|
||||
return build_tree(root)
|
||||
|
||||
|
||||
# ─── Facet API (Phase 2) ───
|
||||
|
||||
|
||||
class FacetValueResponse(BaseModel):
    """A single facet dictionary entry: a facet axis plus one of its values.

    Used as the response model of POST /facets; add_facet_value also accepts
    it as the request body.
    """

    facet_type: str  # validated against FACET_TYPES by add_facet_value
    value: str

    # Allow pydantic to populate the model from object attributes.
    model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class FacetCountItem(BaseModel):
    """One facet value together with the number of documents that carry it."""

    value: str  # year values are stringified by get_facet_counts
    count: int
|
||||
|
||||
|
||||
class FacetCountsResponse(BaseModel):
    """Per-axis facet counts returned by GET /facet-counts."""

    company: list[FacetCountItem]
    topic: list[FacetCountItem]
    year: list[FacetCountItem]
    doctype: list[FacetCountItem]
|
||||
|
||||
|
||||
@router.get("/facets", response_model=dict[str, list[str]])
async def get_facet_values(
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Return the allowed values for every facet axis.

    The axes listed in FACET_TYPES come from the facet_values dictionary
    table. The year axis has no dictionary; it is derived from the
    facet_year values present on non-deleted documents.

    Returns:
        Mapping of axis name to its values. Dictionary axes are ordered by
        value ascending; years are stringified and ordered newest first.
    """
    # Seed every dictionary axis so an axis without rows still maps to [].
    result: dict[str, list[str]] = {ft: [] for ft in FACET_TYPES}

    # One round trip for all dictionary axes instead of one query per axis.
    # Ordering by value across the whole result keeps each per-axis list sorted.
    dictionary_rows = await session.execute(
        select(FacetValue.facet_type, FacetValue.value)
        .where(FacetValue.facet_type.in_(FACET_TYPES))
        .order_by(FacetValue.value)
    )
    for facet_type, value in dictionary_rows:
        result[facet_type].append(value)

    # The year axis is extracted from actual document values.
    year_rows = await session.execute(
        select(Document.facet_year)
        .where(
            Document.deleted_at.is_(None),
            Document.facet_year.is_not(None),
        )
        .distinct()
        .order_by(Document.facet_year.desc())
    )
    result["year"] = [str(row[0]) for row in year_rows]

    return result
|
||||
|
||||
|
||||
@router.post("/facets", response_model=FacetValueResponse, status_code=201)
async def add_facet_value(
    body: FacetValueResponse,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Add a new value to the facet dictionary.

    Args:
        body: Facet axis and the value to register; the value is stripped of
            surrounding whitespace before it is stored.

    Returns:
        The stored (facet_type, value) pair.

    Raises:
        HTTPException: 400 when the axis is not in FACET_TYPES or the value is
            empty after stripping; 409 when the pair already exists.
    """
    # Local import keeps this block self-contained within the module.
    from sqlalchemy.exc import IntegrityError

    if body.facet_type not in FACET_TYPES:
        raise HTTPException(status_code=400, detail=f"허용 facet: {', '.join(FACET_TYPES)}")

    value = body.value.strip()
    if not value:
        raise HTTPException(status_code=400, detail="빈 값")

    # Fast path: report an existing entry without attempting the insert.
    existing = await session.execute(
        select(FacetValue).where(
            FacetValue.facet_type == body.facet_type,
            FacetValue.value == value,
        )
    )
    if existing.scalar_one_or_none() is not None:
        raise HTTPException(status_code=409, detail="이미 존재하는 값")

    session.add(FacetValue(facet_type=body.facet_type, value=value))
    try:
        await session.commit()
    except IntegrityError as exc:
        # A concurrent request can insert the same pair between the check
        # above and this commit; the database constraint then rejects the
        # insert. Report that as a conflict rather than a server error.
        await session.rollback()
        raise HTTPException(status_code=409, detail="이미 존재하는 값") from exc

    return FacetValueResponse(facet_type=body.facet_type, value=value)
|
||||
|
||||
|
||||
@router.get("/facet-counts", response_model=FacetCountsResponse)
async def get_facet_counts(
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
    library_path: str | None = None,
    facet_company: str | None = None,
    facet_topic: str | None = None,
    facet_year: int | None = None,
    facet_doctype: str | None = None,
    q: str | None = None,
):
    """Return per-value document counts for each facet axis under the current filters.

    Every axis is counted with the filters of the *other* axes applied but
    not its own, so the values of an axis stay selectable after one of them
    has been chosen.

    Args:
        library_path: When given, only documents with a user tag equal to
            LIBRARY_PREFIX + library_path, or nested below it, are counted.
        facet_company, facet_topic, facet_year, facet_doctype: Currently
            selected facet values; falsy values mean "no filter".
        q: Case-insensitive substring filter on the document title.

    Returns:
        FacetCountsResponse with one list per axis. Company, topic and
        doctype are ordered by count descending; year is ordered by year
        descending with values stringified.
    """
    # sql_text is otherwise only imported inside list_library_documents;
    # import it here as well so the library_path branch can resolve the name.
    from sqlalchemy import text as sql_text

    def base_query():
        """Build the axis-independent document selection."""
        query = select(Document).where(
            Document.deleted_at.is_(None),
            Document.doc_purpose == "business",
        )
        if library_path:
            # NOTE(review): library_path is used verbatim here, while the
            # module also imports normalize_library_path — confirm whether
            # it should be normalized first.
            exact = f"{LIBRARY_PREFIX}{library_path}"
            prefix = f"{LIBRARY_PREFIX}{library_path}/%"
            query = query.where(
                sql_text("""
                    EXISTS (
                        SELECT 1 FROM jsonb_array_elements_text(
                            COALESCE(documents.user_tags, '[]'::jsonb)
                        ) AS t
                        WHERE t = :exact OR t LIKE :prefix
                    )
                """).bindparams(exact=exact, prefix=prefix)
            )
        if q:
            query = query.where(Document.title.ilike(f"%{q}%"))
        return query

    # Axis name -> (document column, currently selected value).
    axes = {
        "company": (Document.facet_company, facet_company),
        "topic": (Document.facet_topic, facet_topic),
        "year": (Document.facet_year, facet_year),
        "doctype": (Document.facet_doctype, facet_doctype),
    }

    async def count_axis(axis: str) -> list[FacetCountItem]:
        """Count documents per value of `axis`, filtered by all other axes."""
        column = axes[axis][0]

        scoped = base_query()
        for other_axis, (other_column, selected) in axes.items():
            # Skip the axis being counted so its own selection does not
            # collapse the list to a single value.
            if other_axis != axis and selected:
                scoped = scoped.where(other_column == selected)

        # Years read best chronologically; other axes by popularity.
        ordering = column.desc() if axis == "year" else func.count().desc()

        rows = await session.execute(
            select(column, func.count())
            .where(column.is_not(None))
            .where(Document.id.in_(scoped.with_only_columns(Document.id).subquery().select()))
            .group_by(column)
            .order_by(ordering)
        )
        # str() is a no-op for the text axes and converts the integer year
        # into the string that FacetCountItem.value expects.
        return [FacetCountItem(value=str(value), count=count) for value, count in rows]

    # Awaited one after another: all four queries share the same session.
    return FacetCountsResponse(
        company=await count_axis("company"),
        topic=await count_axis("topic"),
        year=await count_axis("year"),
        doctype=await count_axis("doctype"),
    )
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
from datetime import datetime
|
||||
|
||||
from pgvector.sqlalchemy import Vector
|
||||
from sqlalchemy import BigInteger, Boolean, DateTime, Enum, String, Text
|
||||
from sqlalchemy import BigInteger, Boolean, DateTime, Enum, Integer, String, Text
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
@@ -99,6 +99,12 @@ class Document(Base):
|
||||
)
|
||||
title: Mapped[str | None] = mapped_column(Text)
|
||||
|
||||
# facet 탐색 축 (Phase 2)
|
||||
facet_company: Mapped[str | None] = mapped_column(Text)
|
||||
facet_topic: Mapped[str | None] = mapped_column(Text)
|
||||
facet_year: Mapped[int | None] = mapped_column(Integer)
|
||||
facet_doctype: Mapped[str | None] = mapped_column(Text)
|
||||
|
||||
# 타임스탬프
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), default=datetime.now
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
"""facet_values 테이블 ORM — facet 축별 허용값 사전"""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import BigInteger, Boolean, DateTime, Text
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from core.database import Base
|
||||
|
||||
|
||||
class FacetValue(Base):
    """ORM mapping for the facet_values table: allowed values per facet axis."""

    __tablename__ = "facet_values"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    facet_type: Mapped[str] = mapped_column(Text, nullable=False)  # company, topic, doctype
    value: Mapped[str] = mapped_column(Text, nullable=False)
    is_system: Mapped[bool] = mapped_column(Boolean, default=False)
    # NOTE(review): datetime.now yields a naive timestamp although the column
    # is timezone-aware — confirm this is intended.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=datetime.now
    )
|
||||
@@ -0,0 +1 @@
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS facet_company TEXT
|
||||
@@ -0,0 +1 @@
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS facet_topic TEXT
|
||||
@@ -0,0 +1 @@
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS facet_year INT
|
||||
@@ -0,0 +1 @@
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS facet_doctype TEXT
|
||||
@@ -0,0 +1 @@
|
||||
CREATE INDEX IF NOT EXISTS idx_documents_facet_company ON documents (facet_company) WHERE facet_company IS NOT NULL
|
||||
@@ -0,0 +1 @@
|
||||
CREATE INDEX IF NOT EXISTS idx_documents_facet_topic ON documents (facet_topic) WHERE facet_topic IS NOT NULL
|
||||
@@ -0,0 +1 @@
|
||||
CREATE INDEX IF NOT EXISTS idx_documents_facet_year ON documents (facet_year) WHERE facet_year IS NOT NULL
|
||||
@@ -0,0 +1 @@
|
||||
CREATE INDEX IF NOT EXISTS idx_documents_facet_doctype ON documents (facet_doctype) WHERE facet_doctype IS NOT NULL
|
||||
@@ -0,0 +1,7 @@
|
||||
-- Dictionary of allowed values per facet axis.
CREATE TABLE IF NOT EXISTS facet_values (
    id BIGSERIAL PRIMARY KEY,
    facet_type TEXT NOT NULL,  -- facet axis the value belongs to
    value TEXT NOT NULL,
    is_system BOOLEAN NOT NULL DEFAULT FALSE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
)
|
||||
@@ -0,0 +1 @@
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS uq_facet_values_type_value ON facet_values (facet_type, value)
|
||||
Reference in New Issue
Block a user