Files
hyungi_document_server/app/api/library.py
T
hyungi 63be005c6f fix(security): 보안 위생 5건 — library admin 게이트·edit_url SSRF·보안헤더·8080 바인드·하드코딩 비번 제거
M3 library.py: categories POST/PATCH/DELETE + facets POST 를 get_current_user→require_admin
(공유 분류 CRUD 를 17주체→admin 한정, news/digest 패턴 정합).
M1 documents.py: update_document PATCH 에 edit_url validate_feed_url 가드 — 내부/메타데이터 주소
후속 fetch(fulltext_worker) latent SSRF 차단(API 레이어 무방비 해소, news.py 동형).
Caddyfile: 보안 헤더(nosniff·X-Frame SAMEORIGIN·Referrer-Policy·-Server). HSTS 는 edge 소관.
compose: caddy 8080:80 0.0.0.0→127.0.0.1 (LAN 우회 차단, 실 ingress=home-caddy→caddy:80 도커망).
scripts: 하드코딩 죽은 DB 비번 → os.environ (1차 감사 누락분, .env 한정 점검이 놓침).

별도(DB): test-% 계정 12개 비활성화 (공유풀 주체 17→5, 랜덤해시라 비번노출 아님·위생).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-20 05:48:02 +00:00

508 lines
17 KiB
Python

"""자료실 분류 체계 CRUD API — /api/library"""
import json
from datetime import datetime
from typing import Annotated

from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel
from sqlalchemy import func, select
from sqlalchemy import text as sql_text
from sqlalchemy.ext.asyncio import AsyncSession

from core.auth import get_current_user, require_admin
from core.database import get_session
from core.library import LIBRARY_PREFIX, MAX_DEPTH, normalize_library_path
from models.category import LibraryCategory
from models.document import Document
from models.facet_value import FacetValue
from models.user import User
# Facet axes whose allowed values are looked up in FacetValue rows.
FACET_TYPES = ("company", "topic", "doctype")  # "year" needs no dictionary
# Router that all endpoints in this module register on.
router = APIRouter()
# ─── 스키마 ───
class CategoryCreate(BaseModel):
    """Request body for creating a category."""

    # Slash-separated category path; normalized by the endpoint before use.
    path: str
class CategoryRename(BaseModel):
    """Request body for renaming a category identified by its current path."""

    # Current full path of the category to rename.
    path: str
    # New last-segment name; the parent part of the path is kept.
    new_name: str
class CategoryResponse(BaseModel):
    """Serialized view of a LibraryCategory row."""

    id: int
    path: str
    name: str
    # None for a top-level category.
    parent_path: str | None
    depth: int
    is_system: bool
    created_at: datetime
    updated_at: datetime

    # Allows model_validate() to read attributes straight off an ORM object.
    model_config = {"from_attributes": True}
class CategoryTreeNode(BaseModel):
    """One node of the library tree returned by the /tree endpoint."""

    name: str
    path: str
    count: int
    # For the current user: number of not-yet-read documents under this path
    # (sub-paths included). 0 means everything has been read at least once.
    unread_count: int = 0
    is_category: bool
    is_system: bool
    has_children: bool
    children: list["CategoryTreeNode"]
# ─── 엔드포인트 ───
@router.get("/categories", response_model=list[CategoryResponse])
async def list_categories(
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Return every category as a flat list ordered by path."""
    stmt = select(LibraryCategory).order_by(LibraryCategory.path)
    rows = (await session.execute(stmt)).scalars().all()
    return [CategoryResponse.model_validate(row) for row in rows]
@router.post("/categories", response_model=CategoryResponse, status_code=201)
async def create_category(
    body: CategoryCreate,
    user: Annotated[User, Depends(require_admin)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Create a category, adding any missing ancestor categories as well.

    Args:
        body: Carries the requested category path.
        user: Resolved through the ``require_admin`` dependency.
        session: Database session.

    Returns:
        The newly created category.

    Raises:
        HTTPException: 400 when ``normalize_library_path`` rejects the path or
            the path has more than ``MAX_DEPTH`` segments; 409 when a category
            with the normalized path already exists.
    """
    try:
        normalized = normalize_library_path(body.path)
    except ValueError as e:
        # Chain the cause so the original validation error stays in tracebacks.
        raise HTTPException(status_code=400, detail=str(e)) from e

    segments = normalized.split("/")
    if len(segments) > MAX_DEPTH:
        raise HTTPException(status_code=400, detail=f"최대 {MAX_DEPTH}단계까지 가능")

    # Every prefix of the path, shortest first; the last entry is the target.
    lineage = ["/".join(segments[:depth]) for depth in range(1, len(segments) + 1)]

    # One round trip answers both "is the target a duplicate?" and
    # "which ancestors are missing?".
    found = await session.execute(
        select(LibraryCategory.path).where(LibraryCategory.path.in_(lineage))
    )
    existing_paths = set(found.scalars().all())
    if normalized in existing_paths:
        raise HTTPException(status_code=409, detail="이미 존재하는 분류 경로")

    # Add missing ancestors, then the target itself. The target is never in
    # existing_paths at this point, so the last row added is always the target.
    category = None
    for depth, path in enumerate(lineage, start=1):
        if path in existing_paths:
            continue
        category = LibraryCategory(
            path=path,
            name=segments[depth - 1],
            parent_path="/".join(segments[: depth - 1]) or None,
            depth=depth,
        )
        session.add(category)

    await session.commit()
    await session.refresh(category)
    return CategoryResponse.model_validate(category)
@router.patch("/categories", response_model=CategoryResponse)
async def rename_category(
    body: CategoryRename,
    user: Annotated[User, Depends(require_admin)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Rename a leaf category (identified by path) and retag its documents.

    The category's last path segment is replaced by ``body.new_name``; its
    ``parent_path`` is left untouched. Every document whose ``user_tags``
    contains the old library tag gets that element rewritten to the new tag
    in the same transaction.

    Args:
        body: Current path of the category and the new name.
        user: Resolved through the ``require_admin`` dependency.
        session: Database session.

    Returns:
        The category after the rename.

    Raises:
        HTTPException: 404 when no category has ``body.path``; 422 when the
            category is a system category or has child categories; 400 when
            the new name is empty, longer than 30 characters, or contains
            ``/``; 409 when a category with the resulting path already exists.
    """
    lookup = await session.execute(
        select(LibraryCategory).where(LibraryCategory.path == body.path)
    )
    category = lookup.scalar_one_or_none()
    if not category:
        raise HTTPException(status_code=404, detail="분류를 찾을 수 없습니다")

    # System categories are protected from renaming.
    if category.is_system:
        raise HTTPException(status_code=422, detail="시스템 분류는 변경할 수 없습니다")

    # Only leaves may be renamed: child rows store the parent's path.
    children = await session.execute(
        select(func.count()).where(
            LibraryCategory.parent_path == category.path
        )
    )
    if children.scalar() > 0:
        raise HTTPException(
            status_code=422, detail="하위 분류가 있어 이름을 변경할 수 없습니다"
        )

    new_name = body.new_name.strip()
    if not new_name:
        raise HTTPException(status_code=400, detail="빈 이름")
    if len(new_name) > 30:
        raise HTTPException(status_code=400, detail="이름은 30자 이하")
    if "/" in new_name:
        # "/" is the path separator: allowing it would produce a path with
        # extra segments while depth/parent_path still describe the old level.
        raise HTTPException(status_code=400, detail="이름에 '/'를 포함할 수 없습니다")

    new_path = (
        f"{category.parent_path}/{new_name}" if category.parent_path else new_name
    )

    dup = await session.execute(
        select(LibraryCategory.id).where(LibraryCategory.path == new_path)
    )
    if dup.scalar_one_or_none():
        raise HTTPException(status_code=409, detail="같은 이름의 분류가 이미 존재합니다")

    old_tag = f"{LIBRARY_PREFIX}{category.path}"
    new_tag = f"{LIBRARY_PREFIX}{new_path}"

    # Rewrite the matching element of user_tags on every document carrying
    # the old tag. The containment operand is serialized with json.dumps so
    # quotes or backslashes inside the tag still yield valid JSON.
    await session.execute(
        sql_text("""
            UPDATE documents
            SET user_tags = COALESCE((
                SELECT jsonb_agg(
                    CASE WHEN elem = :old_tag THEN :new_tag ELSE elem END
                )
                FROM jsonb_array_elements_text(
                    COALESCE(user_tags, '[]'::jsonb)
                ) AS elem
            ), '[]'::jsonb)
            WHERE user_tags @> :old_tag_jsonb
        """).bindparams(
            old_tag=old_tag,
            new_tag=new_tag,
            old_tag_jsonb=json.dumps([old_tag], ensure_ascii=False),
        )
    )

    # Update only path and name on the category row; parent_path is unchanged.
    category.path = new_path
    category.name = new_name
    await session.commit()
    await session.refresh(category)
    return CategoryResponse.model_validate(category)
@router.delete("/categories", status_code=204)
async def delete_category(
    path: str = Query(..., description="삭제할 카테고리 경로"),
    user: Annotated[User, Depends(require_admin)] = None,
    session: Annotated[AsyncSession, Depends(get_session)] = None,
):
    """Delete a category, allowed only for an unused leaf.

    The ``None`` defaults on ``user`` and ``session`` are placeholders: they
    follow ``path``, which has a default, so Python requires them to have one
    too. Both are expected to be supplied through their ``Depends``.

    Raises:
        HTTPException: 404 when no category has ``path``; 422 when the
            category is a system category, has child categories, or is still
            tagged on at least one non-deleted document.
    """
    lookup = await session.execute(
        select(LibraryCategory).where(LibraryCategory.path == path)
    )
    target = lookup.scalar_one_or_none()
    if not target:
        raise HTTPException(status_code=404, detail="분류를 찾을 수 없습니다")
    if target.is_system:
        raise HTTPException(status_code=422, detail="시스템 분류는 삭제할 수 없습니다")

    # Leaf check: refuse while any category names this one as its parent.
    child_stmt = select(func.count()).where(
        LibraryCategory.parent_path == target.path
    )
    child_count = (await session.execute(child_stmt)).scalar()
    if child_count > 0:
        raise HTTPException(
            status_code=422, detail="하위 분류가 있어 삭제할 수 없습니다"
        )

    # Usage check: count non-deleted documents carrying this category's tag.
    tag = f"{LIBRARY_PREFIX}{target.path}"
    usage_stmt = sql_text("""
        SELECT COUNT(*) FROM documents
        WHERE deleted_at IS NULL
        AND EXISTS (
            SELECT 1 FROM jsonb_array_elements_text(
                COALESCE(user_tags, '[]'::jsonb)
            ) AS t
            WHERE t = :tag
        )
    """).bindparams(tag=tag)
    tagged_docs = (await session.execute(usage_stmt)).scalar()
    if tagged_docs > 0:
        raise HTTPException(
            status_code=422,
            detail="이 분류에 속한 문서가 있어 삭제할 수 없습니다. 문서를 먼저 이동하세요.",
        )

    await session.delete(target)
    await session.commit()
@router.get("/tree", response_model=list[CategoryTreeNode])
async def get_library_tree(
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Build the library tree by merging stored categories with document tags.

    A path appears in the tree if it is a stored category or if it is a prefix
    of a library tag found on a non-deleted document. Each node reports how
    many distinct documents are tagged at or below its path and how many of
    those the current user has no DocumentRead row for.

    Args:
        user: Current user; used to compute ``unread_count``.
        session: Database session.

    Returns:
        Top-level nodes sorted by name, each with its nested children.
    """
    # 1. All stored categories, keyed by path.
    cat_result = await session.execute(
        select(LibraryCategory).order_by(LibraryCategory.path)
    )
    cat_map: dict[str, LibraryCategory] = {
        c.path: c for c in cat_result.scalars().all()
    }

    # 2. Document ids per path, derived from library tags.
    doc_result = await session.execute(
        select(Document.id, Document.user_tags).where(
            Document.deleted_at == None,  # noqa: E711
            Document.user_tags != None,  # noqa: E711
        )
    )
    path_docs: dict[str, set[int]] = {}
    for doc_id, tags in doc_result:
        if not tags:
            continue
        for tag in tags:
            if not isinstance(tag, str) or not tag.startswith(LIBRARY_PREFIX):
                continue
            parts = tag[len(LIBRARY_PREFIX):].split("/")
            # Credit the document to the tagged path and to every ancestor;
            # the sets make repeated credits from sibling tags harmless.
            for depth in range(1, len(parts) + 1):
                path_docs.setdefault("/".join(parts[:depth]), set()).add(doc_id)

    # 2.5 Documents the current user has read at least once
    #     (unread = all documents at a path minus these).
    # NOTE(review): function-scope import kept as in the original; presumably
    # it avoids an import cycle — confirm before hoisting.
    from models.document_read import DocumentRead

    read_result = await session.execute(
        select(DocumentRead.document_id)
        .where(DocumentRead.user_id == user.id)
        .group_by(DocumentRead.document_id)
    )
    read_doc_ids: set[int] = {r[0] for r in read_result}

    # 3. Nested dict of path segments: each key maps to the dict of its
    #    children. No sentinel keys are used, so a segment that starts with
    #    "_" is an ordinary node and is not dropped from the output.
    tree: dict[str, dict] = {}
    for p in set(cat_map) | set(path_docs):
        level = tree
        for part in p.split("/"):
            level = level.setdefault(part, {})

    def build_tree(level: dict[str, dict], prefix: str = "") -> list[CategoryTreeNode]:
        """Convert one nesting level into name-sorted CategoryTreeNode objects."""
        nodes = []
        for name in sorted(level):
            path = f"{prefix}/{name}" if prefix else name
            children = build_tree(level[name], path)
            cat = cat_map.get(path)
            # path_docs[path] already includes documents of all descendants
            # (ancestor crediting above), so no bottom-up summation is needed.
            docs_at_path = path_docs.get(path, set())
            nodes.append(CategoryTreeNode(
                name=name,
                path=path,
                count=len(docs_at_path),
                unread_count=len(docs_at_path - read_doc_ids),
                is_category=path in cat_map,
                is_system=cat.is_system if cat else False,
                has_children=len(children) > 0,
                children=children,
            ))
        return nodes

    return build_tree(tree)
# ─── Facet API (Phase 2) ───
class FacetValueResponse(BaseModel):
    """A single facet dictionary entry: the axis and one allowed value."""

    facet_type: str
    value: str

    # Allows model_validate() to read attributes straight off an ORM object.
    model_config = {"from_attributes": True}
class FacetCountItem(BaseModel):
    """One facet value together with the number of matching documents."""

    value: str
    count: int
class FacetCountsResponse(BaseModel):
    """Per-axis facet counts returned by the /facet-counts endpoint."""

    company: list[FacetCountItem]
    topic: list[FacetCountItem]
    year: list[FacetCountItem]
    doctype: list[FacetCountItem]
@router.get("/facets", response_model=dict[str, list[str]])
async def get_facet_values(
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Return the allowed values for each facet axis.

    Axes listed in ``FACET_TYPES`` come from FacetValue rows, sorted by value.
    ``year`` has no dictionary: it is the distinct ``facet_year`` values of
    non-deleted documents, newest first, rendered as strings.
    """
    facets: dict[str, list[str]] = {}
    for facet_type in FACET_TYPES:
        dictionary_stmt = (
            select(FacetValue.value)
            .where(FacetValue.facet_type == facet_type)
            .order_by(FacetValue.value)
        )
        facets[facet_type] = [row[0] for row in await session.execute(dictionary_stmt)]

    # Years are taken from the documents themselves rather than a dictionary.
    year_stmt = (
        select(Document.facet_year)
        .where(
            Document.deleted_at == None,  # noqa: E711
            Document.facet_year != None,  # noqa: E711
        )
        .distinct()
        .order_by(Document.facet_year.desc())
    )
    facets["year"] = [str(row[0]) for row in await session.execute(year_stmt)]
    return facets
@router.post("/facets", response_model=FacetValueResponse, status_code=201)
async def add_facet_value(
    body: FacetValueResponse,
    user: Annotated[User, Depends(require_admin)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Add a new value to a facet dictionary.

    Raises:
        HTTPException: 400 when ``facet_type`` is not in ``FACET_TYPES`` or
            the value is blank after stripping; 409 when the same
            (facet_type, value) pair is already stored.
    """
    facet_type = body.facet_type
    if facet_type not in FACET_TYPES:
        raise HTTPException(status_code=400, detail=f"허용 facet: {', '.join(FACET_TYPES)}")

    value = body.value.strip()
    if not value:
        raise HTTPException(status_code=400, detail="빈 값")

    duplicate_stmt = select(FacetValue).where(
        FacetValue.facet_type == facet_type,
        FacetValue.value == value,
    )
    already_there = (await session.execute(duplicate_stmt)).scalar_one_or_none()
    if already_there:
        raise HTTPException(status_code=409, detail="이미 존재하는 값")

    session.add(FacetValue(facet_type=facet_type, value=value))
    await session.commit()
    return FacetValueResponse(facet_type=facet_type, value=value)
def _escape_like(value: str) -> str:
    """Backslash-escape LIKE metacharacters so ``value`` matches literally."""
    return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


@router.get("/facet-counts", response_model=FacetCountsResponse)
async def get_facet_counts(
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
    library_path: str | None = None,
    facet_company: str | None = None,
    facet_topic: str | None = None,
    facet_year: int | None = None,
    facet_doctype: str | None = None,
    q: str | None = None,
):
    """Return per-axis document counts under the currently applied filters.

    Only non-deleted documents with ``doc_purpose == "business"`` are counted.
    When computing one axis, that axis's own filter is left out and every
    other applied facet filter is kept, so the caller sees the alternatives
    available on that axis.

    Args:
        user: Current user (authentication only).
        session: Database session.
        library_path: Restrict to documents tagged with this library path or
            any path below it.
        facet_company: Company filter.
        facet_topic: Topic filter.
        facet_year: Year filter.
        facet_doctype: Document-type filter.
        q: Case-insensitive substring filter on the document title.

    Returns:
        Counts for the company, topic, year and doctype axes.
    """
    def base_query():
        """Documents matching the non-facet filters (purpose, path, title)."""
        query = select(Document).where(
            Document.deleted_at == None,  # noqa: E711
            Document.doc_purpose == "business",
        )
        if library_path:
            exact = f"{LIBRARY_PREFIX}{library_path}"
            # Escape "%", "_" and "\" so characters in the path are matched
            # literally; only the trailing "%" acts as a wildcard. Backslash
            # is PostgreSQL's default LIKE escape character.
            prefix = _escape_like(f"{exact}/") + "%"
            query = query.where(
                sql_text("""
                    EXISTS (
                        SELECT 1 FROM jsonb_array_elements_text(
                            COALESCE(documents.user_tags, '[]'::jsonb)
                        ) AS t
                        WHERE t = :exact OR t LIKE :prefix
                    )
                """).bindparams(exact=exact, prefix=prefix)
            )
        if q:
            query = query.where(Document.title.ilike(f"%{q}%"))
        return query

    # Facet filters that were actually supplied, keyed by axis name.
    applied: dict = {}
    if facet_company:
        applied["company"] = Document.facet_company == facet_company
    if facet_topic:
        applied["topic"] = Document.facet_topic == facet_topic
    if facet_year is not None:
        applied["year"] = Document.facet_year == facet_year
    if facet_doctype:
        applied["doctype"] = Document.facet_doctype == facet_doctype

    async def _facet_count(name, facet_col, order_by, value_fn):
        """Count documents per value of ``facet_col``, ignoring that axis's own filter."""
        filtered = base_query()
        for axis, cond in applied.items():
            if axis != name:  # apply only the other axes' filters
                filtered = filtered.where(cond)
        rows = await session.execute(
            select(facet_col, func.count())
            .where(facet_col != None)  # noqa: E711
            .where(Document.id.in_(filtered.with_only_columns(Document.id).subquery().select()))
            .group_by(facet_col)
            .order_by(order_by)
        )
        return [FacetCountItem(value=value_fn(r[0]), count=r[1]) for r in rows]

    return FacetCountsResponse(
        company=await _facet_count(
            "company", Document.facet_company, func.count().desc(), lambda v: v
        ),
        topic=await _facet_count(
            "topic", Document.facet_topic, func.count().desc(), lambda v: v
        ),
        year=await _facet_count(
            "year", Document.facet_year, Document.facet_year.desc(), str
        ),
        doctype=await _facet_count(
            "doctype", Document.facet_doctype, func.count().desc(), lambda v: v
        ),
    )