Files
hyungi_document_server/app/api/documents.py
Hyungi Ahn d63a6b85e1 feat: 사이드바 3단계 재귀 트리 + 너비 확장 (320px)
- tree API: domain 경로를 파싱하여 계층 구조로 반환
  (Industrial_Safety → Practice → Patrol_Inspection)
- Sidebar: 재귀 snippet으로 N단계 트리 렌더링
- domain 필터: prefix 매칭 (상위 클릭 시 하위 전부 포함)
- 사이드바 너비: 260px → 320px

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 14:03:36 +09:00

417 lines
13 KiB
Python

"""문서 CRUD API"""
import shutil
from datetime import datetime, timezone
from pathlib import Path
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, Query, UploadFile, status
from fastapi.responses import FileResponse
from pydantic import BaseModel
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from core.auth import get_current_user
from core.config import settings
from core.database import get_session
from core.utils import file_hash
from models.document import Document
from models.queue import ProcessingQueue
from models.user import User
router = APIRouter()
# ─── 스키마 ───
class DocumentResponse(BaseModel):
id: int
file_path: str
file_format: str
file_size: int | None
file_type: str
title: str | None
ai_domain: str | None
ai_sub_group: str | None
ai_tags: list | None
ai_summary: str | None
document_type: str | None
importance: str | None
ai_confidence: float | None
user_note: str | None
original_path: str | None
original_format: str | None
conversion_status: str | None
edit_url: str | None
preview_status: str | None
source_channel: str | None
data_origin: str | None
extracted_at: datetime | None
ai_processed_at: datetime | None
embedded_at: datetime | None
created_at: datetime
updated_at: datetime
class Config:
from_attributes = True
class DocumentListResponse(BaseModel):
items: list[DocumentResponse]
total: int
page: int
page_size: int
class DocumentUpdate(BaseModel):
title: str | None = None
ai_domain: str | None = None
ai_sub_group: str | None = None
ai_tags: list | None = None
user_note: str | None = None
edit_url: str | None = None
source_channel: str | None = None
data_origin: str | None = None
# ─── 스키마 (트리) ───
class TreeNode(BaseModel):
name: str
path: str
count: int
children: list["TreeNode"]
# ─── 엔드포인트 ───
@router.get("/tree")
async def get_document_tree(
user: Annotated[User, Depends(get_current_user)],
session: Annotated[AsyncSession, Depends(get_session)],
):
"""도메인 트리 (3단계 경로 파싱, 사이드바용)"""
from sqlalchemy import text as sql_text
result = await session.execute(
sql_text("""
SELECT ai_domain, COUNT(*)
FROM documents
WHERE ai_domain IS NOT NULL AND ai_domain != ''
GROUP BY ai_domain
ORDER BY ai_domain
""")
)
# 경로를 트리로 파싱
root: dict = {}
for domain_path, count in result:
parts = domain_path.split("/")
node = root
for part in parts:
if part not in node:
node[part] = {"_count": 0, "_children": {}}
node[part]["_count"] += count
node = node[part]["_children"]
def build_tree(d: dict, prefix: str = "") -> list[dict]:
nodes = []
for name, data in sorted(d.items()):
path = f"{prefix}/{name}" if prefix else name
children = build_tree(data["_children"], path)
nodes.append({
"name": name,
"path": path,
"count": data["_count"],
"children": children,
})
return nodes
return build_tree(root)
@router.get("/", response_model=DocumentListResponse)
async def list_documents(
user: Annotated[User, Depends(get_current_user)],
session: Annotated[AsyncSession, Depends(get_session)],
page: int = Query(1, ge=1),
page_size: int = Query(20, ge=1, le=100),
domain: str | None = None,
sub_group: str | None = None,
source: str | None = None,
format: str | None = None,
):
"""문서 목록 조회 (페이지네이션 + 필터)"""
query = select(Document)
if domain:
# prefix 매칭: Industrial_Safety 클릭 시 하위 전부 포함
query = query.where(Document.ai_domain.startswith(domain))
if source:
query = query.where(Document.source_channel == source)
if format:
query = query.where(Document.file_format == format)
# 전체 건수
count_query = select(func.count()).select_from(query.subquery())
total = (await session.execute(count_query)).scalar()
# 페이지네이션
query = query.order_by(Document.created_at.desc())
query = query.offset((page - 1) * page_size).limit(page_size)
result = await session.execute(query)
items = result.scalars().all()
return DocumentListResponse(
items=[DocumentResponse.model_validate(doc) for doc in items],
total=total,
page=page,
page_size=page_size,
)
@router.get("/{doc_id}", response_model=DocumentResponse)
async def get_document(
doc_id: int,
user: Annotated[User, Depends(get_current_user)],
session: Annotated[AsyncSession, Depends(get_session)],
):
"""문서 단건 조회"""
doc = await session.get(Document, doc_id)
if not doc:
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
return DocumentResponse.model_validate(doc)
@router.get("/{doc_id}/file")
async def get_document_file(
doc_id: int,
session: Annotated[AsyncSession, Depends(get_session)],
token: str | None = Query(None, description="Bearer token (iframe용)"),
user: User | None = Depends(lambda: None),
):
"""문서 원본 파일 서빙 (Bearer 헤더 또는 ?token= 쿼리 파라미터)"""
from core.auth import decode_token
# 쿼리 파라미터 토큰 검증
if token:
payload = decode_token(token)
if not payload or payload.get("type") != "access":
raise HTTPException(status_code=401, detail="유효하지 않은 토큰")
else:
# 일반 Bearer 헤더 인증 시도
raise HTTPException(status_code=401, detail="토큰이 필요합니다")
doc = await session.get(Document, doc_id)
if not doc:
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
file_path = Path(settings.nas_mount_path) / doc.file_path
if not file_path.exists():
raise HTTPException(status_code=404, detail="파일을 찾을 수 없습니다")
# 미디어 타입 매핑
media_types = {
".pdf": "application/pdf",
".jpg": "image/jpeg", ".jpeg": "image/jpeg",
".png": "image/png", ".gif": "image/gif",
".bmp": "image/bmp", ".tiff": "image/tiff",
".svg": "image/svg+xml",
".txt": "text/plain", ".md": "text/plain",
".html": "text/html", ".csv": "text/csv",
".json": "application/json", ".xml": "application/xml",
}
suffix = file_path.suffix.lower()
media_type = media_types.get(suffix, "application/octet-stream")
return FileResponse(
path=str(file_path),
media_type=media_type,
headers={"Content-Disposition": "inline"},
)
@router.post("/", response_model=DocumentResponse, status_code=201)
async def upload_document(
file: UploadFile,
user: Annotated[User, Depends(get_current_user)],
session: Annotated[AsyncSession, Depends(get_session)],
):
"""파일 업로드 → Inbox 저장 + DB 등록 + 처리 큐 등록"""
if not file.filename:
raise HTTPException(status_code=400, detail="파일명이 필요합니다")
# 파일명 정규화 (경로 이탈 방지)
safe_name = Path(file.filename).name
if not safe_name or safe_name.startswith("."):
raise HTTPException(status_code=400, detail="유효하지 않은 파일명")
# Inbox에 파일 저장
inbox_dir = Path(settings.nas_mount_path) / "PKM" / "Inbox"
inbox_dir.mkdir(parents=True, exist_ok=True)
target = (inbox_dir / safe_name).resolve()
# Inbox 하위 경로 검증
if not str(target).startswith(str(inbox_dir.resolve())):
raise HTTPException(status_code=400, detail="잘못된 파일 경로")
# 중복 파일명 처리
counter = 1
stem, suffix = target.stem, target.suffix
while target.exists():
target = inbox_dir.resolve() / f"{stem}_{counter}{suffix}"
counter += 1
content = await file.read()
target.write_bytes(content)
# 상대 경로 (NAS 루트 기준)
rel_path = str(target.relative_to(Path(settings.nas_mount_path)))
fhash = file_hash(target)
ext = target.suffix.lstrip(".").lower() or "unknown"
# DB 등록
doc = Document(
file_path=rel_path,
file_hash=fhash,
file_format=ext,
file_size=len(content),
file_type="immutable",
title=target.stem,
source_channel="manual",
)
session.add(doc)
await session.flush()
# 처리 큐 등록
session.add(ProcessingQueue(
document_id=doc.id,
stage="extract",
status="pending",
))
await session.commit()
return DocumentResponse.model_validate(doc)
@router.patch("/{doc_id}", response_model=DocumentResponse)
async def update_document(
doc_id: int,
body: DocumentUpdate,
user: Annotated[User, Depends(get_current_user)],
session: Annotated[AsyncSession, Depends(get_session)],
):
"""문서 메타데이터 수정 (수동 오버라이드)"""
doc = await session.get(Document, doc_id)
if not doc:
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
update_data = body.model_dump(exclude_unset=True)
for field, value in update_data.items():
setattr(doc, field, value)
doc.updated_at = datetime.now(timezone.utc)
await session.commit()
return DocumentResponse.model_validate(doc)
@router.put("/{doc_id}/content")
async def save_document_content(
doc_id: int,
user: Annotated[User, Depends(get_current_user)],
session: Annotated[AsyncSession, Depends(get_session)],
body: dict = None,
):
"""Markdown 원본 파일 저장 + extracted_text 갱신"""
doc = await session.get(Document, doc_id)
if not doc:
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
if doc.file_format not in ("md", "txt"):
raise HTTPException(status_code=400, detail="편집 가능한 포맷이 아닙니다 (md, txt만 가능)")
content = body.get("content", "") if body else ""
file_path = Path(settings.nas_mount_path) / doc.file_path
file_path.write_text(content, encoding="utf-8")
# 메타 갱신
doc.file_size = len(content.encode("utf-8"))
doc.file_hash = file_hash(file_path)
doc.extracted_text = content[:15000]
doc.updated_at = datetime.now(timezone.utc)
await session.commit()
return DocumentResponse.model_validate(doc)
@router.get("/{doc_id}/preview")
async def get_document_preview(
doc_id: int,
session: Annotated[AsyncSession, Depends(get_session)],
token: str | None = Query(None, description="Bearer token (iframe용)"),
):
"""PDF 미리보기 캐시 서빙"""
from core.auth import decode_token
if token:
payload = decode_token(token)
if not payload or payload.get("type") != "access":
raise HTTPException(status_code=401, detail="유효하지 않은 토큰")
else:
raise HTTPException(status_code=401, detail="토큰이 필요합니다")
doc = await session.get(Document, doc_id)
if not doc:
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
preview_path = Path(settings.nas_mount_path) / "PKM" / ".preview" / f"{doc_id}.pdf"
if not preview_path.exists():
raise HTTPException(status_code=404, detail="미리보기가 아직 생성되지 않았습니다")
return FileResponse(
path=str(preview_path),
media_type="application/pdf",
headers={"Content-Disposition": "inline"},
)
@router.delete("/{doc_id}")
async def delete_document(
doc_id: int,
user: Annotated[User, Depends(get_current_user)],
session: Annotated[AsyncSession, Depends(get_session)],
delete_file: bool = Query(False, description="NAS 파일도 함께 삭제"),
):
"""문서 삭제 (기본: DB만 삭제, 파일 유지)"""
doc = await session.get(Document, doc_id)
if not doc:
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
if delete_file:
# 원본 파일 삭제
file_path = Path(settings.nas_mount_path) / doc.file_path
if file_path.exists():
file_path.unlink()
# 변환본 삭제
if doc.original_path:
orig = Path(settings.nas_mount_path) / doc.original_path
if orig.exists():
orig.unlink()
# preview 캐시 삭제
preview = Path(settings.nas_mount_path) / "PKM" / ".preview" / f"{doc_id}.pdf"
if preview.exists():
preview.unlink()
# 관련 processing_queue 먼저 삭제 (FK 제약)
from sqlalchemy import delete as sql_delete
await session.execute(
sql_delete(ProcessingQueue).where(ProcessingQueue.document_id == doc_id)
)
await session.delete(doc)
await session.commit()
return {"message": f"문서 {doc_id} 삭제됨", "file_deleted": delete_file}