"""Document CRUD API.

FastAPI router exposing list/read/upload/update/delete endpoints for
``Document`` rows and the files stored under ``settings.nas_mount_path``.
"""

import shutil
from datetime import datetime, timezone
from pathlib import Path
from typing import Annotated

from fastapi import (
    APIRouter,
    Depends,
    Header,
    HTTPException,
    Query,
    UploadFile,
    status,
)
from fastapi.responses import FileResponse
from pydantic import BaseModel, ConfigDict
from sqlalchemy import func, select, text
from sqlalchemy.ext.asyncio import AsyncSession

from core.auth import decode_token, get_current_user
from core.config import settings
from core.database import get_session
from core.utils import file_hash
from models.document import Document
from models.queue import ProcessingQueue
from models.user import User

router = APIRouter()

# File suffix -> Content-Type used when serving original files inline.
# Anything not listed is served as ``application/octet-stream``.
# NOTE(review): ``.html`` and ``.svg`` are served inline from this origin;
# if uploads can come from untrusted users this allows stored XSS - confirm.
_MEDIA_TYPES = {
    ".pdf": "application/pdf",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".gif": "image/gif",
    ".bmp": "image/bmp",
    ".tiff": "image/tiff",
    ".svg": "image/svg+xml",
    ".txt": "text/plain",
    ".md": "text/plain",
    ".html": "text/html",
    ".csv": "text/csv",
    ".json": "application/json",
    ".xml": "application/xml",
}

# Maximum number of characters copied into ``Document.extracted_text`` when a
# text document is saved through the editor endpoint.
_EXTRACTED_TEXT_LIMIT = 15000


# --- Schemas ---


class DocumentResponse(BaseModel):
    """Serialized view of a ``Document`` row returned by the API."""

    # Allows ``model_validate`` to read fields from ORM object attributes.
    model_config = ConfigDict(from_attributes=True)

    id: int
    file_path: str
    file_format: str
    file_size: int | None
    file_type: str
    title: str | None
    ai_domain: str | None
    ai_sub_group: str | None
    ai_tags: list | None
    ai_summary: str | None
    user_note: str | None
    edit_url: str | None
    preview_status: str | None
    source_channel: str | None
    data_origin: str | None
    extracted_at: datetime | None
    ai_processed_at: datetime | None
    embedded_at: datetime | None
    created_at: datetime
    updated_at: datetime


class DocumentListResponse(BaseModel):
    """One page of documents plus the paging information used to fetch it."""

    items: list[DocumentResponse]
    total: int
    page: int
    page_size: int


class DocumentUpdate(BaseModel):
    """Partial update payload; only fields present in the request are applied."""

    title: str | None = None
    ai_domain: str | None = None
    ai_sub_group: str | None = None
    ai_tags: list | None = None
    user_note: str | None = None
    edit_url: str | None = None
    source_channel: str | None = None
    data_origin: str | None = None


# --- Schemas (tree) ---


class SubGroupNode(BaseModel):
    """Leaf of the sidebar tree: a sub-group and its document count."""

    sub_group: str
    count: int


class DomainNode(BaseModel):
    """Top-level node of the sidebar tree: a domain and its sub-groups."""

    domain: str
    count: int
    children: list[SubGroupNode]


# --- Helpers ---


def _require_access_token(
    token: str | None,
    authorization: str | None = None,
) -> dict:
    """Validate an access token given as a query value or a Bearer header.

    The query-string token takes precedence; the ``Authorization`` header is
    only consulted when no query token was supplied.

    Raises:
        HTTPException: 401 when no token is supplied, or when the token does
            not decode to a payload whose ``type`` is ``"access"``.
    """
    if not token and authorization:
        scheme, _, credentials = authorization.partition(" ")
        if scheme.lower() == "bearer":
            token = credentials.strip()
    if not token:
        raise HTTPException(status_code=401, detail="토큰이 필요합니다")

    payload = decode_token(token)
    if not payload or payload.get("type") != "access":
        raise HTTPException(status_code=401, detail="유효하지 않은 토큰")
    return payload


async def _get_document_or_404(session: AsyncSession, doc_id: int) -> Document:
    """Load a document by primary key or raise a 404 ``HTTPException``."""
    doc = await session.get(Document, doc_id)
    if not doc:
        raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
    return doc


# --- Endpoints ---


@router.get("/tree", response_model=list[DomainNode])
async def get_document_tree(
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Return the domain / sub-group tree with document counts (for the sidebar).

    Documents without an ``ai_domain`` are excluded. Documents whose
    ``ai_sub_group`` is empty count toward their domain total but do not
    produce a child node.
    """
    result = await session.execute(
        text(
            """
            SELECT ai_domain, ai_sub_group, COUNT(*)
            FROM documents
            WHERE ai_domain IS NOT NULL
            GROUP BY ai_domain, ai_sub_group
            ORDER BY ai_domain, ai_sub_group
            """
        )
    )

    tree: dict[str, DomainNode] = {}
    for domain, sub_group, count in result:
        node = tree.get(domain)
        if node is None:
            node = tree[domain] = DomainNode(domain=domain, count=0, children=[])
        node.count += count
        if sub_group:
            node.children.append(SubGroupNode(sub_group=sub_group, count=count))

    return list(tree.values())


@router.get("/", response_model=DocumentListResponse)
async def list_documents(
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    domain: str | None = None,
    sub_group: str | None = None,
    source: str | None = None,
    format: str | None = None,  # shadows the builtin, but is the public query name
):
    """List documents, newest first, with pagination and optional filters.

    Args:
        page: 1-based page number.
        page_size: Number of items per page (1-100).
        domain: Exact match on ``ai_domain``.
        sub_group: Exact match on ``ai_sub_group``.
        source: Exact match on ``source_channel``.
        format: Exact match on ``file_format``.
    """
    query = select(Document)
    filters = (
        (Document.ai_domain, domain),
        (Document.ai_sub_group, sub_group),
        (Document.source_channel, source),
        (Document.file_format, format),
    )
    for column, value in filters:
        if value:
            query = query.where(column == value)

    # Total number of rows matching the filters (before pagination).
    count_query = select(func.count()).select_from(query.subquery())
    total = (await session.execute(count_query)).scalar_one()

    # ``id`` breaks ties between rows sharing a ``created_at`` so that a row
    # cannot appear on two pages or be skipped between pages.
    query = (
        query.order_by(Document.created_at.desc(), Document.id.desc())
        .offset((page - 1) * page_size)
        .limit(page_size)
    )
    items = (await session.execute(query)).scalars().all()

    return DocumentListResponse(
        items=[DocumentResponse.model_validate(doc) for doc in items],
        total=total,
        page=page,
        page_size=page_size,
    )


@router.get("/{doc_id}", response_model=DocumentResponse)
async def get_document(
    doc_id: int,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Return a single document by id, or 404 if it does not exist."""
    doc = await _get_document_or_404(session, doc_id)
    return DocumentResponse.model_validate(doc)


@router.get("/{doc_id}/file")
async def get_document_file(
    doc_id: int,
    session: Annotated[AsyncSession, Depends(get_session)],
    token: str | None = Query(None, description="Bearer token (iframe용)"),
    user: User | None = Depends(lambda: None),
    authorization: str | None = Header(None),
):
    """Serve the original file inline.

    Authenticates with either the ``?token=`` query parameter (for iframes,
    which cannot send headers) or an ``Authorization: Bearer`` header.

    Raises:
        HTTPException: 401 for a missing/invalid token, 404 when the document
            row or the file on disk is missing.
    """
    _require_access_token(token, authorization)

    doc = await _get_document_or_404(session, doc_id)

    file_path = Path(settings.nas_mount_path) / doc.file_path
    # ``is_file`` rather than ``exists`` so a directory is reported as missing.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="파일을 찾을 수 없습니다")

    media_type = _MEDIA_TYPES.get(
        file_path.suffix.lower(), "application/octet-stream"
    )
    return FileResponse(
        path=str(file_path),
        media_type=media_type,
        headers={"Content-Disposition": "inline"},
    )


@router.post("/", response_model=DocumentResponse, status_code=201)
async def upload_document(
    file: UploadFile,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Store an uploaded file in the Inbox, register it, and queue processing.

    The file is written to ``<nas_mount_path>/PKM/Inbox``. When the name is
    already taken, ``_1``, ``_2``, ... is appended to the stem. A ``Document``
    row and a pending ``extract`` entry in ``ProcessingQueue`` are created.

    Raises:
        HTTPException: 400 when the filename is missing, hidden (leading
            dot), or would resolve outside the Inbox directory.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="파일명이 필요합니다")

    # Keep only the final path component to block directory traversal.
    safe_name = Path(file.filename).name
    if not safe_name or safe_name.startswith("."):
        raise HTTPException(status_code=400, detail="유효하지 않은 파일명")

    # Resolve the root once so every later path comparison uses the same base.
    nas_root = Path(settings.nas_mount_path).resolve()
    inbox_dir = nas_root / "PKM" / "Inbox"
    inbox_dir.mkdir(parents=True, exist_ok=True)
    inbox_dir = inbox_dir.resolve()

    target = (inbox_dir / safe_name).resolve()
    # Component-wise containment check; a string prefix test would also accept
    # sibling directories such as ``Inbox_other``.
    if not target.is_relative_to(inbox_dir):
        raise HTTPException(status_code=400, detail="잘못된 파일 경로")

    # Reserve a unique name by creating the file exclusively ("x"), so two
    # concurrent uploads of the same name cannot overwrite each other.
    stem, suffix = target.stem, target.suffix
    counter = 1
    while True:
        try:
            out = target.open("xb")
        except FileExistsError:
            target = inbox_dir / f"{stem}_{counter}{suffix}"
            counter += 1
        else:
            break

    # NOTE(review): this is blocking disk I/O inside an async handler.
    try:
        with out:
            # Stream in chunks instead of loading the whole upload into memory.
            shutil.copyfileobj(file.file, out)
    except Exception:
        # Do not leave a truncated file behind in the Inbox.
        target.unlink(missing_ok=True)
        raise

    # Path relative to the NAS root, as stored in ``Document.file_path``.
    rel_path = str(target.relative_to(nas_root))
    fhash = file_hash(target)
    ext = target.suffix.lstrip(".").lower() or "unknown"

    doc = Document(
        file_path=rel_path,
        file_hash=fhash,
        file_format=ext,
        file_size=target.stat().st_size,
        file_type="immutable",
        title=target.stem,
        source_channel="manual",
    )
    session.add(doc)
    # Flush so ``doc.id`` is assigned before the queue entry references it.
    await session.flush()

    session.add(
        ProcessingQueue(
            document_id=doc.id,
            stage="extract",
            status="pending",
        )
    )
    await session.commit()
    # Reload so serialization never touches attributes expired by the commit.
    await session.refresh(doc)

    return DocumentResponse.model_validate(doc)


@router.patch("/{doc_id}", response_model=DocumentResponse)
async def update_document(
    doc_id: int,
    body: DocumentUpdate,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Update document metadata (manual override).

    Only fields explicitly present in the request body are written; sending
    a field as ``null`` clears it.
    """
    doc = await _get_document_or_404(session, doc_id)

    for field, value in body.model_dump(exclude_unset=True).items():
        setattr(doc, field, value)
    doc.updated_at = datetime.now(timezone.utc)

    await session.commit()
    # Reload so serialization never touches attributes expired by the commit.
    await session.refresh(doc)

    return DocumentResponse.model_validate(doc)


@router.put("/{doc_id}/content")
async def save_document_content(
    doc_id: int,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
    body: dict | None = None,
):
    """Overwrite a Markdown/text file and refresh the stored metadata.

    Reads the new text from ``body["content"]``; a missing body or key writes
    an empty file. Updates ``file_size``, ``file_hash``, ``extracted_text``
    (truncated to ``_EXTRACTED_TEXT_LIMIT`` characters) and ``updated_at``.

    Raises:
        HTTPException: 404 when the document does not exist; 400 when the
            document is not ``md``/``txt`` or ``content`` is not a string.
    """
    doc = await _get_document_or_404(session, doc_id)

    if doc.file_format not in ("md", "txt"):
        raise HTTPException(
            status_code=400,
            detail="편집 가능한 포맷이 아닙니다 (md, txt만 가능)",
        )

    content = body.get("content", "") if body else ""
    # ``body`` is an untyped dict, so reject non-text values explicitly
    # instead of failing inside ``write_text``.
    if not isinstance(content, str):
        raise HTTPException(status_code=400, detail="content는 문자열이어야 합니다")

    file_path = Path(settings.nas_mount_path) / doc.file_path
    file_path.write_text(content, encoding="utf-8")

    # Refresh metadata derived from the file contents.
    doc.file_size = len(content.encode("utf-8"))
    doc.file_hash = file_hash(file_path)
    doc.extracted_text = content[:_EXTRACTED_TEXT_LIMIT]
    doc.updated_at = datetime.now(timezone.utc)
    await session.commit()
    # Reload so serialization never touches attributes expired by the commit.
    await session.refresh(doc)

    return DocumentResponse.model_validate(doc)


@router.get("/{doc_id}/preview")
async def get_document_preview(
    doc_id: int,
    session: Annotated[AsyncSession, Depends(get_session)],
    token: str | None = Query(None, description="Bearer token (iframe용)"),
    authorization: str | None = Header(None),
):
    """Serve the cached PDF preview at ``PKM/.preview/<doc_id>.pdf``.

    Authenticates the same way as the original-file endpoint: ``?token=``
    query parameter or ``Authorization: Bearer`` header.

    Raises:
        HTTPException: 401 for a missing/invalid token, 404 when the document
            row or the preview file is missing.
    """
    _require_access_token(token, authorization)

    await _get_document_or_404(session, doc_id)

    preview_path = (
        Path(settings.nas_mount_path) / "PKM" / ".preview" / f"{doc_id}.pdf"
    )
    if not preview_path.is_file():
        raise HTTPException(
            status_code=404,
            detail="미리보기가 아직 생성되지 않았습니다",
        )

    return FileResponse(
        path=str(preview_path),
        media_type="application/pdf",
        headers={"Content-Disposition": "inline"},
    )


@router.delete("/{doc_id}")
async def delete_document(
    doc_id: int,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
    delete_file: bool = Query(False, description="NAS 파일도 함께 삭제"),
):
    """Delete a document row; optionally delete its file as well.

    By default only the database row is removed and the file is kept.
    ``file_deleted`` in the response echoes the ``delete_file`` flag.
    """
    doc = await _get_document_or_404(session, doc_id)

    if delete_file:
        file_path = Path(settings.nas_mount_path) / doc.file_path
        # ``missing_ok`` avoids an exists()/unlink() race when the file has
        # already been removed.
        file_path.unlink(missing_ok=True)

    await session.delete(doc)
    await session.commit()

    return {"message": f"문서 {doc_id} 삭제됨", "file_deleted": delete_file}