feat: PDF/HTML 폴더 분리 및 필터링 개선

- 업로드 시 HTML과 PDF를 별도 폴더에 저장 (/documents/, /pdfs/)
- 프론트엔드 필터링을 폴더 경로 기준으로 단순화
- PDF 삭제 시 외래키 참조 해제 로직 추가
- book-documents.js, book-editor.js 필터링 통일
- HTML 문서 목록에서 PDF 완전 분리
2025-08-26 07:44:25 +09:00
parent 4038040faa
commit 04ae64fc4d
20 changed files with 1334 additions and 73 deletions
--- a/backend/migrations/005_add_matched_pdf_id.sql
+++ b/backend/migrations/005_add_matched_pdf_id.sql
@@ -0,0 +1,12 @@
+-- 문서에 PDF 매칭 필드 추가
+-- Migration: 005_add_matched_pdf_id.sql
+
+-- matched_pdf_id 컬럼 추가
+ALTER TABLE documents 
+ADD COLUMN matched_pdf_id UUID REFERENCES documents(id);
+
+-- 인덱스 추가 (성능 향상)
+CREATE INDEX idx_documents_matched_pdf_id ON documents(matched_pdf_id);
+
+-- 코멘트 추가
+COMMENT ON COLUMN documents.matched_pdf_id IS '매칭된 PDF 문서 ID (HTML 문서에 연결된 원본 PDF)';
--- a/backend/migrations/006_make_html_path_nullable.sql
+++ b/backend/migrations/006_make_html_path_nullable.sql
@@ -0,0 +1,9 @@
+-- HTML 경로를 nullable로 변경 (PDF만 업로드하는 경우 대응)
+-- Migration: 006_make_html_path_nullable.sql
+
+-- html_path 컬럼을 nullable로 변경
+ALTER TABLE documents 
+ALTER COLUMN html_path DROP NOT NULL;
+
+-- 코멘트 업데이트
+COMMENT ON COLUMN documents.html_path IS 'HTML 파일 경로 (PDF만 업로드하는 경우 null 가능)';
--- a/backend/src/api/routes/documents.py
+++ b/backend/src/api/routes/documents.py
@@ -3,7 +3,7 @@
 """
 from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Form
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy import select, delete, and_, or_
+from sqlalchemy import select, delete, and_, or_, update
 from sqlalchemy.orm import selectinload
 from typing import List, Optional
 import os
@@ -27,7 +27,7 @@ class DocumentResponse(BaseModel):
    id: str
    title: str
    description: Optional[str]
-    html_path: str
+    html_path: Optional[str]  # PDF만 업로드하는 경우 None 가능
    pdf_path: Optional[str]
    thumbnail_path: Optional[str]
    file_size: Optional[int]
@@ -50,6 +50,9 @@ class DocumentResponse(BaseModel):
    category_id: Optional[str] = None
    category_name: Optional[str] = None
    sort_order: int = 0
+    
+    # PDF 매칭 정보
+    matched_pdf_id: Optional[str] = None

    class Config:
        from_attributes = True
@@ -128,7 +131,7 @@ async def list_documents(
            id=str(doc.id),
            title=doc.title,
            description=doc.description,
-            html_path=doc.html_path,
+            html_path=doc.html_path,  # None 가능 (PDF만 업로드한 경우)
            pdf_path=doc.pdf_path,
            thumbnail_path=doc.thumbnail_path,
            file_size=doc.file_size,
@@ -148,7 +151,9 @@ async def list_documents(
            # 소분류 정보 추가
            category_id=str(doc.category.id) if doc.category else None,
            category_name=doc.category.name if doc.category else None,
-            sort_order=doc.sort_order
+            sort_order=doc.sort_order,
+            # PDF 매칭 정보 추가
+            matched_pdf_id=str(doc.matched_pdf_id) if doc.matched_pdf_id else None
        )
        response_data.append(doc_data)
    
@@ -257,11 +262,15 @@ async def upload_document(
    db: AsyncSession = Depends(get_db)
 ):
    """문서 업로드"""
-    # 파일 확장자 확인
-    if not html_file.filename.lower().endswith(('.html', '.htm')):
+    # 파일 확장자 확인 (HTML 또는 PDF 허용)
+    file_extension = html_file.filename.lower()
+    is_pdf_file = file_extension.endswith('.pdf')
+    is_html_file = file_extension.endswith(('.html', '.htm'))
+    
+    if not (is_html_file or is_pdf_file):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Only HTML files are allowed for the main document"
+            detail="Only HTML and PDF files are allowed"
        )
    
    if pdf_file and not pdf_file.filename.lower().endswith('.pdf'):
@@ -272,24 +281,45 @@ async def upload_document(
    
    # 고유 파일명 생성
    doc_id = str(uuid.uuid4())
-    html_filename = f"{doc_id}.html"
-    pdf_filename = f"{doc_id}.pdf" if pdf_file else None
    
-    # 파일 저장 경로
-    html_path = os.path.join(settings.UPLOAD_DIR, "documents", html_filename)
-    pdf_path = os.path.join(settings.UPLOAD_DIR, "documents", pdf_filename) if pdf_file else None
+    # 메인 파일 처리 (HTML 또는 PDF) - 폴더 분리
+    if is_pdf_file:
+        main_filename = f"{doc_id}.pdf"
+        pdf_dir = os.path.join(settings.UPLOAD_DIR, "pdfs")
+        os.makedirs(pdf_dir, exist_ok=True)  # PDF 폴더 생성
+        main_path = os.path.join(pdf_dir, main_filename)
+        html_path = None  # PDF만 업로드하는 경우 html_path는 None
+        pdf_path = main_path  # PDF 파일인 경우 pdf_path에 저장
+    else:
+        main_filename = f"{doc_id}.html"
+        html_dir = os.path.join(settings.UPLOAD_DIR, "documents")
+        os.makedirs(html_dir, exist_ok=True)  # HTML 폴더 생성
+        main_path = os.path.join(html_dir, main_filename)
+        html_path = main_path
+        pdf_path = None
+    
+    # 추가 PDF 파일 처리 (HTML 파일과 함께 업로드된 경우)
+    additional_pdf_path = None
+    if pdf_file:
+        additional_pdf_filename = f"{doc_id}_additional.pdf"
+        pdf_dir = os.path.join(settings.UPLOAD_DIR, "pdfs")
+        os.makedirs(pdf_dir, exist_ok=True)  # PDF 폴더 생성
+        additional_pdf_path = os.path.join(pdf_dir, additional_pdf_filename)
    
    try:
-        # HTML 파일 저장
-        async with aiofiles.open(html_path, 'wb') as f:
+        # 메인 파일 저장 (HTML 또는 PDF)
+        async with aiofiles.open(main_path, 'wb') as f:
            content = await html_file.read()
            await f.write(content)
        
-        # PDF 파일 저장 (있는 경우)
-        if pdf_file and pdf_path:
-            async with aiofiles.open(pdf_path, 'wb') as f:
-                content = await pdf_file.read()
-                await f.write(content)
+        # 추가 PDF 파일 저장 (HTML과 함께 업로드된 경우)
+        if pdf_file and additional_pdf_path:
+            async with aiofiles.open(additional_pdf_path, 'wb') as f:
+                additional_content = await pdf_file.read()
+                await f.write(additional_content)
+            # HTML 파일인 경우 추가 PDF를 pdf_path로 설정
+            if is_html_file:
+                pdf_path = additional_pdf_path
        
        # 서적 ID 검증 (있는 경우)
        validated_book_id = None
@@ -370,15 +400,16 @@ async def upload_document(
            updated_at=document_with_tags.updated_at,
            document_date=document_with_tags.document_date,
            uploader_name=current_user.full_name or current_user.email,
-            tags=[tag.name for tag in document_with_tags.tags]
+            tags=[tag.name for tag in document_with_tags.tags],
+            matched_pdf_id=str(document_with_tags.matched_pdf_id) if document_with_tags.matched_pdf_id else None
        )
    
    except Exception as e:
        # 파일 정리
-        if os.path.exists(html_path):
-            os.remove(html_path)
-        if pdf_path and os.path.exists(pdf_path):
-            os.remove(pdf_path)
+        if os.path.exists(main_path):
+            os.remove(main_path)
+        if additional_pdf_path and os.path.exists(additional_pdf_path):
+            os.remove(additional_pdf_path)
        
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
@@ -429,7 +460,8 @@ async def get_document(
        updated_at=document.updated_at,
        document_date=document.document_date,
        uploader_name=document.uploader.full_name or document.uploader.email,
-        tags=[tag.name for tag in document.tags]
+        tags=[tag.name for tag in document.tags],
+        matched_pdf_id=str(document.matched_pdf_id) if document.matched_pdf_id else None
    )


@@ -475,6 +507,175 @@ async def get_document_content(
        raise HTTPException(status_code=500, detail=f"Error reading document: {str(e)}")


+class UpdateDocumentRequest(BaseModel):
+    """문서 업데이트 요청"""
+    title: Optional[str] = None
+    description: Optional[str] = None
+    sort_order: Optional[int] = None
+    matched_pdf_id: Optional[str] = None
+    is_public: Optional[bool] = None
+    tags: Optional[List[str]] = None
+
+
+@router.put("/{document_id}", response_model=DocumentResponse)
+async def update_document(
+    document_id: str,
+    update_data: UpdateDocumentRequest,
+    current_user: User = Depends(get_current_active_user),
+    db: AsyncSession = Depends(get_db)
+):
+    """문서 정보 업데이트"""
+    try:
+        doc_uuid = UUID(document_id)
+    except ValueError:
+        raise HTTPException(status_code=400, detail="Invalid document ID format")
+    
+    # 문서 조회
+    result = await db.execute(
+        select(Document)
+        .options(selectinload(Document.tags), selectinload(Document.uploader), selectinload(Document.book))
+        .where(Document.id == doc_uuid)
+    )
+    document = result.scalar_one_or_none()
+    
+    if not document:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="Document not found"
+        )
+    
+    # 권한 확인 (관리자이거나 문서 소유자)
+    if not current_user.is_admin and document.uploaded_by != current_user.id:
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail="Not enough permissions to update this document"
+        )
+    
+    # 업데이트할 필드들 적용
+    update_fields = update_data.model_dump(exclude_unset=True)
+    
+    for field, value in update_fields.items():
+        if field == "matched_pdf_id":
+            # PDF 매칭 처리
+            if value:
+                try:
+                    pdf_uuid = UUID(value)
+                    # PDF 문서가 실제로 존재하는지 확인
+                    pdf_result = await db.execute(select(Document).where(Document.id == pdf_uuid))
+                    pdf_doc = pdf_result.scalar_one_or_none()
+                    if pdf_doc:
+                        setattr(document, field, pdf_uuid)
+                except ValueError:
+                    # 잘못된 UUID 형식이면 무시
+                    pass
+            else:
+                # None으로 설정하여 매칭 해제
+                setattr(document, field, None)
+        elif field == "tags":
+            # 태그 처리
+            if value is not None:
+                # 기존 태그 관계 제거
+                document.tags.clear()
+                
+                # 새 태그 추가
+                for tag_name in value:
+                    tag_name = tag_name.strip()
+                    if tag_name:
+                        # 기존 태그 찾기 또는 생성
+                        tag_result = await db.execute(select(Tag).where(Tag.name == tag_name))
+                        tag = tag_result.scalar_one_or_none()
+                        
+                        if not tag:
+                            tag = Tag(
+                                name=tag_name,
+                                created_by=current_user.id
+                            )
+                            db.add(tag)
+                            await db.flush()
+                        
+                        document.tags.append(tag)
+        else:
+            # 일반 필드 업데이트
+            setattr(document, field, value)
+    
+    # 업데이트 시간 갱신
+    document.updated_at = datetime.utcnow()
+    
+    await db.commit()
+    await db.refresh(document)
+    
+    # 응답 데이터 생성
+    return DocumentResponse(
+        id=str(document.id),
+        title=document.title,
+        description=document.description,
+        html_path=document.html_path,
+        pdf_path=document.pdf_path,
+        thumbnail_path=document.thumbnail_path,
+        file_size=document.file_size,
+        page_count=document.page_count,
+        language=document.language,
+        is_public=document.is_public,
+        is_processed=document.is_processed,
+        created_at=document.created_at,
+        updated_at=document.updated_at,
+        document_date=document.document_date,
+        uploader_name=document.uploader.full_name or document.uploader.email,
+        tags=[tag.name for tag in document.tags],
+        book_id=str(document.book.id) if document.book else None,
+        book_title=document.book.title if document.book else None,
+        book_author=document.book.author if document.book else None,
+        sort_order=document.sort_order,
+        matched_pdf_id=str(document.matched_pdf_id) if document.matched_pdf_id else None
+    )
+
+
+@router.get("/{document_id}/download")
+async def download_document(
+    document_id: str,
+    current_user: User = Depends(get_current_active_user),
+    db: AsyncSession = Depends(get_db)
+):
+    """문서 파일 다운로드"""
+    try:
+        doc_uuid = UUID(document_id)
+    except ValueError:
+        raise HTTPException(status_code=400, detail="Invalid document ID format")
+    
+    # 문서 조회
+    query = select(Document).where(Document.id == doc_uuid)
+    result = await db.execute(query)
+    document = result.scalar_one_or_none()
+    
+    if not document:
+        raise HTTPException(status_code=404, detail="Document not found")
+    
+    # 권한 확인
+    if not current_user.is_admin and not document.is_public and document.uploaded_by != current_user.id:
+        raise HTTPException(status_code=403, detail="Access denied")
+    
+    # 다운로드할 파일 경로 결정 (PDF 우선, 없으면 HTML)
+    file_path = document.pdf_path if document.pdf_path else document.html_path
+    
+    if not file_path or not os.path.exists(file_path):
+        raise HTTPException(status_code=404, detail="Document file not found")
+    
+    # 파일 응답
+    from fastapi.responses import FileResponse
+    
+    # 파일명 설정
+    filename = document.original_filename
+    if not filename:
+        extension = '.pdf' if document.pdf_path else '.html'
+        filename = f"{document.title}{extension}"
+    
+    return FileResponse(
+        path=file_path,
+        filename=filename,
+        media_type='application/octet-stream'
+    )
+
+
@router.delete("/{document_id}")
 async def delete_document(
    document_id: str,
@@ -514,6 +715,15 @@ async def delete_document(
    try:
        print(f"DEBUG: Starting deletion of document {document_id}")
        
+        # 0. PDF 참조 해제 (외래키 제약조건 해결)
+        # 이 문서를 matched_pdf_id로 참조하는 모든 문서의 참조를 NULL로 설정
+        await db.execute(
+            update(Document)
+            .where(Document.matched_pdf_id == document_id)
+            .values(matched_pdf_id=None)
+        )
+        print(f"DEBUG: Cleared matched_pdf_id references to document {document_id}")
+        
        # 1. 먼저 해당 문서의 모든 하이라이트 ID 조회
        highlight_ids_result = await db.execute(select(Highlight.id).where(Highlight.document_id == document_id))
        highlight_ids = [row[0] for row in highlight_ids_result.fetchall()]
--- a/backend/src/models/document.py
+++ b/backend/src/models/document.py
@@ -31,9 +31,10 @@ class Document(Base):
    description = Column(Text, nullable=True)
    
    # 파일 정보
-    html_path = Column(String(1000), nullable=False)  # HTML 파일 경로
+    html_path = Column(String(1000), nullable=True)  # HTML 파일 경로 (PDF만 업로드하는 경우 null 가능)
    pdf_path = Column(String(1000), nullable=True)   # PDF 원본 경로 (선택)
    thumbnail_path = Column(String(1000), nullable=True)  # 썸네일 경로
+    matched_pdf_id = Column(UUID(as_uuid=True), ForeignKey('documents.id'), nullable=True)  # 매칭된 PDF 문서 ID
    
    # 메타데이터
    file_size = Column(Integer, nullable=True)  # 바이트 단위