diff --git a/app/api/documents.py b/app/api/documents.py index 315d643..d786ee9 100644 --- a/app/api/documents.py +++ b/app/api/documents.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Annotated from urllib.parse import quote -from fastapi import APIRouter, Depends, HTTPException, Query, UploadFile, status +from fastapi import APIRouter, Depends, Form, HTTPException, Query, UploadFile, status from fastapi.responses import FileResponse from pydantic import BaseModel from sqlalchemy import func, select @@ -53,6 +53,7 @@ class DocumentResponse(BaseModel): preview_status: str | None source_channel: str | None data_origin: str | None + doc_purpose: str | None extracted_at: datetime | None ai_processed_at: datetime | None embedded_at: datetime | None @@ -81,6 +82,7 @@ class DocumentUpdate(BaseModel): edit_url: str | None = None source_channel: str | None = None data_origin: str | None = None + doc_purpose: str | None = None pinned: bool | None = None @@ -396,8 +398,29 @@ async def upload_document( file: UploadFile, user: Annotated[User, Depends(get_current_user)], session: Annotated[AsyncSession, Depends(get_session)], + doc_purpose: str | None = Form(None, description="business | knowledge"), + library_path: str | None = Form(None, description="자료실 경로 (자동 @library/ 태깅)"), ): """파일 업로드 → Inbox 저장 + DB 등록 + 처리 큐 등록""" + from core.library import LIBRARY_PREFIX, normalize_library_path + + # doc_purpose 검증 + if doc_purpose is not None: + doc_purpose = doc_purpose.strip().lower() + if doc_purpose == "": + doc_purpose = None + elif doc_purpose not in ("business", "knowledge"): + raise HTTPException(status_code=400, detail="doc_purpose는 business 또는 knowledge만 가능") + + # library_path 검증 + 정규화 + library_tag = None + if library_path: + try: + normalized = normalize_library_path(library_path) + library_tag = f"{LIBRARY_PREFIX}{normalized}" + except ValueError as e: + raise HTTPException(status_code=400, detail=f"잘못된 자료실 경로: {e}") + if not file.filename: raise HTTPException(status_code=400, detail="파일명이 필요합니다") @@ -439,6 +462,8 @@ async def upload_document( file_type="immutable", title=target.stem, source_channel="manual", + doc_purpose=doc_purpose, + user_tags=[library_tag] if library_tag else [], ) session.add(doc) await session.flush() @@ -477,6 +502,12 @@ async def update_document( except (TypeError, ValueError) as e: raise HTTPException(status_code=400, detail=str(e)) + # doc_purpose 검증 + if "doc_purpose" in update_data: + val = update_data["doc_purpose"] + if val is not None and val not in ("business", "knowledge"): + raise HTTPException(status_code=400, detail="doc_purpose는 business 또는 knowledge만 가능") + for field, value in update_data.items(): setattr(doc, field, value) doc.updated_at = datetime.now(timezone.utc) diff --git a/app/models/document.py b/app/models/document.py index a51476c..3a535f1 100644 --- a/app/models/document.py +++ b/app/models/document.py @@ -93,6 +93,10 @@ class Document(Base): data_origin: Mapped[str | None] = mapped_column( Enum("work", "external", name="data_origin") ) + # 용도 구분 (우선순위: 수동 수정 > 업로드 명시값 > AI 추론) + doc_purpose: Mapped[str | None] = mapped_column( + Enum("business", "knowledge", name="document_purpose") + ) title: Mapped[str | None] = mapped_column(Text) # 타임스탬프 diff --git a/app/prompts/classify.txt b/app/prompts/classify.txt index b3d072d..9c5b626 100644 --- a/app/prompts/classify.txt +++ b/app/prompts/classify.txt @@ -8,7 +8,8 @@ You are a document classification AI. Analyze the document below and respond ONL "tags": ["tag1", "tag2"], "importance": "medium", "sourceChannel": "inbox_route", - "dataOrigin": "work or external" + "dataOrigin": "work or external", + "docPurpose": "business or knowledge" } ## Domain Taxonomy (select the most specific leaf node) @@ -89,5 +90,12 @@ Reference, Standard, Manual, Drawing, Template, Note, Academic_Paper, Law_Docume - work: company-related (TK, Technicalkorea, factory, production) - external: external reference (news, papers, laws, general info) +## docPurpose +- business: 업무 수행에 직접 사용 (양식, 보고서, 체크리스트, 제출물, 계획서) +- knowledge: 참조·학습·보관 목적 (법령, 논문, 기사, 레퍼런스, 기술 문서, 교육 자료) +- Template, Checklist, Report, Specification → business 가능성 높음 +- Academic_Paper, Law_Document, Reference, Standard → knowledge 가능성 높음 +- Meeting_Minutes, Memo → 문맥 판단 (실행 기록이면 business, 참조용이면 knowledge) + ## Document to classify {document_text} diff --git a/app/workers/classify_worker.py b/app/workers/classify_worker.py index 9ebc456..7e66afa 100644 --- a/app/workers/classify_worker.py +++ b/app/workers/classify_worker.py @@ -107,6 +107,12 @@ async def process(document_id: int, session: AsyncSession) -> None: if parsed.get("dataOrigin") and not doc.data_origin: doc.data_origin = parsed["dataOrigin"] + # 용도 (AI는 빈 값만 채움 — 수동/업로드 명시값 우선) + if parsed.get("docPurpose") and not doc.doc_purpose: + purpose = parsed["docPurpose"] + if purpose in ("business", "knowledge"): + doc.doc_purpose = purpose + # ─── 요약 ─── summary = await client.summarize(doc.extracted_text[:15000]) doc.ai_summary = strip_thinking(summary) diff --git a/frontend/src/lib/components/DocumentCard.svelte b/frontend/src/lib/components/DocumentCard.svelte index 36b111a..a6e367a 100644 --- a/frontend/src/lib/components/DocumentCard.svelte +++ b/frontend/src/lib/components/DocumentCard.svelte @@ -142,7 +142,11 @@ {#if doc.score !== undefined} {(doc.score * 100).toFixed(0)}% {/if} - {#if doc.data_origin} + {#if doc.doc_purpose} + + {doc.doc_purpose === 'business' ? '업무' : '참조'} + + {:else if doc.data_origin} {doc.data_origin} diff --git a/frontend/src/lib/components/editors/FileInfoView.svelte b/frontend/src/lib/components/editors/FileInfoView.svelte index 4820496..09b087c 100644 --- a/frontend/src/lib/components/editors/FileInfoView.svelte +++ b/frontend/src/lib/components/editors/FileInfoView.svelte @@ -1,5 +1,8 @@