From 8fdea886762375338261d79f67dc42f6d02909da Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Thu, 23 Apr 2026 15:32:01 +0900 Subject: [PATCH] =?UTF-8?q?feat(documents):=20=C2=A71=20category=20enum=20?= =?UTF-8?q?+=20ai=5Fsuggestion=20=EC=8A=B9=EC=9D=B8=20=ED=8C=8C=EC=9D=B4?= =?UTF-8?q?=ED=94=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit plan: ~/.claude/plans/luminous-sprouting-hamster.md §1 - migrations/143_category.sql: doc_category enum (6 활성 + 3 유보) + documents.category + documents.ai_suggestion JSONB + 2 idx. - app/models/document.py: category (Enum, create_type=False), ai_suggestion (JSONB). - app/prompts/classify.txt: document_type enum 에 7 실무 doctype 추가 (발주서/세금계산서/명세표/도면/증명서/계획서/시방서) + facet_doctype 필드 directive. - config.yaml: document_types 에 7 항목 추가 (worker 검증 통과). - app/workers/classify_worker.py: FACET_DOCTYPES / LIBRARY_SUGGESTION_DOCTYPES 상수, facet_doctype 파싱(기존값 미덮어씀), 발주서/세금계산서/명세표 감지 시 ai_suggestion={proposed_category=library, proposed_path=@library/ 거래/{YYYY}/{doctype}, source_updated_at=doc.updated_at.isoformat(), ...}. category / user_tags 자동 전이 금지 (suggestion-only). - app/api/documents.py: · DocumentResponse 에 category / ai_suggestion 노출 · GET /documents ?category= / ?has_suggestion / ?proposed_category (category 지정 시 기본 news/memo 제외 해제 — §2 승인 UI 계약) · GET /documents/library 를 Document.category=='library' 기반으로 재구현 (path subquery 는 user_tags 유지 — 분류 내부 서가 경로) · POST /documents/{id}/accept-suggestion — FOR UPDATE + idempotent no-op + dual 409 stale (payload source_updated_at / documents.updated_at) + user_tags idempotent append · DELETE /documents/{id}/suggestion — idempotent, stale 검사 없음 - scripts/backfill_category.py: dry-run / apply. 매핑(news/memo/@library/else) + 3-way 상대 검증 (all_rows==categorized, uncategorized==0, cat_library==has_library_tag — 자동 전이 금지 정책 검증). 남은 DoD (원격 배포 후): docker compose up → migration 143 적용 → backfill apply → smoke (drive_sync 발주서 업로드 suggestion 생성 / category 유지, accept-suggestion idempotency + 409 stale 두 벡터, /documents?category=library == /documents/library 건수 일치). Co-Authored-By: Claude Opus 4.7 (1M context) --- app/api/documents.py | 168 +++++++++++++++++++++++++++--- app/models/document.py | 13 +++ app/prompts/classify.txt | 18 +++- app/workers/classify_worker.py | 30 ++++++ config.yaml | 7 ++ migrations/143_category.sql | 30 ++++++ scripts/backfill_category.py | 182 +++++++++++++++++++++++++++++++++ 7 files changed, 433 insertions(+), 15 deletions(-) create mode 100644 migrations/143_category.sql create mode 100644 scripts/backfill_category.py diff --git a/app/api/documents.py b/app/api/documents.py index d1f4599..e645065 100644 --- a/app/api/documents.py +++ b/app/api/documents.py @@ -76,6 +76,8 @@ class DocumentResponse(BaseModel): facet_topic: str | None = None facet_year: int | None = None facet_doctype: str | None = None + category: str | None = None + ai_suggestion: dict | None = None extracted_at: datetime | None ai_processed_at: datetime | None embedded_at: datetime | None @@ -93,6 +95,11 @@ class DocumentListResponse(BaseModel): page_size: int +class AcceptSuggestionRequest(BaseModel): + """§1 accept-suggestion 요청 body — stale payload / doc 수정 검출.""" + expected_source_updated_at: datetime + + class DocumentUpdate(BaseModel): title: str | None = None ai_domain: str | None = None @@ -238,7 +245,12 @@ async def list_library_documents( facet_year: int | None = None, facet_doctype: str | None = None, ): - """자료실 문서 목록 (prefix match, title 검색, facet 필터, 정렬)""" + """자료실 문서 목록 (category='library' 기반, prefix match, facet 필터, 정렬) + + §1 재구현: 기존 `user_tags @library/%` 필터 → `category='library'` 필터로 전환. + 백필 정책상 `category='library' ⇔ user_tags has @library/...` 관계가 유지됨. + `path` 지정 시 하위 경로 매칭은 기존처럼 user_tags 기반 유지 (분류 내부 서가 경로). + """ from sqlalchemy import text as sql_text from core.library import LIBRARY_PREFIX, normalize_library_path @@ -252,6 +264,7 @@ async def list_library_documents( query = select(Document).where( Document.deleted_at == None, # noqa: E711 + Document.category == "library", ) if path: @@ -265,15 +278,6 @@ async def list_library_documents( ) """).bindparams(exact=exact, prefix=prefix) ) - else: - query = query.where( - sql_text(""" - EXISTS ( - SELECT 1 FROM jsonb_array_elements_text(documents.user_tags) AS t - WHERE t LIKE '@library/%' - ) - """) - ) if q: query = query.where(Document.title.ilike(f"%{q}%")) @@ -322,14 +326,40 @@ async def list_documents( source: str | None = None, format: str | None = None, review_status: str | None = Query(None, description="pending | approved | rejected"), + category: str | None = Query(None, description="doc_category enum — 지정 시 기본 news/memo 제외 해제"), + has_suggestion: bool | None = Query(None, description="true: ai_suggestion IS NOT NULL"), + proposed_category: str | None = Query(None, description="ai_suggestion.proposed_category 필터"), ): - """문서 목록 조회 (페이지네이션 + 필터, 뉴스/메모 제외)""" + """문서 목록 조회 (페이지네이션 + 필터). + + 기본은 뉴스/메모 제외. `category` 지정 시 해당 카테고리만 반환 (기본 제외 해제). + §2 승인 UI 용: `has_suggestion=true&proposed_category=library` 조합. + """ query = select(Document).where( Document.deleted_at == None, # noqa: E711 - Document.source_channel != "news", - Document.file_type != "note", ) + if category: + # 명시적 카테고리 필터 — 기본 exclude 해제 + query = query.where(Document.category == category) + else: + # 기본 목록: 뉴스/메모 제외 (문서함 용도) + query = query.where( + Document.source_channel != "news", + Document.file_type != "note", + ) + + if has_suggestion is True: + query = query.where(Document.ai_suggestion.isnot(None)) + elif has_suggestion is False: + query = query.where(Document.ai_suggestion.is_(None)) + + if proposed_category: + # ai_suggestion JSONB 의 proposed_category 값 매칭 + query = query.where( + Document.ai_suggestion["proposed_category"].astext == proposed_category + ) + if domain: # prefix 매칭: Industrial_Safety 클릭 시 하위 전부 포함 query = query.where(Document.ai_domain.startswith(domain)) @@ -404,6 +434,8 @@ async def get_document_file( raise HTTPException(status_code=404, detail="파일을 찾을 수 없습니다") # 미디어 타입 매핑 + # HTML5