feat(documents): §1 category enum + ai_suggestion 승인 파이프

plan: ~/.claude/plans/luminous-sprouting-hamster.md §1 - migrations/143_category.sql: doc_category enum (6 활성 + 3 유보) + documents.category + documents.ai_suggestion JSONB + 2 idx. - app/models/document.py: category (Enum, create_type=False), ai_suggestion (JSONB). - app/prompts/classify.txt: document_type enum 에 7 실무 doctype 추가 (발주서/세금계산서/명세표/도면/증명서/계획서/시방서) + facet_doctype 필드 directive. - config.yaml: document_types 에 7 항목 추가 (worker 검증 통과). - app/workers/classify_worker.py: FACET_DOCTYPES / LIBRARY_SUGGESTION_DOCTYPES 상수, facet_doctype 파싱(기존값 미덮어씀), 발주서/세금계산서/명세표 감지 시 ai_suggestion={proposed_category=library, proposed_path=@library/ 거래/{YYYY}/{doctype}, source_updated_at=doc.updated_at.isoformat(), ...}. category / user_tags 자동 전이 금지 (suggestion-only). - app/api/documents.py: · DocumentResponse 에 category / ai_suggestion 노출 · GET /documents ?category=<cat> / ?has_suggestion / ?proposed_category (category 지정 시 기본 news/memo 제외 해제 — §2 승인 UI 계약) · GET /documents/library 를 Document.category=='library' 기반으로 재구현 (path subquery 는 user_tags 유지 — 분류 내부 서가 경로) · POST /documents/{id}/accept-suggestion — FOR UPDATE + idempotent no-op + dual 409 stale (payload source_updated_at / documents.updated_at) + user_tags idempotent append · DELETE /documents/{id}/suggestion — idempotent, stale 검사 없음 - scripts/backfill_category.py: dry-run / apply. 매핑(news/memo/@library/else) + 3-way 상대 검증 (all_rows==categorized, uncategorized==0, cat_library==has_library_tag — 자동 전이 금지 정책 검증). 남은 DoD (원격 배포 후): docker compose up → migration 143 적용 → backfill apply → smoke (drive_sync 발주서 업로드 suggestion 생성 / category 유지, accept-suggestion idempotency + 409 stale 두 벡터, /documents?category=library == /documents/library 건수 일치). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 15:32:01 +09:00
parent e861784c86
commit 8fdea88676
7 changed files with 433 additions and 15 deletions
@@ -76,6 +76,8 @@ class DocumentResponse(BaseModel):
    facet_topic: str | None = None
    facet_year: int | None = None
    facet_doctype: str | None = None
+    category: str | None = None
+    ai_suggestion: dict | None = None
    extracted_at: datetime | None
    ai_processed_at: datetime | None
    embedded_at: datetime | None
@@ -93,6 +95,11 @@ class DocumentListResponse(BaseModel):
    page_size: int


+class AcceptSuggestionRequest(BaseModel):
+    """Request body for §1 accept-suggestion, used to detect a stale payload or a modified document."""
+    # Timestamp the client expects; accept_suggestion compares it against both
+    # ai_suggestion["source_updated_at"] and documents.updated_at.
+    expected_source_updated_at: datetime
+
+
 class DocumentUpdate(BaseModel):
    title: str | None = None
    ai_domain: str | None = None
@@ -238,7 +245,12 @@ async def list_library_documents(
    facet_year: int | None = None,
    facet_doctype: str | None = None,
 ):
-    """자료실 문서 목록 (prefix match, title 검색, facet 필터, 정렬)"""
+    """자료실 문서 목록 (category='library' 기반, prefix match, facet 필터, 정렬)
+
+    §1 재구현: 기존 `user_tags @library/%` 필터 → `category='library'` 필터로 전환.
+    백필 정책상 `category='library' ⇔ user_tags has @library/...` 관계가 유지됨.
+    `path` 지정 시 하위 경로 매칭은 기존처럼 user_tags 기반 유지 (분류 내부 서가 경로).
+    """
    from sqlalchemy import text as sql_text

    from core.library import LIBRARY_PREFIX, normalize_library_path
@@ -252,6 +264,7 @@ async def list_library_documents(

    query = select(Document).where(
        Document.deleted_at == None,  # noqa: E711
+        Document.category == "library",
    )

    if path:
@@ -265,15 +278,6 @@ async def list_library_documents(
                )
            """).bindparams(exact=exact, prefix=prefix)
        )
-    else:
-        query = query.where(
-            sql_text("""
-                EXISTS (
-                    SELECT 1 FROM jsonb_array_elements_text(documents.user_tags) AS t
-                    WHERE t LIKE '@library/%'
-                )
-            """)
-        )

    if q:
        query = query.where(Document.title.ilike(f"%{q}%"))
@@ -322,14 +326,40 @@ async def list_documents(
    source: str | None = None,
    format: str | None = None,
    review_status: str | None = Query(None, description="pending | approved | rejected"),
+    category: str | None = Query(None, description="doc_category enum — 지정 시 기본 news/memo 제외 해제"),
+    has_suggestion: bool | None = Query(None, description="true: ai_suggestion IS NOT NULL"),
+    proposed_category: str | None = Query(None, description="ai_suggestion.proposed_category 필터"),
 ):
-    """문서 목록 조회 (페이지네이션 + 필터, 뉴스/메모 제외)"""
+    """문서 목록 조회 (페이지네이션 + 필터).
+
+    기본은 뉴스/메모 제외. `category` 지정 시 해당 카테고리만 반환 (기본 제외 해제).
+    §2 승인 UI 용: `has_suggestion=true&proposed_category=library` 조합.
+    """
    query = select(Document).where(
        Document.deleted_at == None,  # noqa: E711
-        Document.source_channel != "news",
-        Document.file_type != "note",
    )

+    if category:
+        # 명시적 카테고리 필터 — 기본 exclude 해제
+        query = query.where(Document.category == category)
+    else:
+        # 기본 목록: 뉴스/메모 제외 (문서함 용도)
+        query = query.where(
+            Document.source_channel != "news",
+            Document.file_type != "note",
+        )
+
+    if has_suggestion is True:
+        query = query.where(Document.ai_suggestion.isnot(None))
+    elif has_suggestion is False:
+        query = query.where(Document.ai_suggestion.is_(None))
+
+    if proposed_category:
+        # ai_suggestion JSONB 의 proposed_category 값 매칭
+        query = query.where(
+            Document.ai_suggestion["proposed_category"].astext == proposed_category
+        )
+
    if domain:
        # prefix 매칭: Industrial_Safety 클릭 시 하위 전부 포함
        query = query.where(Document.ai_domain.startswith(domain))
@@ -404,6 +434,8 @@ async def get_document_file(
        raise HTTPException(status_code=404, detail="파일을 찾을 수 없습니다")

    # 미디어 타입 매핑
+    # HTML5 <audio>/<video> 직접 재생을 위해 audio/video mime 포함. Starlette
+    # FileResponse 가 Range 헤더 자동 처리 → 영상 스트리밍 OK (§3).
    media_types = {
        ".pdf": "application/pdf",
        ".jpg": "image/jpeg", ".jpeg": "image/jpeg",
@@ -413,6 +445,12 @@ async def get_document_file(
        ".txt": "text/plain", ".md": "text/plain",
        ".html": "text/html", ".csv": "text/csv",
        ".json": "application/json", ".xml": "application/xml",
+        # 오디오
+        ".mp3": "audio/mpeg", ".m4a": "audio/mp4",
+        ".opus": "audio/ogg", ".ogg": "audio/ogg",
+        ".wav": "audio/wav", ".flac": "audio/flac",
+        # 비디오 — direct play 호환 (§3 최소판)
+        ".mp4": "video/mp4", ".webm": "video/webm",
    }
    suffix = file_path.suffix.lower()
    media_type = media_types.get(suffix, "application/octet-stream")
@@ -610,6 +648,110 @@ async def update_document(
    return DocumentResponse.model_validate(doc)


+@router.post("/{doc_id}/accept-suggestion", response_model=DocumentResponse)
+async def accept_suggestion(
+    doc_id: int,
+    body: AcceptSuggestionRequest,
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """Approve the §1 AI suggestion: apply category / user_tags (idempotent, stale-checked).
+
+    - 200 (no-op): ai_suggestion is already NULL; treated as a repeated call
+      after an earlier approval or rejection.
+    - 200 (applied): the payload is applied and ai_suggestion is cleared to NULL.
+    - 404: the document does not exist or is soft-deleted.
+    - 409 Conflict: staleness detected through either of two vectors
+      · ai_suggestion.source_updated_at != expected (payload was replaced,
+        e.g. by a newer classify run)
+      · documents.updated_at != expected (document modified through another path)
+    - 422: the suggestion has no proposed_category, or the resulting tag list
+      is rejected by validate_user_tags.
+    """
+    from sqlalchemy import select as sa_select
+
+    from core.library import validate_user_tags
+
+    # Lock the row (SELECT ... FOR UPDATE) so concurrent approvals serialize.
+    result = await session.execute(
+        sa_select(Document).where(Document.id == doc_id).with_for_update()
+    )
+    doc = result.scalar_one_or_none()
+    if not doc or doc.deleted_at is not None:
+        raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
+
+    if doc.ai_suggestion is None:
+        # Idempotent no-op: already handled (second POST, or POST after a rejection).
+        return DocumentResponse.model_validate(doc)
+
+    expected = body.expected_source_updated_at
+
+    # Stale check 1: the stored payload must carry the timestamp the client saw.
+    # A missing, non-string or unparsable source_updated_at is also treated as stale.
+    raw_src = doc.ai_suggestion.get("source_updated_at")
+    suggestion_src = None
+    if isinstance(raw_src, str):
+        try:
+            suggestion_src = datetime.fromisoformat(raw_src)
+        except ValueError:
+            suggestion_src = None
+    if suggestion_src is None or suggestion_src != expected:
+        raise HTTPException(
+            status_code=409,
+            detail="제안 payload 가 교체되었습니다. 목록을 새로고침하세요.",
+        )
+
+    # Stale check 2: the document row itself must not have changed since then.
+    # NOTE(review): combined with check 1 this requires
+    # ai_suggestion.source_updated_at == documents.updated_at; confirm that the
+    # writer of ai_suggestion does not bump updated_at after capturing it.
+    if doc.updated_at != expected:
+        raise HTTPException(
+            status_code=409,
+            detail="문서가 다른 곳에서 수정되었습니다. 목록을 새로고침하세요.",
+        )
+
+    # Apply the payload.
+    proposed_category = doc.ai_suggestion.get("proposed_category")
+    proposed_path = doc.ai_suggestion.get("proposed_path")
+
+    if not proposed_category:
+        raise HTTPException(status_code=422, detail="proposed_category 누락된 suggestion")
+
+    doc.category = proposed_category
+
+    # Append proposed_path to user_tags unless it is already present, then run
+    # the combined list through validate_user_tags.
+    if proposed_path:
+        current_tags = list(doc.user_tags or [])
+        if proposed_path not in current_tags:
+            current_tags.append(proposed_path)
+        try:
+            doc.user_tags = validate_user_tags(current_tags)
+        except (TypeError, ValueError) as e:
+            # Chain the original error so the validation failure stays in the traceback.
+            raise HTTPException(
+                status_code=422, detail=f"proposed_path 태그 검증 실패: {e}"
+            ) from e
+
+    # Clearing the suggestion is what makes a repeated POST a no-op.
+    doc.ai_suggestion = None
+    doc.updated_at = datetime.now(timezone.utc)
+    await session.commit()
+    await session.refresh(doc)
+
+    return DocumentResponse.model_validate(doc)
+
+
+@router.delete("/{doc_id}/suggestion", status_code=204)
+async def delete_suggestion(
+    doc_id: int,
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """Reject the §1 AI suggestion (idempotent, no stale check).
+
+    Approval is guarded, rejection is kept simple: discarding the suggestion is
+    the user's final decision, so it is honoured even if the payload has been
+    replaced in the meantime.
+
+    Raises a 404 HTTPException when the document is missing or soft-deleted.
+    """
+    target = await session.get(Document, doc_id)
+    if not target or target.deleted_at is not None:
+        raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
+
+    # Nothing pending: return without touching the row.
+    if target.ai_suggestion is None:
+        return
+
+    target.ai_suggestion = None
+    target.updated_at = datetime.now(timezone.utc)
+    await session.commit()
+
+
@router.put("/{doc_id}/content")
 async def save_document_content(
    doc_id: int,
@@ -102,6 +102,19 @@ class Document(Base):
    )
    title: Mapped[str | None] = mapped_column(Text)

+    # 카테고리 (1차 진입점 — UI 탭/라우트 분기)
+    # 6 활성: document / library / news / memo / audio / video
+    # 3 유보: mail / calendar / plex
+    category: Mapped[str | None] = mapped_column(
+        Enum("document", "library", "news", "memo", "audio", "video",
+             "mail", "calendar", "plex",
+             name="doc_category", create_type=False)
+    )
+
+    # AI 가 제안했지만 미승인된 변경 후보 (category / path / doctype)
+    # /accept-suggestion 승인 시에만 category / user_tags 반영 (자동 전이 금지)
+    ai_suggestion: Mapped[dict | None] = mapped_column(JSONB)
+
    # facet 탐색 축 (Phase 2)
    facet_company: Mapped[str | None] = mapped_column(Text)
    facet_topic: Mapped[str | None] = mapped_column(Text)
@@ -4,6 +4,7 @@ You are a document classification AI. Analyze the document below and respond ONL
 {
  "domain": "Level1/Level2/Level3",
  "document_type": "one of document_types",
+  "facet_doctype": "one of facet_doctypes or null",
  "confidence": 0.85,
  "tags": ["tag1", "tag2"],
  "importance": "medium",
@@ -57,7 +58,7 @@ General/
 - 2-level paths allowed ONLY when no leaf exists (e.g., Engineering/Civil)

 ## Document Types (select exactly ONE)
-Reference, Standard, Manual, Drawing, Template, Note, Academic_Paper, Law_Document, Report, Memo, Checklist, Meeting_Minutes, Specification
+Reference, Standard, Manual, Drawing, Template, Note, Academic_Paper, Law_Document, Report, Memo, Checklist, Meeting_Minutes, Specification, 발주서, 세금계산서, 명세표, 도면, 증명서, 계획서, 시방서

 ### Document Type Detection Rules
 - Step-by-step instructions → Manual
@@ -66,9 +67,22 @@ Reference, Standard, Manual, Drawing, Template, Note, Academic_Paper, Law_Docume
 - Meeting discussion → Meeting_Minutes
 - Checklist format → Checklist
 - Academic/research format → Academic_Paper
- Technical drawings → Drawing
+- Technical drawings → Drawing or 도면 (choose exactly ONE; never output both)
+- 발주 내역, 품목·수량·단가 표 → 발주서
+- 공급자/공급받는자/세액 양식 → 세금계산서
+- 거래 명세/납품 명세 → 명세표
+- 자격 증빙·수료·재직 → 증명서
+- 업무·프로젝트 추진안 → 계획서
+- 공사 시방·재료 기준 → 시방서
 - If unclear → Note

+## facet_doctype (실무 문서 유형 식별 신호)
+Select ONE of: 발주서, 세금계산서, 명세표, 도면, 증명서, 계획서, 시방서
+If the document clearly does NOT fit any of the above, return null.
+- This field is independent of document_type — use it to flag business-document types
+  that drive 자료실(library) 자동 분류 제안.
+- 발주서 / 세금계산서 / 명세표 는 자료실 "거래" 분류의 승인 대기 제안으로 연결된다.
+
 ## Confidence (0.0 ~ 1.0)
 - How confident are you in the domain classification?
 - 0.85+ = high confidence, 0.6~0.85 = moderate, <0.6 = uncertain
@@ -16,6 +16,12 @@ MAX_CLASSIFY_TEXT = 8000
 # settings에서 taxonomy/document_types 로딩
 DOCUMENT_TYPES = set(settings.document_types)

+# Allowed facet_doctype values (practical business-document types). An AI-reported
+# value outside this set is discarded by process().
+FACET_DOCTYPES = {"발주서", "세금계산서", "명세표", "도면", "증명서", "계획서", "시방서"}
+
+# Doctypes for which process() stores a library ai_suggestion
+# (proposed path under @library/거래/...).
+LIBRARY_SUGGESTION_DOCTYPES = {"발주서", "세금계산서", "명세표"}
+

 def _get_taxonomy_leaf_paths(taxonomy: dict, prefix: str = "") -> set[str]:
    """taxonomy dict에서 모든 유효한 경로를 추출"""
@@ -113,6 +119,30 @@ async def process(document_id: int, session: AsyncSession) -> None:
            if purpose in ("business", "knowledge"):
                doc.doc_purpose = purpose

+        # ─── facet_doctype 식별 (§1 실무 문서 유형 신호) ───
+        # AI 식별값이 허용 enum 이면 facet_doctype 저장. 기존 값이 있으면 덮어쓰지 않음
+        # (수동 수정 / Phase 2 facet 우선). document.category / user_tags 는 **건드리지 않음**.
+        ai_doctype_raw = parsed.get("facet_doctype")
+        ai_doctype = ai_doctype_raw if ai_doctype_raw in FACET_DOCTYPES else None
+        if ai_doctype and not doc.facet_doctype:
+            doc.facet_doctype = ai_doctype
+
+        # ─── ai_suggestion 저장 (자료실 승인 대기함 제안, §1) ───
+        # 발주서/세금계산서/명세표 → 자료실 '거래' 분류 제안. 자동 전이 금지.
+        # /accept-suggestion 승인 UI 에서만 실제 category='library' + @library/... 부여.
+        if ai_doctype in LIBRARY_SUGGESTION_DOCTYPES:
+            year = doc.facet_year or datetime.now(timezone.utc).year
+            doc.ai_suggestion = {
+                "proposed_category": "library",
+                "proposed_path": f"@library/거래/{year}/{ai_doctype}",
+                "proposed_doctype": ai_doctype,
+                "confidence": doc.ai_confidence,
+                "source_updated_at": (
+                    doc.updated_at.isoformat() if doc.updated_at else None
+                ),
+                "reason": "classify pipeline",
+            }
+
        # ─── 요약 ───
        summary = await client.summarize(doc.extracted_text[:50000])
        doc.ai_summary = strip_thinking(summary)
@@ -112,6 +112,13 @@ document_types:
  - Checklist
  - Meeting_Minutes
  - Specification
+  - 발주서
+  - 세금계산서
+  - 명세표
+  - 도면
+  - 증명서
+  - 계획서
+  - 시방서

 schedule:
  law_monitor: "07:00"
@@ -0,0 +1,30 @@
+-- 143_category.sql
+-- Document Server integrated platform, Section 1: category enum + ai_suggestion
+-- plan: luminous-sprouting-hamster.md §1
+--
+-- doc_category enum (6 active + 3 reserved):
+--   document / library / news / memo / audio / video
+--   mail / calendar / plex (reserved)
+--
+-- ai_suggestion (JSONB): suggestion payload awaiting approval
+--   {
+--     proposed_category, proposed_path, proposed_doctype,
+--     confidence, source_updated_at, reason
+--   }
+-- No automatic transition: category / user_tags change only on /accept-suggestion approval
+
+-- NOTE(review): CREATE TYPE has no IF NOT EXISTS form, so unlike the statements
+-- below this one fails if doc_category already exists (e.g. on a re-run).
+CREATE TYPE doc_category AS ENUM (
+  'document', 'library', 'news', 'memo', 'audio', 'video',
+  'mail', 'calendar', 'plex'
+);
+
+-- Both columns are nullable; existing rows keep NULL until they are backfilled.
+ALTER TABLE documents
+  ADD COLUMN IF NOT EXISTS category       doc_category,
+  ADD COLUMN IF NOT EXISTS ai_suggestion  JSONB;
+
+CREATE INDEX IF NOT EXISTS idx_documents_category
+  ON documents(category);
+
+-- Partial index: only rows that currently hold a suggestion are indexed.
+CREATE INDEX IF NOT EXISTS idx_documents_has_suggestion
+  ON documents(id)
+  WHERE ai_suggestion IS NOT NULL;
@@ -0,0 +1,182 @@
+"""§1 백필 — documents.category 전체 행 채우기.
+
+plan: luminous-sprouting-hamster.md §1
+
+매핑 규칙 (category IS NULL 인 모든 행 대상, 위에서부터 우선 적용):
+  source_channel='news'                   → category='news'
+  source_channel='memo'                   → category='memo'
+  file_type='note'                        → category='memo'
+  user_tags 에 '@library/' 태그 보유      → category='library'
+  그 외                                    → category='document'
+
+자동 library 전이 금지 — 기존 @library/ 태그 보유분만 'library' 로 이행.
+audio/video 는 §3 이후 생성 (백필 대상 없음).
+
+실행:
+  docker compose exec fastapi python /app/scripts/backfill_category.py --dry-run
+  docker compose exec fastapi python /app/scripts/backfill_category.py --apply
+
+로컬:
+  python scripts/backfill_category.py --dry-run
+  DATABASE_URL=postgresql+asyncpg://... python scripts/backfill_category.py --apply
+"""
+
+import argparse
+import asyncio
+import os
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "app"))
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
+
+
+# Dry-run preview: for rows whose category IS NULL, count how many would map to
+# each target category. The CASE branches are evaluated top to bottom, so
+# news / memo / note take precedence over an @library/ tag.
+CLASSIFY_SQL = """
+WITH classified AS (
+  SELECT
+    id,
+    CASE
+      WHEN source_channel = 'news'                                    THEN 'news'
+      WHEN source_channel = 'memo'                                    THEN 'memo'
+      WHEN file_type      = 'note'                                    THEN 'memo'
+      WHEN EXISTS (
+        SELECT 1 FROM jsonb_array_elements_text(
+          COALESCE(user_tags, '[]'::jsonb)
+        ) AS t
+        WHERE t LIKE '@library/%'
+      )                                                                THEN 'library'
+      ELSE                                                                  'document'
+    END AS target_category
+  FROM documents
+  WHERE category IS NULL
+)
+SELECT target_category, COUNT(*) AS n FROM classified GROUP BY 1 ORDER BY 2 DESC;
+"""
+
+# The actual backfill: same CASE as CLASSIFY_SQL, cast to doc_category, applied
+# only to rows whose category IS NULL (already-categorized rows are untouched).
+APPLY_SQL = """
+UPDATE documents
+   SET category = CASE
+     WHEN source_channel = 'news'                                    THEN 'news'::doc_category
+     WHEN source_channel = 'memo'                                    THEN 'memo'::doc_category
+     WHEN file_type      = 'note'                                    THEN 'memo'::doc_category
+     WHEN EXISTS (
+       SELECT 1 FROM jsonb_array_elements_text(
+         COALESCE(documents.user_tags, '[]'::jsonb)
+       ) AS t
+       WHERE t LIKE '@library/%'
+     )                                                                THEN 'library'::doc_category
+     ELSE                                                                  'document'::doc_category
+   END
+ WHERE category IS NULL;
+"""
+
+# Post-apply counters consumed by run(): total rows, categorized / uncategorized
+# rows, rows with category='library', and rows carrying an @library/ tag.
+# NOTE(review): has_library_tag counts every tagged row regardless of
+# source_channel / file_type, while the CASE above maps news / memo / note rows
+# first. A news/memo/note row with an @library/ tag would therefore make
+# cat_library != has_library_tag. Confirm whether that failure is intended.
+VERIFY_SQL = """
+SELECT
+  (SELECT COUNT(*) FROM documents)                                            AS all_rows,
+  (SELECT COUNT(*) FROM documents WHERE category IS NOT NULL)                 AS categorized,
+  (SELECT COUNT(*) FROM documents WHERE category IS NULL)                     AS uncategorized,
+  (SELECT COUNT(*) FROM documents WHERE category = 'library')                 AS cat_library,
+  (SELECT COUNT(*) FROM documents
+     WHERE EXISTS (
+       SELECT 1 FROM jsonb_array_elements_text(
+         COALESCE(user_tags, '[]'::jsonb)
+       ) AS t
+       WHERE t LIKE '@library/%'
+     ))                                                                       AS has_library_tag;
+"""
+
+# Current per-category row counts, with NULL shown as '(null)'.
+DIST_SQL = """
+SELECT COALESCE(category::text, '(null)') AS category, COUNT(*) AS n
+  FROM documents
+ GROUP BY category
+ ORDER BY n DESC;
+"""
+
+
+async def run(apply: bool) -> int:
+    """Report the category distribution and optionally backfill NULL categories.
+
+    Steps: print the current distribution, print the projected mapping for rows
+    whose category IS NULL, then (only when ``apply`` is true) execute the
+    UPDATE, commit, and verify the result.
+
+    Args:
+        apply: When false, nothing is written (dry-run).
+
+    Returns:
+        0 on success, on dry-run, or when there is nothing to backfill;
+        1 when the post-apply verification fails.
+    """
+    database_url = os.getenv(
+        "DATABASE_URL",
+        "postgresql+asyncpg://pkm:pkm@localhost:5432/pkm",
+    )
+
+    engine = create_async_engine(database_url)
+    session_factory = async_sessionmaker(
+        engine, class_=AsyncSession, expire_on_commit=False
+    )
+
+    # A single try/finally disposes the engine on every exit path, including
+    # exceptions, and only after the session context has been closed.
+    try:
+        async with session_factory() as session:
+            # 1. Current distribution
+            print("=== 현재 category 분포 ===")
+            rows = (await session.execute(text(DIST_SQL))).all()
+            for row in rows:
+                print(f"  {row.category:12}  {row.n}")
+
+            # 2. Projected mapping (NULL rows only)
+            print("\n=== NULL → target category (매핑 예상) ===")
+            rows = (await session.execute(text(CLASSIFY_SQL))).all()
+            pending_total = 0
+            for row in rows:
+                print(f"  {row.target_category:12}  {row.n}")
+                pending_total += row.n
+
+            if pending_total == 0:
+                print("\n백필 대상 없음 (모든 행 이미 category 설정됨).")
+                return 0
+
+            if not apply:
+                print(f"\n[dry-run] {pending_total}건 영향. --apply 로 실제 적용.")
+                return 0
+
+            # 3. Apply
+            print(f"\n[apply] UPDATE 실행 — {pending_total}건 대상")
+            result = await session.execute(text(APPLY_SQL))
+            await session.commit()
+            print(f"  rowcount = {result.rowcount}")
+
+            # 4. Verify
+            print("\n=== 백필 후 검증 ===")
+            row = (await session.execute(text(VERIFY_SQL))).one()
+            print(f"  all_rows         = {row.all_rows}")
+            print(f"  categorized      = {row.categorized}")
+            print(f"  uncategorized    = {row.uncategorized}")
+            print(f"  cat_library      = {row.cat_library}")
+            print(f"  has_library_tag  = {row.has_library_tag}")
+
+            fail = []
+            if row.uncategorized != 0:
+                fail.append(f"uncategorized={row.uncategorized} (기대 0)")
+            if row.all_rows != row.categorized:
+                fail.append(f"all={row.all_rows} != categorized={row.categorized}")
+            if row.cat_library != row.has_library_tag:
+                fail.append(
+                    f"cat_library={row.cat_library} != has_library_tag={row.has_library_tag} "
+                    "(자동 전이 없음 정책 위반)"
+                )
+            if fail:
+                print("\n!! 검증 실패:")
+                for reason in fail:
+                    print(f"  - {reason}")
+                return 1
+
+            print("\n검증 통과.")
+            return 0
+    finally:
+        await engine.dispose()
+
+
+def main():
+    """Parse the CLI flags and exit with the status code returned by run()."""
+    cli = argparse.ArgumentParser(description="documents.category 백필")
+    # Exactly one of --dry-run / --apply must be given.
+    mode_group = cli.add_mutually_exclusive_group(required=True)
+    mode_group.add_argument("--dry-run", action="store_true", help="변경 없이 분포만 보고")
+    mode_group.add_argument("--apply", action="store_true", help="실제 UPDATE 실행")
+    options = cli.parse_args()
+
+    sys.exit(asyncio.run(run(apply=options.apply)))
+
+
+if __name__ == "__main__":
+    main()