From 24142ea605cc61346ade5555b9697c6b91d799cc Mon Sep 17 00:00:00 2001
From: Hyungi Ahn <hyungiahn@Hyungiui-MacBookPro.local>
Date: Mon, 6 Apr 2026 07:15:13 +0900
Subject: [PATCH] =?UTF-8?q?fix:=20Codex=20=EB=A6=AC=EB=B7=B0=205=EA=B1=B4?=
 =?UTF-8?q?=20=EC=88=98=EC=A0=95=20(critical=201=20+=20high=204)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. [critical] config.yaml → settings 객체에서 taxonomy 로드 (import crash 방지)
2. [high] ODF 변환: file_path 유지, derived_path 별도 필드 (무한 중복 방지)
3. [high] 법령 분할: 첫 장 이전 조문을 "서문"으로 보존
4. [high] Inbox: review_status 필드 분리 (pending/approved/rejected)
5. [high] 삭제: soft-delete (deleted_at) + worker 방어 + active_documents 뷰
   - 모든 조회에 deleted_at IS NULL 일관 적용
   - queue_consumer: row 없으면 gracefully skip

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 app/api/dashboard.py                   |  5 ++--
 app/api/documents.py                   | 33 +++++++-------------------
 app/api/search.py                      |  7 +++---
 app/core/config.py                     |  9 +++++++
 app/models/document.py                 |  9 ++++---
 app/workers/classify_worker.py         | 11 +++------
 app/workers/extract_worker.py          | 16 ++++---------
 app/workers/law_monitor.py             | 12 ++++++----
 app/workers/queue_consumer.py          |  6 +++++
 frontend/src/routes/inbox/+page.svelte |  2 +-
 migrations/009_review_status.sql       |  8 +++++++
 migrations/010_soft_delete.sql         |  7 ++++++
 12 files changed, 66 insertions(+), 59 deletions(-)
 create mode 100644 migrations/009_review_status.sql
 create mode 100644 migrations/010_soft_delete.sql

diff --git a/app/api/dashboard.py b/app/api/dashboard.py
index 8e33b7b..d583df4 100644
--- a/app/api/dashboard.py
+++ b/app/api/dashboard.py
@@ -62,11 +62,12 @@ async def get_dashboard(
     today_rows = today_result.all()
     today_added = sum(row[1] for row in today_rows)
 
-    # Inbox 미분류 수 (ai_domain이 없는 문서 = 미분류)
+    # Inbox count: documents awaiting review (review_status = "pending") that are not soft-deleted
     inbox_result = await session.execute(
         select(func.count(Document.id))
         .where(
-            (Document.ai_domain == None) | (Document.ai_domain == "")
+            Document.review_status == "pending",
+            Document.deleted_at == None,
         )
     )
     inbox_count = inbox_result.scalar() or 0
diff --git a/app/api/documents.py b/app/api/documents.py
index 5dd494f..a867120 100644
--- a/app/api/documents.py
+++ b/app/api/documents.py
@@ -40,9 +40,10 @@ class DocumentResponse(BaseModel):
     importance: str | None
     ai_confidence: float | None
     user_note: str | None
-    original_path: str | None
+    derived_path: str | None
     original_format: str | None
     conversion_status: str | None
+    review_status: str | None
     edit_url: str | None
     preview_status: str | None
     source_channel: str | None
@@ -101,6 +102,7 @@ async def get_document_tree(
             SELECT ai_domain, COUNT(*)
             FROM documents
             WHERE ai_domain IS NOT NULL AND ai_domain != ''
+              AND deleted_at IS NULL
             GROUP BY ai_domain
             ORDER BY ai_domain
         """)
@@ -145,7 +147,7 @@ async def list_documents(
     format: str | None = None,
 ):
     """문서 목록 조회 (페이지네이션 + 필터)"""
-    query = select(Document)
+    query = select(Document).where(Document.deleted_at == None)
 
     if domain:
         # prefix 매칭: Industrial_Safety 클릭 시 하위 전부 포함
@@ -181,7 +183,7 @@ async def get_document(
 ):
     """문서 단건 조회"""
     doc = await session.get(Document, doc_id)
-    if not doc:
+    if not doc or doc.deleted_at is not None:
         raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
     return DocumentResponse.model_validate(doc)
 
@@ -390,27 +392,8 @@ async def delete_document(
     if not doc:
         raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
 
-    if delete_file:
-        # 원본 파일 삭제
-        file_path = Path(settings.nas_mount_path) / doc.file_path
-        if file_path.exists():
-            file_path.unlink()
-        # 변환본 삭제
-        if doc.original_path:
-            orig = Path(settings.nas_mount_path) / doc.original_path
-            if orig.exists():
-                orig.unlink()
-        # preview 캐시 삭제
-        preview = Path(settings.nas_mount_path) / "PKM" / ".preview" / f"{doc_id}.pdf"
-        if preview.exists():
-            preview.unlink()
-
-    # 관련 processing_queue 먼저 삭제 (FK 제약)
-    from sqlalchemy import delete as sql_delete
-    await session.execute(
-        sql_delete(ProcessingQueue).where(ProcessingQueue.document_id == doc_id)
-    )
-    await session.delete(doc)
+    # soft-delete (물리 파일은 cleanup job에서 나중에 정리)
+    doc.deleted_at = datetime.now(timezone.utc)
     await session.commit()
 
-    return {"message": f"문서 {doc_id} 삭제됨", "file_deleted": delete_file}
+    return {"message": f"문서 {doc_id} soft-delete 완료"}
diff --git a/app/api/search.py b/app/api/search.py
index a8e107a..5d3cbf4 100644
--- a/app/api/search.py
+++ b/app/api/search.py
@@ -93,13 +93,14 @@ async def _search_text(session: AsyncSession, query: str, limit: int) -> list[Se
                        ELSE 'fts'
                    END AS match_reason
             FROM documents
-            WHERE coalesce(title, '') ILIKE '%%' || :q || '%%'
+            WHERE deleted_at IS NULL
+              AND (coalesce(title, '') ILIKE '%%' || :q || '%%'
                OR coalesce(ai_tags::text, '') ILIKE '%%' || :q || '%%'
                OR coalesce(user_note, '') ILIKE '%%' || :q || '%%'
                OR coalesce(ai_summary, '') ILIKE '%%' || :q || '%%'
                OR coalesce(extracted_text, '') ILIKE '%%' || :q || '%%'
                OR to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, ''))
-                  @@ plainto_tsquery('simple', :q)
+                  @@ plainto_tsquery('simple', :q))
             ORDER BY score DESC
             LIMIT :limit
         """),
@@ -124,7 +125,7 @@ async def _search_vector(session: AsyncSession, query: str, limit: int) -> list[
                    left(extracted_text, 200) AS snippet,
                    'vector' AS match_reason
             FROM documents
-            WHERE embedding IS NOT NULL
+            WHERE embedding IS NOT NULL AND deleted_at IS NULL
             ORDER BY embedding <=> cast(:embedding AS vector)
             LIMIT :limit
         """),
diff --git a/app/core/config.py b/app/core/config.py
index 1a0d076..897e269 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -44,6 +44,10 @@ class Settings(BaseModel):
     # kordoc
     kordoc_endpoint: str = "http://kordoc-service:3100"
 
+    # 분류 체계
+    taxonomy: dict = {}
+    document_types: list[str] = []
+
 
 def load_settings() -> Settings:
     """config.yaml + 환경변수에서 설정 로딩"""
@@ -81,6 +85,9 @@ def load_settings() -> Settings:
             nas_mount = raw["nas"].get("mount_path", nas_mount)
             nas_pkm = raw["nas"].get("pkm_root", nas_pkm)
 
+    taxonomy = raw.get("taxonomy", {}) if config_path.exists() and raw else {}
+    document_types = raw.get("document_types", []) if config_path.exists() and raw else []
+
     return Settings(
         database_url=database_url,
         ai=ai_config,
@@ -89,6 +96,8 @@ def load_settings() -> Settings:
         jwt_secret=jwt_secret,
         totp_secret=totp_secret,
         kordoc_endpoint=kordoc_endpoint,
+        taxonomy=taxonomy,
+        document_types=document_types,
     )
 
 
diff --git a/app/models/document.py b/app/models/document.py
index c4868b9..bad4566 100644
--- a/app/models/document.py
+++ b/app/models/document.py
@@ -50,12 +50,15 @@ class Document(Base):
     # 사용자 메모
     user_note: Mapped[str | None] = mapped_column(Text)
 
-    # 원본 보존 (변환 전)
-    original_path: Mapped[str | None] = mapped_column(Text)
+    # ODF 변환
+    derived_path: Mapped[str | None] = mapped_column(Text)  # 변환본 경로 (.derived/)
     original_format: Mapped[str | None] = mapped_column(String(20))
-    original_hash: Mapped[str | None] = mapped_column(String(64))
     conversion_status: Mapped[str | None] = mapped_column(String(20), default="none")
 
+    # 승인/삭제
+    review_status: Mapped[str | None] = mapped_column(String(20), default="pending")
+    deleted_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
+
     # 외부 편집 URL
     edit_url: Mapped[str | None] = mapped_column(Text)
 
diff --git a/app/workers/classify_worker.py b/app/workers/classify_worker.py
index 04134ea..9ebc456 100644
--- a/app/workers/classify_worker.py
+++ b/app/workers/classify_worker.py
@@ -1,8 +1,6 @@
 """AI 분류 워커 — taxonomy 기반 도메인/문서타입/태그/요약 생성"""
 
-import yaml
 from datetime import datetime, timezone
-from pathlib import Path
 
 from sqlalchemy.ext.asyncio import AsyncSession
 
@@ -15,11 +13,8 @@ logger = setup_logger("classify_worker")
 
 MAX_CLASSIFY_TEXT = 8000
 
-# config.yaml에서 taxonomy 로딩
-_config_path = Path(__file__).resolve().parent.parent / "config.yaml"
-_config = yaml.safe_load(_config_path.read_text(encoding="utf-8"))
-
-DOCUMENT_TYPES = set(_config.get("document_types", []))
+# settings에서 taxonomy/document_types 로딩
+DOCUMENT_TYPES = set(settings.document_types)
 
 
 def _get_taxonomy_leaf_paths(taxonomy: dict, prefix: str = "") -> set[str]:
@@ -44,7 +39,7 @@ def _get_taxonomy_leaf_paths(taxonomy: dict, prefix: str = "") -> set[str]:
     return paths
 
 
-VALID_DOMAIN_PATHS = _get_taxonomy_leaf_paths(_config.get("taxonomy", {}))
+VALID_DOMAIN_PATHS = _get_taxonomy_leaf_paths(settings.taxonomy)
 
 
 def _validate_domain(domain: str) -> str:
diff --git a/app/workers/extract_worker.py b/app/workers/extract_worker.py
index 98608b2..52d5119 100644
--- a/app/workers/extract_worker.py
+++ b/app/workers/extract_worker.py
@@ -127,13 +127,7 @@ async def process(document_id: int, session: AsyncSession) -> None:
         target_fmt = CONVERT_MAP.get(fmt)
         if target_fmt:
             try:
-                from core.utils import file_hash as calc_hash
-                # 원본 메타 보존
-                doc.original_path = doc.file_path
-                doc.original_format = doc.file_format
-                doc.original_hash = doc.file_hash
-
-                # .derived 디렉토리에 변환
+                # .derived 디렉토리에 변환 (file_path는 원본 유지!)
                 derived_dir = full_path.parent / ".derived"
                 derived_dir.mkdir(exist_ok=True)
                 tmp_input2 = tmp_dir / f"convert_{document_id}.{fmt}"
@@ -150,13 +144,11 @@ async def process(document_id: int, session: AsyncSession) -> None:
                     final_path = derived_dir / f"{document_id}.{target_fmt}"
                     shutil.move(str(conv_file), str(final_path))
 
-                    # DB 업데이트: current → ODF
                     nas_root = Path(settings.nas_mount_path)
-                    doc.file_path = str(final_path.relative_to(nas_root))
-                    doc.file_format = target_fmt
-                    doc.file_hash = calc_hash(final_path)
+                    doc.derived_path = str(final_path.relative_to(nas_root))
+                    doc.original_format = doc.file_format
                     doc.conversion_status = "done"
-                    logger.info(f"[ODF변환] {doc.original_path} → {doc.file_path}")
+                    logger.info(f"[ODF변환] {doc.file_path} → derived: {doc.derived_path}")
                 else:
                     doc.conversion_status = "failed"
                     logger.warning(f"[ODF변환] 실패: {conv_result.stderr[:200]}")
diff --git a/app/workers/law_monitor.py b/app/workers/law_monitor.py
index 545697e..5505e1f 100644
--- a/app/workers/law_monitor.py
+++ b/app/workers/law_monitor.py
@@ -206,9 +206,10 @@ async def _save_law_split(
 
         # 장 구분자: 키가 000으로 끝나고 내용에 "제X장" 포함
         if key.endswith("000") and re.search(r"제\d+장", content):
-            # 이전 장 저장
-            if current_chapter and current_articles:
-                chapters.append((current_chapter, current_articles))
+            # 이전 장/서문 저장
+            if current_articles:
+                chapter_name = current_chapter or "서문"
+                chapters.append((chapter_name, current_articles))
             chapter_match = re.search(r"(제\d+장\s*.+)", content)
             current_chapter = chapter_match.group(1).strip() if chapter_match else content.strip()
             current_articles = []
@@ -216,8 +217,9 @@ async def _save_law_split(
             current_articles.append(unit)
 
     # 마지막 장 저장
-    if current_chapter and current_articles:
-        chapters.append((current_chapter, current_articles))
+    if current_articles:
+        chapter_name = current_chapter or "서문"
+        chapters.append((chapter_name, current_articles))
 
     # 장 분할 성공
     sections = []
diff --git a/app/workers/queue_consumer.py b/app/workers/queue_consumer.py
index e31951a..05c8637 100644
--- a/app/workers/queue_consumer.py
+++ b/app/workers/queue_consumer.py
@@ -112,6 +112,9 @@ async def consume_queue():
                 # 완료 처리
                 async with async_session() as session:
                     item = await session.get(ProcessingQueue, queue_id)
+                    if not item:
+                        logger.warning(f"[{stage}] queue_id={queue_id} 없음 (삭제됨?), skip")
+                        continue
                     item.status = "completed"
                     item.completed_at = datetime.now(timezone.utc)
                     await session.commit()
@@ -123,6 +126,9 @@ async def consume_queue():
                 # 실패 처리
                 async with async_session() as session:
                     item = await session.get(ProcessingQueue, queue_id)
+                    if not item:
+                        logger.warning(f"[{stage}] queue_id={queue_id} 없음 (삭제됨?), skip")
+                        continue
                     item.error_message = str(e)[:500]
                     if item.attempts >= item.max_attempts:
                         item.status = "failed"
diff --git a/frontend/src/routes/inbox/+page.svelte b/frontend/src/routes/inbox/+page.svelte
index 33fbb7d..ebf9d8f 100644
--- a/frontend/src/routes/inbox/+page.svelte
+++ b/frontend/src/routes/inbox/+page.svelte
@@ -14,7 +14,7 @@
     try {
       // Inbox 파일만 필터
       const data = await api('/documents/?page_size=100');
-      documents = data.items.filter(d => !d.ai_domain);
+      documents = data.items.filter(d => d.review_status === 'pending');
     } catch (err) {
       addToast('error', 'Inbox 로딩 실패');
     } finally {
diff --git a/migrations/009_review_status.sql b/migrations/009_review_status.sql
new file mode 100644
index 0000000..02eb770
--- /dev/null
+++ b/migrations/009_review_status.sql
@@ -0,0 +1,8 @@
+-- Inbox 승인 상태 분리 + derived_path
+ALTER TABLE documents ADD COLUMN IF NOT EXISTS review_status VARCHAR(20) DEFAULT 'pending';
+ALTER TABLE documents ADD COLUMN IF NOT EXISTS derived_path TEXT;
+
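+-- Backfill: mark all pre-existing documents as approved so only documents created after this migration start as pending. NOTE: the UPDATE has no WHERE clause, so re-running this file also flips pending rows to approved.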
+UPDATE documents SET review_status = 'approved';
+
+CREATE INDEX IF NOT EXISTS idx_documents_review_status ON documents(review_status);
diff --git a/migrations/010_soft_delete.sql b/migrations/010_soft_delete.sql
new file mode 100644
index 0000000..6936f72
--- /dev/null
+++ b/migrations/010_soft_delete.sql
@@ -0,0 +1,7 @@
+-- Soft-delete 지원
+ALTER TABLE documents ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMPTZ;
+
+CREATE INDEX IF NOT EXISTS idx_documents_not_deleted ON documents(deleted_at) WHERE deleted_at IS NULL;
+
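+-- active_documents view: exposes only rows with deleted_at IS NULL, so raw SQL can select from it instead of repeating the soft-delete filter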
+CREATE OR REPLACE VIEW active_documents AS SELECT * FROM documents WHERE deleted_at IS NULL;