From 24142ea605cc61346ade5555b9697c6b91d799cc Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Mon, 6 Apr 2026 07:15:13 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20Codex=20=EB=A6=AC=EB=B7=B0=205=EA=B1=B4?= =?UTF-8?q?=20=EC=88=98=EC=A0=95=20(critical=201=20+=20high=204)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. [critical] config.yaml → settings 객체에서 taxonomy 로드 (import crash 방지) 2. [high] ODF 변환: file_path 유지, derived_path 별도 필드 (무한 중복 방지) 3. [high] 법령 분할: 첫 장 이전 조문을 "서문"으로 보존 4. [high] Inbox: review_status 필드 분리 (pending/approved/rejected) 5. [high] 삭제: soft-delete (deleted_at) + worker 방어 + active_documents 뷰 - 모든 조회에 deleted_at IS NULL 일관 적용 - queue_consumer: row 없으면 gracefully skip Co-Authored-By: Claude Opus 4.6 (1M context) --- app/api/dashboard.py | 5 ++-- app/api/documents.py | 33 +++++++------------------- app/api/search.py | 7 +++--- app/core/config.py | 9 +++++++ app/models/document.py | 9 ++++--- app/workers/classify_worker.py | 11 +++------ app/workers/extract_worker.py | 16 ++++--------- app/workers/law_monitor.py | 12 ++++++---- app/workers/queue_consumer.py | 6 +++++ frontend/src/routes/inbox/+page.svelte | 2 +- migrations/009_review_status.sql | 8 +++++++ migrations/010_soft_delete.sql | 7 ++++++ 12 files changed, 66 insertions(+), 59 deletions(-) create mode 100644 migrations/009_review_status.sql create mode 100644 migrations/010_soft_delete.sql diff --git a/app/api/dashboard.py b/app/api/dashboard.py index 8e33b7b..d583df4 100644 --- a/app/api/dashboard.py +++ b/app/api/dashboard.py @@ -62,11 +62,12 @@ async def get_dashboard( today_rows = today_result.all() today_added = sum(row[1] for row in today_rows) - # Inbox 미분류 수 (ai_domain이 없는 문서 = 미분류) + # Inbox 미분류 수 (review_status = pending) inbox_result = await session.execute( select(func.count(Document.id)) .where( - (Document.ai_domain == None) | (Document.ai_domain == "") + Document.review_status == "pending", + Document.deleted_at == None, ) ) inbox_count = inbox_result.scalar() or 0 diff --git a/app/api/documents.py b/app/api/documents.py index 5dd494f..a867120 100644 --- a/app/api/documents.py +++ b/app/api/documents.py @@ -40,9 +40,10 @@ class DocumentResponse(BaseModel): importance: str | None ai_confidence: float | None user_note: str | None - original_path: str | None + derived_path: str | None original_format: str | None conversion_status: str | None + review_status: str | None edit_url: str | None preview_status: str | None source_channel: str | None @@ -101,6 +102,7 @@ async def get_document_tree( SELECT ai_domain, COUNT(*) FROM documents WHERE ai_domain IS NOT NULL AND ai_domain != '' + AND deleted_at IS NULL GROUP BY ai_domain ORDER BY ai_domain """) @@ -145,7 +147,7 @@ async def list_documents( format: str | None = None, ): """문서 목록 조회 (페이지네이션 + 필터)""" - query = select(Document) + query = select(Document).where(Document.deleted_at == None) if domain: # prefix 매칭: Industrial_Safety 클릭 시 하위 전부 포함 @@ -181,7 +183,7 @@ async def get_document( ): """문서 단건 조회""" doc = await session.get(Document, doc_id) - if not doc: + if not doc or doc.deleted_at is not None: raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다") return DocumentResponse.model_validate(doc) @@ -390,27 +392,8 @@ async def delete_document( if not doc: raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다") - if delete_file: - # 원본 파일 삭제 - file_path = Path(settings.nas_mount_path) / doc.file_path - if file_path.exists(): - file_path.unlink() - # 변환본 삭제 - if doc.original_path: - orig = Path(settings.nas_mount_path) / doc.original_path - if orig.exists(): - orig.unlink() - # preview 캐시 삭제 - preview = Path(settings.nas_mount_path) / "PKM" / ".preview" / f"{doc_id}.pdf" - if preview.exists(): - preview.unlink() - - # 관련 processing_queue 먼저 삭제 (FK 제약) - from sqlalchemy import delete as sql_delete - await session.execute( - sql_delete(ProcessingQueue).where(ProcessingQueue.document_id == doc_id) - ) - await session.delete(doc) + # soft-delete (물리 파일은 cleanup job에서 나중에 정리) + doc.deleted_at = datetime.now(timezone.utc) await session.commit() - return {"message": f"문서 {doc_id} 삭제됨", "file_deleted": delete_file} + return {"message": f"문서 {doc_id} soft-delete 완료"} diff --git a/app/api/search.py b/app/api/search.py index a8e107a..5d3cbf4 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -93,13 +93,14 @@ async def _search_text(session: AsyncSession, query: str, limit: int) -> list[Se ELSE 'fts' END AS match_reason FROM documents - WHERE coalesce(title, '') ILIKE '%%' || :q || '%%' + WHERE deleted_at IS NULL + AND (coalesce(title, '') ILIKE '%%' || :q || '%%' OR coalesce(ai_tags::text, '') ILIKE '%%' || :q || '%%' OR coalesce(user_note, '') ILIKE '%%' || :q || '%%' OR coalesce(ai_summary, '') ILIKE '%%' || :q || '%%' OR coalesce(extracted_text, '') ILIKE '%%' || :q || '%%' OR to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, '')) - @@ plainto_tsquery('simple', :q) + @@ plainto_tsquery('simple', :q)) ORDER BY score DESC LIMIT :limit """), @@ -124,7 +125,7 @@ async def _search_vector(session: AsyncSession, query: str, limit: int) -> list[ left(extracted_text, 200) AS snippet, 'vector' AS match_reason FROM documents - WHERE embedding IS NOT NULL + WHERE embedding IS NOT NULL AND deleted_at IS NULL ORDER BY embedding <=> cast(:embedding AS vector) LIMIT :limit """), diff --git a/app/core/config.py b/app/core/config.py index 1a0d076..897e269 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -44,6 +44,10 @@ class Settings(BaseModel): # kordoc kordoc_endpoint: str = "http://kordoc-service:3100" + # 분류 체계 + taxonomy: dict = {} + document_types: list[str] = [] + def load_settings() -> Settings: """config.yaml + 환경변수에서 설정 로딩""" @@ -81,6 +85,9 @@ def load_settings() -> Settings: nas_mount = raw["nas"].get("mount_path", nas_mount) nas_pkm = raw["nas"].get("pkm_root", nas_pkm) + taxonomy = raw.get("taxonomy", {}) if config_path.exists() and raw else {} + document_types = raw.get("document_types", []) if config_path.exists() and raw else [] + return Settings( database_url=database_url, ai=ai_config, @@ -89,6 +96,8 @@ def load_settings() -> Settings: jwt_secret=jwt_secret, totp_secret=totp_secret, kordoc_endpoint=kordoc_endpoint, + taxonomy=taxonomy, + document_types=document_types, ) diff --git a/app/models/document.py b/app/models/document.py index c4868b9..bad4566 100644 --- a/app/models/document.py +++ b/app/models/document.py @@ -50,12 +50,15 @@ class Document(Base): # 사용자 메모 user_note: Mapped[str | None] = mapped_column(Text) - # 원본 보존 (변환 전) - original_path: Mapped[str | None] = mapped_column(Text) + # ODF 변환 + derived_path: Mapped[str | None] = mapped_column(Text) # 변환본 경로 (.derived/) original_format: Mapped[str | None] = mapped_column(String(20)) - original_hash: Mapped[str | None] = mapped_column(String(64)) conversion_status: Mapped[str | None] = mapped_column(String(20), default="none") + # 승인/삭제 + review_status: Mapped[str | None] = mapped_column(String(20), default="pending") + deleted_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + # 외부 편집 URL edit_url: Mapped[str | None] = mapped_column(Text) diff --git a/app/workers/classify_worker.py b/app/workers/classify_worker.py index 04134ea..9ebc456 100644 --- a/app/workers/classify_worker.py +++ b/app/workers/classify_worker.py @@ -1,8 +1,6 @@ """AI 분류 워커 — taxonomy 기반 도메인/문서타입/태그/요약 생성""" -import yaml from datetime import datetime, timezone -from pathlib import Path from sqlalchemy.ext.asyncio import AsyncSession @@ -15,11 +13,8 @@ logger = setup_logger("classify_worker") MAX_CLASSIFY_TEXT = 8000 -# config.yaml에서 taxonomy 로딩 -_config_path = Path(__file__).resolve().parent.parent / "config.yaml" -_config = yaml.safe_load(_config_path.read_text(encoding="utf-8")) - -DOCUMENT_TYPES = set(_config.get("document_types", [])) +# settings에서 taxonomy/document_types 로딩 +DOCUMENT_TYPES = set(settings.document_types) def _get_taxonomy_leaf_paths(taxonomy: dict, prefix: str = "") -> set[str]: @@ -44,7 +39,7 @@ def _get_taxonomy_leaf_paths(taxonomy: dict, prefix: str = "") -> set[str]: return paths -VALID_DOMAIN_PATHS = _get_taxonomy_leaf_paths(_config.get("taxonomy", {})) +VALID_DOMAIN_PATHS = _get_taxonomy_leaf_paths(settings.taxonomy) def _validate_domain(domain: str) -> str: diff --git a/app/workers/extract_worker.py b/app/workers/extract_worker.py index 98608b2..52d5119 100644 --- a/app/workers/extract_worker.py +++ b/app/workers/extract_worker.py @@ -127,13 +127,7 @@ async def process(document_id: int, session: AsyncSession) -> None: target_fmt = CONVERT_MAP.get(fmt) if target_fmt: try: - from core.utils import file_hash as calc_hash - # 원본 메타 보존 - doc.original_path = doc.file_path - doc.original_format = doc.file_format - doc.original_hash = doc.file_hash - - # .derived 디렉토리에 변환 + # .derived 디렉토리에 변환 (file_path는 원본 유지!) derived_dir = full_path.parent / ".derived" derived_dir.mkdir(exist_ok=True) tmp_input2 = tmp_dir / f"convert_{document_id}.{fmt}" @@ -150,13 +144,11 @@ async def process(document_id: int, session: AsyncSession) -> None: final_path = derived_dir / f"{document_id}.{target_fmt}" shutil.move(str(conv_file), str(final_path)) - # DB 업데이트: current → ODF nas_root = Path(settings.nas_mount_path) - doc.file_path = str(final_path.relative_to(nas_root)) - doc.file_format = target_fmt - doc.file_hash = calc_hash(final_path) + doc.derived_path = str(final_path.relative_to(nas_root)) + doc.original_format = doc.file_format doc.conversion_status = "done" - logger.info(f"[ODF변환] {doc.original_path} → {doc.file_path}") + logger.info(f"[ODF변환] {doc.file_path} → derived: {doc.derived_path}") else: doc.conversion_status = "failed" logger.warning(f"[ODF변환] 실패: {conv_result.stderr[:200]}") diff --git a/app/workers/law_monitor.py b/app/workers/law_monitor.py index 545697e..5505e1f 100644 --- a/app/workers/law_monitor.py +++ b/app/workers/law_monitor.py @@ -206,9 +206,10 @@ async def _save_law_split( # 장 구분자: 키가 000으로 끝나고 내용에 "제X장" 포함 if key.endswith("000") and re.search(r"제\d+장", content): - # 이전 장 저장 - if current_chapter and current_articles: - chapters.append((current_chapter, current_articles)) + # 이전 장/서문 저장 + if current_articles: + chapter_name = current_chapter or "서문" + chapters.append((chapter_name, current_articles)) chapter_match = re.search(r"(제\d+장\s*.+)", content) current_chapter = chapter_match.group(1).strip() if chapter_match else content.strip() current_articles = [] @@ -216,8 +217,9 @@ async def _save_law_split( current_articles.append(unit) # 마지막 장 저장 - if current_chapter and current_articles: - chapters.append((current_chapter, current_articles)) + if current_articles: + chapter_name = current_chapter or "서문" + chapters.append((chapter_name, current_articles)) # 장 분할 성공 sections = [] diff --git a/app/workers/queue_consumer.py b/app/workers/queue_consumer.py index e31951a..05c8637 100644 --- a/app/workers/queue_consumer.py +++ b/app/workers/queue_consumer.py @@ -112,6 +112,9 @@ async def consume_queue(): # 완료 처리 async with async_session() as session: item = await session.get(ProcessingQueue, queue_id) + if not item: + logger.warning(f"[{stage}] queue_id={queue_id} 없음 (삭제됨?), skip") + continue item.status = "completed" item.completed_at = datetime.now(timezone.utc) await session.commit() @@ -123,6 +126,9 @@ async def consume_queue(): # 실패 처리 async with async_session() as session: item = await session.get(ProcessingQueue, queue_id) + if not item: + logger.warning(f"[{stage}] queue_id={queue_id} 없음 (삭제됨?), skip") + continue item.error_message = str(e)[:500] if item.attempts >= item.max_attempts: item.status = "failed" diff --git a/frontend/src/routes/inbox/+page.svelte b/frontend/src/routes/inbox/+page.svelte index 33fbb7d..ebf9d8f 100644 --- a/frontend/src/routes/inbox/+page.svelte +++ b/frontend/src/routes/inbox/+page.svelte @@ -14,7 +14,7 @@ try { // Inbox 파일만 필터 const data = await api('/documents/?page_size=100'); - documents = data.items.filter(d => !d.ai_domain); + documents = data.items.filter(d => d.review_status === 'pending'); } catch (err) { addToast('error', 'Inbox 로딩 실패'); } finally { diff --git a/migrations/009_review_status.sql b/migrations/009_review_status.sql new file mode 100644 index 0000000..02eb770 --- /dev/null +++ b/migrations/009_review_status.sql @@ -0,0 +1,8 @@ +-- Inbox 승인 상태 분리 + derived_path +ALTER TABLE documents ADD COLUMN IF NOT EXISTS review_status VARCHAR(20) DEFAULT 'pending'; +ALTER TABLE documents ADD COLUMN IF NOT EXISTS derived_path TEXT; + +-- 기존 문서는 전부 approved (마이그레이션 이후 신규만 pending) +UPDATE documents SET review_status = 'approved'; + +CREATE INDEX IF NOT EXISTS idx_documents_review_status ON documents(review_status); diff --git a/migrations/010_soft_delete.sql b/migrations/010_soft_delete.sql new file mode 100644 index 0000000..6936f72 --- /dev/null +++ b/migrations/010_soft_delete.sql @@ -0,0 +1,7 @@ +-- Soft-delete 지원 +ALTER TABLE documents ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMPTZ; + +CREATE INDEX IF NOT EXISTS idx_documents_not_deleted ON documents(deleted_at) WHERE deleted_at IS NULL; + +-- active documents 뷰 (raw SQL 누락 방지) +CREATE OR REPLACE VIEW active_documents AS SELECT * FROM documents WHERE deleted_at IS NULL;