From e88640d3d8a8bb22d9cb07202c0481c146b02578 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Fri, 24 Apr 2026 09:14:56 +0900 Subject: [PATCH] =?UTF-8?q?feat(category):=20law=20=EC=B9=B4=ED=85=8C?= =?UTF-8?q?=EA=B3=A0=EB=A6=AC=20=EB=B6=84=EB=A6=AC=20=E2=80=94=20enum=20+?= =?UTF-8?q?=20backfill=20+=20classify=20skip?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - migrations/152: ALTER TYPE doc_category ADD VALUE 'law' (DDL only; PG16 단일-트랜잭션 제약상 backfill 은 별도) - models/document.py: Enum 에 'law' 추가 (7 활성 + 3 유보) - workers/law_monitor.py: Document(..., category='law') — 신규 유입부터 세팅 - workers/classify_worker.py: source_channel='law_monitor' early-return + 최소 필드 (ai_domain='법령', ai_tags=['법령'], importance='medium'). AI classify skip — 법령 구조 고정/외부 source of truth/자동 재수집 - scripts/backfill_category.py: law 분기 + WHERE re-target ((source_channel='law_monitor' AND category='document')) + VERIFY cat_law/law_source_count + fail 조건 - api/documents.py: default 목록 제외에 law_monitor 추가 (news 와 동일 패턴) - api/dashboard.py: documents count FILTER 에 law_monitor 제외 (category_counts.law 는 기존 GROUP BY category 로 자동 노출) - frontend/Sidebar.svelte: '법령 알림' 버튼 ?source=law_monitor → ?category=law (explicit category 경로가 default exclusion 을 skip) plan: ~/.claude/plans/stateless-churning-raccoon.md axis 원칙: category=UI 축, policy/telemetry=source_channel+ai_domain 축 (feedback_category_vs_ai_domain_axis.md) 배포 순서: push → GPU pull → compose up --build fastapi frontend → backfill --dry-run → --apply. Co-Authored-By: Claude Opus 4.7 (1M context) --- app/api/dashboard.py | 2 +- app/api/documents.py | 3 +- app/models/document.py | 4 +-- app/workers/classify_worker.py | 14 ++++++++ app/workers/law_monitor.py | 1 + frontend/src/lib/components/Sidebar.svelte | 2 +- migrations/152_category_law.sql | 8 +++++ scripts/backfill_category.py | 37 +++++++++++++++++----- 8 files changed, 58 insertions(+), 13 deletions(-) create mode 100644 migrations/152_category_law.sql diff --git a/app/api/dashboard.py b/app/api/dashboard.py index 94bdbc8..777f67e 100644 --- a/app/api/dashboard.py +++ b/app/api/dashboard.py @@ -136,7 +136,7 @@ async def get_dashboard( text(""" SELECT COUNT(*) AS total, - COUNT(*) FILTER (WHERE source_channel != 'news' AND file_type != 'note') AS documents, + COUNT(*) FILTER (WHERE source_channel NOT IN ('news', 'law_monitor') AND file_type != 'note') AS documents, COUNT(*) FILTER (WHERE source_channel = 'memo' AND file_type = 'note') AS memos, COUNT(*) FILTER (WHERE source_channel = 'news') AS news FROM documents WHERE deleted_at IS NULL diff --git a/app/api/documents.py b/app/api/documents.py index eed8b6f..89ec126 100644 --- a/app/api/documents.py +++ b/app/api/documents.py @@ -417,9 +417,10 @@ async def list_documents( # 명시적 카테고리 필터 — 기본 exclude 해제 query = query.where(Document.category == category) else: - # 기본 목록: 뉴스/메모 제외 (문서함 용도) + # 기본 목록: 뉴스/메모/법령 제외 (문서함 용도) query = query.where( Document.source_channel != "news", + Document.source_channel != "law_monitor", Document.file_type != "note", ) diff --git a/app/models/document.py b/app/models/document.py index e464af6..af69454 100644 --- a/app/models/document.py +++ b/app/models/document.py @@ -103,10 +103,10 @@ class Document(Base): title: Mapped[str | None] = mapped_column(Text) # 카테고리 (1차 진입점 — UI 탭/라우트 분기) - # 6 활성: document / library / news / memo / audio / video + # 7 활성: document / library / news / memo / audio / video / law # 3 유보: mail / calendar / plex category: Mapped[str | None] = mapped_column( - Enum("document", "library", "news", "memo", "audio", "video", + Enum("document", "library", "news", "memo", "audio", "video", "law", "mail", "calendar", "plex", name="doc_category", create_type=False) ) diff --git a/app/workers/classify_worker.py b/app/workers/classify_worker.py index d10a8b8..cce5cd7 100644 --- a/app/workers/classify_worker.py +++ b/app/workers/classify_worker.py @@ -71,6 +71,20 @@ async def process(document_id: int, session: AsyncSession) -> None: if not doc: raise ValueError(f"문서 ID {document_id}를 찾을 수 없음") + # 법령은 구조 고정 + 외부 source of truth (law.go.kr) + 자동 재수집. + # AI 분류 skip, downstream(embed/chunk) 은 queue_consumer NEXT_STAGES 가 자동 chain. + # ai_domain 단일 "법령" — PR-A policy.domain_policy.yaml 에서 source_channel 기준 세분화. + if doc.source_channel == "law_monitor": + if not doc.ai_domain: + doc.ai_domain = "법령" + if not doc.ai_tags: + doc.ai_tags = ["법령"] + if not doc.importance: + doc.importance = "medium" + await session.commit() + logger.info(f"doc {document_id}: law_monitor → classify skip") + return + if not doc.extracted_text: raise ValueError(f"문서 ID {document_id}: extracted_text가 비어있음") diff --git a/app/workers/law_monitor.py b/app/workers/law_monitor.py index e95bbb6..5212592 100644 --- a/app/workers/law_monitor.py +++ b/app/workers/law_monitor.py @@ -271,6 +271,7 @@ async def _save_law_split( title=f"{law_name} ({proclamation_date}) {section_name}", source_channel="law_monitor", data_origin="work", + category="law", user_note=note or None, ) session.add(doc) diff --git a/frontend/src/lib/components/Sidebar.svelte b/frontend/src/lib/components/Sidebar.svelte index 0742713..9be1f3d 100644 --- a/frontend/src/lib/components/Sidebar.svelte +++ b/frontend/src/lib/components/Sidebar.svelte @@ -177,7 +177,7 @@ 최근 7일