fix: Codex 리뷰 5건 수정 (critical 1 + high 4)

1. [critical] config.yaml → settings 객체에서 taxonomy 로드 (import crash 방지)
2. [high] ODF 변환: file_path 유지, derived_path 별도 필드 (무한 중복 방지)
3. [high] 법령 분할: 첫 장 이전 조문을 "서문"으로 보존
4. [high] Inbox: review_status 필드 분리 (pending/approved/rejected)
5. [high] 삭제: soft-delete (deleted_at) + worker 방어 + active_documents 뷰
   - 모든 조회에 deleted_at IS NULL 일관 적용
   - queue_consumer: row 없으면 gracefully skip

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-06 07:15:13 +09:00
parent 6c92e375c2
commit 24142ea605
12 changed files with 66 additions and 59 deletions

View File

@@ -1,8 +1,6 @@
"""AI 분류 워커 — taxonomy 기반 도메인/문서타입/태그/요약 생성"""
import yaml
from datetime import datetime, timezone
from pathlib import Path
from sqlalchemy.ext.asyncio import AsyncSession
@@ -15,11 +13,8 @@ logger = setup_logger("classify_worker")
MAX_CLASSIFY_TEXT = 8000
# config.yaml에서 taxonomy 로딩
_config_path = Path(__file__).resolve().parent.parent / "config.yaml"
_config = yaml.safe_load(_config_path.read_text(encoding="utf-8"))
DOCUMENT_TYPES = set(_config.get("document_types", []))
# settings에서 taxonomy/document_types 로딩
DOCUMENT_TYPES = set(settings.document_types)
def _get_taxonomy_leaf_paths(taxonomy: dict, prefix: str = "") -> set[str]:
@@ -44,7 +39,7 @@ def _get_taxonomy_leaf_paths(taxonomy: dict, prefix: str = "") -> set[str]:
return paths
VALID_DOMAIN_PATHS = _get_taxonomy_leaf_paths(_config.get("taxonomy", {}))
VALID_DOMAIN_PATHS = _get_taxonomy_leaf_paths(settings.taxonomy)
def _validate_domain(domain: str) -> str:

View File

@@ -127,13 +127,7 @@ async def process(document_id: int, session: AsyncSession) -> None:
target_fmt = CONVERT_MAP.get(fmt)
if target_fmt:
try:
from core.utils import file_hash as calc_hash
# 원본 메타 보존
doc.original_path = doc.file_path
doc.original_format = doc.file_format
doc.original_hash = doc.file_hash
# .derived 디렉토리에 변환
# .derived 디렉토리에 변환 (file_path는 원본 유지!)
derived_dir = full_path.parent / ".derived"
derived_dir.mkdir(exist_ok=True)
tmp_input2 = tmp_dir / f"convert_{document_id}.{fmt}"
@@ -150,13 +144,11 @@ async def process(document_id: int, session: AsyncSession) -> None:
final_path = derived_dir / f"{document_id}.{target_fmt}"
shutil.move(str(conv_file), str(final_path))
# DB 업데이트: current → ODF
nas_root = Path(settings.nas_mount_path)
doc.file_path = str(final_path.relative_to(nas_root))
doc.file_format = target_fmt
doc.file_hash = calc_hash(final_path)
doc.derived_path = str(final_path.relative_to(nas_root))
doc.original_format = doc.file_format
doc.conversion_status = "done"
logger.info(f"[ODF변환] {doc.original_path}{doc.file_path}")
logger.info(f"[ODF변환] {doc.file_path} derived: {doc.derived_path}")
else:
doc.conversion_status = "failed"
logger.warning(f"[ODF변환] 실패: {conv_result.stderr[:200]}")

View File

@@ -206,9 +206,10 @@ async def _save_law_split(
# 장 구분자: 키가 000으로 끝나고 내용에 "제X장" 포함
if key.endswith("000") and re.search(r"\d+장", content):
# 이전 장 저장
if current_chapter and current_articles:
chapters.append((current_chapter, current_articles))
# 이전 장/서문 저장
if current_articles:
chapter_name = current_chapter or "서문"
chapters.append((chapter_name, current_articles))
chapter_match = re.search(r"(제\d+장\s*.+)", content)
current_chapter = chapter_match.group(1).strip() if chapter_match else content.strip()
current_articles = []
@@ -216,8 +217,9 @@ async def _save_law_split(
current_articles.append(unit)
# 마지막 장 저장
if current_chapter and current_articles:
chapters.append((current_chapter, current_articles))
if current_articles:
chapter_name = current_chapter or "서문"
chapters.append((chapter_name, current_articles))
# 장 분할 성공
sections = []

View File

@@ -112,6 +112,9 @@ async def consume_queue():
# 완료 처리
async with async_session() as session:
item = await session.get(ProcessingQueue, queue_id)
if not item:
logger.warning(f"[{stage}] queue_id={queue_id} 없음 (삭제됨?), skip")
continue
item.status = "completed"
item.completed_at = datetime.now(timezone.utc)
await session.commit()
@@ -123,6 +126,9 @@ async def consume_queue():
# 실패 처리
async with async_session() as session:
item = await session.get(ProcessingQueue, queue_id)
if not item:
logger.warning(f"[{stage}] queue_id={queue_id} 없음 (삭제됨?), skip")
continue
item.error_message = str(e)[:500]
if item.attempts >= item.max_attempts:
item.status = "failed"