fix: Codex 리뷰 5건 수정 (critical 1 + high 4)
1. [critical] config.yaml → settings 객체에서 taxonomy 로드 (import crash 방지) 2. [high] ODF 변환: file_path 유지, derived_path 별도 필드 (무한 중복 방지) 3. [high] 법령 분할: 첫 장 이전 조문을 "서문"으로 보존 4. [high] Inbox: review_status 필드 분리 (pending/approved/rejected) 5. [high] 삭제: soft-delete (deleted_at) + worker 방어 + active_documents 뷰 - 모든 조회에 deleted_at IS NULL 일관 적용 - queue_consumer: row 없으면 gracefully skip Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -62,11 +62,12 @@ async def get_dashboard(
|
||||
today_rows = today_result.all()
|
||||
today_added = sum(row[1] for row in today_rows)
|
||||
|
||||
# Inbox 미분류 수 (ai_domain이 없는 문서 = 미분류)
|
||||
# Inbox 미분류 수 (review_status = pending)
|
||||
inbox_result = await session.execute(
|
||||
select(func.count(Document.id))
|
||||
.where(
|
||||
(Document.ai_domain == None) | (Document.ai_domain == "")
|
||||
Document.review_status == "pending",
|
||||
Document.deleted_at == None,
|
||||
)
|
||||
)
|
||||
inbox_count = inbox_result.scalar() or 0
|
||||
|
||||
@@ -40,9 +40,10 @@ class DocumentResponse(BaseModel):
|
||||
importance: str | None
|
||||
ai_confidence: float | None
|
||||
user_note: str | None
|
||||
original_path: str | None
|
||||
derived_path: str | None
|
||||
original_format: str | None
|
||||
conversion_status: str | None
|
||||
review_status: str | None
|
||||
edit_url: str | None
|
||||
preview_status: str | None
|
||||
source_channel: str | None
|
||||
@@ -101,6 +102,7 @@ async def get_document_tree(
|
||||
SELECT ai_domain, COUNT(*)
|
||||
FROM documents
|
||||
WHERE ai_domain IS NOT NULL AND ai_domain != ''
|
||||
AND deleted_at IS NULL
|
||||
GROUP BY ai_domain
|
||||
ORDER BY ai_domain
|
||||
""")
|
||||
@@ -145,7 +147,7 @@ async def list_documents(
|
||||
format: str | None = None,
|
||||
):
|
||||
"""문서 목록 조회 (페이지네이션 + 필터)"""
|
||||
query = select(Document)
|
||||
query = select(Document).where(Document.deleted_at == None)
|
||||
|
||||
if domain:
|
||||
# prefix 매칭: Industrial_Safety 클릭 시 하위 전부 포함
|
||||
@@ -181,7 +183,7 @@ async def get_document(
|
||||
):
|
||||
"""문서 단건 조회"""
|
||||
doc = await session.get(Document, doc_id)
|
||||
if not doc:
|
||||
if not doc or doc.deleted_at is not None:
|
||||
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
|
||||
return DocumentResponse.model_validate(doc)
|
||||
|
||||
@@ -390,27 +392,8 @@ async def delete_document(
|
||||
if not doc:
|
||||
raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
|
||||
|
||||
if delete_file:
|
||||
# 원본 파일 삭제
|
||||
file_path = Path(settings.nas_mount_path) / doc.file_path
|
||||
if file_path.exists():
|
||||
file_path.unlink()
|
||||
# 변환본 삭제
|
||||
if doc.original_path:
|
||||
orig = Path(settings.nas_mount_path) / doc.original_path
|
||||
if orig.exists():
|
||||
orig.unlink()
|
||||
# preview 캐시 삭제
|
||||
preview = Path(settings.nas_mount_path) / "PKM" / ".preview" / f"{doc_id}.pdf"
|
||||
if preview.exists():
|
||||
preview.unlink()
|
||||
|
||||
# 관련 processing_queue 먼저 삭제 (FK 제약)
|
||||
from sqlalchemy import delete as sql_delete
|
||||
await session.execute(
|
||||
sql_delete(ProcessingQueue).where(ProcessingQueue.document_id == doc_id)
|
||||
)
|
||||
await session.delete(doc)
|
||||
# soft-delete (물리 파일은 cleanup job에서 나중에 정리)
|
||||
doc.deleted_at = datetime.now(timezone.utc)
|
||||
await session.commit()
|
||||
|
||||
return {"message": f"문서 {doc_id} 삭제됨", "file_deleted": delete_file}
|
||||
return {"message": f"문서 {doc_id} soft-delete 완료"}
|
||||
|
||||
@@ -93,13 +93,14 @@ async def _search_text(session: AsyncSession, query: str, limit: int) -> list[Se
|
||||
ELSE 'fts'
|
||||
END AS match_reason
|
||||
FROM documents
|
||||
WHERE coalesce(title, '') ILIKE '%%' || :q || '%%'
|
||||
WHERE deleted_at IS NULL
|
||||
AND (coalesce(title, '') ILIKE '%%' || :q || '%%'
|
||||
OR coalesce(ai_tags::text, '') ILIKE '%%' || :q || '%%'
|
||||
OR coalesce(user_note, '') ILIKE '%%' || :q || '%%'
|
||||
OR coalesce(ai_summary, '') ILIKE '%%' || :q || '%%'
|
||||
OR coalesce(extracted_text, '') ILIKE '%%' || :q || '%%'
|
||||
OR to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, ''))
|
||||
@@ plainto_tsquery('simple', :q)
|
||||
@@ plainto_tsquery('simple', :q))
|
||||
ORDER BY score DESC
|
||||
LIMIT :limit
|
||||
"""),
|
||||
@@ -124,7 +125,7 @@ async def _search_vector(session: AsyncSession, query: str, limit: int) -> list[
|
||||
left(extracted_text, 200) AS snippet,
|
||||
'vector' AS match_reason
|
||||
FROM documents
|
||||
WHERE embedding IS NOT NULL
|
||||
WHERE embedding IS NOT NULL AND deleted_at IS NULL
|
||||
ORDER BY embedding <=> cast(:embedding AS vector)
|
||||
LIMIT :limit
|
||||
"""),
|
||||
|
||||
@@ -44,6 +44,10 @@ class Settings(BaseModel):
|
||||
# kordoc
|
||||
kordoc_endpoint: str = "http://kordoc-service:3100"
|
||||
|
||||
# 분류 체계
|
||||
taxonomy: dict = {}
|
||||
document_types: list[str] = []
|
||||
|
||||
|
||||
def load_settings() -> Settings:
|
||||
"""config.yaml + 환경변수에서 설정 로딩"""
|
||||
@@ -81,6 +85,9 @@ def load_settings() -> Settings:
|
||||
nas_mount = raw["nas"].get("mount_path", nas_mount)
|
||||
nas_pkm = raw["nas"].get("pkm_root", nas_pkm)
|
||||
|
||||
taxonomy = raw.get("taxonomy", {}) if config_path.exists() and raw else {}
|
||||
document_types = raw.get("document_types", []) if config_path.exists() and raw else []
|
||||
|
||||
return Settings(
|
||||
database_url=database_url,
|
||||
ai=ai_config,
|
||||
@@ -89,6 +96,8 @@ def load_settings() -> Settings:
|
||||
jwt_secret=jwt_secret,
|
||||
totp_secret=totp_secret,
|
||||
kordoc_endpoint=kordoc_endpoint,
|
||||
taxonomy=taxonomy,
|
||||
document_types=document_types,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -50,12 +50,15 @@ class Document(Base):
|
||||
# 사용자 메모
|
||||
user_note: Mapped[str | None] = mapped_column(Text)
|
||||
|
||||
# 원본 보존 (변환 전)
|
||||
original_path: Mapped[str | None] = mapped_column(Text)
|
||||
# ODF 변환
|
||||
derived_path: Mapped[str | None] = mapped_column(Text) # 변환본 경로 (.derived/)
|
||||
original_format: Mapped[str | None] = mapped_column(String(20))
|
||||
original_hash: Mapped[str | None] = mapped_column(String(64))
|
||||
conversion_status: Mapped[str | None] = mapped_column(String(20), default="none")
|
||||
|
||||
# 승인/삭제
|
||||
review_status: Mapped[str | None] = mapped_column(String(20), default="pending")
|
||||
deleted_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
|
||||
|
||||
# 외부 편집 URL
|
||||
edit_url: Mapped[str | None] = mapped_column(Text)
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
"""AI 분류 워커 — taxonomy 기반 도메인/문서타입/태그/요약 생성"""
|
||||
|
||||
import yaml
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
@@ -15,11 +13,8 @@ logger = setup_logger("classify_worker")
|
||||
|
||||
MAX_CLASSIFY_TEXT = 8000
|
||||
|
||||
# config.yaml에서 taxonomy 로딩
|
||||
_config_path = Path(__file__).resolve().parent.parent / "config.yaml"
|
||||
_config = yaml.safe_load(_config_path.read_text(encoding="utf-8"))
|
||||
|
||||
DOCUMENT_TYPES = set(_config.get("document_types", []))
|
||||
# settings에서 taxonomy/document_types 로딩
|
||||
DOCUMENT_TYPES = set(settings.document_types)
|
||||
|
||||
|
||||
def _get_taxonomy_leaf_paths(taxonomy: dict, prefix: str = "") -> set[str]:
|
||||
@@ -44,7 +39,7 @@ def _get_taxonomy_leaf_paths(taxonomy: dict, prefix: str = "") -> set[str]:
|
||||
return paths
|
||||
|
||||
|
||||
VALID_DOMAIN_PATHS = _get_taxonomy_leaf_paths(_config.get("taxonomy", {}))
|
||||
VALID_DOMAIN_PATHS = _get_taxonomy_leaf_paths(settings.taxonomy)
|
||||
|
||||
|
||||
def _validate_domain(domain: str) -> str:
|
||||
|
||||
@@ -127,13 +127,7 @@ async def process(document_id: int, session: AsyncSession) -> None:
|
||||
target_fmt = CONVERT_MAP.get(fmt)
|
||||
if target_fmt:
|
||||
try:
|
||||
from core.utils import file_hash as calc_hash
|
||||
# 원본 메타 보존
|
||||
doc.original_path = doc.file_path
|
||||
doc.original_format = doc.file_format
|
||||
doc.original_hash = doc.file_hash
|
||||
|
||||
# .derived 디렉토리에 변환
|
||||
# .derived 디렉토리에 변환 (file_path는 원본 유지!)
|
||||
derived_dir = full_path.parent / ".derived"
|
||||
derived_dir.mkdir(exist_ok=True)
|
||||
tmp_input2 = tmp_dir / f"convert_{document_id}.{fmt}"
|
||||
@@ -150,13 +144,11 @@ async def process(document_id: int, session: AsyncSession) -> None:
|
||||
final_path = derived_dir / f"{document_id}.{target_fmt}"
|
||||
shutil.move(str(conv_file), str(final_path))
|
||||
|
||||
# DB 업데이트: current → ODF
|
||||
nas_root = Path(settings.nas_mount_path)
|
||||
doc.file_path = str(final_path.relative_to(nas_root))
|
||||
doc.file_format = target_fmt
|
||||
doc.file_hash = calc_hash(final_path)
|
||||
doc.derived_path = str(final_path.relative_to(nas_root))
|
||||
doc.original_format = doc.file_format
|
||||
doc.conversion_status = "done"
|
||||
logger.info(f"[ODF변환] {doc.original_path} → {doc.file_path}")
|
||||
logger.info(f"[ODF변환] {doc.file_path} → derived: {doc.derived_path}")
|
||||
else:
|
||||
doc.conversion_status = "failed"
|
||||
logger.warning(f"[ODF변환] 실패: {conv_result.stderr[:200]}")
|
||||
|
||||
@@ -206,9 +206,10 @@ async def _save_law_split(
|
||||
|
||||
# 장 구분자: 키가 000으로 끝나고 내용에 "제X장" 포함
|
||||
if key.endswith("000") and re.search(r"제\d+장", content):
|
||||
# 이전 장 저장
|
||||
if current_chapter and current_articles:
|
||||
chapters.append((current_chapter, current_articles))
|
||||
# 이전 장/서문 저장
|
||||
if current_articles:
|
||||
chapter_name = current_chapter or "서문"
|
||||
chapters.append((chapter_name, current_articles))
|
||||
chapter_match = re.search(r"(제\d+장\s*.+)", content)
|
||||
current_chapter = chapter_match.group(1).strip() if chapter_match else content.strip()
|
||||
current_articles = []
|
||||
@@ -216,8 +217,9 @@ async def _save_law_split(
|
||||
current_articles.append(unit)
|
||||
|
||||
# 마지막 장 저장
|
||||
if current_chapter and current_articles:
|
||||
chapters.append((current_chapter, current_articles))
|
||||
if current_articles:
|
||||
chapter_name = current_chapter or "서문"
|
||||
chapters.append((chapter_name, current_articles))
|
||||
|
||||
# 장 분할 성공
|
||||
sections = []
|
||||
|
||||
@@ -112,6 +112,9 @@ async def consume_queue():
|
||||
# 완료 처리
|
||||
async with async_session() as session:
|
||||
item = await session.get(ProcessingQueue, queue_id)
|
||||
if not item:
|
||||
logger.warning(f"[{stage}] queue_id={queue_id} 없음 (삭제됨?), skip")
|
||||
continue
|
||||
item.status = "completed"
|
||||
item.completed_at = datetime.now(timezone.utc)
|
||||
await session.commit()
|
||||
@@ -123,6 +126,9 @@ async def consume_queue():
|
||||
# 실패 처리
|
||||
async with async_session() as session:
|
||||
item = await session.get(ProcessingQueue, queue_id)
|
||||
if not item:
|
||||
logger.warning(f"[{stage}] queue_id={queue_id} 없음 (삭제됨?), skip")
|
||||
continue
|
||||
item.error_message = str(e)[:500]
|
||||
if item.attempts >= item.max_attempts:
|
||||
item.status = "failed"
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
try {
|
||||
// Inbox 파일만 필터
|
||||
const data = await api('/documents/?page_size=100');
|
||||
documents = data.items.filter(d => !d.ai_domain);
|
||||
documents = data.items.filter(d => d.review_status === 'pending');
|
||||
} catch (err) {
|
||||
addToast('error', 'Inbox 로딩 실패');
|
||||
} finally {
|
||||
|
||||
8
migrations/009_review_status.sql
Normal file
8
migrations/009_review_status.sql
Normal file
@@ -0,0 +1,8 @@
|
||||
-- Inbox 승인 상태 분리 + derived_path
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS review_status VARCHAR(20) DEFAULT 'pending';
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS derived_path TEXT;
|
||||
|
||||
-- 기존 문서는 전부 approved (마이그레이션 이후 신규만 pending)
|
||||
UPDATE documents SET review_status = 'approved';
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_documents_review_status ON documents(review_status);
|
||||
7
migrations/010_soft_delete.sql
Normal file
7
migrations/010_soft_delete.sql
Normal file
@@ -0,0 +1,7 @@
|
||||
-- Soft-delete 지원
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMPTZ;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_documents_not_deleted ON documents(deleted_at) WHERE deleted_at IS NULL;
|
||||
|
||||
-- active documents 뷰 (raw SQL 누락 방지)
|
||||
CREATE OR REPLACE VIEW active_documents AS SELECT * FROM documents WHERE deleted_at IS NULL;
|
||||
Reference in New Issue
Block a user