From 7dd77ec926a5749898fd0abf6a7fe89faa3f9cd6 Mon Sep 17 00:00:00 2001
From: Hyungi Ahn
Date: Tue, 28 Apr 2026 10:01:45 +0900
Subject: [PATCH] =?UTF-8?q?fix(classify):=20data=5Forigin=20enum=20?=
 =?UTF-8?q?=EA=B2=80=EC=A6=9D=20=E2=80=94=20knowledge=20=EB=93=B1=20?=
 =?UTF-8?q?=EC=9E=98=EB=AA=BB=EB=90=9C=20=EA=B0=92=20cascade=20fail=20?=
 =?UTF-8?q?=EB=B0=A9=EC=A7=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

AI 응답에서 dataOrigin='knowledge' 같은 doc_purpose enum 값이 data_origin 컬럼에
잘못 매핑되면 asyncpg InvalidTextRepresentationError 발생.
같은 classify_worker session 의 후속 autoflush 호출이 PendingRollbackError 로
cascade 되어 batch 안 다른 문서까지 모두 실패.

doc_purpose 처럼 enum 허용값(work/external) 검증 후 박도록 수정.
외 값은 skip (data_origin NULL 유지).

가스기사 토픽 결손 15건의 RAG 결손 root cause.
---
 app/workers/classify_worker.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/app/workers/classify_worker.py b/app/workers/classify_worker.py
index 644e0c7..3ceb57a 100644
--- a/app/workers/classify_worker.py
+++ b/app/workers/classify_worker.py
@@ -370,8 +370,13 @@ async def process(document_id: int, session: AsyncSession) -> None:
     # source/origin
     if parsed.get("sourceChannel") and not doc.source_channel:
         doc.source_channel = parsed["sourceChannel"]
+    # data_origin enum 검증 — AI 가 'knowledge' 같은 doc_purpose 값을 잘못 던지면
+    # asyncpg InvalidTextRepresentationError → 같은 session 후속 호출 cascade fail.
+    # 허용값(work/external) 외는 skip (NULL 유지).
     if parsed.get("dataOrigin") and not doc.data_origin:
-        doc.data_origin = parsed["dataOrigin"]
+        origin = parsed["dataOrigin"]
+        if origin in ("work", "external"):
+            doc.data_origin = origin
 
     # 용도 (AI는 빈 값만 채움 — 수동/업로드 명시값 우선)
     if parsed.get("docPurpose") and not doc.doc_purpose: