From ee74a9ba782ed41da15f90a3bd9362f271d35f88 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Tue, 14 Apr 2026 06:47:22 +0900 Subject: [PATCH] fix(extract): scale kordoc timeout by file size for large PDFs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 대형 PDF(14~40MB)에서 kordoc 파싱 timeout(60초) 실패하던 문제. 10MB당 60초 추가, 최소 60초 최대 300초로 조정. Co-Authored-By: Claude Opus 4.6 (1M context) --- app/workers/extract_worker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/workers/extract_worker.py b/app/workers/extract_worker.py index 649a6dc..35c6197 100644 --- a/app/workers/extract_worker.py +++ b/app/workers/extract_worker.py @@ -58,7 +58,9 @@ async def process(document_id: int, session: AsyncSession) -> None: if fmt in KORDOC_FORMATS: # 컨테이너 내부 경로: /documents/{file_path} container_path = f"/documents/{doc.file_path}" - async with httpx.AsyncClient(timeout=60) as client: + # 대형 PDF 대응: 10MB당 60초, 최소 60초 최대 300초 + kordoc_timeout = min(300, max(60, (doc.file_size or 0) // (10 * 1024 * 1024) * 60 + 60)) + async with httpx.AsyncClient(timeout=kordoc_timeout) as client: resp = await client.post( f"{settings.kordoc_endpoint}/parse", json={"filePath": container_path},