From b37043d651a24095ea4074bae20ce07721724cf8 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Fri, 3 Apr 2026 11:18:06 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20LibreOffice=20=ED=95=9C=EA=B8=80=20?= =?UTF-8?q?=ED=8C=8C=EC=9D=BC=EB=AA=85=20=ED=98=B8=ED=99=98=20=E2=80=94=20?= =?UTF-8?q?=EC=98=81=EB=AC=B8=20=EC=9E=84=EC=8B=9C=ED=8C=8C=EC=9D=BC?= =?UTF-8?q?=EB=A1=9C=20=EB=B3=B5=EC=82=AC=20=ED=9B=84=20=EB=B3=80=ED=99=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit extract_worker, preview_worker 모두 적용. Co-Authored-By: Claude Opus 4.6 (1M context) --- app/workers/extract_worker.py | 12 +++++++++--- app/workers/preview_worker.py | 10 ++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/app/workers/extract_worker.py b/app/workers/extract_worker.py index 9a6e32f..9bf29a2 100644 --- a/app/workers/extract_worker.py +++ b/app/workers/extract_worker.py @@ -81,15 +81,20 @@ async def process(document_id: int, session: AsyncSession) -> None: if not full_path.exists(): raise FileNotFoundError(f"파일 없음: {full_path}") + import shutil tmp_dir = Path("/tmp/extract_work") tmp_dir.mkdir(exist_ok=True) + # 한글 파일명 문제 방지 — 영문 임시 파일로 복사 + tmp_input = tmp_dir / f"input_{document_id}.{fmt}" + shutil.copy2(str(full_path), str(tmp_input)) + try: result = subprocess.run( - ["libreoffice", "--headless", "--convert-to", "txt:Text", "--outdir", str(tmp_dir), str(full_path)], + ["libreoffice", "--headless", "--convert-to", "txt:Text", "--outdir", str(tmp_dir), str(tmp_input)], capture_output=True, text=True, timeout=60, ) - txt_file = tmp_dir / f"{full_path.stem}.txt" + txt_file = tmp_dir / f"input_{document_id}.txt" if txt_file.exists(): text = txt_file.read_text(encoding="utf-8", errors="replace") doc.extracted_text = text[:15000] @@ -97,11 +102,12 @@ async def process(document_id: int, session: AsyncSession) -> None: doc.extractor_version = "libreoffice" txt_file.unlink() logger.info(f"[LibreOffice] {doc.file_path} ({len(text)}자)") - return else: raise RuntimeError(f"LibreOffice 변환 결과물 없음: {result.stderr[:200]}") except subprocess.TimeoutExpired: raise RuntimeError(f"LibreOffice 텍스트 추출 timeout (60s)") + finally: + tmp_input.unlink(missing_ok=True) # 미지원 포맷 doc.extracted_text = "" diff --git a/app/workers/preview_worker.py b/app/workers/preview_worker.py index 997cf77..796c1d4 100644 --- a/app/workers/preview_worker.py +++ b/app/workers/preview_worker.py @@ -71,22 +71,28 @@ async def process(document_id: int, session: AsyncSession) -> None: tmp_dir = Path("/tmp/preview_work") tmp_dir.mkdir(exist_ok=True) + # 한글 파일명 문제 방지 — 영문 임시 파일로 복사 + tmp_input = tmp_dir / f"input_{document_id}{source.suffix}" + shutil.copy2(str(source), str(tmp_input)) + result = subprocess.run( [ "libreoffice", "--headless", "--convert-to", "pdf", "--outdir", str(tmp_dir), - str(source), + str(tmp_input), ], capture_output=True, text=True, timeout=TIMEOUT_SECONDS, ) + tmp_input.unlink(missing_ok=True) + if result.returncode != 0: raise RuntimeError(f"LibreOffice 변환 실패: {result.stderr[:200]}") # 변환 결과 찾기 - converted = tmp_dir / f"{source.stem}.pdf" + converted = tmp_dir / f"input_{document_id}.pdf" if not converted.exists(): raise RuntimeError(f"변환 결과물 없음: {converted}")