"""marker-service — POST /convert: PDF → markdown (text only, images excluded).

Phase 1B Round 5 — exact /ready status codes, visible warmup failures,
conversion failure = 422.
plan: ~/.claude/plans/plan-idempotent-sundae.md
"""

import asyncio
import hashlib
import logging
import os
import threading
import time
from pathlib import Path

# Model cache defaults are applied BEFORE the third-party imports below.
# NOTE(review): libraries that resolve their cache directory at import time
# (huggingface_hub is documented to read HF_HOME this way) would not see a
# default that is only set afterwards — confirm against the marker dependency
# tree. setdefault() never overrides a value provided by the environment.
os.environ.setdefault("HF_HOME", "/models/huggingface")
os.environ.setdefault("TORCH_HOME", "/models/torch")

from fastapi import FastAPI, HTTPException, Response  # noqa: E402
from pydantic import BaseModel  # noqa: E402

from marker.converters.pdf import PdfConverter  # noqa: E402
from marker.models import create_model_dict  # noqa: E402
from marker.output import text_from_rendered  # noqa: E402
import marker as marker_module  # noqa: E402,F401

logger = logging.getLogger(__name__)

app = FastAPI()

# Populated by _ensure_warmup(); None until the first successful warmup.
_models = None
_converter = None

# Best-effort lookup of the installed marker-pdf version; any failure here
# must not prevent the service from starting.
try:
    import importlib.metadata

    _engine_version = importlib.metadata.version("marker-pdf")
except Exception:
    _engine_version = "unknown"

# Warmup state shared between the startup task, /ready and /convert.
_warmup_done = False
_warmup_error: str | None = None
_warmup_lock = threading.Lock()

# Strong reference to the background warmup task created at startup.
_warmup_task: asyncio.Task | None = None

# Allows one conversion at a time. The handler used to run the converter
# directly on the event loop, which implicitly serialized conversions; this
# lock keeps that property now that the work runs in worker threads.
_convert_lock = threading.Lock()


def _ensure_warmup() -> None:
    """Load the models and build the converter exactly once.

    Called from the startup hook and again at the top of every /convert
    request. After a successful warmup the call returns immediately; after a
    failed one, the next call tries again.

    Raises:
        Exception: whatever ``create_model_dict`` / ``PdfConverter`` raised.
            The failure is logged and recorded in ``_warmup_error`` before
            being re-raised.
    """
    global _models, _converter, _warmup_done, _warmup_error
    if _warmup_done:
        return
    with _warmup_lock:
        # Re-check under the lock: another thread may have finished warmup
        # while this one was waiting.
        if _warmup_done:
            return
        try:
            logger.info("[marker-service] warmup start")
            _models = create_model_dict()
            _converter = PdfConverter(artifact_dict=_models)
            # Clear the stale error before flipping the done flag so /ready
            # never observes "done" together with an old error message.
            _warmup_error = None
            _warmup_done = True
            logger.info(
                "[marker-service] warmup done engine_version=%s", _engine_version
            )
        except Exception as exc:
            _warmup_error = f"{type(exc).__name__}: {exc}"
            logger.exception("[marker-service] warmup failed")
            raise


def _on_warmup_finished(task: asyncio.Task) -> None:
    """Consume the warmup task's result so asyncio does not warn about it.

    ``_ensure_warmup`` has already logged the failure and stored it in
    ``_warmup_error``; retrieving the exception here only prevents the
    "Task exception was never retrieved" message.
    """
    if not task.cancelled():
        task.exception()


@app.on_event("startup")
async def startup():
    """Startup hook — kick off model warmup in a background thread.

    The hook returns immediately; /ready reports whether warmup has finished.
    """
    global _warmup_task
    # Keep a reference: the event loop only holds weak references to tasks.
    _warmup_task = asyncio.create_task(asyncio.to_thread(_ensure_warmup))
    _warmup_task.add_done_callback(_on_warmup_finished)


class ConvertRequest(BaseModel):
    """Request body for POST /convert."""

    # Path of the PDF to convert, as seen by this service's filesystem.
    file_path: str
    # Accepted for API compatibility; not applied by /convert at present.
    max_pages: int | None = None


class ConvertResponse(BaseModel):
    """Response body for POST /convert."""

    # Markdown text produced by the converter.
    md_content: str
    # Hex SHA-256 of md_content encoded as UTF-8.
    md_content_hash: str
    # Always "marker" for this service.
    engine: str
    # Installed marker-pdf version, or "unknown" if it could not be read.
    engine_version: str
    # Time spent on conversion plus text extraction, in milliseconds.
    elapsed_ms: int
    # Extra figures: "page_count" and "image_count_extracted".
    raw_metrics: dict


@app.get("/ready")
async def ready(response: Response):
    """Readiness probe.

    Returns 200 with ``status="ready"`` once warmup has succeeded. Otherwise
    sets status 503 and reports ``"warmup_failed"`` (with the recorded error)
    or ``"warming_up"``.
    """
    # Snapshot once: the warmup thread may reset the globals concurrently.
    error = _warmup_error
    payload = {
        "status": "ready",
        "engine": "marker",
        "engine_version": _engine_version,
    }
    if error:
        response.status_code = 503
        payload["status"] = "warmup_failed"
        payload["error"] = error
    elif not _warmup_done:
        response.status_code = 503
        payload["status"] = "warming_up"
    return payload


def _run_conversion(path: Path):
    """Run the converter on ``path`` and return ``(rendered, started)``.

    ``started`` is the ``time.monotonic()`` value taken once the conversion
    lock is held, so time spent queued behind another request is not counted
    as conversion time.
    """
    with _convert_lock:
        started = time.monotonic()
        return _converter(str(path)), started


@app.post("/convert", response_model=ConvertResponse)
async def convert(req: ConvertRequest):
    """Convert the PDF at ``req.file_path`` to markdown.

    Blocking work (model warmup and the conversion itself) runs in worker
    threads so the event loop stays free to answer /ready meanwhile.

    Raises:
        HTTPException 503: model warmup failed (``code="warmup_failed"``).
        HTTPException 404: ``file_path`` is not an existing regular file.
        HTTPException 422: the converter raised (``code="conversion_failed"``).
    """
    try:
        await asyncio.to_thread(_ensure_warmup)
    except Exception as exc:
        # _ensure_warmup already logged the traceback; report the outage as
        # a structured 503 rather than an unhandled 500.
        raise HTTPException(
            status_code=503,
            detail={
                "code": "warmup_failed",
                "message": f"{type(exc).__name__}: {exc}",
            },
        ) from exc

    # NOTE(review): file_path is used as given, so any file readable by the
    # service can be requested — confirm only trusted callers reach this API.
    p = Path(req.file_path)
    if not p.is_file():
        raise HTTPException(404, detail={"code": "file_not_found", "message": str(p)})

    try:
        rendered, started = await asyncio.to_thread(_run_conversion, p)
    except Exception as exc:
        logger.exception("[marker-service] conversion failed path=%s: %s", p, exc)
        raise HTTPException(
            status_code=422,
            detail={
                "code": "conversion_failed",
                "message": f"{type(exc).__name__}: {exc}",
            },
        ) from exc

    md_text, _meta, images = text_from_rendered(rendered)
    elapsed_ms = int((time.monotonic() - started) * 1000)
    return ConvertResponse(
        md_content=md_text,
        md_content_hash=hashlib.sha256(md_text.encode("utf-8")).hexdigest(),
        engine="marker",
        engine_version=_engine_version,
        elapsed_ms=elapsed_ms,
        raw_metrics={
            "page_count": getattr(rendered, "page_count", None),
            "image_count_extracted": len(images) if images else 0,
        },
    )