Files
hyungi_document_server/services/marker/server.py
T
Hyungi Ahn e66addf975 fix(canonical): marker engine_version via importlib.metadata
marker module 이 __version__ attribute 를 노출하지 않아 ship gate 10 에서
engine_version="unknown" 으로 표시되던 cosmetic 문제. importlib.metadata.
version("marker-pdf") 로 패키지 버전 정확히 읽음.

테스트: ship gate 10 PASS 확인 후 재배포.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 00:19:46 +00:00

141 lines
4.0 KiB
Python

"""marker-service — POST /convert: PDF → markdown (텍스트만, 이미지 제외).
Phase 1B Round 5 — /ready 정확한 status code, warmup 실패 가시화, 변환 실패 = 422.
plan: ~/.claude/plans/plan-idempotent-sundae.md
"""
import hashlib
import logging
import os
import threading
import time
from pathlib import Path
from fastapi import FastAPI, HTTPException, Response
from pydantic import BaseModel
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered
import marker as marker_module
logger = logging.getLogger(__name__)
app = FastAPI()
os.environ.setdefault("HF_HOME", "/models/huggingface")
os.environ.setdefault("TORCH_HOME", "/models/torch")
_models = None
_converter = None
try:
import importlib.metadata
_engine_version = importlib.metadata.version("marker-pdf")
except Exception:
_engine_version = "unknown"
_warmup_done = False
_warmup_error: str | None = None
_warmup_lock = threading.Lock()
def _ensure_warmup() -> None:
"""첫 /convert 또는 startup hook 시 모델 로드. HF cache volume 활용."""
global _models, _converter, _warmup_done, _warmup_error
if _warmup_done:
return
with _warmup_lock:
if _warmup_done:
return
try:
logger.info("[marker-service] warmup start")
_models = create_model_dict()
_converter = PdfConverter(artifact_dict=_models)
_warmup_done = True
_warmup_error = None
logger.info(f"[marker-service] warmup done engine_version={_engine_version}")
except Exception as exc:
_warmup_error = f"{type(exc).__name__}: {exc}"
logger.exception("[marker-service] warmup failed")
raise
@app.on_event("startup")
async def startup():
"""startup hook — async warmup 백그라운드. /ready 가 완료 여부 노출."""
import asyncio
asyncio.create_task(asyncio.to_thread(_ensure_warmup))
class ConvertRequest(BaseModel):
file_path: str
max_pages: int | None = None
class ConvertResponse(BaseModel):
md_content: str
md_content_hash: str
engine: str
engine_version: str
elapsed_ms: int
raw_metrics: dict
@app.get("/ready")
async def ready(response: Response):
"""Round 4 #1+#2: Response.status_code 명시 + warmup_error 노출."""
if _warmup_error:
response.status_code = 503
return {
"status": "warmup_failed",
"engine": "marker",
"engine_version": _engine_version,
"error": _warmup_error,
}
if not _warmup_done:
response.status_code = 503
return {
"status": "warming_up",
"engine": "marker",
"engine_version": _engine_version,
}
return {
"status": "ready",
"engine": "marker",
"engine_version": _engine_version,
}
@app.post("/convert", response_model=ConvertResponse)
async def convert(req: ConvertRequest):
_ensure_warmup()
p = Path(req.file_path)
if not p.is_file():
raise HTTPException(404, detail={"code": "file_not_found", "message": str(p)})
start = time.monotonic()
try:
rendered = _converter(str(p))
except Exception as exc:
logger.exception(f"[marker-service] conversion failed path={p}: {exc}")
raise HTTPException(
status_code=422,
detail={
"code": "conversion_failed",
"message": f"{type(exc).__name__}: {exc}",
},
) from exc
md_text, _meta, _images = text_from_rendered(rendered)
elapsed_ms = int((time.monotonic() - start) * 1000)
return ConvertResponse(
md_content=md_text,
md_content_hash=hashlib.sha256(md_text.encode("utf-8")).hexdigest(),
engine="marker",
engine_version=_engine_version,
elapsed_ms=elapsed_ms,
raw_metrics={
"page_count": getattr(rendered, "page_count", None),
"image_count_extracted": len(_images) if _images else 0,
},
)