98ee7dffe2
PR-GPU-Health-1. 운영 준비성 표준화 PR (모델 성능 개선 아님). - OCR /smoke endpoint 추가 (160x60 OK PNG in-memory, 200/503 분기, Docker healthcheck 미사용) - marker /health endpoint 추가 (stt/ocr 동일 시그니처) - reranker docker-compose healthcheck 추가 (TEI :80/health) - scripts/gpu_service_smoke.sh: docker exec 표준 점검 (OCR/STT expose-only) - scripts/gpu_vram_fixture.sh: Mode A sequential + Mode B light overlap + --stress 옵션 - tests/load/fixtures/: synthetic ocr_ok.png / sine_30s.wav / lorem_1p.pdf OCR 빈 응답 false negative — root cause: ports 미매핑. 결정: ocr-service / stt-service 는 expose-only 유지, 운영 점검은 docker exec 내부 curl 표준. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
227 lines
7.5 KiB
Python
227 lines
7.5 KiB
Python
"""marker-service — POST /convert: PDF → markdown + 추출 이미지 base64.
|
|
|
|
Phase 1B (2026-05-01) — 텍스트만 응답, 이미지 폐기.
|
|
Phase 1B.5 (본 변경) — `_images` 직렬화해서 base64 응답에 포함. NAS write 권한이
|
|
없는 stateless 변환기 유지 (fastapi 가 NAS persist 담당).
|
|
|
|
plan: ~/.claude/plans/piped-humming-crystal.md
|
|
"""
|
|
import base64
|
|
import hashlib
|
|
import io
|
|
import logging
|
|
import os
|
|
import threading
|
|
import time
|
|
from pathlib import Path
|
|
|
|
from fastapi import FastAPI, HTTPException, Response
|
|
from pydantic import BaseModel, Field
|
|
|
|
from marker.converters.pdf import PdfConverter
|
|
from marker.models import create_model_dict
|
|
from marker.output import text_from_rendered
|
|
import marker as marker_module
|
|
|
|
# Module logger and the FastAPI application that the route decorators attach to.
logger = logging.getLogger(__name__)
app = FastAPI()

# Fallback model-cache locations; values already present in the environment win.
# NOTE(review): these defaults are applied after the `marker` imports at the
# top of the file. If any of those libraries read HF_HOME / TORCH_HOME at
# import time, the defaults arrive too late — confirm, or set them in the
# container environment instead.
os.environ.setdefault("HF_HOME", "/models/huggingface")
os.environ.setdefault("TORCH_HOME", "/models/torch")

# Installed marker-pdf version, echoed by /ready and /convert.
# Any lookup failure degrades to the literal "unknown".
try:
    import importlib.metadata

    _engine_version = importlib.metadata.version("marker-pdf")
except Exception:
    _engine_version = "unknown"

# Warmup state shared by _ensure_warmup(), /ready and /convert.
_models = None
_converter = None
_warmup_done = False
_warmup_error: str | None = None
_warmup_lock = threading.Lock()

# Cap on the number of images returned per document, to keep the base64
# response from exploding. In the Phase 1D measurement of the user PDF pool the
# most image-heavy document had ~30 images, so 200 leaves a safe margin. Above
# the cap the response is truncated and flagged.
MAX_IMAGES_PER_DOC = int(os.environ.get("MARKER_MAX_IMAGES_PER_DOC", "200"))

# Maximum raw bytes per image (before base64); avoids shipping graphics-heavy
# full-page scans.
MAX_BYTES_PER_IMAGE = int(
    os.environ.get("MARKER_MAX_BYTES_PER_IMAGE", str(10 * 1024**2))
)
|
|
|
|
|
|
def _ensure_warmup() -> None:
    """Load the marker models and build the converter exactly once.

    Called from the startup hook and again at the top of /convert. The fast
    path returns immediately once warmup has completed; otherwise the work is
    done under ``_warmup_lock`` so concurrent callers do not load the models
    twice.

    Raises:
        Exception: whatever model loading or converter construction raised.
            The failure is also recorded in ``_warmup_error`` and logged, and
            ``_warmup_done`` stays False so a later call tries again.
    """
    global _models, _converter, _warmup_done, _warmup_error

    if _warmup_done:
        return

    with _warmup_lock:
        # Re-check under the lock: another thread may have finished the warmup
        # while this one was waiting.
        if not _warmup_done:
            try:
                logger.info("[marker-service] warmup start")
                _models = create_model_dict()
                _converter = PdfConverter(artifact_dict=_models)
                _warmup_done = True
                _warmup_error = None
                logger.info(f"[marker-service] warmup done engine_version={_engine_version}")
            except Exception as failure:
                _warmup_error = f"{type(failure).__name__}: {failure}"
                logger.exception("[marker-service] warmup failed")
                raise
|
|
|
|
|
|
@app.on_event("startup")
async def startup():
    """Startup hook: start model warmup in a background thread.

    The hook returns without waiting for the models, so the server can answer
    /health and /ready while they load; /ready exposes whether warmup has
    finished or failed.
    """
    import asyncio

    def _consume_result(task: "asyncio.Task") -> None:
        # _ensure_warmup has already logged the failure and stored it in
        # _warmup_error. Retrieving the exception here only stops asyncio from
        # reporting "Task exception was never retrieved" later on.
        if not task.cancelled():
            task.exception()

    warmup_task = asyncio.create_task(asyncio.to_thread(_ensure_warmup))
    warmup_task.add_done_callback(_consume_result)
    # asyncio.create_task() documents that the caller must keep a reference to
    # the task; park it on the application state for the process lifetime.
    app.state.warmup_task = warmup_task
|
|
|
|
|
|
class ConvertRequest(BaseModel):
    """Request body for POST /convert."""

    # Filesystem path of the document to convert; the handler rejects it with
    # 404 unless it points at an existing regular file.
    file_path: str

    # Optional page limit.
    # NOTE(review): the /convert handler in this file never reads this field.
    max_pages: int | None = None
|
|
|
|
|
|
class ConvertImage(BaseModel):
    """A single image extracted by marker.

    The fastapi side writes it to the NAS and normalizes references to the
    ``docimg:img_NNN`` form.
    """

    # Original marker slug (e.g. '_page_0_Picture_3.jpeg').
    slug: str

    # Encoding of the payload: 'png' | 'jpeg' | 'webp' | 'gif'.
    format: str

    # Pixel dimensions, when available.
    width: int | None = None
    height: int | None = None

    # Base64-encoded raw image bytes.
    bytes_b64: str
|
|
|
|
|
|
class ConvertResponse(BaseModel):
    """Response body of POST /convert."""

    # Markdown text produced for the document.
    md_content: str

    # SHA-256 hex digest of the UTF-8 encoded markdown.
    md_content_hash: str

    # Engine name and installed engine package version.
    engine: str
    engine_version: str

    # Conversion time in milliseconds (image serialization is not included).
    elapsed_ms: int

    # Free-form counters: page count plus extracted/returned image counts.
    raw_metrics: dict

    # Serialized images; empty when the document yielded none.
    images: list[ConvertImage] = Field(default_factory=list)

    # True when more images were extracted than the per-document cap allows.
    images_truncated: bool = False
|
|
|
|
|
|
@app.get("/health")
def health():
    """Liveness probe: returns a fixed payload regardless of warmup state."""
    payload = {"status": "ok", "service": "marker-service"}
    return payload
|
|
|
|
|
|
@app.get("/ready")
async def ready(response: Response):
    """Readiness probe reflecting the model warmup state.

    Responds 503 with status ``warmup_failed`` (plus the recorded error) when
    the last warmup attempt failed, 503 with ``warming_up`` while warmup has
    not completed, and the default success status with ``ready`` otherwise.
    The status code is set explicitly on the injected ``response``.
    """
    # A recorded failure takes precedence over "still warming up".
    if _warmup_error:
        state = "warmup_failed"
    elif not _warmup_done:
        state = "warming_up"
    else:
        state = "ready"

    body = {
        "status": state,
        "engine": "marker",
        "engine_version": _engine_version,
    }
    if state != "ready":
        response.status_code = 503
    if state == "warmup_failed":
        body["error"] = _warmup_error
    return body
|
|
|
|
|
|
@app.post("/convert", response_model=ConvertResponse)
async def convert(req: ConvertRequest):
    """Convert the document at ``req.file_path`` to markdown plus base64 images.

    The blocking steps (waiting for warmup, running the converter, encoding
    images) run in worker threads, so the event loop keeps answering /health
    and /ready while a document is processed. Conversions still run one at a
    time, exactly as they did when this handler blocked the loop.

    Args:
        req: Request body. Only ``file_path`` is used here; ``max_pages`` is
            accepted but not consulted by this handler.

    Returns:
        ConvertResponse carrying the markdown, its SHA-256 hex digest, engine
        name/version, conversion time in milliseconds, counters, and the
        serialized images with the truncation flag.

    Raises:
        HTTPException: 404 (``file_not_found``) when the path is not an
            existing regular file; 422 (``conversion_failed``) when the
            converter raises.
        Exception: a warmup failure propagates unchanged from
            ``_ensure_warmup``.
    """
    import asyncio  # function-scope import, same as the startup hook

    # Warmup may still be running in the startup thread. Waiting for its lock
    # on the event loop would freeze every other endpoint, so wait in a thread.
    await asyncio.to_thread(_ensure_warmup)

    p = Path(req.file_path)
    if not p.is_file():
        raise HTTPException(404, detail={"code": "file_not_found", "message": str(p)})

    # Keep conversions serialized now that they no longer block the loop.
    # There is no await between the lookup and the assignment, so two requests
    # cannot create two different locks.
    convert_lock = getattr(app.state, "convert_lock", None)
    if convert_lock is None:
        convert_lock = app.state.convert_lock = asyncio.Lock()

    async with convert_lock:
        start = time.monotonic()
        try:
            rendered = await asyncio.to_thread(_converter, str(p))
        except Exception as exc:
            logger.exception(f"[marker-service] conversion failed path={p}: {exc}")
            raise HTTPException(
                status_code=422,
                detail={
                    "code": "conversion_failed",
                    "message": f"{type(exc).__name__}: {exc}",
                },
            ) from exc

        md_text, _meta, raw_images = text_from_rendered(rendered)
        elapsed_ms = int((time.monotonic() - start) * 1000)

    # Image encoding is CPU-bound as well; keep it off the event loop.
    images_payload, truncated = await asyncio.to_thread(
        _serialize_images, raw_images, str(p)
    )

    return ConvertResponse(
        md_content=md_text,
        md_content_hash=hashlib.sha256(md_text.encode("utf-8")).hexdigest(),
        engine="marker",
        engine_version=_engine_version,
        elapsed_ms=elapsed_ms,
        raw_metrics={
            "page_count": getattr(rendered, "page_count", None),
            "image_count_extracted": len(raw_images) if raw_images else 0,
            "image_count_returned": len(images_payload),
        },
        images=images_payload,
        images_truncated=truncated,
    )
|
|
|
|
|
|
def _serialize_images(raw_images, src_path: str) -> tuple[list[ConvertImage], bool]:
    """Turn marker's extracted images into base64 ``ConvertImage`` entries.

    Args:
        raw_images: Mapping of slug to image object, iterated via ``.items()``;
            a falsy value yields an empty result. Each image is expected to
            expose ``format``, ``save``, ``width`` and ``height`` (presumably
            PIL images — confirm against marker's output).
        src_path: Source document path, used only in log messages.

    Returns:
        ``(images, truncated)``. ``truncated`` is True when more than
        ``MAX_IMAGES_PER_DOC`` images were supplied and only the leading ones
        were processed.

    Guards:
        - above ``MAX_IMAGES_PER_DOC`` only the head of the mapping is kept;
        - an image that fails to serialize is skipped with a warning instead
          of failing the whole document;
        - an image whose encoded size exceeds ``MAX_BYTES_PER_IMAGE`` is
          skipped with a warning.
    """
    if not raw_images:
        return [], False

    entries = list(raw_images.items())
    total = len(entries)
    over_cap = total > MAX_IMAGES_PER_DOC
    if over_cap:
        logger.warning(
            f"[marker-service] images truncated path={src_path} "
            f"total={total} cap={MAX_IMAGES_PER_DOC}"
        )
        entries = entries[:MAX_IMAGES_PER_DOC]

    # WebP/GIF may also be emitted, but PNG/JPEG are the primary targets here.
    # Anything outside this set is re-encoded losslessly as PNG.
    passthrough_formats = {"PNG", "JPEG", "WEBP", "GIF"}

    serialized: list[ConvertImage] = []
    for slug, image in entries:
        try:
            declared = (image.format or "PNG").upper()
            encode_as = declared if declared in passthrough_formats else "PNG"

            sink = io.BytesIO()
            image.save(sink, format=encode_as)
            payload = sink.getvalue()
            size = len(payload)

            if size <= MAX_BYTES_PER_IMAGE:
                serialized.append(
                    ConvertImage(
                        slug=slug,
                        format=encode_as.lower(),
                        width=image.width,
                        height=image.height,
                        bytes_b64=base64.b64encode(payload).decode("ascii"),
                    )
                )
            else:
                logger.warning(
                    f"[marker-service] image too large skipped path={src_path} "
                    f"slug={slug} bytes={size} cap={MAX_BYTES_PER_IMAGE}"
                )
        except Exception as err:
            # Best effort: one bad image must not fail the whole document.
            logger.warning(
                f"[marker-service] image serialize failed path={src_path} "
                f"slug={slug}: {type(err).__name__}: {err}"
            )

    return serialized, over_cap
|