Files
hyungi_document_server/services/marker/server.py
T
Hyungi Ahn 98ee7dffe2 ops(gpu-health): GPU 서비스 health/smoke 표준화 + synthetic VRAM 피크 가드
PR-GPU-Health-1. 운영 준비성 표준화 PR (모델 성능 개선 아님).

- OCR /smoke endpoint 추가 (160x60 OK PNG in-memory, 200/503 분기, Docker healthcheck 미사용)
- marker /health endpoint 추가 (stt/ocr 동일 시그니처)
- reranker docker-compose healthcheck 추가 (TEI :80/health)
- scripts/gpu_service_smoke.sh: docker exec 표준 점검 (OCR/STT expose-only)
- scripts/gpu_vram_fixture.sh: Mode A sequential + Mode B light overlap + --stress 옵션
- tests/load/fixtures/: synthetic ocr_ok.png / sine_30s.wav / lorem_1p.pdf

OCR 빈 응답 false negative — root cause: ports 미매핑.
결정: ocr-service / stt-service 는 expose-only 유지, 운영 점검은 docker exec 내부 curl 표준.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 09:42:07 +09:00

227 lines
7.5 KiB
Python

"""marker-service — POST /convert: PDF → markdown + 추출 이미지 base64.
Phase 1B (2026-05-01) — 텍스트만 응답, 이미지 폐기.
Phase 1B.5 (본 변경) — `_images` 직렬화해서 base64 응답에 포함. NAS write 권한이
없는 stateless 변환기 유지 (fastapi 가 NAS persist 담당).
plan: ~/.claude/plans/piped-humming-crystal.md
"""
import base64
import hashlib
import io
import logging
import os
import threading
import time
from pathlib import Path
from fastapi import FastAPI, HTTPException, Response
from pydantic import BaseModel, Field
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered
import marker as marker_module
# Module-level logger and the FastAPI application the route decorators attach to.
logger = logging.getLogger(__name__)
app = FastAPI()
# Default model cache locations. setdefault() leaves any value already present
# in the environment untouched.
# NOTE(review): these defaults are applied after the marker imports at the top
# of the file; if any imported library reads HF_HOME / TORCH_HOME at import
# time the defaults would arrive too late — confirm.
os.environ.setdefault("HF_HOME", "/models/huggingface")
os.environ.setdefault("TORCH_HOME", "/models/torch")
# Populated by _ensure_warmup(); None until the first successful warmup.
_models = None
_converter = None
# Version of the installed "marker-pdf" distribution, or "unknown" when the
# metadata lookup fails for any reason.
try:
    import importlib.metadata
    _engine_version = importlib.metadata.version("marker-pdf")
except Exception:
    _engine_version = "unknown"
# Warmup state shared between _ensure_warmup() and /ready:
#   _warmup_done  - set to True once the converter has been built
#   _warmup_error - "<ExceptionType>: <message>" of the last failed attempt
#   _warmup_lock  - held while a warmup attempt is running
_warmup_done = False
_warmup_error: str | None = None
_warmup_lock = threading.Lock()
# Cap on the number of images per response, to keep the base64 payload from
# blowing up. When the user PDF pool was measured (Phase 1D) the most
# image-heavy document had ~30 images, so 200 is a safe margin. When the cap is
# exceeded the response carries a truncated flag.
MAX_IMAGES_PER_DOC = int(os.getenv("MARKER_MAX_IMAGES_PER_DOC", "200"))
# Maximum raw bytes per image (before base64). Avoids graphics-heavy
# full-page scans.
MAX_BYTES_PER_IMAGE = int(os.getenv("MARKER_MAX_BYTES_PER_IMAGE", str(10 * 1024 * 1024)))
def _ensure_warmup() -> None:
    """Load the marker models and build the PDF converter, at most once.

    Called from the startup hook and at the start of /convert. Once a warmup
    has succeeded every later call returns immediately. A failed attempt
    records "<ExceptionType>: <message>" in ``_warmup_error``, logs the
    traceback and re-raises, so the next caller will try again.

    Raises:
        Exception: whatever model loading or converter construction raised.
    """
    global _models, _converter, _warmup_done, _warmup_error

    # Fast path: nothing to do, and no need to touch the lock.
    if _warmup_done:
        return

    with _warmup_lock:
        # Check again now that we hold the lock: another caller may have
        # completed the warmup while this one was waiting.
        if not _warmup_done:
            try:
                logger.info("[marker-service] warmup start")
                _models = create_model_dict()
                _converter = PdfConverter(artifact_dict=_models)
                _warmup_done = True
                _warmup_error = None
                logger.info(f"[marker-service] warmup done engine_version={_engine_version}")
            except Exception as exc:
                _warmup_error = f"{type(exc).__name__}: {exc}"
                logger.exception("[marker-service] warmup failed")
                raise
@app.on_event("startup")
async def startup():
    """Startup hook: begin model warmup in the background.

    The warmup runs in a worker thread so the server can start accepting
    requests right away; /ready reports whether it has finished or failed.
    """
    import asyncio

    def _consume_result(task) -> None:
        # _ensure_warmup() has already logged the failure and stored it in
        # _warmup_error; reading the exception here only marks it as retrieved
        # so asyncio does not emit "Task exception was never retrieved".
        if not task.cancelled():
            task.exception()

    warmup_task = asyncio.create_task(asyncio.to_thread(_ensure_warmup))
    # The event loop holds only a weak reference to tasks. Keep a strong one
    # for the lifetime of the app so the warmup cannot be garbage-collected
    # before it completes.
    app.state.warmup_task = warmup_task
    warmup_task.add_done_callback(_consume_result)
class ConvertRequest(BaseModel):
    """Request body for POST /convert."""
    # Path of the file to convert; the handler rejects it with 404 unless
    # Path(file_path).is_file() is true.
    file_path: str
    # NOTE(review): accepted, but nothing in the visible /convert handler reads
    # this field — confirm whether a page limit is meant to be applied.
    max_pages: int | None = None
class ConvertImage(BaseModel):
    """One image extracted by marker. The fastapi side writes it to the NAS and normalizes refs to docimg:img_NNN."""
    slug: str  # original marker slug (e.g. '_page_0_Picture_3.jpeg')
    format: str  # 'png' | 'jpeg' | 'webp' | 'gif'
    width: int | None = None
    height: int | None = None
    bytes_b64: str  # base64-encoded raw bytes
class ConvertResponse(BaseModel):
    """Response body of POST /convert."""
    # Markdown text produced from the rendered document.
    md_content: str
    # SHA-256 hex digest of md_content encoded as UTF-8.
    md_content_hash: str
    # The handler always sets this to "marker".
    engine: str
    # Installed marker-pdf version, or "unknown" when it could not be read.
    engine_version: str
    # Wall-clock milliseconds spent converting and extracting the text.
    elapsed_ms: int
    # The handler fills in page_count, image_count_extracted and
    # image_count_returned.
    raw_metrics: dict
    # Serialized images; may be fewer than were extracted (cap / skipped items).
    images: list[ConvertImage] = Field(default_factory=list)
    # True when the number of extracted images exceeded MAX_IMAGES_PER_DOC.
    images_truncated: bool = False
@app.get("/health")
def health():
    """Liveness probe: always reports ok, regardless of warmup state."""
    payload = {"status": "ok", "service": "marker-service"}
    return payload
@app.get("/ready")
async def ready(response: Response):
    """Readiness probe: sets the status code explicitly and exposes any warmup error.

    Returns 503 with status "warmup_failed" (plus the recorded error) when the
    last warmup attempt failed, 503 with status "warming_up" while warmup has
    not completed, and the default 200 with status "ready" otherwise.
    """
    # Start from the success payload and adjust it for the non-ready states;
    # overwriting "status" keeps it as the first key.
    body = {
        "status": "ready",
        "engine": "marker",
        "engine_version": _engine_version,
    }
    if _warmup_error:
        body["status"] = "warmup_failed"
        body["error"] = _warmup_error
        response.status_code = 503
    elif not _warmup_done:
        body["status"] = "warming_up"
        response.status_code = 503
    return body
# Conversions run in worker threads (see convert()). This lock keeps them
# strictly one at a time, exactly as they were when the handler executed
# inline on the single-threaded event loop.
_convert_lock = threading.Lock()


def _convert_blocking(req: ConvertRequest) -> ConvertResponse:
    """Run one conversion synchronously; meant to be called off the event loop.

    Raises:
        HTTPException: 404 (code "file_not_found") when ``req.file_path`` is
            not an existing file; 422 (code "conversion_failed") when the
            converter raises.
    """
    # NOTE(review): req.max_pages is accepted by the request model but is not
    # applied here — confirm whether a page limit is intended.
    _ensure_warmup()
    p = Path(req.file_path)
    if not p.is_file():
        raise HTTPException(404, detail={"code": "file_not_found", "message": str(p)})
    with _convert_lock:
        # Start the clock after acquiring the lock so elapsed_ms measures the
        # conversion itself, not time spent queued behind another request.
        start = time.monotonic()
        try:
            rendered = _converter(str(p))
        except Exception as exc:
            logger.exception(f"[marker-service] conversion failed path={p}: {exc}")
            raise HTTPException(
                status_code=422,
                detail={
                    "code": "conversion_failed",
                    "message": f"{type(exc).__name__}: {exc}",
                },
            ) from exc
        md_text, _meta, raw_images = text_from_rendered(rendered)
        elapsed_ms = int((time.monotonic() - start) * 1000)
        images_payload, truncated = _serialize_images(raw_images, str(p))
    return ConvertResponse(
        md_content=md_text,
        md_content_hash=hashlib.sha256(md_text.encode("utf-8")).hexdigest(),
        engine="marker",
        engine_version=_engine_version,
        elapsed_ms=elapsed_ms,
        raw_metrics={
            "page_count": getattr(rendered, "page_count", None),
            "image_count_extracted": len(raw_images) if raw_images else 0,
            "image_count_returned": len(images_payload),
        },
        images=images_payload,
        images_truncated=truncated,
    )


@app.post("/convert", response_model=ConvertResponse)
async def convert(req: ConvertRequest):
    """Convert the file at ``req.file_path`` to markdown plus base64 images.

    Warmup, the file check and the conversion are all blocking, so they run in
    a worker thread. Running them directly in this coroutine would freeze the
    event loop, and /health and /ready could not answer until the conversion
    finished.

    Raises:
        HTTPException: 404 when the file does not exist, 422 when the
            conversion fails.
    """
    import asyncio

    return await asyncio.to_thread(_convert_blocking, req)
def _serialize_images(raw_images, src_path: str) -> tuple[list[ConvertImage], bool]:
    """Turn marker's image mapping (slug -> PIL image) into base64 ConvertImage items.

    Guards:
    - more than MAX_IMAGES_PER_DOC entries: only the head is serialized and
      the returned flag is True
    - an image that fails to serialize is skipped with a warning (the whole
      request does not fail)
    - an image whose encoded size exceeds MAX_BYTES_PER_IMAGE is skipped with
      a warning

    Args:
        raw_images: mapping of slug to image, or a falsy value for "no images".
        src_path: source document path, used only in log messages.

    Returns:
        (serialized images, whether the count cap truncated the input).
    """
    if not raw_images:
        return [], False

    entries = list(raw_images.items())
    over_cap = len(entries) > MAX_IMAGES_PER_DOC
    if over_cap:
        logger.warning(
            f"[marker-service] images truncated path={src_path} "
            f"total={len(entries)} cap={MAX_IMAGES_PER_DOC}"
        )
        entries = entries[:MAX_IMAGES_PER_DOC]

    serialized: list[ConvertImage] = []
    for slug, image in entries:
        try:
            declared = (image.format or "PNG").upper()
            # marker can also emit WebP/GIF, but PNG/JPEG come first for 1B.5.
            # Anything unrecognised is forced to PNG (lossless re-encode).
            fmt = declared if declared in {"PNG", "JPEG", "WEBP", "GIF"} else "PNG"
            sink = io.BytesIO()
            image.save(sink, format=fmt)
            encoded = sink.getvalue()
            if len(encoded) <= MAX_BYTES_PER_IMAGE:
                serialized.append(
                    ConvertImage(
                        slug=slug,
                        format=fmt.lower(),
                        width=image.width,
                        height=image.height,
                        bytes_b64=base64.b64encode(encoded).decode("ascii"),
                    )
                )
            else:
                logger.warning(
                    f"[marker-service] image too large skipped path={src_path} "
                    f"slug={slug} bytes={len(encoded)} cap={MAX_BYTES_PER_IMAGE}"
                )
        except Exception as exc:
            logger.warning(
                f"[marker-service] image serialize failed path={src_path} "
                f"slug={slug}: {type(exc).__name__}: {exc}"
            )
    return serialized, over_cap