Files
hyungi_document_server/services/marker/server.py
T
hyungi 3df0ca53ab feat(services): crawl-24x7 A-8 헬스 패널 + D-1 stt/marker idle-unload
A-8 1차: crawl-health 컨테이너(100.110.63.63:8765 Tailscale 바인딩 전용, 읽기 전용 SELECT, caddy 라우트 금지).
D-1 전제 작업: STT_PRELOAD=0+30분 유휴 해제(lock+inflight+reaper), marker MARKER_PRELOAD=0+idle-unload,
/ready idle=200(503=warmup_failed 한정 — fastapi depends_on 정합), healthcheck cuda 기준 전환.
2026-06-10 13:03:31 +09:00

326 lines
12 KiB
Python

"""marker-service — POST /convert: PDF → markdown + 추출 이미지 base64.
Phase 1B (2026-05-01) — 텍스트만 응답, 이미지 폐기.
Phase 1B.5 — `_images` 직렬화해서 base64 응답에 포함. NAS write 권한이
없는 stateless 변환기 유지 (fastapi 가 NAS persist 담당).
D-1 (plan crawl-24x7-1, 2026-06-10) — idle-unload 운영 전환:
MARKER_PRELOAD=0 : startup warmup 끔 (첫 /convert 시 lazy load)
MARKER_IDLE_UNLOAD_MINUTES : N분 유휴 시 모델 해제 (0=비활성, 기존 동작)
/ready 는 idle(미적재)에서도 200 — fastapi 의 depends_on service_healthy 가
lazy 모드에서 영구 미기동으로 굳는 것 방지. 503 은 warmup_failed 한정.
plan: ~/.claude/plans/piped-humming-crystal.md
"""
import base64
import gc
import hashlib
import io
import logging
import os
import threading
import time
from pathlib import Path
from fastapi import FastAPI, HTTPException, Response
from pydantic import BaseModel, Field
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered
import marker as marker_module
logger = logging.getLogger(__name__)
app = FastAPI()
os.environ.setdefault("HF_HOME", "/models/huggingface")
os.environ.setdefault("TORCH_HOME", "/models/torch")
_models = None
_converter = None
try:
import importlib.metadata
_engine_version = importlib.metadata.version("marker-pdf")
except Exception:
_engine_version = "unknown"
_warmup_done = False
_warmup_error: str | None = None
_warmup_lock = threading.Lock()
# D-1 idle-unload 상태 — 전이는 전부 _warmup_lock 아래
_PRELOAD = os.getenv("MARKER_PRELOAD", "1") != "0"
_IDLE_UNLOAD_MINUTES = int(os.getenv("MARKER_IDLE_UNLOAD_MINUTES", "0"))
_inflight = 0
_last_used = time.monotonic()
# 이미지 응답 cap. base64 응답 크기 폭주 방지. 사용자 PDF 풀 측정 (Phase 1D) 시
# 가장 이미지 많은 문서가 ~30건 수준 → 200 은 안전 마진. 초과 시 truncate flag 응답.
MAX_IMAGES_PER_DOC = int(os.getenv("MARKER_MAX_IMAGES_PER_DOC", "200"))
# per-image 최대 raw bytes (base64 전). 그래픽이 많은 풀페이지 스캔 회피.
MAX_BYTES_PER_IMAGE = int(os.getenv("MARKER_MAX_BYTES_PER_IMAGE", str(10 * 1024 * 1024)))
def _ensure_warmup() -> None:
"""첫 /convert 또는 startup hook 시 모델 로드. HF cache volume 활용."""
global _models, _converter, _warmup_done, _warmup_error
if _warmup_done:
return
with _warmup_lock:
if _warmup_done:
return
try:
logger.info("[marker-service] warmup start")
_models = create_model_dict()
_converter = PdfConverter(artifact_dict=_models)
_warmup_done = True
_warmup_error = None
logger.info(f"[marker-service] warmup done engine_version={_engine_version}")
except Exception as exc:
_warmup_error = f"{type(exc).__name__}: {exc}"
logger.exception("[marker-service] warmup failed")
raise
def _acquire_models():
"""warmup 보장 + inflight 진입을 원자적으로 — ensure 직후 reaper 가 해제하는 경합 차단."""
global _inflight
while True:
_ensure_warmup()
with _warmup_lock:
if _warmup_done:
_inflight += 1
return
# ensure 와 lock 재진입 사이에 unload 가 끼어든 희귀 경합 — 재시도
def _release_models():
global _inflight, _last_used
with _warmup_lock:
_inflight -= 1
_last_used = time.monotonic()
def _maybe_unload() -> None:
"""유휴 시 모델 해제. 변환 중(inflight>0)이면 절대 해제하지 않는다.
split 변환의 배치 사이 간격은 초 단위 — N>=1분 임계면 배치 사이 해제 없음.
"""
global _models, _converter, _warmup_done
with _warmup_lock:
if not _warmup_done or _inflight > 0:
return
if time.monotonic() - _last_used < _IDLE_UNLOAD_MINUTES * 60:
return
_models = None
_converter = None
_warmup_done = False
gc.collect()
try:
import torch
torch.cuda.empty_cache()
except Exception:
pass
logger.info(f"[marker-service] idle-unload: 모델 해제 (유휴 {_IDLE_UNLOAD_MINUTES}분 초과)")
async def _idle_reaper():
import asyncio
while True:
await asyncio.sleep(60)
try:
_maybe_unload()
except Exception:
logger.exception("[marker-service] idle reaper 오류")
@app.on_event("startup")
async def startup():
"""startup hook — warmup 은 MARKER_PRELOAD 게이트 (D-1: lazy 기본 전환은 compose 가)."""
import asyncio
if _PRELOAD:
asyncio.create_task(asyncio.to_thread(_ensure_warmup))
if _IDLE_UNLOAD_MINUTES > 0:
asyncio.create_task(_idle_reaper())
logger.info(f"[marker-service] idle-unload 활성: {_IDLE_UNLOAD_MINUTES}")
class ConvertRequest(BaseModel):
file_path: str
max_pages: int | None = None
# page range (1-based inclusive) — LargeDoc split 변환용. marker 내부 0-based 변환은
# convert() 에 격리 (page numbering invariant: DB/API=1-based, marker=0-based).
start_page: int | None = None
end_page: int | None = None
class ConvertImage(BaseModel):
"""marker 추출 이미지 1건. fastapi 가 NAS 에 쓰고 docimg:img_NNN 으로 ref 정규화."""
slug: str # marker 원본 slug (예: '_page_0_Picture_3.jpeg')
format: str # 'png' | 'jpeg' | 'webp' | 'gif'
width: int | None = None
height: int | None = None
bytes_b64: str # base64-encoded raw bytes
class ConvertResponse(BaseModel):
md_content: str
md_content_hash: str
engine: str
engine_version: str
elapsed_ms: int
raw_metrics: dict
images: list[ConvertImage] = Field(default_factory=list)
images_truncated: bool = False
@app.get("/health")
def health():
return {"status": "ok", "service": "marker-service"}
@app.get("/ready")
async def ready(response: Response):
"""Round 4 #1+#2: Response.status_code 명시 + warmup_error 노출.
D-1: idle(미적재) = 200. 503 은 warmup_failed 한정 — lazy 모드에서 fastapi
depends_on service_healthy 가 영구 미기동으로 굳지 않게. 배포 검증에서
'status=ready' 단언하던 runbook 은 강제 warm 호출(/convert 1건)로 대체.
"""
if _warmup_error:
response.status_code = 503
return {
"status": "warmup_failed",
"engine": "marker",
"engine_version": _engine_version,
"error": _warmup_error,
}
if not _warmup_done:
return {
"status": "warming_up" if _PRELOAD else "idle",
"engine": "marker",
"engine_version": _engine_version,
"models_loaded": False,
"idle_unload_minutes": _IDLE_UNLOAD_MINUTES,
}
return {
"status": "ready",
"engine": "marker",
"engine_version": _engine_version,
"models_loaded": True,
"inflight": _inflight,
"idle_unload_minutes": _IDLE_UNLOAD_MINUTES,
}
@app.post("/convert", response_model=ConvertResponse)
async def convert(req: ConvertRequest):
p = Path(req.file_path)
if not p.is_file():
raise HTTPException(404, detail={"code": "file_not_found", "message": str(p)})
if req.start_page is not None and req.end_page is not None:
if req.start_page < 1 or req.end_page < req.start_page:
raise HTTPException(
422,
detail={
"code": "bad_page_range",
"message": f"start_page={req.start_page} end_page={req.end_page}",
},
)
# D-1: warmup 보장 + inflight 진입 원자화 — 변환 중 reaper 해제 차단. 해제는 finally.
_acquire_models()
try:
start = time.monotonic()
# page range 지정 시 per-request converter (모델 _models 재사용 → reload 없음).
# invariant: req.start_page/end_page = 1-based inclusive → marker 0-based 로 변환.
converter = _converter
if req.start_page is not None and req.end_page is not None:
page_range = list(range(req.start_page - 1, req.end_page)) # 0-based inclusive
converter = PdfConverter(artifact_dict=_models, config={"page_range": page_range})
try:
rendered = converter(str(p))
except Exception as exc:
logger.exception(f"[marker-service] conversion failed path={p}: {exc}")
raise HTTPException(
status_code=422,
detail={
"code": "conversion_failed",
"message": f"{type(exc).__name__}: {exc}",
},
) from exc
md_text, _meta, raw_images = text_from_rendered(rendered)
elapsed_ms = int((time.monotonic() - start) * 1000)
finally:
_release_models()
images_payload, truncated = _serialize_images(raw_images, str(p))
return ConvertResponse(
md_content=md_text,
md_content_hash=hashlib.sha256(md_text.encode("utf-8")).hexdigest(),
engine="marker",
engine_version=_engine_version,
elapsed_ms=elapsed_ms,
raw_metrics={
"page_count": getattr(rendered, "page_count", None),
"image_count_extracted": len(raw_images) if raw_images else 0,
"image_count_returned": len(images_payload),
},
images=images_payload,
images_truncated=truncated,
)
def _serialize_images(raw_images, src_path: str) -> tuple[list[ConvertImage], bool]:
"""marker 의 `_images` (dict[slug, PIL.Image]) → base64 ConvertImage 리스트.
가드:
- MAX_IMAGES_PER_DOC 초과 시 head 만 반환 + truncated=True
- per-image 직렬화 실패 시 해당 이미지만 skip + warn (전체 fail 안 함)
- per-image 결과 byte 크기가 MAX_BYTES_PER_IMAGE 초과 시 skip + warn
"""
if not raw_images:
return [], False
items = list(raw_images.items())
truncated = len(items) > MAX_IMAGES_PER_DOC
if truncated:
logger.warning(
f"[marker-service] images truncated path={src_path} "
f"total={len(items)} cap={MAX_IMAGES_PER_DOC}"
)
items = items[:MAX_IMAGES_PER_DOC]
out: list[ConvertImage] = []
for slug, pil_img in items:
try:
fmt_raw = (pil_img.format or "PNG").upper()
# WebP/GIF 도 marker 가 emit 가능하지만 본 1B.5 기준은 PNG/JPEG 우선.
# 알 수 없는 포맷이면 PNG 로 강제 (lossless re-encode).
fmt = fmt_raw if fmt_raw in {"PNG", "JPEG", "WEBP", "GIF"} else "PNG"
buf = io.BytesIO()
pil_img.save(buf, format=fmt)
raw_bytes = buf.getvalue()
if len(raw_bytes) > MAX_BYTES_PER_IMAGE:
logger.warning(
f"[marker-service] image too large skipped path={src_path} "
f"slug={slug} bytes={len(raw_bytes)} cap={MAX_BYTES_PER_IMAGE}"
)
continue
out.append(
ConvertImage(
slug=slug,
format=fmt.lower(),
width=pil_img.width,
height=pil_img.height,
bytes_b64=base64.b64encode(raw_bytes).decode("ascii"),
)
)
except Exception as exc:
logger.warning(
f"[marker-service] image serialize failed path={src_path} "
f"slug={slug}: {type(exc).__name__}: {exc}"
)
continue
return out, truncated