"""marker-service — POST /convert: PDF → markdown + extracted images (base64).

Phase 1B (2026-05-01) — text-only response, images discarded.
Phase 1B.5 — serialize marker's `_images` and include them base64-encoded in
             the response. The service stays a stateless converter without NAS
             write permission (fastapi is responsible for persisting to NAS).

D-1 (plan crawl-24x7-1, 2026-06-10) — switch to idle-unload operation:
  MARKER_PRELOAD=0            : disable startup warmup (lazy load on first /convert)
  MARKER_IDLE_UNLOAD_MINUTES  : release models after N idle minutes
                                (0 = disabled, the previous behavior)
  /ready returns 200 even while idle (models not loaded) — prevents fastapi's
  depends_on service_healthy from staying stuck forever in lazy mode.
  503 is limited to warmup_failed.

plan: ~/.claude/plans/piped-humming-crystal.md
"""
import asyncio
import base64
import gc
import hashlib
import io
import logging
import os
import threading
import time
from pathlib import Path

from fastapi import FastAPI, HTTPException, Response
from pydantic import BaseModel, Field

# Cache locations must be exported BEFORE the marker imports below.
# huggingface_hub resolves HF_HOME when it is first imported (see its
# environment-variable docs), and the marker stack is expected to import it
# transitively — setting the default afterwards would be silently ignored.
os.environ.setdefault("HF_HOME", "/models/huggingface")
os.environ.setdefault("TORCH_HOME", "/models/torch")

from marker.converters.pdf import PdfConverter  # noqa: E402
from marker.models import create_model_dict  # noqa: E402
from marker.output import text_from_rendered  # noqa: E402
import marker as marker_module  # noqa: E402,F401

logger = logging.getLogger(__name__)

app = FastAPI()

# Loaded model artifacts and the shared default converter (None while unloaded).
_models = None
_converter = None

try:
    import importlib.metadata

    _engine_version = importlib.metadata.version("marker-pdf")
except Exception:
    _engine_version = "unknown"

_warmup_done = False
_warmup_error: str | None = None
_warmup_lock = threading.Lock()

# D-1 idle-unload state — every transition happens under _warmup_lock.
_PRELOAD = os.getenv("MARKER_PRELOAD", "1") != "0"
_IDLE_UNLOAD_MINUTES = int(os.getenv("MARKER_IDLE_UNLOAD_MINUTES", "0"))
_inflight = 0
_last_used = time.monotonic()

# Strong references to fire-and-forget tasks. The event loop only keeps weak
# references, so an unreferenced task may be garbage-collected mid-execution.
_background_tasks: set[asyncio.Task] = set()

# Cap on images per response, guarding against runaway base64 payload size.
# When the user PDF pool was measured (Phase 1D) the most image-heavy document
# had ~30 images, so 200 is a safe margin. Exceeding it sets a truncate flag.
MAX_IMAGES_PER_DOC = int(os.getenv("MARKER_MAX_IMAGES_PER_DOC", "200"))
# Max raw bytes per image (before base64). Avoids graphics-heavy full-page scans.
MAX_BYTES_PER_IMAGE = int(os.getenv("MARKER_MAX_BYTES_PER_IMAGE", str(10 * 1024 * 1024)))


def _ensure_warmup() -> None:
    """Load the models on the first /convert or from the startup hook.

    Uses the HF cache volume. The `_warmup_done` flag is checked once without
    the lock (fast path) and again under `_warmup_lock`, so concurrent callers
    load the models at most once.

    Raises:
        Exception: whatever model loading raised. The error is also recorded
            in `_warmup_error`, which /ready reports as a 503.
    """
    global _models, _converter, _warmup_done, _warmup_error, _last_used
    if _warmup_done:
        return
    with _warmup_lock:
        if _warmup_done:
            return
        try:
            logger.info("[marker-service] warmup start")
            _models = create_model_dict()
            _converter = PdfConverter(artifact_dict=_models)
            # Restart the idle clock at load time. Otherwise a slow load, or a
            # lazy reload after a long idle period, leaves _last_used stale and
            # the reaper may unload the models it just watched being loaded.
            _last_used = time.monotonic()
            _warmup_done = True
            _warmup_error = None
            logger.info(f"[marker-service] warmup done engine_version={_engine_version}")
        except Exception as exc:
            _warmup_error = f"{type(exc).__name__}: {exc}"
            logger.exception("[marker-service] warmup failed")
            raise


def _acquire_models() -> None:
    """Ensure warmup and enter the inflight section atomically.

    Closes the race where the reaper unloads the models right after
    `_ensure_warmup()` returned. Every call must be paired with
    `_release_models()`.
    """
    global _inflight
    while True:
        _ensure_warmup()
        with _warmup_lock:
            if _warmup_done:
                _inflight += 1
                return
        # Rare race: an unload slipped in between ensure and re-taking the
        # lock — retry.


def _release_models() -> None:
    """Leave the inflight section and restart the idle clock."""
    global _inflight, _last_used
    with _warmup_lock:
        _inflight -= 1
        _last_used = time.monotonic()


def _maybe_unload() -> None:
    """Release the models if the service has been idle long enough.

    Never unloads while a conversion is running (inflight > 0). The gaps
    between batches of a split conversion are on the order of seconds, so with
    a threshold of N >= 1 minute no unload happens between batches.
    """
    global _models, _converter, _warmup_done
    with _warmup_lock:
        if not _warmup_done or _inflight > 0:
            return
        if time.monotonic() - _last_used < _IDLE_UNLOAD_MINUTES * 60:
            return
        _models = None
        _converter = None
        _warmup_done = False
        gc.collect()
        try:
            import torch

            torch.cuda.empty_cache()
        except Exception:
            # Best effort: torch or CUDA may be unavailable; the references
            # have already been dropped above.
            pass
        logger.info(f"[marker-service] idle-unload: 모델 해제 (유휴 {_IDLE_UNLOAD_MINUTES}분 초과)")


def _spawn_background(coro) -> None:
    """Schedule `coro` as a task and hold a reference until it finishes."""
    task = asyncio.create_task(coro)
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)


async def _preload_models() -> None:
    """Run the startup warmup in a worker thread, off the event loop."""
    try:
        await asyncio.to_thread(_ensure_warmup)
    except Exception:
        # _ensure_warmup already logged the traceback and recorded the error in
        # _warmup_error; consuming it here avoids an unretrieved-task-exception
        # report on top of that.
        logger.warning("[marker-service] startup preload failed; /ready reports warmup_failed")


async def _idle_reaper():
    """Check once a minute whether the models can be unloaded."""
    while True:
        await asyncio.sleep(60)
        try:
            # _maybe_unload blocks on _warmup_lock, which a warmup holds for
            # the whole model load. Run it in a worker thread so the event
            # loop (and with it /health and /ready) stays responsive.
            await asyncio.to_thread(_maybe_unload)
        except Exception:
            logger.exception("[marker-service] idle reaper 오류")


@app.on_event("startup")
async def startup():
    """Startup hook — warmup is gated by MARKER_PRELOAD.

    D-1: switching the default to lazy loading is done by compose.
    """
    if _PRELOAD:
        _spawn_background(_preload_models())
    if _IDLE_UNLOAD_MINUTES > 0:
        _spawn_background(_idle_reaper())
        logger.info(f"[marker-service] idle-unload 활성: {_IDLE_UNLOAD_MINUTES}분")


class ConvertRequest(BaseModel):
    """Body of POST /convert."""

    file_path: str
    # NOTE(review): max_pages is accepted but not read anywhere in this module.
    max_pages: int | None = None
    # Page range (1-based, inclusive) — used for LargeDoc split conversion.
    # The translation to marker's 0-based indices is isolated in convert()
    # (page numbering invariant: DB/API = 1-based, marker = 0-based).
    start_page: int | None = None
    end_page: int | None = None


class ConvertImage(BaseModel):
    """One image extracted by marker.

    fastapi writes it to the NAS and normalizes the ref to docimg:img_NNN.
    """

    slug: str  # original marker slug (e.g. '_page_0_Picture_3.jpeg')
    format: str  # 'png' | 'jpeg' | 'webp' | 'gif'
    width: int | None = None
    height: int | None = None
    bytes_b64: str  # base64-encoded raw bytes


class ConvertResponse(BaseModel):
    """Body returned by POST /convert."""

    md_content: str
    md_content_hash: str
    engine: str
    engine_version: str
    elapsed_ms: int
    raw_metrics: dict
    images: list[ConvertImage] = Field(default_factory=list)
    images_truncated: bool = False


@app.get("/health")
def health():
    """Liveness probe; does not depend on model state."""
    return {"status": "ok", "service": "marker-service"}


@app.get("/ready")
async def ready(response: Response):
    """Readiness probe.

    Round 4 #1+#2: sets Response.status_code explicitly and exposes
    warmup_error.

    D-1: idle (models not loaded) = 200. 503 is limited to warmup_failed, so
    that fastapi's depends_on service_healthy does not stay stuck forever in
    lazy mode. Runbooks that asserted 'status=ready' during deploy
    verification are replaced by a forced warm call (one /convert).
    """
    if _warmup_error:
        response.status_code = 503
        return {
            "status": "warmup_failed",
            "engine": "marker",
            "engine_version": _engine_version,
            "error": _warmup_error,
        }
    if not _warmup_done:
        return {
            "status": "warming_up" if _PRELOAD else "idle",
            "engine": "marker",
            "engine_version": _engine_version,
            "models_loaded": False,
            "idle_unload_minutes": _IDLE_UNLOAD_MINUTES,
        }
    return {
        "status": "ready",
        "engine": "marker",
        "engine_version": _engine_version,
        "models_loaded": True,
        "inflight": _inflight,
        "idle_unload_minutes": _IDLE_UNLOAD_MINUTES,
    }


@app.post("/convert", response_model=ConvertResponse)
async def convert(req: ConvertRequest):
    """Convert the PDF at `req.file_path` to markdown plus extracted images.

    Raises:
        HTTPException: 404 `file_not_found` when the path is not a file;
            422 `bad_page_range` for an invalid start/end pair;
            422 `conversion_failed` when marker raises.
    """
    p = Path(req.file_path)
    if not p.is_file():
        raise HTTPException(404, detail={"code": "file_not_found", "message": str(p)})
    # NOTE(review): a request carrying only one of start_page/end_page skips
    # this check and converts the whole document — confirm that is intended.
    if req.start_page is not None and req.end_page is not None:
        if req.start_page < 1 or req.end_page < req.start_page:
            raise HTTPException(
                422,
                detail={
                    "code": "bad_page_range",
                    "message": f"start_page={req.start_page} end_page={req.end_page}",
                },
            )

    # D-1: warmup guarantee + inflight entry are atomic, so the reaper cannot
    # unload mid-conversion. The release happens in `finally`.
    # NOTE(review): model loading and the conversion below run synchronously
    # inside this coroutine, so the event loop (including /health and /ready)
    # is blocked for their duration. Left unchanged because moving them to a
    # thread would also allow concurrent conversions on the shared models.
    _acquire_models()
    try:
        start = time.monotonic()
        # With a page range, build a per-request converter (reuses _models, so
        # nothing is reloaded).
        # Invariant: req.start_page/end_page are 1-based inclusive → converted
        # to marker's 0-based indices here.
        converter = _converter
        if req.start_page is not None and req.end_page is not None:
            page_range = list(range(req.start_page - 1, req.end_page))  # 0-based inclusive
            converter = PdfConverter(artifact_dict=_models, config={"page_range": page_range})

        try:
            rendered = converter(str(p))
        except Exception as exc:
            logger.exception(f"[marker-service] conversion failed path={p}: {exc}")
            raise HTTPException(
                status_code=422,
                detail={
                    "code": "conversion_failed",
                    "message": f"{type(exc).__name__}: {exc}",
                },
            ) from exc

        md_text, _meta, raw_images = text_from_rendered(rendered)
        elapsed_ms = int((time.monotonic() - start) * 1000)
    finally:
        _release_models()

    images_payload, truncated = _serialize_images(raw_images, str(p))

    return ConvertResponse(
        md_content=md_text,
        md_content_hash=hashlib.sha256(md_text.encode("utf-8")).hexdigest(),
        engine="marker",
        engine_version=_engine_version,
        elapsed_ms=elapsed_ms,
        raw_metrics={
            "page_count": getattr(rendered, "page_count", None),
            "image_count_extracted": len(raw_images) if raw_images else 0,
            "image_count_returned": len(images_payload),
        },
        images=images_payload,
        images_truncated=truncated,
    )


def _serialize_images(raw_images, src_path: str) -> tuple[list[ConvertImage], bool]:
    """Turn marker's `_images` (dict[slug, PIL.Image]) into base64 ConvertImage items.

    Guards:
      - more than MAX_IMAGES_PER_DOC images: only the head is returned and
        truncated=True
      - a single image failing to serialize: that image is skipped with a
        warning (the request as a whole does not fail)
      - an encoded image larger than MAX_BYTES_PER_IMAGE: skipped with a warning

    Args:
        raw_images: mapping of slug to image, or a falsy value when there are none.
        src_path: source PDF path, used only in log messages.

    Returns:
        (images, truncated) — the serialized images and whether the cap was hit.
    """
    if not raw_images:
        return [], False

    items = list(raw_images.items())
    truncated = len(items) > MAX_IMAGES_PER_DOC
    if truncated:
        logger.warning(
            f"[marker-service] images truncated path={src_path} "
            f"total={len(items)} cap={MAX_IMAGES_PER_DOC}"
        )
        items = items[:MAX_IMAGES_PER_DOC]

    out: list[ConvertImage] = []
    for slug, pil_img in items:
        try:
            fmt_raw = (pil_img.format or "PNG").upper()
            # marker can also emit WebP/GIF, but 1B.5 prioritizes PNG/JPEG.
            # Unknown formats are forced to PNG (lossless re-encode).
            fmt = fmt_raw if fmt_raw in {"PNG", "JPEG", "WEBP", "GIF"} else "PNG"
            buf = io.BytesIO()
            pil_img.save(buf, format=fmt)
            raw_bytes = buf.getvalue()
            if len(raw_bytes) > MAX_BYTES_PER_IMAGE:
                logger.warning(
                    f"[marker-service] image too large skipped path={src_path} "
                    f"slug={slug} bytes={len(raw_bytes)} cap={MAX_BYTES_PER_IMAGE}"
                )
                continue
            out.append(
                ConvertImage(
                    slug=slug,
                    format=fmt.lower(),
                    width=pil_img.width,
                    height=pil_img.height,
                    bytes_b64=base64.b64encode(raw_bytes).decode("ascii"),
                )
            )
        except Exception as exc:
            logger.warning(
                f"[marker-service] image serialize failed path={src_path} "
                f"slug={slug}: {type(exc).__name__}: {exc}"
            )
            continue

    return out, truncated