diff --git a/docker-compose.yml b/docker-compose.yml index d9f897e..a3bae71 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -54,39 +54,6 @@ services: start_period: 180s restart: unless-stopped - # Phase 1B (2026-05-01): PDF → markdown 변환. ocr-service 와 별도 컨테이너 (deps 충돌 회피). - marker-service: - build: ./services/marker - ports: - - "127.0.0.1:3300:3300" - expose: - - "3300" - environment: - - HF_HOME=/models/huggingface - - TORCH_HOME=/models/torch - # D-1 (crawl-24x7): idle-unload 전환 — 영구 점유(~3.5GB) 해제가 90% 봉투의 전제. - # /ready 는 idle 에서도 200 (fastapi depends_on service_healthy 유지). - # 롤백 = MARKER_PRELOAD=1 + MARKER_IDLE_UNLOAD_MINUTES=0. - - MARKER_PRELOAD=0 - - MARKER_IDLE_UNLOAD_MINUTES=${MARKER_IDLE_UNLOAD_MINUTES:-30} - volumes: - - ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro - - marker_models:/models - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:3300/ready"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 300s - restart: unless-stopped - # MinerU 2.5 VLM PDF→markdown 추출 — ★ marker-service 대체(컷오버 2026-06-18, A/B 8/8 PASS). # 단일카드 markdown VRAM ~10GB(marker)→~5.9GB 고정. fastapi 가 MARKER_ENDPOINT 로 호출. # 동기 do_parse 버그 회피 위해 server.py 는 async aio_do_parse 사용. 포트 3301. @@ -220,11 +187,9 @@ services: condition: service_healthy kordoc-service: condition: service_healthy - # 컷오버: mineru-service 가 마크다운 엔진. marker-service 는 Phase 2 에서 제거(롤백 대비 잔존). + # 마크다운 엔진 = mineru-service (marker-service 제거 2026-06-18, 롤백=git history). mineru-service: condition: service_healthy - marker-service: - condition: service_healthy env_file: - credentials.env environment: @@ -310,5 +275,4 @@ volumes: reranker_cache: ocr_models: stt_models: - marker_models: mineru_models: diff --git a/services/marker/Dockerfile b/services/marker/Dockerfile deleted file mode 100644 index 33ddfa4..0000000 --- a/services/marker/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -FROM python:3.12-slim - -WORKDIR /app - -RUN apt-get update && apt-get install -y --no-install-recommends \ - libgl1 libglib2.0-0 curl \ - && apt-get clean && rm -rf /var/lib/apt/lists/* - -COPY requirements.txt . -RUN pip install --no-cache-dir \ - --extra-index-url https://download.pytorch.org/whl/cu126 \ - -r requirements.txt - -# 모델 미다운로드 (HF cache volume → 첫 호출/warmup 시 적재). - -COPY server.py . - -EXPOSE 3300 -HEALTHCHECK --start-period=300s --interval=30s --timeout=10s --retries=3 \ - CMD curl -f http://localhost:3300/ready || exit 1 - -CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "3300"] diff --git a/services/marker/requirements.txt b/services/marker/requirements.txt deleted file mode 100644 index ef7cc06..0000000 --- a/services/marker/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -torch==2.11.0+cu126 -torchvision==0.26.0+cu126 -transformers==4.57.6 -surya-ocr==0.17.1 -marker-pdf==1.10.2 -pymupdf>=1.24.0,<2.0.0 -fastapi>=0.110.0,<1.0.0 -uvicorn[standard]>=0.27.0,<1.0.0 -pillow>=10.0.0,<12.0.0 diff --git a/services/marker/server.py b/services/marker/server.py deleted file mode 100644 index da7738a..0000000 --- a/services/marker/server.py +++ /dev/null @@ -1,325 +0,0 @@ -"""marker-service — POST /convert: PDF → markdown + 추출 이미지 base64. - -Phase 1B (2026-05-01) — 텍스트만 응답, 이미지 폐기. -Phase 1B.5 — `_images` 직렬화해서 base64 응답에 포함. NAS write 권한이 - 없는 stateless 변환기 유지 (fastapi 가 NAS persist 담당). -D-1 (plan crawl-24x7-1, 2026-06-10) — idle-unload 운영 전환: - MARKER_PRELOAD=0 : startup warmup 끔 (첫 /convert 시 lazy load) - MARKER_IDLE_UNLOAD_MINUTES : N분 유휴 시 모델 해제 (0=비활성, 기존 동작) - /ready 는 idle(미적재)에서도 200 — fastapi 의 depends_on service_healthy 가 - lazy 모드에서 영구 미기동으로 굳는 것 방지. 503 은 warmup_failed 한정. - -plan: ~/.claude/plans/piped-humming-crystal.md -""" -import base64 -import gc -import hashlib -import io -import logging -import os -import threading -import time -from pathlib import Path - -from fastapi import FastAPI, HTTPException, Response -from pydantic import BaseModel, Field - -from marker.converters.pdf import PdfConverter -from marker.models import create_model_dict -from marker.output import text_from_rendered -import marker as marker_module - -logger = logging.getLogger(__name__) -app = FastAPI() - -os.environ.setdefault("HF_HOME", "/models/huggingface") -os.environ.setdefault("TORCH_HOME", "/models/torch") - -_models = None -_converter = None -try: - import importlib.metadata - _engine_version = importlib.metadata.version("marker-pdf") -except Exception: - _engine_version = "unknown" -_warmup_done = False -_warmup_error: str | None = None -_warmup_lock = threading.Lock() - -# D-1 idle-unload 상태 — 전이는 전부 _warmup_lock 아래 -_PRELOAD = os.getenv("MARKER_PRELOAD", "1") != "0" -_IDLE_UNLOAD_MINUTES = int(os.getenv("MARKER_IDLE_UNLOAD_MINUTES", "0")) -_inflight = 0 -_last_used = time.monotonic() - -# 이미지 응답 cap. base64 응답 크기 폭주 방지. 사용자 PDF 풀 측정 (Phase 1D) 시 -# 가장 이미지 많은 문서가 ~30건 수준 → 200 은 안전 마진. 초과 시 truncate flag 응답. -MAX_IMAGES_PER_DOC = int(os.getenv("MARKER_MAX_IMAGES_PER_DOC", "200")) -# per-image 최대 raw bytes (base64 전). 그래픽이 많은 풀페이지 스캔 회피. -MAX_BYTES_PER_IMAGE = int(os.getenv("MARKER_MAX_BYTES_PER_IMAGE", str(10 * 1024 * 1024))) - - -def _ensure_warmup() -> None: - """첫 /convert 또는 startup hook 시 모델 로드. HF cache volume 활용.""" - global _models, _converter, _warmup_done, _warmup_error - if _warmup_done: - return - with _warmup_lock: - if _warmup_done: - return - try: - logger.info("[marker-service] warmup start") - _models = create_model_dict() - _converter = PdfConverter(artifact_dict=_models) - _warmup_done = True - _warmup_error = None - logger.info(f"[marker-service] warmup done engine_version={_engine_version}") - except Exception as exc: - _warmup_error = f"{type(exc).__name__}: {exc}" - logger.exception("[marker-service] warmup failed") - raise - - -def _acquire_models(): - """warmup 보장 + inflight 진입을 원자적으로 — ensure 직후 reaper 가 해제하는 경합 차단.""" - global _inflight - while True: - _ensure_warmup() - with _warmup_lock: - if _warmup_done: - _inflight += 1 - return - # ensure 와 lock 재진입 사이에 unload 가 끼어든 희귀 경합 — 재시도 - - -def _release_models(): - global _inflight, _last_used - with _warmup_lock: - _inflight -= 1 - _last_used = time.monotonic() - - -def _maybe_unload() -> None: - """유휴 시 모델 해제. 변환 중(inflight>0)이면 절대 해제하지 않는다. - - split 변환의 배치 사이 간격은 초 단위 — N>=1분 임계면 배치 사이 해제 없음. - """ - global _models, _converter, _warmup_done - with _warmup_lock: - if not _warmup_done or _inflight > 0: - return - if time.monotonic() - _last_used < _IDLE_UNLOAD_MINUTES * 60: - return - _models = None - _converter = None - _warmup_done = False - gc.collect() - try: - import torch - torch.cuda.empty_cache() - except Exception: - pass - logger.info(f"[marker-service] idle-unload: 모델 해제 (유휴 {_IDLE_UNLOAD_MINUTES}분 초과)") - - -async def _idle_reaper(): - import asyncio - while True: - await asyncio.sleep(60) - try: - _maybe_unload() - except Exception: - logger.exception("[marker-service] idle reaper 오류") - - -@app.on_event("startup") -async def startup(): - """startup hook — warmup 은 MARKER_PRELOAD 게이트 (D-1: lazy 기본 전환은 compose 가).""" - import asyncio - if _PRELOAD: - asyncio.create_task(asyncio.to_thread(_ensure_warmup)) - if _IDLE_UNLOAD_MINUTES > 0: - asyncio.create_task(_idle_reaper()) - logger.info(f"[marker-service] idle-unload 활성: {_IDLE_UNLOAD_MINUTES}분") - - -class ConvertRequest(BaseModel): - file_path: str - max_pages: int | None = None - # page range (1-based inclusive) — LargeDoc split 변환용. marker 내부 0-based 변환은 - # convert() 에 격리 (page numbering invariant: DB/API=1-based, marker=0-based). - start_page: int | None = None - end_page: int | None = None - - -class ConvertImage(BaseModel): - """marker 추출 이미지 1건. fastapi 가 NAS 에 쓰고 docimg:img_NNN 으로 ref 정규화.""" - slug: str # marker 원본 slug (예: '_page_0_Picture_3.jpeg') - format: str # 'png' | 'jpeg' | 'webp' | 'gif' - width: int | None = None - height: int | None = None - bytes_b64: str # base64-encoded raw bytes - - -class ConvertResponse(BaseModel): - md_content: str - md_content_hash: str - engine: str - engine_version: str - elapsed_ms: int - raw_metrics: dict - images: list[ConvertImage] = Field(default_factory=list) - images_truncated: bool = False - - -@app.get("/health") -def health(): - return {"status": "ok", "service": "marker-service"} - - -@app.get("/ready") -async def ready(response: Response): - """Round 4 #1+#2: Response.status_code 명시 + warmup_error 노출. - - D-1: idle(미적재) = 200. 503 은 warmup_failed 한정 — lazy 모드에서 fastapi - depends_on service_healthy 가 영구 미기동으로 굳지 않게. 배포 검증에서 - 'status=ready' 단언하던 runbook 은 강제 warm 호출(/convert 1건)로 대체. - """ - if _warmup_error: - response.status_code = 503 - return { - "status": "warmup_failed", - "engine": "marker", - "engine_version": _engine_version, - "error": _warmup_error, - } - if not _warmup_done: - return { - "status": "warming_up" if _PRELOAD else "idle", - "engine": "marker", - "engine_version": _engine_version, - "models_loaded": False, - "idle_unload_minutes": _IDLE_UNLOAD_MINUTES, - } - return { - "status": "ready", - "engine": "marker", - "engine_version": _engine_version, - "models_loaded": True, - "inflight": _inflight, - "idle_unload_minutes": _IDLE_UNLOAD_MINUTES, - } - - -@app.post("/convert", response_model=ConvertResponse) -async def convert(req: ConvertRequest): - p = Path(req.file_path) - if not p.is_file(): - raise HTTPException(404, detail={"code": "file_not_found", "message": str(p)}) - if req.start_page is not None and req.end_page is not None: - if req.start_page < 1 or req.end_page < req.start_page: - raise HTTPException( - 422, - detail={ - "code": "bad_page_range", - "message": f"start_page={req.start_page} end_page={req.end_page}", - }, - ) - - # D-1: warmup 보장 + inflight 진입 원자화 — 변환 중 reaper 해제 차단. 해제는 finally. - _acquire_models() - try: - start = time.monotonic() - # page range 지정 시 per-request converter (모델 _models 재사용 → reload 없음). - # invariant: req.start_page/end_page = 1-based inclusive → marker 0-based 로 변환. - converter = _converter - if req.start_page is not None and req.end_page is not None: - page_range = list(range(req.start_page - 1, req.end_page)) # 0-based inclusive - converter = PdfConverter(artifact_dict=_models, config={"page_range": page_range}) - try: - rendered = converter(str(p)) - except Exception as exc: - logger.exception(f"[marker-service] conversion failed path={p}: {exc}") - raise HTTPException( - status_code=422, - detail={ - "code": "conversion_failed", - "message": f"{type(exc).__name__}: {exc}", - }, - ) from exc - - md_text, _meta, raw_images = text_from_rendered(rendered) - elapsed_ms = int((time.monotonic() - start) * 1000) - finally: - _release_models() - - images_payload, truncated = _serialize_images(raw_images, str(p)) - - return ConvertResponse( - md_content=md_text, - md_content_hash=hashlib.sha256(md_text.encode("utf-8")).hexdigest(), - engine="marker", - engine_version=_engine_version, - elapsed_ms=elapsed_ms, - raw_metrics={ - "page_count": getattr(rendered, "page_count", None), - "image_count_extracted": len(raw_images) if raw_images else 0, - "image_count_returned": len(images_payload), - }, - images=images_payload, - images_truncated=truncated, - ) - - -def _serialize_images(raw_images, src_path: str) -> tuple[list[ConvertImage], bool]: - """marker 의 `_images` (dict[slug, PIL.Image]) → base64 ConvertImage 리스트. - - 가드: - - MAX_IMAGES_PER_DOC 초과 시 head 만 반환 + truncated=True - - per-image 직렬화 실패 시 해당 이미지만 skip + warn (전체 fail 안 함) - - per-image 결과 byte 크기가 MAX_BYTES_PER_IMAGE 초과 시 skip + warn - """ - if not raw_images: - return [], False - - items = list(raw_images.items()) - truncated = len(items) > MAX_IMAGES_PER_DOC - if truncated: - logger.warning( - f"[marker-service] images truncated path={src_path} " - f"total={len(items)} cap={MAX_IMAGES_PER_DOC}" - ) - items = items[:MAX_IMAGES_PER_DOC] - - out: list[ConvertImage] = [] - for slug, pil_img in items: - try: - fmt_raw = (pil_img.format or "PNG").upper() - # WebP/GIF 도 marker 가 emit 가능하지만 본 1B.5 기준은 PNG/JPEG 우선. - # 알 수 없는 포맷이면 PNG 로 강제 (lossless re-encode). - fmt = fmt_raw if fmt_raw in {"PNG", "JPEG", "WEBP", "GIF"} else "PNG" - buf = io.BytesIO() - pil_img.save(buf, format=fmt) - raw_bytes = buf.getvalue() - if len(raw_bytes) > MAX_BYTES_PER_IMAGE: - logger.warning( - f"[marker-service] image too large skipped path={src_path} " - f"slug={slug} bytes={len(raw_bytes)} cap={MAX_BYTES_PER_IMAGE}" - ) - continue - out.append( - ConvertImage( - slug=slug, - format=fmt.lower(), - width=pil_img.width, - height=pil_img.height, - bytes_b64=base64.b64encode(raw_bytes).decode("ascii"), - ) - ) - except Exception as exc: - logger.warning( - f"[marker-service] image serialize failed path={src_path} " - f"slug={slug}: {type(exc).__name__}: {exc}" - ) - continue - return out, truncated