From 2528996deea8381925a2ad4bd76194f3be17b4b8 Mon Sep 17 00:00:00 2001 From: hyungi Date: Sun, 24 May 2026 07:01:34 +0000 Subject: [PATCH] feat(marker): support page-range conversion in /convert ConvertRequest.start_page/end_page (1-based inclusive); per-request PdfConverter with config page_range, reuses loaded models. 1-based->0-based contained in marker adapter. PR-DocSrv-LargeDoc-Split-Markdown-1 commit 2. Co-Authored-By: Claude Opus 4.7 (1M context) --- services/marker/server.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/services/marker/server.py b/services/marker/server.py index 62ba851..3e6fb5d 100644 --- a/services/marker/server.py +++ b/services/marker/server.py @@ -78,6 +78,10 @@ async def startup(): class ConvertRequest(BaseModel): file_path: str max_pages: int | None = None + # page range (1-based inclusive) — LargeDoc split 변환용. marker 내부 0-based 변환은 + # convert() 에 격리 (page numbering invariant: DB/API=1-based, marker=0-based). + start_page: int | None = None + end_page: int | None = None class ConvertImage(BaseModel): @@ -139,8 +143,22 @@ async def convert(req: ConvertRequest): raise HTTPException(404, detail={"code": "file_not_found", "message": str(p)}) start = time.monotonic() + # page range 지정 시 per-request converter (모델 _models 재사용 → reload 없음). + # invariant: req.start_page/end_page = 1-based inclusive → marker 0-based 로 변환. + converter = _converter + if req.start_page is not None and req.end_page is not None: + if req.start_page < 1 or req.end_page < req.start_page: + raise HTTPException( + 422, + detail={ + "code": "bad_page_range", + "message": f"start_page={req.start_page} end_page={req.end_page}", + }, + ) + page_range = list(range(req.start_page - 1, req.end_page)) # 0-based inclusive + converter = PdfConverter(artifact_dict=_models, config={"page_range": page_range}) try: - rendered = _converter(str(p)) + rendered = converter(str(p)) except Exception as exc: logger.exception(f"[marker-service] conversion failed path={p}: {exc}") raise HTTPException(