feat(marker): support page-range conversion in /convert

ConvertRequest.start_page/end_page (1-based inclusive); per-request PdfConverter with config page_range, reuses loaded models. 1-based->0-based contained in marker adapter. PR-DocSrv-LargeDoc-Split-Markdown-1 commit 2.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
hyungi
2026-05-24 07:01:34 +00:00
parent 72190cf90a
commit 2528996dee
+19 -1
View File
@@ -78,6 +78,10 @@ async def startup():
class ConvertRequest(BaseModel):
file_path: str
max_pages: int | None = None
# page range (1-based inclusive) — LargeDoc split 변환용. marker 내부 0-based 변환은
# convert() 에 격리 (page numbering invariant: DB/API=1-based, marker=0-based).
start_page: int | None = None
end_page: int | None = None
class ConvertImage(BaseModel):
@@ -139,8 +143,22 @@ async def convert(req: ConvertRequest):
raise HTTPException(404, detail={"code": "file_not_found", "message": str(p)})
start = time.monotonic()
# page range 지정 시 per-request converter (모델 _models 재사용 → reload 없음).
# invariant: req.start_page/end_page = 1-based inclusive → marker 0-based 로 변환.
converter = _converter
if req.start_page is not None and req.end_page is not None:
if req.start_page < 1 or req.end_page < req.start_page:
raise HTTPException(
422,
detail={
"code": "bad_page_range",
"message": f"start_page={req.start_page} end_page={req.end_page}",
},
)
page_range = list(range(req.start_page - 1, req.end_page)) # 0-based inclusive
converter = PdfConverter(artifact_dict=_models, config={"page_range": page_range})
try:
rendered = _converter(str(p))
rendered = converter(str(p))
except Exception as exc:
logger.exception(f"[marker-service] conversion failed path={p}: {exc}")
raise HTTPException(