ops(gpu-health): GPU 서비스 health/smoke 표준화 + synthetic VRAM 피크 가드
PR-GPU-Health-1. 운영 준비성 표준화 PR (모델 성능 개선 아님).

- OCR /smoke endpoint 추가 (160x60 OK PNG in-memory, 200/503 분기, Docker healthcheck 미사용)
- marker /health endpoint 추가 (stt/ocr 동일 시그니처)
- reranker docker-compose healthcheck 추가 (TEI :80/health)
- scripts/gpu_service_smoke.sh: docker exec 표준 점검 (OCR/STT expose-only)
- scripts/gpu_vram_fixture.sh: Mode A sequential + Mode B light overlap + --stress 옵션
- tests/load/fixtures/: synthetic ocr_ok.png / sine_30s.wav / lorem_1p.pdf

OCR 빈 응답 false negative — root cause: ports 미매핑.
결정: ocr-service / stt-service 는 expose-only 유지, 운영 점검은 docker exec 내부 curl 표준.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -100,6 +100,11 @@ class ConvertResponse(BaseModel):
|
||||
images_truncated: bool = False
|
||||
|
||||
|
||||
@app.get("/health")
def health():
    """Return a static OK payload identifying this service as marker-service."""
    payload = {"status": "ok", "service": "marker-service"}
    return payload
|
||||
|
||||
|
||||
@app.get("/ready")
|
||||
async def ready(response: Response):
|
||||
"""Round 4 #1+#2: Response.status_code 명시 + warmup_error 노출."""
|
||||
|
||||
+28
-1
@@ -4,13 +4,16 @@
|
||||
모델은 첫 요청 시 lazy loading.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
|
||||
import fitz
|
||||
import torch
|
||||
from fastapi import FastAPI
|
||||
from PIL import Image
|
||||
from fastapi.responses import JSONResponse
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
@@ -82,6 +85,30 @@ def ready():
|
||||
}
|
||||
|
||||
|
||||
@app.get("/smoke")
async def smoke():
    """Operational check that one OCR round trip finishes without raising.

    A 160x60 white image with the text "OK" is drawn in memory and handed to
    ``_ocr_image`` on the default executor, with a 20 second wait limit. The
    OCR output itself is discarded; only completion is checked. This endpoint
    is not used as the Docker healthcheck.

    Returns:
        On success, a dict with ``status``, ``service``, ``inference`` and the
        wall-clock ``elapsed_ms``. On failure, a 503 ``JSONResponse`` whose
        ``reason`` is ``"timeout"`` or the class name of the raised exception.
    """
    began = time.monotonic()

    # Synthetic probe image, built before the guarded section.
    probe = Image.new("RGB", (160, 60), color="white")
    ImageDraw.Draw(probe).text((30, 20), "OK", fill="black")

    try:
        pending = asyncio.get_running_loop().run_in_executor(None, _ocr_image, probe)
        await asyncio.wait_for(pending, timeout=20.0)
    except asyncio.TimeoutError:
        failure = "timeout"
    except Exception as exc:
        # Only the exception class name is exposed to the caller.
        failure = type(exc).__name__
    else:
        failure = None

    if failure is not None:
        return JSONResponse(
            status_code=503,
            content={"status": "degraded", "reason": failure},
        )

    took_ms = int((time.monotonic() - began) * 1000)
    return {
        "status": "ok",
        "service": "ocr-service",
        "inference": "ok",
        "elapsed_ms": took_ms,
    }
|
||||
|
||||
|
||||
@app.post("/ocr")
|
||||
async def ocr_endpoint(body: dict):
|
||||
"""PDF/이미지 OCR — 페이지 단위 처리 (전체 일괄 로드 금지)"""
|
||||
|
||||
Reference in New Issue
Block a user