Files
Hyungi Ahn 98ee7dffe2 ops(gpu-health): GPU 서비스 health/smoke 표준화 + synthetic VRAM 피크 가드
PR-GPU-Health-1. 운영 준비성 표준화 PR (모델 성능 개선 아님).

- OCR /smoke endpoint 추가 (160x60 OK PNG in-memory, 200/503 분기, Docker healthcheck 미사용)
- marker /health endpoint 추가 (stt/ocr 동일 시그니처)
- reranker docker-compose healthcheck 추가 (TEI :80/health)
- scripts/gpu_service_smoke.sh: docker exec 표준 점검 (OCR/STT expose-only)
- scripts/gpu_vram_fixture.sh: Mode A sequential + Mode B light overlap + --stress 옵션
- tests/load/fixtures/: synthetic ocr_ok.png / sine_30s.wav / lorem_1p.pdf

OCR 빈 응답 false negative — root cause: ports 미매핑.
결정: ocr-service / stt-service 는 expose-only 유지, 운영 점검은 docker exec 내부 curl 표준.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 09:42:07 +09:00

152 lines
4.6 KiB
Python

"""OCR 마이크로서비스 — Surya OCR 0.17.x (GPU) + PyMuPDF (PDF→이미지)
페이지 단위 스트리밍으로 대형 PDF도 메모리 피크 억제.
모델은 첫 요청 시 lazy loading.
"""
import asyncio
import time
import unicodedata
from pathlib import Path
import fitz
import torch
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from PIL import Image, ImageDraw
# ASGI application; the route decorators below register their handlers on it.
app = FastAPI()
# Process-wide cache of the Surya predictors; stays None until _load_models() first fills it.
_models = None
# Lower-case file suffixes that /ocr treats as a single image; every other file is opened with PyMuPDF.
IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".tiff", ".tif", ".bmp", ".gif", ".webp"}
def _resolve_path(file_path: str) -> Path | None:
"""NFC(DB) vs NFD(NFS) 한글 경로 정규화 차이 흡수."""
candidates = [file_path,
unicodedata.normalize("NFD", file_path),
unicodedata.normalize("NFC", file_path)]
for c in candidates:
p = Path(c)
if p.exists():
return p
# 마지막 fallback: parent 디렉토리에서 이름을 NFC 로 매칭
parent = Path(file_path).parent
if parent.exists():
target = unicodedata.normalize("NFC", Path(file_path).name)
for child in parent.iterdir():
if unicodedata.normalize("NFC", child.name) == target:
return child
return None
def _load_models():
    """Return the cached Surya 0.17 predictors, building them on first use.

    The surya imports are deferred to the first call so that importing this
    module does not pull them in. The result is stored in the module-level
    ``_models`` and reused by every later call.

    Returns:
        Dict with the ``"detection"`` and ``"recognition"`` predictors.
    """
    global _models
    if _models is None:
        from surya.detection import DetectionPredictor
        from surya.recognition import FoundationPredictor, RecognitionPredictor

        # The recognition predictor is constructed on top of the foundation predictor.
        foundation = FoundationPredictor()
        detector = DetectionPredictor()
        recognizer = RecognitionPredictor(foundation)
        _models = {"detection": detector, "recognition": recognizer}
    return _models
def _ocr_image(image: Image.Image) -> str:
    """Recognize the text in one image and return it as newline-joined lines.

    Args:
        image: Image handed to the recognition predictor as a one-element batch.

    Returns:
        The recognized lines of the first (only) result joined with ``"\\n"``,
        or ``""`` when the predictor returns nothing.
    """
    predictors = _load_models()
    recognize = predictors["recognition"]
    predictions = recognize([image], det_predictor=predictors["detection"])
    if predictions:
        first_page = predictions[0]
        return "\n".join(line.text for line in first_page.text_lines)
    return ""
@app.get("/health")
def health():
    """Liveness probe for the Docker healthcheck: confirms only that the process is serving."""
    payload = {"status": "ok", "service": "ocr-surya"}
    return payload
@app.get("/ready")
def ready():
    """Readiness probe for deploy verification: CUDA availability plus model-cache state.

    Returns:
        Dict with ``ready`` (CUDA available and models loaded), ``cuda``,
        ``models_loaded`` and ``gpu_name`` (name of device 0, or ``None``
        when CUDA is unavailable).
    """
    has_cuda = torch.cuda.is_available()
    has_models = _models is not None
    # Only query the device name when CUDA is actually usable.
    if has_cuda:
        device_name = torch.cuda.get_device_name(0)
    else:
        device_name = None
    return {
        "ready": has_cuda and has_models,
        "cuda": has_cuda,
        "models_loaded": has_models,
        "gpu_name": device_name,
    }
def _smoke_fixture() -> Image.Image:
    """Draw the in-memory 160x60 white canvas with a black "OK" label used by /smoke."""
    canvas = Image.new("RGB", (160, 60), color="white")
    ImageDraw.Draw(canvas).text((30, 20), "OK", fill="black")
    return canvas


@app.get("/smoke")
async def smoke():
    """Operational check that one OCR round trip finishes without raising.

    Not used as the Docker healthcheck. The synthetic image is run through
    ``_ocr_image`` on the default executor with a 20-second limit; the
    recognized text itself is not inspected.

    Returns:
        200 with ``status``/``service``/``inference``/``elapsed_ms`` on
        success; 503 with ``status: degraded`` and a ``reason`` (``"timeout"``
        or the exception class name) otherwise.
    """
    started = time.monotonic()
    sample = _smoke_fixture()
    try:
        inference = asyncio.get_running_loop().run_in_executor(None, _ocr_image, sample)
        await asyncio.wait_for(inference, timeout=20.0)
    except asyncio.TimeoutError:
        failure = {"status": "degraded", "reason": "timeout"}
        return JSONResponse(status_code=503, content=failure)
    except Exception as exc:
        # Expose only the exception class name to the caller.
        failure = {"status": "degraded", "reason": exc.__class__.__name__}
        return JSONResponse(status_code=503, content=failure)
    took_ms = int((time.monotonic() - started) * 1000)
    return {"status": "ok", "service": "ocr-service", "inference": "ok", "elapsed_ms": took_ms}
@app.post("/ocr")
async def ocr_endpoint(body: dict):
    """OCR one image or PDF, handling PDFs page by page (never loading all pages at once).

    Args:
        body: JSON object with ``filePath`` (required string) and optional
            ``maxPages`` (default 200) capping how many PDF pages are
            processed. A null or non-integer ``maxPages`` falls back to the
            default; a negative value processes no pages.

    Returns:
        ``{"text", "pages", "chars"}``. ``pages`` is 1 for an image and the
        document's total page count for a PDF, even when ``maxPages``
        truncated processing. When ``filePath`` is missing, not a non-empty
        string, or cannot be resolved, the same keys are returned zeroed
        together with an ``error`` message.
    """
    # NOTE(review): inference below runs synchronously inside this coroutine,
    # so other requests on the same event loop wait until it returns.
    # Offloading to an executor would also change how GPU requests are
    # serialized — decide that before changing it.
    default_max_pages = 200

    raw_path = body.get("filePath")
    if not isinstance(raw_path, str) or not raw_path:
        # Same response shape as the "file not found" case instead of a
        # KeyError/TypeError surfacing as HTTP 500.
        return {"error": "filePath 누락", "text": "", "pages": 0, "chars": 0}

    try:
        max_pages = max(0, int(body.get("maxPages", default_max_pages)))
    except (TypeError, ValueError, OverflowError):
        max_pages = default_max_pages

    resolved = _resolve_path(raw_path)
    if resolved is None:
        return {"error": f"파일 없음: {raw_path}", "text": "", "pages": 0, "chars": 0}

    if resolved.suffix.lower() in IMAGE_EXTS:
        # Image.open keeps the underlying file open; close it as soon as the
        # RGB copy exists rather than leaving the handle to the garbage collector.
        with Image.open(resolved) as source:
            img = source.convert("RGB")
        try:
            text = _ocr_image(img)
        finally:
            del img
        return {"text": text, "pages": 1, "chars": len(text)}

    doc = fitz.open(str(resolved))
    try:
        page_count = len(doc)
        process_pages = min(page_count, max_pages)
        all_text = []
        for i in range(process_pages):
            # Rasterize one page, copy it into a PIL image, and drop the
            # pixmap immediately so only one page's pixels are held at a time.
            pix = doc[i].get_pixmap(dpi=200)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            del pix
            try:
                page_text = _ocr_image(img)
            finally:
                del img
            # Pages that produced only whitespace are left out of the result.
            if page_text.strip():
                all_text.append(page_text)
    finally:
        doc.close()

    combined = "\n\n".join(all_text)
    return {"text": combined, "pages": page_count, "chars": len(combined)}