fix(gpu-health): scripts 호출 도구를 host curl + container IP 로 통일

OCR/STT 컨테이너에는 curl 이 설치되어 있지 않다 (slim python image). 그래서 기존 표준이던
docker exec curl 방식은 실측 결과 OCI exec 오류로 실패한다. 호출을 host curl + docker
bridge IP (172.20.0.x) 로 변경 — host publish 를 추가하는 것이 아니라 docker network
내부 검증이므로 보안 표면은 동일하다.

reranker 만 curl 있고 OCR/marker/STT 는 python 만 있어 분기 발생을 회피.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-05-14 09:51:59 +09:00
parent 98ee7dffe2
commit 8f4413a38c
2 changed files with 51 additions and 37 deletions
+19 -22
View File
@@ -1,12 +1,14 @@
#!/usr/bin/env bash
# synthetic fixture 기반 GPU VRAM 피크 검증 (PR-GPU-Health-1).
# Mode A (sequential) + Mode B (light overlap) 기본. --stress 옵션은 5개 동시 (기본 gate 미포함).
# Mode A (sequential) + Mode B (light overlap) 기본. --stress (5 concurrent) 옵션.
# 호출은 호스트 curl + container IP (docker bridge 내부, host publish 추가 아님).
set -uo pipefail
# Docker container names of the services this script talks to; they are
# handed to docker subcommands (inspect / exec / cp) elsewhere in the file.
OCR=hyungi_document_server-ocr-service-1
MARKER=hyungi_document_server-marker-service-1
RERANKER=hyungi_document_server-reranker-1
STT=hyungi_document_server-stt-service-1
# Container whose address is used for the embeddings API call.
OLLAMA=ollama
# Repository root: the parent of the directory that contains this script.
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
# Directory holding the fixture files (e.g. sine_30s.wav) copied into containers.
FIX="$REPO_ROOT/tests/load/fixtures"
@@ -16,6 +18,10 @@ mkdir -p "$REPO_ROOT/reports"
# Flag parsing: only the first positional argument is inspected.
# STRESS_MODE becomes 1 for an exact "--stress", otherwise it stays 0.
case "${1:-}" in
  --stress) STRESS_MODE=1 ;;
  *)        STRESS_MODE=0 ;;
esac
#######################################
# Print the IP address docker reports for a container.
# Arguments: $1 - container name or ID
# Outputs:   the first non-empty address, on one line, to stdout
# Returns:   0 when an address was printed; non-zero when `docker inspect`
#            failed or the container has no address on any network
#######################################
container_ip() {
  local addrs addr
  # `println` puts every network's address on its own line. The previous
  # template printed them back to back, so a container attached to two
  # networks produced an unusable string such as "172.20.0.2172.21.0.3".
  # stderr is discarded on purpose: failure is signalled via the exit status.
  addrs=$(docker inspect \
    -f '{{range .NetworkSettings.Networks}}{{println .IPAddress}}{{end}}' \
    "$1" 2>/dev/null) || return
  while IFS= read -r addr; do
    if [[ -n "$addr" ]]; then
      printf '%s\n' "$addr"
      return 0
    fi
  done <<<"$addrs"
  return 1
}
# Print the memory.used value from the first row nvidia-smi reports,
# with all space characters removed. Prints nothing if there is no output.
vram() {
  nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null \
    | awk 'NR == 1 { gsub(/ /, ""); print; exit }'
}
@@ -26,29 +32,20 @@ copy_fixtures() {
docker cp "$FIX/sine_30s.wav" $STT:/tmp/sine_30s.wav >/dev/null
}
# POST the OCR smoke request by running curl inside the OCR container.
# Globals: OCR (read) - container name given to `docker exec`.
# Returns: the exit status of `docker exec`; the response body is discarded.
call_ocr() {
  local payload='{"filePath":"/tmp/ocr_ok.png"}'
  local -a request=(
    curl -fsS -m 60 -X POST
    -H 'Content-Type: application/json'
    -d "$payload"
    http://127.0.0.1:3200/ocr
  )
  docker exec "$OCR" "${request[@]}" >/dev/null
}
# POST the PDF conversion smoke request by running curl inside the marker
# container.
# Globals: MARKER (read) - container name given to `docker exec`.
# Returns: the exit status of `docker exec`; the response body is discarded.
call_marker() {
  local payload='{"file_path":"/tmp/lorem_1p.pdf"}'
  local -a request=(
    curl -fsS -m 180 -X POST
    -H 'Content-Type: application/json'
    -d "$payload"
    http://127.0.0.1:3300/convert
  )
  docker exec "$MARKER" "${request[@]}" >/dev/null
}
# POST the transcription smoke request by running curl inside the STT
# container.
# Globals: STT (read) - container name given to `docker exec`.
# Returns: the exit status of `docker exec`; the response body is discarded.
call_stt() {
  local payload='{"filePath":"/tmp/sine_30s.wav","langs":["en"],"beamSize":1}'
  local -a request=(
    curl -fsS -m 180 -X POST
    -H 'Content-Type: application/json'
    -d "$payload"
    http://127.0.0.1:3300/transcribe
  )
  docker exec "$STT" "${request[@]}" >/dev/null
}
# POST the rerank smoke request by running curl inside the reranker container.
# Globals: RERANKER (read) - container name given to `docker exec`.
# Returns: the exit status of `docker exec`; the response body is discarded.
call_rerank() {
  local payload='{"query":"smoke","texts":["foo bar baz","alpha beta gamma"]}'
  local -a request=(
    curl -fsS -m 30 -X POST
    -H 'Content-Type: application/json'
    -d "$payload"
    http://127.0.0.1:80/rerank
  )
  docker exec "$RERANKER" "${request[@]}" >/dev/null
}
# POST the embeddings smoke request to the ollama host name, running curl
# from inside the OCR container.
# Globals: OCR (read) - container name given to `docker exec`.
# Returns: the exit status of `docker exec`; the response body is discarded.
call_embed() {
  local payload='{"model":"bge-m3","prompt":"smoke test"}'
  local -a request=(
    curl -fsS -m 30 -X POST
    -H 'Content-Type: application/json'
    -d "$payload"
    http://ollama:11434/api/embeddings
  )
  docker exec "$OCR" "${request[@]}" >/dev/null
}
# Look up each container's address once, up front; the host-side curl calls
# build their URLs from these variables.
# Container names are quoted so each is always passed as exactly one argument
# (no word splitting or globbing), whatever the name contains.
OCR_IP=$(container_ip "$OCR")
MARKER_IP=$(container_ip "$MARKER")
RERANKER_IP=$(container_ip "$RERANKER")
STT_IP=$(container_ip "$STT")
OLLAMA_IP=$(container_ip "$OLLAMA")
#######################################
# POST a JSON body from the host to a service addressed by container IP.
# Arguments:
#   $1 - curl --max-time value, in seconds
#   $2 - container IP (may be empty if the lookup failed)
#   $3 - TCP port
#   $4 - URL path, starting with "/"
#   $5 - JSON request body
# Outputs:  nothing on stdout (the response body is discarded); diagnostics
#           go to stderr
# Returns:  curl's exit status, or 1 without calling curl when the IP is empty
#######################################
_post_json() {
  local timeout="$1" ip="$2" port="$3" path="$4" payload="$5"
  # An empty IP would otherwise yield a URL like "http://:3200/ocr" and an
  # opaque curl error; fail early with a message that names the real cause.
  if [[ -z "$ip" ]]; then
    printf 'error: no container IP for port %s%s (is the container running?)\n' \
      "$port" "$path" >&2
    return 1
  fi
  curl -fsS -m "$timeout" -X POST -H 'Content-Type: application/json' \
    -d "$payload" "http://$ip:$port$path" >/dev/null
}

# One smoke request per service. Each reads its *_IP global and returns the
# status of _post_json.
call_ocr() { _post_json 60 "${OCR_IP:-}" 3200 /ocr '{"filePath":"/tmp/ocr_ok.png"}'; }
call_marker() { _post_json 180 "${MARKER_IP:-}" 3300 /convert '{"file_path":"/tmp/lorem_1p.pdf"}'; }
call_stt() { _post_json 180 "${STT_IP:-}" 3300 /transcribe '{"filePath":"/tmp/sine_30s.wav","langs":["en"],"beamSize":1}'; }
call_rerank() { _post_json 30 "${RERANKER_IP:-}" 80 /rerank '{"query":"smoke","texts":["foo bar baz","alpha beta gamma"]}'; }
call_embed() { _post_json 30 "${OLLAMA_IP:-}" 11434 /api/embeddings '{"model":"bge-m3","prompt":"smoke test"}'; }
run_named() {
local name="$1"; local fn="$2"
local name="$1" fn="$2"
local before=$(vram)
if $fn; then status="OK"; else status="FAIL"; fi
local after=$(vram)