Files
hyungi_document_server/scripts/gpu_vram_fixture.sh
T
Hyungi Ahn 8f4413a38c fix(gpu-health): scripts 호출 도구를 host curl + container IP 로 통일
OCR/STT 컨테이너 안에 curl 미설치 (slim python image). docker exec curl 표준은
실측 OCI exec 실패. host curl + docker bridge IP (172.20.0.x) 로 변경 — host
publish 추가 아니라 docker network 내부 검증이라 보안 표면 동일.

reranker 만 curl 있고 OCR/marker/STT 는 python 만 있어 분기 발생을 회피.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 09:51:59 +09:00

148 lines
5.0 KiB
Bash
Executable File

#!/usr/bin/env bash
# GPU VRAM check driven by synthetic fixtures (PR-GPU-Health-1).
#
# A default run performs Mode A (sequential calls) and Mode B (light overlap).
# Passing --stress as the first argument adds a round of 5 concurrent calls.
# Every service is called with the host's curl against the container's IP on
# the docker bridge network; no additional host port is published for this.
set -uo pipefail

# Names of the containers under test.
OCR="hyungi_document_server-ocr-service-1"
MARKER="hyungi_document_server-marker-service-1"
RERANKER="hyungi_document_server-reranker-1"
STT="hyungi_document_server-stt-service-1"
OLLAMA="ollama"

# Paths are anchored at the repository root, i.e. the parent directory of the
# directory containing this script.
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
FIX="${REPO_ROOT}/tests/load/fixtures"
REPORT="${REPO_ROOT}/reports/vram_fixture_$(date +%F).md"
mkdir -p "${REPO_ROOT}/reports"

# The stress round is opt-in: only an exact "--stress" in $1 turns it on.
STRESS_MODE=0
if [[ "${1:-}" == "--stress" ]]; then
  STRESS_MODE=1
fi
#######################################
# Print one IP address of a docker container.
# The inspect template emits one address per attached network, each on its own
# line, and the first non-empty line is returned. Without the line separator a
# container attached to several networks would yield all of its addresses
# glued together into a single unusable string.
# Arguments: $1 - container name or id
# Outputs:   the address on stdout (nothing when none is found)
# Returns:   0 when an address was printed; docker's status when inspect
#            fails; 1 when the container reports no address
#######################################
container_ip() {
  local container="$1" listing addr
  # docker's own error text is suppressed; the caller only sees an empty result.
  listing=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{println .IPAddress}}{{end}}' "$container" 2>/dev/null) || return
  while IFS= read -r addr; do
    if [[ -n "$addr" ]]; then
      printf '%s\n' "$addr"
      return 0
    fi
  done <<< "$listing"
  return 1
}
#######################################
# Print the "memory.used" figure nvidia-smi reports for the first GPU it
# lists, with all spaces removed. The rest of the script labels this value
# as MiB. Prints nothing when nvidia-smi produces no output.
# Outputs: the number on stdout
#######################################
vram() {
  nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null \
    | awk 'NR == 1 { gsub(/ /, ""); print; exit }'
}
#######################################
# Copy the synthetic fixtures into /tmp of the containers that will be asked
# to process them (the request payloads refer to these container-local paths).
# All three copies are always attempted, even after an earlier one failed.
# Globals: FIX, OCR, MARKER, STT (read)
# Returns: 0 when every copy succeeded, 1 when at least one failed
#######################################
copy_fixtures() {
  local rc=0
  # Track each copy separately; otherwise only the last status would survive.
  docker cp "$FIX/ocr_ok.png" "$OCR:/tmp/ocr_ok.png" >/dev/null || rc=1
  docker cp "$FIX/lorem_1p.pdf" "$MARKER:/tmp/lorem_1p.pdf" >/dev/null || rc=1
  docker cp "$FIX/sine_30s.wav" "$STT:/tmp/sine_30s.wav" >/dev/null || rc=1
  return "$rc"
}
# Resolve every service container's address once, before any call is made.
OCR_IP=$(container_ip "$OCR")
MARKER_IP=$(container_ip "$MARKER")
RERANKER_IP=$(container_ip "$RERANKER")
STT_IP=$(container_ip "$STT")
OLLAMA_IP=$(container_ip "$OLLAMA")
# An empty address produces a host-less URL that curl rejects, so say which
# lookup failed instead of leaving only a curl URL error to go on. The run
# still continues (best effort), exactly as before.
for _ip_var in OCR_IP MARKER_IP RERANKER_IP STT_IP OLLAMA_IP; do
  if [[ -z "${!_ip_var}" ]]; then
    printf 'warning: %s could not be resolved; calls to that service will fail\n' "$_ip_var" >&2
  fi
done
unset _ip_var
#######################################
# POST the OCR fixture path to the OCR service's /ocr endpoint (port 3200).
# The response body is discarded; curl is limited to 60 seconds.
# Globals: OCR_IP (read)
# Returns: curl's exit status (-f makes HTTP error responses count as failure)
#######################################
call_ocr() {
  local endpoint="http://${OCR_IP}:3200/ocr"
  local payload='{"filePath":"/tmp/ocr_ok.png"}'
  curl -fsS -m 60 -X POST \
    -H 'Content-Type: application/json' \
    -d "$payload" \
    "$endpoint" >/dev/null
}
#######################################
# POST the PDF fixture path to the marker service's /convert endpoint
# (port 3300). The response body is discarded; curl is limited to 180 seconds.
# Globals: MARKER_IP (read)
# Returns: curl's exit status (-f makes HTTP error responses count as failure)
#######################################
call_marker() {
  local endpoint="http://${MARKER_IP}:3300/convert"
  local payload='{"file_path":"/tmp/lorem_1p.pdf"}'
  curl -fsS -m 180 -X POST \
    -H 'Content-Type: application/json' \
    -d "$payload" \
    "$endpoint" >/dev/null
}
#######################################
# POST the WAV fixture path (English, beam size 1) to the STT service's
# /transcribe endpoint. The response body is discarded; curl is limited to
# 180 seconds.
# NOTE(review): the port is 3300, the same one used for the marker service —
# confirm the STT container really listens there.
# Globals: STT_IP (read)
# Returns: curl's exit status (-f makes HTTP error responses count as failure)
#######################################
call_stt() {
  local endpoint="http://${STT_IP}:3300/transcribe"
  local payload='{"filePath":"/tmp/sine_30s.wav","langs":["en"],"beamSize":1}'
  curl -fsS -m 180 -X POST \
    -H 'Content-Type: application/json' \
    -d "$payload" \
    "$endpoint" >/dev/null
}
#######################################
# POST a two-text smoke query to the reranker's /rerank endpoint (port 80).
# The response body is discarded; curl is limited to 30 seconds.
# Globals: RERANKER_IP (read)
# Returns: curl's exit status (-f makes HTTP error responses count as failure)
#######################################
call_rerank() {
  local endpoint="http://${RERANKER_IP}:80/rerank"
  local payload='{"query":"smoke","texts":["foo bar baz","alpha beta gamma"]}'
  curl -fsS -m 30 -X POST \
    -H 'Content-Type: application/json' \
    -d "$payload" \
    "$endpoint" >/dev/null
}
#######################################
# Request a bge-m3 embedding for a short prompt from ollama's
# /api/embeddings endpoint (port 11434). The response body is discarded;
# curl is limited to 30 seconds.
# Globals: OLLAMA_IP (read)
# Returns: curl's exit status (-f makes HTTP error responses count as failure)
#######################################
call_embed() {
  local endpoint="http://${OLLAMA_IP}:11434/api/embeddings"
  local payload='{"model":"bge-m3","prompt":"smoke test"}'
  curl -fsS -m 30 -X POST \
    -H 'Content-Type: application/json' \
    -d "$payload" \
    "$endpoint" >/dev/null
}
#######################################
# Run one call, sampling VRAM immediately before and after it, then append a
# table row to the report and echo the same figures to stdout.
# NOTE(review): "after" is sampled once the call has returned, so it is a
# post-completion reading rather than a peak observed during the call.
# Globals:   REPORT (appended to)
# Arguments: $1 - label for the table row
#            $2 - command to run (a function name in this script)
# Outputs:   one summary line on stdout, one row appended to $REPORT
#######################################
run_named() {
  local name="$1" fn="$2"
  # Declared separately from the assignments, and all local, so nothing
  # (notably "status") leaks into the caller's scope.
  local before after status
  before=$(vram)
  # Intentionally unquoted: keeps accepting a command given with arguments.
  if $fn; then
    status="OK"
  else
    status="FAIL"
  fi
  after=$(vram)
  printf '| %s | %s | %s | %s |\n' "$name" "$before" "$after" "$status" >> "$REPORT"
  printf ' %s before=%s after=%s %s\n' "$name" "$before" "$after" "$status"
}
#######################################
# Run two calls concurrently, sampling VRAM before they start and after both
# have finished, then append a table row to the report and echo the figures.
# NOTE(review): "after" is sampled once both calls have returned, so it is a
# post-completion reading rather than a peak observed while they overlapped.
# Globals:   REPORT (appended to)
# Arguments: $1 - label for the table row
#            $2 - first command to run in the background
#            $3 - second command to run in the background
# Outputs:   one summary line on stdout, one row appended to $REPORT
#######################################
run_overlap() {
  local label="$1" fn_a="$2" fn_b="$3"
  # Everything is local so job PIDs and statuses do not leak to the caller.
  local before after pid_a pid_b sa sb
  before=$(vram)
  # Intentionally unquoted: keeps accepting commands given with arguments.
  $fn_a &
  pid_a=$!
  $fn_b &
  pid_b=$!
  if wait "$pid_a"; then sa="OK"; else sa="FAIL"; fi
  if wait "$pid_b"; then sb="OK"; else sb="FAIL"; fi
  after=$(vram)
  printf '| %s | %s | %s | %s+%s |\n' "$label" "$before" "$after" "$sa" "$sb" >> "$REPORT"
  printf ' %s before=%s after=%s %s+%s\n' "$label" "$before" "$after" "$sa" "$sb"
}
#######################################
# Fire all five service calls at once, sampling VRAM before they start and
# after every one has finished, then append a table row to the report and
# echo the figures. Statuses are listed in launch order:
# OCR / marker / STT / reranker / embedding.
# NOTE(review): "after" is sampled once all calls have returned, so it is a
# post-completion reading rather than a peak observed under load.
# Globals: REPORT (appended to)
# Outputs: one summary line on stdout, one row appended to $REPORT
#######################################
run_stress() {
  # Local arrays replace the p1..p5 / s1..s5 globals the function used to set.
  local before after fn pid
  local -a pids=() results=()
  before=$(vram)
  for fn in call_ocr call_marker call_stt call_rerank call_embed; do
    "$fn" &
    pids+=("$!")
  done
  for pid in "${pids[@]}"; do
    if wait "$pid"; then
      results+=("OK")
    else
      results+=("FAIL")
    fi
  done
  after=$(vram)
  printf '| stress (5 concurrent) | %s | %s | %s/%s/%s/%s/%s |\n' \
    "$before" "$after" "${results[@]}" >> "$REPORT"
  printf ' stress before=%s after=%s %s/%s/%s/%s/%s\n' \
    "$before" "$after" "${results[@]}"
}
# Stage the fixtures inside the containers, then start a fresh report
# (overwriting any earlier one with the same name): title with a timestamp,
# baseline VRAM against the hard-coded 16376 MiB total, the stress setting,
# and the heading of the Mode A table.
copy_fixtures
cat > "$REPORT" <<EOF
# VRAM fixture report — $(date '+%F %H:%M:%S')

- baseline used = $(vram) MiB / total = 16376 MiB
- stress mode: $(if [[ $STRESS_MODE -eq 1 ]]; then echo enabled; else echo disabled; fi)

## Mode A — sequential smoke

| call | before (MiB) | after (MiB) | status |
|---|---|---|---|
EOF
# Mode A: one call at a time, in this fixed order. The plan is a flat list
# of (row label, function) pairs.
printf '%s\n' "[mode A] sequential"
mode_a_plan=(
  "OCR /ocr (ocr_ok.png)"      call_ocr
  "STT /transcribe (sine30s)"  call_stt
  "marker /convert (lorem1p)"  call_marker
  "reranker /rerank"           call_rerank
  "embed bge-m3"               call_embed
)
for ((i = 0; i < ${#mode_a_plan[@]}; i += 2)); do
  run_named "${mode_a_plan[i]}" "${mode_a_plan[i + 1]}"
done
unset mode_a_plan i
# Mode B: append the overlap table heading, then run three pairs of calls,
# each pair concurrently. The plan is a flat list of
# (row label, first function, second function) triples.
cat >> "$REPORT" <<'EOF'

## Mode B — light overlap

| pair | before (MiB) | after (MiB) | status |
|---|---|---|---|
EOF
printf '%s\n' "[mode B] light overlap"
mode_b_plan=(
  "OCR + embedding"    call_ocr     call_embed
  "marker + reranker"  call_marker  call_rerank
  "STT + embedding"    call_stt     call_embed
)
for ((i = 0; i < ${#mode_b_plan[@]}; i += 3)); do
  run_overlap "${mode_b_plan[i]}" "${mode_b_plan[i + 1]}" "${mode_b_plan[i + 2]}"
done
unset mode_b_plan i
# Optional stress round (only with --stress): append its table heading to the
# report, then launch all five calls at once.
if ((STRESS_MODE == 1)); then
  printf '%s\n' \
    "" \
    "## Stress (--stress) — 5 concurrent" \
    "" \
    "| call | before (MiB) | after (MiB) | status |" \
    "|---|---|---|---|" \
    >> "$REPORT"
  printf '%s\n' "[stress] 5 concurrent"
  run_stress
fi
# Peak = the largest "after" value (4th |-separated field) among report rows
# whose status field contains OK or FAIL; heading and separator rows never
# match. An empty or non-numeric value counts as 0.
PEAK=$(
  awk -F'|' '
    /^\|/ && $5 ~ /(OK|FAIL)/ {
      after = $4
      gsub(/ /, "", after)
      if (after + 0 > max) max = after + 0
    }
    END { print max + 0 }
  ' "$REPORT"
)

# The gate passes only for 0 < peak < 14000 MiB; a peak of 0 (no usable
# reading) therefore fails.
# NOTE(review): the gate looks at VRAM only. A run in which calls reported
# FAIL can still pass as long as the readings stay in range — confirm that
# this is intended.
if [[ $PEAK -gt 0 && $PEAK -lt 14000 ]]; then
  GATE=PASS
else
  GATE=FAIL
fi

cat >> "$REPORT" <<EOF

## Summary

- peak after = $PEAK MiB
- safety margin (vs 16376 MiB) = $((16376 - PEAK)) MiB
- gate (peak < 14000 MiB) = $GATE
EOF

printf '\n'
printf '%s\n' "report: $REPORT"
printf '%s\n' "peak=$PEAK gate=$GATE"

# Exit status mirrors the gate: 0 on PASS, 1 otherwise.
if [[ "$GATE" == "PASS" ]]; then
  exit 0
fi
exit 1