#!/usr/bin/env bash
# GPU VRAM peak verification driven by synthetic fixtures (PR-GPU-Health-1).
#
# Runs Mode A (sequential) and Mode B (light overlap) by default. The --stress
# option additionally fires 5 calls concurrently (not part of the default run).
#
# Usage: <this script> [--stress]
#
# Environment (optional):
#   VRAM_TOTAL_MIB  GPU memory capacity used for the safety-margin line
#                   (default: 16376).
#   VRAM_GATE_MIB   Gate threshold; the run passes when 0 < peak < threshold
#                   (default: 14000).
#
# Exit status: 0 when the gate passes, 1 when it fails, 2 on bad configuration.
#
# NOTE: -e is deliberately not set. A failing service call must be recorded as
# FAIL in the report instead of aborting the whole run.
set -uo pipefail

readonly OCR=hyungi_document_server-ocr-service-1
readonly MARKER=hyungi_document_server-marker-service-1
readonly RERANKER=hyungi_document_server-reranker-1
readonly STT=hyungi_document_server-stt-service-1

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
FIX="$REPO_ROOT/tests/load/fixtures"
REPORT="$REPO_ROOT/reports/vram_fixture_$(date +%F).md"
mkdir -p "$REPO_ROOT/reports"

VRAM_TOTAL_MIB="${VRAM_TOTAL_MIB:-16376}"
VRAM_GATE_MIB="${VRAM_GATE_MIB:-14000}"
for limit in "$VRAM_TOTAL_MIB" "$VRAM_GATE_MIB"; do
  # Both values feed integer comparisons/arithmetic below; reject anything else
  # (including leading zeros, which arithmetic would read as octal).
  if [[ ! "$limit" =~ ^[1-9][0-9]*$ ]]; then
    echo "error: VRAM_TOTAL_MIB and VRAM_GATE_MIB must be positive integers" >&2
    exit 2
  fi
done

STRESS_MODE=0
if [[ "${1:-}" == "--stress" ]]; then
  STRESS_MODE=1
fi

# Number of service calls that returned non-zero, across all modes.
FAIL_COUNT=0

# Prints the used memory (MiB) reported by nvidia-smi for the first GPU listed.
# Prints nothing when nvidia-smi is unavailable or fails; callers tolerate an
# empty value, and the final gate fails when no positive reading was recorded.
vram() {
  nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null \
    | head -1 \
    | tr -d ' '
}

# Copies the fixture files into the containers that read them from /tmp.
# A failed copy is reported on stderr but does not abort: the affected call
# will then show up as FAIL in the report.
copy_fixtures() {
  docker cp "$FIX/ocr_ok.png" "$OCR:/tmp/ocr_ok.png" >/dev/null \
    || echo "warning: could not copy ocr_ok.png into $OCR" >&2
  docker cp "$FIX/lorem_1p.pdf" "$MARKER:/tmp/lorem_1p.pdf" >/dev/null \
    || echo "warning: could not copy lorem_1p.pdf into $MARKER" >&2
  docker cp "$FIX/sine_30s.wav" "$STT:/tmp/sine_30s.wav" >/dev/null \
    || echo "warning: could not copy sine_30s.wav into $STT" >&2
}

# Each call_* helper issues one request from inside a container via
# `docker exec ... curl`. Response bodies are discarded; the exit status of
# curl (-f: fail on HTTP errors, -m: timeout in seconds) is the result.

call_ocr() {
  docker exec "$OCR" curl -fsS -m 60 -X POST \
    -H 'Content-Type: application/json' \
    -d '{"filePath":"/tmp/ocr_ok.png"}' \
    http://127.0.0.1:3200/ocr >/dev/null
}

call_marker() {
  docker exec "$MARKER" curl -fsS -m 180 -X POST \
    -H 'Content-Type: application/json' \
    -d '{"file_path":"/tmp/lorem_1p.pdf"}' \
    http://127.0.0.1:3300/convert >/dev/null
}

call_stt() {
  docker exec "$STT" curl -fsS -m 180 -X POST \
    -H 'Content-Type: application/json' \
    -d '{"filePath":"/tmp/sine_30s.wav","langs":["en"],"beamSize":1}' \
    http://127.0.0.1:3300/transcribe >/dev/null
}

call_rerank() {
  docker exec "$RERANKER" curl -fsS -m 30 -X POST \
    -H 'Content-Type: application/json' \
    -d '{"query":"smoke","texts":["foo bar baz","alpha beta gamma"]}' \
    http://127.0.0.1:80/rerank >/dev/null
}

# The embedding request is sent from inside the OCR container to the
# `ollama` host name.
call_embed() {
  docker exec "$OCR" curl -fsS -m 30 -X POST \
    -H 'Content-Type: application/json' \
    -d '{"model":"bge-m3","prompt":"smoke test"}' \
    http://ollama:11434/api/embeddings >/dev/null
}

# Waits for a background job and records its outcome.
# Arguments: $1 - PID to wait for
#            $2 - name of the caller's variable that receives "OK" or "FAIL"
# Globals:   FAIL_COUNT (incremented on failure)
# Must be called directly (not inside $(...)): `wait` only works on children
# of the current shell.
reap() {
  local pid="$1" out_var="$2"
  if wait "$pid"; then
    printf -v "$out_var" '%s' "OK"
  else
    printf -v "$out_var" '%s' "FAIL"
    FAIL_COUNT=$((FAIL_COUNT + 1))
  fi
}

# Runs one call and appends a report row with VRAM before/after and status.
# Arguments: $1 - row label, $2 - name of the call_* function to run
# Globals:   REPORT (appended), FAIL_COUNT (incremented on failure)
run_named() {
  local name="$1" fn="$2"
  local before after status
  before=$(vram)
  if "$fn"; then
    status="OK"
  else
    status="FAIL"
    FAIL_COUNT=$((FAIL_COUNT + 1))
  fi
  after=$(vram)
  printf "| %s | %s | %s | %s |\n" "$name" "$before" "$after" "$status" >> "$REPORT"
  echo " $name before=$before after=$after $status"
}

# Runs two calls concurrently and appends one report row for the pair.
# Arguments: $1 - row label, $2 / $3 - names of the call_* functions to run
# Globals:   REPORT (appended), FAIL_COUNT (via reap)
run_overlap() {
  local label="$1" fn_a="$2" fn_b="$3"
  local before after pid_a pid_b status_a status_b
  before=$(vram)
  "$fn_a" &
  pid_a=$!
  "$fn_b" &
  pid_b=$!
  reap "$pid_a" status_a
  reap "$pid_b" status_b
  after=$(vram)
  printf "| %s | %s | %s | %s+%s |\n" "$label" "$before" "$after" "$status_a" "$status_b" >> "$REPORT"
  echo " $label before=$before after=$after $status_a+$status_b"
}

# Runs all five calls concurrently and appends one report row.
# Statuses are listed in launch order: ocr/marker/stt/rerank/embed.
# Globals: REPORT (appended), FAIL_COUNT (via reap)
run_stress() {
  local before after fn job outcome joined
  local -a jobs=() outcomes=()
  before=$(vram)
  for fn in call_ocr call_marker call_stt call_rerank call_embed; do
    "$fn" &
    jobs+=("$!")
  done
  for job in "${jobs[@]}"; do
    reap "$job" outcome
    outcomes+=("$outcome")
  done
  after=$(vram)
  # Join the statuses with "/" (IFS change is confined to the subshell).
  joined=$(IFS=/; printf '%s' "${outcomes[*]}")
  printf "| stress (5 concurrent) | %s | %s | %s |\n" "$before" "$after" "$joined" >> "$REPORT"
  echo " stress before=$before after=$after $joined"
}

copy_fixtures

if [[ $STRESS_MODE -eq 1 ]]; then
  stress_label=enabled
else
  stress_label=disabled
fi

# Start a fresh report (truncates any report already written today).
{
  echo "# VRAM fixture report — $(date '+%F %H:%M:%S')"
  echo
  echo "- baseline used = $(vram) MiB / total = $VRAM_TOTAL_MIB MiB"
  echo "- stress mode: $stress_label"
  echo
  echo "## Mode A — sequential smoke"
  echo
  echo "| call | before (MiB) | after (MiB) | status |"
  echo "|---|---|---|---|"
} > "$REPORT"

echo "[mode A] sequential"
run_named "OCR /ocr (ocr_ok.png)" call_ocr
run_named "STT /transcribe (sine30s)" call_stt
run_named "marker /convert (lorem1p)" call_marker
run_named "reranker /rerank" call_rerank
run_named "embed bge-m3" call_embed

{
  echo
  echo "## Mode B — light overlap"
  echo
  echo "| pair | before (MiB) | after (MiB) | status |"
  echo "|---|---|---|---|"
} >> "$REPORT"

echo "[mode B] light overlap"
run_overlap "OCR + embedding" call_ocr call_embed
run_overlap "marker + reranker" call_marker call_rerank
run_overlap "STT + embedding" call_stt call_embed

if [[ $STRESS_MODE -eq 1 ]]; then
  {
    echo
    echo "## Stress (--stress) — 5 concurrent"
    echo
    echo "| call | before (MiB) | after (MiB) | status |"
    echo "|---|---|---|---|"
  } >> "$REPORT"
  echo "[stress] 5 concurrent"
  run_stress
fi

# Peak = largest value in the "after" column of the data rows. With '|' as the
# separator, a row "| label | before | after | status |" yields $4 = after and
# $5 = status; header and separator rows are skipped because their status
# field contains neither OK nor FAIL. The value is sampled after each call
# returns, so it is the highest post-call reading, not an in-flight maximum.
PEAK=$(awk -F'|' '
  $0 ~ /^\|/ && $5 ~ /(OK|FAIL)/ {
    gsub(/ /, "", $4)
    if ($4 + 0 > max) max = $4 + 0
  }
  END { print max + 0 }
' "$REPORT")

# A peak of 0 means no usable reading was recorded, which also fails the gate.
if [[ "$PEAK" -gt 0 && "$PEAK" -lt "$VRAM_GATE_MIB" ]]; then
  GATE=PASS
else
  GATE=FAIL
fi

{
  echo
  echo "## Summary"
  echo
  echo "- peak after = $PEAK MiB"
  echo "- safety margin (vs $VRAM_TOTAL_MIB MiB) = $((VRAM_TOTAL_MIB - PEAK)) MiB"
  echo "- gate (peak < $VRAM_GATE_MIB MiB) = $GATE"
  echo "- failed calls = $FAIL_COUNT"
} >> "$REPORT"

if [[ $FAIL_COUNT -gt 0 ]]; then
  # The gate itself only looks at VRAM; surface call failures separately.
  echo "warning: $FAIL_COUNT call(s) failed; see $REPORT" >&2
fi

echo
echo "report: $REPORT"
echo "peak=$PEAK gate=$GATE"

if [[ "$GATE" == "PASS" ]]; then
  exit 0
fi
exit 1