#!/usr/bin/env bash
# GPU VRAM peak verification based on synthetic fixtures (PR-GPU-Health-1).
#
# Mode A (sequential) and Mode B (light overlap) run by default.
# Passing --stress as the first argument additionally runs 5 concurrent calls.
# Calls are made with host curl against container IPs (inside the docker
# bridge network; no additional host-published ports are used).
#
# Usage:   <script> [--stress]
# Output:  a markdown report under <repo>/reports/vram_fixture_<date>.md
# Exit:    0 when the gate passes (0 < peak < GATE_LIMIT_MIB), 1 otherwise.
#
# errexit (-e) is not enabled: a failing service call is recorded as a FAIL
# row in the report and the run continues.
set -uo pipefail

OCR=hyungi_document_server-ocr-service-1
MARKER=hyungi_document_server-marker-service-1
RERANKER=hyungi_document_server-reranker-1
STT=hyungi_document_server-stt-service-1
OLLAMA=ollama

# Total VRAM printed in the report, and the upper bound used by the gate.
readonly TOTAL_MIB=16376
readonly GATE_LIMIT_MIB=14000

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
FIX="$REPO_ROOT/tests/load/fixtures"
REPORT="$REPO_ROOT/reports/vram_fixture_$(date +%F).md"
mkdir -p "$REPO_ROOT/reports"

STRESS_MODE=0
if [[ "${1:-}" == "--stress" ]]; then
  STRESS_MODE=1
fi

# Print a warning on stderr without aborting the run.
warn() {
  printf 'warn: %s\n' "$*" >&2
}

# Print the IP address of container $1 on the
# hyungi_document_server_default network only (ollama is attached to
# multiple networks). Prints nothing if docker inspect fails.
container_ip() {
  docker inspect \
    -f '{{(index .NetworkSettings.Networks "hyungi_document_server_default").IPAddress}}' \
    "$1" 2>/dev/null
}

# Print the used GPU memory in MiB, taken from the first line of nvidia-smi
# output (presumably the first GPU). Prints nothing if nvidia-smi fails.
vram() {
  nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null \
    | head -1 \
    | tr -d ' '
}

# Copy the synthetic fixtures into /tmp of the OCR, marker and STT containers.
# A failed copy is reported on stderr; the run continues and the affected
# call is expected to show up as FAIL in the report.
copy_fixtures() {
  docker cp "$FIX/ocr_ok.png" "$OCR:/tmp/ocr_ok.png" >/dev/null \
    || warn "docker cp ocr_ok.png -> $OCR failed"
  docker cp "$FIX/lorem_1p.pdf" "$MARKER:/tmp/lorem_1p.pdf" >/dev/null \
    || warn "docker cp lorem_1p.pdf -> $MARKER failed"
  docker cp "$FIX/sine_30s.wav" "$STT:/tmp/sine_30s.wav" >/dev/null \
    || warn "docker cp sine_30s.wav -> $STT failed"
}

OCR_IP=$(container_ip "$OCR")
MARKER_IP=$(container_ip "$MARKER")
RERANKER_IP=$(container_ip "$RERANKER")
STT_IP=$(container_ip "$STT")
OLLAMA_IP=$(container_ip "$OLLAMA")

# Surface unresolved container IPs early instead of only seeing curl errors.
for svc in OCR MARKER RERANKER STT OLLAMA; do
  ip_var="${svc}_IP"
  if [[ -z "${!ip_var}" ]]; then
    warn "could not resolve IP of container ${!svc}; calls to it will fail"
  fi
done
unset svc ip_var

# POST a JSON body and discard the response.
# Arguments: $1 - timeout in seconds, $2 - JSON body, $3 - URL
# Returns:   curl's exit status (non-zero on HTTP errors because of -f).
post_json() {
  curl -fsS -m "$1" -X POST -H 'Content-Type: application/json' \
    -d "$2" "$3" >/dev/null
}

call_ocr() {
  post_json 60 '{"filePath":"/tmp/ocr_ok.png"}' "http://$OCR_IP:3200/ocr"
}

call_marker() {
  post_json 180 '{"file_path":"/tmp/lorem_1p.pdf"}' \
    "http://$MARKER_IP:3300/convert"
}

call_stt() {
  post_json 180 '{"filePath":"/tmp/sine_30s.wav","langs":["en"],"beamSize":1}' \
    "http://$STT_IP:3300/transcribe"
}

call_rerank() {
  post_json 30 '{"query":"smoke","texts":["foo bar baz","alpha beta gamma"]}' \
    "http://$RERANKER_IP:80/rerank"
}

call_embed() {
  post_json 30 '{"model":"bge-m3","prompt":"smoke test"}' \
    "http://$OLLAMA_IP:11434/api/embeddings"
}

# Run one call and append a report row.
# VRAM is sampled before and after the call, not while it is in flight.
# Arguments: $1 - row label, $2 - name of the call_* function to run
run_named() {
  local name="$1" fn="$2"
  local before after status
  before=$(vram)
  if "$fn"; then
    status="OK"
  else
    status="FAIL"
  fi
  after=$(vram)
  printf "| %s | %s | %s | %s |\n" "$name" "$before" "$after" "$status" >> "$REPORT"
  echo " $name before=$before after=$after $status"
}

# Run two calls concurrently and append a single report row.
# Arguments: $1 - row label, $2/$3 - names of the call_* functions to run
run_overlap() {
  local label="$1" fn_a="$2" fn_b="$3"
  local before after pid_a pid_b sa sb
  before=$(vram)
  "$fn_a" &
  pid_a=$!
  "$fn_b" &
  pid_b=$!
  if wait "$pid_a"; then sa="OK"; else sa="FAIL"; fi
  if wait "$pid_b"; then sb="OK"; else sb="FAIL"; fi
  after=$(vram)
  printf "| %s | %s | %s | %s+%s |\n" "$label" "$before" "$after" "$sa" "$sb" >> "$REPORT"
  echo " $label before=$before after=$after $sa+$sb"
}

# Run all five calls concurrently and append a single report row whose status
# column lists the results in the order OCR/marker/STT/rerank/embed.
run_stress() {
  local -a fns=(call_ocr call_marker call_stt call_rerank call_embed)
  local -a pids=() results=()
  local before after fn pid joined
  before=$(vram)
  for fn in "${fns[@]}"; do
    "$fn" &
    pids+=("$!")
  done
  # Wait in launch order so each status lines up with its call.
  for pid in "${pids[@]}"; do
    if wait "$pid"; then
      results+=("OK")
    else
      results+=("FAIL")
    fi
  done
  after=$(vram)
  joined=$(IFS=/; printf '%s' "${results[*]}")
  printf "| stress (5 concurrent) | %s | %s | %s |\n" "$before" "$after" "$joined" >> "$REPORT"
  echo " stress before=$before after=$after $joined"
}

copy_fixtures

if [[ $STRESS_MODE -eq 1 ]]; then
  stress_label=enabled
else
  stress_label=disabled
fi

# Start a fresh report (truncates any report already written today).
{
  echo "# VRAM fixture report — $(date '+%F %H:%M:%S')"
  echo
  echo "- baseline used = $(vram) MiB / total = $TOTAL_MIB MiB"
  echo "- stress mode: $stress_label"
  echo
  echo "## Mode A — sequential smoke"
  echo
  echo "| call | before (MiB) | after (MiB) | status |"
  echo "|---|---|---|---|"
} > "$REPORT"

echo "[mode A] sequential"
run_named "OCR /ocr (ocr_ok.png)" call_ocr
run_named "STT /transcribe (sine30s)" call_stt
run_named "marker /convert (lorem1p)" call_marker
run_named "reranker /rerank" call_rerank
run_named "embed bge-m3" call_embed

{
  echo
  echo "## Mode B — light overlap"
  echo
  echo "| pair | before (MiB) | after (MiB) | status |"
  echo "|---|---|---|---|"
} >> "$REPORT"

echo "[mode B] light overlap"
run_overlap "OCR + embedding" call_ocr call_embed
run_overlap "marker + reranker" call_marker call_rerank
run_overlap "STT + embedding" call_stt call_embed

if [[ $STRESS_MODE -eq 1 ]]; then
  {
    echo
    echo "## Stress (--stress) — 5 concurrent"
    echo
    echo "| call | before (MiB) | after (MiB) | status |"
    echo "|---|---|---|---|"
  } >> "$REPORT"
  echo "[stress] 5 concurrent"
  run_stress
fi

# Peak = largest "after" value among table rows whose status column contains
# OK or FAIL (header and separator rows do not match). Prints 0 when no row
# carries a numeric value, e.g. when nvidia-smi produced no output.
PEAK=$(awk -F'|' '$0 ~ /^\|/ && $5 ~ /(OK|FAIL)/ {gsub(/ /,"",$4); if ($4+0 > max) max=$4+0} END {print max+0}' "$REPORT")

# A peak of 0 means nothing was measured, which must not pass the gate.
if [[ $PEAK -gt 0 && $PEAK -lt $GATE_LIMIT_MIB ]]; then
  GATE=PASS
else
  GATE=FAIL
fi

{
  echo
  echo "## Summary"
  echo
  echo "- peak after = $PEAK MiB"
  echo "- safety margin (vs $TOTAL_MIB MiB) = $((TOTAL_MIB - PEAK)) MiB"
  echo "- gate (peak < $GATE_LIMIT_MIB MiB) = $GATE"
} >> "$REPORT"

echo
echo "report: $REPORT"
echo "peak=$PEAK gate=$GATE"

if [[ "$GATE" == "PASS" ]]; then
  exit 0
fi
exit 1