Files
hyungi_document_server/scripts/gpu_service_smoke.sh
T
Hyungi Ahn a6b8dae18e fix(gpu-health): container_ip() 가 document_server network IP 만 추출
ollama 는 home-gateway-network / document_server / ollama_default 3개 network 에 속해
range loop 가 모든 IP concat. (index .NetworkSettings.Networks "hyungi_document_server_default").IPAddress
로 명시. 다른 GPU 서비스 4개도 동일 single-network 이라 호환.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 10:02:54 +09:00

96 lines
2.9 KiB
Bash
Executable File

#!/usr/bin/env bash
# Health / ready / smoke checks for the GPU media and search services
# (PR-GPU-Health-1).
# OCR and STT are expose-only (no host-published port), so the host calls them
# directly through their docker bridge IP. This verifies inside the docker
# network rather than adding a host publish, so the security surface is the same.
set -u -o pipefail

# Names of the containers under test.
OCR="hyungi_document_server-ocr-service-1"
MARKER="hyungi_document_server-marker-service-1"
RERANKER="hyungi_document_server-reranker-1"
STT="hyungi_document_server-stt-service-1"
OLLAMA="ollama"

# Running tallies of passed / failed probes.
PASS=0 FAIL=0
# Print the IPv4 address a container has on one specific docker network.
#
# A single network is addressed by name (instead of ranging over all networks)
# because a container attached to several networks would otherwise yield
# several addresses glued together.
#
# Arguments: $1 - container name
#            $2 - docker network name (default: hyungi_document_server_default)
# Outputs:   the address on stdout, or nothing when it cannot be determined
# Returns:   0 when a dotted-quad address was printed, 1 otherwise
container_ip() {
  local container="$1" network="${2:-hyungi_document_server_default}"
  local -r ipv4_re='^[0-9]{1,3}(\.[0-9]{1,3}){3}$'
  local template ip

  template="{{(index .NetworkSettings.Networks \"${network}\").IPAddress}}"
  # docker's stderr is discarded on purpose: a missing container or network is
  # reported to the caller as "no address", not as an error message.
  ip=$(docker inspect -f "$template" "$container" 2>/dev/null) || return 1

  # Reject anything that is not a plain dotted quad (empty output, a template
  # placeholder such as "<no value>", multi-line text) so callers never build
  # a URL from it.
  [[ "$ip" =~ $ipv4_re ]] || return 1
  printf '%s\n' "$ip"
}
# Print GPU memory usage as "used=<N>MiB free=<N>MiB", one output line per
# CSV row returned by nvidia-smi (presumably one row per GPU).
# nvidia-smi's stderr is discarded; when it produces no rows nothing is printed.
vram() {
  local -a query=(
    --query-gpu=memory.used,memory.free
    --format=csv,noheader,nounits
  )
  nvidia-smi "${query[@]}" 2>/dev/null |
    awk -F ',' '{ printf("used=%dMiB free=%dMiB\n", $1, $2) }'
}
# GET an HTTP endpoint of a container and print one report line for it.
#
# Globals:   PASS, FAIL - one of them is incremented per call
# Arguments: $1 label     - text shown in the left report column
#            $2 container - container name, resolved through container_ip
#            $3 port      - port used in the URL
#            $4 path      - URL path appended after the port
#            $5 timeout   - value for curl -m, in seconds (default 5)
# Outputs:   "<label> OK <text>" or "<label> FAIL <text>" on stdout, where
#            <text> is at most the first 120 characters of what curl printed
# Returns:   always 0; a failed probe is only recorded in FAIL
probe() {
  local label="$1" container="$2" port="$3" path="$4" timeout="${5:-5}"
  # Declared apart from their assignments: keeps `out` from leaking into the
  # global scope and keeps `local` from masking a command's exit status.
  local ip out verdict

  ip=$(container_ip "$container")
  printf " %-22s " "$label"
  if [[ -z "$ip" ]]; then
    echo "FAIL (container IP 없음)"
    FAIL=$((FAIL+1))
    return 0
  fi

  # 2>&1 folds curl's own error message into $out so it can be shown on FAIL.
  if out=$(curl -fsS -m "$timeout" "http://$ip:$port$path" 2>&1); then
    verdict="OK"
    PASS=$((PASS+1))
  else
    verdict="FAIL"
    FAIL=$((FAIL+1))
  fi

  # Cut with parameter expansion rather than `head -c`, which counts bytes and
  # can split a multibyte character; the $(...) wrapper drops a trailing
  # newline the cut may leave behind.
  printf '%s %s\n' "$verdict" "$(printf '%s' "${out:0:120}")"
  return 0
}
# POST a JSON body to an HTTP endpoint of a container and print one report line.
#
# Globals:   PASS, FAIL - one of them is incremented per call
# Arguments: $1 label     - text shown in the left report column
#            $2 container - container name, resolved through container_ip
#            $3 port      - port used in the URL
#            $4 path      - URL path appended after the port
#            $5 body      - request body, sent as application/json
#            $6 timeout   - value for curl -m, in seconds (default 30)
#            $7 expect    - literal substring the response must contain;
#                           empty (default) skips the check
# Outputs:   "<label> OK <text>", "<label> FAIL(unexpected body) <text>" or
#            "<label> FAIL <text>" on stdout, where <text> is at most the first
#            100 characters of what curl printed
# Returns:   always 0; a failed probe is only recorded in FAIL
probe_post() {
  local label="$1" container="$2" port="$3" path="$4" body="$5" timeout="${6:-30}" expect="${7:-}"
  # Declared apart from their assignments: keeps `out` from leaking into the
  # global scope and keeps `local` from masking a command's exit status.
  local ip out verdict
  local -a curl_args

  ip=$(container_ip "$container")
  printf " %-22s " "$label"
  if [[ -z "$ip" ]]; then
    echo "FAIL (container IP 없음)"
    FAIL=$((FAIL+1))
    return 0
  fi

  curl_args=(-fsS -m "$timeout" -H 'Content-Type: application/json' -X POST -d "$body")
  # 2>&1 folds curl's own error message into $out so it can be shown on FAIL.
  if ! out=$(curl "${curl_args[@]}" "http://$ip:$port$path" 2>&1); then
    verdict="FAIL"
  elif [[ -n "$expect" && "$out" != *"$expect"* ]]; then
    # The request succeeded but the response lacks the expected marker.
    verdict="FAIL(unexpected body)"
  else
    verdict="OK"
  fi

  if [[ "$verdict" == "OK" ]]; then
    PASS=$((PASS+1))
  else
    FAIL=$((FAIL+1))
  fi

  # Cut with parameter expansion rather than `head -c`, which counts bytes and
  # can split a multibyte character; the $(...) wrapper drops a trailing
  # newline the cut may leave behind.
  printf '%s %s\n' "$verdict" "$(printf '%s' "${out:0:100}")"
  return 0
}
# --- GPU memory snapshot before any probe runs ---
printf '%s\n' "=== nvidia-smi baseline ==="
BASE=$(vram)
printf ' %s\n\n' "$BASE"

# --- health / ready endpoints, 5 second timeout each ---
# One entry per check: label|container|port|path
printf '%s\n' "=== health / ready ==="
health_checks=(
  "OCR /health|$OCR|3200|/health"
  "OCR /ready|$OCR|3200|/ready"
  "marker /health|$MARKER|3300|/health"
  "marker /ready|$MARKER|3300|/ready"
  "reranker /health|$RERANKER|80|/health"
  "stt /health|$STT|3300|/health"
  "stt /ready|$STT|3300|/ready"
)
for check in "${health_checks[@]}"; do
  IFS='|' read -r check_label check_container check_port check_path <<<"$check"
  probe "$check_label" "$check_container" "$check_port" "$check_path" 5
done
printf '\n'

# --- smoke requests, 30 second timeout each ---
printf '%s\n' "=== smoke ==="
probe "OCR /smoke" "$OCR" 3200 "/smoke" 30
probe_post "bge-m3 embed" "$OLLAMA" 11434 "/api/embeddings" \
  '{"model":"bge-m3","prompt":"smoke test"}' 30 '"embedding"'
printf '\n'

# --- GPU memory snapshot after the probes, shown next to the baseline ---
printf '%s\n' "=== nvidia-smi after ==="
AFTER=$(vram)
printf ' %s\n\n' "$AFTER"
printf ' baseline: %s\n' "$BASE"
printf ' after : %s\n\n' "$AFTER"

# --- totals; the exit status is the number of failed probes ---
printf '%s\n' "=== summary ==="
printf ' pass=%s fail=%s\n' "$PASS" "$FAIL"
exit "$FAIL"