a6b8dae18e
ollama 는 home-gateway-network / document_server / ollama_default 3개 network 에 속해 range loop 가 모든 IP concat. (index .NetworkSettings.Networks "hyungi_document_server_default").IPAddress 로 명시. 다른 GPU 서비스 4개도 동일 single-network 이라 호환. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
96 lines
2.9 KiB
Bash
Executable File
96 lines
2.9 KiB
Bash
Executable File
#!/usr/bin/env bash
# Health / ready / smoke check for the GPU media and search services
# (PR-GPU-Health-1).
#
# OCR/STT are expose-only (no host publish), so they are called directly from
# the host through their docker bridge IP. This verifies the services from
# inside the docker network instead of adding a host publish, which keeps the
# security surface unchanged.

# NOTE(review): -e is not set; presumably so that one failing probe does not
# abort the remaining checks — confirm before adding it.
set -uo pipefail

# Container names of the services under test.
readonly OCR=hyungi_document_server-ocr-service-1
readonly MARKER=hyungi_document_server-marker-service-1
readonly RERANKER=hyungi_document_server-reranker-1
readonly STT=hyungi_document_server-stt-service-1
readonly OLLAMA=ollama

# Running tallies of passed / failed checks.
PASS=0
FAIL=0
#######################################
# Print a container's IP address on one specific docker network.
# Only a single named network is queried: ollama is attached to several
# networks, so ranging over all of them would concatenate every IP.
# Arguments:
#   $1 - container name
#   $2 - docker network name (optional,
#        default: hyungi_document_server_default)
# Outputs:
#   Whatever `docker inspect` prints for the IPAddress field, on stdout.
#   docker's stderr is discarded.
# Returns:
#   Exit status of `docker inspect`.
#######################################
container_ip() {
  local container="$1"
  local network="${2:-hyungi_document_server_default}"

  docker inspect \
    -f "{{(index .NetworkSettings.Networks \"${network}\").IPAddress}}" \
    "$container" 2>/dev/null
}
#######################################
# Print used / free GPU memory as reported by nvidia-smi.
# Arguments:
#   None
# Outputs:
#   One "used=<n>MiB free=<n>MiB" line per CSV row printed by nvidia-smi,
#   or a single "n/a (nvidia-smi unavailable)" line when nvidia-smi fails
#   or prints nothing (so the report never shows an unexplained blank).
# Returns:
#   0 on success, 1 when no reading could be obtained.
#######################################
vram() {
  local csv

  if ! csv=$(nvidia-smi --query-gpu=memory.used,memory.free \
    --format=csv,noheader,nounits 2>/dev/null) || [[ -z "$csv" ]]; then
    printf 'n/a (nvidia-smi unavailable)\n'
    return 1
  fi

  # %d drops the space nvidia-smi puts after the comma.
  awk -F',' '{printf "used=%dMiB free=%dMiB\n", $1, $2}' <<<"$csv"
}
#######################################
# HTTP GET a path on a container and record the result.
# Globals:
#   PASS - incremented when curl succeeds
#   FAIL - incremented when the container IP is empty or curl fails
# Arguments:
#   $1 - label shown in the report (left-aligned, padded to 22 columns)
#   $2 - container name, resolved through container_ip
#   $3 - TCP port
#   $4 - request path, including the leading slash
#   $5 - curl max time in seconds (optional, default 5)
# Outputs:
#   One report line: the label followed by "OK <response>" or "FAIL <reason>".
#   The response / curl error is cut to its first 120 characters.
# Returns:
#   Always 0; the outcome is reported through PASS / FAIL.
#######################################
probe() {
  local label="$1" container="$2" port="$3" path="$4" timeout="${5:-5}"
  # Declared separately from the assignments so `local` does not mask the
  # command status, and so `out` does not leak into the global scope.
  local ip out

  ip=$(container_ip "$container")
  printf " %-22s " "$label"

  if [[ -z "$ip" ]]; then
    echo "FAIL (container IP 없음)"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # -f turns HTTP errors into a non-zero exit; 2>&1 captures curl's own error
  # message so it can be shown on the FAIL line.
  if out=$(curl -fsS -m "$timeout" "http://${ip}:${port}${path}" 2>&1); then
    # Substring expansion truncates by character, so multibyte text is never
    # cut in the middle of a character.
    printf '%s\n' "OK ${out:0:120}"
    PASS=$((PASS + 1))
  else
    printf '%s\n' "FAIL ${out:0:120}"
    FAIL=$((FAIL + 1))
  fi
  return 0
}
#######################################
# HTTP POST a JSON body to a container and record the result.
# Globals:
#   PASS - incremented when curl succeeds and the response matches
#   FAIL - incremented when the container IP is empty, curl fails, or the
#          response does not contain the expected substring
# Arguments:
#   $1 - label shown in the report (left-aligned, padded to 22 columns)
#   $2 - container name, resolved through container_ip
#   $3 - TCP port
#   $4 - request path, including the leading slash
#   $5 - request body, sent with Content-Type: application/json
#   $6 - curl max time in seconds (optional, default 30)
#   $7 - substring the response must contain (optional; empty = no check)
# Outputs:
#   One report line: the label followed by "OK ...", "FAIL ..." or
#   "FAIL(unexpected body) ...". The response / curl error is cut to its
#   first 100 characters.
# Returns:
#   Always 0; the outcome is reported through PASS / FAIL.
#######################################
probe_post() {
  local label="$1" container="$2" port="$3" path="$4" body="$5"
  local timeout="${6:-30}" expect="${7:-}"
  # Declared separately from the assignments so `local` does not mask the
  # command status, and so `out` does not leak into the global scope.
  local ip out

  ip=$(container_ip "$container")
  printf " %-22s " "$label"

  if [[ -z "$ip" ]]; then
    echo "FAIL (container IP 없음)"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # -f turns HTTP errors into a non-zero exit; 2>&1 captures curl's own error
  # message so it can be shown on the FAIL line.
  if ! out=$(curl -fsS -m "$timeout" \
    -H 'Content-Type: application/json' -X POST -d "$body" \
    "http://${ip}:${port}${path}" 2>&1); then
    printf '%s\n' "FAIL ${out:0:100}"
    FAIL=$((FAIL + 1))
  elif [[ -n "$expect" && "$out" != *"$expect"* ]]; then
    # Transport succeeded but the body lacks the expected marker.
    printf '%s\n' "FAIL(unexpected body) ${out:0:100}"
    FAIL=$((FAIL + 1))
  else
    printf '%s\n' "OK ${out:0:100}"
    PASS=$((PASS + 1))
  fi
  return 0
}
# GPU memory reading taken before any probe runs, kept for the comparison
# printed at the end.
echo "=== nvidia-smi baseline ==="
BASE=$(vram)
echo " $BASE"
echo

# GET checks of each service's /health and /ready endpoints, 5 s limit each.
echo "=== health / ready ==="
probe "OCR /health" "$OCR" 3200 "/health" 5
probe "OCR /ready" "$OCR" 3200 "/ready" 5
probe "marker /health" "$MARKER" 3300 "/health" 5
probe "marker /ready" "$MARKER" 3300 "/ready" 5
probe "reranker /health" "$RERANKER" 80 "/health" 5
probe "stt /health" "$STT" 3300 "/health" 5
probe "stt /ready" "$STT" 3300 "/ready" 5

# Smoke checks get a 30 s limit. The embedding request only passes when the
# response body contains "embedding".
echo
echo "=== smoke ==="
probe "OCR /smoke" "$OCR" 3200 "/smoke" 30
probe_post "bge-m3 embed" "$OLLAMA" 11434 "/api/embeddings" \
  '{"model":"bge-m3","prompt":"smoke test"}' 30 '"embedding"'

# Second GPU memory reading, printed next to the baseline for comparison.
echo
echo "=== nvidia-smi after ==="
AFTER=$(vram)
echo " $AFTER"
echo
echo " baseline: $BASE"
echo " after : $AFTER"
echo
echo "=== summary ==="
echo " pass=$PASS fail=$FAIL"

# Exit status is the number of failed checks (0 = all passed). It is capped at
# 255 because exit statuses wrap modulo 256, which would otherwise let a
# multiple of 256 failures look like success.
exit "$((FAIL > 255 ? 255 : FAIL))"