#!/usr/bin/env bash
# Health / ready / smoke check for the GPU media and search services
# (PR-GPU-Health-1).
#
# OCR/STT are expose-only (no host publish). They are called directly from the
# host through their docker bridge IP — this does not add a host publish, the
# check happens inside the docker network (same security surface).
#
# Exit status: number of failed checks (0 when every check passed).

# -e is not set: a failing check is counted in FAIL and the run continues.
set -uo pipefail

# Docker network whose per-container IP is used to reach the services.
readonly NETWORK=hyungi_document_server_default

# Container names of the services under test.
readonly OCR=hyungi_document_server-ocr-service-1
readonly MARKER=hyungi_document_server-marker-service-1
readonly RERANKER=hyungi_document_server-reranker-1
readonly STT=hyungi_document_server-stt-service-1
readonly OLLAMA=ollama

# Running totals, updated by _pass / _fail.
PASS=0
FAIL=0

#######################################
# Print the IP address a container has on ${NETWORK}.
# Only that network's IP is extracted (ollama is attached to several networks,
# so a `range` over all networks cannot be used).
# Arguments: $1 - container name
# Outputs:   the IP on stdout; docker's stderr is discarded
# Returns:   exit status of `docker inspect`
#######################################
container_ip() {
  docker inspect \
    -f "{{(index .NetworkSettings.Networks \"${NETWORK}\").IPAddress}}" \
    "$1" 2>/dev/null
}

#######################################
# Print used/free GPU memory as reported by nvidia-smi, one line per row of
# nvidia-smi output. nvidia-smi's stderr is discarded.
# Outputs: lines of the form "used=<n>MiB free=<n>MiB"
#######################################
vram() {
  nvidia-smi --query-gpu=memory.used,memory.free \
    --format=csv,noheader,nounits 2>/dev/null \
    | awk -F',' '{printf "used=%dMiB free=%dMiB\n", $1, $2}'
}

#######################################
# Print at most $2 bytes of $1 (used to keep result lines short).
# Arguments: $1 - text, $2 - maximum number of bytes
#######################################
_clip() {
  head -c "$2" <<<"$1"
}

#######################################
# Report a passing check and count it.
# Globals:   PASS (written)
# Arguments: $1 - detail text printed after "OK "
#######################################
_pass() {
  printf 'OK %s\n' "$1"
  PASS=$((PASS + 1))
}

#######################################
# Report a failing check and count it.
# Globals:   FAIL (written)
# Arguments: $1 - complete message to print (including the FAIL prefix)
#######################################
_fail() {
  printf '%s\n' "$1"
  FAIL=$((FAIL + 1))
}

#######################################
# GET http://<container ip>:<port><path> and record the result.
# curl runs with -f, so an HTTP error status counts as a failure.
# Globals:   PASS, FAIL (written via _pass / _fail)
# Arguments: $1 - label shown in the report
#            $2 - container name
#            $3 - port
#            $4 - URL path (with leading slash)
#            $5 - curl timeout in seconds (default 5)
# Outputs:   one report line on stdout
#######################################
probe() {
  local label="$1" container="$2" port="$3" path="$4" timeout="${5:-5}"
  # Declared separately from the assignments so `local` does not mask the
  # command status, and so `out` does not leak into the global scope.
  local ip out

  ip=$(container_ip "$container")
  printf ' %-22s ' "$label"
  if [[ -z "$ip" ]]; then
    _fail 'FAIL (container IP 없음)'
    return 0
  fi

  if out=$(curl -fsS -m "$timeout" "http://${ip}:${port}${path}" 2>&1); then
    _pass "$(_clip "$out" 120)"
  else
    _fail "FAIL $(_clip "$out" 120)"
  fi
}

#######################################
# POST a JSON body to http://<container ip>:<port><path> and record the
# result. When an expected substring is given, the response must contain it.
# Globals:   PASS, FAIL (written via _pass / _fail)
# Arguments: $1 - label shown in the report
#            $2 - container name
#            $3 - port
#            $4 - URL path (with leading slash)
#            $5 - request body (sent as application/json)
#            $6 - curl timeout in seconds (default 30)
#            $7 - substring the response must contain (default: no check)
# Outputs:   one report line on stdout
#######################################
probe_post() {
  local label="$1" container="$2" port="$3" path="$4" body="$5"
  local timeout="${6:-30}" expect="${7:-}"
  local ip out

  ip=$(container_ip "$container")
  printf ' %-22s ' "$label"
  if [[ -z "$ip" ]]; then
    _fail 'FAIL (container IP 없음)'
    return 0
  fi

  if ! out=$(curl -fsS -m "$timeout" \
    -H 'Content-Type: application/json' \
    -X POST -d "$body" \
    "http://${ip}:${port}${path}" 2>&1); then
    _fail "FAIL $(_clip "$out" 100)"
    return 0
  fi

  # The right-hand side is quoted, so $expect is matched literally.
  if [[ -z "$expect" || "$out" == *"$expect"* ]]; then
    _pass "$(_clip "$out" 100)"
  else
    _fail "FAIL(unexpected body) $(_clip "$out" 100)"
  fi
}

#######################################
# Run every check, print GPU memory before and after, then exit with the
# number of failed checks.
#######################################
main() {
  local base after

  echo "=== nvidia-smi baseline ==="
  base=$(vram)
  echo " $base"
  echo

  echo "=== health / ready ==="
  probe "OCR /health" "$OCR" 3200 "/health" 5
  probe "OCR /ready" "$OCR" 3200 "/ready" 5
  probe "marker /health" "$MARKER" 3300 "/health" 5
  probe "marker /ready" "$MARKER" 3300 "/ready" 5
  probe "reranker /health" "$RERANKER" 80 "/health" 5
  probe "stt /health" "$STT" 3300 "/health" 5
  probe "stt /ready" "$STT" 3300 "/ready" 5
  echo

  echo "=== smoke ==="
  probe "OCR /smoke" "$OCR" 3200 "/smoke" 30
  probe_post "bge-m3 embed" "$OLLAMA" 11434 "/api/embeddings" \
    '{"model":"bge-m3","prompt":"smoke test"}' 30 '"embedding"'
  echo

  echo "=== nvidia-smi after ==="
  after=$(vram)
  echo " $after"
  echo
  echo " baseline: $base"
  echo " after : $after"
  echo

  echo "=== summary ==="
  echo " pass=$PASS fail=$FAIL"

  # Exit statuses wrap modulo 256; cap so a large count can never read as 0.
  exit "$((FAIL < 255 ? FAIL : 255))"
}

main "$@"