From 8f4413a38cede4396d8b6b5d0a11ce24c796e157 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Thu, 14 May 2026 09:51:59 +0900 Subject: [PATCH] =?UTF-8?q?fix(gpu-health):=20scripts=20=ED=98=B8=EC=B6=9C?= =?UTF-8?q?=20=EB=8F=84=EA=B5=AC=EB=A5=BC=20host=20curl=20+=20container=20?= =?UTF-8?q?IP=20=EB=A1=9C=20=ED=86=B5=EC=9D=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OCR/STT 컨테이너 안에 curl 미설치 (slim python image). docker exec curl 표준은 실측 OCI exec 실패. host curl + docker bridge IP (172.20.0.x) 로 변경 — host publish 추가 아니라 docker network 내부 검증이라 보안 표면 동일. reranker 만 curl 있고 OCR/marker/STT 는 python 만 있어 분기 발생을 회피. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/gpu_service_smoke.sh | 47 ++++++++++++++++++++++++------------ scripts/gpu_vram_fixture.sh | 41 +++++++++++++++---------------- 2 files changed, 51 insertions(+), 37 deletions(-) diff --git a/scripts/gpu_service_smoke.sh b/scripts/gpu_service_smoke.sh index 27de2db..f60b6d2 100755 --- a/scripts/gpu_service_smoke.sh +++ b/scripts/gpu_service_smoke.sh @@ -1,26 +1,37 @@ #!/usr/bin/env bash # GPU 미디어/검색 서비스 health/ready/smoke 점검 (PR-GPU-Health-1). -# OCR/STT/reranker 는 expose-only 라 docker exec 내부 curl 표준 경로 사용. -# marker 는 ports 매핑이 있지만 일관성을 위해 동일 패턴. +# OCR/STT 는 expose-only (host publish 없음). docker bridge IP 로 호스트에서 직접 호출 — +# host publish 추가 아니라 docker network 내부 검증 (보안 표면 동일). set -uo pipefail OCR=hyungi_document_server-ocr-service-1 MARKER=hyungi_document_server-marker-service-1 RERANKER=hyungi_document_server-reranker-1 STT=hyungi_document_server-stt-service-1 +OLLAMA=ollama PASS=0 FAIL=0 +container_ip() { + docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$1" 2>/dev/null +} + vram() { nvidia-smi --query-gpu=memory.used,memory.free --format=csv,noheader,nounits 2>/dev/null \ | awk -F',' '{printf "used=%dMiB free=%dMiB\n", $1, $2}' } probe() { - local label="$1" container="$2" path="$3" timeout="${4:-5}" + local label="$1" container="$2" port="$3" path="$4" timeout="${5:-5}" + local ip=$(container_ip "$container") printf " %-22s " "$label" - if out=$(docker exec "$container" curl -fsS -m "$timeout" "$path" 2>&1); then + if [[ -z "$ip" ]]; then + echo "FAIL (container IP 없음)" + FAIL=$((FAIL+1)) + return + fi + if out=$(curl -fsS -m "$timeout" "http://$ip:$port$path" 2>&1); then echo "OK $(echo "$out" | head -c 120)" PASS=$((PASS+1)) else @@ -30,9 +41,15 @@ probe() { } probe_post() { - local label="$1" container="$2" url="$3" body="$4" timeout="${5:-30}" expect="${6:-}" + local label="$1" container="$2" port="$3" path="$4" body="$5" timeout="${6:-30}" expect="${7:-}" + local ip=$(container_ip "$container") printf " %-22s " "$label" - if out=$(docker exec "$container" curl -fsS -m "$timeout" -H 'Content-Type: application/json' -X POST -d "$body" "$url" 2>&1); then + if [[ -z "$ip" ]]; then + echo "FAIL (container IP 없음)" + FAIL=$((FAIL+1)) + return + fi + if out=$(curl -fsS -m "$timeout" -H 'Content-Type: application/json' -X POST -d "$body" "http://$ip:$port$path" 2>&1); then if [[ -z "$expect" || "$out" == *"$expect"* ]]; then echo "OK $(echo "$out" | head -c 100)" PASS=$((PASS+1)) @@ -51,18 +68,18 @@ BASE=$(vram); echo " $BASE" echo echo "=== health / ready ===" -probe "OCR /health" "$OCR" "http://127.0.0.1:3200/health" 5 -probe "OCR /ready" "$OCR" "http://127.0.0.1:3200/ready" 5 -probe "marker /health" "$MARKER" "http://127.0.0.1:3300/health" 5 -probe "marker /ready" "$MARKER" "http://127.0.0.1:3300/ready" 5 -probe "reranker /health" "$RERANKER" "http://127.0.0.1:80/health" 5 -probe "stt /health" "$STT" "http://127.0.0.1:3300/health" 5 -probe "stt /ready" "$STT" "http://127.0.0.1:3300/ready" 5 +probe "OCR /health" "$OCR" 3200 "/health" 5 +probe "OCR /ready" "$OCR" 3200 "/ready" 5 +probe "marker /health" "$MARKER" 3300 "/health" 5 +probe "marker /ready" "$MARKER" 3300 "/ready" 5 +probe "reranker /health" "$RERANKER" 80 "/health" 5 +probe "stt /health" "$STT" 3300 "/health" 5 +probe "stt /ready" "$STT" 3300 "/ready" 5 echo echo "=== smoke ===" -probe "OCR /smoke" "$OCR" "http://127.0.0.1:3200/smoke" 30 -probe_post "bge-m3 embed" "$OCR" "http://ollama:11434/api/embeddings" '{"model":"bge-m3","prompt":"smoke test"}' 30 '"embedding"' +probe "OCR /smoke" "$OCR" 3200 "/smoke" 30 +probe_post "bge-m3 embed" "$OLLAMA" 11434 "/api/embeddings" '{"model":"bge-m3","prompt":"smoke test"}' 30 '"embedding"' echo echo "=== nvidia-smi after ===" diff --git a/scripts/gpu_vram_fixture.sh b/scripts/gpu_vram_fixture.sh index aef8ba5..a043502 100755 --- a/scripts/gpu_vram_fixture.sh +++ b/scripts/gpu_vram_fixture.sh @@ -1,12 +1,14 @@ #!/usr/bin/env bash # synthetic fixture 기반 GPU VRAM 피크 검증 (PR-GPU-Health-1). -# Mode A (sequential) + Mode B (light overlap) 기본. --stress 옵션은 5개 동시 (기본 gate 미포함). +# Mode A (sequential) + Mode B (light overlap) 기본. --stress (5 concurrent) 옵션. +# 호출은 호스트 curl + container IP (docker bridge 내부, host publish 추가 아님). set -uo pipefail OCR=hyungi_document_server-ocr-service-1 MARKER=hyungi_document_server-marker-service-1 RERANKER=hyungi_document_server-reranker-1 STT=hyungi_document_server-stt-service-1 +OLLAMA=ollama REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)" FIX="$REPO_ROOT/tests/load/fixtures" @@ -16,6 +18,10 @@ mkdir -p "$REPO_ROOT/reports" STRESS_MODE=0 [[ "${1:-}" == "--stress" ]] && STRESS_MODE=1 +container_ip() { + docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$1" 2>/dev/null +} + vram() { nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d ' ' } @@ -26,29 +32,20 @@ copy_fixtures() { docker cp "$FIX/sine_30s.wav" $STT:/tmp/sine_30s.wav >/dev/null } -call_ocr() { - docker exec "$OCR" curl -fsS -m 60 -X POST -H 'Content-Type: application/json' \ - -d '{"filePath":"/tmp/ocr_ok.png"}' http://127.0.0.1:3200/ocr >/dev/null -} -call_marker() { - docker exec "$MARKER" curl -fsS -m 180 -X POST -H 'Content-Type: application/json' \ - -d '{"file_path":"/tmp/lorem_1p.pdf"}' http://127.0.0.1:3300/convert >/dev/null -} -call_stt() { - docker exec "$STT" curl -fsS -m 180 -X POST -H 'Content-Type: application/json' \ - -d '{"filePath":"/tmp/sine_30s.wav","langs":["en"],"beamSize":1}' http://127.0.0.1:3300/transcribe >/dev/null -} -call_rerank() { - docker exec "$RERANKER" curl -fsS -m 30 -X POST -H 'Content-Type: application/json' \ - -d '{"query":"smoke","texts":["foo bar baz","alpha beta gamma"]}' http://127.0.0.1:80/rerank >/dev/null -} -call_embed() { - docker exec "$OCR" curl -fsS -m 30 -X POST -H 'Content-Type: application/json' \ - -d '{"model":"bge-m3","prompt":"smoke test"}' http://ollama:11434/api/embeddings >/dev/null -} +OCR_IP=$(container_ip $OCR) +MARKER_IP=$(container_ip $MARKER) +RERANKER_IP=$(container_ip $RERANKER) +STT_IP=$(container_ip $STT) +OLLAMA_IP=$(container_ip $OLLAMA) + +call_ocr() { curl -fsS -m 60 -X POST -H 'Content-Type: application/json' -d '{"filePath":"/tmp/ocr_ok.png"}' "http://$OCR_IP:3200/ocr" >/dev/null; } +call_marker() { curl -fsS -m 180 -X POST -H 'Content-Type: application/json' -d '{"file_path":"/tmp/lorem_1p.pdf"}' "http://$MARKER_IP:3300/convert" >/dev/null; } +call_stt() { curl -fsS -m 180 -X POST -H 'Content-Type: application/json' -d '{"filePath":"/tmp/sine_30s.wav","langs":["en"],"beamSize":1}' "http://$STT_IP:3300/transcribe" >/dev/null; } +call_rerank() { curl -fsS -m 30 -X POST -H 'Content-Type: application/json' -d '{"query":"smoke","texts":["foo bar baz","alpha beta gamma"]}' "http://$RERANKER_IP:80/rerank" >/dev/null; } +call_embed() { curl -fsS -m 30 -X POST -H 'Content-Type: application/json' -d '{"model":"bge-m3","prompt":"smoke test"}' "http://$OLLAMA_IP:11434/api/embeddings" >/dev/null; } run_named() { - local name="$1"; local fn="$2" + local name="$1" fn="$2" local before=$(vram) if $fn; then status="OK"; else status="FAIL"; fi local after=$(vram)