Files
hyungi_document_server/docker-compose.yml
T
Hyungi Ahn 98ee7dffe2 ops(gpu-health): GPU 서비스 health/smoke 표준화 + synthetic VRAM 피크 가드
PR-GPU-Health-1. 운영 준비성 표준화 PR (모델 성능 개선 아님).

- OCR /smoke endpoint 추가 (160x60 OK PNG in-memory, 200/503 분기, Docker healthcheck 미사용)
- marker /health endpoint 추가 (stt/ocr 동일 시그니처)
- reranker docker-compose healthcheck 추가 (TEI :80/health)
- scripts/gpu_service_smoke.sh: docker exec 표준 점검 (OCR/STT expose-only)
- scripts/gpu_vram_fixture.sh: Mode A sequential + Mode B light overlap + --stress 옵션
- tests/load/fixtures/: synthetic ocr_ok.png / sine_30s.wav / lorem_1p.pdf

OCR 빈 응답 false negative — root cause: ports 미매핑.
결정: ocr-service / stt-service 는 expose-only 유지, 운영 점검은 docker exec 내부 curl 표준.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 09:42:07 +09:00

238 lines
7.2 KiB
YAML

services:
postgres:
image: pgvector/pgvector:pg16
volumes:
- pgdata:/var/lib/postgresql/data
- ./migrations:/docker-entrypoint-initdb.d
environment:
POSTGRES_DB: pkm
POSTGRES_USER: pkm
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
ports:
- "127.0.0.1:15432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U pkm"]
interval: 5s
timeout: 5s
retries: 5
restart: unless-stopped
kordoc-service:
build: ./services/kordoc
ports:
- "127.0.0.1:3100:3100"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
mem_limit: 4g
memswap_limit: 4g
healthcheck:
test: ["CMD", "node", "-e", "fetch('http://localhost:3100/health').then(r=>{process.exit(r.ok?0:1)}).catch(()=>process.exit(1))"]
interval: 10s
timeout: 5s
retries: 3
restart: unless-stopped
ocr-service:
build: ./services/ocr
expose:
- "3200"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- ocr_models:/root/.cache
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:3200/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 180s
restart: unless-stopped
# Phase 1B (2026-05-01): PDF → markdown 변환. ocr-service 와 별도 컨테이너 (deps 충돌 회피).
marker-service:
build: ./services/marker
ports:
- "127.0.0.1:3300:3300"
expose:
- "3300"
environment:
- HF_HOME=/models/huggingface
- TORCH_HOME=/models/torch
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- marker_models:/models
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3300/ready"]
interval: 30s
timeout: 10s
retries: 3
start_period: 300s
restart: unless-stopped
stt-service:
# 2026-05-08 (D9 Track B revised): GPU is canonical STT owner.
# 정책: Mac mini = Gemma 26B 전용 우선이므로 STT/Whisper 는 호출량 무관 GPU 서버 소유.
# 이전 "Mac mini 이전본" 주석은 trace 오인 기반이었고 본 revised 결정으로 폐기.
# fastapi 의 STT_ENDPOINT 는 `http://stt-service:3300` (compose 내부 DNS) 사용.
build: ./services/stt
expose:
- "3300"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- stt_models:/root/.cache
environment:
- WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
- WHISPER_DEVICE=${WHISPER_DEVICE:-cuda}
- WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE:-float16}
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
# /ready: CUDA 디바이스 + 모델 적재 둘 다 확인. ready=true 만 healthy 처리.
# /health 는 단순 liveness 라 모델 미적재 상태도 healthy 로 잡혀 운영 신호로 부적합.
test: ["CMD", "python3", "-c", "import json,urllib.request,sys; r=urllib.request.urlopen('http://localhost:3300/ready'); sys.exit(0 if json.load(r).get('ready') else 1)"]
interval: 30s
timeout: 10s
retries: 3
start_period: 300s
restart: unless-stopped
ollama:
image: ollama/ollama
volumes:
- ollama_data:/root/.ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
ports:
- "127.0.0.1:11434:11434"
restart: unless-stopped
# Phase 1.3: bge-reranker-v2-m3 (TEI) — internal only, fastapi에서 reranker:80으로 호출
# fastapi가 depends_on 안 함 → 단독 시작 가능, 없어도 fastapi 동작 (rerank=false fallback)
reranker:
image: ghcr.io/huggingface/text-embeddings-inference:1.7
container_name: hyungi_document_server-reranker-1
expose:
- "80"
environment:
- MODEL_ID=BAAI/bge-reranker-v2-m3
- MAX_BATCH_TOKENS=8192
- MAX_CONCURRENT_REQUESTS=4
volumes:
- reranker_cache:/data
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "curl", "-fsS", "http://localhost/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 120s
restart: unless-stopped
ai-gateway:
build: ./gpu-server/services/ai-gateway
ports:
- "127.0.0.1:8081:8080"
environment:
- PRIMARY_ENDPOINT=http://100.76.254.116:8801/v1/chat/completions
- FALLBACK_ENDPOINT=http://ollama:11434/v1/chat/completions
- CLAUDE_API_KEY=${CLAUDE_API_KEY:-}
- DAILY_BUDGET_USD=${DAILY_BUDGET_USD:-5.00}
depends_on:
- ollama
restart: unless-stopped
fastapi:
build: ./app
ports:
- "127.0.0.1:8000:8000"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents
- ./config.yaml:/app/config.yaml:ro
- ./domain_policy.yaml:/app/domain_policy.yaml:ro
- ./scripts:/app/scripts:ro
- ./logs:/app/logs
- ./migrations:/app/migrations:ro
depends_on:
postgres:
condition: service_healthy
kordoc-service:
condition: service_healthy
marker-service:
condition: service_healthy
env_file:
- credentials.env
environment:
- DATABASE_URL=postgresql+asyncpg://pkm:${POSTGRES_PASSWORD}@postgres:5432/pkm
- KORDOC_ENDPOINT=http://kordoc-service:3100
- OCR_ENDPOINT=http://ocr-service:3200
- MARKER_ENDPOINT=http://marker-service:3300
- MARKER_CONTAINER_PATH_PREFIX=/documents
# 2026-05-08 (D9 Track B revised): GPU stt-service 정식 승격, 내부 DNS 사용.
- STT_ENDPOINT=http://stt-service:3300
# KGS Code 등 외부 학습 자료 추가 스캔 경로 (host .env 에서 주입). 빈 값이면 비활성.
- ADDITIONAL_WATCH_TARGETS=${ADDITIONAL_WATCH_TARGETS:-}
# Voice Memo PoC v1 — bot 계정 한정 long-expiry access token. default false → 일반 운영 영향 0.
# 활성화: host .env 에 VOICE_MEMO_BOT_TOKEN_ENABLED=true. plan: rosy-launching-otter.md
- VOICE_MEMO_BOT_TOKEN_ENABLED=${VOICE_MEMO_BOT_TOKEN_ENABLED:-false}
- VOICE_MEMO_BOT_USERNAME=${VOICE_MEMO_BOT_USERNAME:-voice-memo-bot}
- VOICE_MEMO_BOT_TOKEN_EXPIRE_DAYS=${VOICE_MEMO_BOT_TOKEN_EXPIRE_DAYS:-365}
restart: unless-stopped
frontend:
build: ./frontend
ports:
- "127.0.0.1:3000:3000"
depends_on:
- fastapi
restart: unless-stopped
caddy:
image: caddy:2
ports:
- "8080:80"
volumes:
- ./Caddyfile:/etc/caddy/Caddyfile
- caddy_data:/data
depends_on:
- fastapi
- frontend
restart: unless-stopped
volumes:
pgdata:
caddy_data:
ollama_data:
reranker_cache:
ocr_models:
stt_models:
marker_models: