Files
hyungi_document_server/docker-compose.yml
T
hyungi 7d06816bac fix(ops): DS compose 잉여 ollama 서비스 제거 — 매주 재부팅 outage 근본 해소
DS compose 의 ollama 서비스가 standalone ~/ollama 컨테이너와 host 127.0.0.1:11434 를
다퉈, 정기 재부팅 후 `docker compose up` 이 'port already allocated' 로 abort →
caddy·frontend 미기동 = 웹 outage(2026-06-08 internal error). standalone 이 이미
hyungi_document_server_default 망 + 동일 ollama_data 볼륨(external) 부착으로 fastapi
`ollama:11434` 임베딩을 서빙하므로 DS 서비스는 100% 잉여 → 제거(서비스+ai-gateway
depends_on). ollama_data 볼륨 def 는 standalone external 참조용으로 보존. 임베딩 무영향.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 07:15:24 +09:00

252 lines
9.2 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
services:
postgres:
image: pgvector/pgvector:pg16
volumes:
- pgdata:/var/lib/postgresql/data
- ./migrations:/docker-entrypoint-initdb.d
environment:
POSTGRES_DB: pkm
POSTGRES_USER: pkm
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
ports:
- "100.110.63.63:15432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U pkm"]
interval: 5s
timeout: 5s
retries: 5
restart: unless-stopped
kordoc-service:
build: ./services/kordoc
ports:
- "127.0.0.1:3100:3100"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
mem_limit: 4g
memswap_limit: 4g
healthcheck:
test: ["CMD", "node", "-e", "fetch('http://localhost:3100/health').then(r=>{process.exit(r.ok?0:1)}).catch(()=>process.exit(1))"]
interval: 10s
timeout: 5s
retries: 3
restart: unless-stopped
ocr-service:
build: ./services/ocr
expose:
- "3200"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- ocr_models:/root/.cache
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:3200/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 180s
restart: unless-stopped
# Phase 1B (2026-05-01): PDF → markdown 변환. ocr-service 와 별도 컨테이너 (deps 충돌 회피).
marker-service:
build: ./services/marker
ports:
- "127.0.0.1:3300:3300"
expose:
- "3300"
environment:
- HF_HOME=/models/huggingface
- TORCH_HOME=/models/torch
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- marker_models:/models
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3300/ready"]
interval: 30s
timeout: 10s
retries: 3
start_period: 300s
restart: unless-stopped
stt-service:
# 2026-05-08 (D9 Track B revised): GPU is canonical STT owner.
# 정책: Mac mini = Gemma 26B 전용 우선이므로 STT/Whisper 는 호출량 무관 GPU 서버 소유.
# 이전 "Mac mini 이전본" 주석은 trace 오인 기반이었고 본 revised 결정으로 폐기.
# fastapi 의 STT_ENDPOINT 는 `http://stt-service:3300` (compose 내부 DNS) 사용.
build: ./services/stt
expose:
- "3300"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- stt_models:/root/.cache
environment:
- WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
- WHISPER_DEVICE=${WHISPER_DEVICE:-cuda}
- WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE:-float16}
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
# /ready: CUDA 디바이스 + 모델 적재 둘 다 확인. ready=true 만 healthy 처리.
# /health 는 단순 liveness 라 모델 미적재 상태도 healthy 로 잡혀 운영 신호로 부적합.
test: ["CMD", "python3", "-c", "import json,urllib.request,sys; r=urllib.request.urlopen('http://localhost:3300/ready'); sys.exit(0 if json.load(r).get('ready') else 1)"]
interval: 30s
timeout: 10s
retries: 3
start_period: 300s
restart: unless-stopped
# ── ollama 서비스 제거 (2026-06-08) ──
# 정본 ollama = standalone `~/ollama/docker-compose.yml`(container_name: ollama).
# 그 컨테이너가 hyungi_document_server_default 망(external) + 동일 볼륨
# hyungi_document_server_ollama_data(external, bge-m3) 부착으로 fastapi 의 `ollama:11434`
# 임베딩을 이미 서빙(재부팅에도 durable). 본 중복 서비스는 같은 host 127.0.0.1:11434 를
# 점유 다퉈, 재부팅 후 `docker compose up` 을 'port already allocated' 로 abort →
# 뒤 의존서비스(caddy·frontend) 미기동 = 웹 outage 유발 → 제거. (ollama_data 볼륨 def 는
# standalone 이 external 로 참조하므로 아래 volumes: 에 보존.)
# Phase 1.3: bge-reranker-v2-m3 (TEI) — internal only, fastapi에서 reranker:80으로 호출
# fastapi가 depends_on 안 함 → 단독 시작 가능, 없어도 fastapi 동작 (rerank=false fallback)
reranker:
image: ghcr.io/huggingface/text-embeddings-inference:1.7
container_name: hyungi_document_server-reranker-1
expose:
- "80"
environment:
- MODEL_ID=BAAI/bge-reranker-v2-m3
# PR-2Q-Rerank-Payload-Fix (2026-05-24): 2 env 변경 — 413 root cause 분리.
# (a) MAX_BATCH_TOKENS 8192 → 16384: 각 batch 의 token sum 한도
# (b) MAX_CLIENT_BATCH_SIZE 32 → 64: 1 request 안 entries 수 한도 (default 32).
# multi-query cap 60 chunks (×2 chunks_per_doc dedup) 가 batch entries 54~60
# → 32 한도 초과 → 413. 64 로 늘림.
# GPU VRAM free 6199MiB 충분. baseline path (MAX_RERANK_INPUT=200) 영향 0.
- MAX_BATCH_TOKENS=16384
- MAX_CLIENT_BATCH_SIZE=64
- MAX_CONCURRENT_REQUESTS=4
volumes:
- reranker_cache:/data
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "curl", "-fsS", "http://localhost/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 120s
restart: unless-stopped
ai-gateway:
build: ./gpu-server/services/ai-gateway
ports:
- "127.0.0.1:8081:8080"
environment:
- PRIMARY_ENDPOINT=http://100.76.254.116:8801/v1/chat/completions
- FALLBACK_ENDPOINT=http://ollama:11434/v1/chat/completions
- CLAUDE_API_KEY=${CLAUDE_API_KEY:-}
- DAILY_BUDGET_USD=${DAILY_BUDGET_USD:-5.00}
# depends_on: ollama 제거 (2026-06-08) — ollama 서비스가 standalone 으로 이관됨.
# FALLBACK_ENDPOINT 의 ollama:11434 는 standalone(동일 hostname, DS 망 부착)으로 해소.
restart: unless-stopped
fastapi:
build: ./app
ports:
- "100.110.63.63:8000:8000"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents
- ./config.yaml:/app/config.yaml:ro
- ./domain_policy.yaml:/app/domain_policy.yaml:ro
- ./scripts:/app/scripts:ro
- ./logs:/app/logs
- ./migrations:/app/migrations:ro
depends_on:
postgres:
condition: service_healthy
kordoc-service:
condition: service_healthy
marker-service:
condition: service_healthy
env_file:
- credentials.env
environment:
- DATABASE_URL=postgresql+asyncpg://pkm:${POSTGRES_PASSWORD}@postgres:5432/pkm
- KORDOC_ENDPOINT=http://kordoc-service:3100
- OCR_ENDPOINT=http://ocr-service:3200
- MARKER_ENDPOINT=http://marker-service:3300
- MARKER_CONTAINER_PATH_PREFIX=/documents
# 2026-05-08 (D9 Track B revised): GPU stt-service 정식 승격, 내부 DNS 사용.
- STT_ENDPOINT=http://stt-service:3300
# KGS Code 등 외부 학습 자료 추가 스캔 경로 (host .env 에서 주입). 빈 값이면 비활성.
- ADDITIONAL_WATCH_TARGETS=${ADDITIONAL_WATCH_TARGETS:-}
# PR-MacMini-Derived-Worker-1
- STUDY_EXPLANATION_ENABLED=${STUDY_EXPLANATION_ENABLED:-true}
- INTERNAL_WORKER_TOKEN=${INTERNAL_WORKER_TOKEN}
# Voice Memo PoC v1 — bot 계정 한정 long-expiry access token. default false → 일반 운영 영향 0.
# 활성화: host .env 에 VOICE_MEMO_BOT_TOKEN_ENABLED=true. plan: rosy-launching-otter.md
- VOICE_MEMO_BOT_TOKEN_ENABLED=${VOICE_MEMO_BOT_TOKEN_ENABLED:-false}
- VOICE_MEMO_BOT_USERNAME=${VOICE_MEMO_BOT_USERNAME:-voice-memo-bot}
- VOICE_MEMO_BOT_TOKEN_EXPIRE_DAYS=${VOICE_MEMO_BOT_TOKEN_EXPIRE_DAYS:-365}
# PR-Notebook-Client-1 — notebook-client-bot long-expiry token (laptop-worker-bot wrapper 재활용,
# 1B Pilot dormant 후 username rename). default false → 운영 영향 0.
- LAPTOP_WORKER_BOT_TOKEN_ENABLED=${LAPTOP_WORKER_BOT_TOKEN_ENABLED:-false}
- LAPTOP_WORKER_BOT_USERNAME=${LAPTOP_WORKER_BOT_USERNAME:-laptop-worker-bot}
- LAPTOP_WORKER_BOT_TOKEN_EXPIRE_DAYS=${LAPTOP_WORKER_BOT_TOKEN_EXPIRE_DAYS:-365}
# PR-2 of DS AI routing policy (2026-05-23) — backends dispatcher via llm-router.
# router_url default points at Mac mini Tailscale interface :8890 (PR-1).
# DS_BACKENDS_VIA_ROUTER=false 로 legacy 직접 호출 path 즉시 복귀.
- LLM_ROUTER_URL=${LLM_ROUTER_URL:-http://100.76.254.116:8890}
- DS_BACKENDS_VIA_ROUTER=${DS_BACKENDS_VIA_ROUTER:-true}
restart: unless-stopped
frontend:
build: ./frontend
ports:
- "127.0.0.1:3000:3000"
depends_on:
- fastapi
restart: unless-stopped
caddy:
image: caddy:2
ports:
- "8080:80"
volumes:
- ./Caddyfile:/etc/caddy/Caddyfile
- caddy_data:/data
depends_on:
- fastapi
- frontend
restart: unless-stopped
volumes:
pgdata:
caddy_data:
ollama_data:
reranker_cache:
ocr_models:
stt_models:
marker_models: