---
# Docker Compose stack for the document server: PostgreSQL/pgvector,
# document-processing services (kordoc, OCR, marker, STT), LLM serving
# (ollama + ai-gateway), a reranker, the FastAPI app, the frontend and Caddy.
#
# Every published host port is bound to 127.0.0.1 except caddy's 8080.
# ${...} values are interpolated by Compose from the host .env / shell.

services:
  # PostgreSQL 16 with pgvector. ./migrations is mounted as the image's
  # init-script directory.
  postgres:
    image: pgvector/pgvector:pg16
    volumes:
      - pgdata:/var/lib/postgresql/data
      - ./migrations:/docker-entrypoint-initdb.d
    environment:
      POSTGRES_DB: pkm
      POSTGRES_USER: pkm
      # Required: Compose aborts with this message instead of silently
      # substituting an empty password.
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}"
    ports:
      - "127.0.0.1:15432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U pkm"]
      interval: 5s
      timeout: 5s
      retries: 5
    restart: unless-stopped

  # Document service built from ./services/kordoc; reads the NAS share
  # read-only.
  kordoc-service:
    build: ./services/kordoc
    ports:
      - "127.0.0.1:3100:3100"
    volumes:
      - ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
    # memswap_limit equal to mem_limit: no swap on top of the 4g of RAM.
    mem_limit: 4g
    memswap_limit: 4g
    healthcheck:
      test: ["CMD", "node", "-e", "fetch('http://localhost:3100/health').then(r=>{process.exit(r.ok?0:1)}).catch(()=>process.exit(1))"]
      interval: 10s
      timeout: 5s
      retries: 3
    restart: unless-stopped

  # OCR service (GPU). Internal only: the port is exposed, not published.
  ocr-service:
    build: ./services/ocr
    expose:
      - "3200"
    volumes:
      - ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
      - ocr_models:/root/.cache
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:3200/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 180s
    restart: unless-stopped

  # Phase 1B (2026-05-01): PDF → markdown conversion. Runs in a container
  # separate from ocr-service (to avoid dependency conflicts).
  marker-service:
    build: ./services/marker
    ports:
      - "127.0.0.1:3300:3300"
    # NOTE(review): 3300 is already published above, so this expose entry
    # looks redundant — kept as in the original; confirm before removing.
    expose:
      - "3300"
    environment:
      - HF_HOME=/models/huggingface
      - TORCH_HOME=/models/torch
    volumes:
      - ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
      - marker_models:/models
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3300/ready"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 300s
    restart: unless-stopped

  stt-service:
    # 2026-05-08 (D9 Track B revised): GPU is canonical STT owner.
    # Policy: the Mac mini is prioritized as dedicated to Gemma 26B, so
    # STT/Whisper is owned by the GPU server regardless of call volume.
    # The earlier "moved to Mac mini" comment was based on a misread trace
    # and is retracted by this revised decision.
    # fastapi's STT_ENDPOINT uses `http://stt-service:3300`
    # (compose-internal DNS).
    build: ./services/stt
    expose:
      - "3300"
    volumes:
      - ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
      - stt_models:/root/.cache
    environment:
      - WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
      - WHISPER_DEVICE=${WHISPER_DEVICE:-cuda}
      - WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE:-float16}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # /ready checks both the CUDA device and model load; only ready=true
      # counts as healthy. /health is a plain liveness probe that reports
      # healthy even before the model is loaded, so it is unsuitable as an
      # operational signal.
      test: ["CMD", "python3", "-c", "import json,urllib.request,sys; r=urllib.request.urlopen('http://localhost:3300/ready'); sys.exit(0 if json.load(r).get('ready') else 1)"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 300s
    restart: unless-stopped

  # Local LLM runtime (GPU); used by ai-gateway as FALLBACK_ENDPOINT.
  # NOTE(review): the image is untagged (implicitly :latest) — consider
  # pinning a version for reproducible deploys.
  ollama:
    image: ollama/ollama
    volumes:
      - ollama_data:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    ports:
      - "127.0.0.1:11434:11434"
    restart: unless-stopped

  # Phase 1.3: bge-reranker-v2-m3 (TEI) — internal only; fastapi calls it
  # at reranker:80.
  # fastapi does not depends_on it → it can start standalone, and fastapi
  # still works without it (rerank=false fallback).
  reranker:
    image: ghcr.io/huggingface/text-embeddings-inference:1.7
    container_name: hyungi_document_server-reranker-1
    expose:
      - "80"
    environment:
      - MODEL_ID=BAAI/bge-reranker-v2-m3
      - MAX_BATCH_TOKENS=8192
      - MAX_CONCURRENT_REQUESTS=4
    volumes:
      - reranker_cache:/data
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 120s
    restart: unless-stopped

  # Chat-completions gateway: primary endpoint on a remote host, fallback
  # to the local ollama container.
  ai-gateway:
    build: ./gpu-server/services/ai-gateway
    ports:
      - "127.0.0.1:8081:8080"
    environment:
      - PRIMARY_ENDPOINT=http://100.76.254.116:8801/v1/chat/completions
      - FALLBACK_ENDPOINT=http://ollama:11434/v1/chat/completions
      - CLAUDE_API_KEY=${CLAUDE_API_KEY:-}
      - DAILY_BUDGET_USD=${DAILY_BUDGET_USD:-5.00}
    depends_on:
      - ollama
    restart: unless-stopped

  # Main application. Waits for postgres, kordoc-service and marker-service
  # to be healthy. The NAS share is mounted read-write here (read-only in
  # the worker services).
  fastapi:
    build: ./app
    ports:
      - "127.0.0.1:8000:8000"
    volumes:
      - ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents
      - ./config.yaml:/app/config.yaml:ro
      - ./domain_policy.yaml:/app/domain_policy.yaml:ro
      - ./scripts:/app/scripts:ro
      - ./logs:/app/logs
      - ./migrations:/app/migrations:ro
    depends_on:
      postgres:
        condition: service_healthy
      kordoc-service:
        condition: service_healthy
      marker-service:
        condition: service_healthy
    env_file:
      - credentials.env
    environment:
      # Same required-variable guard as the postgres service.
      - "DATABASE_URL=postgresql+asyncpg://pkm:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}@postgres:5432/pkm"
      - KORDOC_ENDPOINT=http://kordoc-service:3100
      - OCR_ENDPOINT=http://ocr-service:3200
      - MARKER_ENDPOINT=http://marker-service:3300
      - MARKER_CONTAINER_PATH_PREFIX=/documents
      # 2026-05-08 (D9 Track B revised): GPU stt-service promoted to the
      # official STT backend; uses internal DNS.
      - STT_ENDPOINT=http://stt-service:3300
      # Additional scan paths for external study material such as KGS Code
      # (injected from the host .env). Disabled when empty.
      - ADDITIONAL_WATCH_TARGETS=${ADDITIONAL_WATCH_TARGETS:-}
      # Voice Memo PoC v1 — long-expiry access token restricted to the bot
      # account. Defaults to false → no impact on normal operation.
      # To enable: set VOICE_MEMO_BOT_TOKEN_ENABLED=true in the host .env.
      # Plan: rosy-launching-otter.md
      - VOICE_MEMO_BOT_TOKEN_ENABLED=${VOICE_MEMO_BOT_TOKEN_ENABLED:-false}
      - VOICE_MEMO_BOT_USERNAME=${VOICE_MEMO_BOT_USERNAME:-voice-memo-bot}
      - VOICE_MEMO_BOT_TOKEN_EXPIRE_DAYS=${VOICE_MEMO_BOT_TOKEN_EXPIRE_DAYS:-365}
    restart: unless-stopped

  # Web frontend; started after fastapi.
  frontend:
    build: ./frontend
    ports:
      - "127.0.0.1:3000:3000"
    depends_on:
      - fastapi
    restart: unless-stopped

  # Caddy in front of fastapi and frontend. The only service published on
  # all host interfaces (host 8080 → container 80).
  caddy:
    image: caddy:2
    ports:
      - "8080:80"
    volumes:
      # Read-only, matching the other config bind mounts in this file.
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - caddy_data:/data
    depends_on:
      - fastapi
      - frontend
    restart: unless-stopped

# Named volumes (default driver).
volumes:
  pgdata:
  caddy_data:
  ollama_data:
  reranker_cache:
  ocr_models:
  stt_models:
  marker_models: