# GPU gateway stack — Compose deployment notes:
# - ModelAdapter: generic OpenAI-compatible adapter (stream/complete/health)
# - BackendRegistry: rewriter (EXAONE) + reasoner (Gemma4) health-check loop
# - Two-stage pipeline: EXAONE rewrite -> Gemma reasoning (SSE rewrite events exposed)
# - Fallback: EXAONE-only mode when the Mac mini is down; automatic switch on mid-stream failure
# - Cancel-safe: checked before/after rewrite, inside the streaming loop, and on the fallback path
# - Rewrite heartbeat: "processing" events every 2 s while waiting on complete_chat
# - JobQueue: Semaphore(3)-based concurrency limit with accurate queue positions
# - GET /chat/{job_id}/status and GET /queue/stats endpoints
# - DB: added rewrite_model, reasoning_model, rewritten_message columns
# Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
services:
  # Reverse proxy / TLS terminator in front of all internal services.
  caddy:
    image: caddy:2-alpine
    container_name: gpu-caddy
    restart: unless-stopped
    ports:
      # Quoted to avoid YAML's sexagesimal-integer trap on port mappings.
      - "80:80"
      - "443:443"
    volumes:
      - ./caddy/Caddyfile:/etc/caddy/Caddyfile
      - caddy_data:/data
    depends_on:
      - hub-api
      - hub-web
      - nanoclaude
    networks:
      - gateway-net

  # Static frontend for the hub.
  hub-web:
    build: ./hub-web
    container_name: gpu-hub-web
    restart: unless-stopped
    networks:
      - gateway-net

  # Gateway API: auth, backend registry, SQLite persistence.
  hub-api:
    build: ./hub-api
    container_name: gpu-hub-api
    restart: unless-stopped
    environment:
      - OWNER_PASSWORD=${OWNER_PASSWORD}
      - GUEST_PASSWORD=${GUEST_PASSWORD}
      - JWT_SECRET=${JWT_SECRET}
      - BACKENDS_CONFIG=/app/config/backends.json
      - CORS_ORIGINS=${CORS_ORIGINS:-http://localhost:5173}
      - DB_PATH=/app/data/gateway.db
    volumes:
      - hub_data:/app/data
      - ./backends.json:/app/config/backends.json:ro
    extra_hosts:
      # Lets the container reach services on the Docker host.
      - "host.docker.internal:host-gateway"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 15s
      timeout: 5s
      retries: 3
    networks:
      - gateway-net

  # Two-stage chat pipeline: EXAONE rewrite (host Ollama) -> Gemma reasoning (remote MLX).
  nanoclaude:
    build: ./nanoclaude
    container_name: gpu-nanoclaude
    restart: unless-stopped
    environment:
      - EXAONE_BASE_URL=http://host.docker.internal:11434
      - EXAONE_MODEL=${EXAONE_MODEL:-exaone3.5:7.8b-instruct-q8_0}
      - REASONING_BASE_URL=${REASONING_BASE_URL:-http://192.168.1.122:8800}
      - REASONING_MODEL=${REASONING_MODEL:-mlx-community/gemma-4-26b-a4b-it-8bit}
      - PIPELINE_ENABLED=${PIPELINE_ENABLED:-true}
      - MAX_CONCURRENT_JOBS=${MAX_CONCURRENT_JOBS:-3}
      - DB_PATH=/app/data/nanoclaude.db
      # Empty default keeps the service open when no key is configured.
      - API_KEY=${NANOCLAUDE_API_KEY:-}
    volumes:
      - nano_data:/app/data
    extra_hosts:
      - "host.docker.internal:host-gateway"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8100/health"]
      interval: 15s
      timeout: 5s
      retries: 3
    networks:
      - gateway-net
# Named volumes for persistent state (Caddy certs, hub DB, nanoclaude DB).
volumes:
  caddy_data:
  hub_data:
  nano_data:
# Shared bridge network joining all services behind the proxy.
networks:
  gateway-net:
    name: gpu-gateway-network