Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5cabf728e6 | |||
| cd694e7386 | |||
| 7247d242a2 | |||
| 9434017114 | |||
| 66f3287564 |
@@ -1,8 +1,6 @@
|
||||
# hyungi_Document_Server 설정
|
||||
|
||||
ai:
|
||||
gateway:
|
||||
endpoint: "http://ai-gateway:8080"
|
||||
|
||||
models:
|
||||
# ─── 단일 generation 호스트 routing (2026-05-14 GPU LLM 제거) ───
|
||||
|
||||
+1
-14
@@ -149,7 +149,7 @@ services:
|
||||
# → 32 한도 초과 → 413. 64 로 늘림.
|
||||
# GPU VRAM free 6199MiB 충분. baseline path (MAX_RERANK_INPUT=200) 영향 0.
|
||||
- MAX_BATCH_TOKENS=16384
|
||||
- MAX_CLIENT_BATCH_SIZE=64
|
||||
- MAX_CLIENT_BATCH_SIZE=256 # 2026-06-18 fix: 64→256, MAX_RERANK_INPUT=200 커버 (batch>64 ERROR=RRF silent fallback 해소; MAX_BATCH_TOKENS가 VRAM 상한이라 entries 증가는 VRAM 무관)
|
||||
- MAX_CONCURRENT_REQUESTS=4
|
||||
volumes:
|
||||
- reranker_cache:/data
|
||||
@@ -168,19 +168,6 @@ services:
|
||||
start_period: 120s
|
||||
restart: unless-stopped
|
||||
|
||||
ai-gateway:
|
||||
build: ./gpu-server/services/ai-gateway
|
||||
ports:
|
||||
- "127.0.0.1:8081:8080"
|
||||
environment:
|
||||
- PRIMARY_ENDPOINT=http://100.76.254.116:8801/v1/chat/completions
|
||||
- FALLBACK_ENDPOINT=http://ollama:11434/v1/chat/completions
|
||||
- CLAUDE_API_KEY=${CLAUDE_API_KEY:-}
|
||||
- DAILY_BUDGET_USD=${DAILY_BUDGET_USD:-5.00}
|
||||
# depends_on: ollama 제거 (2026-06-08) — ollama 서비스가 standalone 으로 이관됨.
|
||||
# FALLBACK_ENDPOINT 의 ollama:11434 는 standalone(동일 hostname, DS 망 부착)으로 해소.
|
||||
restart: unless-stopped
|
||||
|
||||
fastapi:
|
||||
build: ./app
|
||||
ports:
|
||||
|
||||
Reference in New Issue
Block a user