diff --git a/docker-compose.yml b/docker-compose.yml index 4b986d5..0f0b349 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -149,7 +149,7 @@ services: # → 32 한도 초과 → 413. 64 로 늘림. # GPU VRAM free 6199MiB 충분. baseline path (MAX_RERANK_INPUT=200) 영향 0. - MAX_BATCH_TOKENS=16384 - - MAX_CLIENT_BATCH_SIZE=64 + - MAX_CLIENT_BATCH_SIZE=256 # 2026-06-18 fix: 64→256 to cover MAX_RERANK_INPUT=200 (resolves the batch>64 ERROR that caused a silent RRF fallback; MAX_BATCH_TOKENS is the VRAM cap, so raising the entry count does not affect VRAM) - MAX_CONCURRENT_REQUESTS=4 volumes: - reranker_cache:/data