From 5cabf728e66b2201c5feeda95ad2b8409dbfa254 Mon Sep 17 00:00:00 2001 From: hyungi Date: Wed, 17 Jun 2026 23:35:43 +0000 Subject: [PATCH] =?UTF-8?q?fix(search):=20reranker=20MAX=5FCLIENT=5FBATCH?= =?UTF-8?q?=5FSIZE=2064=E2=86=92256?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rerank_service.py 가 후보를 MAX_RERANK_INPUT=200 까지 청크 없이 한 번에 TEI 로 POST → TEI 한도 64 초과(85) 시 HTTPError → RRF silent fallback(리랭크 누락=검색 품질 저하, 48h 4회). MAX_BATCH_TOKENS=16384 가 VRAM 상한이라 client batch entries 한도만 256(MAX_RERANK_INPUT 200 커버)으로 상향, reranker 만 재생성. 검증: 85-text rerank HTTP 200, batch 에러 0. Co-Authored-By: Claude Opus 4.8 (1M context) --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 4b986d5..0f0b349 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -149,7 +149,7 @@ services: # → 32 한도 초과 → 413. 64 로 늘림. # GPU VRAM free 6199MiB 충분. baseline path (MAX_RERANK_INPUT=200) 영향 0. - MAX_BATCH_TOKENS=16384 - - MAX_CLIENT_BATCH_SIZE=64 + - MAX_CLIENT_BATCH_SIZE=256 # 2026-06-18 fix: 64→256, MAX_RERANK_INPUT=200 커버 (batch>64 ERROR=RRF silent fallback 해소; MAX_BATCH_TOKENS가 VRAM 상한이라 entries 증가는 VRAM 무관) - MAX_CONCURRENT_REQUESTS=4 volumes: - reranker_cache:/data