From a332a8aabe66f55fb79425c554dd81428142f3a8 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Sat, 16 May 2026 19:42:49 +0900 Subject: [PATCH] =?UTF-8?q?fix(search):=20classifier=20timeout=2015s=20?= =?UTF-8?q?=E2=86=92=2030s=20(concurrent=20load=202x=20margin)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A1+config(15s) 후속 진단: voice memo PoC plan 호출 elapsed_ms=14432 — 15s 한계 거의 밀착. Mac mini 26B 동시 부하 (classifier + evidence + synthesis 3-way) 시 빈번 ReadTimeout 잔존. 30s 로 2x 마진 확보 — config.yaml + classifier_service.py 양쪽 align. Phase 3.5 guardrail 동작 자체에는 영향 없음 (timeout 시 fallback 경로 동일). 향후 별 트랙 (DS-Mac-mini-26B-Concurrent-Load-1): asyncio.Semaphore 도입으로 Mac mini 26B 동시 호출 제한 vs triage 만 작은 모델 재도입. 본 PR 은 timeout 완화만. --- app/services/search/classifier_service.py | 2 +- config.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/services/search/classifier_service.py b/app/services/search/classifier_service.py index 9661a48..e6a2b59 100644 --- a/app/services/search/classifier_service.py +++ b/app/services/search/classifier_service.py @@ -22,7 +22,7 @@ from core.utils import setup_logger logger = setup_logger("classifier") -LLM_TIMEOUT_MS = 15000 +LLM_TIMEOUT_MS = 30000 CIRCUIT_THRESHOLD = 5 CIRCUIT_RECOVERY_SEC = 60 diff --git a/config.yaml b/config.yaml index 8c281e6..0264757 100644 --- a/config.yaml +++ b/config.yaml @@ -57,7 +57,7 @@ ai: endpoint: "http://100.76.254.116:8801/v1/chat/completions" model: "mlx-community/gemma-4-26b-a4b-it-8bit" max_tokens: 512 - timeout: 15 # 2026-05-17: 10s 시 httpx ReadTimeout 빈번 (Mac mini 26B 동시 부하 PR #20 후속) — classifier_service.LLM_TIMEOUT_MS (15s) 와 align + timeout: 30 # 2026-05-17: 15s 도 동시 부하 시 elapsed 14.4s 직전이라 tight — 30s 로 2x 마진 (Mac mini 26B concurrent load). classifier_service.LLM_TIMEOUT_MS=30000 와 align # 제거: vision (미사용) # ─── deep_summary enqueue 폭발 억제 (B-1 R2) ───