diff --git a/app/services/search/classifier_service.py b/app/services/search/classifier_service.py index 9661a48..e6a2b59 100644 --- a/app/services/search/classifier_service.py +++ b/app/services/search/classifier_service.py @@ -22,7 +22,7 @@ from core.utils import setup_logger logger = setup_logger("classifier") -LLM_TIMEOUT_MS = 15000 +LLM_TIMEOUT_MS = 30000 CIRCUIT_THRESHOLD = 5 CIRCUIT_RECOVERY_SEC = 60 diff --git a/config.yaml b/config.yaml index 8c281e6..0264757 100644 --- a/config.yaml +++ b/config.yaml @@ -57,7 +57,7 @@ ai: endpoint: "http://100.76.254.116:8801/v1/chat/completions" model: "mlx-community/gemma-4-26b-a4b-it-8bit" max_tokens: 512 - timeout: 15 # 2026-05-17: 10s 시 httpx ReadTimeout 빈번 (Mac mini 26B 동시 부하 PR #20 후속) — classifier_service.LLM_TIMEOUT_MS (15s) 와 align + timeout: 30 # 2026-05-17: 15s was still tight — under concurrent load elapsed time reached 14.4s, just short of the limit — so raised to 30s for a 2x margin (Mac mini 26B concurrent load). Value is in seconds; keep aligned with classifier_service.LLM_TIMEOUT_MS=30000 (milliseconds) # 제거: vision (미사용) # ─── deep_summary enqueue 폭발 억제 (B-1 R2) ───