From 3d79002dfa8ad5ea3ee7add7e8436e2ef53f1df9 Mon Sep 17 00:00:00 2001 From: hyungi Date: Thu, 11 Jun 2026 17:29:45 +0900 Subject: [PATCH] =?UTF-8?q?ops(ai):=20Qwen=2027B=20=ED=94=84=EB=A6=AC?= =?UTF-8?q?=ED=95=84=20=EC=8B=A4=EC=B8=A1(~112=20tok/s)=20=EB=B0=98?= =?UTF-8?q?=EC=98=81=20timeout=20=EC=83=81=ED=96=A5=20=E2=80=94=20triage?= =?UTF-8?q?=20480=20/=20primary=20900?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 장문(context_char_limit 상한급) 프리필이 수 분 걸려 기존 120/300s 로는 timeout 실패 churn. 단일 코루틴 컨슈머라 장문 1건이 사이클을 수 분 점유하는 것은 수용(관찰 후 배칭/컨텍스트 튜닝 PR). Co-Authored-By: Claude Fable 5 --- config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.yaml b/config.yaml index 58a2c45..da70dc6 100644 --- a/config.yaml +++ b/config.yaml @@ -16,7 +16,7 @@ ai: endpoint: "http://100.76.254.116:8801/v1/chat/completions" model: "mlx-community/Qwen3.6-27B-6bit" max_tokens: 4096 - timeout: 120 + timeout: 480 # 프리필 실측 ~112 tok/s — 120K자 장문 커버 (2026-06-11) context_char_limit: 120000 temperature: 0.0 @@ -25,7 +25,7 @@ ai: endpoint: "http://100.76.254.116:8801/v1/chat/completions" model: "mlx-community/Qwen3.6-27B-6bit" max_tokens: 8192 - timeout: 300 + timeout: 900 # 프리필 실측 ~112 tok/s — 260K자 상한 장문 커버 (2026-06-11) context_char_limit: 260000 temperature: 0.3 top_p: 0.9