From d4e1f76e81c792a5dfcc222cf42b5fe8ea35c7eb Mon Sep 17 00:00:00 2001
From: hyungi <hyun49196@gmail.com>
Date: Sun, 14 Jun 2026 23:55:44 +0000
Subject: [PATCH] =?UTF-8?q?fix(news)!:=20mlx=5Fgate=5Fconcurrency=204?=
 =?UTF-8?q?=E2=86=922=20=EB=A1=A4=EB=B0=B1=20=E2=80=94=20gate=3D4=20?=
 =?UTF-8?q?=EA=B0=80=20=EB=8C=80=ED=98=95=20=ED=94=84=EB=A1=AC=ED=94=84?=
 =?UTF-8?q?=ED=8A=B8(digest/briefing+deep=206764tok)=20=EB=8F=99=EC=8B=9C?=
 =?UTF-8?q?=EC=84=B1=EC=9C=BC=EB=A1=9C=20=EB=A7=A5=EB=AF=B8=EB=8B=88=20mlx?=
 =?UTF-8?q?=5Fvlm=20OOM/=EC=97=B0=EA=B2=B0=EB=93=9C=EB=A1=AD=20=EC=9C=A0?=
 =?UTF-8?q?=EB=B0=9C(08:45=20=EC=84=9C=EB=B2=84=20=ED=81=AC=EB=9E=98?=
 =?UTF-8?q?=EC=8B=9C=C2=B7=EC=9E=AC=EC=8B=9C=EC=9E=91=20=EC=8B=A4=EC=B8=A1?=
 =?UTF-8?q?).=20digest=20cap=203000=E2=86=925400(gate=3D2=20=EB=B3=B4?=
 =?UTF-8?q?=EC=A0=95).=20timeout/deep-split=20=EC=9C=A0=EC=A7=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

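Also moves the triage, primary and classifier endpoints from port 8801 to
8890 (this change is not covered by the subject line).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>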
---
 config.yaml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/config.yaml b/config.yaml
index 416a7ea..9be6e5e 100644
--- a/config.yaml
+++ b/config.yaml
@@ -13,7 +13,7 @@ ai:
 
     # triage: 상시 분류·요약·근거 선별. Mac mini Qwen 27B (primary 와 동일 endpoint, 짧은 max_tokens).
     triage:
-      endpoint: "http://100.76.254.116:8801/v1/chat/completions"
+      endpoint: "http://100.76.254.116:8890/v1/chat/completions"
       model: "mlx-community/Qwen3.6-27B-6bit"
       max_tokens: 4096
       timeout: 480  # 프리필 실측 ~112 tok/s — 120K자 장문 커버 (2026-06-11)
@@ -22,7 +22,7 @@ ai:
 
     # primary: 에스컬레이션 전용. Qwen 27B MLX (맥미니 Semaphore(1) 보호 대상).
     primary:
-      endpoint: "http://100.76.254.116:8801/v1/chat/completions"
+      endpoint: "http://100.76.254.116:8890/v1/chat/completions"
       model: "mlx-community/Qwen3.6-27B-6bit"
       max_tokens: 8192
       timeout: 900  # 프리필 실측 ~112 tok/s — 260K자 상한 장문 커버 (2026-06-11)
@@ -72,7 +72,7 @@ ai:
     # Phase 3.5a answerability classifier. 2026-05-14 GPU LLM 제거 후 Mac mini 26B 로 swap.
     # classifier_service 가 hasattr 체크로 optional 이므로 이 섹션 제거 시 classifier gate 는 자동 skip (score-only).
     classifier:
-      endpoint: "http://100.76.254.116:8801/v1/chat/completions"
+      endpoint: "http://100.76.254.116:8890/v1/chat/completions"
       model: "mlx-community/Qwen3.6-27B-6bit"  # 2026-06-11 B안 동승 — gemma id 잔존 시 mlx 서버가 Gemma 를 재로드(이중 적재) 위험
       max_tokens: 512
       timeout: 30  # 2026-05-17: 15s 도 동시 부하 시 elapsed 14.4s 직전이라 tight — 30s 로 2x 마진. classifier_service.LLM_TIMEOUT_MS=30000 와 align (초과 = score-only skip, graceful)
@@ -203,10 +203,10 @@ pipeline:
   # (2026-06-11 밤 6~8 concurrent 실측 정상). 2026-06-15: 2→4 — digest/briefing 합성을
   # 이 단일 게이트(BACKGROUND 우선순위)로 라우팅하며 digest(클러스터 44~68)가 하드캡 내
   # 완료되도록 동시성 확보. ask/eid(FOREGROUND)는 큐 점프라 영향 최소. 되돌리면 구 동작.
-  mlx_gate_concurrency: 4
+  mlx_gate_concurrency: 2  # rolled back from 4: gate=4 ran large digest/briefing+deep prompts (6764 tok) concurrently and caused Mac mini mlx_vlm OOM / connection drops (server crash + restart observed at 08:45); the 2→4 note above is historical
   # 2026-06-15: digest/briefing 생성 LLM 파라미터 (모델 교체 후 단일소스, 상세 = config.py).
   # 구 하드코딩 25s(빠른 Gemma)가 Qwen 27B(콜당 ~90~300s) 교체 sweep 누락 → digest 600s
   # 초과·briefing 4/4 폴백. 동시성은 위 mlx_gate_concurrency 가 담당(별 키 없음).
   digest_llm_timeout_s: 300
   digest_llm_attempts: 2
-  digest_pipeline_hard_cap_s: 3000
+  digest_pipeline_hard_cap_s: 5400  # raised from 3000 to compensate for the lower mlx_gate_concurrency (2)