From de08735420f0df40fb4e473924a39cab473c70a9 Mon Sep 17 00:00:00 2001
From: Hyungi Ahn
Date: Wed, 8 Apr 2026 04:40:06 +0000
Subject: [PATCH] fix(ai): primary -> mlx-proxy 8801 + align model to gemma

- endpoint: 100.76.254.116:8800 -> :8801 (route through mlx-proxy for /status observability - active_jobs / total_requests)
- model: Qwen3.5-35B-A3B-4bit -> gemma-4-26b-a4b-it-8bit (match the model actually loaded on mlx-proxy)

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config.yaml b/config.yaml
index 53a4f9f..7b5d589 100644
--- a/config.yaml
+++ b/config.yaml
@@ -6,8 +6,8 @@ ai:
   models:
     primary:
-      endpoint: "http://100.76.254.116:8800/v1/chat/completions"
-      model: "mlx-community/Qwen3.5-35B-A3B-4bit"
+      endpoint: "http://100.76.254.116:8801/v1/chat/completions"
+      model: "mlx-community/gemma-4-26b-a4b-it-8bit"
       max_tokens: 4096
       timeout: 60