fix(ai): primary -> mlx-proxy 8801 + align model to gemma
- endpoint: 100.76.254.116:8800 -> :8801 (route through mlx-proxy for /status observability: active_jobs / total_requests)
- model: Qwen3.5-35B-A3B-4bit -> gemma-4-26b-a4b-it-8bit (match the model actually loaded on mlx-proxy)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
@@ -6,8 +6,8 @@ ai:
   models:
     primary:
-      endpoint: "http://100.76.254.116:8800/v1/chat/completions"
-      model: "mlx-community/Qwen3.5-35B-A3B-4bit"
+      endpoint: "http://100.76.254.116:8801/v1/chat/completions"
+      model: "mlx-community/gemma-4-26b-a4b-it-8bit"
       max_tokens: 4096
       timeout: 60
 
 
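For illustration, a minimal sketch of what a client request against the updated primary endpoint looks like. It assumes mlx-proxy speaks the OpenAI-compatible chat-completions schema (implied by the /v1/chat/completions path); the /status field names (active_jobs, total_requests) come from the commit message, but the full shape of that response is hypothetical.

```python
import json

# Request body for the new primary endpoint, using the values from the
# updated config. Assumes an OpenAI-compatible chat-completions schema.
payload = {
    "model": "mlx-community/gemma-4-26b-a4b-it-8bit",
    "messages": [{"role": "user", "content": "ping"}],
    "max_tokens": 4096,
}
body = json.dumps(payload)

# Hypothetical /status response from mlx-proxy: the two field names are
# taken from the commit message; the rest of the shape is assumed.
status = json.loads('{"active_jobs": 0, "total_requests": 12}')
print(status["active_jobs"], status["total_requests"])
```

A real call would POST `body` to http://100.76.254.116:8801/v1/chat/completions and poll the proxy's /status endpoint for the counters above.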