fix(deploy): primary endpoint -> mlx-proxy 8801

Change the primary endpoint from 100.76.254.116:8800 to 100.76.254.116:8801
so that requests route through mlx-proxy and gain /status observability
(active_jobs / total_requests).
This commit is contained in:
Hyungi Ahn
2026-04-08 02:56:08 +00:00
parent ca3e1952d2
commit 32c96d6191
+1 -1
View File
@@ -6,7 +6,7 @@ ai:
models:
primary:
-endpoint: "http://100.76.254.116:8800/v1/chat/completions"
+endpoint: "http://100.76.254.116:8801/v1/chat/completions"
model: "mlx-community/Qwen3.5-35B-A3B-4bit"
max_tokens: 4096
timeout: 60