gpu-services/backends.json

[
  {
    "id": "ollama-gpu",
    "type": "ollama",
    "url": "http://host.docker.internal:11434",
    "models": [
      { "id": "bge-m3", "capabilities": ["embed"], "priority": 1 }
    ],
    "access": "all",
    "rate_limit": null
  },
  {
    "id": "mlx-mac",
    "type": "openai-compat",
    "url": "http://192.168.1.122:8800",
    "models": [
      { "id": "qwen3.5:35b-a3b", "capabilities": ["chat"], "priority": 1 }
    ],
    "access": "all",
    "rate_limit": null
  }
]