diff --git a/backends.json b/backends.json
index 4641464..60bf7b0 100644
--- a/backends.json
+++ b/backends.json
@@ -14,7 +14,7 @@
     "type": "openai-compat",
     "url": "http://192.168.1.122:8800",
     "models": [
-      { "id": "qwen3.5:35b-a3b", "capabilities": ["chat"], "priority": 1 }
+      { "id": "qwen3.5:35b-a3b", "backend_model_id": "mlx-community/Qwen3.5-35B-A3B-4bit", "capabilities": ["chat"], "priority": 1 }
     ],
     "access": "all",
     "rate_limit": null
diff --git a/hub-api/routers/chat.py b/hub-api/routers/chat.py
index 2d1d8a0..a0e43f4 100644
--- a/hub-api/routers/chat.py
+++ b/hub-api/routers/chat.py
@@ -60,12 +60,15 @@ async def chat_completions(body: ChatRequest, request: Request):
     if body.temperature is not None:
         kwargs["temperature"] = body.temperature
 
+    # Use backend-specific model ID if configured, otherwise use the user-facing ID
+    actual_model = model_info.backend_model_id or body.model
+
     # Route to appropriate proxy
     if backend.type == "ollama":
         if body.stream:
             return StreamingResponse(
                 proxy_ollama.stream_chat(
-                    backend.url, body.model, messages, **kwargs
+                    backend.url, actual_model, messages, **kwargs
                 ),
                 media_type="text/event-stream",
                 headers={
@@ -75,7 +78,7 @@ async def chat_completions(body: ChatRequest, request: Request):
             )
         else:
             result = await proxy_ollama.complete_chat(
-                backend.url, body.model, messages, **kwargs
+                backend.url, actual_model, messages, **kwargs
             )
             return JSONResponse(content=result)
 
@@ -83,7 +86,7 @@ async def chat_completions(body: ChatRequest, request: Request):
         if body.stream:
             return StreamingResponse(
                 proxy_openai.stream_chat(
-                    backend.url, body.model, messages, **kwargs
+                    backend.url, actual_model, messages, **kwargs
                 ),
                 media_type="text/event-stream",
                 headers={
@@ -93,7 +96,7 @@ async def chat_completions(body: ChatRequest, request: Request):
             )
         else:
             result = await proxy_openai.complete_chat(
-                backend.url, body.model, messages, **kwargs
+                backend.url, actual_model, messages, **kwargs
             )
             return JSONResponse(content=result)
 
diff --git a/hub-api/services/registry.py b/hub-api/services/registry.py
index a689a43..815422c 100644
--- a/hub-api/services/registry.py
+++ b/hub-api/services/registry.py
@@ -17,6 +17,7 @@ class ModelInfo:
     id: str
     capabilities: list[str]
     priority: int = 1
+    backend_model_id: str = ""  # model ID sent to the backend; empty means the user-facing id is sent as-is
 
 
 @dataclass
@@ -68,6 +69,7 @@ class Registry:
                     id=m["id"],
                     capabilities=m.get("capabilities", ["chat"]),
                     priority=m.get("priority", 1),
+                    backend_model_id=m.get("backend_model_id", ""),
                 )
                 for m in entry.get("models", [])
             ]