diff --git a/backends.json b/backends.json index 4641464..60bf7b0 100644 --- a/backends.json +++ b/backends.json @@ -14,7 +14,7 @@ "type": "openai-compat", "url": "http://192.168.1.122:8800", "models": [ - { "id": "qwen3.5:35b-a3b", "capabilities": ["chat"], "priority": 1 } + { "id": "qwen3.5:35b-a3b", "backend_model_id": "mlx-community/Qwen3.5-35B-A3B-4bit", "capabilities": ["chat"], "priority": 1 } ], "access": "all", "rate_limit": null diff --git a/hub-api/routers/chat.py b/hub-api/routers/chat.py index 2d1d8a0..a0e43f4 100644 --- a/hub-api/routers/chat.py +++ b/hub-api/routers/chat.py @@ -60,12 +60,15 @@ async def chat_completions(body: ChatRequest, request: Request): if body.temperature is not None: kwargs["temperature"] = body.temperature + # Use backend-specific model ID if configured, otherwise use the user-facing ID + actual_model = model_info.backend_model_id or body.model + # Route to appropriate proxy if backend.type == "ollama": if body.stream: return StreamingResponse( proxy_ollama.stream_chat( - backend.url, body.model, messages, **kwargs + backend.url, actual_model, messages, **kwargs ), media_type="text/event-stream", headers={ @@ -75,7 +78,7 @@ async def chat_completions(body: ChatRequest, request: Request): ) else: result = await proxy_ollama.complete_chat( - backend.url, body.model, messages, **kwargs + backend.url, actual_model, messages, **kwargs ) return JSONResponse(content=result) @@ -83,7 +86,7 @@ async def chat_completions(body: ChatRequest, request: Request): if body.stream: return StreamingResponse( proxy_openai.stream_chat( - backend.url, body.model, messages, **kwargs + backend.url, actual_model, messages, **kwargs ), media_type="text/event-stream", headers={ @@ -93,7 +96,7 @@ async def chat_completions(body: ChatRequest, request: Request): ) else: result = await proxy_openai.complete_chat( - backend.url, body.model, messages, **kwargs + backend.url, actual_model, messages, **kwargs ) return JSONResponse(content=result) diff --git a/hub-api/services/registry.py b/hub-api/services/registry.py index a689a43..815422c 100644 --- a/hub-api/services/registry.py +++ b/hub-api/services/registry.py @@ -17,6 +17,7 @@ class ModelInfo: id: str capabilities: list[str] priority: int = 1 + backend_model_id: str = "" # actual model ID sent to backend (if different from id) @dataclass @@ -68,6 +69,7 @@ class Registry: id=m["id"], capabilities=m.get("capabilities", ["chat"]), priority=m.get("priority", 1), + backend_model_id=m.get("backend_model_id", ""), ) for m in entry.get("models", []) ]