From 2dab682e2148677e7b5736740e703599bfc67253 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Tue, 31 Mar 2026 15:14:55 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20backend=5Fmodel=5Fid=20=EB=A7=A4?= =?UTF-8?q?=ED=95=91=20=EC=B6=94=EA=B0=80=20=E2=80=94=20MLX=20=EB=AA=A8?= =?UTF-8?q?=EB=8D=B8=20ID=20=EB=B6=88=EC=9D=BC=EC=B9=98=20=ED=95=B4?= =?UTF-8?q?=EA=B2=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MLX 서버 모델 ID(mlx-community/Qwen3.5-35B-A3B-4bit)와 사용자 노출 ID(qwen3.5:35b-a3b)가 달라 500 에러 발생. registry에 backend_model_id 필드 추가하여 프록시 시 변환. Co-Authored-By: Claude Opus 4.6 (1M context) --- backends.json | 2 +- hub-api/routers/chat.py | 11 +++++++---- hub-api/services/registry.py | 2 ++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/backends.json b/backends.json index 4641464..60bf7b0 100644 --- a/backends.json +++ b/backends.json @@ -14,7 +14,7 @@ "type": "openai-compat", "url": "http://192.168.1.122:8800", "models": [ - { "id": "qwen3.5:35b-a3b", "capabilities": ["chat"], "priority": 1 } + { "id": "qwen3.5:35b-a3b", "backend_model_id": "mlx-community/Qwen3.5-35B-A3B-4bit", "capabilities": ["chat"], "priority": 1 } ], "access": "all", "rate_limit": null diff --git a/hub-api/routers/chat.py b/hub-api/routers/chat.py index 2d1d8a0..a0e43f4 100644 --- a/hub-api/routers/chat.py +++ b/hub-api/routers/chat.py @@ -60,12 +60,15 @@ async def chat_completions(body: ChatRequest, request: Request): if body.temperature is not None: kwargs["temperature"] = body.temperature + # Use backend-specific model ID if configured, otherwise use the user-facing ID + actual_model = model_info.backend_model_id or body.model + # Route to appropriate proxy if backend.type == "ollama": if body.stream: return StreamingResponse( proxy_ollama.stream_chat( - backend.url, body.model, messages, **kwargs + backend.url, actual_model, messages, **kwargs ), media_type="text/event-stream", headers={ @@ -75,7 +78,7 @@ async def chat_completions(body: ChatRequest, request: Request): ) else: result = await proxy_ollama.complete_chat( - backend.url, body.model, messages, **kwargs + backend.url, actual_model, messages, **kwargs ) return JSONResponse(content=result) @@ -83,7 +86,7 @@ async def chat_completions(body: ChatRequest, request: Request): if body.stream: return StreamingResponse( proxy_openai.stream_chat( - backend.url, body.model, messages, **kwargs + backend.url, actual_model, messages, **kwargs ), media_type="text/event-stream", headers={ @@ -93,7 +96,7 @@ async def chat_completions(body: ChatRequest, request: Request): ) else: result = await proxy_openai.complete_chat( - backend.url, body.model, messages, **kwargs + backend.url, actual_model, messages, **kwargs ) return JSONResponse(content=result) diff --git a/hub-api/services/registry.py b/hub-api/services/registry.py index a689a43..815422c 100644 --- a/hub-api/services/registry.py +++ b/hub-api/services/registry.py @@ -17,6 +17,7 @@ class ModelInfo: id: str capabilities: list[str] priority: int = 1 + backend_model_id: str = "" # actual model ID sent to backend (if different from id) @dataclass @@ -68,6 +69,7 @@ class Registry: id=m["id"], capabilities=m.get("capabilities", ["chat"]), priority=m.get("priority", 1), + backend_model_id=m.get("backend_model_id", ""), ) for m in entry.get("models", []) ]