fix: backend_model_id 매핑 추가 — MLX 모델 ID 불일치 해결
MLX 서버의 모델 ID(mlx-community/Qwen3.5-35B-A3B-4bit)와 사용자에게 노출되는 ID(qwen3.5:35b-a3b)가 서로 달라 500 에러가 발생했다. registry에 backend_model_id 필드를 추가하여, 백엔드로 프록시할 때 사용자 노출 ID를 백엔드 모델 ID로 변환한다.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,7 +14,7 @@
|
|||||||
"type": "openai-compat",
|
"type": "openai-compat",
|
||||||
"url": "http://192.168.1.122:8800",
|
"url": "http://192.168.1.122:8800",
|
||||||
"models": [
|
"models": [
|
||||||
{ "id": "qwen3.5:35b-a3b", "capabilities": ["chat"], "priority": 1 }
|
{ "id": "qwen3.5:35b-a3b", "backend_model_id": "mlx-community/Qwen3.5-35B-A3B-4bit", "capabilities": ["chat"], "priority": 1 }
|
||||||
],
|
],
|
||||||
"access": "all",
|
"access": "all",
|
||||||
"rate_limit": null
|
"rate_limit": null
|
||||||
|
|||||||
@@ -60,12 +60,15 @@ async def chat_completions(body: ChatRequest, request: Request):
|
|||||||
if body.temperature is not None:
|
if body.temperature is not None:
|
||||||
kwargs["temperature"] = body.temperature
|
kwargs["temperature"] = body.temperature
|
||||||
|
|
||||||
|
# Use backend-specific model ID if configured, otherwise use the user-facing ID
|
||||||
|
actual_model = model_info.backend_model_id or body.model
|
||||||
|
|
||||||
# Route to appropriate proxy
|
# Route to appropriate proxy
|
||||||
if backend.type == "ollama":
|
if backend.type == "ollama":
|
||||||
if body.stream:
|
if body.stream:
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
proxy_ollama.stream_chat(
|
proxy_ollama.stream_chat(
|
||||||
backend.url, body.model, messages, **kwargs
|
backend.url, actual_model, messages, **kwargs
|
||||||
),
|
),
|
||||||
media_type="text/event-stream",
|
media_type="text/event-stream",
|
||||||
headers={
|
headers={
|
||||||
@@ -75,7 +78,7 @@ async def chat_completions(body: ChatRequest, request: Request):
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
result = await proxy_ollama.complete_chat(
|
result = await proxy_ollama.complete_chat(
|
||||||
backend.url, body.model, messages, **kwargs
|
backend.url, actual_model, messages, **kwargs
|
||||||
)
|
)
|
||||||
return JSONResponse(content=result)
|
return JSONResponse(content=result)
|
||||||
|
|
||||||
@@ -83,7 +86,7 @@ async def chat_completions(body: ChatRequest, request: Request):
|
|||||||
if body.stream:
|
if body.stream:
|
||||||
return StreamingResponse(
|
return StreamingResponse(
|
||||||
proxy_openai.stream_chat(
|
proxy_openai.stream_chat(
|
||||||
backend.url, body.model, messages, **kwargs
|
backend.url, actual_model, messages, **kwargs
|
||||||
),
|
),
|
||||||
media_type="text/event-stream",
|
media_type="text/event-stream",
|
||||||
headers={
|
headers={
|
||||||
@@ -93,7 +96,7 @@ async def chat_completions(body: ChatRequest, request: Request):
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
result = await proxy_openai.complete_chat(
|
result = await proxy_openai.complete_chat(
|
||||||
backend.url, body.model, messages, **kwargs
|
backend.url, actual_model, messages, **kwargs
|
||||||
)
|
)
|
||||||
return JSONResponse(content=result)
|
return JSONResponse(content=result)
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ class ModelInfo:
|
|||||||
id: str
|
id: str
|
||||||
capabilities: list[str]
|
capabilities: list[str]
|
||||||
priority: int = 1
|
priority: int = 1
|
||||||
|
backend_model_id: str = "" # actual model ID sent to backend (if different from id)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -68,6 +69,7 @@ class Registry:
|
|||||||
id=m["id"],
|
id=m["id"],
|
||||||
capabilities=m.get("capabilities", ["chat"]),
|
capabilities=m.get("capabilities", ["chat"]),
|
||||||
priority=m.get("priority", 1),
|
priority=m.get("priority", 1),
|
||||||
|
backend_model_id=m.get("backend_model_id", ""),
|
||||||
)
|
)
|
||||||
for m in entry.get("models", [])
|
for m in entry.get("models", [])
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user