fix: add backend_model_id mapping to resolve MLX model ID mismatch

The MLX server's model ID (mlx-community/Qwen3.5-35B-A3B-4bit) and the
user-facing ID (qwen3.5:35b-a3b) differ, which caused 500 errors.
Add a backend_model_id field to the registry and translate the ID when proxying.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Hyungi Ahn
2026-03-31 15:14:55 +09:00
parent 7b28252d4f
commit 2dab682e21
3 changed files with 10 additions and 5 deletions
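
Only the router diff is shown below; the registry change (one of the other two
files in this commit) is not included in this view. A minimal sketch of what the
new field might look like, assuming a pydantic-style ModelInfo. Everything here
except backend_model_id and the two model IDs from the commit message is
illustrative, not the actual registry code:

# Hypothetical sketch of the registry side of this change, not the actual file.
from typing import Optional
from pydantic import BaseModel

class ModelInfo(BaseModel):
    id: str                                 # user-facing ID that clients request
    backend: str                            # name of the backend entry that serves it
    backend_model_id: Optional[str] = None  # ID the backend itself expects; optional

# Example entry: users request "qwen3.5:35b-a3b", while the MLX server only
# knows the model as "mlx-community/Qwen3.5-35B-A3B-4bit".
REGISTRY = {
    "qwen3.5:35b-a3b": ModelInfo(
        id="qwen3.5:35b-a3b",
        backend="mlx",
        backend_model_id="mlx-community/Qwen3.5-35B-A3B-4bit",
    ),
}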

@@ -60,12 +60,15 @@ async def chat_completions(body: ChatRequest, request: Request):
     if body.temperature is not None:
         kwargs["temperature"] = body.temperature
+    # Use backend-specific model ID if configured, otherwise use the user-facing ID
+    actual_model = model_info.backend_model_id or body.model
     # Route to appropriate proxy
     if backend.type == "ollama":
         if body.stream:
             return StreamingResponse(
                 proxy_ollama.stream_chat(
-                    backend.url, body.model, messages, **kwargs
+                    backend.url, actual_model, messages, **kwargs
                 ),
                 media_type="text/event-stream",
                 headers={
@@ -75,7 +78,7 @@ async def chat_completions(body: ChatRequest, request: Request):
             )
         else:
             result = await proxy_ollama.complete_chat(
-                backend.url, body.model, messages, **kwargs
+                backend.url, actual_model, messages, **kwargs
             )
             return JSONResponse(content=result)
@@ -83,7 +86,7 @@ async def chat_completions(body: ChatRequest, request: Request):
         if body.stream:
             return StreamingResponse(
                 proxy_openai.stream_chat(
-                    backend.url, body.model, messages, **kwargs
+                    backend.url, actual_model, messages, **kwargs
                 ),
                 media_type="text/event-stream",
                 headers={
@@ -93,7 +96,7 @@ async def chat_completions(body: ChatRequest, request: Request):
             )
         else:
             result = await proxy_openai.complete_chat(
-                backend.url, body.model, messages, **kwargs
+                backend.url, actual_model, messages, **kwargs
             )
             return JSONResponse(content=result)
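
With the mapping in place, clients keep requesting qwen3.5:35b-a3b and the
router swaps in the MLX ID before forwarding. Continuing the registry sketch
above (the unmapped llama3:8b entry is made up for contrast), the fallback
behaves roughly like this:

# Same expression the diff adds: prefer backend_model_id, otherwise pass the
# user-facing ID through unchanged.
model_info = REGISTRY["qwen3.5:35b-a3b"]
actual_model = model_info.backend_model_id or "qwen3.5:35b-a3b"
print(actual_model)  # mlx-community/Qwen3.5-35B-A3B-4bit

# A model with no override configured is forwarded under its own ID.
unmapped = ModelInfo(id="llama3:8b", backend="ollama")
print(unmapped.backend_model_id or unmapped.id)  # llama3:8b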