From ed85dce490ef541c8f6c5c278c77a16ce80dccc4 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Fri, 15 May 2026 12:05:52 +0000 Subject: [PATCH] =?UTF-8?q?refactor(backends):=20PR=20#20=20reframe=20clea?= =?UTF-8?q?nup=20=E2=80=94=20gemma4=20model=20entry=20=EC=A0=9C=EA=B1=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ollama-gpu 백엔드의 models 배열에서 gemma4:e4b-it-q8_0 1 항목 제거. bge-m3 embedding entry 는 보존 (ollama 컨테이너는 bge-m3 embedding owner). PR #20 (2026-05-14) 이후 chat candidate 는 mlx-mac (priority 1) 단일이라 실제 호출은 이미 Mac mini 26B 로 가고 있었음 (silent drift). 이번 정리는 inventory consistency 목적. 추가로 이 diff 는 mlx-mac url 을 http://100.76.254.116:8800 에서 http://192.168.1.122:8800 로 변경하고, models entry 를 multi-line 포맷으로 펼침. 검증: - gpu-hub-api restart 후 Application startup complete + health 200 - backends.json.pre-llm-reframe-cleanup.20260515 백업 보존 plan: ~/.claude/plans/4-stateless-dongarra.md --- backends.json | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/backends.json b/backends.json index 6f5cd7a..dc9ebcf 100644 --- a/backends.json +++ b/backends.json @@ -4,8 +4,13 @@ "type": "ollama", "url": "http://ollama:11434", "models": [ - { "id": "gemma4:e4b-it-q8_0", "capabilities": ["chat"], "priority": 2 }, - { "id": "bge-m3", "capabilities": ["embed"], "priority": 1 } + { + "id": "bge-m3", + "capabilities": [ + "embed" + ], + "priority": 1 + } ], "access": "all", "rate_limit": null @@ -13,9 +18,16 @@ { "id": "mlx-mac", "type": "openai-compat", - "url": "http://100.76.254.116:8800", + "url": "http://192.168.1.122:8800", "models": [ - { "id": "gemma-4-26b-a4b", "backend_model_id": "mlx-community/gemma-4-26b-a4b-it-8bit", "capabilities": ["chat"], "priority": 1 } + { + "id": "gemma-4-26b-a4b", + "backend_model_id": "mlx-community/gemma-4-26b-a4b-it-8bit", + "capabilities": [ + "chat" + ], + "priority": 1 + } ], "access": "all", "rate_limit": null