fix: MLX 서버(localhost:8800) 대응 — Ollama API → OpenAI 호환 변경

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-03-26 13:31:52 +09:00
parent 084d3a8c63
commit a77477140b
4 changed files with 29 additions and 23 deletions

View File

@@ -105,17 +105,22 @@ def run_applescript_inline(script: str) -> str:
raise RuntimeError("AppleScript 타임아웃 (인라인)")
def llm_generate(prompt: str, model: str = "mlx-community/Qwen3.5-35B-A3B-4bit",
                 host: str = "http://localhost:8800", *,
                 temperature: float = 0.3, max_tokens: int = 1024,
                 timeout: float = 120) -> str:
    """Call an OpenAI-compatible chat-completions endpoint (MLX server).

    Sends *prompt* as a single-turn user message to
    ``{host}/v1/chat/completions`` and returns the assistant reply text.

    Args:
        prompt: User message sent as the sole chat turn.
        model: Model identifier passed through to the server.
        host: Base URL of the server (no trailing slash).
        temperature: Sampling temperature (keyword-only; default 0.3,
            matching the previous hard-coded value).
        max_tokens: Completion length cap (keyword-only; default 1024,
            matching the previous hard-coded value).
        timeout: Request timeout in seconds (keyword-only; default 120).

    Returns:
        The ``content`` string of the first choice's message.

    Raises:
        requests.HTTPError: If the server responds with a non-2xx status.
        RuntimeError: If the response JSON lacks the expected
            ``choices[0].message.content`` structure.
    """
    # Local import: keeps the module importable even when `requests`
    # is absent and the LLM path is never exercised.
    import requests
    resp = requests.post(
        f"{host}/v1/chat/completions",
        json={
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": temperature,
            "max_tokens": max_tokens,
        },
        timeout=timeout,
    )
    resp.raise_for_status()
    # Guard the deep indexing: a 2xx response with an unexpected body
    # (e.g. an error object) would otherwise surface as an opaque KeyError.
    try:
        return resp.json()["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(f"Unexpected completion response shape from {host}") from err


# Backward-compatibility alias for callers still using the Ollama-era name.
ollama_generate = llm_generate
def count_log_errors(log_file: Path, since_hours: int = 24) -> int: