From a77477140b32ed8b367ad5a1f13091ea2e394b11 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Thu, 26 Mar 2026 13:31:52 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20MLX=20=EC=84=9C=EB=B2=84(localhost:8800)?= =?UTF-8?q?=20=EB=8C=80=EC=9D=91=20=E2=80=94=20Ollama=20API=20=E2=86=92=20?= =?UTF-8?q?OpenAI=20=ED=98=B8=ED=99=98=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 5 +++-- applescript/auto_classify.scpt | 7 ++++--- docs/architecture.md | 21 ++++++++++----------- scripts/pkm_utils.py | 19 ++++++++++++------- 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 486fcb4..ca33844 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -80,8 +80,9 @@ manual — 직접 추가 → dataOrigin = work (기본 ``` Tier 1 (Mac mini, 상시): - qwen3.5:35b-a3b-q4_K_M — 태그 생성, 문서 분류, 요약 - → http://localhost:11434/api/generate + mlx-community/Qwen3.5-35B-A3B-4bit — 태그 생성, 문서 분류, 요약 + → http://localhost:8800/v1/chat/completions (OpenAI 호환 API) + → MLX 서버로 실행 중 (Ollama 아님) Tier 2 (Claude API, 필요시): claude-sonnet — 복잡한 분석, 장문 처리 diff --git a/applescript/auto_classify.scpt b/applescript/auto_classify.scpt index 4fb8ce1..66a6e6e 100644 --- a/applescript/auto_classify.scpt +++ b/applescript/auto_classify.scpt @@ -27,8 +27,8 @@ on performSmartRule(theRecords) -- 문서 텍스트를 프롬프트에 삽입 (특수문자 이스케이프) set escapedText to do shell script "echo " & quoted form of docText & " | sed 's/\\\\/\\\\\\\\/g; s/\"/\\\\\"/g; s/\\n/\\\\n/g' | head -c 4000" - -- 3. Ollama API 호출 - set curlCmd to "curl -s --max-time 120 http://localhost:11434/api/generate -d '{\"model\": \"qwen3.5:35b-a3b-q4_K_M\", \"prompt\": " & quoted form of escapedText & ", \"stream\": false, \"format\": \"json\"}'" + -- 3. MLX 서버 API 호출 (OpenAI 호환) + set curlCmd to "curl -s --max-time 120 http://localhost:8800/v1/chat/completions -H 'Content-Type: application/json' -d '{\"model\": \"mlx-community/Qwen3.5-35B-A3B-4bit\", \"messages\": [{\"role\": \"user\", \"content\": " & quoted form of escapedText & "}], \"temperature\": 0.3, \"max_tokens\": 1024}'" set jsonResult to do shell script curlCmd -- 4. JSON 파싱 (Python 사용) @@ -36,7 +36,8 @@ on performSmartRule(theRecords) import sys, json try: r = json.loads(sys.stdin.read()) - d = json.loads(r.get('response', '{}')) + content = r['choices'][0]['message']['content'] + d = json.loads(content) tags = ','.join(d.get('tags', [])) db = d.get('domain_db', '00_Note_BOX') grp = d.get('sub_group', '00_Inbox') diff --git a/docs/architecture.md b/docs/architecture.md index 95dd00d..0417d16 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -2,7 +2,7 @@ ## 시스템 개요 -Mac mini M4 Pro(64GB RAM, 4TB SSD)를 중심으로, DEVONthink를 **중앙 지식 허브**로 두고, Omni 제품군으로 **실행/계획**, Synology NAS로 **저장/백업**, AI(Claude API + Ollama)로 **지능화**하는 통합 PKM 시스템. +Mac mini M4 Pro(64GB RAM, 4TB SSD)를 중심으로, DEVONthink를 **중앙 지식 허브**로 두고, Omni 제품군으로 **실행/계획**, Synology NAS로 **저장/백업**, AI(Claude API + MLX/Ollama)로 **지능화**하는 통합 PKM 시스템. ``` ┌─────────────────────────────────────────────────────────────┐ @@ -279,10 +279,10 @@ on performSmartRule(theRecords) set docText to text 1 thru 4000 of docText end if - -- Step 1: Mac mini 35B → 태그 + 분류 대상 DB/그룹 생성 - set shellCmd to "curl -s http://localhost:11434/api/generate -d '{" & ¬ - "\"model\": \"qwen3.5:35b-a3b-q4_K_M\"," & ¬ - "\"prompt\": \"다음 문서를 분석하고 JSON으로 응답해줘.\\n" & ¬ + -- Step 1: Mac mini MLX 35B → 태그 + 분류 대상 DB/그룹 생성 + set shellCmd to "curl -s http://localhost:8800/v1/chat/completions -H 'Content-Type: application/json' -d '{" & ¬ + "\"model\": \"mlx-community/Qwen3.5-35B-A3B-4bit\"," & ¬ + "\"messages\": [{\"role\":\"user\",\"content\":\"다음 문서를 분석하고 JSON으로 응답해줘.\\n" & ¬ "{\\\"tags\\\": [최대5개_한글태그],\\n" & ¬ " \\\"domain_db\\\": \\\"DB이름\\\",\\n" & ¬ " \\\"sub_group\\\": \\\"하위그룹경로\\\"}\\n\\n" & ¬ @@ -910,11 +910,10 @@ tell application id "DNtp" end tell -- AI로 액션 아이템 추출 -set extractCmd to "curl -s http://localhost:11434/api/generate -d '{" & ¬ - "\"model\":\"qwen3.5:35b-a3b-q4_K_M\"," & ¬ - "\"prompt\":\"다음 텍스트에서 할 일(TODO)만 추출해서 " & ¬ - "한 줄에 하나씩 출력해줘:\\n" & docText & "\"," & ¬ - "\"stream\":false}'" +set extractCmd to "curl -s http://localhost:8800/v1/chat/completions -H 'Content-Type: application/json' -d '{" & ¬ + "\"model\":\"mlx-community/Qwen3.5-35B-A3B-4bit\"," & ¬ + "\"messages\":[{\"role\":\"user\",\"content\":\"다음 텍스트에서 할 일(TODO)만 추출해서 " & ¬ + "한 줄에 하나씩 출력해줘:\\n" & docText & "\"}]}'" set aiResult to do shell script extractCmd -- OmniFocus에 작업 생성 @@ -1325,7 +1324,7 @@ OmniFocus 0.5GB 낮음 OmniPlan 0.5GB 낮음 OmniOutliner 0.3GB 낮음 OmniGraffle 0.5GB 낮음 -Ollama (Qwen3.5-35B-A3B 4Q) ~20GB 중간 MoE: 3B만 활성 +MLX (Qwen3.5-35B-A3B 4bit) ~20GB 중간 MoE: 3B만 활성 ChromaDB 1-2GB 낮음 Roon Core 2-4GB 낮음 Komga 0.5GB 낮음 diff --git a/scripts/pkm_utils.py b/scripts/pkm_utils.py index 6282c4c..1ea3c8a 100644 --- a/scripts/pkm_utils.py +++ b/scripts/pkm_utils.py @@ -105,17 +105,22 @@ def run_applescript_inline(script: str) -> str: raise RuntimeError("AppleScript 타임아웃 (인라인)") -def ollama_generate(prompt: str, model: str = "qwen3.5:35b-a3b-q4_K_M", - host: str = "http://localhost:11434") -> str: - """Ollama API 호출""" +def llm_generate(prompt: str, model: str = "mlx-community/Qwen3.5-35B-A3B-4bit", + host: str = "http://localhost:8800") -> str: + """MLX 서버 API 호출 (OpenAI 호환)""" import requests - resp = requests.post(f"{host}/api/generate", json={ + resp = requests.post(f"{host}/v1/chat/completions", json={ "model": model, - "prompt": prompt, - "stream": False + "messages": [{"role": "user", "content": prompt}], + "temperature": 0.3, + "max_tokens": 1024, }, timeout=120) resp.raise_for_status() - return resp.json().get("response", "") + return resp.json()["choices"][0]["message"]["content"] + + +# 하위호환 별칭 +ollama_generate = llm_generate def count_log_errors(log_file: Path, since_hours: int = 24) -> int: