From 5db2f4f6fa7cd60143879ebc16fb2c2c2ca86e00 Mon Sep 17 00:00:00 2001
From: hyungi
Date: Mon, 30 Mar 2026 13:32:49 +0900
Subject: [PATCH] =?UTF-8?q?feat:=20RAG=20=ED=8C=8C=EC=9D=B4=ED=94=84?=
 =?UTF-8?q?=EB=9D=BC=EC=9D=B8=20=E2=80=94=20pkm=5Fapi=5Fserver.py=EC=97=90?=
 =?UTF-8?q?=20=EA=B2=80=EC=83=89/=EC=9E=84=EB=B2=A0=EB=94=A9=20=EC=97=94?=
 =?UTF-8?q?=EB=93=9C=ED=8F=AC=EC=9D=B8=ED=8A=B8=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- POST /rag/query: 질문 → GPU bge-m3 임베딩 → Qdrant 검색 → MLX 35B 답변 생성
- DEVONthink 링크(x-devonthink-item://UUID) 포함 응답
- POST /devonthink/embed: 단일 문서 UUID → Qdrant 임베딩 트리거
- POST /devonthink/embed-batch: 배치 문서 임베딩
- docstring 범위 갱신: DEVONthink + OmniFocus + RAG 검색

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 scripts/pkm_api_server.py | 193 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 190 insertions(+), 3 deletions(-)

diff --git a/scripts/pkm_api_server.py b/scripts/pkm_api_server.py
index 95c7136..8a301f5 100644
--- a/scripts/pkm_api_server.py
+++ b/scripts/pkm_api_server.py
@@ -1,17 +1,22 @@
 #!/usr/bin/env python3
 """
 PKM Host API Server
-DEVONthink + OmniFocus AppleScript 중계용 경량 HTTP 서버.
+DEVONthink + OmniFocus AppleScript 중계 + RAG 검색 경량 HTTP 서버.
 NanoClaw 컨테이너에서 호출. LaunchAgent(GUI 세션)로 실행 필수.
-범위: DEVONthink + OmniFocus 전용. 이 이상 확장하지 않을 것.
+범위: DEVONthink + OmniFocus + RAG 검색.
 """
 
 import json
+import os
 import subprocess
 import sys
+from pathlib import Path
 
 from flask import Flask, request, jsonify
 
+sys.path.insert(0, str(Path(__file__).parent))
+from pkm_utils import load_credentials
+
 app = Flask(__name__)
 
 
@@ -225,11 +230,193 @@ def omnifocus_today():
         return jsonify(success=False, error=str(e)), 500
 
 
+# --- RAG ---
+
+RAG_MAX_LIMIT = 50  # upper bound applied to the client-supplied "limit"
+RAG_CONTEXT_DOCS = 5  # number of top hits placed in the LLM prompt
+
+
+def _json_body() -> dict:
+    """Return the request's JSON object, or {} if absent or not an object."""
+    data = request.get_json(silent=True)
+    return data if isinstance(data, dict) else {}
+
+
+def _get_gpu_ip():
+    """Return the GPU_SERVER_IP credential, or None when it is not set."""
+    return load_credentials().get("GPU_SERVER_IP")
+
+
+def _embed_text(text: str, gpu_ip: str) -> list[float] | None:
+    """Embed text with bge-m3 on the GPU server; return None on any failure."""
+    import requests as req
+    try:
+        resp = req.post(f"http://{gpu_ip}:11434/api/embed",
+                        json={"model": "bge-m3", "input": [text[:8000]]}, timeout=60)
+        resp.raise_for_status()
+        embeddings = resp.json().get("embeddings") or [[]]
+        return embeddings[0] or None
+    except Exception as e:
+        # Best-effort by design: callers turn None into an error response.
+        print(f"[rag] embedding failed: {e}", file=sys.stderr)
+        return None
+
+
+def _search_qdrant(vector: list[float], limit: int = 20) -> list[dict]:
+    """Run a similarity search against the pkm_documents Qdrant collection."""
+    import requests as req
+    resp = req.post("http://localhost:6333/collections/pkm_documents/points/search",
+                    json={"vector": vector, "limit": limit, "with_payload": True}, timeout=10)
+    resp.raise_for_status()
+    return resp.json().get("result", [])
+
+
+def _llm_generate(prompt: str) -> str:
+    """Generate an answer with the local MLX chat-completions server."""
+    import requests as req
+    resp = req.post("http://localhost:8800/v1/chat/completions", json={
+        "model": "mlx-community/Qwen3.5-35B-A3B-4bit",
+        "messages": [{"role": "user", "content": prompt}],
+        "temperature": 0.3,
+        "max_tokens": 2048,
+    }, timeout=120)
+    resp.raise_for_status()
+    return resp.json()["choices"][0]["message"]["content"]
+
+
+@app.route('/rag/query', methods=['POST'])
+def rag_query():
+    """RAG query: embed the question, search Qdrant, generate an LLM answer.
+
+    JSON body: "q" (required, non-empty string) and "limit" (optional integer,
+    default 10, clamped to 1..RAG_MAX_LIMIT).
+    """
+    data = _json_body()
+    q = data.get('q', '')
+    limit = data.get('limit', 10)
+    if not isinstance(q, str) or not q.strip():
+        return jsonify(success=False, error='q parameter required'), 400
+    if isinstance(limit, bool) or not isinstance(limit, int):
+        return jsonify(success=False, error='limit must be an integer'), 400
+    limit = max(1, min(limit, RAG_MAX_LIMIT))
+
+    gpu_ip = _get_gpu_ip()
+    if not gpu_ip:
+        return jsonify(success=False, error='GPU_SERVER_IP not configured'), 500
+
+    try:
+        # 1. Embed the query
+        query_vec = _embed_text(q, gpu_ip)
+        if not query_vec:
+            return jsonify(success=False, error='embedding failed'), 500
+
+        # 2. Search Qdrant
+        results = _search_qdrant(query_vec, limit=limit)
+        if not results:
+            return jsonify(success=True, answer="관련 문서를 찾지 못했습니다.", sources=[])
+
+        # 3. Assemble the context from the top hits
+        sources = []
+        context_parts = []
+        for r in results[:RAG_CONTEXT_DOCS]:
+            payload = r.get("payload") or {}  # tolerate "payload": null
+            title = payload.get("title", "")
+            preview = payload.get("text_preview", "")
+            doc_uuid = payload.get("uuid", "")
+            sources.append({
+                "title": title,
+                "uuid": doc_uuid,
+                "score": round(r.get("score", 0), 3),
+                "link": f"x-devonthink-item://{doc_uuid}" if doc_uuid else None,
+            })
+            context_parts.append(f"[{title}]\n{preview}")
+
+        context = "\n\n---\n\n".join(context_parts)
+
+        # 4. Generate the answer with the LLM
+        prompt = f"""다음 문서들을 참고하여 질문에 답변해주세요.
+
+## 참고 문서
+{context}
+
+## 질문
+{q}
+
+답변은 한국어로, 참고한 문서 제목을 언급해주세요."""
+
+        answer = _llm_generate(prompt)
+
+        return jsonify(success=True, answer=answer, sources=sources, query=q)
+    except Exception as e:
+        return jsonify(success=False, error=str(e)), 500
+
+
+def _run_embed(doc_uuid) -> tuple[bool, str | None]:
+    """Run embed_to_qdrant.py for one document UUID with the venv interpreter.
+
+    Returns (True, None) on success, otherwise (False, error message).
+    """
+    # Reject non-strings and values the script would parse as a CLI option.
+    if not isinstance(doc_uuid, str) or not doc_uuid or doc_uuid.startswith('-'):
+        return False, 'invalid uuid'
+    scripts_dir = Path(__file__).parent
+    venv_python = str(scripts_dir.parent / "venv" / "bin" / "python3")
+    embed_script = str(scripts_dir / "embed_to_qdrant.py")
+    try:
+        result = subprocess.run(
+            [venv_python, embed_script, doc_uuid],
+            capture_output=True, text=True, timeout=120
+        )
+    except (OSError, ValueError, subprocess.SubprocessError) as e:
+        return False, str(e)
+    if result.returncode != 0:
+        return False, result.stderr.strip()
+    return True, None
+
+
+@app.route('/devonthink/embed', methods=['POST'])
+def devonthink_embed():
+    """Trigger embedding of a single document (JSON body: "uuid")."""
+    doc_uuid = _json_body().get('uuid', '')
+    if not doc_uuid:
+        return jsonify(success=False, error='uuid parameter required'), 400
+    ok, error = _run_embed(doc_uuid)
+    if not ok:
+        return jsonify(success=False, error=error), 500
+    return jsonify(success=True, uuid=doc_uuid)
+
+
+@app.route('/devonthink/embed-batch', methods=['POST'])
+def devonthink_embed_batch():
+    """Trigger embedding of several documents (JSON body: "uuids" array).
+
+    Documents are processed one at a time; a failure is recorded for that
+    item and the remaining items are still attempted.
+    """
+    uuids = _json_body().get('uuids', [])
+    # A bare string is iterable too and would be embedded character by character.
+    if not isinstance(uuids, list) or not uuids:
+        return jsonify(success=False, error='uuids array required'), 400
+
+    results = []
+    for doc_uuid in uuids:
+        ok, error = _run_embed(doc_uuid)
+        item = {"uuid": doc_uuid, "success": ok}
+        if not ok:
+            item["error"] = error
+        results.append(item)
+
+    succeeded = sum(1 for r in results if r["success"])
+    return jsonify(success=True, total=len(uuids), succeeded=succeeded, results=results)
+
+
 @app.route('/health')
 def health():
     return jsonify(success=True, service='pkm-api', endpoints=[
         '/devonthink/stats', '/devonthink/search?q=',
-        '/devonthink/inbox-count', '/omnifocus/stats', '/omnifocus/overdue', '/omnifocus/today'
+        '/devonthink/inbox-count', '/devonthink/embed', '/devonthink/embed-batch',
+        '/omnifocus/stats', '/omnifocus/overdue', '/omnifocus/today',
+        '/rag/query',
     ])
 
 