refactor: GPU 서버 재구성 + ChromaDB→Qdrant 마이그레이션
- embed_to_chroma.py → embed_to_qdrant.py 리라이트 (bge-m3 + Qdrant REST API)
- auto_classify.scpt: embed_to_qdrant.py 경로 변경 + sourceChannel 덮어쓰기 버그 수정
- requirements.txt: chromadb/schedule 제거, qdrant-client/flask/gunicorn 추가
- credentials.env.example: GPU_SERVER_IP 항목 추가
- GPU 서버 재구성 계획서 (docs/gpu-restructure.md) + dev-roadmap/commands 통합
- CLAUDE.md, README.md, deploy.md 현행화

GPU 서버 변경사항 (이미 적용됨):
- Ollama: qwen3.5:9b, id-9b 제거 → bge-m3 + bge-reranker-v2-m3
- Surya OCR 서비스 (:8400, systemd)
- Docker + NFS + Komga 이전 (:25600)
- tk-ai-service: Ollama API → OpenAI API 전환 (MLX 35B)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
114
scripts/embed_to_qdrant.py
Normal file
114
scripts/embed_to_qdrant.py
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
벡터 임베딩 스크립트
|
||||
- DEVONthink 문서 UUID로 텍스트 추출
|
||||
- GPU 서버(bge-m3)로 임베딩 생성
|
||||
- Qdrant에 저장
|
||||
"""
|
||||
|
||||
import sys
|
||||
import uuid as uuid_mod
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from pkm_utils import setup_logger, load_credentials, run_applescript_inline
|
||||
|
||||
logger = setup_logger("embed")
|
||||
|
||||
QDRANT_URL = "http://localhost:6333"
|
||||
COLLECTION = "pkm_documents"
|
||||
|
||||
|
||||
def get_document_text(doc_uuid: str) -> tuple[str, str]:
    """Fetch a DEVONthink record's title and plain text by UUID.

    Builds an AppleScript that looks the record up by UUID and returns
    ``<title>|||<plain text>``, then splits that on the first ``|||``.

    Args:
        doc_uuid: UUID of the DEVONthink record.

    Returns:
        A ``(title, text)`` tuple. ``text`` is an empty string when the
        script output contains no ``|||`` separator.
    """
    # Escape backslashes and double quotes so the value cannot terminate the
    # AppleScript string literal it is embedded in (it arrives from argv).
    safe_uuid = doc_uuid.replace("\\", "\\\\").replace('"', '\\"')
    script = f'''
tell application id "DNtp"
    set theRecord to get record with uuid "{safe_uuid}"
    set docText to plain text of theRecord
    set docTitle to name of theRecord
    return docTitle & "|||" & docText
end tell
'''
    result = run_applescript_inline(script)
    # Split on the first separator only: the body text may itself contain "|||".
    title, _, text = result.partition("|||")
    return title, text
|
||||
|
||||
|
||||
def get_embedding(text: str, gpu_server_ip: str) -> list[float] | None:
    """Request an embedding for *text* from the bge-m3 model on the GPU server.

    POSTs to ``/api/embed`` on port 11434 of ``gpu_server_ip`` (presumably an
    Ollama endpoint -- confirm against the server setup). Only the first
    8000 characters of *text* are sent.

    Returns:
        The first vector in the response's ``embeddings`` field, or ``None``
        when that field is missing/empty or any error occurs (the error is
        logged, not raised).
    """
    endpoint = f"http://{gpu_server_ip}:11434/api/embed"
    try:
        request_body = {"model": "bge-m3", "input": [text[:8000]]}
        response = requests.post(endpoint, json=request_body, timeout=60)
        response.raise_for_status()
        vectors = response.json().get("embeddings")
        if not vectors:
            return None
        return vectors[0]
    except Exception as e:
        logger.error(f"임베딩 생성 실패: {e}")
        return None
|
||||
|
||||
|
||||
def store_in_qdrant(doc_uuid: str, title: str, text: str, embedding: list[float]):
    """Store one document vector and its metadata in Qdrant.

    PUTs a single point to ``{QDRANT_URL}/collections/{COLLECTION}/points``.
    The payload carries the document UUID, title, the first 500 characters
    of the text, and a fixed ``source`` of ``"devonthink"``.

    Raises:
        Whatever ``requests.put`` or ``raise_for_status`` raises on a
        transport error or non-success HTTP status.
    """
    # Deterministic integer point ID: UUIDv5 of the document UUID, keeping
    # only the upper 64 bits of the 128-bit value.
    derived = uuid_mod.uuid5(uuid_mod.NAMESPACE_URL, doc_uuid)
    point_id = derived.int >> 64

    point = {
        "id": point_id,
        "vector": embedding,
        "payload": {
            "uuid": doc_uuid,
            "title": title,
            "text_preview": text[:500],
            "source": "devonthink",
        },
    }

    endpoint = f"{QDRANT_URL}/collections/{COLLECTION}/points"
    response = requests.put(endpoint, json={"points": [point]}, timeout=30)
    response.raise_for_status()
    logger.info(f"Qdrant 저장: {doc_uuid} ({title[:30]})")
|
||||
|
||||
|
||||
def run(doc_uuid: str):
    """Embed a single DEVONthink document and store the result in Qdrant.

    Steps: read ``GPU_SERVER_IP`` from the loaded credentials, extract the
    document's title/text, request an embedding, and store it. Skips (with a
    warning) when the GPU server IP is not configured or the text is shorter
    than 10 characters. Errors raised while processing are logged with a
    traceback and not propagated.
    """
    logger.info(f"임베딩 처리 시작: {doc_uuid}")

    gpu_ip = load_credentials().get("GPU_SERVER_IP")
    if not gpu_ip:
        logger.warning("GPU_SERVER_IP 미설정 — 임베딩 건너뜀")
        return

    try:
        title, text = get_document_text(doc_uuid)
        if not text or len(text) < 10:
            logger.warning(f"텍스트 부족 [{doc_uuid}]: {len(text)}자")
            return

        embedding = get_embedding(text, gpu_ip)
        if not embedding:
            logger.error(f"임베딩 실패: {doc_uuid}")
            return

        store_in_qdrant(doc_uuid, title, text, embedding)
        logger.info(f"임베딩 완료: {doc_uuid}")
    except Exception as e:
        logger.error(f"임베딩 처리 에러 [{doc_uuid}]: {e}", exc_info=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # One positional argument (the DEVONthink UUID) is required; any
    # additional arguments are ignored.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("사용법: python3 embed_to_qdrant.py <DEVONthink_UUID>")
        sys.exit(1)
    run(cli_args[0])
|
||||
239
scripts/pkm_api_server.py
Normal file
239
scripts/pkm_api_server.py
Normal file
@@ -0,0 +1,239 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
PKM Host API Server
|
||||
DEVONthink + OmniFocus AppleScript 중계용 경량 HTTP 서버.
|
||||
NanoClaw 컨테이너에서 호출. LaunchAgent(GUI 세션)로 실행 필수.
|
||||
|
||||
범위: DEVONthink + OmniFocus 전용. 이 이상 확장하지 않을 것.
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from flask import Flask, request, jsonify
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
def run_applescript(script: str, timeout: int = 120) -> str:
    """Run an AppleScript snippet through ``osascript`` and return its output.

    Args:
        script: AppleScript source passed via ``osascript -e``.
        timeout: Seconds to wait before ``subprocess.run`` gives up.

    Returns:
        The script's stdout with surrounding whitespace stripped.

    Raises:
        RuntimeError: osascript exited non-zero; the message is its stripped
            stderr.
        subprocess.TimeoutExpired: the script ran longer than ``timeout``.
    """
    completed = subprocess.run(
        ['osascript', '-e', script],
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if completed.returncode == 0:
        return completed.stdout.strip()
    raise RuntimeError(completed.stderr.strip())
|
||||
|
||||
|
||||
# --- DEVONthink ---
|
||||
|
||||
@app.route('/devonthink/stats')
def devonthink_stats():
    """Report how many DEVONthink records were added/modified today.

    The AppleScript walks the children of each database's root and counts
    records whose creation / modification date is at or after today's
    midnight. Databases with no activity are omitted. The script returns
    ``name:added:modified`` entries joined by ``|``.

    Returns:
        JSON ``{success, data: {databases, total_added, total_modified}}``,
        or ``{success: False, error}`` with HTTP 500 on any failure.
    """
    try:
        script = (
            'tell application id "DNtp"\n'
            ' set today to current date\n'
            ' set time of today to 0\n'
            ' set stats to {}\n'
            ' repeat with db in databases\n'
            ' set dbName to name of db\n'
            ' set addedCount to 0\n'
            ' set modifiedCount to 0\n'
            ' repeat with rec in children of root of db\n'
            ' try\n'
            ' if creation date of rec >= today then set addedCount to addedCount + 1\n'
            ' if modification date of rec >= today then set modifiedCount to modifiedCount + 1\n'
            ' end try\n'
            ' end repeat\n'
            ' if addedCount > 0 or modifiedCount > 0 then\n'
            ' set end of stats to dbName & ":" & addedCount & ":" & modifiedCount\n'
            ' end if\n'
            ' end repeat\n'
            ' set AppleScript\'s text item delimiters to "|"\n'
            ' return stats as text\n'
            'end tell'
        )
        result = run_applescript(script)
        stats = {}
        if result:
            for item in result.split('|'):
                # Split from the right: the two counts are always the last
                # fields, so a database name containing ':' stays intact
                # instead of being silently dropped.
                parts = item.rsplit(':', 2)
                if len(parts) == 3:
                    stats[parts[0]] = {'added': int(parts[1]), 'modified': int(parts[2])}
        total_added = sum(s['added'] for s in stats.values())
        total_modified = sum(s['modified'] for s in stats.values())
        return jsonify(success=True, data={
            'databases': stats,
            'total_added': total_added,
            'total_modified': total_modified
        })
    except Exception as e:
        return jsonify(success=False, error=str(e)), 500
|
||||
|
||||
|
||||
@app.route('/devonthink/search')
def devonthink_search():
    """Search DEVONthink and return up to ``limit`` matching records.

    Query parameters:
        q: Search string (required).
        limit: Maximum number of results (integer, default 10).

    Returns:
        JSON ``{success, data: [{name, database, modified}], count}``;
        HTTP 400 when ``q`` is missing or ``limit`` is not an integer;
        HTTP 500 with the error text when the AppleScript call fails.
    """
    q = request.args.get('q', '')
    if not q:
        return jsonify(success=False, error='q parameter required'), 400
    try:
        limit = int(request.args.get('limit', '10'))
    except ValueError:
        # A malformed limit is a client error, not a server failure.
        return jsonify(success=False, error='limit must be an integer'), 400

    # The query is embedded in an AppleScript string literal. Escape
    # backslashes and double quotes so user input cannot close the literal
    # and inject arbitrary AppleScript.
    safe_q = q.replace('\\', '\\\\').replace('"', '\\"')
    try:
        script = (
            'tell application id "DNtp"\n'
            f' set results to search "{safe_q}"\n'
            ' set output to {}\n'
            f' set maxCount to {limit}\n'
            ' set i to 0\n'
            ' repeat with rec in results\n'
            ' if i >= maxCount then exit repeat\n'
            ' set recName to name of rec\n'
            ' set recDB to name of database of rec\n'
            ' set recDate to modification date of rec as text\n'
            ' set end of output to recName & "||" & recDB & "||" & recDate\n'
            ' set i to i + 1\n'
            ' end repeat\n'
            ' set AppleScript\'s text item delimiters to linefeed\n'
            ' return output as text\n'
            'end tell'
        )
        result = run_applescript(script)
        items = []
        if result:
            for line in result.split('\n'):
                parts = line.split('||')
                # Lines that do not have exactly three fields are skipped.
                if len(parts) == 3:
                    items.append({'name': parts[0], 'database': parts[1], 'modified': parts[2]})
        return jsonify(success=True, data=items, count=len(items))
    except Exception as e:
        return jsonify(success=False, error=str(e)), 500
|
||||
|
||||
|
||||
@app.route('/devonthink/inbox-count')
def devonthink_inbox_count():
    """Return the number of items at the root of the DEVONthink "Inbox" database.

    Returns:
        JSON ``{success, data: {inbox_count}}``, or ``{success: False, error}``
        with HTTP 500 when the script fails or its output is not an integer.
    """
    try:
        script = '\n'.join([
            'tell application id "DNtp"',
            ' set inboxDB to database "Inbox"',
            ' return count of children of root of inboxDB',
            'end tell',
        ])
        raw_count = run_applescript(script)
        return jsonify(success=True, data={'inbox_count': int(raw_count)})
    except Exception as e:
        return jsonify(success=False, error=str(e)), 500
|
||||
|
||||
|
||||
# --- OmniFocus ---
|
||||
|
||||
@app.route('/omnifocus/stats')
def omnifocus_stats():
    """Return today's OmniFocus task counts: completed, added, and overdue.

    The AppleScript counts flattened tasks completed since today's midnight,
    tasks created since then, and incomplete tasks whose due date is before
    that midnight, and returns the three numbers joined by ``|``.

    Returns:
        JSON ``{success, data: {completed, added, overdue}}``, or
        ``{success: False, error}`` with HTTP 500 on any failure.
    """
    try:
        script = (
            'tell application "OmniFocus"\n'
            ' tell default document\n'
            ' set today to current date\n'
            ' set time of today to 0\n'
            ' set completedCount to count of (every flattened task whose completed is true and completion date >= today)\n'
            ' set addedCount to count of (every flattened task whose creation date >= today)\n'
            ' set overdueCount to count of (every flattened task whose completed is false and due date < today and due date is not missing value)\n'
            ' return (completedCount as text) & "|" & (addedCount as text) & "|" & (overdueCount as text)\n'
            ' end tell\n'
            'end tell'
        )
        fields = run_applescript(script).split('|')
        # Convert the first three fields in order; missing trailing fields
        # default to 0 and any extra fields are ignored.
        counts = [int(value) for value in fields[:3]]
        counts.extend([0] * (3 - len(counts)))
        completed, added, overdue = counts
        summary = {'completed': completed, 'added': added, 'overdue': overdue}
        return jsonify(success=True, data=summary)
    except Exception as e:
        return jsonify(success=False, error=str(e)), 500
|
||||
|
||||
|
||||
@app.route('/omnifocus/overdue')
def omnifocus_overdue():
    """List incomplete OmniFocus tasks whose due date is before today's midnight.

    The AppleScript emits one ``name||project||due date`` line per task; the
    project name is left empty when it cannot be read.

    Returns:
        JSON ``{success, data: [{name, project, due_date}], count}``, or
        ``{success: False, error}`` with HTTP 500 on any failure.
    """
    try:
        script = (
            'tell application "OmniFocus"\n'
            ' tell default document\n'
            ' set today to current date\n'
            ' set time of today to 0\n'
            ' set overdueTasks to every flattened task whose completed is false and due date < today and due date is not missing value\n'
            ' set output to {}\n'
            ' repeat with t in overdueTasks\n'
            ' set taskName to name of t\n'
            ' set dueDate to due date of t as text\n'
            ' set projName to ""\n'
            ' try\n'
            ' set projName to name of containing project of t\n'
            ' end try\n'
            ' set end of output to taskName & "||" & projName & "||" & dueDate\n'
            ' end repeat\n'
            ' set AppleScript\'s text item delimiters to linefeed\n'
            ' return output as text\n'
            ' end tell\n'
            'end tell'
        )
        result = run_applescript(script)
        # Empty output means no overdue tasks; otherwise one row per line.
        rows = [line.split('||') for line in result.split('\n')] if result else []
        tasks = [
            {
                'name': row[0],
                'project': row[1] if len(row) > 1 else '',
                'due_date': row[2] if len(row) > 2 else '',
            }
            for row in rows
        ]
        return jsonify(success=True, data=tasks, count=len(tasks))
    except Exception as e:
        return jsonify(success=False, error=str(e)), 500
|
||||
|
||||
|
||||
@app.route('/omnifocus/today')
def omnifocus_today():
    """List incomplete OmniFocus tasks that are due or deferred to today.

    "Today" is the half-open interval from today's midnight to the next
    midnight. The AppleScript emits one ``name||project`` line per task; the
    project name is left empty when it cannot be read.

    Returns:
        JSON ``{success, data: [{name, project}], count}``, or
        ``{success: False, error}`` with HTTP 500 on any failure.
    """
    try:
        script = (
            'tell application "OmniFocus"\n'
            ' tell default document\n'
            ' set today to current date\n'
            ' set time of today to 0\n'
            ' set tomorrow to today + 1 * days\n'
            ' set todayTasks to every flattened task whose completed is false and ((due date >= today and due date < tomorrow) or (defer date >= today and defer date < tomorrow))\n'
            ' set output to {}\n'
            ' repeat with t in todayTasks\n'
            ' set taskName to name of t\n'
            ' set projName to ""\n'
            ' try\n'
            ' set projName to name of containing project of t\n'
            ' end try\n'
            ' set end of output to taskName & "||" & projName\n'
            ' end repeat\n'
            ' set AppleScript\'s text item delimiters to linefeed\n'
            ' return output as text\n'
            ' end tell\n'
            'end tell'
        )
        result = run_applescript(script)
        # Empty output means no tasks; otherwise one row per line.
        rows = [line.split('||') for line in result.split('\n')] if result else []
        tasks = [
            {'name': row[0], 'project': row[1] if len(row) > 1 else ''}
            for row in rows
        ]
        return jsonify(success=True, data=tasks, count=len(tasks))
    except Exception as e:
        return jsonify(success=False, error=str(e)), 500
|
||||
|
||||
|
||||
@app.route('/health')
def health():
    """Report that the service is up and list the available endpoints."""
    available_endpoints = [
        '/devonthink/stats',
        '/devonthink/search?q=',
        '/devonthink/inbox-count',
        '/omnifocus/stats',
        '/omnifocus/overdue',
        '/omnifocus/today',
    ]
    return jsonify(success=True, service='pkm-api', endpoints=available_endpoints)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The optional first CLI argument overrides the default port (9900).
    port = 9900
    if len(sys.argv) > 1:
        port = int(sys.argv[1])
    print(f'PKM API Server starting on port {port}')
    # Bind to the loopback interface only.
    app.run(host='127.0.0.1', port=port)
|
||||
Reference in New Issue
Block a user