- pkm_utils.py: strip_thinking() 추가 + llm_generate() no_think 옵션
- <think> 태그 제거 + thinking 패턴("Wait,", "Let me" 등) 필터링
- enable_thinking: false 파라미터 지원
- law_monitor.py: JP 번역 호출에 no_think=True 적용
- pkm_api_server.py: /devonthink/stats 최적화 (children 순회 → count 사용)
+ /devonthink/search 한글 쿼리 이스케이프 수정
- auto_classify.scpt: baseDir property로 경로 변수화
- omnifocus_sync.scpt: 로그 경로 변수화
인프라: MailPlus IMAP HOST → LAN IP(192.168.1.227)로 변경
참고: 한국 법령 API IP(122.153.226.74) open.law.go.kr 등록 필요
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
125 lines
4.9 KiB
AppleScript
125 lines
4.9 KiB
AppleScript
-- DEVONthink 4 Smart Rule: AI 자동 분류
|
|
-- Inbox DB 새 문서 → OCR 전처리 → MLX 분류 → 태그 + 메타데이터 + 도메인 DB 이동 → Qdrant 임베딩
|
|
-- Smart Rule 설정: Event = On Import, 조건 = Tags is empty
|
|
|
|
-- Project root, relative to the user's home folder. performSmartRule appends this
-- to the POSIX home path to locate the venv interpreter, the helper scripts and the
-- log file. Note that the value contains a space.
property baseDir : "Documents/code/DEVONThink_my server"
|
|
|
|
-- Smart Rule entry point: classifies each incoming record with the local LLM server.
--
-- For every record in theRecords:
--   0. If the record has no text and looks like a PDF/image, run the OCR helper script
--      and store its output as the record's plain text.
--   1. Truncate the text to 4000 characters; records with fewer than 10 characters are
--      only tagged for manual review.
--   2-4. Send prompt + text to the chat-completions endpoint and parse the JSON answer.
--   5-7. Apply tags and custom metadata, then move the record to the target database/group.
--   8. Start the embedding script in the background.
-- Any error while processing one record is logged, the record is tagged as failed, and
-- the loop continues with the next record.
--
-- theRecords: list of DEVONthink records passed in by the smart rule.
on performSmartRule(theRecords)
	set homeDir to POSIX path of (path to home folder)
	set pkmRoot to homeDir & baseDir
	set venvPython to pkmRoot & "/venv/bin/python3"
	set logFile to pkmRoot & "/logs/auto_classify.log"
	-- baseDir contains a space, so the interpreter path must be shell-quoted
	-- everywhere it is used as a command.
	set pythonCmd to quoted form of venvPython

	tell application id "DNtp"
		repeat with theRecord in theRecords
			try
				-- 0. OCR pre-processing for PDFs/images that have no text layer.
				set docText to plain text of theRecord
				set docUUID to uuid of theRecord
				set docType to type of theRecord as string

				if docText is "" then
					-- NOTE(review): confirm these names match the strings DEVONthink
					-- actually returns for `type ... as string`.
					if docType is in {"PDF Document", "JPEG image", "PNG image", "TIFF image"} then
						set ocrPy to pkmRoot & "/scripts/ocr_preprocess.py"
						try
							set ocrCmd to pythonCmd & " " & quoted form of ocrPy & " " & quoted form of docUUID
							set ocrText to do shell script ocrCmd
							if length of ocrText > 0 then
								set plain text of theRecord to ocrText
								set docText to ocrText
							end if
						on error ocrErr
							-- OCR is best effort: log and fall through to the length check.
							my appendLog(logFile, "[OCR ERROR] " & ocrErr)
						end try
					end if
				end if

				-- 1. Limit the text sent to the model to 4000 characters.
				--    Truncating here (by character) avoids cutting a multi-byte
				--    UTF-8 sequence in half later in the shell.
				if length of docText > 4000 then
					set docText to text 1 thru 4000 of docText
				end if

				if length of docText < 10 then
					-- Still too little text (even after OCR): flag for manual review.
					-- AppleScript has no "continue repeat", so the rest of the
					-- pipeline lives in the else branch.
					set tags of theRecord to {"@상태/검토필요"}
				else
					-- 2-4. Build the request, call the model, parse the answer.
					set promptPath to pkmRoot & "/scripts/prompts/classify_document.txt"
					set classResult to my classifyText(docText, promptPath, pythonCmd)

					set resultParts to my splitText(classResult, "|")
					if (count of resultParts) is not 5 then
						error "Unexpected classifier output: " & classResult
					end if
					set targetDB to item 1 of resultParts
					set targetGroup to item 2 of resultParts
					set tagString to item 3 of resultParts
					set sourceChannel to item 4 of resultParts
					set dataOrigin to item 5 of resultParts

					-- 5. Tags.
					if tagString is not "" then
						set tagList to my splitText(tagString, ",")
						set tags of theRecord to tagList
					end if

					-- 6. Custom metadata.
					add custom meta data sourceChannel for "sourceChannel" to theRecord
					add custom meta data dataOrigin for "dataOrigin" to theRecord
					add custom meta data (current date) for "lastAIProcess" to theRecord

					-- 7. Move to the target database, if a database with that name is open.
					set targetDatabase to missing value
					repeat with candidateDB in databases
						if name of candidateDB is targetDB then
							set targetDatabase to candidateDB
							exit repeat
						end if
					end repeat

					if targetDatabase is not missing value then
						set groupPath to "/" & targetGroup
						set targetLocation to create location groupPath in targetDatabase
						move record theRecord to targetLocation
					end if

					-- 8. Fire-and-forget embedding. Output is redirected with the
					--    POSIX form so the command detaches under plain sh as well.
					set embedPy to pkmRoot & "/scripts/embed_to_qdrant.py"
					do shell script pythonCmd & " " & quoted form of embedPy & " " & quoted form of docUUID & " > /dev/null 2>&1 &"
				end if

			on error errMsg
				-- Log first, then tag; neither step may abort the remaining records.
				my appendLog(logFile, "[" & ((current date) as string) & "] [auto_classify] [ERROR] " & errMsg)
				try
					set tags of theRecord to {"@상태/검토필요", "AI분류실패"}
				end try
			end try
		end repeat
	end tell
end performSmartRule

-- Appends one line to logFile. The message is passed through `quoted form of`, so
-- quotes or other shell metacharacters in it cannot break or alter the command.
-- Logging is best effort: a failure to write is ignored.
on appendLog(logFile, logLine)
	try
		do shell script "printf '%s\\n' " & quoted form of logLine & " >> " & quoted form of logFile
	end try
end appendLog

-- Splits theText on theDelimiter and returns the list of pieces, restoring the
-- previous text item delimiters afterwards.
on splitText(theText, theDelimiter)
	set savedDelimiters to AppleScript's text item delimiters
	set AppleScript's text item delimiters to theDelimiter
	set thePieces to text items of theText
	set AppleScript's text item delimiters to savedDelimiters
	return thePieces
end splitText

-- Sends the prompt file plus docText to the chat-completions endpoint and returns
-- "domain_db|sub_group|tag1,tag2|sourceChannel|dataOrigin".
-- Errors from building the payload or from curl propagate to the caller; a response
-- that cannot be parsed yields the default routing string.
--
-- docText: document text (already truncated by the caller).
-- promptPath: POSIX path of the prompt template file.
-- pythonCmd: shell-quoted path of the Python 3 interpreter to run.
on classifyText(docText, promptPath, pythonCmd)
	set apiURL to "http://localhost:8800/v1/chat/completions"
	set modelName to "mlx-community/Qwen3.5-35B-A3B-4bit"
	set builderSrc to my payloadBuilderSource()
	set parserSrc to my responseParserSource()

	-- JSON is produced by json.dumps, so quotes, backslashes, newlines and
	-- non-ASCII text in the document are always escaped correctly.
	set buildCmd to "printf '%s' " & quoted form of docText & " | " & pythonCmd & " -c " & quoted form of builderSrc & " " & quoted form of promptPath & " " & quoted form of modelName
	set payload to do shell script buildCmd

	-- The body goes to curl on stdin; printf '%s' (unlike echo) never
	-- interprets backslash sequences inside the data.
	set curlCmd to "printf '%s' " & quoted form of payload & " | curl -s --max-time 120 " & quoted form of apiURL & " -H 'Content-Type: application/json' --data-binary @-"
	set jsonResult to do shell script curlCmd

	set parseCmd to "printf '%s' " & quoted form of jsonResult & " | " & pythonCmd & " -c " & quoted form of parserSrc
	return do shell script parseCmd
end classifyText

-- Returns the Python source that builds the request body.
-- It reads the document text from stdin, the prompt file from argv[1] and the
-- model name from argv[2].
-- NOTE(review): the document text is appended after the prompt template; if the
-- template defines a placeholder for the text, substitute it here instead.
on payloadBuilderSource()
	set src to "import sys, json" & linefeed
	set src to src & "doc = sys.stdin.buffer.read().decode('utf-8', 'replace')" & linefeed
	set src to src & "with open(sys.argv[1], encoding='utf-8') as f:" & linefeed
	set src to src & "    prompt = f.read()" & linefeed
	set src to src & "content = prompt.rstrip() + chr(10) * 2 + doc" & linefeed
	set src to src & "payload = {'model': sys.argv[2], 'messages': [{'role': 'user', 'content': content}], 'temperature': 0.3, 'max_tokens': 1024}" & linefeed
	set src to src & "sys.stdout.write(json.dumps(payload))" & linefeed
	return src
end payloadBuilderSource

-- Returns the Python source that turns the API response (stdin) into the
-- pipe-separated routing string. <think>...</think> sections and any text around the
-- outermost {...} are dropped before the model's answer is parsed as JSON; "|" and
-- line breaks are removed from every field so the caller can split on "|" safely.
on responseParserSource()
	set src to "import sys, json, re" & linefeed
	set src to src & "bad = '|' + chr(10) + chr(13)" & linefeed
	set src to src & "def clean(v):" & linefeed
	set src to src & "    return ''.join(' ' if c in bad else c for c in str(v)).strip()" & linefeed
	set src to src & "try:" & linefeed
	set src to src & "    r = json.loads(sys.stdin.buffer.read().decode('utf-8', 'replace'))" & linefeed
	set src to src & "    content = r['choices'][0]['message']['content']" & linefeed
	set src to src & "    content = re.sub('<think>.*?</think>', '', content, flags=re.S)" & linefeed
	set src to src & "    a, b = content.find('{'), content.rfind('}')" & linefeed
	set src to src & "    if a != -1 and b > a:" & linefeed
	set src to src & "        content = content[a:b + 1]" & linefeed
	set src to src & "    d = json.loads(content)" & linefeed
	set src to src & "    tags = ','.join(clean(t) for t in d.get('tags', []))" & linefeed
	set src to src & "    db = clean(d.get('domain_db', '00_Note_BOX'))" & linefeed
	set src to src & "    grp = clean(d.get('sub_group', '00_Inbox'))" & linefeed
	set src to src & "    ch = clean(d.get('sourceChannel', 'inbox_route'))" & linefeed
	set src to src & "    origin = clean(d.get('dataOrigin', 'external'))" & linefeed
	set src to src & "    out = '|'.join([db, grp, tags, ch, origin])" & linefeed
	set src to src & "except Exception:" & linefeed
	set src to src & "    out = '00_Note_BOX|00_Inbox||inbox_route|external'" & linefeed
	set src to src & "sys.stdout.buffer.write(out.encode('utf-8'))" & linefeed
	return src
end responseParserSource
|