Files
hyungi_document_server/applescript/auto_classify.scpt
hyungi dc3f03b421 fix: Phase 2 버그 픽스 — JP 번역, API 서버, AppleScript 경로
- pkm_utils.py: strip_thinking() 추가 + llm_generate() no_think 옵션
  - <think> 태그 제거 + thinking 패턴("Wait,", "Let me" 등) 필터링
  - enable_thinking: false 파라미터 지원
- law_monitor.py: JP 번역 호출에 no_think=True 적용
- pkm_api_server.py: /devonthink/stats 최적화 (children 순회 → count 사용)
  + /devonthink/search 한글 쿼리 이스케이프 수정
- auto_classify.scpt: baseDir property로 경로 변수화
- omnifocus_sync.scpt: 로그 경로 변수화

인프라: MailPlus IMAP HOST → LAN IP(192.168.1.227)로 변경
참고: 한국 법령 API IP(122.153.226.74) open.law.go.kr 등록 필요

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-30 14:00:46 +09:00

125 lines
4.9 KiB
AppleScript

-- DEVONthink 4 Smart Rule: AI 자동 분류
-- Inbox DB 새 문서 → OCR 전처리 → MLX 분류 → 태그 + 메타데이터 + 도메인 DB 이동 → Qdrant 임베딩
-- Smart Rule 설정: Event = On Import, 조건 = Tags is empty
property baseDir : "Documents/code/DEVONThink_my server"
on performSmartRule(theRecords)
set homeDir to POSIX path of (path to home folder)
set pkmRoot to homeDir & baseDir
set venvPython to pkmRoot & "/venv/bin/python3"
set logFile to pkmRoot & "/logs/auto_classify.log"
tell application id "DNtp"
repeat with theRecord in theRecords
try
-- 0. OCR 전처리: 텍스트 없는 PDF/이미지 → Surya OCR
set docText to plain text of theRecord
set docUUID to uuid of theRecord
set docType to type of theRecord as string
if docText is "" then
if docType is in {"PDF Document", "JPEG image", "PNG image", "TIFF image"} then
set ocrPy to pkmRoot & "/scripts/ocr_preprocess.py"
try
set ocrText to do shell script venvPython & " " & quoted form of ocrPy & " " & quoted form of docUUID
if length of ocrText > 0 then
set plain text of theRecord to ocrText
set docText to ocrText
end if
on error ocrErr
do shell script "echo '[OCR ERROR] " & ocrErr & "' >> " & quoted form of logFile
end try
end if
end if
-- 1. 문서 텍스트 추출 (최대 4000자)
if length of docText > 4000 then
set docText to text 1 thru 4000 of docText
end if
if length of docText < 10 then
-- OCR 후에도 텍스트가 부족하면 검토필요 태그
set tags of theRecord to {"@상태/검토필요"}
continue repeat
end if
-- 2. 분류 프롬프트 로딩
set promptPath to pkmRoot & "/scripts/prompts/classify_document.txt"
set promptTemplate to do shell script "cat " & quoted form of promptPath
-- 문서 텍스트를 프롬프트에 삽입 (특수문자 이스케이프)
set escapedText to do shell script "echo " & quoted form of docText & " | sed 's/\\\\/\\\\\\\\/g; s/\"/\\\\\"/g; s/\\n/\\\\n/g' | head -c 4000"
-- 3. MLX 서버 API 호출 (OpenAI 호환)
set curlCmd to "curl -s --max-time 120 http://localhost:8800/v1/chat/completions -H 'Content-Type: application/json' -d '{\"model\": \"mlx-community/Qwen3.5-35B-A3B-4bit\", \"messages\": [{\"role\": \"user\", \"content\": " & quoted form of escapedText & "}], \"temperature\": 0.3, \"max_tokens\": 1024}'"
set jsonResult to do shell script curlCmd
-- 4. JSON 파싱 (Python 사용)
set parseCmd to "echo " & quoted form of jsonResult & " | python3 -c \"
import sys, json
try:
r = json.loads(sys.stdin.read())
content = r['choices'][0]['message']['content']
d = json.loads(content)
tags = ','.join(d.get('tags', []))
db = d.get('domain_db', '00_Note_BOX')
grp = d.get('sub_group', '00_Inbox')
ch = d.get('sourceChannel', 'inbox_route')
origin = d.get('dataOrigin', 'external')
print(f'{db}|{grp}|{tags}|{ch}|{origin}')
except:
print('00_Note_BOX|00_Inbox||inbox_route|external')
\""
set classResult to do shell script parseCmd
set AppleScript's text item delimiters to "|"
set resultParts to text items of classResult
set targetDB to item 1 of resultParts
set targetGroup to item 2 of resultParts
set tagString to item 3 of resultParts
set sourceChannel to item 4 of resultParts
set dataOrigin to item 5 of resultParts
set AppleScript's text item delimiters to ""
-- 5. 태그 설정
if tagString is not "" then
set AppleScript's text item delimiters to ","
set tagList to text items of tagString
set AppleScript's text item delimiters to ""
set tags of theRecord to tagList
end if
-- 6. 커스텀 메타데이터 설정
add custom meta data sourceChannel for "sourceChannel" to theRecord
add custom meta data dataOrigin for "dataOrigin" to theRecord
add custom meta data (current date) for "lastAIProcess" to theRecord
-- 7. 대상 도메인 DB로 이동
set targetDatabase to missing value
repeat with db in databases
if name of db is targetDB then
set targetDatabase to db
exit repeat
end if
end repeat
if targetDatabase is not missing value then
set groupPath to "/" & targetGroup
set targetLocation to create location groupPath in targetDatabase
move record theRecord to targetLocation
end if
-- 8. GPU 서버 벡터 임베딩 비동기 전송
set embedPy to pkmRoot & "/scripts/embed_to_qdrant.py"
do shell script venvPython & " " & quoted form of embedPy & " " & quoted form of docUUID & " &> /dev/null &"
on error errMsg
-- 에러 시 로그 기록 + 검토필요 태그
set tags of theRecord to {"@상태/검토필요", "AI분류실패"}
do shell script "echo '[" & (current date) & "] [auto_classify] [ERROR] " & errMsg & "' >> " & quoted form of logFile
end try
end repeat
end tell
end performSmartRule