-- DEVONthink 4 Smart Rule: AI auto-classification
-- Flow: new document in the Inbox DB -> OCR preprocessing -> MLX classification
--       -> tags + custom metadata + move to the domain DB -> Qdrant embedding
-- Smart Rule settings: Event = On Import, condition = Tags is empty

-- Project root, relative to the user's home folder. NOTE: contains a space, so
-- every path derived from it must go through `quoted form of` before reaching a shell.
property baseDir : "Documents/code/DEVONThink_my server"

-- OpenAI-compatible chat completion endpoint of the local MLX server.
property mlxEndpoint : "http://localhost:8800/v1/chat/completions"

-- Model identifier sent in the request body.
property mlxModel : "mlx-community/Qwen3.5-35B-A3B-4bit"

-- Token in classify_document.txt that is replaced by the document text.
-- NOTE(review): the real placeholder name is not visible from this script; if the
-- template does not contain this token the document text is appended to the
-- template instead. TODO confirm against scripts/prompts/classify_document.txt.
property promptPlaceholder : "{document_text}"

-- Smart Rule entry point, called by DEVONthink with the matching records.
--
-- For every record:
--   0. If it has no text and looks like a PDF/image, run scripts/ocr_preprocess.py
--      and store the returned text as the record's plain text.
--   1. Truncate the text to 4000 characters; records with fewer than 10 characters
--      are only tagged "@상태/검토필요" and skipped.
--   2-4. Build the chat request (prompt template + text), POST it with curl and
--      parse the model's JSON answer into db / group / tags / channel / origin.
--   5-7. Apply the tags and custom metadata, then move the record to the target
--      database/group when a database with that name is open.
--   8. Start scripts/embed_to_qdrant.py in the background for the record's UUID.
-- Any error tags the record "@상태/검토필요" + "AI분류실패" and is appended to the log file.
--
-- Parameters:
--   theRecords : list of DEVONthink records supplied by the Smart Rule.
on performSmartRule(theRecords)
	set homeDir to POSIX path of (path to home folder)
	set pkmRoot to homeDir & baseDir
	set venvPython to pkmRoot & "/venv/bin/python3"
	set logFile to pkmRoot & "/logs/auto_classify.log"
	set ocrPy to pkmRoot & "/scripts/ocr_preprocess.py"
	set embedPy to pkmRoot & "/scripts/embed_to_qdrant.py"
	set promptPath to pkmRoot & "/scripts/prompts/classify_document.txt"
	set endpointURL to mlxEndpoint
	set modelName to mlxModel
	set placeholderToken to promptPlaceholder
	
	-- Python snippet that builds the request body. json.dump does all escaping, so
	-- quotes, backslashes and newlines in the document cannot break the JSON.
	-- argv: 1 = prompt template path, 2 = document text, 3 = placeholder, 4 = model.
	set builderCode to my joinLines({¬
		"import sys, json", ¬
		"with open(sys.argv[1], encoding='utf-8') as fh:", ¬
		"    template = fh.read()", ¬
		"text, marker, model = sys.argv[2], sys.argv[3], sys.argv[4]", ¬
		"if marker and marker in template:", ¬
		"    content = template.replace(marker, text)", ¬
		"else:", ¬
		"    content = template + chr(10) * 2 + text", ¬
		"json.dump({'model': model, 'messages': [{'role': 'user', 'content': content}], 'temperature': 0.3, 'max_tokens': 1024}, sys.stdout)"})
	
	-- Python snippet that turns the server response (argv 1) into
	-- "db|group|tag1,tag2|channel|origin". Any parsing problem yields the defaults.
	-- '|' and line breaks are removed from the fields so the split below stays aligned.
	set parserCode to my joinLines({¬
		"import sys, json", ¬
		"def clean(value):", ¬
		"    return str(value).replace('|', ' ').replace(chr(10), ' ').replace(chr(13), ' ').strip()", ¬
		"try:", ¬
		"    r = json.loads(sys.argv[1])", ¬
		"    content = r['choices'][0]['message']['content']", ¬
		"    d = json.loads(content)", ¬
		"    tags = ','.join(d.get('tags', []))", ¬
		"    fields = [d.get('domain_db', '00_Note_BOX'), d.get('sub_group', '00_Inbox'), tags, d.get('sourceChannel', 'inbox_route'), d.get('dataOrigin', 'external')]", ¬
		"    line = '|'.join(clean(f) for f in fields)", ¬
		"except Exception:", ¬
		"    line = '00_Note_BOX|00_Inbox||inbox_route|external'", ¬
		"sys.stdout.buffer.write(line.encode('utf-8'))"})
	
	tell application id "DNtp"
		repeat with theRecord in theRecords
			try
				-- 0. OCR preprocessing: PDF/image without text -> Surya OCR
				set docText to plain text of theRecord
				set docUUID to uuid of theRecord
				-- NOTE(review): these type names are kept from the original script;
				-- verify them against the values DEVONthink 4 actually reports.
				set docType to type of theRecord as string
				
				if docText is "" then
					if docType is in {"PDF Document", "JPEG image", "PNG image", "TIFF image"} then
						try
							set ocrText to do shell script (quoted form of venvPython & " " & quoted form of ocrPy & " " & quoted form of docUUID)
							if length of ocrText > 0 then
								set plain text of theRecord to ocrText
								set docText to ocrText
							end if
						on error ocrErr
							-- OCR is best effort: log and continue with whatever text exists.
							my appendLog(logFile, "[OCR ERROR] " & ocrErr)
						end try
					end if
				end if
				
				-- 1. Limit the document text to 4000 characters
				if length of docText > 4000 then
					set docText to text 1 thru 4000 of docText
				end if
				
				if length of docText < 10 then
					-- Still not enough text after OCR: flag for manual review and skip.
					-- (AppleScript has no "continue repeat", hence the if/else.)
					set tags of theRecord to {"@상태/검토필요"}
				else
					-- 2. Build the request body from the prompt template and the text
					set payload to do shell script (quoted form of venvPython & " -c " & quoted form of builderCode & " " & quoted form of promptPath & " " & quoted form of docText & " " & quoted form of placeholderToken & " " & quoted form of modelName)
					
					-- 3. Call the MLX server API (OpenAI compatible)
					set curlCmd to "curl -s --max-time 120 " & quoted form of endpointURL & " -H 'Content-Type: application/json' -d " & quoted form of payload
					set jsonResult to do shell script curlCmd
					
					-- 4. Parse the JSON answer. The response is passed as an argument
					--    rather than through echo, which may rewrite backslash escapes.
					set classResult to do shell script (quoted form of venvPython & " -c " & quoted form of parserCode & " " & quoted form of jsonResult)
					
					set resultParts to my splitText(classResult, "|")
					if (count of resultParts) < 5 then error "Unexpected classifier output: " & classResult
					set targetDB to item 1 of resultParts
					set targetGroup to item 2 of resultParts
					set tagString to item 3 of resultParts
					set sourceChannel to item 4 of resultParts
					set dataOrigin to item 5 of resultParts
					
					-- 5. Apply tags
					if tagString is not "" then
						set tags of theRecord to my splitText(tagString, ",")
					end if
					
					-- 6. Apply custom metadata
					add custom meta data sourceChannel for "sourceChannel" to theRecord
					add custom meta data dataOrigin for "dataOrigin" to theRecord
					add custom meta data (current date) for "lastAIProcess" to theRecord
					
					-- 7. Move to the target domain database (only if it is open)
					set targetDatabase to missing value
					repeat with db in databases
						if name of db is targetDB then
							set targetDatabase to db
							exit repeat
						end if
					end repeat
					
					if targetDatabase is not missing value then
						set groupPath to "/" & targetGroup
						set targetLocation to create location groupPath in targetDatabase
						move record theRecord to targetLocation
					end if
					
					-- 8. Send the record to the embedding script in the background.
					--    Both output streams are redirected so do shell script returns at once.
					do shell script (quoted form of venvPython & " " & quoted form of embedPy & " " & quoted form of docUUID & " > /dev/null 2>&1 &")
				end if
				
			on error errMsg
				-- On failure: flag the record for review and write a log entry
				set tags of theRecord to {"@상태/검토필요", "AI분류실패"}
				my appendLog(logFile, "[" & ((current date) as string) & "] [auto_classify] [ERROR] " & errMsg)
			end try
		end repeat
	end tell
end performSmartRule

-- Appends one line to the log file. The message is shell-quoted, so quotes or
-- other special characters in an error text cannot break the command.
on appendLog(logFile, theMessage)
	do shell script "printf '%s\\n' " & quoted form of theMessage & " >> " & quoted form of logFile
end appendLog

-- Joins a list of strings with linefeeds (used to assemble the inline Python code).
on joinLines(lineList)
	set previousDelimiters to AppleScript's text item delimiters
	set AppleScript's text item delimiters to linefeed
	set joinedText to lineList as text
	set AppleScript's text item delimiters to previousDelimiters
	return joinedText
end joinLines

-- Splits theText on theDelimiter and restores the previous text item delimiters.
on splitText(theText, theDelimiter)
	set previousDelimiters to AppleScript's text item delimiters
	set AppleScript's text item delimiters to theDelimiter
	set theParts to text items of theText
	set AppleScript's text item delimiters to previousDelimiters
	return theParts
end splitText