-- hyungi_document_server/applescript/auto_classify.scpt

-- DEVONthink 4 Smart Rule: AI 자동 분류
-- Inbox DB 새 문서 → OCR 전처리 → MLX 분류 → 태그 + 메타데이터 + 도메인 DB 이동 → Qdrant 임베딩
-- Smart Rule 설정: Event = On Import, 조건 = Tags is empty

-- Project root, relative to the user's home folder: performSmartRule appends it
-- directly to the home folder's POSIX path. Note that the value contains a
-- space, which matters wherever a derived path is handed to a shell command.
property baseDir : "Documents/code/DEVONThink_my server"

-- Smart Rule entry point, called with the records the rule matched.
-- For each record:
--   0. run the OCR script when the record has no text and looks like a PDF/image,
--   1. truncate the text to 4000 characters (records with < 10 characters are only
--      tagged for manual review),
--   2-4. ask the local MLX server (OpenAI-compatible API) to classify the text,
--   5-6. apply the returned tags and custom metadata,
--   7. move the record into the returned database / group,
--   8. start the Qdrant embedding script in the background.
-- Any error for a record is logged, the record is tagged as failed, and the loop
-- continues with the next record.
-- theRecords: list of DEVONthink records.
on performSmartRule(theRecords)
	set homeDir to POSIX path of (path to home folder)
	set pkmRoot to homeDir & baseDir
	-- baseDir contains a space, so the interpreter path must be shell-quoted;
	-- pythonCmd is therefore already safe to splice into a command line.
	set pythonCmd to quoted form of (pkmRoot & "/venv/bin/python3")
	set logFile to pkmRoot & "/logs/auto_classify.log"
	set promptPath to pkmRoot & "/scripts/prompts/classify_document.txt"
	set ocrPy to pkmRoot & "/scripts/ocr_preprocess.py"
	set embedPy to pkmRoot & "/scripts/embed_to_qdrant.py"

	tell application id "DNtp"
		repeat with theRecord in theRecords
			try
				-- 0. OCR pre-processing for records without any text
				set docText to plain text of theRecord
				set docUUID to uuid of theRecord
				set docType to type of theRecord as string

				-- NOTE(review): "picture" was added because DEVONthink's type enum
				-- presumably coerces to "PDF document" / "picture" rather than the
				-- kind-style strings that follow it; confirm against the dictionary.
				if docText is "" and docType is in {"PDF Document", "picture", "JPEG image", "PNG image", "TIFF image"} then
					try
						set ocrText to do shell script pythonCmd & " " & quoted form of ocrPy & " " & quoted form of docUUID
						if length of ocrText > 0 then
							-- Keep the OCR text for classification first, so a failure
							-- to write it back to the record does not discard it.
							set docText to ocrText
							set plain text of theRecord to ocrText
						end if
					on error ocrErr
						my appendLogLine(logFile, "[OCR ERROR] " & ocrErr)
					end try
				end if

				-- 1. Limit the text sent to the model to 4000 characters
				if length of docText > 4000 then set docText to text 1 thru 4000 of docText

				if length of docText < 10 then
					-- Still too little text (even after OCR): flag for manual review.
					-- AppleScript has no "continue repeat", hence the if/else.
					set tags of theRecord to {"@상태/검토필요"}
				else
					-- 2-4. Build the prompt, call the MLX server, parse the answer
					set classResult to my classifyViaMLX(docText, promptPath, pythonCmd)
					set resultParts to my splitOn(classResult, "|")
					if (count of resultParts) < 5 then error "Unexpected classifier output: " & classResult
					set targetDB to item 1 of resultParts
					set targetGroup to item 2 of resultParts
					set tagString to item 3 of resultParts
					set sourceChannel to item 4 of resultParts
					set dataOrigin to item 5 of resultParts

					-- 5. Tags
					if tagString is not "" then
						set tags of theRecord to my splitOn(tagString, ",")
					end if

					-- 6. Custom metadata
					add custom meta data sourceChannel for "sourceChannel" to theRecord
					add custom meta data dataOrigin for "dataOrigin" to theRecord
					add custom meta data (current date) for "lastAIProcess" to theRecord

					-- 7. Move to the target database (skipped when no open database
					--    has the returned name)
					set targetDatabase to missing value
					repeat with db in databases
						if name of db is targetDB then
							set targetDatabase to db
							exit repeat
						end if
					end repeat

					if targetDatabase is not missing value then
						set groupPath to "/" & targetGroup
						set targetLocation to create location groupPath in targetDatabase
						move record theRecord to targetLocation
					end if

					-- 8. Fire-and-forget embedding job. Redirecting both output streams
					--    is what lets do shell script return without waiting.
					do shell script pythonCmd & " " & quoted form of embedPy & " " & quoted form of docUUID & " > /dev/null 2>&1 &"
				end if

			on error errMsg
				-- Tag the record as failed and log the error. Both steps are guarded
				-- so a secondary failure cannot abort the remaining records.
				try
					set tags of theRecord to {"@상태/검토필요", "AI분류실패"}
				end try
				my appendLogLine(logFile, "[" & ((current date) as string) & "] [auto_classify] [ERROR] " & errMsg)
			end try
		end repeat
	end tell
end performSmartRule

-- Private helper: sends docText (combined with the prompt template stored at
-- promptPath) to the MLX chat-completions endpoint and returns the result as
-- "database|group|tag1,tag2,...|sourceChannel|dataOrigin".
-- pythonCmd must already be shell-quoted.
-- A failing Python or curl command raises an AppleScript error; a reply that
-- cannot be parsed yields the fallback "00_Note_BOX|00_Inbox||inbox_route|external".
on classifyViaMLX(docText, promptPath, pythonCmd)
	set apiURL to "http://localhost:8800/v1/chat/completions"

	-- The request body is produced by json.dumps so that quotes, backslashes,
	-- newlines and non-ASCII text in the document are escaped correctly.
	-- NOTE(review): the placeholder name {document_text} is an assumption; when
	-- the template does not contain it, the document text is appended instead.
	set buildRequestPy to "import sys, json
with open(sys.argv[1], encoding='utf-8') as f:
    template = f.read()
doc = sys.argv[2]
if '{document_text}' in template:
    prompt = template.replace('{document_text}', doc)
else:
    prompt = template + chr(10) + chr(10) + doc
body = {
    'model': 'mlx-community/Qwen3.5-35B-A3B-4bit',
    'messages': [{'role': 'user', 'content': prompt}],
    'temperature': 0.3,
    'max_tokens': 1024,
}
sys.stdout.write(json.dumps(body))
"
	set requestBody to do shell script "PYTHONIOENCODING=utf-8 " & pythonCmd & " -c " & quoted form of buildRequestPy & " " & quoted form of promptPath & " " & quoted form of docText

	-- printf '%s' is used instead of echo because echo may interpret the
	-- backslash escapes that JSON relies on.
	set jsonResult to do shell script "printf '%s' " & quoted form of requestBody & " | curl -s --max-time 120 " & quoted form of apiURL & " -H 'Content-Type: application/json' --data-binary @-"

	-- Extract the classification from the reply. The model's answer is taken
	-- from the first '{' to the last '}' so surrounding text such as code
	-- fences does not break parsing; '|' and line breaks are removed from the
	-- fields because '|' is the separator of the returned string.
	set parseReplyPy to "import sys, json
FALLBACK = '00_Note_BOX|00_Inbox||inbox_route|external'
def clean(value):
    return str(value).replace('|', ' ').replace(chr(10), ' ').replace(chr(13), ' ').strip()
try:
    reply = json.loads(sys.stdin.read())
    content = reply['choices'][0]['message']['content']
    data = json.loads(content[content.find('{'):content.rfind('}') + 1])
    fields = [
        clean(data.get('domain_db', '00_Note_BOX')),
        clean(data.get('sub_group', '00_Inbox')),
        ','.join(clean(t) for t in data.get('tags', [])),
        clean(data.get('sourceChannel', 'inbox_route')),
        clean(data.get('dataOrigin', 'external')),
    ]
    print('|'.join(fields))
except Exception:
    print(FALLBACK)
"
	return do shell script "printf '%s' " & quoted form of jsonResult & " | PYTHONIOENCODING=utf-8 " & pythonCmd & " -c " & quoted form of parseReplyPy
end classifyViaMLX

-- Private helper: splits theText on theDelimiter and returns the list of parts.
-- AppleScript's text item delimiters are restored even when splitting fails.
on splitOn(theText, theDelimiter)
	set savedDelimiters to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to theDelimiter
		set theParts to text items of theText
	on error errMsg number errNum
		set AppleScript's text item delimiters to savedDelimiters
		error errMsg number errNum
	end try
	set AppleScript's text item delimiters to savedDelimiters
	return theParts
end splitOn

-- Private helper: appends logMessage as one line to logFile.
-- The message is passed through quoted form, so quotes inside an error text
-- cannot break the shell command. Logging failures are deliberately ignored.
on appendLogLine(logFile, logMessage)
	try
		do shell script "printf '%s\\n' " & quoted form of logMessage & " >> " & quoted form of logFile
	end try
end appendLogLine