Files
hyungi_document_server/scripts/mailplus_archive.py
Hyungi Ahn 084d3a8c63 feat: 전체 PKM 스크립트 일괄 작성 — 분류/법령/메일/다이제스트/임베딩
- scripts/pkm_utils.py: 공통 유틸 (로거, dotenv, osascript 래퍼)
- scripts/prompts/classify_document.txt: Ollama 분류 프롬프트
- applescript/auto_classify.scpt: Inbox → AI 분류 → DB 이동
- applescript/omnifocus_sync.scpt: Projects → OmniFocus 작업 생성
- scripts/law_monitor.py: 법령 변경 모니터링 + DEVONthink 임포트
- scripts/mailplus_archive.py: MailPlus IMAP → Archive DB
- scripts/pkm_daily_digest.py: 일일 다이제스트 + OmniFocus 액션
- scripts/embed_to_chroma.py: GPU 서버 벡터 임베딩 → ChromaDB
- launchd/*.plist: 3개 스케줄 (07:00, 07:00+18:00, 20:00)
- docs/deploy.md: Mac mini 배포 가이드
- docs/devonagent-setup.md: 검색 세트 9종 설정 가이드
- tests/test_classify.py: 5종 문서 분류 테스트

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 12:32:36 +09:00

210 lines
6.5 KiB
Python

#!/usr/bin/env python3
"""
MailPlus → DEVONthink Archive DB 이메일 수집
- Synology MailPlus IMAP 접속
- 마지막 동기화 이후 새 메일 가져오기
- DEVONthink Archive DB 임포트
"""
import email
import email.message
import imaplib
import os
import sys
from datetime import datetime
from email.header import decode_header
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from pkm_utils import setup_logger, load_credentials, run_applescript_inline, DATA_DIR
# Module logger; setup_logger is provided by pkm_utils.
logger = setup_logger("mailplus")

# State file holding the highest IMAP UID that run() has already processed.
LAST_UID_FILE = DATA_DIR / "mailplus_last_uid.txt"

# Staging directory where fetched messages are written as .eml files
# before DEVONthink imports them.
MAIL_TMP_DIR = DATA_DIR / "mail_tmp"
# parents=True: a missing DATA_DIR (fresh machine) must not abort the
# import of this module with FileNotFoundError.
MAIL_TMP_DIR.mkdir(parents=True, exist_ok=True)

# Safety-related keywords (used to decide the dataOrigin tag)
SAFETY_KEYWORDS = [
    "안전", "위험", "사고", "재해", "점검", "보건", "화학물질",
    "OSHA", "safety", "hazard", "incident", "KOSHA"
]
def decode_mime_header(value: str) -> str:
    """Decode a MIME (RFC 2047 encoded-word) header into plain text.

    Args:
        value: Raw header value; may be empty or ``None``.

    Returns:
        The decoded header text, or ``""`` when ``value`` is falsy.
        Bytes that cannot be decoded are replaced rather than raising.
    """
    if not value:
        return ""
    pieces = []
    for part, charset in decode_header(value):
        if not isinstance(part, bytes):
            pieces.append(part)
            continue
        try:
            pieces.append(part.decode(charset or "utf-8", errors="replace"))
        except LookupError:
            # The charset label comes straight from the mail and may be
            # unknown to Python (e.g. "unknown-8bit"); fall back instead of
            # letting one odd header abort the whole run.
            pieces.append(part.decode("utf-8", errors="replace"))
    # decode_header keeps the whitespace of unencoded runs, so joining with a
    # space would double it ("Re:  subject"); join without a separator.
    return "".join(pieces)
def load_last_uid() -> int:
    """Load the last processed IMAP UID from the state file.

    Returns:
        The stored UID, or 0 when the state file does not exist or does not
        contain an integer (for example an empty file left by an
        interrupted write). A return value of 0 makes run() fall back to its
        first-run search window.
    """
    try:
        return int(LAST_UID_FILE.read_text(encoding="utf-8").strip())
    except FileNotFoundError:
        return 0
    except ValueError:
        # Covers both non-numeric content and undecodable bytes
        # (UnicodeDecodeError is a ValueError subclass).
        logger.warning(f"UID 상태 파일을 읽을 수 없어 0으로 처리합니다: {LAST_UID_FILE}")
        return 0
def save_last_uid(uid: int):
    """Persist the last processed IMAP UID to the state file.

    The value is written to a sibling temporary file that is then renamed
    over the state file, so an interruption mid-write cannot leave an empty
    or truncated state file behind.

    Args:
        uid: UID to store.
    """
    tmp_path = LAST_UID_FILE.with_name(LAST_UID_FILE.name + ".tmp")
    tmp_path.write_text(str(uid), encoding="utf-8")
    tmp_path.replace(LAST_UID_FILE)
def detect_data_origin(subject: str, body: str) -> str:
    """Decide the dataOrigin tag from safety keywords.

    Args:
        subject: Decoded mail subject.
        body: Plain-text mail body.

    Returns:
        ``"work"`` if any entry of SAFETY_KEYWORDS occurs (case-insensitively,
        as a substring) in the subject or body, otherwise ``"external"``.
    """
    haystack = " ".join((subject, body)).lower()
    has_safety_term = any(
        keyword.lower() in haystack for keyword in SAFETY_KEYWORDS
    )
    return "work" if has_safety_term else "external"
def save_email_file(msg: email.message.Message, uid: int) -> Path:
    """Write a message to the staging directory as an .eml file.

    The file name is ``<YYYYmmdd_HHMMSS>_<uid>_<subject>.eml``, using the
    current local time and the decoded subject, sanitised and cut to 50
    characters.

    Args:
        msg: Parsed message to serialise.
        uid: IMAP UID of the message.

    Returns:
        Path of the written file inside MAIL_TMP_DIR.
    """
    raw_subject = decode_mime_header(msg.get("Subject", ""))
    # Keep alphanumerics, space, underscore and hyphen; everything else
    # becomes "_" so the subject is usable as a file name.
    cleaned = [
        ch if (ch.isalnum() or ch in " _-") else "_"
        for ch in raw_subject
    ]
    safe_subject = "".join(cleaned)[:50]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target = MAIL_TMP_DIR / f"{timestamp}_{uid}_{safe_subject}.eml"
    with target.open("wb") as handle:
        handle.write(msg.as_bytes())
    return target
def get_email_body(msg: email.message.Message) -> str:
    """Extract the plain-text body of a message, capped at 2000 characters.

    For a multipart message the decoded text of every ``text/plain`` part is
    concatenated in walk order. For a single-part message the payload is
    decoded regardless of its content type.

    Args:
        msg: Parsed message.

    Returns:
        At most the first 2000 characters of the decoded text; ``""`` when
        there is no payload. Undecodable bytes are replaced, not raised.
    """

    def _decode(part) -> str:
        # Decode one part's payload using its declared charset.
        payload = part.get_payload(decode=True)
        if not payload:
            return ""
        charset = part.get_content_charset() or "utf-8"
        try:
            return payload.decode(charset, errors="replace")
        except LookupError:
            # The declared charset is sender-controlled and may not be a
            # codec Python knows; fall back rather than abort the run.
            return payload.decode("utf-8", errors="replace")

    if msg.is_multipart():
        body = "".join(
            _decode(part)
            for part in msg.walk()
            if part.get_content_type() == "text/plain"
        )
    else:
        body = _decode(msg)
    return body[:2000]
def import_to_devonthink(filepath: Path, subject: str, data_origin: str):
    """Import an .eml file into the "/Email" group of the "Archive" database.

    Builds an AppleScript that looks up the DEVONthink database named
    "Archive", creates/locates the "/Email" group, imports the file and sets
    the custom metadata ``sourceChannel`` ("email"), ``dataOrigin`` and
    ``lastAIProcess`` (current date) on the new record.

    Args:
        filepath: Path of the .eml file to import.
        subject: Decoded subject; only used for the log line.
        data_origin: Value stored in the ``dataOrigin`` metadata field.

    Returns:
        True when the script ran without raising, False when it failed.
        Failures are logged and never propagated to the caller.
    """

    def _as_literal(text: str) -> str:
        # Escape for an AppleScript double-quoted string. Backslashes go
        # first, otherwise the ones added for quotes would be doubled again.
        return text.replace("\\", "\\\\").replace('"', '\\"')

    escaped_path = _as_literal(str(filepath))
    escaped_origin = _as_literal(data_origin)
    # The else branch turns a missing "Archive" database into a script error
    # instead of a silent no-op that would be logged as a successful import.
    # NOTE(review): presumably run_applescript_inline raises when osascript
    # fails (the try/except below relies on that) - confirm in pkm_utils.
    script = f'''
    tell application id "DNtp"
        set targetDB to missing value
        repeat with db in databases
            if name of db is "Archive" then
                set targetDB to db
                exit repeat
            end if
        end repeat
        if targetDB is not missing value then
            set targetGroup to create location "/Email" in targetDB
            set theRecord to import POSIX path "{escaped_path}" to targetGroup
            add custom meta data "email" for "sourceChannel" to theRecord
            add custom meta data "{escaped_origin}" for "dataOrigin" to theRecord
            add custom meta data (current date) for "lastAIProcess" to theRecord
        else
            error "Archive database not found"
        end if
    end tell
    '''
    try:
        run_applescript_inline(script)
    except Exception as e:
        logger.error(f"DEVONthink 임포트 실패: {e}")
        return False
    logger.info(f"DEVONthink 임포트: {subject[:40]}")
    return True
def run():
    """Fetch new INBOX mail over IMAP and import each message into DEVONthink.

    Flow:
      1. Read the MAILPLUS_* connection settings from the credentials.
      2. Search for UIDs above the last processed one, or for mail from the
         last 7 days when no UID has been stored yet.
      3. For each new message: derive the dataOrigin tag, save it as an
         .eml file and import it into DEVONthink.
      4. Persist the highest processed UID, even when the loop is
         interrupted, so finished messages are not imported twice.

    Exits the process with status 1 when the credentials are incomplete,
    the search fails, or an IMAP/unexpected error occurs.
    """
    logger.info("=== MailPlus 이메일 수집 시작 ===")
    creds = load_credentials()
    host = creds.get("MAILPLUS_HOST")
    # `or` also covers a key that is present but empty, which int() rejects.
    port = int(creds.get("MAILPLUS_PORT") or "993")
    user = creds.get("MAILPLUS_USER")
    password = creds.get("MAILPLUS_PASS")
    if not all([host, user, password]):
        logger.error("MAILPLUS 접속 정보가 불완전합니다. credentials.env를 확인하세요.")
        sys.exit(1)

    last_uid = load_last_uid()
    logger.info(f"마지막 처리 UID: {last_uid}")
    max_uid = last_uid
    imported = 0
    failed = 0
    try:
        # The context manager logs out on every exit path, including errors
        # and sys.exit(), so the connection is never leaked.
        with imaplib.IMAP4_SSL(host, port) as mail:
            mail.login(user, password)
            mail.select("INBOX")
            logger.info("IMAP 접속 성공")

            if last_uid > 0:
                # Everything after the last processed UID.
                status, data = mail.uid("search", None, f"UID {last_uid + 1}:*")
            else:
                # First run: only the last 7 days.
                from datetime import timedelta
                since = (datetime.now() - timedelta(days=7)).strftime("%d-%b-%Y")
                status, data = mail.uid("search", None, f"SINCE {since}")
            if status != "OK":
                logger.error(f"메일 검색 실패: {status}")
                sys.exit(1)

            raw_uids = (data[0] if data else None) or b""
            # "N:*" can return the newest message even when its UID is below
            # N, so filter on last_uid. Sorting guarantees the checkpoint
            # written below never jumps over a UID that was not processed.
            uids = sorted(
                {uid for uid in map(int, raw_uids.split()) if uid > last_uid}
            )
            logger.info(f"새 메일: {len(uids)}")

            try:
                for uid in uids:
                    status, msg_data = mail.uid("fetch", str(uid), "(RFC822)")
                    # A fetch can answer OK with no literal (e.g. [None]);
                    # indexing that would raise TypeError.
                    if (
                        status != "OK"
                        or not msg_data
                        or not isinstance(msg_data[0], tuple)
                    ):
                        logger.warning(f"메일 가져오기 실패, 건너뜀 (UID {uid}, 상태 {status})")
                        continue
                    msg = email.message_from_bytes(msg_data[0][1])
                    subject = decode_mime_header(msg.get("Subject", "(제목 없음)"))
                    body = get_email_body(msg)
                    data_origin = detect_data_origin(subject, body)
                    filepath = save_email_file(msg, uid)
                    outcome = import_to_devonthink(filepath, subject, data_origin)
                    # Only an explicit False counts as a failure, so an
                    # importer that returns nothing is still counted as
                    # imported. The .eml file stays in the staging directory
                    # either way, so the UID is advanced regardless.
                    if outcome is False:
                        failed += 1
                    else:
                        imported += 1
                    max_uid = max(max_uid, uid)
            finally:
                # Checkpoint even when the loop aborts midway.
                if max_uid > last_uid:
                    save_last_uid(max_uid)

        logger.info(f"=== MailPlus 수집 완료 — {imported}건 임포트 ===")
        if failed:
            logger.warning(f"DEVONthink 임포트 실패: {failed}건")
    except imaplib.IMAP4.error as e:
        logger.error(f"IMAP 에러: {e}")
        sys.exit(1)
    except Exception as e:
        logger.error(f"예상치 못한 에러: {e}", exc_info=True)
        sys.exit(1)
# Script entry point: collect mail only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    run()