feat(email): schema for email source_external_id + metadata

migrations 259~261:
- documents.source_external_id TEXT NULL (email 에선 always non-null, ingest 책임)
- documents.email_metadata JSONB NULL (from/to/cc/subject/folder/uidvalidity/uid/received_at/mailplus_link/attachments)
- partial unique index on (source_external_id) WHERE source_channel = 'email' AND source_external_id IS NOT NULL

ORM:
- Document.source_external_id / email_metadata mapped_column 추가

dedup 진실원장 = DB unique index. server-side IMAP \Seen flag 는 best-effort.
mailplus_archive 의 INBOX root archive row 는 source_external_id=NULL 이라 unique 에서 자연 제외.

plan: ~/.claude/plans/document-enchanted-candy.md
This commit is contained in:
hyungi
2026-05-12 06:56:23 +00:00
parent 49f44bba60
commit c49047bf2a
4 changed files with 29 additions and 0 deletions
+6
View File
@@ -100,6 +100,12 @@ class Document(Base):
preview_hash: Mapped[str | None] = mapped_column(String(64))
preview_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
# PR-4 Email Ingest — dedup key from the external source + mail metadata.
# source_external_id: for email rows the ingest code is expected to always set it
#   (Message-ID, or an IMAP UID-based fallback); the column itself is nullable,
#   and other sources may leave it NULL.
# email_metadata: JSON object with from/to/cc/subject/folder/uidvalidity/uid/
#   received_at/mailplus_link/attachments[].
source_external_id: Mapped[str | None] = mapped_column(Text)
email_metadata: Mapped[dict | None] = mapped_column(JSONB)
# 메타데이터
source_channel: Mapped[str | None] = mapped_column(
Enum("law_monitor", "devonagent", "email", "web_clip",
@@ -0,0 +1,7 @@
-- PR-4 Email Ingest — add the documents.source_external_id column.
-- Dedup key supplied by the external source. For the email source it is always
-- non-null; guaranteeing that is the ingest code's responsibility (the column
-- itself is declared nullable).
-- Value: the normalized Message-ID, or the fallback imap:{folder}:{uidvalidity}:{uid}.
-- Other source_channel values may leave it NULL (revisit nullable -> unique if
-- they are ever given a meaning of their own for this column).
ALTER TABLE documents
ADD COLUMN IF NOT EXISTS source_external_id TEXT;
@@ -0,0 +1,9 @@
-- PR-4 Email Ingest — add the documents.email_metadata JSONB column.
-- Shape: {from, to[], cc[], subject, folder, uidvalidity, uid, received_at,
-- mailplus_link, attachments: [{filename, mime, size, part_id}],
-- parse_error?: string}
-- Rows created by mailplus_archive (the existing INBOX root archive worker) keep this NULL.
-- Only rows created by inbox_ingest populate it.
ALTER TABLE documents
ADD COLUMN IF NOT EXISTS email_metadata JSONB;
@@ -0,0 +1,7 @@
-- PR-4 Email Ingest — partial unique index on (source_external_id) for the email source.
-- Source of truth for inbox_ingest dedup: re-ingesting the same mail relies on
-- ON CONFLICT DO NOTHING.
-- INBOX root archive rows written by mailplus_archive have source_external_id = NULL,
-- so they are excluded from the index automatically.
-- NOTE(review): assuming PostgreSQL — if the ingest INSERT names a conflict target,
-- it must repeat this predicate (ON CONFLICT (source_external_id) WHERE ...) for the
-- partial index to be inferred; a bare ON CONFLICT DO NOTHING needs no target.
-- Confirm against the ingest code.
CREATE UNIQUE INDEX IF NOT EXISTS uq_documents_email_source_external_id
ON documents (source_external_id)
WHERE source_channel = 'email' AND source_external_id IS NOT NULL;