From c49047bf2ab14a17ac83679f48573664ad59be60 Mon Sep 17 00:00:00 2001 From: hyungi Date: Tue, 12 May 2026 06:56:23 +0000 Subject: [PATCH] feat(email): schema for email source_external_id + metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit migrations 259~261: - documents.source_external_id TEXT NULL (email 에선 always non-null, ingest 책임) - documents.email_metadata JSONB NULL (from/to/cc/subject/folder/uidvalidity/uid/received_at/attachments) - partial unique on (source_external_id) WHERE source_channel = email AND source_external_id IS NOT NULL ORM: - Document.source_external_id / email_metadata mapped_column 추가 dedup 진실원장 = DB unique index. server-side IMAP \\Seen flag 는 best-effort. mailplus_archive 의 INBOX root archive row 는 source_external_id=NULL 이라 unique 에서 자연 제외. plan: ~/.claude/plans/document-enchanted-candy.md --- app/models/document.py | 6 ++++++ migrations/259_documents_source_external_id.sql | 7 +++++++ migrations/260_documents_email_metadata.sql | 9 +++++++++ migrations/261_documents_source_external_id_uq.sql | 7 +++++++ 4 files changed, 29 insertions(+) create mode 100644 migrations/259_documents_source_external_id.sql create mode 100644 migrations/260_documents_email_metadata.sql create mode 100644 migrations/261_documents_source_external_id_uq.sql diff --git a/app/models/document.py b/app/models/document.py index a7cd6df..415a8c2 100644 --- a/app/models/document.py +++ b/app/models/document.py @@ -100,6 +100,12 @@ class Document(Base): preview_hash: Mapped[str | None] = mapped_column(String(64)) preview_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + # PR-4 Email Ingest — 외부 source dedup key + 메일 metadata + # source_external_id: email 에선 always non-null (Message-ID 또는 imap UID fallback). 다른 source 는 NULL 가능. + # email_metadata: from/to/cc/subject/folder/uidvalidity/uid/received_at/mailplus_link/attachments[]. 
+    source_external_id: Mapped[str | None] = mapped_column(Text)
+    email_metadata: Mapped[dict | None] = mapped_column(JSONB)
+
     # 메타데이터
     source_channel: Mapped[str | None] = mapped_column(
         Enum("law_monitor", "devonagent", "email", "web_clip",
diff --git a/migrations/259_documents_source_external_id.sql b/migrations/259_documents_source_external_id.sql
new file mode 100644
index 0000000..14678e5
--- /dev/null
+++ b/migrations/259_documents_source_external_id.sql
@@ -0,0 +1,7 @@
+-- PR-4 Email Ingest — add the documents.source_external_id column.
+-- Dedup key from the external source. Always non-null for the email source (responsibility of the ingest code).
+-- Value is the normalized Message-ID, or the fallback imap:{folder}:{uidvalidity}:{uid}.
+-- NULL is allowed for other source_channel values (revisit nullable→unique if one of them gives it a meaning).
+
+ALTER TABLE documents
+    ADD COLUMN IF NOT EXISTS source_external_id TEXT;
diff --git a/migrations/260_documents_email_metadata.sql b/migrations/260_documents_email_metadata.sql
new file mode 100644
index 0000000..2b0d945
--- /dev/null
+++ b/migrations/260_documents_email_metadata.sql
@@ -0,0 +1,9 @@
+-- PR-4 Email Ingest — add the documents.email_metadata JSONB column.
+-- Shape: {from, to[], cc[], subject, folder, uidvalidity, uid, received_at,
+--         mailplus_link, attachments: [{filename, mime, size, part_id}],
+--         parse_error?: string}
+-- Rows created by mailplus_archive (the existing INBOX root archive worker) stay NULL.
+-- Only rows created by inbox_ingest populate this column.
+
+ALTER TABLE documents
+    ADD COLUMN IF NOT EXISTS email_metadata JSONB;
diff --git a/migrations/261_documents_source_external_id_uq.sql b/migrations/261_documents_source_external_id_uq.sql
new file mode 100644
index 0000000..bcc159c
--- /dev/null
+++ b/migrations/261_documents_source_external_id_uq.sql
@@ -0,0 +1,7 @@
+-- PR-4 Email Ingest — partial unique index on (source_external_id) for the email source.
+-- Source of truth for inbox_ingest dedup: re-ingesting the same mail is absorbed by ON CONFLICT DO NOTHING.
+-- mailplus_archive's INBOX root archive rows have source_external_id = NULL, so they are excluded automatically.
+-- Partial index: only rows matching the WHERE clause are constrained; an ON CONFLICT (source_external_id) target needs a WHERE clause implying this predicate to infer the index.
+CREATE UNIQUE INDEX IF NOT EXISTS uq_documents_email_source_external_id
+    ON documents (source_external_id)
+    WHERE source_channel = 'email' AND source_external_id IS NOT NULL;