diff --git a/migrations/279_document_chunks_split_columns.sql b/migrations/279_document_chunks_split_columns.sql
new file mode 100644
index 0000000..3b23093
--- /dev/null
+++ b/migrations/279_document_chunks_split_columns.sql
@@ -0,0 +1,13 @@
+-- PR-DocSrv-LargeDoc-Split-Markdown-1 (commit 1: schema)
+-- Unified schema: adds the LargeDoc columns (page_start/page_end) and, ahead of time,
+-- the Phase 3A columns (source_type/chunker_version/source_hash/chunk_content_hash).
+-- The Phase 3A session works on top of these columns and does not touch the schema.
+-- Work-split plan: brisk-paging-quokka.md
+-- Every column is added nullable with no default; IF NOT EXISTS keeps the statement re-runnable.
+ALTER TABLE document_chunks
+  ADD COLUMN IF NOT EXISTS page_start integer,
+  ADD COLUMN IF NOT EXISTS page_end integer,
+  ADD COLUMN IF NOT EXISTS source_type text,
+  ADD COLUMN IF NOT EXISTS chunker_version text,
+  ADD COLUMN IF NOT EXISTS source_hash text,
+  ADD COLUMN IF NOT EXISTS chunk_content_hash text;
diff --git a/migrations/280_document_chunks_legacy_backfill.sql b/migrations/280_document_chunks_legacy_backfill.sql
new file mode 100644
index 0000000..278ddfe
--- /dev/null
+++ b/migrations/280_document_chunks_legacy_backfill.sql
@@ -0,0 +1,14 @@
+-- Safe backfill of existing rows: source_type/chunker_version become 'legacy'/'legacy'
+-- (no over-classification; Phase 3A regenerates/cleans up rows with the accurate source_type).
+-- page_start/page_end are copied from the existing single page column (left NULL when page is NULL).
+-- The page predicate requires page IS NOT NULL: for a row whose page is NULL the COALESCE cannot
+-- change page_start/page_end, so matching it would only rewrite the row unchanged on every re-run.
+UPDATE document_chunks
+SET
+  source_type = COALESCE(source_type, 'legacy'),
+  chunker_version = COALESCE(chunker_version, 'legacy'),
+  page_start = COALESCE(page_start, page),
+  page_end = COALESCE(page_end, page)
+WHERE source_type IS NULL
+  OR chunker_version IS NULL
+  OR (page IS NOT NULL AND (page_start IS NULL OR page_end IS NULL));
diff --git a/migrations/281_document_chunks_source_unique_idx.sql b/migrations/281_document_chunks_source_unique_idx.sql
new file mode 100644
index 0000000..2e7c04d
--- /dev/null
+++ b/migrations/281_document_chunks_source_unique_idx.sql
@@ -0,0 +1,9 @@
+-- UNIQUE (doc_id, source_type, chunker_version, chunk_index).
+-- Duplicate check on (doc_id, chunk_index) returned 0 (verified 2026-05-24), so the key is valid
+-- after the legacy backfill.
+-- Deliberately not CONCURRENTLY: safe inside the runner's transaction and realistic at ~30K rows.
+-- NOTE(review): source_type and chunker_version are nullable, and PostgreSQL unique indexes treat
+-- NULLs as distinct by default, so a row written later with NULL in either column is not covered
+-- by this uniqueness guarantee -- confirm every writer sets both columns.
+CREATE UNIQUE INDEX IF NOT EXISTS uq_document_chunks_source_version_index
+  ON document_chunks (doc_id, source_type, chunker_version, chunk_index);