feat(search): add document_chunks page/source columns + unique idx
migrations 279-281: page_start/end + source_type/chunker_version/source_hash/chunk_content_hash, legacy backfill (30,952 rows), unique (doc_id,source_type,chunker_version,chunk_index). PR-DocSrv-LargeDoc-Split-Markdown-1 commit 1. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,10 @@
|
|||||||
|
-- PR-DocSrv-LargeDoc-Split-Markdown-1 (commit 1: schema)
-- Unified schema change: adds the LargeDoc columns (page_start / page_end) and,
-- ahead of time, the Phase 3A columns (source_type / chunker_version /
-- source_hash / chunk_content_hash).
-- The Phase 3A session works on top of these columns and does not touch the
-- schema itself. Work-split plan: brisk-paging-quokka.md
--
-- Every column is added nullable and without a default; IF NOT EXISTS keeps the
-- statement safe to re-run.
ALTER TABLE document_chunks
    -- LargeDoc: page range covered by a chunk
    ADD COLUMN IF NOT EXISTS page_start         INTEGER,
    ADD COLUMN IF NOT EXISTS page_end           INTEGER,
    -- Phase 3A: chunk provenance / versioning
    ADD COLUMN IF NOT EXISTS source_type        TEXT,
    ADD COLUMN IF NOT EXISTS chunker_version    TEXT,
    ADD COLUMN IF NOT EXISTS source_hash        TEXT,
    ADD COLUMN IF NOT EXISTS chunk_content_hash TEXT;
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
-- Safe backfill of pre-existing rows: source_type / chunker_version are both
-- set to 'legacy' (no over-classification; Phase 3A is expected to regenerate
-- or clean these up with the exact source_type).
-- page_start / page_end are taken from the existing single `page` column and
-- stay NULL when `page` is NULL.
--
-- COALESCE leaves any value that is already populated untouched, so the
-- statement is safe to re-run.
-- The page branch of the predicate is guarded by `page IS NOT NULL`: a row whose
-- `page` is NULL can never receive a page_start / page_end value from this
-- statement, so without the guard it would be matched and rewritten with
-- unchanged values on every re-run.
UPDATE document_chunks
SET
    source_type     = COALESCE(source_type, 'legacy'),
    chunker_version = COALESCE(chunker_version, 'legacy'),
    page_start      = COALESCE(page_start, page),
    page_end        = COALESCE(page_end, page)
WHERE source_type IS NULL
   OR chunker_version IS NULL
   OR (
        page IS NOT NULL
        AND (page_start IS NULL OR page_end IS NULL)
      );
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
-- Uniqueness key for chunks: (doc_id, source_type, chunker_version, chunk_index).
-- A duplicate check on (doc_id, chunk_index) returned 0 rows (verified
-- 2026-05-24), so the key holds once the legacy backfill has run.
-- Deliberately built without CONCURRENTLY: that keeps it safe inside the
-- migration runner's transaction, and a plain build is realistic at ~30K rows.
--
-- NOTE(review): source_type and chunker_version are nullable. Assuming
-- PostgreSQL defaults, NULLs in a unique index are treated as distinct, so rows
-- written with NULL in either column are not constrained by this index.
-- Confirm that every writer populates both columns.
CREATE UNIQUE INDEX IF NOT EXISTS uq_document_chunks_source_version_index
    ON document_chunks (
        doc_id,
        source_type,
        chunker_version,
        chunk_index
    );
|
||||||
Reference in New Issue
Block a user