6e9d73278f
COMMENT ON VIEW + header — corpus_chunks_{prehier,hier_sim_raw,hier_sim_clean} 은
?corpus_variant= eval dispatch 전용. production retrieval default-path 는 corpus_chunks
(partial ivfflat) 만. 재측정/passage-RAG 재평가 자산으로 보존, 오용 방지 박제.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
51 lines
3.0 KiB
SQL
51 lines
3.0 KiB
SQL
-- PR-DocSrv-Hier-Replace-Diagnose-1 c4: measurement-only views (additive, droppable, independent of in_corpus)
-- prehier = pre-hier baseline (legacy + null-source). hier_sim_* = post-replace simulation (per-document fallback).
-- clean = excludes childless-tiny (<30 chars) leaves (found in the A1 held-out run). kept-leaf = is_leaf AND (len>=30 OR has child).
--
-- ⚠️ EVAL-ONLY — NOT the production retrieval default path. Access is only via `?corpus_variant=` (run_eval).
-- Default search reads corpus_chunks (WHERE in_corpus=true, partial ivfflat) only. Do not pull these views into the
-- production retrieval path. Replace-Diagnose decision = NO-GO (2026-05-25); the views are kept as assets for
-- re-measurement / passage-RAG re-evaluation. If retired, dropping the 3 views has no operational impact.
-- (The same notice is recorded in the database via COMMENT ON VIEW.)
-- Clear out any earlier definitions of the three eval-only views so the
-- script can be re-run. IF EXISTS turns a missing view into a notice
-- instead of an error.
DROP VIEW IF EXISTS
    corpus_chunks_prehier,
    corpus_chunks_hier_sim_raw,
    corpus_chunks_hier_sim_clean;
-- Pre-hier baseline: every chunk that has an embedding and whose source_type
-- is anything other than 'hier_section'. IS DISTINCT FROM is used so that
-- rows with a NULL source_type are kept rather than filtered out.
CREATE VIEW corpus_chunks_prehier AS
SELECT dc.*
FROM document_chunks AS dc
WHERE dc.embedding IS NOT NULL
  AND dc.source_type IS DISTINCT FROM 'hier_section';
-- Post-replace simulation (raw). A chunk with an embedding is exposed when
--   (a) it is a 'hier_section' leaf, or
--   (b) it is a non-hier chunk (NULL source_type included) and its document
--       has no embedded 'hier_section' leaf at all (per-document fallback).
CREATE VIEW corpus_chunks_hier_sim_raw AS
SELECT dc.*
FROM document_chunks AS dc
WHERE dc.embedding IS NOT NULL
  AND (
        (dc.is_leaf AND dc.source_type = 'hier_section')
        OR (
            dc.source_type IS DISTINCT FROM 'hier_section'
            AND NOT EXISTS (
                SELECT 1
                FROM document_chunks AS hier_leaf
                WHERE hier_leaf.doc_id = dc.doc_id
                  AND hier_leaf.source_type = 'hier_section'
                  AND hier_leaf.is_leaf
                  AND hier_leaf.embedding IS NOT NULL
            )
        )
      );
-- Post-replace simulation (clean). A 'hier_section' leaf counts as "kept"
-- only when it is not childless-tiny: its trimmed text is at least 30
-- characters long, or at least one chunk references it through parent_id.
-- Non-hier chunks are exposed only for documents with no kept leaf.
CREATE VIEW corpus_chunks_hier_sim_clean AS
SELECT dc.*
FROM document_chunks AS dc
WHERE dc.embedding IS NOT NULL
  AND (
        -- Kept hier leaf: is_leaf AND NOT childless-tiny.
        (
            dc.source_type = 'hier_section'
            AND dc.is_leaf
            AND (
                EXISTS (
                    SELECT 1
                    FROM document_chunks AS child
                    WHERE child.parent_id = dc.id
                )
                OR length(trim(dc.text)) >= 30
            )
        )
        -- Legacy fallback: only when the document has not a single kept
        -- (clean) hier leaf with an embedding.
        OR (
            dc.source_type IS DISTINCT FROM 'hier_section'
            AND NOT EXISTS (
                SELECT 1
                FROM document_chunks AS hier_leaf
                WHERE hier_leaf.doc_id = dc.doc_id
                  AND hier_leaf.source_type = 'hier_section'
                  AND hier_leaf.is_leaf
                  AND hier_leaf.embedding IS NOT NULL
                  AND (
                      EXISTS (
                          SELECT 1
                          FROM document_chunks AS grandchild
                          WHERE grandchild.parent_id = hier_leaf.id
                      )
                      OR length(trim(hier_leaf.text)) >= 30
                  )
            )
        )
      );
-- Record the intended use of each view in the database catalog itself, to
-- guard against the views being wired into the production retrieval path.
-- The comment text is stored verbatim in the database.

-- Pre-hier baseline view.
COMMENT ON VIEW corpus_chunks_prehier
    IS 'EVAL-ONLY (Hier-Replace-Diagnose-1, NO-GO 2026-05-25). pre-hier baseline. ?corpus_variant=prehier 전용. default retrieval 은 corpus_chunks 만.';

-- Post-replace simulation, raw variant.
COMMENT ON VIEW corpus_chunks_hier_sim_raw
    IS 'EVAL-ONLY (Hier-Replace-Diagnose-1, NO-GO 2026-05-25). post-replace 시뮬(raw). ?corpus_variant=hier_sim_raw 전용. production 검색 미사용.';

-- Post-replace simulation, clean variant (childless-tiny leaves excluded).
COMMENT ON VIEW corpus_chunks_hier_sim_clean
    IS 'EVAL-ONLY (Hier-Replace-Diagnose-1, NO-GO 2026-05-25). post-replace 시뮬(childless-tiny 제외). ?corpus_variant=hier_sim_clean 전용. production 검색 미사용.';