Files
hyungi 6e9d73278f docs(search): pin hier measurement views as EVAL-ONLY (replace-diagnose)
COMMENT ON VIEW + header — corpus_chunks_{prehier,hier_sim_raw,hier_sim_clean} 은
?corpus_variant= eval dispatch 전용. production retrieval default-path 는 corpus_chunks
(partial ivfflat) 만. 재측정/passage-RAG 재평가 자산으로 보존, 오용 방지 박제.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 05:53:04 +00:00

51 lines
3.0 KiB
SQL

-- PR-DocSrv-Hier-Replace-Diagnose-1 c4: measurement-only views (additive, droppable, independent of in_corpus).
--   prehier    = pre-hier baseline (legacy + null-source chunks).
--   hier_sim_* = post-replace simulation (fallback decided per document).
--   clean      = excludes childless-tiny (<30 chars) leaves (found in the A1 held-out run).
--                kept-leaf = is_leaf AND (len >= 30 OR has child).
--
-- ⚠️ EVAL-ONLY — these are NOT the production retrieval default path. Access is only via
-- `?corpus_variant=` (run_eval). Default search reads only corpus_chunks
-- (WHERE in_corpus=true, partial ivfflat). Do not pull these views into the production
-- retrieval path. Replace-Diagnose decision = NO-GO (2026-05-25); the views are retained as an
-- asset for re-measurement / passage-RAG re-evaluation. To retire them, DROP the 3 views —
-- no operational impact. (The same notice is pinned in the DB via COMMENT ON VIEW.)
--
-- Remove any earlier definitions so the script can be re-run. Each view selects only from
-- document_chunks and none references another, so the drop order is not significant.
DROP VIEW IF EXISTS corpus_chunks_hier_sim_clean;
DROP VIEW IF EXISTS corpus_chunks_hier_sim_raw;
DROP VIEW IF EXISTS corpus_chunks_prehier;
-- Pre-hier baseline: every embedded chunk whose source_type is not 'hier_section'.
-- IS DISTINCT FROM (rather than <>) also keeps rows whose source_type is NULL.
-- NOTE(review): in PostgreSQL `*` is expanded when the view is created, so columns added to
-- document_chunks afterwards will not appear until the view is recreated — confirm acceptable.
CREATE VIEW corpus_chunks_prehier AS
SELECT dc.*
FROM document_chunks AS dc
WHERE dc.embedding IS NOT NULL
  AND dc.source_type IS DISTINCT FROM 'hier_section';
-- Post-replace simulation (raw): embedded 'hier_section' leaves, plus non-hier chunks only for
-- documents that have no embedded hier leaf at all (the fallback is decided per doc_id).
CREATE VIEW corpus_chunks_hier_sim_raw AS
SELECT dc.*
FROM document_chunks AS dc
WHERE dc.embedding IS NOT NULL
  AND (
        -- hier leaf
        (
            dc.is_leaf = true
        AND dc.source_type = 'hier_section'
        )
        -- legacy fallback: only when the same document has no embedded hier leaf
     OR (
            dc.source_type IS DISTINCT FROM 'hier_section'
        AND NOT EXISTS (
                SELECT 1
                FROM document_chunks AS hier_leaf
                WHERE hier_leaf.doc_id = dc.doc_id
                  AND hier_leaf.source_type = 'hier_section'
                  AND hier_leaf.is_leaf = true
                  AND hier_leaf.embedding IS NOT NULL
            )
        )
  );
-- Post-replace simulation (clean): like the raw simulation, but a hier leaf only counts as
-- "kept" when its trimmed text is at least 30 characters long OR some chunk names it as parent.
-- Non-hier chunks are included only for documents without any kept, embedded hier leaf.
CREATE VIEW corpus_chunks_hier_sim_clean AS
SELECT dc.*
FROM document_chunks AS dc
WHERE dc.embedding IS NOT NULL
  AND (
        -- kept hier leaf: is_leaf AND NOT childless-tiny
        (
            dc.source_type = 'hier_section'
        AND dc.is_leaf = true
        AND (
                length(trim(dc.text)) >= 30
             OR EXISTS (
                    SELECT 1
                    FROM document_chunks AS child
                    WHERE child.parent_id = dc.id
                )
            )
        )
        -- legacy fallback: only when the document has not a single kept (clean) hier leaf
     OR (
            dc.source_type IS DISTINCT FROM 'hier_section'
        AND NOT EXISTS (
                SELECT 1
                FROM document_chunks AS kept_leaf
                WHERE kept_leaf.doc_id = dc.doc_id
                  AND kept_leaf.source_type = 'hier_section'
                  AND kept_leaf.is_leaf = true
                  AND kept_leaf.embedding IS NOT NULL
                  -- same kept-leaf test as the first branch, applied to the candidate leaf
                  AND (
                          length(trim(kept_leaf.text)) >= 30
                       OR EXISTS (
                              SELECT 1
                              FROM document_chunks AS kept_child
                              WHERE kept_child.parent_id = kept_leaf.id
                          )
                      )
            )
        )
  );
-- Pin the intended purpose on each view inside the database itself, to guard against the
-- views being misused on the production retrieval path. The literals below are stored as-is.
COMMENT ON VIEW corpus_chunks_prehier IS
    'EVAL-ONLY (Hier-Replace-Diagnose-1, NO-GO 2026-05-25). pre-hier baseline. ?corpus_variant=prehier 전용. default retrieval 은 corpus_chunks 만.';

COMMENT ON VIEW corpus_chunks_hier_sim_raw IS
    'EVAL-ONLY (Hier-Replace-Diagnose-1, NO-GO 2026-05-25). post-replace 시뮬(raw). ?corpus_variant=hier_sim_raw 전용. production 검색 미사용.';

COMMENT ON VIEW corpus_chunks_hier_sim_clean IS
    'EVAL-ONLY (Hier-Replace-Diagnose-1, NO-GO 2026-05-25). post-replace 시뮬(childless-tiny 제외). ?corpus_variant=hier_sim_clean 전용. production 검색 미사용.';