From e860baa179ec3818293adbc122272914124e33c2 Mon Sep 17 00:00:00 2001 From: hyungi Date: Mon, 25 May 2026 05:23:38 +0000 Subject: [PATCH] ops(hier): Phase A law/library decompose + snapshot freeze (replace-diagnose c3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 47 eval-target undecomposed non-news docs (law21+library24+document2) 분해+임베딩 (--skip-analysis, additive). 1005 leaf 생성 fail0, in_corpus 634 무손상 검증. snapshot doc_id_max=25912 chunk_id_max=71164 docs_decomposed 301->348. 측정 drift 0. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...lace_snapshot_after_phaseA_2026-05-25.json | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/search_eval/baselines/v0_2_hier_replace_snapshot_after_phaseA_2026-05-25.json diff --git a/tests/search_eval/baselines/v0_2_hier_replace_snapshot_after_phaseA_2026-05-25.json b/tests/search_eval/baselines/v0_2_hier_replace_snapshot_after_phaseA_2026-05-25.json new file mode 100644 index 0000000..c645ef7 --- /dev/null +++ b/tests/search_eval/baselines/v0_2_hier_replace_snapshot_after_phaseA_2026-05-25.json @@ -0,0 +1,42 @@ +{ + "version": "v0.2-hier-replace", + "label": "snapshot_after_phaseA", + "date": "2026-05-25", + "plan": "hier-hazy-waffle.md (PR-DocSrv-Hier-Replace-Diagnose-1)", + "purpose": "Corpus drift freeze for hier vs prehier (legacy) go/no-go measurement. Frozen AFTER Phase A law/library decomposition so the 47 newly-decomposed eval-target docs' leaves are included.", + "snapshot": { + "doc_id_max": 25912, + "chunk_id_max": 71164, + "documents_n": 22097, + "prehier_chunks_n": 31327, + "hier_leaves_emb": 12697, + "docs_decomposed": 348, + "hier_in_corpus": 634 + }, + "phase_a": { + "newly_decomposed_docs": 47, + "leaves_created": 1005, + "null_embedding_leaves": 9, + "skip_analysis": true, + "in_corpus_unchanged": "634 (invariant preserved)", + "doc_ids": [3776,3853,3854,3855,3856,3865,3867,3868,3878,3879,3886,3887,3888,3897,3916,3917,3920,3921,3980,3981,3982,4041,5205,11495,11496,11500,11503,11504,11505,11514,11515,11591,11617,11620,11625,11627,11644,11645,11646,11647,11669,11689,11691,11711,11712,13305,13652], + "note": "47 = eval-target undecomposed non-news docs (law 21 + library 24 + document 2). DOC_MIN_CHARS=4000 gate bypassed via --doc; section analysis deferred (Mac mini, separate axis)." + }, + "eval_set": { + "total_cases": 51, + "target_docs": 101, + "target_docs_decomposed_after_phaseA": "33 + 47 (news 21 + single-chunk excluded by design)" + }, + "model_config": { + "embedding": "BAAI/bge-m3 (production)", + "reranker": "BAAI/bge-reranker-v2-m3", + "search_mode": "hybrid", + "knn": "exact (enable_indexscan/bitmapscan=off in eval) — isolates chunking from ivfflat approximation" + }, + "variants": { + "prehier": "document_chunks WHERE source_type IS DISTINCT FROM 'hier_section' (legacy 30952 + null-source 375)", + "hier_sim_raw": "post-replace simulation: hier leaf if doc decomposed else prehier (per-doc fallback), current builder output incl tiny leaves", + "hier_sim_clean": "raw + exclude childless leaves with length(trim(text)) < 30 (A1 held-out finding, measured as variable vs raw)" + }, + "baseline_reference": "PR-Eval-V0_2 production baseline graded NDCG 0.659 (v0_2_baseline_2026-05-23.json) — harness sanity target (production ivfflat path)" +}