From 448195637b799f059eadb7538a3bac4133dab442 Mon Sep 17 00:00:00 2001 From: hyungi Date: Tue, 9 Jun 2026 11:54:01 +0900 Subject: [PATCH] =?UTF-8?q?fix(documents):=20g-measure=20verdict=20?= =?UTF-8?q?=EB=A5=BC=20jump-target=20=EB=8C=80=20jump-target=20=EB=B9=84?= =?UTF-8?q?=EA=B5=90=EB=A1=9C=20=EC=A0=95=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hier_outline_quality_gate 의 keep-better verdict 가 build jump-target(n_b, window-child 제외)을 stored leaf 전수(n_a, window-child 포함)와 비교 → windowed doc 이 n_a≫n_b 로 거짓 A_better 강등되던 bias 제거. stored 도 jump-target((비-window leaf OR %_split)+제목)만 카운트. 정정 후 hash_stable 31(≈MEASURE2 32, fence-flip 1)·dup_title 8·in_corpus 3(5140/5186/5225) 전부 UPDATE-only = MEASURE2 와 정합. Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/hier_outline_quality_gate.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/hier_outline_quality_gate.py b/scripts/hier_outline_quality_gate.py index 656671a..b819e0c 100644 --- a/scripts/hier_outline_quality_gate.py +++ b/scripts/hier_outline_quality_gate.py @@ -106,7 +106,14 @@ async def _measure_doc(session, doc_id): # verdict 휴리스틱 (high-recall junk 보호 + absent-structure → A_better). # MEASURE2 가 canonical 분포를 이미 박제 — 이 verdict 는 재현/감사용. 애매(notes:ambiguous)는 PASS 미차단. - n_a = sum(1 for s in stored if s["is_leaf"]) + # ★ apples-to-apples: 양쪽 모두 JUMP-TARGET 수로 비교(stored leaf 전수 X — window-child 가 n_a 를 부풀려 + # windowed doc 을 거짓 A_better 로 떨구는 bias 제거). stored jump-target = (비-window leaf OR %_split) + 제목. + def _stored_is_jt(s): + st = (s["is_leaf"] and s["node_type"] != "window") or bool( + s["node_type"] and s["node_type"].endswith("_split")) + return st and bool(s["section_title"]) + n_a = sum(1 for s in stored if _stored_is_jt(s)) + res["a_jumptarget"] = n_a n_b = res["b_jumptarget"] if n_b == 0: res["verdict"] = "A_better" # B 개요 없음(빈 jump-target)