aeb9290cbd
플랜 ds-outline-anchor-b5 (g1~g6 코드). 핵심 ASME/법령 windowed 절의 0% 점프를
서버계산 char_start(builder offset)로 100% deterministic 점프로 전환.
- g1 migration 318: document_chunks.char_start INTEGER NULL (단일 statement, 멱등)
- g2 builder: char_start emit = FE 라인/offset 모델 미러(split('\n')+UTF-16 code unit+코드펜스 skip).
window-child=NULL, split-parent=heading offset, preamble=NULL, CR 미strip, NFC=telemetry.
node.text 보존(라인모델 hash-neutral) → hash_stable doc 보존. 단위테스트 7건.
- g3 persist+backfill 하이브리드:
* persist INSERT char_start
* update-char-start (g3-tU): hash_stable doc 비파괴 — 100% jump-target VERIFY(NEW-1) +
position-aligned PK UPDATE(NEW-2), 미달 doc DEMOTE → re-decompose 합류(NEW-4)
* --reprocess (g3-t2): md_content 출처(g0-t1) + jump-target-set 완료마커(B1) + B_jumptarget>=1(B3),
--doc 필수 else REFUSE. self-heal sweep(g3-t3).
- g4 /sections: char_start inner+outer SELECT + split-parent 노출(is_leaf OR %_split)
- g5 FE: resolveAnchorMap(BE-first, NEW-5 jump-target-candidate-scoped 폴백, C1 OR-exclude),
per-render-site basis guard(C3), endsWith('_split') 정정 + collapseWindows split-parent 흡수(C2).
단위테스트 25건(NEW-5/B4/C1/C2 포함).
- g6 hier_outline_quality_gate.py: read-only g-measure(verdict/B_jumptarget/hash_stable/dup/fence)
배포(g7: --no-deps, 스냅샷, UPDATE-only 32 + re-decompose 230∪demote, 정확도 게이트)는 별도 ops 단계.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
125 lines
4.8 KiB
TypeScript
125 lines
4.8 KiB
TypeScript
// Pure-function regression tests. Run locally with zero dependencies: node --test src/lib/utils/headingPath.test.ts
// (Node ≥23, or 22.6+ with --experimental-strip-types — TS types are stripped natively.)
|
||
import { test } from 'node:test';
|
||
import assert from 'node:assert/strict';
|
||
import {
|
||
cleanHeading,
|
||
pathSegments,
|
||
collapseWindows,
|
||
groupOrFlat,
|
||
sectionTypeLabel,
|
||
type DocumentSection,
|
||
} from './headingPath.ts';
|
||
|
||
// Monotonic counter so every fixture built by sec() gets a distinct chunk_id.
let _id = 0;

/**
 * Builds a DocumentSection fixture for the tests in this file.
 *
 * Each listed field defaults to null, except `is_leaf` (true) and
 * `chunk_id` (the next counter value). Anything supplied in `p` overrides
 * the default. Fields not listed here (e.g. `char_start`, which the C2 test
 * passes) are only present when the caller provides them.
 *
 * @param p Field overrides applied on top of the defaults.
 * @returns The merged section object.
 */
function sec(p: Partial<DocumentSection>): DocumentSection {
  // The counter advances on every call, even when `p` overrides chunk_id.
  _id += 1;
  const defaults = {
    chunk_id: _id,
    section_title: null,
    heading_path: null,
    level: null,
    node_type: null,
    is_leaf: true,
    section_type: null,
    summary: null,
    confidence: null,
  };
  return { ...defaults, ...p };
}
|
||
|
||
test('cleanHeading: 마크다운/HTML 잔재 strip', () => {
  // Each row is [raw heading, expected cleaned heading]; order matches the
  // original sequence of assertions.
  const cases: Array<[string | null, string]> = [
    ['**UG-5 PLATE**<sup>2</sup>', 'UG-5 PLATE'],
    [' **DESIGN** ', 'DESIGN'],
    ['a b\tc', 'a b c'],
    [null, ''],
    ['', ''],
  ];
  for (const [raw, expected] of cases) {
    assert.equal(cleanHeading(raw), expected);
  }
});
|
||
|
||
test('pathSegments: > 분할 + 정제', () => {
  // A path with markdown/HTML residue is expected to split on '>' into
  // cleaned segments.
  const segments = pathSegments('**A** > **B**<sup>1</sup> > C');
  assert.deepEqual(segments, ['A', 'B', 'C']);

  // null and whitespace-only paths are both expected to yield no segments.
  for (const emptyish of [null, ' ']) {
    assert.deepEqual(pathSegments(emptyish), []);
  }
});
|
||
|
||
test('sectionTypeLabel: 한글 매핑 + passthrough', () => {
  // [input section type, expected label]: a mapped type, a type expected to
  // pass through unchanged, and null.
  const expectations: Array<[string | null, string | null]> = [
    ['requirement', '요건'],
    ['unknown_type', 'unknown_type'],
    [null, null],
  ];
  expectations.forEach(([input, label]) => {
    assert.equal(sectionTypeLabel(input), label);
  });
});
|
||
|
||
test('collapseWindows: 연속 동일 heading window 만 dedupe, 순서 유지', () => {
  // Small fixture helpers: a regular section and a window fragment.
  const plain = (heading: string) => sec({ heading_path: heading, node_type: null });
  const win = (heading: string) => sec({ heading_path: heading, node_type: 'window' });

  const rows = collapseWindows([
    plain('Intro'),
    win('Pearson'),
    win('Pearson'),
    win('Pearson'),
    plain('Conf'),
    win('Pearson'), // not adjacent to the earlier run, so it is a new entry
  ]);

  assert.equal(rows.length, 4);

  // Expected fragment counts per row: Intro, Pearson x3 merged, Conf, lone Pearson.
  const expectedCounts = [1, 3, 1, 1];
  expectedCounts.forEach((count, i) => {
    assert.equal(rows[i].fragmentCount, count);
  });

  // Input order must be preserved.
  const headings = rows.map((row) => cleanHeading(row.section.heading_path));
  assert.deepEqual(headings, ['Intro', 'Pearson', 'Conf', 'Pearson']);
});
|
||
|
||
test('[C2] collapseWindows: split-parent + window 들 → rail 1행, 대표=split-parent(char_start 보유)', () => {
  // All three sections share the same title and heading path.
  const shared = { section_title: 'Article 5', heading_path: 'Article 5' };
  const splitParent = sec({ ...shared, node_type: 'chapter_split', is_leaf: false, char_start: 120 });
  const firstWindow = sec({ ...shared, node_type: 'window', is_leaf: true, char_start: null });
  const secondWindow = sec({ ...shared, node_type: 'window', is_leaf: true, char_start: null });

  const out = collapseWindows([splitParent, firstWindow, secondWindow]);
  assert.equal(out.length, 1, 'split-parent + 2 window → rail 1행');

  // The representative row must be the split-parent, because it is the one
  // carrying char_start (the jump target).
  const [rail] = out;
  assert.equal(rail.section.node_type, 'chapter_split');
  assert.equal(rail.section.char_start, 120);
  assert.equal(rail.fragmentCount, 2, 'window 조각 수 = 2 (split-parent 자신 제외)');
});
|
||
|
||
test('groupOrFlat: 적은 그룹 + 낮은 기타% → group (5140-류)', () => {
  // 3 top-level segments x 4 sections each = 12 sections, none of them
  // windows, so 3 groups and 0% "other" are expected.
  const tops = ['장1', '장2', '장3'];
  const sections: DocumentSection[] = tops.flatMap((top) =>
    Array.from({ length: 4 }, (_, i) => sec({ heading_path: `${top} > 절${i}` })),
  );

  const layout = groupOrFlat(sections);
  assert.equal(layout.mode, 'group');
  assert.equal(layout.groups.length, 3);
  // Groups are expected in order of first appearance.
  const keys = layout.groups.map((group) => group.key);
  assert.deepEqual(keys, ['장1', '장2', '장3']);
  assert.equal(layout.groups[0].items.length, 4);
});
|
||
|
||
test('groupOrFlat: 기타% ≥ 50 → flat 강등 (5186/5225-류)', () => {
  // Helper for rows that carry an explicit node_type.
  const typed = (node_type: string, heading_path: string) => sec({ node_type, heading_path });

  // Per the original note, 4 of these 6 rows (three windows plus one
  // section_split) fall in the "other" bucket: 4/6 = 66.7%.
  const sections: DocumentSection[] = [
    sec({ heading_path: 'A > a1' }),
    sec({ heading_path: 'B > b1' }),
    typed('window', 'W1'),
    typed('window', 'W2'),
    typed('section_split', 'S1'),
    typed('window', 'W3'),
  ];

  const layout = groupOrFlat(sections);
  assert.equal(layout.mode, 'flat');
  assert.ok(layout.items.length > 0);
});
|
||
|
||
test('groupOrFlat: group_count > 30 → flat 강등', () => {
  // 31 sections, each with a distinct top-level segment.
  const sections: DocumentSection[] = Array.from({ length: 31 }, (_, i) =>
    sec({ heading_path: `seg${i} > x` }),
  );
  const { mode } = groupOrFlat(sections);
  assert.equal(mode, 'flat');
});
|
||
|
||
test('groupOrFlat: 빈 입력 → flat, 항목 0', () => {
  // No sections at all: the layout is expected to be flat with no items.
  const emptyLayout = groupOrFlat([]);
  const observed = { mode: emptyLayout.mode, itemCount: emptyLayout.items.length };
  assert.equal(observed.mode, 'flat');
  assert.equal(observed.itemCount, 0);
});
|