From f7198d9d68ded5e5e6000b8bf8f30475b1ce1c3f Mon Sep 17 00:00:00 2001 From: hyungi Date: Mon, 25 May 2026 00:22:34 +0000 Subject: [PATCH] feat(search): expose hier section outline & summaries in document detail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-DocSrv-Hier-Section-UI-1 Phase 1 (코드+커밋만, 배포는 Phase 2 backfill 완주 후). - backend: GET /documents/{id}/sections — hier leaf 목차 + chunk_section_analysis 요약. document_chunks 직접 조회(retrieval 아닌 목차 표시라 corpus_chunks 뷰 의도적 우회 — docstring 명시). DISTINCT ON 으로 최신 분석 1행. - frontend: SectionOutline.svelte(좌측 목차, per-doc 동적 그룹/flat, window dedupe, 클릭 시 요약/breadcrumb 인라인), headingPath.ts 순수 유틸(+node:test 단위테스트 8케이스). [id]/+page.svelte 3-zone 레이아웃 + 우측 메타 Tabs [정보|AI|관리] 로 카드 스프롤 해소. - 절 없는 문서/404 는 목차 숨김(graceful). 본문 점프는 follow-up. Co-Authored-By: Claude Opus 4.7 (1M context) --- app/api/documents.py | 75 +++++++++ .../src/lib/components/SectionOutline.svelte | 117 +++++++++++++ frontend/src/lib/utils/headingPath.test.ts | 110 +++++++++++++ frontend/src/lib/utils/headingPath.ts | 154 ++++++++++++++++++ .../src/routes/documents/[id]/+page.svelte | 130 +++++++++------ 5 files changed, 537 insertions(+), 49 deletions(-) create mode 100644 frontend/src/lib/components/SectionOutline.svelte create mode 100644 frontend/src/lib/utils/headingPath.test.ts create mode 100644 frontend/src/lib/utils/headingPath.ts diff --git a/app/api/documents.py b/app/api/documents.py index 441769b..724eac6 100644 --- a/app/api/documents.py +++ b/app/api/documents.py @@ -537,6 +537,81 @@ async def get_document( return DocumentDetailResponse.model_validate(doc) +# ─── 절(hier section) 목차 + 요약 (PR-DocSrv-Hier-Section-UI-1) ─── +class SectionItem(BaseModel): + chunk_id: int + section_title: str | None = None # raw 마크다운 포함 — 정제는 프런트(headingPath.ts) + heading_path: str | None = None # raw + level: int | None = None + node_type: str | None = None # window | section_split | null + is_leaf: bool + section_type: str 
| None = None + summary: str | None = None # status='summarized' 인 분석행에만, 그 외 None + confidence: float | None = None + + +class DocumentSectionsResponse(BaseModel): + doc_id: int + sections: list[SectionItem] + + +@router.get("/{doc_id}/sections", response_model=DocumentSectionsResponse) +async def get_document_sections( + doc_id: int, + user: Annotated[User, Depends(get_current_user)], + session: Annotated[AsyncSession, Depends(get_session)], +): + """문서의 hier 절(leaf) 목차 + 절-레벨 요약(chunk_section_analysis). + + ⚠ 뷰 우회 — 의도적 예외 (변경 금지): + retrieval 경로(retrieval_service / *_rag)는 in_corpus=false 누출 방지를 위해 + 반드시 corpus_chunks 뷰만 본다. 그러나 이 endpoint 는 retrieval 이 아니라 + "문서 전체 leaf 목차 표시"라서 in_corpus=false(검색 비활성) 절도 보여야 하므로 + document_chunks 를 직접 조회한다. corpus_chunks 로 바꾸면 비활성 절이 목차에서 + 사라지는 회귀가 생기니 절대 바꾸지 말 것. (Hier-Decomp 코퍼스 격리 규율의 명시적 예외) + + DISTINCT ON (c.id) + ORDER BY a.created_at/a.id DESC: chunk 당 최신 분석 1행만 + (prompt_version 다중 시 중복 JOIN 방지). 절 없는 문서(legacy/news)는 sections=[]. + """ + from sqlalchemy import text as sql_text + + doc = await session.get(Document, doc_id) + if not doc or doc.deleted_at is not None: + raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다") + + rows = ( + await session.execute( + sql_text( + """ + SELECT chunk_id, section_title, heading_path, level, node_type, is_leaf, + section_type, summary, confidence + FROM ( + SELECT DISTINCT ON (c.id) + c.id AS chunk_id, c.chunk_index, c.section_title, c.heading_path, + c.level, c.node_type, c.is_leaf, + a.section_type, + CASE WHEN a.status = 'summarized' THEN a.summary ELSE NULL END AS summary, + a.confidence + FROM document_chunks c + LEFT JOIN chunk_section_analysis a + ON a.chunk_id = c.id AND a.status = 'summarized' + WHERE c.doc_id = :doc_id + AND c.source_type = 'hier_section' + AND c.is_leaf = true + ORDER BY c.id, a.created_at DESC, a.id DESC + ) t + ORDER BY t.chunk_index + """ + ).bindparams(doc_id=doc_id) + ) + ).mappings().all() + + return DocumentSectionsResponse( + 
doc_id=doc_id, + sections=[SectionItem(**dict(r)) for r in rows], + ) + + # ─── 자료실 인접 자료 (이전/다음) ─── # 학습 흐름: 한 자료 다 읽으면 같은 챕터의 다음 자료로 자연스럽게 이동. # library_path (정확 일치 + 하위 prefix) 안에서 title 오름차순 기준. diff --git a/frontend/src/lib/components/SectionOutline.svelte b/frontend/src/lib/components/SectionOutline.svelte new file mode 100644 index 0000000..d63591f --- /dev/null +++ b/frontend/src/lib/components/SectionOutline.svelte @@ -0,0 +1,117 @@ + + +{#snippet itemRow(item: OutlineItem)} + {@const s = item.section} + {@const open = selectedId === s.chunk_id} + {@const typeLabel = sectionTypeLabel(s.section_type)} +
  • + + {#if open} +
    + {#if pathSegments(s.heading_path).length} +
    + {pathSegments(s.heading_path).join(' › ')} +
    + {/if} + {#if s.summary} +

    {s.summary}

    + {#if isLowConf(s)} +
    + 저신뢰 — 표 추출이 불완전할 수 있음 +
    + {/if} + {:else} +

    요약 없음 — 짧은 절이거나 아직 분석되지 않았습니다.

    + {/if} +
    + {/if} +
  • +{/snippet} + +
    +

    + 절 목차 + {total} +

    + + {#if layout.mode === 'group'} +
    + {#each layout.groups as g (g.key)} +
    +
    + {g.key} + ({g.items.length}) +
    +
      + {#each g.items as item (item.section.chunk_id)} + {@render itemRow(item)} + {/each} +
    +
    + {/each} +
    + {:else} +
      + {#each layout.items as item (item.section.chunk_id)} + {@render itemRow(item)} + {/each} +
    + {/if} +
    diff --git a/frontend/src/lib/utils/headingPath.test.ts b/frontend/src/lib/utils/headingPath.test.ts new file mode 100644 index 0000000..bdbfdd1 --- /dev/null +++ b/frontend/src/lib/utils/headingPath.test.ts @@ -0,0 +1,110 @@ +// 순수함수 회귀 테스트. 실행(로컬, 의존성 0): node --test src/lib/utils/headingPath.test.ts +// (Node ≥23 또는 22.6+ --experimental-strip-types — TS 타입 네이티브 strip.) +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { + cleanHeading, + pathSegments, + collapseWindows, + groupOrFlat, + sectionTypeLabel, + type DocumentSection, +} from './headingPath.ts'; + +let _id = 0; +function sec(p: Partial<DocumentSection>): DocumentSection { + return { + chunk_id: ++_id, + section_title: null, + heading_path: null, + level: null, + node_type: null, + is_leaf: true, + section_type: null, + summary: null, + confidence: null, + ...p, + }; +} + +test('cleanHeading: 마크다운/HTML 잔재 strip', () => { + assert.equal(cleanHeading('**UG-5 PLATE**<sup>2</sup>'), 'UG-5 PLATE'); + assert.equal(cleanHeading(' **DESIGN** '), 'DESIGN'); + assert.equal(cleanHeading('a b\tc'), 'a b c'); + assert.equal(cleanHeading(null), ''); + assert.equal(cleanHeading(''), ''); +}); + +test('pathSegments: > 분할 + 정제', () => { + assert.deepEqual(pathSegments('**A** > **B**<sup>1</sup> > C'), ['A', 'B', 'C']); + assert.deepEqual(pathSegments(null), []); + assert.deepEqual(pathSegments(' '), []); +}); + +test('sectionTypeLabel: 한글 매핑 + passthrough', () => { + assert.equal(sectionTypeLabel('requirement'), '요건'); + assert.equal(sectionTypeLabel('unknown_type'), 'unknown_type'); + assert.equal(sectionTypeLabel(null), null); +}); + +test('collapseWindows: 연속 동일 heading window 만 dedupe, 순서 유지', () => { + const input = [ + sec({ heading_path: 'Intro', node_type: null }), + sec({ heading_path: 'Pearson', node_type: 'window' }), + sec({ heading_path: 'Pearson', node_type: 'window' }), + sec({ heading_path: 'Pearson', node_type: 'window' }), + sec({ heading_path: 'Conf', node_type: null }), + sec({ heading_path: 
'Pearson', node_type: 'window' }), // 비연속 → 새 항목 + ]; + const out = collapseWindows(input); + assert.equal(out.length, 4); + assert.equal(out[0].fragmentCount, 1); // Intro + assert.equal(out[1].fragmentCount, 3); // Pearson ×3 합침 + assert.equal(out[2].fragmentCount, 1); // Conf + assert.equal(out[3].fragmentCount, 1); // 비연속 Pearson + // 순서 보존 + assert.deepEqual( + out.map((o) => cleanHeading(o.section.heading_path)), + ['Intro', 'Pearson', 'Conf', 'Pearson'], + ); +}); + +test('groupOrFlat: 적은 그룹 + 낮은 기타% → group (5140-류)', () => { + // 3 top segment × 4 = 12절, window 없음 → group_count 3, 기타 0% + const sections: DocumentSection[] = []; + for (const top of ['장1', '장2', '장3']) { + for (let i = 0; i < 4; i++) sections.push(sec({ heading_path: `${top} > 절${i}` })); + } + const layout = groupOrFlat(sections); + assert.equal(layout.mode, 'group'); + assert.equal(layout.groups.length, 3); + assert.deepEqual(layout.groups.map((g) => g.key), ['장1', '장2', '장3']); // 등장순서 + assert.equal(layout.groups[0].items.length, 4); +}); + +test('groupOrFlat: 기타% ≥ 50 → flat 강등 (5186/5225-류)', () => { + const sections: DocumentSection[] = [ + sec({ heading_path: 'A > a1' }), + sec({ heading_path: 'B > b1' }), + sec({ node_type: 'window', heading_path: 'W1' }), + sec({ node_type: 'window', heading_path: 'W2' }), + sec({ node_type: 'section_split', heading_path: 'S1' }), + sec({ node_type: 'window', heading_path: 'W3' }), // 기타 4/6 = 66.7% + ]; + const layout = groupOrFlat(sections); + assert.equal(layout.mode, 'flat'); + assert.ok(layout.items.length > 0); +}); + +test('groupOrFlat: group_count > 30 → flat 강등', () => { + const sections: DocumentSection[] = []; + for (let i = 0; i < 31; i++) sections.push(sec({ heading_path: `seg${i} > x` })); + const layout = groupOrFlat(sections); + assert.equal(layout.mode, 'flat'); +}); + +test('groupOrFlat: 빈 입력 → flat, 항목 0', () => { + const layout = groupOrFlat([]); + assert.equal(layout.mode, 'flat'); + assert.equal(layout.items.length, 0); +}); 
diff --git a/frontend/src/lib/utils/headingPath.ts b/frontend/src/lib/utils/headingPath.ts new file mode 100644 index 0000000..597fc48 --- /dev/null +++ b/frontend/src/lib/utils/headingPath.ts @@ -0,0 +1,154 @@ +// hier 절(section) 목차 표시용 순수 유틸 (PR-DocSrv-Hier-Section-UI-1). +// SvelteKit/Svelte 의존 0 → Node 내장 test runner(`node --test`)로 검증 가능. +// +// 책임: +// - cleanHeading: section_title/heading_path 의 raw 마크다운/HTML 잔재 strip. +// - pathSegments: heading_path("A > B > C")를 정제 세그먼트 배열로. +// - collapseWindows: 연속 동일 heading 의 node_type='window'(과대 본문 인공 분할) dedupe. +// - groupOrFlat: per-doc 동적 판정 — top-segment 1단 그룹 vs flat (실측 임계 기반). + +export interface DocumentSection { + chunk_id: number; + section_title: string | null; + heading_path: string | null; + level: number | null; + node_type: string | null; // 'window' | 'section_split' | null + is_leaf: boolean; + section_type: string | null; + summary: string | null; + confidence: number | null; +} + +/** window dedupe 후 목차 한 항목 (대표 절 + 합쳐진 조각 수). */ +export interface OutlineItem { + section: DocumentSection; + fragmentCount: number; // >1 이면 "(n조각)" 배지 +} + +export interface OutlineGroup { + key: string; // top segment (OTHER → '기타') + isOther: boolean; + items: OutlineItem[]; +} + +export interface OutlineLayout { + mode: 'group' | 'flat'; + items: OutlineItem[]; // flat 모드에서 채워짐 + groups: OutlineGroup[]; // group 모드에서 채워짐 +} + +const OTHER = '__OTHER__'; + +// 동적 그룹 판정 임계 (실측 pilot 3 검증: 5140 group→그룹 / 5186·5225→flat). +const GROUP_MIN = 2; +const GROUP_MAX = 30; +const OTHER_PCT_MAX = 50; + +/** section_type → 한글 라벨 (느슨한 enum, 미지정/미상은 그대로 표시). 
*/ +export const SECTION_TYPE_LABEL: Record<string, string> = { + definition: '정의', + requirement: '요건', + procedure: '절차', + formula: '수식', + data_table: '표·데이터', + example: '예시', + case_study: '사례', + question: '문제', + reference: '참조', + overview: '개요', + other: '기타', +}; + +export function sectionTypeLabel(t: string | null | undefined): string | null { + if (!t) return null; + return SECTION_TYPE_LABEL[t] ?? t; +} + +export function cleanHeading(raw: string | null | undefined): string { + if (!raw) return ''; + return raw + .replace(/<sup>.*?<\/sup>/gi, '') // 각주 위첨자 + .replace(/<sub>.*?<\/sub>/gi, '') + .replace(/<[^>]+>/g, '') // 잔여 HTML 태그 + .replace(/\*\*/g, '') // **bold** + .replace(/[*_`]/g, '') // 잔여 마크다운 마커 + .replace(/\s+/g, ' ') + .trim(); +} + +export function pathSegments(hp: string | null | undefined): string[] { + if (!hp) return []; + // ⚠ 먼저 strip 후 split: heading_path 에 <sup>2</sup> 등 raw HTML 의 '>' 가 섞여 있어 + // bare '>' 로 먼저 split 하면 태그가 잘림(단위테스트로 발견). cleanHeading 이 HTML 태그를 + // 제거하므로 separator ' > '(bare '>')만 남은 뒤 split 한다. + return cleanHeading(hp) + .split('>') + .map((s) => s.trim()) + .filter(Boolean); +} + +/** 그룹 키: window/section_split(인공 조각) 또는 path 없음/깨짐 → OTHER. */ +function topSegment(s: DocumentSection): string { + if (s.node_type === 'window' || s.node_type === 'section_split') return OTHER; + const segs = pathSegments(s.heading_path); + return segs.length === 0 ? OTHER : segs[0]; +} + +/** + * 서버 chunk_index 순서를 유지한 채(정렬 변경 금지), 연속된 동일 cleaned heading_path 의 + * node_type='window' 절을 1 항목으로 dedupe. 대표 = 첫 조각(요약 사용), fragmentCount 누적. 
+ */ +export function collapseWindows(sections: DocumentSection[]): OutlineItem[] { + const out: OutlineItem[] = []; + for (const s of sections) { + const prev = out[out.length - 1]; + const h = cleanHeading(s.heading_path); + if ( + s.node_type === 'window' && + prev && + prev.section.node_type === 'window' && + h !== '' && + cleanHeading(prev.section.heading_path) === h + ) { + prev.fragmentCount += 1; + } else { + out.push({ section: s, fragmentCount: 1 }); + } + } + return out; +} + +/** + * per-doc 동적 판정: top-segment 1단 그룹 vs flat. + * 판정은 raw 절 기준(실측 임계와 동일 차원), 표시는 collapseWindows 적용. + * - 그룹 채택: GROUP_MIN ≤ distinct top-segment ≤ GROUP_MAX AND 기타% < OTHER_PCT_MAX. + * - 아니면 flat 강등. + */ +export function groupOrFlat(sections: DocumentSection[]): OutlineLayout { + const total = sections.length; + const order: string[] = []; + const map = new Map<string, DocumentSection[]>(); + let otherCount = 0; + for (const s of sections) { + const key = topSegment(s); + if (key === OTHER) otherCount += 1; + if (!map.has(key)) { + map.set(key, []); + order.push(key); + } + map.get(key)!.push(s); + } + const groupCount = map.size; + const otherPct = total === 0 ? 0 : (otherCount / total) * 100; + const useGroup = groupCount >= GROUP_MIN && groupCount <= GROUP_MAX && otherPct < OTHER_PCT_MAX; + + if (!useGroup) { + return { mode: 'flat', items: collapseWindows(sections), groups: [] }; + } + const groups: OutlineGroup[] = order.map((key) => ({ + key: key === OTHER ? 
'기타' : key, + isOther: key === OTHER, + items: collapseWindows(map.get(key)!), + })); + return { mode: 'group', items: [], groups }; +} diff --git a/frontend/src/routes/documents/[id]/+page.svelte b/frontend/src/routes/documents/[id]/+page.svelte index bd39baa..3ab8d40 100644 --- a/frontend/src/routes/documents/[id]/+page.svelte +++ b/frontend/src/routes/documents/[id]/+page.svelte @@ -27,6 +27,8 @@ import DocumentDangerZone from '$lib/components/editors/DocumentDangerZone.svelte'; import AnalysisPanel from '$lib/components/AnalysisPanel.svelte'; import ReadCounter from '$lib/components/ReadCounter.svelte'; + import SectionOutline from '$lib/components/SectionOutline.svelte'; + import Tabs from '$lib/components/ui/Tabs.svelte'; marked.use({ mangle: false, headerIds: false }); function renderMd(text) { @@ -84,6 +86,20 @@ } } + // 절(hier section) 목차 — 본문 로드와 독립, 실패(404 포함) 무해. + // reqId guard: 문서 전환 race 시 stale 결과가 새 문서에 붙지 않게. + let sections = $state([]); + let hasSections = $derived(sections.length > 0); + async function loadSections() { + const reqId = docId; + try { + const r = await api(`/documents/${reqId}/sections`); + if (reqId === docId) sections = r?.sections ?? []; + } catch { + if (reqId === docId) sections = []; // Phase 1 미배포 시 404 → 목차 숨김(graceful) + } + } + // "1회독 완료 + 다음 자료로" 한 번에 async function readAndGoNext() { try { @@ -117,6 +133,7 @@ } // 자료실 자료면 인접 자료 미리 fetch (학습 흐름 네비) if (doc && doc.category === 'library') loadNeighbors(); + if (doc) loadSections(); }); let viewerType = $derived( @@ -206,9 +223,25 @@ {:else if doc} -
    - -
    +
    + {#if hasSections} + + + {/if} + + +
    + {#if hasSections} + +
    + 절 목차 ({sections.length}) + +
    + {/if}
    {#if doc.edit_url} @@ -382,53 +415,52 @@ {/if}
    - -