feat(search): expose hier section outline & summaries in document detail

PR-DocSrv-Hier-Section-UI-1 Phase 1 (코드+커밋만, 배포는 Phase 2 backfill 완주 후).

- backend: GET /documents/{id}/sections — hier leaf 목차 + chunk_section_analysis 요약. document_chunks 직접 조회(retrieval 아닌 목차 표시라 corpus_chunks 뷰 의도적 우회 — docstring 명시). DISTINCT ON 으로 최신 분석 1행.
- frontend: SectionOutline.svelte(좌측 목차, per-doc 동적 그룹/flat, window dedupe, 클릭 시 요약/breadcrumb 인라인), headingPath.ts 순수 유틸(+node:test 단위테스트 8케이스). [id]/+page.svelte 3-zone 레이아웃 + 우측 메타 Tabs [정보|AI|관리] 로 카드 스프롤 해소.
- 절 없는 문서/404 는 목차 숨김(graceful). 본문 점프는 follow-up.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 00:22:34 +00:00
parent ec174fc1e7
commit f7198d9d68
5 changed files with 537 additions and 49 deletions
@@ -537,6 +537,81 @@ async def get_document(
    return DocumentDetailResponse.model_validate(doc)


+# ─── 절(hier section) 목차 + 요약 (PR-DocSrv-Hier-Section-UI-1) ───
+class SectionItem(BaseModel):  # one hier leaf chunk joined with its latest summarized analysis row
+    chunk_id: int  # document_chunks.id
+    section_title: str | None = None  # raw, may contain markdown — cleaned up by the frontend (headingPath.ts)
+    heading_path: str | None = None   # raw
+    level: int | None = None
+    node_type: str | None = None      # window | section_split | null
+    is_leaf: bool  # the sections query filters on is_leaf = true
+    section_type: str | None = None   # from chunk_section_analysis; None when no summarized row exists
+    summary: str | None = None        # only from analysis rows with status='summarized', otherwise None
+    confidence: float | None = None   # from chunk_section_analysis; None when no summarized row exists
+
+
+class DocumentSectionsResponse(BaseModel):  # response body of GET /{doc_id}/sections
+    doc_id: int  # echoes the requested document id
+    sections: list[SectionItem]  # ordered by chunk_index; empty list when no hier leaf chunks match
+
+
+@router.get("/{doc_id}/sections", response_model=DocumentSectionsResponse)
+async def get_document_sections(
+    doc_id: int,
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """Return a document's hier leaf-section outline plus section-level summaries (chunk_section_analysis).
+
+    ⚠ View bypass — intentional exception (do not change):
+        Retrieval paths (retrieval_service / *_rag) must read only the corpus_chunks view so that
+        in_corpus=false rows never leak. This endpoint is not retrieval: it shows the document's full
+        leaf outline, so sections with in_corpus=false (search-disabled) must still be listed, hence
+        the direct document_chunks query. Switching to corpus_chunks would drop disabled sections from
+        the outline (a regression) — never do it. (Explicit exception to Hier-Decomp corpus isolation.)
+
+    DISTINCT ON (c.id) + ORDER BY a.created_at/a.id DESC keeps only the newest analysis row per chunk
+    (no duplicate JOIN rows across prompt_versions); docs without sections (legacy/news) get sections=[].
+    """
+    from sqlalchemy import text as sql_text
+
+    doc = await session.get(Document, doc_id)
+    if not doc or doc.deleted_at is not None:  # missing, or deleted_at is set → 404
+        raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
+
+    rows = (
+        await session.execute(
+            sql_text(
+                """
+                SELECT chunk_id, section_title, heading_path, level, node_type, is_leaf,
+                       section_type, summary, confidence
+                FROM (
+                  SELECT DISTINCT ON (c.id)
+                         c.id AS chunk_id, c.chunk_index, c.section_title, c.heading_path,
+                         c.level, c.node_type, c.is_leaf,
+                         a.section_type,
+                         CASE WHEN a.status = 'summarized' THEN a.summary ELSE NULL END AS summary,
+                         a.confidence
+                  FROM document_chunks c
+                  LEFT JOIN chunk_section_analysis a
+                         ON a.chunk_id = c.id AND a.status = 'summarized'
+                  WHERE c.doc_id = :doc_id
+                    AND c.source_type = 'hier_section'
+                    AND c.is_leaf = true
+                  ORDER BY c.id, a.created_at DESC, a.id DESC
+                ) t
+                ORDER BY t.chunk_index
+                """
+            ).bindparams(doc_id=doc_id)  # doc_id is bound as a parameter, not interpolated into the SQL
+        )
+    ).mappings().all()  # each row is a mapping keyed by the outer SELECT's column names
+
+    return DocumentSectionsResponse(
+        doc_id=doc_id,
+        sections=[SectionItem(**dict(r)) for r in rows],  # outer SELECT columns match SectionItem field names
+    )
+
+
 # ─── 자료실 인접 자료 (이전/다음) ───
 # 학습 흐름: 한 자료 다 읽으면 같은 챕터의 다음 자료로 자연스럽게 이동.
 # library_path (정확 일치 + 하위 prefix) 안에서 title 오름차순 기준.