"""summarize_units PR2 헬퍼 단위테스트 — map/reduce 프롬프트 조립 순수함수. 핵심 불변식: - render_map_slice: 유닛 위치(1-based)/섹션 라벨 + 본문 그대로 (손실 0). - build_reduce_units_block: 어떤 입력에도 반환 블록 est_tokens <= budget (캡 초과 0 검증 게이트의 reduce 측). 절단은 detail 만 — 라벨/TLDR/불일치/순서 보존. pytest + 단독 실행 양쪽 지원: PYTHONPATH=. pytest tests/summarize_units/ -q """ from __future__ import annotations from app.services.summarize_units import ( SummarizeUnit, build_reduce_units_block, estimate_tokens, render_map_slice, ) def _result(idx: int, detail: str, *, tldr: str = "요약", inc: list | None = None) -> dict: return { "index": idx, "titles": [f"섹션{idx}"], "tldr": tldr, "detail": detail, "inconsistencies": inc or [], } # ---------- render_map_slice ---------- def test_render_map_slice_label_and_body(): unit = SummarizeUnit(index=2, section_titles=["개요", None, "본론"], text="본문입니다") out = render_map_slice(unit, total_units=5) assert out.startswith("[유닛 3/5 — 섹션: 개요 · 본론]\n") assert out.endswith("본문입니다") def test_render_map_slice_untitled(): unit = SummarizeUnit(index=0, section_titles=[None], text="x") assert "(무제 구간)" in render_map_slice(unit, total_units=1) # ---------- build_reduce_units_block ---------- def test_reduce_block_within_budget_untouched(): results = [_result(i, "가" * 100) for i in range(3)] block, truncated = build_reduce_units_block(results, budget_tokens=11_000) assert not truncated # 순서/라벨/TLDR 보존 assert block.index("[유닛 1/3") < block.index("[유닛 2/3") < block.index("[유닛 3/3") assert "TLDR: 요약" in block assert "가" * 100 in block def test_reduce_block_truncates_to_budget(): # 유닛 8개 × 한글 detail 5,000자 ≈ 21K tok — budget 5,000 으로 절단 강제 results = [_result(i, "가" * 5_000) for i in range(8)] block, truncated = build_reduce_units_block(results, budget_tokens=5_000) assert truncated assert estimate_tokens(block) <= 5_000 # 라벨(유닛 순서)은 절단 후에도 보존 assert "[유닛 1/8" in block def test_reduce_block_hard_cut_floor(): # min_detail_chars floor 에 막혀 비례 절단으로 불충분한 극단 케이스 — 하드 컷 발동 results = [_result(i, "가" * 300) for i in range(50)] block, truncated = build_reduce_units_block(results, budget_tokens=500) assert truncated assert estimate_tokens(block) <= 500 def test_reduce_block_preserves_inconsistencies(): results = [ _result(0, "가" * 50, inc=[{"kind": "version_drift", "desc": "개정판 차이"}]), ] block, _ = build_reduce_units_block(results, budget_tokens=10_000) assert "불일치(version_drift): 개정판 차이" in block