"""Regression tests for the briefing "historical" branch — Plan §"Verification 9".

Three paths are verified:
  1. flag off → retrieve_historical is not called and the prompt's
     {historical_block} is "(과거 참고 자료 없음)"
  2. flag on + fixture top-K → only docs with similarity ≥ 0.70 are returned
  3. flag on + zero match → empty list (no fallback hallucination)
"""
import os
import sys
from datetime import datetime, timezone, timedelta
from pathlib import Path
from unittest import mock

import numpy as np
import pytest

# PYTHONPATH = /app (assumes the tests run from inside that directory;
# otherwise the app directory is added to sys.path here).
APP_DIR = Path(__file__).resolve().parent.parent / "app"
if str(APP_DIR) not in sys.path:
    sys.path.insert(0, str(APP_DIR))

from services.briefing.comparator import (  # noqa: E402
    HISTORICAL_SIMILARITY_MIN,
    HISTORICAL_TOP_K,
    MAX_ARTICLE_IDS_PER_COUNTRY,
    _build_historical_block,
    _make_fallback,
    _sanitize_envelope,
    build_prompt,
    historical_enabled,
    retrieve_historical,
)
from services.clustering_common import normalize_vector  # noqa: E402

# Name of the environment variable toggled by the flag tests.
_FLAG_ENV = "BRIEFING_HISTORICAL_ENABLED"


def _make_doc(doc_id: int, embedding: np.ndarray, hours_ago: int = 1) -> dict:
    """Build a minimal document fixture.

    Args:
        doc_id: Value stored under ``"id"`` and interpolated into the title
            and summary.
        embedding: Vector stored under ``"embedding"`` as-is.
        hours_ago: Age of the document; ``created_at`` is "now (UTC)" minus
            this many hours.

    Returns:
        A dict with ``id``, ``title``, ``ai_summary``, ``embedding`` and
        ``created_at`` keys.
    """
    return {
        "id": doc_id,
        "title": f"doc {doc_id}",
        "ai_summary": f"summary {doc_id}",
        "embedding": embedding,
        "created_at": datetime.now(timezone.utc) - timedelta(hours=hours_ago),
    }


def _make_cluster_with_centroid(centroid_vec: np.ndarray) -> dict:
    """Build a member-less cluster fixture whose centroid is ``normalize_vector(centroid_vec)``."""
    return {
        "centroid": normalize_vector(centroid_vec),
        "members": [],
    }


def test_flag_default_off() -> None:
    """Env var unset → historical is disabled."""
    # patch.dict restores os.environ on exit, so removing the variable here
    # cannot leak into other tests (or clobber a value set by the runner).
    with mock.patch.dict(os.environ):
        os.environ.pop(_FLAG_ENV, None)
        assert historical_enabled() is False


def test_flag_on() -> None:
    """Env var set to "true" → historical is enabled."""
    # The previous value (if any) is restored when the context exits.
    with mock.patch.dict(os.environ, {_FLAG_ENV: "true"}):
        assert historical_enabled() is True


def test_historical_block_empty_when_no_docs() -> None:
    """Path 1: flag off or historical_docs=[] → the "empty" label."""
    block = _build_historical_block([])
    assert block == "(과거 참고 자료 없음)"


def test_historical_block_has_label_when_docs() -> None:
    """With at least one doc the block carries its labels and an [H1] marker."""
    docs = [_make_doc(1, np.ones(1024, dtype=np.float32))]
    block = _build_historical_block(docs)
    assert "이전 30일 흐름" in block
    assert "직접 인용 금지" in block
    assert "[H1]" in block


def test_retrieve_historical_topk() -> None:
    """Path 2: flag on + fixture top-K with similarity ≥ threshold."""
    # Cluster centroid points along the all-ones direction.
    centroid = np.ones(8, dtype=np.float32)
    cluster = _make_cluster_with_centroid(centroid)

    # 10 candidates: 5 similar to the centroid (sim≈1.0), 5 orthogonal (sim≈0).
    similar_emb = np.ones(8, dtype=np.float32)
    orthogonal_emb = np.array([1, -1, 1, -1, 1, -1, 1, -1], dtype=np.float32)
    # Seeded generator keeps the jitter (and therefore the test) deterministic.
    rng = np.random.default_rng(0)
    candidates = (
        [
            _make_doc(i, similar_emb + rng.random(8).astype(np.float32) * 0.01)
            for i in range(1, 6)
        ]
        + [_make_doc(10 + i, orthogonal_emb) for i in range(5)]
    )

    out = retrieve_historical(cluster, candidates, top_k=5, sim_min=0.70)
    assert len(out) == 5
    # Only the similar group (ids 1-5) may be selected.
    selected_ids = {d["id"] for d in out}
    assert selected_ids.issubset({1, 2, 3, 4, 5})


def test_retrieve_historical_zero_match() -> None:
    """Path 3: every candidate's similarity < threshold → empty list."""
    centroid = np.ones(8, dtype=np.float32)
    cluster = _make_cluster_with_centroid(centroid)
    orthogonal_emb = np.array([1, -1, 1, -1, 1, -1, 1, -1], dtype=np.float32)
    candidates = [_make_doc(i, orthogonal_emb) for i in range(5)]
    out = retrieve_historical(cluster, candidates, top_k=5, sim_min=0.70)
    assert out == []


def test_retrieve_historical_empty_candidates() -> None:
    """No candidates → empty list."""
    centroid = np.ones(8, dtype=np.float32)
    cluster = _make_cluster_with_centroid(centroid)
    assert retrieve_historical(cluster, [], top_k=5) == []


def test_sanitize_envelope_valid() -> None:
    """A well-formed envelope survives sanitizing with its fields preserved."""
    cluster = {"members": [{"id": 1}, {"id": 2}]}
    parsed = {
        "topic_label": "이란 충돌",
        "headline": "긴장 격화",
        "country_perspectives": [
            {"country": "kr", "summary": "유가 충격", "article_ids": [1]},
            {"country": "us", "summary": "외교 압박", "article_ids": [2]},
        ],
        "divergences": ["KR=경제 / US=외교"],
        "convergences": ["민간 사상 우려 공통"],
        "key_quotes": [{"country": "US", "source": "NYT", "quote": "Tehran ..."}],
        "historical_context": "지난 3주 6회 공방",
    }
    sanitized = _sanitize_envelope(parsed, cluster)
    assert sanitized is not None
    assert sanitized["topic_label"] == "이란 충돌"
    # The country code is upper-cased.
    assert sanitized["country_perspectives"][0]["country"] == "KR"
    assert sanitized["historical_context"] == "지난 3주 6회 공방"
    assert sanitized["llm_fallback_used"] is False


def test_sanitize_envelope_empty_perspectives_to_fallback() -> None:
    """Empty country_perspectives → None (the caller triggers the fallback)."""
    cluster = {"members": []}
    parsed = {
        "topic_label": "X",
        "headline": "Y",
        "country_perspectives": [],
    }
    assert _sanitize_envelope(parsed, cluster) is None


def test_fallback_row_fixed_form() -> None:
    """The fallback row has the fixed shape from the plan section "Fallback Topic Row" (fixed form)."""
    cluster = {"members": [{"id": 1}]}
    fb = _make_fallback(cluster)
    assert fb["topic_label"] == "주요 뉴스 묶음"
    assert fb["country_perspectives"] == []
    assert fb["divergences"] == []
    assert fb["convergences"] == []
    assert fb["key_quotes"] == []
    assert fb["historical_context"] is None
    assert fb["llm_fallback_used"] is True


def test_prompt_includes_both_blocks() -> None:
    """Both prompt placeholders are substituted; no historical docs → empty label."""
    selected = [_make_doc(1, np.ones(8, dtype=np.float32))]
    selected[0]["country"] = "KR"
    selected[0]["ai_sub_group"] = "경향신문"
    selected[0]["ai_summary_truncated"] = "오늘 한국 뉴스"
    prompt = build_prompt(selected, historical_docs=[])
    assert "{articles_block}" not in prompt  # placeholder was substituted
    assert "{historical_block}" not in prompt
    assert "(KR · 경향신문)" in prompt
    assert "(과거 참고 자료 없음)" in prompt


def test_perspective_summary_cap_enforced() -> None:
    """Sanitizing enforces the summary length cap."""
    cluster = {"members": []}
    long_summary = "가" * 500  # 500 chars, cap=240
    parsed = {
        "topic_label": "T",
        "headline": "H",
        "country_perspectives": [
            {"country": "KR", "summary": long_summary, "article_ids": []},
        ],
    }
    s = _sanitize_envelope(parsed, cluster)
    assert s is not None
    assert len(s["country_perspectives"][0]["summary"]) <= 241  # 240 + "…"


def test_article_ids_fallback_when_llm_empty() -> None:
    """Empty article_ids from the LLM → same-country cluster members (top-N) are injected."""
    cluster = {
        "members": [
            {"id": 101, "country": "KR", "weight": 0.9},
            {"id": 102, "country": "KR", "weight": 0.8},
            {"id": 103, "country": "KR", "weight": 0.7},
            {"id": 201, "country": "US", "weight": 0.5},
        ]
    }
    parsed = {
        "topic_label": "T",
        "headline": "H",
        "country_perspectives": [
            {"country": "KR", "summary": "한국 시각", "article_ids": []},
            {"country": "US", "summary": "미국 시각", "article_ids": []},
        ],
    }
    s = _sanitize_envelope(parsed, cluster)
    assert s is not None
    kr = next(p for p in s["country_perspectives"] if p["country"] == "KR")
    us = next(p for p in s["country_perspectives"] if p["country"] == "US")
    assert kr["article_ids"] == [101, 102, 103]  # weight desc
    assert us["article_ids"] == [201]


def test_article_ids_intersect_with_cluster() -> None:
    """Unrelated ids from the LLM are dropped; only cluster-member ids are kept."""
    cluster = {
        "members": [
            {"id": 101, "country": "KR", "weight": 0.9},
            {"id": 102, "country": "KR", "weight": 0.8},
        ]
    }
    parsed = {
        "topic_label": "T",
        "headline": "H",
        "country_perspectives": [
            {"country": "KR", "summary": "한국 시각", "article_ids": [101, 999, 888]},
        ],
    }
    s = _sanitize_envelope(parsed, cluster)
    assert s is not None
    assert s["country_perspectives"][0]["article_ids"] == [101]


def test_article_ids_capped_to_max() -> None:
    """Even after post-processing, ids are capped at MAX_ARTICLE_IDS_PER_COUNTRY per country."""
    cluster = {
        "members": [
            {"id": i, "country": "KR", "weight": 1.0 / i} for i in range(1, 15)
        ]
    }
    parsed = {
        "topic_label": "T",
        "headline": "H",
        "country_perspectives": [
            {"country": "KR", "summary": "한국 시각", "article_ids": []},
        ],
    }
    s = _sanitize_envelope(parsed, cluster)
    assert s is not None
    assert len(s["country_perspectives"][0]["article_ids"]) == MAX_ARTICLE_IDS_PER_COUNTRY


def test_max_perspectives_cap() -> None:
    """At most 10 country perspectives survive sanitizing."""
    cluster = {"members": []}
    parsed = {
        "topic_label": "T",
        "headline": "H",
        "country_perspectives": [
            {"country": f"C{i}", "summary": "s", "article_ids": []}
            for i in range(20)
        ],
    }
    s = _sanitize_envelope(parsed, cluster)
    assert s is not None
    assert len(s["country_perspectives"]) <= 10