"""PR-RAG-Time-1: freshness decay 단위 테스트. 가드레일 (plan §10): 1. news recent vs old — recent 가 final score 높음 2. law_monitor recent vs old 3. non-target source unaffected — manual 등은 score 변화 0 4. missing created_at unaffected — decay None, score 변화 0 5. future created_at clamped — age_days = 0 6. floor behavior — 매우 오래되어도 multiplier 0.7 미만 안 떨어짐 """ from __future__ import annotations import math import os import sys from datetime import datetime, timedelta, timezone from types import SimpleNamespace # tests/ → 프로젝트 루트 → app/ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "app")) import pytest from services.search.freshness_decay import ( DECAY_FLOOR, HALF_LIFE_DAYS, _DocMeta, adjusted_score, apply_freshness_decay, compute_age_days, compute_decay, freshness_policy, ) NOW = datetime(2026, 5, 3, 12, 0, 0, tzinfo=timezone.utc) def _meta(channel: str | None, *, days_ago: float | None = 30.0, origin: str | None = None, material_type: str | None = None) -> _DocMeta: if days_ago is None: created = None elif days_ago < 0: # future: now + |days_ago| created = NOW + timedelta(days=-days_ago) else: created = NOW - timedelta(days=days_ago) return _DocMeta(source_channel=channel, content_origin=origin, created_at=created, material_type=material_type) # ─── policy dispatcher ──────────────────────────────────────────── def test_policy_news(): assert freshness_policy(_meta("news")) == "news_90d" def test_policy_law_monitor_now_unaffected(): # C-1 후속: law_365d 폐기 → law_monitor 비적용 (현행성은 version_status 가 처리) assert freshness_policy(_meta("law_monitor")) is None def test_policy_incident(): # C-1 후속: 재해사례/사망사고(material_type='incident') → news_90d 흡수 (source 무관) assert freshness_policy(_meta("crawl", material_type="incident")) == "news_90d" assert freshness_policy(_meta("inbox_route", material_type="incident")) == "news_90d" def test_policy_manual_unaffected(): assert freshness_policy(_meta("manual")) is None def test_policy_drive_sync_unaffected(): assert freshness_policy(_meta("drive_sync")) is None def test_policy_inbox_route_unaffected(): assert freshness_policy(_meta("inbox_route")) is None def test_policy_memo_unaffected(): assert freshness_policy(_meta("memo")) is None def test_policy_ai_drafted_skipped_even_for_news(): # 가드 2: content_origin='ai_drafted' 면 source_channel 무관 비적용 assert freshness_policy(_meta("news", origin="ai_drafted")) is None def test_policy_meta_none(): assert freshness_policy(None) is None def test_policy_unknown_source(): # 가드 6: unknown source_channel → no decay assert freshness_policy(_meta("future_channel")) is None # ─── compute_age_days ───────────────────────────────────────────── def test_age_normal(): age = compute_age_days(NOW - timedelta(days=10), now=NOW) assert age == pytest.approx(10.0) def test_age_missing_date(): # 가드 6: missing date → None assert compute_age_days(None, now=NOW) is None def test_age_future_clamped_to_zero(): # 가드 6: future date → age_days = 0 (음수 금지) future = NOW + timedelta(days=5) assert compute_age_days(future, now=NOW) == 0.0 def test_age_naive_datetime_treated_as_utc(): # DB는 timestamptz 라 naive 안 오지만 mock 안전망 naive = datetime(2026, 4, 23, 12, 0, 0) # 10일 전 age = compute_age_days(naive, now=NOW) assert age == pytest.approx(10.0) # ─── compute_decay ──────────────────────────────────────────────── def test_decay_at_half_life_news(): # age=90 → decay = 0.5 assert compute_decay(90.0, "news_90d") == pytest.approx(0.5, rel=1e-6) def test_decay_law_365d_removed_returns_one(): # C-1 후속: law_365d 폐기 → HALF_LIFE_DAYS 미등록 policy → decay 1.0 (no-op) assert compute_decay(365.0, "law_365d") == 1.0 def test_decay_age_zero_full(): assert compute_decay(0.0, "news_90d") == pytest.approx(1.0) def test_decay_news_30_days(): expected = math.exp(-math.log(2) * 30 / 90) # ~0.794 assert compute_decay(30.0, "news_90d") == pytest.approx(expected, rel=1e-6) def test_decay_policy_none_returns_one(): assert compute_decay(100.0, None) == 1.0 def test_decay_age_none_returns_one(): assert compute_decay(None, "news_90d") == 1.0 # ─── adjusted_score ─────────────────────────────────────────────── def test_adjusted_score_decay_one_no_change(): # decay=1 → final = base * 1.0 assert adjusted_score(0.85, 1.0) == pytest.approx(0.85) def test_adjusted_score_decay_zero_floor(): # 가드 11: decay=0 → final = base * FLOOR (0.7), 그 아래로 안 내려감 assert adjusted_score(0.85, 0.0) == pytest.approx(0.85 * DECAY_FLOOR) def test_adjusted_score_old_news_floor_bound(): # 매우 오래된 뉴스 (age >> half_life) — multiplier 가 0.7 미만 절대 아님 decay = compute_decay(10000.0, "news_90d") # ~0 score = adjusted_score(1.0, decay) assert score >= DECAY_FLOOR assert score == pytest.approx(DECAY_FLOOR, abs=0.01) # ─── apply_freshness_decay (integration with mock session) ──────── class _MockSession: """SQLAlchemy AsyncSession mock — execute(text, params) 호출만 받음.""" def __init__(self, meta_rows: list[dict]): self._rows = meta_rows self.last_ids: list[int] | None = None async def execute(self, _stmt, params): self.last_ids = list(params["ids"]) # SearchPipeline 의 row._mapping 패턴이 아니라 row.field 패턴이라 # SimpleNamespace 로 충분. return [SimpleNamespace(**row) for row in self._rows if row["id"] in self.last_ids] def _result(doc_id: int, score: float): """SearchResult 흉내. freshness_decay 는 .id / .score / .freshness_debug 만 만짐.""" return SimpleNamespace(id=doc_id, score=score, freshness_debug=None) @pytest.mark.asyncio async def test_apply_news_recent_vs_old_recent_higher(): # 가드 1: news recent vs old → recent 가 final score 높음 base = 0.50 # 동일 base rows = [ {"id": 1, "source_channel": "news", "content_origin": "extracted", "created_at": NOW - timedelta(days=3)}, # 매우 최근 {"id": 2, "source_channel": "news", "content_origin": "extracted", "created_at": NOW - timedelta(days=365)}, # 1년 전 ] session = _MockSession(rows) results = [_result(1, base), _result(2, base)] out = await apply_freshness_decay(results, session, now=NOW) # 재정렬: 최근(id=1) 이 위 assert out[0].id == 1 assert out[0].score > out[1].score # debug assert out[0].freshness_debug["freshness_policy"] == "news_90d" assert out[0].freshness_debug["base_score"] == base assert out[0].freshness_debug["age_days"] == 3 assert out[0].freshness_debug["freshness_date_source"] == "created_at" assert out[1].freshness_debug["age_days"] == 365 @pytest.mark.asyncio async def test_apply_law_monitor_now_unaffected(): # C-1 후속: law_monitor freshness 폐기 → recent/old 동일 score (재정렬 없음) base = 0.50 rows = [ {"id": 1, "source_channel": "law_monitor", "content_origin": "extracted", "material_type": "law", "created_at": NOW - timedelta(days=10)}, {"id": 2, "source_channel": "law_monitor", "content_origin": "extracted", "material_type": "law", "created_at": NOW - timedelta(days=730)}, ] session = _MockSession(rows) results = [_result(1, base), _result(2, base)] out = await apply_freshness_decay(results, session, now=NOW) assert out[0].score == base and out[1].score == base assert out[0].freshness_debug["freshness_policy"] is None @pytest.mark.asyncio async def test_apply_incident_recent_vs_old_recent_higher(): # C-1 후속: 재해사례(incident) recent 가 위 (news_90d 흡수, source_channel='crawl') base = 0.50 rows = [ {"id": 1, "source_channel": "crawl", "content_origin": "extracted", "material_type": "incident", "created_at": NOW - timedelta(days=5)}, {"id": 2, "source_channel": "crawl", "content_origin": "extracted", "material_type": "incident", "created_at": NOW - timedelta(days=400)}, ] session = _MockSession(rows) results = [_result(1, base), _result(2, base)] out = await apply_freshness_decay(results, session, now=NOW) assert out[0].id == 1 assert out[0].score > out[1].score assert out[0].freshness_debug["freshness_policy"] == "news_90d" @pytest.mark.asyncio async def test_apply_non_target_unaffected(): # 가드 3: manual 은 score 변화 0 base = 0.42 rows = [ {"id": 1, "source_channel": "manual", "content_origin": "extracted", "created_at": NOW - timedelta(days=1000)}, ] session = _MockSession(rows) results = [_result(1, base)] out = await apply_freshness_decay(results, session, now=NOW) assert out[0].score == base assert out[0].freshness_debug["freshness_policy"] is None assert out[0].freshness_debug["base_score"] == base assert out[0].freshness_debug["decay_factor"] is None @pytest.mark.asyncio async def test_apply_missing_created_at_unaffected(): # 가드 4: missing date → score 변화 0 base = 0.50 rows = [ {"id": 1, "source_channel": "news", "content_origin": "extracted", "created_at": None}, ] session = _MockSession(rows) results = [_result(1, base)] out = await apply_freshness_decay(results, session, now=NOW) assert out[0].score == base # policy 는 'news_90d' 인데 age None 이라 decay 1.0 → adjusted_score 호출 자체가 안 됨 (policy 없는 길 아님) # freshness_decay 코드: policy True + age None → decay=1.0 → adjusted_score(base, 1.0) = base * 1.0 # 결과적으로 score 변화 0 — 가드 4 통과 assert out[0].freshness_debug["freshness_policy"] == "news_90d" assert out[0].freshness_debug["age_days"] is None @pytest.mark.asyncio async def test_apply_future_created_at_clamped(): # 가드 5: future created_at → age_days = 0 → decay = 1.0 → score 그대로 base = 0.50 rows = [ {"id": 1, "source_channel": "news", "content_origin": "extracted", "created_at": NOW + timedelta(days=10)}, ] session = _MockSession(rows) results = [_result(1, base)] out = await apply_freshness_decay(results, session, now=NOW) assert out[0].freshness_debug["age_days"] == 0 assert out[0].score == pytest.approx(base * 1.0) @pytest.mark.asyncio async def test_apply_floor_never_below_0_7(): # 가드 6 (floor): 매우 오래된 news → multiplier 0.7 미만 절대 아님 base = 1.0 rows = [ {"id": 1, "source_channel": "news", "content_origin": "extracted", "created_at": NOW - timedelta(days=10000)}, # 27년 ㅋ ] session = _MockSession(rows) results = [_result(1, base)] out = await apply_freshness_decay(results, session, now=NOW) assert out[0].score >= base * DECAY_FLOOR assert out[0].score == pytest.approx(base * DECAY_FLOOR, abs=0.001) @pytest.mark.asyncio async def test_apply_ai_drafted_news_skipped(): # 가드 2 통합: source=news 라도 content_origin='ai_drafted' 면 비적용 base = 0.50 rows = [ {"id": 1, "source_channel": "news", "content_origin": "ai_drafted", "created_at": NOW - timedelta(days=300)}, ] session = _MockSession(rows) results = [_result(1, base)] out = await apply_freshness_decay(results, session, now=NOW) assert out[0].score == base assert out[0].freshness_debug["freshness_policy"] is None @pytest.mark.asyncio async def test_apply_empty_results_noop(): session = _MockSession([]) out = await apply_freshness_decay([], session, now=NOW) assert out == [] @pytest.mark.asyncio async def test_apply_resort_by_adjusted_score(): # base score 가 같아도 freshness 가 정렬을 바꾼다. rows = [ {"id": 1, "source_channel": "news", "content_origin": "extracted", "created_at": NOW - timedelta(days=200)}, # decay ~0.21 {"id": 2, "source_channel": "news", "content_origin": "extracted", "created_at": NOW - timedelta(days=10)}, # decay ~0.926 {"id": 3, "source_channel": "manual", "content_origin": "extracted", "created_at": NOW - timedelta(days=10000)}, # 비적용 → base 그대로 ] session = _MockSession(rows) # 일부러 base 동일 0.5 → 정렬은 freshness 만으로 결정 results = [_result(1, 0.5), _result(2, 0.5), _result(3, 0.5)] out = await apply_freshness_decay(results, session, now=NOW) # manual 은 base 0.5 그대로, news 10일 전 은 0.5 * (0.7 + 0.3*0.926) ≈ 0.489 # news 200일 전 은 0.5 * (0.7 + 0.3*0.21) ≈ 0.382 # 정렬: manual(0.5) > news_recent(0.489) > news_old(0.382) assert out[0].id == 3 # manual assert out[1].id == 2 # news 10일 assert out[2].id == 1 # news 200일