"""INV-2, INV-3, INV-4, INV-5, INV-6 — deterministic routing invariant checks.

Every test calls ``decide_routing`` with the ``policy`` fixture and inspects the
returned decision object. Boundary values are derived from the policy itself
wherever the policy exposes them, so the tests keep probing the real boundary
when thresholds are retuned.
"""

from __future__ import annotations

import pytest

from policy.routing import (
    REASON_FALLBACK_DOMAIN,
    REASON_HIGH_IMPACT,
    REASON_LONG_CONTEXT,
    REASON_LOW_CONFIDENCE,
    REASON_MULTI_DOC,
    REASON_RISK_FLAG,
    decide_routing,
)


# =====================================================================
# INV-2: risk_flag_requires_26b_forces_escalation
# =====================================================================


def test_risk_flag_forces_escalation(policy):
    """INV-2: a flag with requires_26b=True always forces escalation."""
    # safety_legal_interpretation is declared with requires_26b=true.
    decision = decide_routing(
        subject_domain="news_item",  # the domain itself has high_impact=false
        content_chars=500,
        self_declared_high_impact=False,
        self_declared_risk_flags=["safety_legal_interpretation"],
        confidence=0.95,  # must escalate even at high confidence
        policy=policy,
    )
    assert decision.escalate_to_26b is True
    assert REASON_RISK_FLAG in decision.escalation_reasons
    assert "safety_legal_interpretation" in decision.risk_flags


def test_pii_flag_does_not_force_escalation_on_its_own(policy):
    """pii_present has requires_26b=false, so alone it must not escalate."""
    decision = decide_routing(
        subject_domain="news_item",
        content_chars=500,
        self_declared_high_impact=False,
        self_declared_risk_flags=["pii_present"],
        confidence=0.95,
        policy=policy,
    )
    assert "pii_present" in decision.risk_flags
    # No other escalation condition is present, so no escalation.
    assert decision.escalate_to_26b is False


# =====================================================================
# INV-3: context_cap_forces_escalation
# =====================================================================


def test_context_cap_forces_escalation(policy):
    """INV-3: content_chars > context_char_cap_4b triggers long_context escalation."""
    cap = policy.escalation.context_char_cap_4b
    decision = decide_routing(
        subject_domain="news_item",
        content_chars=cap + 1,
        self_declared_high_impact=False,
        confidence=0.95,
        policy=policy,
    )
    assert decision.escalate_to_26b is True
    assert REASON_LONG_CONTEXT in decision.escalation_reasons


def test_context_at_cap_does_not_escalate(policy):
    """Boundary: content_chars == cap must not escalate (comparison is strict >)."""
    cap = policy.escalation.context_char_cap_4b
    decision = decide_routing(
        subject_domain="news_item",
        content_chars=cap,
        self_declared_high_impact=False,
        confidence=0.95,
        policy=policy,
    )
    # news_item has high_impact=false and no other condition is set up here.
    assert REASON_LONG_CONTEXT not in decision.escalation_reasons


# =====================================================================
# INV-4: multi_doc_forces_escalation
# =====================================================================


def test_multi_doc_forces_escalation(policy):
    """INV-4: evidence_doc_count >= threshold escalates and adds the derived flag."""
    threshold = policy.escalation.escalate_on_multi_doc_count
    decision = decide_routing(
        subject_domain="news_item",
        content_chars=500,
        evidence_doc_count=threshold,  # exactly at the threshold (3 when written)
        self_declared_high_impact=False,
        confidence=0.95,
        policy=policy,
    )
    assert decision.escalate_to_26b is True
    assert REASON_MULTI_DOC in decision.escalation_reasons
    assert "multi_doc_dependency" in decision.risk_flags


def test_multi_doc_below_threshold_no_escalation(policy):
    """Boundary: one document below the threshold must not trigger multi_doc."""
    # Derived from the policy (rather than a literal 2) so the test keeps
    # exercising the real boundary if the threshold is changed.
    threshold = policy.escalation.escalate_on_multi_doc_count
    decision = decide_routing(
        subject_domain="news_item",
        content_chars=500,
        evidence_doc_count=threshold - 1,
        self_declared_high_impact=False,
        confidence=0.95,
        policy=policy,
    )
    assert REASON_MULTI_DOC not in decision.escalation_reasons
    assert "multi_doc_dependency" not in decision.risk_flags


# =====================================================================
# INV-5: risk_flags_union
# =====================================================================


def test_risk_flags_union_default_plus_self_declared(policy):
    """INV-5: default and self-declared flags are UNIONed; both must be present."""
    # safety_reference has default = [safety_legal_interpretation].
    decision = decide_routing(
        subject_domain="safety_reference",
        content_chars=1000,
        self_declared_high_impact=False,
        self_declared_risk_flags=["pii_present"],  # adds a different flag
        confidence=0.95,
        policy=policy,
    )
    # Both present means the union held.
    assert "safety_legal_interpretation" in decision.risk_flags  # default
    assert "pii_present" in decision.risk_flags  # self-declared


def test_risk_flags_union_with_derived_flags(policy):
    """Default + self-declared + derived (low_confidence, multi_doc) flags all merge."""
    escalation = policy.escalation
    decision = decide_routing(
        subject_domain="safety_reference",
        # Over the context cap; this test asserts no risk flag for that condition.
        content_chars=escalation.context_char_cap_4b + 1,
        # At the multi-doc threshold, so multi_doc_dependency is derived.
        evidence_doc_count=escalation.escalate_on_multi_doc_count,
        self_declared_high_impact=False,
        self_declared_risk_flags=["pii_present"],
        # Just below the confidence threshold, so low_confidence_reasoning is derived.
        confidence=escalation.confidence_threshold - 0.01,
        policy=policy,
    )
    # All four flags must be present.
    assert "safety_legal_interpretation" in decision.risk_flags  # default
    assert "pii_present" in decision.risk_flags  # self-declared
    assert "multi_doc_dependency" in decision.risk_flags  # derived (INV-4)
    assert "low_confidence_reasoning" in decision.risk_flags  # derived (low confidence)


def test_risk_flags_is_sorted_tuple(policy):
    """RoutingDecision.risk_flags is a sorted tuple (for reproducibility)."""
    decision = decide_routing(
        subject_domain="news_item",
        content_chars=500,
        self_declared_risk_flags=["pii_present", "safety_legal_interpretation"],
        confidence=0.95,
        policy=policy,
    )
    assert isinstance(decision.risk_flags, tuple)
    assert list(decision.risk_flags) == sorted(decision.risk_flags)


# =====================================================================
# INV-6: fallback_domain for unknown
# =====================================================================


def test_fallback_domain_used_for_unknown(policy):
    """INV-6: an undefined subject_domain is routed through fallback_domain."""
    decision = decide_routing(
        subject_domain="__nonexistent_domain__",
        content_chars=500,
        confidence=0.95,
        policy=policy,
    )
    assert decision is not None
    assert decision.used_fallback is True
    assert decision.subject_domain_used == policy.fallback_domain.name
    assert REASON_FALLBACK_DOMAIN in decision.escalation_reasons


def test_fallback_still_respects_other_invariants(policy):
    """Even on fallback, INV-3 (long_context) still applies."""
    cap = policy.escalation.context_char_cap_4b
    decision = decide_routing(
        subject_domain="__nonexistent__",
        content_chars=cap + 1,  # long context
        confidence=0.95,
        policy=policy,
    )
    assert decision.used_fallback is True
    assert REASON_LONG_CONTEXT in decision.escalation_reasons
    assert decision.escalate_to_26b is True


def test_fallback_default_risk_flags_applied(policy):
    """fallback.default_risk_flags = [low_confidence_reasoning] shows up in the result."""
    decision = decide_routing(
        subject_domain="__unknown__",
        content_chars=500,
        confidence=0.95,
        policy=policy,
    )
    # The fallback default, low_confidence_reasoning, is noted as a
    # requires_26b=true flag; only the flag's presence is asserted here.
    assert "low_confidence_reasoning" in decision.risk_flags


# =====================================================================
# low_confidence escalation (not a numbered invariant but required)
# =====================================================================


def test_low_confidence_forces_escalation(policy):
    """confidence < threshold triggers low_confidence escalation plus the derived flag."""
    threshold = policy.escalation.confidence_threshold
    decision = decide_routing(
        subject_domain="news_item",
        content_chars=500,
        self_declared_high_impact=False,
        confidence=threshold - 0.01,
        policy=policy,
    )
    assert decision.escalate_to_26b is True
    assert REASON_LOW_CONFIDENCE in decision.escalation_reasons
    assert "low_confidence_reasoning" in decision.risk_flags


# =====================================================================
# Domain x scenario snapshot (table-driven)
# =====================================================================


@pytest.mark.parametrize(
    "domain,expected_escalate,expected_high_impact",
    [
        ("safety_reference", True, True),
        ("safety_operational", True, True),
        ("msds", True, True),
        ("hazard_specific", True, True),
        ("incident_report", True, True),
        ("health_record", True, True),
        ("safety_video", False, False),
        ("news_item", False, False),
        ("news_digest_request", True, True),
    ],
)
def test_default_escalation_per_domain(policy, domain, expected_escalate, expected_high_impact):
    """Snapshot each domain's baseline routing.

    Baseline means high confidence, short content, and nothing self-declared.
    """
    decision = decide_routing(
        subject_domain=domain,
        content_chars=1000,
        self_declared_high_impact=False,
        self_declared_risk_flags=[],
        confidence=0.95,
        policy=policy,
    )
    assert decision.high_impact_task is expected_high_impact, (
        f"domain={domain}: high_impact expected={expected_high_impact}, got={decision.high_impact_task}"
    )
    assert decision.escalate_to_26b is expected_escalate, (
        f"domain={domain}: escalate expected={expected_escalate}, got={decision.escalate_to_26b}, "
        f"reasons={decision.escalation_reasons}"
    )


def test_synthesis_directives_collected(policy):
    """synthesis_directive entries of requires_26b flags are collected into the result."""
    decision = decide_routing(
        subject_domain="msds",  # default=[chemical_hazard, safety_legal_interpretation]
        content_chars=1000,
        self_declared_high_impact=False,
        confidence=0.95,
        policy=policy,
    )
    # Both default flags carry a synthesis_directive in the yaml.
    assert len(decision.synthesis_directives) >= 2
    # No collected directive may be empty.
    for directive in decision.synthesis_directives:
        assert len(directive) > 0