Files
Hyungi Ahn 99672292d3 fix(policy): use container-compatible imports (drop app. prefix)
프로덕션 컨테이너는 /app 을 cwd 로 실행하고 import 는 `from api...`,
`from core...`, `from workers...` 처럼 무접두 스타일을 사용한다.
PR-A 내부 import 가 `from app.policy...`, `from app.ai.envelope` 로
되어 있어서 컨테이너에서 ModuleNotFoundError 발생.

변경:
- app/policy/*.py: `from app.policy.X` → `from policy.X`
- app/services/prompt_versions.py: lazy import 도 `from policy.prompt_render`
- app/ai/envelope.py: 영향 없음 (내부 import 없음)
- tests/policy/*.py: 모두 `from policy.X` / `from ai.envelope` 로 통일
- tests/policy/conftest.py: 로컬 pytest 용 sys.path.insert(app/) 추가
  (MacBook 에서 repo-root 기준 실행 시 app/ 를 package root 로 취급)

CI: pytest tests/policy/ -q → 98 passed (로컬, 동일 결과)
프로덕션: docker exec fastapi python -c "from policy.loader import load_policy" → OK

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 09:42:24 +09:00

291 lines
11 KiB
Python

"""INV-2, INV-3, INV-4, INV-5, INV-6 — 결정론적 불변식 검증."""
from __future__ import annotations
import pytest
from policy.routing import (
REASON_FALLBACK_DOMAIN,
REASON_HIGH_IMPACT,
REASON_LONG_CONTEXT,
REASON_LOW_CONFIDENCE,
REASON_MULTI_DOC,
REASON_RISK_FLAG,
decide_routing,
)
# =====================================================================
# INV-2: risk_flag_requires_26b_forces_escalation
# =====================================================================
def test_risk_flag_forces_escalation(policy):
    """INV-2: a risk flag marked requires_26b=True always forces escalation.

    The request uses the ``news_item`` domain and a high confidence so that
    the self-declared flag is the only escalation trigger being exercised.
    """
    # Per the policy file, safety_legal_interpretation has requires_26b=true.
    forcing_flag = "safety_legal_interpretation"
    outcome = decide_routing(
        policy=policy,
        subject_domain="news_item",  # domain itself is high_impact=false
        content_chars=500,
        confidence=0.95,  # must escalate even at high confidence
        self_declared_high_impact=False,
        self_declared_risk_flags=[forcing_flag],
    )

    assert forcing_flag in outcome.risk_flags
    assert REASON_RISK_FLAG in outcome.escalation_reasons
    assert outcome.escalate_to_26b is True
def test_pii_flag_does_not_force_escalation_on_its_own(policy):
    """``pii_present`` has requires_26b=false, so alone it must not escalate."""
    call_args = {
        "subject_domain": "news_item",
        "content_chars": 500,
        "self_declared_high_impact": False,
        "self_declared_risk_flags": ["pii_present"],
        "confidence": 0.95,
        "policy": policy,
    }
    result = decide_routing(**call_args)

    # No other trigger is present, so the request stays un-escalated ...
    assert result.escalate_to_26b is False
    # ... while the flag itself is still recorded on the decision.
    assert "pii_present" in result.risk_flags
# =====================================================================
# INV-3: context_cap_forces_escalation
# =====================================================================
def test_context_cap_forces_escalation(policy):
    """INV-3: content_chars above context_char_cap_4b escalates as long_context."""
    over_cap = policy.escalation.context_char_cap_4b + 1
    routed = decide_routing(
        policy=policy,
        subject_domain="news_item",
        confidence=0.95,
        self_declared_high_impact=False,
        content_chars=over_cap,
    )

    assert REASON_LONG_CONTEXT in routed.escalation_reasons
    assert routed.escalate_to_26b is True
def test_context_at_cap_does_not_escalate(policy):
    """Boundary: content_chars == cap is not long_context (comparison is strict >)."""
    routing_kwargs = dict(
        subject_domain="news_item",
        content_chars=policy.escalation.context_char_cap_4b,
        self_declared_high_impact=False,
        confidence=0.95,
        policy=policy,
    )
    reasons = decide_routing(**routing_kwargs).escalation_reasons

    # news_item is high_impact=false and no other trigger is supplied; only the
    # absence of the long-context reason is asserted here.
    assert REASON_LONG_CONTEXT not in reasons
# =====================================================================
# INV-4: multi_doc_forces_escalation
# =====================================================================
def test_multi_doc_forces_escalation(policy):
    """INV-4: evidence_doc_count >= threshold escalates and adds a derived flag."""
    # Exactly at the threshold (3 in the current policy) — the inclusive boundary.
    doc_count = policy.escalation.escalate_on_multi_doc_count
    outcome = decide_routing(
        policy=policy,
        subject_domain="news_item",
        content_chars=500,
        confidence=0.95,
        self_declared_high_impact=False,
        evidence_doc_count=doc_count,
    )

    assert "multi_doc_dependency" in outcome.risk_flags
    assert REASON_MULTI_DOC in outcome.escalation_reasons
    assert outcome.escalate_to_26b is True
def test_multi_doc_below_threshold_no_escalation(policy):
    """Boundary: one document below the multi-doc threshold does not trigger it.

    The document count is derived from the policy (threshold - 1) instead of
    being hard-coded, so the test keeps probing the real boundary if
    ``escalate_on_multi_doc_count`` is changed in the policy file.  With the
    current threshold of 3 this is the same value (2) the test used before.
    """
    threshold = policy.escalation.escalate_on_multi_doc_count
    decision = decide_routing(
        subject_domain="news_item",
        content_chars=500,
        evidence_doc_count=threshold - 1,  # just under the inclusive threshold
        self_declared_high_impact=False,
        confidence=0.95,
        policy=policy,
    )

    # Neither the escalation reason nor the derived flag may appear.
    assert REASON_MULTI_DOC not in decision.escalation_reasons
    assert "multi_doc_dependency" not in decision.risk_flags
# =====================================================================
# INV-5: risk_flags_union
# =====================================================================
def test_risk_flags_union_default_plus_self_declared(policy):
    """INV-5: domain-default and self-declared flags are combined as a UNION."""
    # Per the policy file, safety_reference defaults to
    # [safety_legal_interpretation]; pii_present is an additional, different flag.
    extra_flags = ["pii_present"]
    merged = decide_routing(
        policy=policy,
        subject_domain="safety_reference",
        content_chars=1000,
        confidence=0.95,
        self_declared_high_impact=False,
        self_declared_risk_flags=extra_flags,
    ).risk_flags

    # Both sources must be represented for the union to hold.
    assert "pii_present" in merged  # self-declared
    assert "safety_legal_interpretation" in merged  # domain default
def test_risk_flags_union_with_derived_flags(policy):
    """Default, self-declared and derived flags are all merged into risk_flags.

    The inputs that trigger the derived flags are taken from the policy
    thresholds rather than hard-coded, matching the dedicated INV-3 / INV-4 /
    low-confidence tests, so this test stays valid if the policy values change.
    """
    escalation = policy.escalation
    decision = decide_routing(
        subject_domain="safety_reference",
        # Over the 4b context cap; no flag is asserted for this input below.
        content_chars=escalation.context_char_cap_4b + 1,
        # At the multi-doc threshold -> multi_doc_dependency is derived.
        evidence_doc_count=escalation.escalate_on_multi_doc_count,
        self_declared_high_impact=False,
        self_declared_risk_flags=["pii_present"],
        # Just under the confidence threshold -> low_confidence_reasoning is derived.
        confidence=escalation.confidence_threshold - 0.01,
        policy=policy,
    )

    # All four flags must be present.
    assert "safety_legal_interpretation" in decision.risk_flags  # domain default
    assert "pii_present" in decision.risk_flags  # self-declared
    assert "multi_doc_dependency" in decision.risk_flags  # derived (INV-4)
    assert "low_confidence_reasoning" in decision.risk_flags  # derived (low confidence)
def test_risk_flags_is_sorted_tuple(policy):
    """RoutingDecision.risk_flags is a sorted tuple (for reproducibility)."""
    declared = ["pii_present", "safety_legal_interpretation"]
    flags = decide_routing(
        policy=policy,
        subject_domain="news_item",
        content_chars=500,
        confidence=0.95,
        self_declared_risk_flags=declared,
    ).risk_flags

    assert isinstance(flags, tuple)
    # Sorting must be a no-op on the returned sequence.
    assert sorted(flags) == list(flags)
# =====================================================================
# INV-6: fallback_domain for unknown
# =====================================================================
def test_fallback_domain_used_for_unknown(policy):
    """INV-6: an undefined subject_domain is routed through fallback_domain."""
    unknown_domain = "__nonexistent_domain__"
    routed = decide_routing(
        policy=policy,
        subject_domain=unknown_domain,
        confidence=0.95,
        content_chars=500,
    )

    assert routed is not None
    # The decision records that the fallback was used and under which name.
    assert routed.used_fallback is True
    assert routed.subject_domain_used == policy.fallback_domain.name
    assert REASON_FALLBACK_DOMAIN in routed.escalation_reasons
def test_fallback_still_respects_other_invariants(policy):
    """INV-3 (long_context) keeps working when the fallback domain is in use."""
    too_long = policy.escalation.context_char_cap_4b + 1  # exceeds the 4b cap
    params = {
        "subject_domain": "__nonexistent__",
        "content_chars": too_long,
        "confidence": 0.95,
        "policy": policy,
    }
    outcome = decide_routing(**params)

    assert outcome.used_fallback is True
    assert REASON_LONG_CONTEXT in outcome.escalation_reasons
    assert outcome.escalate_to_26b is True
def test_fallback_default_risk_flags_applied(policy):
    """The fallback domain's default_risk_flags show up in the routing result."""
    result = decide_routing(
        policy=policy,
        subject_domain="__unknown__",
        confidence=0.95,
        content_chars=500,
    )

    # Per the policy file the fallback default is ["low_confidence_reasoning"],
    # a requires_26b=true flag; only the flag's presence is asserted here.
    fallback_flag = "low_confidence_reasoning"
    assert fallback_flag in result.risk_flags
# =====================================================================
# low_confidence escalation (not a numbered invariant but required)
# =====================================================================
def test_low_confidence_forces_escalation(policy):
    """confidence below the threshold escalates and adds the derived flag."""
    just_below = policy.escalation.confidence_threshold - 0.01
    outcome = decide_routing(
        policy=policy,
        subject_domain="news_item",
        content_chars=500,
        self_declared_high_impact=False,
        confidence=just_below,
    )

    assert "low_confidence_reasoning" in outcome.risk_flags
    assert REASON_LOW_CONFIDENCE in outcome.escalation_reasons
    assert outcome.escalate_to_26b is True
# =====================================================================
# 도메인 × 시나리오 스냅샷 (테이블 드리븐)
# =====================================================================
@pytest.mark.parametrize(
    "domain,expected_escalate,expected_high_impact",
    [
        ("safety_reference", True, True),
        ("safety_operational", True, True),
        ("msds", True, True),
        ("hazard_specific", True, True),
        ("incident_report", True, True),
        ("health_record", True, True),
        ("safety_video", False, False),
        ("news_item", False, False),
        ("news_digest_request", True, True),
    ],
)
def test_default_escalation_per_domain(policy, domain, expected_escalate, expected_high_impact):
    """Snapshot of each domain's baseline routing.

    Baseline means: high confidence, short content, nothing self-declared.
    Each row pins the expected ``escalate_to_26b`` and ``high_impact_task``
    values for one domain.
    """
    baseline_request = {
        "content_chars": 1000,
        "self_declared_high_impact": False,
        "self_declared_risk_flags": [],
        "confidence": 0.95,
    }
    decision = decide_routing(subject_domain=domain, policy=policy, **baseline_request)

    # Check high-impact classification first, then the escalation outcome.
    assert decision.high_impact_task is expected_high_impact, (
        f"domain={domain}: high_impact expected={expected_high_impact}, got={decision.high_impact_task}"
    )
    assert decision.escalate_to_26b is expected_escalate, (
        f"domain={domain}: escalate expected={expected_escalate}, got={decision.escalate_to_26b}, "
        f"reasons={decision.escalation_reasons}"
    )
def test_synthesis_directives_collected(policy):
    """synthesis_directive entries of requires_26b flags are gathered on the result."""
    # Per the policy file, msds defaults to
    # [chemical_hazard, safety_legal_interpretation], both carrying a directive.
    directives = decide_routing(
        policy=policy,
        subject_domain="msds",
        content_chars=1000,
        confidence=0.95,
        self_declared_high_impact=False,
    ).synthesis_directives

    assert len(directives) >= 2
    # Every collected directive must be non-empty.
    assert all(len(text) > 0 for text in directives)