"""Routing engine: decide whether a 4B result must be escalated to 26B.

Takes the 4B model output plus request context and applies six invariants.
All of them are deterministic, code-level HARD rules:

INV-1 self_declare_add_only
    deterministic_high_impact=True AND self_declare=False -> high_impact_task=True
    (self-declaration is ADD only; it can never switch high impact OFF)

INV-2 risk_flag_requires_26b_forces_escalation
    any(flag where policy.risk_flags[flag].requires_26b) -> escalate=True

INV-3 context_cap_forces_escalation
    content_chars > policy.escalation.context_char_cap_4b
    -> escalate=True, reason="long_context"

INV-4 multi_doc_forces_escalation
    evidence_doc_count >= policy.escalation.escalate_on_multi_doc_count
    -> escalate=True, reason="multi_doc", add "multi_doc_dependency" to risk_flags

INV-5 risk_flags_union
    final risk_flags = UNION(domain.default_risk_flags, self_declared, derived)
    Self-declared flags are ADD only; even when the domain has defaults, any
    extra self-declared flag ends up in the union.

INV-6 fallback_domain for unknown
    subject_domain not in policy.subject_domains -> use policy.fallback_domain
    (routing never falls through to a None/undefined domain)
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Iterable

from app.policy.loader import load_policy
from app.policy.schema import DomainPolicy, SubjectDomain, FallbackDomain


# --- Reason string constants (referenced by tests) --------------------------
REASON_HIGH_IMPACT = "high_impact"
REASON_RISK_FLAG = "risk_flag_requires_26b"
REASON_LOW_CONFIDENCE = "low_confidence"
REASON_LONG_CONTEXT = "long_context"
REASON_MULTI_DOC = "multi_doc"
REASON_FALLBACK_DOMAIN = "fallback_domain"


@dataclass(frozen=True)
class RoutingDecision:
    """Immutable result of :func:`decide_routing`."""

    # True when any escalation condition fired.
    escalate_to_26b: bool
    # De-duplicated reasons in the order they were evaluated. May contain
    # REASON_FALLBACK_DOMAIN even when escalate_to_26b is False.
    escalation_reasons: tuple[str, ...]
    # Sorted union of domain-default, self-declared and derived flags.
    risk_flags: tuple[str, ...]
    # Final high-impact verdict (deterministic OR self-declared).
    high_impact_task: bool
    # Non-empty synthesis directives of the known flags, in sorted-flag order.
    synthesis_directives: tuple[str, ...]
    # Name of the domain actually applied (fallback_domain.name on fallback).
    subject_domain_used: str
    used_fallback: bool = False


def _resolve_domain(
    policy: DomainPolicy, subject_domain: str
) -> tuple[SubjectDomain | FallbackDomain, str, bool]:
    """INV-6: look up *subject_domain*, falling back to ``policy.fallback_domain``.

    Returns ``(domain_spec, domain_name_used, used_fallback)``.
    """
    spec = policy.subject_domains.get(subject_domain)
    if spec is not None:
        return spec, subject_domain, False
    return policy.fallback_domain, policy.fallback_domain.name, True


def _as_flag_tuple(flags: Iterable[str] | str | None) -> tuple[str, ...]:
    """Normalize a flag collection so it is safe to merge into a set.

    ``set.update("pii")`` would add the characters ``p``, ``i``, ``i`` instead
    of the flag ``"pii"``, so a bare string is treated as a single flag (an
    empty string contributes nothing). ``None`` is treated as "no flags".
    """
    if flags is None:
        return ()
    if isinstance(flags, str):
        return (flags,) if flags else ()
    return tuple(flags)


def decide_routing(
    *,
    subject_domain: str,
    content_chars: int,
    deterministic_keyword_hits: Iterable[str] = (),
    self_declared_high_impact: bool = False,
    self_declared_risk_flags: Iterable[str] = (),
    confidence: float = 1.0,
    evidence_doc_count: int = 0,
    policy: DomainPolicy | None = None,
) -> RoutingDecision:
    """Decide whether to escalate to 26B from the policy and the inputs alone.

    Parameters
    ----------
    subject_domain: domain name chosen upstream (keyword/source_channel match).
        Unknown names resolve to ``policy.fallback_domain`` (INV-6).
    content_chars: number of characters of body text given to 4B.
    deterministic_keyword_hits: upstream keyword-match results. May be empty;
        the domain's own ``high_impact`` setting still applies.
    self_declared_high_impact: the ``high_impact_self_declared`` field of the
        4B output. ADD only: it can raise high impact but never clear it.
    self_declared_risk_flags: risk flags self-declared in the 4B output. A bare
        string is accepted and treated as one flag; ``None`` as no flags.
    confidence: the 4B output's confidence (0.0~1.0). NaN is treated as low
        confidence.
    evidence_doc_count: number of documents being synthesized (e.g. on the
        /ask path).
    policy: injection point (tests). When None, ``load_policy()`` is used.

    Returns
    -------
    RoutingDecision
        ``escalate_to_26b`` is True when the task is high impact, a flag
        requires 26B, the context cap is exceeded, the multi-doc count is
        reached, or confidence is below the threshold. Using the fallback
        domain is recorded as a reason for visibility but does not by itself
        force escalation.
    """
    if policy is None:
        policy = load_policy()

    domain_spec, domain_name, used_fallback = _resolve_domain(policy, subject_domain)
    escalation = policy.escalation

    # --- INV-1: high impact (deterministic; self-declaration is ADD only) ---
    deterministic_high_impact = (
        any(True for _ in deterministic_keyword_hits)
        or bool(domain_spec.high_impact)
    )
    high_impact = deterministic_high_impact or bool(self_declared_high_impact)

    # --- INV-5: risk_flags UNION (domain defaults + 4B self-declared) -------
    flags: set[str] = set(_as_flag_tuple(domain_spec.default_risk_flags))
    flags.update(_as_flag_tuple(self_declared_risk_flags))

    # --- Threshold predicates: computed once, reused for both the reasons ---
    # --- list and the final escalate verdict so the two cannot drift.     ---
    long_context = content_chars > escalation.context_char_cap_4b  # INV-3
    multi_doc = (
        evidence_doc_count >= escalation.escalate_on_multi_doc_count  # INV-4
    )
    # `not (a >= b)` instead of `a < b`: every comparison with NaN is False,
    # so a NaN confidence would otherwise be silently treated as confident.
    low_confidence = not (confidence >= escalation.confidence_threshold)

    # Derived flags are added before INV-2 so that they, too, can require 26B.
    if multi_doc:
        flags.add("multi_doc_dependency")
    if low_confidence:
        flags.add("low_confidence_reasoning")

    # --- INV-2: any known flag whose policy entry requires 26B --------------
    known_specs = (policy.risk_flags.get(name) for name in flags)
    requires_26b_flag = any(
        spec.requires_26b for spec in known_specs if spec is not None
    )

    # --- Reasons, in evaluation order ---------------------------------------
    reasons: list[str] = []
    if high_impact:
        reasons.append(REASON_HIGH_IMPACT)
    if long_context:
        reasons.append(REASON_LONG_CONTEXT)
    if multi_doc:
        reasons.append(REASON_MULTI_DOC)
    if low_confidence:
        reasons.append(REASON_LOW_CONFIDENCE)
    if requires_26b_flag:
        reasons.append(REASON_RISK_FLAG)
    if used_fallback:
        # INV-6: does not force escalation; recorded for visibility only.
        reasons.append(REASON_FALLBACK_DOMAIN)

    # --- Synthesis directives (rules handed to 26B), in sorted-flag order ---
    sorted_flags = sorted(flags)
    directives: list[str] = []
    for name in sorted_flags:
        spec = policy.risk_flags.get(name)
        if spec is not None and spec.synthesis_directive:
            directives.append(spec.synthesis_directive)

    # --- Final escalate verdict ---------------------------------------------
    escalate = (
        high_impact
        or requires_26b_flag
        or long_context
        or multi_doc
        or low_confidence
    )

    return RoutingDecision(
        escalate_to_26b=escalate,
        # dict.fromkeys drops duplicates while preserving first-seen order.
        escalation_reasons=tuple(dict.fromkeys(reasons)),
        risk_flags=tuple(sorted_flags),
        high_impact_task=high_impact,
        synthesis_directives=tuple(directives),
        subject_domain_used=domain_name,
        used_fallback=used_fallback,
    )