feat(policy): prompt_render + policy_version hash

app/policy/prompt_render.py: - render_4b(task, subject) / render_26b(task, subject) — template + yaml excerpt 주입. {forbidden_block} / {subject_description} / {confidence_threshold} / {context_cap} placeholder 치환. - policy_version(task) → sha256(yaml_bytes + template_bytes)[:12]. deterministic — yaml 이나 template 이 바뀌면 hash 변경, analyze_events.policy_version 컬럼으로 drift 추적. - KNOWN_4B_TASKS / KNOWN_26B_TASKS — 잘못된 task 호출 시 ValueError. - 미정의 subject_domain 은 fallback_domain.description 사용. app/services/prompt_versions.py: - compute_policy_version(task) helper 추가. app.policy 지연 import 로 worker 경로에 정책 dependency 유입 방지 (런타임 격리). - 기존 ASK_PROMPT_VERSION / ANALYZE_PROMPT_VERSION 상수 미변경. plan: ~/.claude/plans/wise-gliding-hippo.md Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 09:31:19 +09:00
parent 5057c48ad3
commit f51583f9d6
2 changed files with 175 additions and 0 deletions
@@ -0,0 +1,153 @@
+"""Prompt rendering — yaml excerpt 를 template placeholder 에 주입.
+
+템플릿에는 다음 placeholder 가 있다:
+  {forbidden_block}          — subject 별 forbidden_for_4b 블록 주입
+  {subject_description}       — subject_domains[domain].description
+  {confidence_threshold}      — escalation.confidence_threshold
+  {context_cap}               — escalation.context_char_cap_4b
+  {context_cap_doc_count}     — P6 전용 (batch 문서 수 cap, 기본 500)
+
+policy_version() = sha256(yaml_bytes + template_bytes)[:12].
+yaml 또는 template 이 바뀌면 자동 bump → analyze_events.policy_version 으로 추적.
+"""
+
+from __future__ import annotations
+
+import hashlib
+from functools import lru_cache
+from pathlib import Path
+
+from app.policy.loader import load_policy, read_policy_bytes
+from app.policy.schema import DomainPolicy
+
+
+# Default template directory. It is resolved relative to this module, not the
+# repo root: the parent of this module's directory, then "prompts/policy".
+TEMPLATE_DIR = Path(__file__).resolve().parent.parent / "prompts" / "policy"
+
+# Task names for the 4B tier (kept separate from 26B for observability and
+# test convenience). render_4b rejects any task that is not in this set.
+KNOWN_4B_TASKS = {
+    "p1_triage",
+    "p2_nas_rule",
+    "p3a_short_summary",
+    "p3b_entities",
+    "p4a_advice_trigger",
+    "p4b_retrieval",
+    "p6_night_sweep",
+}
+# Task names for the 26B tier. render_26b rejects any task not in this set.
+KNOWN_26B_TASKS = {
+    "p3c_deep_summary",
+    "p4b_synthesis",
+}
+
+
+def _template_path(task: str) -> Path:
+    return TEMPLATE_DIR / f"{task}.txt"
+
+
+@lru_cache(maxsize=64)
+def _read_template(task: str) -> str:
+    path = _template_path(task)
+    if not path.exists():
+        raise FileNotFoundError(f"policy template '{task}' not found at {path}")
+    return path.read_text(encoding="utf-8")
+
+
+@lru_cache(maxsize=64)
+def _read_template_bytes(task: str) -> bytes:
+    return _template_path(task).read_bytes()
+
+
+def _forbidden_block_for(
+    policy: DomainPolicy, subject_domain: str
+) -> str:
+    """해당 도메인에 적용되는 forbidden_for_4b 규칙을 프롬프트 블록으로 렌더."""
+    lines = ["=== 4B 절대 금지 작업 ===",
+             "다음에 해당하면 자체 답변 금지, escalate_to_26b=true + envelope 만 응답.",
+             ""]
+    count = 0
+    for rule in policy.forbidden_for_4b:
+        if subject_domain in rule.applies_when_subject_in:
+            count += 1
+            lines.append(f"{count}. [{rule.id}] {rule.description}")
+    if count == 0:
+        lines.append("(해당 도메인에 등록된 금지 항목 없음 — 일반 규칙만 적용)")
+    lines.append("")
+    lines.append("금지 위반 시 사후 audit (check_4b_output_violations) 에서 탐지되어")
+    lines.append("policy_violation=true 로 기록 + under_escalation 큐로 재처리.")
+    return "\n".join(lines)
+
+
+def render_4b(
+    task: str,
+    subject_domain: str,
+    *,
+    policy: DomainPolicy | None = None,
+) -> str:
+    """4B 용 템플릿에 정책 excerpt 를 주입하고 반환.
+
+    사용자 input placeholder ({{filename}}, {{extracted_text}} 등, 이중중괄호) 는
+    그대로 남는다. PR-B 의 worker 가 str.format 또는 Template 으로 최종 주입.
+    """
+    if task not in KNOWN_4B_TASKS:
+        raise ValueError(f"'{task}' is not a 4B task (known: {KNOWN_4B_TASKS})")
+    if policy is None:
+        policy = load_policy()
+
+    template = _read_template(task)
+    domain_spec = (
+        policy.subject_domains.get(subject_domain)
+        or policy.fallback_domain
+    )
+
+    return template.format(
+        forbidden_block=_forbidden_block_for(policy, subject_domain),
+        subject_description=domain_spec.description,
+        confidence_threshold=policy.escalation.confidence_threshold,
+        context_cap=policy.escalation.context_char_cap_4b,
+        context_cap_doc_count=500,
+    )
+
+
+def render_26b(
+    task: str,
+    subject_domain: str,
+    *,
+    policy: DomainPolicy | None = None,
+) -> str:
+    """26B 용 템플릿 렌더."""
+    if task not in KNOWN_26B_TASKS:
+        raise ValueError(f"'{task}' is not a 26B task (known: {KNOWN_26B_TASKS})")
+    if policy is None:
+        policy = load_policy()
+
+    template = _read_template(task)
+    domain_spec = (
+        policy.subject_domains.get(subject_domain)
+        or policy.fallback_domain
+    )
+
+    return template.format(
+        forbidden_block=_forbidden_block_for(policy, subject_domain),
+        subject_description=domain_spec.description,
+        confidence_threshold=policy.escalation.confidence_threshold,
+        context_cap=policy.escalation.context_char_cap_26b,
+        context_cap_doc_count=500,
+    )
+
+
+def policy_version(task: str, *, policy_path: str | None = None) -> str:
+    """Return sha256(yaml_bytes + template_bytes)[:12].
+
+    Deterministic — 같은 (yaml, template) → 같은 hash. 한 쪽만 변경돼도 변경됨.
+    analyze_events.policy_version 에 저장되어 drift 추적.
+    """
+    yaml_bytes = read_policy_bytes(policy_path)
+    template_bytes = _read_template_bytes(task)
+    h = hashlib.sha256(yaml_bytes + template_bytes).hexdigest()
+    return h[:12]
+
+
+def clear_cache() -> None:
+    """테스트용 — 템플릿 재읽기."""
+    _read_template.cache_clear()
+    _read_template_bytes.cache_clear()
@@ -38,3 +38,25 @@ def resolve_primary_model() -> str | None:
    except Exception:
        pass
    return None
+
+
+# ─── Policy-layer prompt version helper (PR-A) ──────────────────────
+# domain_policy.yaml + 정책 template 의 결합 해시로 automatic version 산출.
+# analyze_events.policy_version 컬럼에 기록되어 drift 추적.
+#
+# 기존 ASK_PROMPT_VERSION / ANALYZE_PROMPT_VERSION 상수는 그대로 유지 — PR-B 에서
+# 정책 렌더된 프롬프트로 전환 시 compute_policy_version() 결과로 대체할지 병기할지 결정.
+
+def compute_policy_version(
+    task: str, *, policy_path: str | None = None
+) -> str:
+    """sha256(yaml_bytes + template_bytes)[:12] — deterministic hash.
+
+    task: policy template 이름 (예: 'p3a_short_summary'). app/prompts/policy/ 하위.
+    policy_path: override (테스트용). None 이면 loader 기본값.
+
+    import 지연 — app.policy 는 아직 worker 경로에서 쓰지 않는다 (PR-A 런타임 격리).
+    """
+    from app.policy.prompt_render import policy_version as _pv
+
+    return _pv(task, policy_path=policy_path)