feat(policy): prompt_render + policy_version hash

app/policy/prompt_render.py:
- render_4b(task, subject) / render_26b(task, subject) — template + yaml
  excerpt 주입. {forbidden_block} / {subject_description} /
  {confidence_threshold} / {context_cap} placeholder 치환.
- policy_version(task) → sha256(yaml_bytes + template_bytes)[:12].
  deterministic — yaml 이나 template 이 바뀌면 hash 변경,
  analyze_events.policy_version 컬럼으로 drift 추적.
- KNOWN_4B_TASKS / KNOWN_26B_TASKS — 잘못된 task 호출 ValueError.
- 미정의 subject_domain 은 fallback_domain.description 사용.

app/services/prompt_versions.py:
- compute_policy_version(task) helper 추가. app.policy 지연 import 로
  worker 경로에 정책 dependency 유입 방지 (런타임 격리).
- 기존 ASK_PROMPT_VERSION / ANALYZE_PROMPT_VERSION 상수 미변경.

plan: ~/.claude/plans/wise-gliding-hippo.md

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-24 09:31:19 +09:00
parent 5057c48ad3
commit f51583f9d6
2 changed files with 175 additions and 0 deletions
+153
View File
@@ -0,0 +1,153 @@
"""Prompt rendering — yaml excerpt 를 template placeholder 에 주입.
템플릿에는 다음 placeholder 가 있다:
{forbidden_block} — subject 별 forbidden_for_4b 블록 주입
{subject_description} — subject_domains[domain].description
{confidence_threshold} — escalation.confidence_threshold
{context_cap} — escalation.context_char_cap_4b
{context_cap_doc_count} — P6 전용 (batch 문서 수 cap, 기본 500)
policy_version() = sha256(yaml_bytes + template_bytes)[:12].
yaml 또는 template 이 바뀌면 자동 bump → analyze_events.policy_version 으로 추적.
"""
from __future__ import annotations
import hashlib
from functools import lru_cache
from pathlib import Path
from app.policy.loader import load_policy, read_policy_bytes
from app.policy.schema import DomainPolicy
# Default template directory: "prompts/policy" under the directory two levels
# above this file.
TEMPLATE_DIR = Path(__file__).resolve().parent.parent.joinpath("prompts", "policy")

# Task names split by model tier (4B vs 26B) — kept for observability and
# test convenience. The render functions reject tasks outside their set.
KNOWN_4B_TASKS = {
    "p1_triage",
    "p2_nas_rule",
    "p3a_short_summary",
    "p3b_entities",
    "p4a_advice_trigger",
    "p4b_retrieval",
    "p6_night_sweep",
}
KNOWN_26B_TASKS = {"p3c_deep_summary", "p4b_synthesis"}
def _template_path(task: str) -> Path:
    """Return the path of the ``<task>.txt`` template inside ``TEMPLATE_DIR``."""
    filename = f"{task}.txt"
    return TEMPLATE_DIR / filename
@lru_cache(maxsize=64)
def _read_template(task: str) -> str:
    """Load the template text for *task* as UTF-8, memoized per task name.

    Raises:
        FileNotFoundError: if no template file exists for *task*.
    """
    template_file = _template_path(task)
    if template_file.exists():
        return template_file.read_text(encoding="utf-8")
    raise FileNotFoundError(f"policy template '{task}' not found at {template_file}")
@lru_cache(maxsize=64)
def _read_template_bytes(task: str) -> bytes:
    """Return the raw bytes of the template file for *task*, memoized per task name."""
    template_file = _template_path(task)
    return template_file.read_bytes()
def _forbidden_block_for(
    policy: DomainPolicy, subject_domain: str
) -> str:
    """Render the forbidden_for_4b rules that apply to *subject_domain* as a prompt block.

    The block is a fixed header, then one numbered line per matching rule
    (``N. [rule.id] rule.description``) or a single placeholder line when no
    rule matches, then a fixed footer. Lines are joined with newlines.
    """
    # A rule applies when the domain is listed in its applies_when_subject_in.
    matching = [
        rule
        for rule in policy.forbidden_for_4b
        if subject_domain in rule.applies_when_subject_in
    ]

    if matching:
        body = [
            f"{index}. [{rule.id}] {rule.description}"
            for index, rule in enumerate(matching, start=1)
        ]
    else:
        body = ["(해당 도메인에 등록된 금지 항목 없음 — 일반 규칙만 적용)"]

    header = [
        "=== 4B 절대 금지 작업 ===",
        "다음에 해당하면 자체 답변 금지, escalate_to_26b=true + envelope 만 응답.",
        "",
    ]
    footer = [
        "",
        "금지 위반 시 사후 audit (check_4b_output_violations) 에서 탐지되어",
        "policy_violation=true 로 기록 + under_escalation 큐로 재처리.",
    ]
    return "\n".join(header + body + footer)
def render_4b(
    task: str,
    subject_domain: str,
    *,
    policy: DomainPolicy | None = None,
) -> str:
    """Inject policy excerpts into the 4B template for *task* and return the prompt.

    Only the policy-level placeholders are filled here. User-input
    placeholders are written with doubled braces in the template (e.g.
    ``{{filename}}``, ``{{extracted_text}}``); ``str.format`` collapses them
    to single braces, so they come out of this pass as ``{filename}`` for a
    later substitution step (per the original note, the PR-B worker).

    Args:
        task: template name; must be a member of ``KNOWN_4B_TASKS``.
        subject_domain: key into ``policy.subject_domains``. A missing (or
            falsy) entry falls back to ``policy.fallback_domain``.
        policy: pre-loaded policy; when ``None`` it is obtained from
            ``load_policy()``.

    Returns:
        The template text with the policy placeholders substituted.

    Raises:
        ValueError: if *task* is not a known 4B task.
        FileNotFoundError: if no template file exists for *task*.
    """
    if task not in KNOWN_4B_TASKS:
        # sorted() keeps the message stable between processes; a raw set repr
        # changes order under string hash randomization.
        raise ValueError(
            f"'{task}' is not a 4B task (known: {sorted(KNOWN_4B_TASKS)})"
        )
    if policy is None:
        policy = load_policy()
    template = _read_template(task)
    # Domains not defined in the policy use the fallback domain's description.
    domain_spec = (
        policy.subject_domains.get(subject_domain)
        or policy.fallback_domain
    )
    return template.format(
        forbidden_block=_forbidden_block_for(policy, subject_domain),
        subject_description=domain_spec.description,
        confidence_threshold=policy.escalation.confidence_threshold,
        context_cap=policy.escalation.context_char_cap_4b,
        # Fixed document-count cap; templates without this placeholder simply
        # ignore the extra keyword.
        context_cap_doc_count=500,
    )
def render_26b(
    task: str,
    subject_domain: str,
    *,
    policy: DomainPolicy | None = None,
) -> str:
    """Inject policy excerpts into the 26B template for *task* and return the prompt.

    Fills the same policy-level placeholders as the 4B renderer, except that
    ``{context_cap}`` is taken from ``escalation.context_char_cap_26b``.
    Doubled-brace placeholders in the template (e.g. ``{{filename}}``) are
    collapsed to single braces by ``str.format`` and are left for a later
    substitution step.

    Args:
        task: template name; must be a member of ``KNOWN_26B_TASKS``.
        subject_domain: key into ``policy.subject_domains``. A missing (or
            falsy) entry falls back to ``policy.fallback_domain``.
        policy: pre-loaded policy; when ``None`` it is obtained from
            ``load_policy()``.

    Returns:
        The template text with the policy placeholders substituted.

    Raises:
        ValueError: if *task* is not a known 26B task.
        FileNotFoundError: if no template file exists for *task*.
    """
    if task not in KNOWN_26B_TASKS:
        # sorted() keeps the message stable between processes; a raw set repr
        # changes order under string hash randomization.
        raise ValueError(
            f"'{task}' is not a 26B task (known: {sorted(KNOWN_26B_TASKS)})"
        )
    if policy is None:
        policy = load_policy()
    template = _read_template(task)
    # Domains not defined in the policy use the fallback domain's description.
    domain_spec = (
        policy.subject_domains.get(subject_domain)
        or policy.fallback_domain
    )
    return template.format(
        forbidden_block=_forbidden_block_for(policy, subject_domain),
        subject_description=domain_spec.description,
        confidence_threshold=policy.escalation.confidence_threshold,
        context_cap=policy.escalation.context_char_cap_26b,
        # Fixed document-count cap; templates without this placeholder simply
        # ignore the extra keyword.
        context_cap_doc_count=500,
    )
def policy_version(task: str, *, policy_path: str | None = None) -> str:
    """Return the first 12 hex characters of sha256(yaml_bytes + template_bytes).

    The result is deterministic: the same (yaml, template) pair always yields
    the same hash, and a change to either input changes it. Per the original
    notes, the value is recorded in analyze_events.policy_version to track
    drift.

    Args:
        task: template name whose bytes are hashed.
        policy_path: optional policy file override passed to
            ``read_policy_bytes``.
    """
    hasher = hashlib.sha256()
    # Feeding the two byte strings in order is equivalent to hashing their
    # concatenation.
    hasher.update(read_policy_bytes(policy_path))
    hasher.update(_read_template_bytes(task))
    return hasher.hexdigest()[:12]
def clear_cache() -> None:
    """Test helper: drop the memoized templates so they are read again."""
    for cached_reader in (_read_template, _read_template_bytes):
        cached_reader.cache_clear()
+22
View File
@@ -38,3 +38,25 @@ def resolve_primary_model() -> str | None:
except Exception:
pass
return None
# ─── Policy-layer prompt version helper (PR-A) ──────────────────────
# domain_policy.yaml + 정책 template 의 결합 해시로 automatic version 산출.
# analyze_events.policy_version 컬럼에 기록되어 drift 추적.
#
# 기존 ASK_PROMPT_VERSION / ANALYZE_PROMPT_VERSION 상수는 그대로 유지 — PR-B 에서
# 정책 렌더된 프롬프트로 전환 시 compute_policy_version() 결과로 대체할지 병기할지 결정.
def compute_policy_version(
    task: str, *, policy_path: str | None = None
) -> str:
    """Return the deterministic policy hash, sha256(yaml_bytes + template_bytes)[:12].

    Args:
        task: policy template name (e.g. ``'p3a_short_summary'``), located
            under ``app/prompts/policy/``.
        policy_path: override intended for tests; ``None`` uses the loader
            default.
    """
    # Deferred import: app.policy is not used on the worker path yet (PR-A
    # runtime isolation), so it is only loaded when this helper is called.
    from app.policy.prompt_render import policy_version as _policy_version

    version = _policy_version(task, policy_path=policy_path)
    return version