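# hyungi_document_server/app/policy/prompt_render.py

"""Prompt rendering — inject YAML policy excerpts into template placeholders.

Templates may contain the following placeholders:
  {forbidden_block}          — forbidden_for_4b block rendered for the subject
  {subject_description}       — subject_domains[domain].description
                                (fallback_domain when the domain is not registered)
  {confidence_threshold}      — escalation.confidence_threshold
  {context_cap}               — escalation.context_char_cap_4b for 4B tasks,
                                escalation.context_char_cap_26b for 26B tasks
  {context_cap_doc_count}     — P6 only (cap on documents per batch, currently fixed at 500)

policy_version() = sha256(yaml_bytes + template_bytes)[:12].
A change to either the YAML or the template bumps it automatically, and the
value is tracked through analyze_events.policy_version. Template contents are
cached per process, so an on-disk template edit is only picked up after
clear_cache() or a process restart.
"""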

from __future__ import annotations

import hashlib
from functools import lru_cache
from pathlib import Path

from policy.loader import load_policy, read_policy_bytes
from policy.schema import DomainPolicy


# Default template directory: two levels above this file, then prompts/policy.
TEMPLATE_DIR = Path(__file__).resolve().parent.parent.joinpath("prompts", "policy")

# Task names split by target model (4B vs 26B); kept for observability and
# test convenience. render_4b / render_26b reject any task outside its set.
KNOWN_4B_TASKS = {
    "p1_triage",
    "p2_nas_rule",
    "p3a_short_summary",
    "p3b_entities",
    "p4a_advice_trigger",
    "p4b_retrieval",
    "p6_night_sweep",
}
KNOWN_26B_TASKS = {
    "p3c_deep_summary",
    # presegment PR2 — reduce stage of the huge-document map-reduce
    # (a summary of summaries).
    "p3c_deep_summary_reduce",
    "p4b_synthesis",
}


def _template_path(task: str) -> Path:
    """Return the path of the template file for ``task``.

    The file is ``<task>.txt`` directly under ``TEMPLATE_DIR``. No existence
    check is performed here.
    """
    filename = f"{task}.txt"
    return TEMPLATE_DIR.joinpath(filename)


@lru_cache(maxsize=64)
def _read_template(task: str) -> str:
    """Return the UTF-8 text of the template for ``task``.

    Successful reads are memoized per task name (up to 64 entries), so a file
    edited on disk is not re-read until the cache is cleared.

    Raises:
        FileNotFoundError: if the template file does not exist.
    """
    template_file = _template_path(task)
    if template_file.exists():
        return template_file.read_text(encoding="utf-8")
    raise FileNotFoundError(
        f"policy template '{task}' not found at {template_file}"
    )


@lru_cache(maxsize=64)
def _read_template_bytes(task: str) -> bytes:
    """Return the raw bytes of the template for ``task``.

    Memoized per task name (up to 64 entries), independently of the text
    reader. A missing file surfaces as whatever ``Path.read_bytes`` raises.
    """
    template_file = _template_path(task)
    return template_file.read_bytes()


def _forbidden_block_for(
    policy: DomainPolicy, subject_domain: str
) -> str:
    """해당 도메인에 적용되는 forbidden_for_4b 규칙을 프롬프트 블록으로 렌더."""
    lines = ["=== 4B 절대 금지 작업 ===",
             "다음에 해당하면 자체 답변 금지, escalate_to_26b=true + envelope 만 응답.",
             ""]
    count = 0
    for rule in policy.forbidden_for_4b:
        if subject_domain in rule.applies_when_subject_in:
            count += 1
            lines.append(f"{count}. [{rule.id}] {rule.description}")
    if count == 0:
        lines.append("(해당 도메인에 등록된 금지 항목 없음 — 일반 규칙만 적용)")
    lines.append("")
    lines.append("금지 위반 시 사후 audit (check_4b_output_violations) 에서 탐지되어")
    lines.append("policy_violation=true 로 기록 + under_escalation 큐로 재처리.")
    return "\n".join(lines)


def render_4b(
    task: str,
    subject_domain: str,
    *,
    policy: DomainPolicy | None = None,
) -> str:
    """Render the 4B template for ``task`` with policy excerpts injected.

    Only the policy placeholders are filled in here. User-input placeholders
    written with double braces ({{filename}}, {{extracted_text}}, ...) are not
    substituted; ``str.format`` turns them into their single-brace form so a
    later stage can fill them (per the original note, the PR-B worker does
    this via str.format or Template).

    Args:
        task: Template name; must be a member of ``KNOWN_4B_TASKS``.
        subject_domain: Domain key looked up in ``policy.subject_domains``;
            ``policy.fallback_domain`` is used when the lookup is empty.
        policy: Policy to render from; loaded with ``load_policy()`` when None.

    Raises:
        ValueError: if ``task`` is not a known 4B task.
    """
    if task not in KNOWN_4B_TASKS:
        raise ValueError(f"'{task}' is not a 4B task (known: {KNOWN_4B_TASKS})")

    active_policy = load_policy() if policy is None else policy
    raw_template = _read_template(task)

    spec = active_policy.subject_domains.get(subject_domain)
    if not spec:
        spec = active_policy.fallback_domain

    substitutions = {
        "forbidden_block": _forbidden_block_for(active_policy, subject_domain),
        "subject_description": spec.description,
        "confidence_threshold": active_policy.escalation.confidence_threshold,
        "context_cap": active_policy.escalation.context_char_cap_4b,
        # Batch document-count cap; only the P6 template is documented to use it.
        "context_cap_doc_count": 500,
    }
    return raw_template.format(**substitutions)


def render_26b(
    task: str,
    subject_domain: str,
    *,
    policy: DomainPolicy | None = None,
) -> str:
    """Render the 26B template for ``task`` with policy excerpts injected.

    Fills the same policy placeholders as the 4B renderer, except that
    ``context_cap`` comes from ``escalation.context_char_cap_26b``.

    Args:
        task: Template name; must be a member of ``KNOWN_26B_TASKS``.
        subject_domain: Domain key looked up in ``policy.subject_domains``;
            ``policy.fallback_domain`` is used when the lookup is empty.
        policy: Policy to render from; loaded with ``load_policy()`` when None.

    Raises:
        ValueError: if ``task`` is not a known 26B task.
    """
    if task not in KNOWN_26B_TASKS:
        raise ValueError(f"'{task}' is not a 26B task (known: {KNOWN_26B_TASKS})")

    active_policy = load_policy() if policy is None else policy
    raw_template = _read_template(task)

    spec = active_policy.subject_domains.get(subject_domain)
    if not spec:
        spec = active_policy.fallback_domain

    substitutions = {
        "forbidden_block": _forbidden_block_for(active_policy, subject_domain),
        "subject_description": spec.description,
        "confidence_threshold": active_policy.escalation.confidence_threshold,
        "context_cap": active_policy.escalation.context_char_cap_26b,
        # Same fixed batch document-count cap the 4B renderer passes.
        "context_cap_doc_count": 500,
    }
    return raw_template.format(**substitutions)


def policy_version(task: str, *, policy_path: str | None = None) -> str:
    """Return the first 12 hex characters of sha256(yaml_bytes + template_bytes).

    The same (yaml, template) pair always yields the same value, and a change
    to either input changes it. Per the module notes, the value is stored in
    analyze_events.policy_version to track drift.

    Template bytes come from a memoized reader, so an on-disk template edit is
    reflected only after the cache is cleared.

    Args:
        task: Template name whose bytes are hashed.
        policy_path: Forwarded to ``read_policy_bytes``; None uses its default.
    """
    policy_bytes = read_policy_bytes(policy_path)
    hasher = hashlib.sha256()
    # Feeding the two inputs in order is equivalent to hashing their concatenation.
    hasher.update(policy_bytes)
    hasher.update(_read_template_bytes(task))
    return hasher.hexdigest()[:12]


def clear_cache() -> None:
    """Drop memoized template text and bytes so they are re-read (for tests)."""
    for cached_reader in (_read_template, _read_template_bytes):
        cached_reader.cache_clear()