Files
hyungi_document_server/app/workers/deep_summary_worker.py
T
Hyungi Ahn 93a687a51d fix(ai): B-1 deep_summary 잘린 응답 field-level regex fallback
_parse_outermost_json 도 열린 문자열 중간에 응답 끊기면 실패.
실전 MLX 응답이 entities_confirmed 내부 문자열에서 종료되는 패턴이라
detail/tldr/bullets/inconsistencies 전부 손실되던 이슈.

_regex_extract_fields helper 추가: "key":"value" 쌍 개별 매칭으로
앞쪽 완결된 필드만이라도 건진다. detail 이 응답 앞부분에 있어 잘림
지점보다 앞이면 성공.

순서:
  1. _parse_outermost_json (brace balance)
  2. parse_json_response (기존 regex)
  3. _regex_extract_fields (field-level fallback)

entities_confirmed 제거 같은 프롬프트 수정은 PR-A 영역이라 건드리지
않고, PR-B 워커에서 방어. 근본 해결은 p3c_deep_summary 에서 불필요
필드 제거 또는 max_tokens 튜닝을 policy 소유자가 결정.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 11:27:04 +09:00

319 lines
12 KiB
Python

"""PR-B B-1 Deep Summary 워커 — 26B (primary MLX) 에스컬레이션 분석.
큐 stage 'deep_summary' 에서 pickup. classify_worker 가 enqueue 시 payload 로 실은
EscalationEnvelope + subject_domain 을 읽어, PR-A policy 템플릿 `p3c_deep_summary` 를
렌더링한 뒤 26B primary 를 호출한다. llm_gate Semaphore(1) 경유 — MLX 단일 인퍼런스 보호.
출력을 documents.ai_detail_summary / ai_inconsistencies 에 저장하고 ai_analysis_tier 를
'deep' 으로 전이. 실패는 무해하게 legacy 결과 보존.
"""
from __future__ import annotations
import json
import time
from datetime import datetime, timezone
from pydantic import BaseModel, Field, ValidationError
from sqlalchemy import desc, select
from sqlalchemy.ext.asyncio import AsyncSession
import json
import re
from ai.client import AIClient, parse_json_response, strip_thinking
from ai.envelope import EscalationEnvelope
from core.config import settings
from core.utils import setup_logger
from models.document import Document
from models.queue import ProcessingQueue
from policy.prompt_render import render_26b, policy_version as compute_policy_version
from services.document_telemetry import record_analyze_event
from services.search.llm_gate import get_mlx_gate
logger = setup_logger("deep_summary_worker")
DEEP_SUMMARY_TASK = "p3c_deep_summary"
# inconsistencies kind 허용 목록 (feedback_document_server_domain_scope.md — 구매/계약 제외)
ALLOWED_INCONSISTENCY_KINDS = {
"version_drift", "procedure_conflict", "source_conflict", "missing_basis",
}
class DeepSummaryOutput(BaseModel):
"""p3c_deep_summary (26B) 응답 스키마. 파싱 실패 시 기본값 + skip."""
mode: str = "single"
tldr: str = ""
bullets: list[str] = Field(default_factory=list)
detail: str = "" # 26B 가 채우는 상세 (detail_summary 로 저장)
bundle_flow: list[str] | None = None
inconsistencies: list[dict] | None = None
entities_confirmed: dict | None = None
directives_applied: list[str] | None = None
confidence: float = 0.5
async def process(document_id: int, session: AsyncSession) -> None:
"""deep_summary 큐 pickup → 26B 호출 → 필드 저장."""
doc = await session.get(Document, document_id)
if not doc:
raise ValueError(f"deep_summary: document id={document_id} 없음")
# 최신 deep_summary 큐 행의 payload 조회 (queue_consumer 가 status='processing' 으로 세팅한 상태)
queue_row = (await session.execute(
select(ProcessingQueue)
.where(
ProcessingQueue.document_id == document_id,
ProcessingQueue.stage == "deep_summary",
ProcessingQueue.status == "processing",
)
.order_by(desc(ProcessingQueue.id))
.limit(1)
)).scalar_one_or_none()
if not queue_row:
logger.warning(f"[deep] processing 상태의 deep_summary row 없음 id={document_id}")
return
payload = queue_row.payload or {}
envelope_raw = payload.get("envelope")
subject_domain = payload.get("subject_domain") or "generic"
if not envelope_raw:
logger.error(f"[deep] envelope 없음 id={document_id} payload_keys={list(payload.keys())}")
raise ValueError("deep_summary payload 에 envelope 없음")
envelope = EscalationEnvelope.from_json(json.dumps(envelope_raw))
# 원문 슬라이스 추출 (envelope.original_pointers.text_ranges 기반)
slices = _build_text_slices(doc.extracted_text or "", envelope.original_pointers)
# PR-A 템플릿 렌더
try:
rendered = render_26b(DEEP_SUMMARY_TASK, subject_domain)
except Exception as exc:
logger.exception(f"[deep] render_26b 실패 subject={subject_domain}: {exc}")
raise
prompt = (
rendered
.replace("{escalation_envelope_json}", envelope.to_system_injection())
.replace("{original_text_slices}", slices)
)
client = AIClient()
latency_ms = 0
parse_error: str | None = None
deep_out = DeepSummaryOutput()
try:
start = time.perf_counter()
async with get_mlx_gate(): # primary(26B) 보호 Semaphore(1)
raw = await client.call_primary(prompt)
latency_ms = int((time.perf_counter() - start) * 1000)
except Exception as exc:
logger.warning(f"[deep] 26B 호출 실패 id={document_id}: {exc}")
parse_error = "call_failed"
raw = ""
finally:
await client.close()
if raw:
try:
# parse_json_response 는 중첩 JSON (entities_confirmed) 을 최외곽으로 오인하는
# 케이스가 있어 — deep_summary 응답에서 자주 발생 — 최외곽 추출 전용 helper 사용.
parsed = _parse_outermost_json(raw) or parse_json_response(raw)
if not parsed:
# 잘린 응답 fallback — field-level regex 로 detail/tldr/inconsistencies 추출
parsed = _regex_extract_fields(raw)
deep_out = DeepSummaryOutput.model_validate(parsed or {})
if not deep_out.detail and parsed and parsed.get("_fallback"):
logger.info(f"[deep] id={document_id} regex fallback parsed keys={list(parsed.keys())}")
except (ValidationError, ValueError, TypeError) as exc:
parse_error = f"parse:{type(exc).__name__}"
logger.warning(f"[deep] JSON 파싱/검증 실패 id={document_id}: {exc}")
if not parse_error:
doc.ai_detail_summary = (deep_out.detail or "").strip() or None
doc.ai_inconsistencies = _filter_inconsistencies(deep_out.inconsistencies or [])
doc.ai_analysis_tier = "deep"
doc.ai_processed_at = datetime.now(timezone.utc)
try:
pv = compute_policy_version(DEEP_SUMMARY_TASK)
except Exception:
pv = None
await record_analyze_event(
doc_id=document_id,
user_id=None,
mode="summary_deep",
text_limit=settings.ai.primary.context_char_limit or 260000,
truncated=False,
layers_returned=["detail_summary", "inconsistencies"] if not parse_error else [],
cached=False,
latency_ms=latency_ms,
model_name=settings.ai.primary.model,
prompt_version=(f"{DEEP_SUMMARY_TASK}@{pv}" if pv else DEEP_SUMMARY_TASK),
error_code=parse_error,
source="document_server",
subject_domain=subject_domain,
risk_flags=list(envelope.risk_flags),
high_impact_task=None,
escalation_reasons=list(envelope.escalation_reasons),
confidence=deep_out.confidence,
policy_version=pv,
shadow_would_route_to="primary",
tier="primary",
escalated_to_26b=True,
suppressed_reason=None,
)
logger.info(
f"[deep] id={document_id} subject={subject_domain} "
f"detail_len={len(doc.ai_detail_summary or '')} "
f"inc={len(doc.ai_inconsistencies or [])} latency_ms={latency_ms} "
f"parse_error={parse_error}"
)
def _build_text_slices(text: str, pointers: dict) -> str:
"""original_pointers.text_ranges 의 [{start, end}] 를 실제 본문 슬라이스로 합친다.
3조각 이상이면 각 조각 앞에 [slice N — head/middle/tail] 라벨을 붙여 26B 가 순서 인지.
단일 슬라이스면 라벨 없이 원문 그대로.
"""
ranges = (pointers or {}).get("text_ranges") or []
if not ranges:
return text[:260_000]
if len(ranges) == 1:
r = ranges[0]
return text[r.get("start", 0):r.get("end", len(text))]
labels = ["head", "middle", "tail"]
parts: list[str] = []
for idx, r in enumerate(ranges):
label = labels[idx] if idx < len(labels) else f"slice{idx}"
chunk = text[r.get("start", 0):r.get("end", len(text))]
parts.append(f"[slice {idx}{label}]\n{chunk}")
return "\n\n".join(parts)
def _parse_outermost_json(raw: str) -> dict | None:
"""Response 의 첫 '{' 부터 brace balance 로 최외곽 JSON 추출.
parse_json_response 의 re.finditer 패턴이 1단계 중첩까지만 매치해서 deep_summary
응답처럼 `entities_confirmed: {...}` 2단계 중첩이 포함된 경우 최외곽 대신 내부
객체만 반환되는 문제를 우회. 또한 응답이 잘려 closure `}` 가 없으면 강제로
`}` 추가 시도하여 부분 파싱.
"""
cleaned = strip_thinking(raw)
code_match = re.search(r"```(?:json)?\s*(\{.*)", cleaned, re.DOTALL)
if code_match:
cleaned = code_match.group(1)
start = cleaned.find("{")
if start < 0:
return None
depth = 0
end = -1
in_str = False
esc = False
for i in range(start, len(cleaned)):
ch = cleaned[i]
if esc:
esc = False
continue
if ch == "\\":
esc = True
continue
if ch == '"':
in_str = not in_str
continue
if in_str:
continue
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
end = i + 1
break
if end > 0:
try:
return json.loads(cleaned[start:end])
except json.JSONDecodeError:
pass
# 응답 잘림 — 남은 depth 만큼 `}` 보강 후 재시도
if depth > 0:
candidate = cleaned[start:].rstrip().rstrip(",") + ("}" * depth)
try:
return json.loads(candidate)
except json.JSONDecodeError:
pass
return None
def _regex_extract_fields(raw: str) -> dict:
"""JSON parse 실패 시 field-level regex 로 detail/tldr/mode/inconsistencies 추출.
응답이 잘렸거나 중간에 문자열이 끊긴 경우에도 앞쪽에 완결된 필드는 건진다.
`"detail": ""` 처럼 key-value 쌍을 개별 매칭.
"""
def _str_field(key: str) -> str | None:
m = re.search(rf'"{key}"\s*:\s*"((?:[^"\\]|\\.)*)"', raw, re.DOTALL)
if not m:
return None
try:
# JSON string escape 복원 (\n, \\, \" 등)
return json.loads('"' + m.group(1) + '"')
except json.JSONDecodeError:
return m.group(1)
def _arr_field(key: str) -> list | None:
# 단순 문자열 배열만 지원 — bullets / inconsistencies_desc 등
m = re.search(rf'"{key}"\s*:\s*(\[[^\]]*\])', raw, re.DOTALL)
if not m:
return None
try:
return json.loads(m.group(1))
except json.JSONDecodeError:
return None
out: dict = {"_fallback": True}
mode = _str_field("mode")
if mode:
out["mode"] = mode
tldr = _str_field("tldr")
if tldr:
out["tldr"] = tldr
detail = _str_field("detail")
if detail:
out["detail"] = detail
bullets = _arr_field("bullets")
if bullets is not None:
out["bullets"] = bullets
inc = _arr_field("inconsistencies")
if inc is not None:
out["inconsistencies"] = inc
return out
def _filter_inconsistencies(items: list) -> list[dict]:
"""허용 kind 목록 (safety/news 도메인 한정) 만 통과시킨다.
`amount_mismatch` 같은 구매/계약 kind 는 여기서 drop (feedback_document_server_domain_scope.md).
구조 오류 (kind/desc 누락) 도 drop.
"""
out: list[dict] = []
for it in items or []:
if not isinstance(it, dict):
continue
kind = str(it.get("kind") or "")
desc_ = str(it.get("desc") or "").strip()
if kind not in ALLOWED_INCONSISTENCY_KINDS:
continue
if not desc_:
continue
out.append({"kind": kind, "desc": desc_})
return out