feat(dashboard): Day 4 튜닝 — 임계치 재조정 + deep_summary 안정성 카드
3일 telemetry (599 triage / 555 deep) 기반 임계치 재평가:
1. 에스컬레이션 비율 — 임계치 의미 reframe
- 기존: >20% 적색 (튜닝 필요) → 실측 운영 패턴이 97% 라 항상 적색으로 표시됨
- 신규: <80% 적색 (정책 매칭 실패 증가)
- 메시지: "safety 정책상 95~100% 가 정상" 보조 표시
- safety_reference 99.7%, generic 100% (fallback risk_flag), msds 46.2%
→ 운영 정상 패턴 확인
2. Deep summary 안정성 — 신규 카드 추가
- mode='summary_deep' 의 error_code IS NOT NULL 비율
- 현재 5.2% (call_failed 21 + parse:ValidationError 8)
- >5% 적색 임계
- MLX 호출 timeout / JSON 파싱 실패 모니터
3. triage JSON 건강도, Backlog Suppression — 임계치 유지
- 현재 0%, 1% — 매우 안정. 보수적 임계 유효.
Backend: TierHealthStack 에 deep_total / deep_err_total 추가
Frontend: 카드 그리드 3열 → 4열 (lg), Day 4 신규 카드.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+15
-5
@@ -50,12 +50,15 @@ class QueueLag(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class TierHealthStack(BaseModel):
|
class TierHealthStack(BaseModel):
|
||||||
"""PR-B B-3 — tier 관측성 3종 카드 소스 (24h 윈도우).
|
"""PR-B B-3 — tier 관측성 카드 소스 (24h 윈도우).
|
||||||
|
|
||||||
대시보드 카드:
|
대시보드 카드 (Day 4 튜닝 — 2026-04-27 임계치 재조정):
|
||||||
- "에스컬레이션 비율": escalated_total / triage_total (>20% 적색, <1% 회색)
|
- "에스컬레이션 비율": escalated_total / triage_total
|
||||||
|
· <80% 적색 (정책 매칭 실패 증가 — 진짜 튜닝 필요)
|
||||||
|
· 80~100% 정상 (safety/health 정책 의도)
|
||||||
- "triage JSON 건강도": triage_json_invalid / triage_total (>5% 적색)
|
- "triage JSON 건강도": triage_json_invalid / triage_total (>5% 적색)
|
||||||
- "Backlog Suppression": suppressed_total / triage_total (>10% 주황)
|
- "Backlog Suppression": suppressed_total / triage_total (>10% 주황)
|
||||||
|
- "Deep summary 안정성": deep_err_total / deep_total (>5% 적색)
|
||||||
"""
|
"""
|
||||||
triage_total: int = 0
|
triage_total: int = 0
|
||||||
escalated_total: int = 0
|
escalated_total: int = 0
|
||||||
@@ -63,6 +66,9 @@ class TierHealthStack(BaseModel):
|
|||||||
escalation_by_domain: dict[str, int] = {} # safety_reference / news_item / ...
|
escalation_by_domain: dict[str, int] = {} # safety_reference / news_item / ...
|
||||||
triage_json_invalid: int = 0 # error_code='triage_json_invalid'
|
triage_json_invalid: int = 0 # error_code='triage_json_invalid'
|
||||||
suppressed_total: int = 0 # suppressed_reason IS NOT NULL
|
suppressed_total: int = 0 # suppressed_reason IS NOT NULL
|
||||||
|
# Day 4 튜닝 신규 — deep_summary 호출 안정성
|
||||||
|
deep_total: int = 0 # mode='summary_deep' 전체
|
||||||
|
deep_err_total: int = 0 # error_code IS NOT NULL (call_failed / parse:*)
|
||||||
|
|
||||||
|
|
||||||
class DashboardResponse(BaseModel):
|
class DashboardResponse(BaseModel):
|
||||||
@@ -216,13 +222,15 @@ async def get_dashboard(
|
|||||||
for row in lag_result.all()
|
for row in lag_result.all()
|
||||||
]
|
]
|
||||||
|
|
||||||
# ─── PR-B B-3 — tier 관측성 (24h) ───
|
# ─── PR-B B-3 — tier 관측성 (24h) + Day 4 deep_err 추가 ───
|
||||||
tier_rows = (await session.execute(text("""
|
tier_rows = (await session.execute(text("""
|
||||||
SELECT
|
SELECT
|
||||||
COUNT(*) FILTER (WHERE mode = 'summary_triage') AS triage_total,
|
COUNT(*) FILTER (WHERE mode = 'summary_triage') AS triage_total,
|
||||||
COUNT(*) FILTER (WHERE mode = 'summary_triage' AND escalated_to_26b = true) AS escalated_total,
|
COUNT(*) FILTER (WHERE mode = 'summary_triage' AND escalated_to_26b = true) AS escalated_total,
|
||||||
COUNT(*) FILTER (WHERE mode = 'summary_triage' AND error_code = 'triage_json_invalid') AS json_invalid,
|
COUNT(*) FILTER (WHERE mode = 'summary_triage' AND error_code = 'triage_json_invalid') AS json_invalid,
|
||||||
COUNT(*) FILTER (WHERE mode = 'summary_triage' AND suppressed_reason IS NOT NULL) AS suppressed_total
|
COUNT(*) FILTER (WHERE mode = 'summary_triage' AND suppressed_reason IS NOT NULL) AS suppressed_total,
|
||||||
|
COUNT(*) FILTER (WHERE mode = 'summary_deep') AS deep_total,
|
||||||
|
COUNT(*) FILTER (WHERE mode = 'summary_deep' AND error_code IS NOT NULL) AS deep_err_total
|
||||||
FROM analyze_events
|
FROM analyze_events
|
||||||
WHERE created_at > NOW() - INTERVAL '24 hours'
|
WHERE created_at > NOW() - INTERVAL '24 hours'
|
||||||
"""))).one()
|
"""))).one()
|
||||||
@@ -253,6 +261,8 @@ async def get_dashboard(
|
|||||||
escalated_total=int(tier_rows.escalated_total or 0),
|
escalated_total=int(tier_rows.escalated_total or 0),
|
||||||
triage_json_invalid=int(tier_rows.json_invalid or 0),
|
triage_json_invalid=int(tier_rows.json_invalid or 0),
|
||||||
suppressed_total=int(tier_rows.suppressed_total or 0),
|
suppressed_total=int(tier_rows.suppressed_total or 0),
|
||||||
|
deep_total=int(tier_rows.deep_total or 0),
|
||||||
|
deep_err_total=int(tier_rows.deep_err_total or 0),
|
||||||
escalation_by_reason=escalation_by_reason,
|
escalation_by_reason=escalation_by_reason,
|
||||||
escalation_by_domain=escalation_by_domain,
|
escalation_by_domain=escalation_by_domain,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -41,6 +41,9 @@ export interface TierHealthStack {
|
|||||||
escalation_by_domain: Record<string, number>;
|
escalation_by_domain: Record<string, number>;
|
||||||
triage_json_invalid: number;
|
triage_json_invalid: number;
|
||||||
suppressed_total: number;
|
suppressed_total: number;
|
||||||
|
// Day 4 신규 — deep_summary 호출 안정성
|
||||||
|
deep_total?: number;
|
||||||
|
deep_err_total?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface DashboardSummary {
|
export interface DashboardSummary {
|
||||||
|
|||||||
@@ -357,10 +357,15 @@
|
|||||||
{@const esc_rate = th.triage_total > 0 ? th.escalated_total / th.triage_total : 0}
|
{@const esc_rate = th.triage_total > 0 ? th.escalated_total / th.triage_total : 0}
|
||||||
{@const json_rate = th.triage_total > 0 ? th.triage_json_invalid / th.triage_total : 0}
|
{@const json_rate = th.triage_total > 0 ? th.triage_json_invalid / th.triage_total : 0}
|
||||||
{@const sup_rate = th.triage_total > 0 ? th.suppressed_total / th.triage_total : 0}
|
{@const sup_rate = th.triage_total > 0 ? th.suppressed_total / th.triage_total : 0}
|
||||||
{@const esc_tone = esc_rate > 0.20 ? 'text-error' : (esc_rate < 0.01 ? 'text-dim' : 'text-text')}
|
{@const deep_total = th.deep_total ?? 0}
|
||||||
|
{@const deep_err_rate = deep_total > 0 ? (th.deep_err_total ?? 0) / deep_total : 0}
|
||||||
|
<!-- Day 4 튜닝 (2026-04-27): 운영 패턴 실측 후 임계치 재조정.
|
||||||
|
3일 telemetry 기준 escalate 97% 가 정상 (safety 정책 의도) → <80% 가 진짜 신호. -->
|
||||||
|
{@const esc_tone = esc_rate < 0.80 ? 'text-error' : 'text-text'}
|
||||||
{@const json_tone = json_rate > 0.05 ? 'text-error' : 'text-text'}
|
{@const json_tone = json_rate > 0.05 ? 'text-error' : 'text-text'}
|
||||||
{@const sup_tone = sup_rate > 0.10 ? 'text-warning' : 'text-text'}
|
{@const sup_tone = sup_rate > 0.10 ? 'text-warning' : 'text-text'}
|
||||||
<div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-5">
|
{@const deep_tone = deep_err_rate > 0.05 ? 'text-error' : 'text-text'}
|
||||||
|
<div class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4 mb-5">
|
||||||
<!-- 에스컬레이션 비율 -->
|
<!-- 에스컬레이션 비율 -->
|
||||||
<Card class="h-full">
|
<Card class="h-full">
|
||||||
<div class="flex items-start justify-between">
|
<div class="flex items-start justify-between">
|
||||||
@@ -372,9 +377,9 @@
|
|||||||
</p>
|
</p>
|
||||||
<p class="text-xs text-dim mt-1">
|
<p class="text-xs text-dim mt-1">
|
||||||
{th.escalated_total} / {th.triage_total}
|
{th.escalated_total} / {th.triage_total}
|
||||||
{#if esc_rate > 0.20}<span class="text-error ml-1">(튜닝 필요)</span>{/if}
|
{#if esc_rate < 0.80}<span class="text-error ml-1">(매칭 실패 증가)</span>{/if}
|
||||||
{#if esc_rate < 0.01}<span class="text-dim ml-1">(false negative?)</span>{/if}
|
|
||||||
</p>
|
</p>
|
||||||
|
<p class="text-[10px] text-faint mt-1">safety 정책상 95~100% 가 정상</p>
|
||||||
{#if Object.keys(th.escalation_by_reason).length > 0}
|
{#if Object.keys(th.escalation_by_reason).length > 0}
|
||||||
<div class="mt-2 flex flex-wrap gap-1">
|
<div class="mt-2 flex flex-wrap gap-1">
|
||||||
{#each Object.entries(th.escalation_by_reason).slice(0, 4) as [reason, n]}
|
{#each Object.entries(th.escalation_by_reason).slice(0, 4) as [reason, n]}
|
||||||
@@ -417,6 +422,22 @@
|
|||||||
</p>
|
</p>
|
||||||
<p class="text-[10px] text-faint mt-1">10% 초과 시 ratio/pending threshold 조정</p>
|
<p class="text-[10px] text-faint mt-1">10% 초과 시 ratio/pending threshold 조정</p>
|
||||||
</Card>
|
</Card>
|
||||||
|
|
||||||
|
<!-- Deep summary 안정성 (Day 4 신규) -->
|
||||||
|
<Card class="h-full">
|
||||||
|
<div class="flex items-start justify-between">
|
||||||
|
<p class="text-sm text-dim">Deep summary 안정성 (24h)</p>
|
||||||
|
<Sparkles size={18} class="text-faint" />
|
||||||
|
</div>
|
||||||
|
<p class="text-2xl font-bold mt-2 {deep_tone}">
|
||||||
|
{(deep_err_rate * 100).toFixed(1)}%
|
||||||
|
</p>
|
||||||
|
<p class="text-xs text-dim mt-1">
|
||||||
|
실패 {th.deep_err_total ?? 0} / {deep_total}
|
||||||
|
{#if deep_err_rate > 0.05}<span class="text-error ml-1">(MLX 안정성 점검)</span>{/if}
|
||||||
|
</p>
|
||||||
|
<p class="text-[10px] text-faint mt-1">call_failed / parse:* 합계, 5% 초과 시 점검</p>
|
||||||
|
</Card>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user