-- Phase 4 operational health check — 4-A (study_question_jobs) + 4-B (study_quiz_session_jobs)
-- Usage:
--   ssh gpu 'docker exec -i hyungi_document_server-postgres-1 psql -U pkm pkm' < scripts/phase4_health.sql
--   or pick individual sections and run only those. Every section is read-only.
--
-- NOTE: a `--` comment runs to end of line and a psql meta-command (\echo)
-- consumes the rest of its line, so every comment, \echo and SQL statement
-- must stay on its own line(s) for the script to execute.

\echo '── 1. 4-A study_question_jobs status × error_code 분포 ──'
-- GROUP BY binds error_code to the table column (input columns win over
-- SELECT aliases there), so NULL codes form one group displayed as '(none)'.
SELECT
    status,
    COALESCE(error_code, '(none)') AS error_code,
    COUNT(*) AS cnt
FROM study_question_jobs
GROUP BY status, error_code
ORDER BY status, error_code;

\echo ''
\echo '── 2. 4-B study_quiz_session_jobs status × error_code 분포 ──'
-- Same breakdown as section 1, for the 4-B job table.
SELECT
    status,
    COALESCE(error_code, '(none)') AS error_code,
    COUNT(*) AS cnt
FROM study_quiz_session_jobs
GROUP BY status, error_code
ORDER BY status, error_code;

\echo ''
\echo '── 3. 4-B study_quiz_session_analysis confidence 분포 (calibrated) ──'
-- Rows per confidence value, plus how many of them are flagged is_stale.
-- Display order is fixed: high, medium, low, then anything else (incl. NULL).
SELECT
    COALESCE(confidence, '(null)') AS confidence,
    COUNT(*) AS cnt,
    COUNT(*) FILTER (WHERE is_stale) AS stale_count
FROM study_quiz_session_analysis
GROUP BY confidence
ORDER BY
    CASE COALESCE(confidence, '(null)')
        WHEN 'high' THEN 0
        WHEN 'medium' THEN 1
        WHEN 'low' THEN 2
        ELSE 3
    END;

\echo ''
\echo '── 4. 4-B confidence calibration 차이 (job.payload 기반) ──'
\echo ' model_confidence_raw vs calibrated_confidence — 자료 부족 cap 작동 빈도 측정'
-- Top 20 (raw confidence, calibrated confidence, docs count, ready count)
-- combinations among completed jobs whose payload has model_confidence_raw.
-- `?` is the key-exists operator (assumes payload is jsonb — TODO confirm).
-- The extra ORDER BY keys break ties on cnt so the LIMIT cut is deterministic.
SELECT
    payload->>'model_confidence_raw' AS model_raw,
    payload->>'calibrated_confidence' AS calibrated,
    (payload->>'ctx_docs_count')::int AS docs_n,
    (payload->>'ready_explanation_count')::int AS ready_n,
    COUNT(*) AS cnt
FROM study_quiz_session_jobs
WHERE status = 'completed'
  AND payload IS NOT NULL
  AND payload ? 'model_confidence_raw'
GROUP BY model_raw, calibrated, docs_n, ready_n
ORDER BY cnt DESC, model_raw, calibrated, docs_n, ready_n
LIMIT 20;

\echo ''
\echo '── 5. 4-A/4-B 최근 7일 처리 지연 (created_at → completed_at) ──'
\echo ' p50/p95/max 단순 ROUND(EXTRACT). 4-A 와 4-B 분리.'
-- 5. Latency in seconds (created_at → completed_at) for jobs created in the
-- last 7 days that have a completed_at; one summary row per job table.
-- The latency expression is computed once per table in a CTE. Each branch
-- aggregates without GROUP BY, so a table with no matching jobs still yields
-- a row (terminal_n = 0, NULL statistics).
WITH question_latency AS (
    SELECT EXTRACT(EPOCH FROM (completed_at - created_at)) AS latency_sec
    FROM study_question_jobs
    WHERE created_at >= NOW() - INTERVAL '7 days'
      AND completed_at IS NOT NULL
),
session_latency AS (
    SELECT EXTRACT(EPOCH FROM (completed_at - created_at)) AS latency_sec
    FROM study_quiz_session_jobs
    WHERE created_at >= NOW() - INTERVAL '7 days'
      AND completed_at IS NOT NULL
)
SELECT
    'study_question_jobs' AS source,
    COUNT(*) AS terminal_n,
    ROUND(AVG(latency_sec)::numeric, 1) AS avg_sec,
    ROUND(MAX(latency_sec)::numeric, 1) AS max_sec,
    ROUND((PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY latency_sec))::numeric, 1) AS p50_sec,
    ROUND((PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY latency_sec))::numeric, 1) AS p95_sec
FROM question_latency
UNION ALL
SELECT
    'study_quiz_session_jobs',
    COUNT(*),
    ROUND(AVG(latency_sec)::numeric, 1),
    ROUND(MAX(latency_sec)::numeric, 1),
    ROUND((PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY latency_sec))::numeric, 1),
    ROUND((PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY latency_sec))::numeric, 1)
FROM session_latency;

\echo ''
\echo '── 6. 4-A/4-B skipped 사유 분포 (어떤 데이터 부족이 가장 많이 막는가) ──'
-- Skipped jobs per error_code for both job tables. NULL codes are shown as
-- '(none)', matching the status × error_code breakdowns in sections 1–2.
-- GROUP BY binds error_code to the table column, not the SELECT alias.
-- error_code is a final ORDER BY key so equal counts sort deterministically.
SELECT
    'study_question_jobs' AS source,
    COALESCE(error_code, '(none)') AS error_code,
    COUNT(*) AS cnt
FROM study_question_jobs
WHERE status = 'skipped'
GROUP BY error_code
UNION ALL
SELECT
    'study_quiz_session_jobs',
    COALESCE(error_code, '(none)'),
    COUNT(*)
FROM study_quiz_session_jobs
WHERE status = 'skipped'
GROUP BY error_code
ORDER BY source, cnt DESC, error_code;

\echo ''
\echo '── 7. 4-B guard_fail / parse_fail / llm_timeout 비율 (전체 job 대비) ──'
-- Share of each listed failure code among ALL rows of study_quiz_session_jobs
-- (the denominator is the whole table, not only failed jobs). 'unknown' is
-- counted as well, although the section title does not mention it.
-- NULLIF guards the division against a zero denominator.
SELECT
    error_code,
    COUNT(*) AS cnt,
    ROUND(
        100.0 * COUNT(*)
        / NULLIF((SELECT COUNT(*) FROM study_quiz_session_jobs), 0),
        1
    ) AS pct
FROM study_quiz_session_jobs
WHERE error_code IN ('guard_fail', 'parse_fail', 'llm_timeout', 'unknown')
GROUP BY error_code
ORDER BY cnt DESC, error_code;