From 59bde9a39950271ffc35a50219c41d9b4fa0cb3d Mon Sep 17 00:00:00 2001 From: hyungi Date: Sun, 24 May 2026 04:01:49 +0000 Subject: [PATCH] =?UTF-8?q?feat(search):=20phase-2q=20apply=20opt-in=20?= =?UTF-8?q?=E2=80=94=20production=20rollout=20=EC=8B=9C=EC=9E=91,=201?= =?UTF-8?q?=EC=A3=BC=20=EA=B4=80=EC=B0=B0=20(gemma-4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit plan pr-2q-apply-query-rewrite-1-bright-meadow.md. Phase 2Q Diagnose closure + Rerank-Payload-Fix (main 0257a5d) 완료 후 Apply rollout. opt-in path 가 Phase 1B/2 부터 이미 production 가동 중 → 본 PR 의 production 영향 0 (marker PR). rollout 정책: · default = rewrite_backend null (single-query path, baseline 회귀 0 invariant) · 명시 opt-in = ?rewrite_backend=cand_multi_query_macmini (추천 gemma-4) · 대안 = cand_multi_query_macbook (qwen3.6, mixed/english 강점, MacBook 가동 시) · 1주 관찰 (2026-05-24 ~ 2026-05-31) → metric 정상 시 default ON 별 PR 변경 (production 영향 0): - docs/phase_2q_apply_opt_in.md 신규 — 사용자 가시화: · 사용 방법 (query param + SvelteKit fetch 예시) · 1주 관찰 metric 목표 (cache hit ≥ 50% / LLM warm p50 ≤ 1500 / 503 ≤ 5/day / Recall t≥3 ≥ 0.74) · 추천 LLM 사유 (decision md §4 4-factor) + 대안 명시 · Phase 2 QueryAnalyzer sequencing 박제 (영향 0, ask_events 0건 운영 관찰 후 확정) · Follow-up PR 5건 명시 (Telemetry / Alert / Default-ON / Cache-Prewarm / Category-Analysis) - app/api/search.py — rewrite_backend query param description 갱신. Apply 진입 박제 + 추천 LLM 표시 + docs 링크. 동작 변경 0. - tests/search_eval/baselines/v0_2_phase2q_apply_smoke_2026-05-24.json — production smoke: · opt-in path HTTP 200 + total_ms 957 (cache hit) + rerank_ms 109 (정상 호출) + fallback 0 · baseline path HTTP 200 + total_ms 207 + rerank_ms 19 + fallback 0 (회귀 0 확정) 38/38 unit test PASS (회귀 0). main HEAD 0257a5d 위 branch. Closure gate PASS: · docs 가시화 / search.py description / smoke json 박제 · production smoke 양쪽 path 정상 + 회귀 0 verify · 메모리 갱신 + 1주 관찰 종료일 2026-05-31 박제 Follow-up: 1주 후 PR-2Q-Apply-Default-ON-1 (metric 정상 시) 또는 fix PR. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- app/api/search.py | 8 +- docs/phase_2q_apply_opt_in.md | 98 +++++++++++++++++++ .../v0_2_phase2q_apply_smoke_2026-05-24.json | 68 +++++++++++++ 3 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 docs/phase_2q_apply_opt_in.md create mode 100644 tests/search_eval/baselines/v0_2_phase2q_apply_smoke_2026-05-24.json diff --git a/app/api/search.py b/app/api/search.py index e1fed87..d36fcc2 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -178,7 +178,13 @@ async def search( rewrite_backend: str | None = Query( None, pattern=r"^(baseline|cand_[a-z0-9_]+)$", - description="Phase 2Q Diagnose query rewrite dispatcher (slug-based, no silent fallback). baseline|cand_multi_query_macmini|cand_multi_query_macbook. 미지정/baseline = single-query path. Phase 2 = variant N 별 retrieval+fusion → unified RRF → reranker 1회.", + description=( + "Phase 2Q Apply (2026-05-24 진입, opt-in, 1주 관찰). slug-based, no silent fallback. " + "baseline|cand_multi_query_macmini (추천 gemma-4)|cand_multi_query_macbook (qwen3.6). " + "미지정/baseline = single-query path (회귀 0 invariant). " + "변경 후 variant N 별 retrieval+fusion → unified RRF → reranker 1회 (chunk_id dedup + cap 60). " + "docs: docs/phase_2q_apply_opt_in.md" + ), ), ): """문서 검색 — FTS + ILIKE + 벡터 결합 (Phase 3.1 이후 run_search wrapper)""" diff --git a/docs/phase_2q_apply_opt_in.md b/docs/phase_2q_apply_opt_in.md new file mode 100644 index 0000000..45118e0 --- /dev/null +++ b/docs/phase_2q_apply_opt_in.md @@ -0,0 +1,98 @@ +# Phase 2Q Apply — Multi-Query Rewrite (opt-in, 2026-05-24 진입) + +## 개요 + +Phase 2Q Diagnose 결과 (decision md `tests/search_eval/baselines/v0_2_phase2q_decision_2026-05-24.md`) +H1 (both backends 유의미 net 개선) 확정 + Rerank-Payload-Fix (commit `b734fc5`) 완료 후 +Apply rollout 진입. + +**rollout 정책 = opt-in 1주 관찰** (2026-05-24 ~ 2026-05-31). 1주 후 metric 정상 시 +default ON 전환 결정 (별 PR `PR-2Q-Apply-Default-ON-1`). 
+ +**추천 LLM = `cand_multi_query_macmini` (gemma-4-26b-a4b-it-8bit, Mac mini)**. +4-factor weighted 사유 (decision md §4): +1. ⭐ Availability — 24/7 가동 (qwen MacBook laptop 의존) +2. NDCG 0.927 dominant (qwen 0.919 와 동등 단 noise level) +3. Cold latency 우세 (gemma 2757ms vs qwen 3647ms cold p50) +4. 카테고리 standards/exam/korean 강점 (도메인 중심) + +대안 = `cand_multi_query_macbook` (qwen3.6-27B, mixed/english 강점) — MacBook always-on +의향 시 가능. + +## 사용 방법 + +### Query parameter (opt-in) + +```bash +GET /api/search/?q=<query> + &mode=hybrid + &limit=20 + &rewrite_backend=cand_multi_query_macmini # opt-in, 미지정 시 single-query path +``` + +- `rewrite_backend` 미지정 또는 `baseline` → 기존 single-query path 100% 그대로 + (baseline 회귀 0 invariant, Phase 2Q Phase 2 + Rerank-Fix 측정 박제). +- `rewrite_backend=cand_multi_query_macmini` → multi-query (3 variants) + unified RRF + + reranker. +- `rewrite_backend=cand_multi_query_macbook` → qwen (MacBook 가동 시). +- 미지원 slug → HTTP 400 `unknown_rewrite_backend`. +- LLM 호출 실패 → HTTP 503 `rewrite_llm_unavailable` (no silent fallback). 
+ +### SvelteKit / fetch 예시 + +```typescript +const res = await fetch( + `/api/search/?q=${encodeURIComponent(q)}&mode=hybrid&limit=20&rewrite_backend=cand_multi_query_macmini`, + { headers: { Authorization: `Bearer ${token}` } } +); +``` + +## 1주 관찰 metric (목표) + +| Metric | 목표값 | 측정 source | 회귀 시 action | +|---|---|---|---| +| **Rewrite cache hit rate** | ≥ 50% (1주차) | `[rewrite-dispatch]` log `cache_hit=true` 비율 | `PR-2Q-Cache-Prewarm` (nightly cron) | +| **LLM latency warm p50** | ≤ 1500ms | `[rewrite-dispatch]` log `llm_latency_ms` | gemma 가동 상태 확인, semaphore 경쟁 진단 | +| **LLM latency cold p50** | ≤ 3000ms | 동상 | cache prewarm 도입 검토 | +| **503 누적** | ≤ 5/day | fastapi 응답 status 503 | LLM endpoint health / circuit breaker 검토 | +| **Recall@10 t≥3** | ≥ 0.74 (production traffic 분석) | random sampling 또는 별 dashboard | NDCG 회귀 분석 + 카테고리 분포 | +| **사용자 negative feedback** | 0건 | 사용자 channel | 즉시 rollback 또는 priority fix | + +## 1주 관찰 종료일 (2026-05-31) decision + +- 위 표의 정량 metric 5종 모두 정상 + 사용자 negative feedback 0 → `PR-2Q-Apply-Default-ON-1` 진입 + (default ON 전환) +- 1 metric 이상 회귀 → 별 fix PR 후 1주 추가 관찰 +- catastrophic 회귀 → rollback (rewrite_backend default null 영구 유지) + +## Phase 2 QueryAnalyzer sequencing + +Phase 2 QueryAnalyzer (`app/services/search/query_analyzer.py`) 가 production 가동 중 +이지만 retrieval path 영향 0 (debug 노출만, `app/api/search.py:156` 코멘트 박제, +ask_events 0건 운영 관찰 후 확정). Phase 2Q multi-query rewrite 와 충돌 없음. + +→ Apply 진입 시 두 layer 모두 가동, 결과 일치성 invariant 유지. + +## Follow-up PR (별 트랙) + +- **PR-2Q-Apply-Telemetry-1** — `[rewrite-dispatch]` log 를 `search_failure_logs` 또는 + 별 telemetry 테이블 에 누적 (search_telemetry.py 패턴 재사용). 1주 관찰 metric 의 + 정량 분석 source. +- **PR-2Q-Alert-1** — Prometheus + ntfy alert rule (LLM 503 ≥ 10/hour / cache hit < + 30% 7d window). monitoring stack 영역. +- **PR-2Q-Apply-Default-ON-1** — 1주 관찰 종료 후 default ON 전환. +- **PR-2Q-Cache-Prewarm** — cache hit rate < 50% 관찰 시 nightly cron. 
+- **PR-2Q-Apply-Category-Analysis** — Rerank-Fix 측정의 카테고리 회귀 (standards + -0.28, exam -0.19) 분석. RRF fallback vs reranker 의 ranking 동작 차이 박제. + +## 관련 자료 + +- decision md = `tests/search_eval/baselines/v0_2_phase2q_decision_2026-05-24.md` +- Rerank-Fix 측정 = `tests/search_eval/baselines/v0_2_phase2q_rerank_fix_2026-05-24.json` +- Phase 2Q 3 측정 = `tests/search_eval/baselines/v0_2_phase2q_results_2026-05-24.json` +- Plan = `~/.claude/plans/pr-2q-apply-query-rewrite-1-bright-meadow.md` + + `~/.claude/plans/follow-up-pr-8-lazy-shore.md` (sequencing) +- Phase 2Q Diagnose plan = `~/.claude/plans/phase-2q-query-rewrite-diagnose.md` v6 +- main merge commits: + - `711d495` Phase 2Q Diagnose 5 commit + - `0257a5d` Rerank-Payload-Fix diff --git a/tests/search_eval/baselines/v0_2_phase2q_apply_smoke_2026-05-24.json b/tests/search_eval/baselines/v0_2_phase2q_apply_smoke_2026-05-24.json new file mode 100644 index 0000000..cd2a71e --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2q_apply_smoke_2026-05-24.json @@ -0,0 +1,68 @@ +{ + "version": "v0.2-phase2q-apply", + "label": "phase_2q_apply_opt_in_smoke", + "date": "2026-05-24", + "plan": "pr-2q-apply-query-rewrite-1-bright-meadow.md", + "main_head": "0257a5d (Rerank-Payload-Fix merge)", + "rollout_policy": { + "type": "opt-in", + "default": "rewrite_backend=null (single-query path)", + "explicit_opt_in": "?rewrite_backend=cand_multi_query_macmini", + "observation_period_start": "2026-05-24", + "observation_period_end": "2026-05-31", + "default_on_decision": "1주 후 metric 정상 + negative feedback 0 시 PR-2Q-Apply-Default-ON-1 진입" + }, + "selected_llm": { + "slug": "cand_multi_query_macmini", + "endpoint": "http://100.76.254.116:8801/v1/chat/completions", + "model": "gemma-4-26b-a4b-it-8bit", + "rationale": "4-factor weighted (decision md §4): F3 24/7 + F1 NDCG 0.927 + F4 cold latency 우세 + F2 standards/exam/korean 강점" + }, + "smoke_test": { + "query": "LPG 저장탱크 안전거리", + "opt_in_path": { + "request": "GET 
/api/search/?q=LPG+저장탱크+안전거리&mode=hybrid&limit=3&rewrite_backend=cand_multi_query_macmini", + "http_status": 200, + "total_ms": 957, + "rewrite_ms": 0, + "variant_retrieve_ms": 847, + "rerank_ms": 109, + "freshness_ms": 0, + "fallback_413": 0, + "note": "cache hit (rewrite_ms=0) — 이전 Rerank-Fix 측정 시 박제. reranker 정상 호출 (rerank_ms=109)." + }, + "baseline_path_regression_verify": { + "request": "GET /api/search/?q=LPG+저장탱크+안전거리&mode=hybrid&limit=3 (rewrite_backend 미지정)", + "http_status": 200, + "total_ms": 207, + "text_ms": 42, + "vector_ms": 146, + "rerank_ms": 19, + "fallback_413": 0, + "note": "baseline single-query path 회귀 0 확정. invariant PASS." + } + }, + "observation_metrics_targets": { + "rewrite_cache_hit_rate": "≥ 50% (1주차)", + "llm_latency_warm_p50": "≤ 1500ms", + "llm_latency_cold_p50": "≤ 3000ms", + "503_per_day": "≤ 5", + "recall_at_10_t3_production": "≥ 0.74", + "user_negative_feedback": "0" + }, + "follow_up_prs": [ + "PR-2Q-Apply-Telemetry-1 — [rewrite-dispatch] log → search_failure_logs / 별 telemetry 테이블 누적", + "PR-2Q-Alert-1 — Prometheus + ntfy alert rule", + "PR-2Q-Apply-Default-ON-1 — 1주 관찰 종료 후 default ON 결정", + "PR-2Q-Cache-Prewarm — cache hit rate < 50% 시 nightly cron", + "PR-2Q-Apply-Category-Analysis — standards -0.28 / exam -0.19 회귀 분석" + ], + "code_changes_summary": { + "production_impact": "0 (opt-in path 이미 Phase 1B/2 + Rerank-Fix 부터 가동)", + "files_changed": [ + "docs/phase_2q_apply_opt_in.md — 사용자 가시화 + 1주 관찰 metric + threshold + sequencing 박제 (신규)", + "app/api/search.py — rewrite_backend query param description 갱신 (Apply 진입 박제, 동작 변경 0)", + "tests/search_eval/baselines/v0_2_phase2q_apply_smoke_2026-05-24.json — 본 박제 (smoke 결과)" + ] + } +}