diff --git a/app/api/search.py b/app/api/search.py index 4d0a1fe..fed8d2f 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -30,6 +30,7 @@ from services.search.evidence_service import EvidenceItem, extract_evidence from services.search.fusion_service import DEFAULT_FUSION from services.search.grounding_check import check as grounding_check from services.search.refusal_gate import RefusalDecision, decide as refusal_decide +from services.search import query_rewriter from services.search.search_pipeline import PipelineResult, run_search from services.search.synthesis_service import SynthesisResult, synthesize from services.search.verifier_service import VerifierResult, verify @@ -174,8 +175,38 @@ async def search( pattern=r"^(baseline|cand_[a-z0-9_]+)$", description="Phase 2B Diagnose reranker dispatcher (R2-B1 slug-based). slug 만 받음 (raw endpoint URL X). baseline|cand_. 미지정/baseline = production reranker.", ), + rewrite_backend: str | None = Query( + None, + pattern=r"^(baseline|cand_[a-z0-9_]+)$", + description="Phase 2Q Diagnose query rewrite dispatcher (slug-based, no silent fallback). baseline|cand_multi_query_macmini|cand_multi_query_macbook. 미지정/baseline = single-query path. Phase 1B scaffold = variants 박제만, retrieval 합성은 Phase 2.", + ), ): """문서 검색 — FTS + ILIKE + 벡터 결합 (Phase 3.1 이후 run_search wrapper)""" + # Phase 2Q Diagnose scaffold (plan v6 Phase 1): + # slug 명시 시 LLM rewrite 호출 → variants 박제 (logger). retrieval path 영향 0 + # (results 미사용, baseline single-query path 유지). Phase 2 에서 search_with_rewrite() 합성. 
+ if rewrite_backend not in (None, "baseline"): + try: + await query_rewriter.rewrite(q, rewrite_backend) + except ValueError: + return JSONResponse( + status_code=400, + content={ + "error_reason": "unknown_rewrite_backend", + "backend_requested": rewrite_backend, + "allowed": query_rewriter.allowed_slugs(), + }, + ) + except RuntimeError as e: + return JSONResponse( + status_code=503, + content={ + "error_reason": "rewrite_llm_unavailable", + "backend_requested": rewrite_backend, + "detail": str(e), + }, + ) + try: pr = await run_search( session, diff --git a/app/services/search/query_rewriter.py b/app/services/search/query_rewriter.py new file mode 100644 index 0000000..fcd8261 --- /dev/null +++ b/app/services/search/query_rewriter.py @@ -0,0 +1,271 @@ +"""Query rewriter — multi-query expansion (Phase 2Q Diagnose). + +Phase 2Q Diagnose 의 dispatcher + cache + LLM call layer. retrieval 합성 (search_with_rewrite) +은 Phase 2 별 commit. 본 모듈은 scaffold = slug → variants[3] 변환만 담당. + +## 핵심 룰 (plan v6 영구) +- ``Priority.FOREGROUND`` semaphore (retrieval inline path, user-facing). +- ``LLM_REWRITE_TIMEOUT_MS = 15000`` (fail-fast — background 와 다름). +- LLM 호출 실패 / parse fail / empty variants → cache 저장 X + caller 503 raise. +- baseline (slug=None) 호출은 LLM 우회 = ``None`` 반환. +- prompt template 1종 고정 (``app/prompts/query_rewrite.txt`` v1). +- raw endpoint URL query param X — slug-based allowlist (``LLM_BACKEND_MAP``). +""" + +from __future__ import annotations + +import asyncio +import hashlib +import json +import time +import unicodedata +from typing import Any + +import httpx + +from ai.client import _load_prompt, parse_json_response +from core.utils import setup_logger + +from .llm_gate import Priority, acquire_mlx_gate + +logger = setup_logger("query_rewriter") + +# ─── 상수 (plan v6 영구 룰) ────────────────────────────── +PROMPT_VERSION = "v1" # prompts/query_rewrite.txt manual string. 변경 시 cache 자동 분리. 
+CACHE_TTL = 86400 # 24h +CACHE_MAXSIZE = 1000 +LLM_REWRITE_TIMEOUT_MS = 15000 # retrieval inline path, fail-fast (B-3 background 와 다른 사유) +EXPECTED_N_VARIANTS = 3 # multi-query variant count, prompt v1 hardcoded + +# ─── Backend allowlist (plan v6 §5.1) ──────────────────── +# slug → backend cfg or None (baseline = no rewrite). sampling 박제 = fixture 와 단일 source. +LLM_BACKEND_MAP: dict[str, dict[str, Any] | None] = { + "baseline": None, + "cand_multi_query_macmini": { + "endpoint": "http://100.76.254.116:8801/v1/chat/completions", + "model": "gemma-4-26b-a4b-it-8bit", + "n_variants": 3, + "sampling": { + "temperature": 0.3, + "max_tokens": 256, + "response_format": {"type": "json_object"}, # MLX 호환 (Phase 0 inspect 9 PASS) + }, + "auth": None, + }, + "cand_multi_query_macbook": { + "endpoint": "http://100.118.112.84:8810/v1/chat/completions", + "model": "mlx-community/Qwen3.6-27B-8bit", + "n_variants": 3, + "sampling": { + "temperature": 0.3, + "max_tokens": 256, + # response_format 제거 — mlx-vlm.server json_object 미지원 (120s hang). + # prompt rule "Output STRICT JSON only" 강제 (Phase 0 inspect 9 박제). + }, + "auth": None, + }, +} + + +def _resolve_rewrite_backend(slug: str | None) -> dict[str, Any] | None: + """slug → backend cfg or None (baseline). Raises ValueError on unknown slug.""" + if slug is None or slug == "baseline": + return None + if slug not in LLM_BACKEND_MAP: + raise ValueError(f"unknown_rewrite_backend: {slug!r}") + return LLM_BACKEND_MAP[slug] + + +def allowed_slugs() -> list[str]: + """HTTP 400 error 응답의 ``allowed`` 필드용. 
# ─── In-memory cache (original note: mirrors the query_analyzer.py pattern) ───
_CACHE: dict[str, tuple[float, list[str]]] = {}  # key → (expire_at, variants)
_CACHE_LOCK = asyncio.Lock()  # held by _get_cached / _set_cached while touching _CACHE


def _cache_key(query: str, backend_slug: str) -> str:
    """Build the cache key for a (query, backend, prompt version) triple.

    The query is NFKC-normalised first and only then stripped and lowercased,
    so compatibility characters that normalise to uppercase letters or to
    surrounding whitespace still collapse onto the same key.

    Returns:
        The first 32 hex characters of the SHA-256 of
        ``"<canonical>|<backend_slug>|<PROMPT_VERSION>"``.
    """
    canonical = unicodedata.normalize("NFKC", query).strip().lower()
    raw = f"{canonical}|{backend_slug}|{PROMPT_VERSION}"
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:32]


async def _get_cached(key: str) -> list[str] | None:
    """Return a copy of the cached variants for *key*, or ``None``.

    An entry whose expiry time has passed is deleted lazily on lookup and
    reported as a miss.
    """
    async with _CACHE_LOCK:
        entry = _CACHE.get(key)
        if entry is None:
            return None
        expire_at, variants = entry
        if expire_at < time.time():
            _CACHE.pop(key, None)
            return None
        # Copy so callers cannot mutate the stored list.
        return list(variants)


async def _set_cached(key: str, variants: list[str]) -> None:
    """Store *variants* under *key* with a fresh TTL, evicting FIFO when full.

    Eviction follows dict insertion order (oldest insert first); it is a FIFO
    approximation of LRU, not a true LRU.
    """
    async with _CACHE_LOCK:
        # Drop any previous entry for this key first: an overwrite must not
        # evict an unrelated entry, and re-inserting moves the key to the
        # newest position.
        _CACHE.pop(key, None)
        while _CACHE and len(_CACHE) >= CACHE_MAXSIZE:
            _CACHE.pop(next(iter(_CACHE)))
        _CACHE[key] = (time.time() + CACHE_TTL, list(variants))


def cache_stats() -> dict[str, int]:
    """Return the current cache size and the configured maximum (diagnostics)."""
    return {"size": len(_CACHE), "maxsize": CACHE_MAXSIZE}


# ─── Prompt loading (lazy, loaded once) ──────────────────
_PROMPT_TEMPLATE: str | None = None


def _get_prompt_template() -> str:
    """Load ``query_rewrite.txt`` on first use and memoise it at module level."""
    global _PROMPT_TEMPLATE
    if _PROMPT_TEMPLATE is None:
        _PROMPT_TEMPLATE = _load_prompt("query_rewrite.txt")
    return _PROMPT_TEMPLATE


def _render_prompt(query: str) -> str:
    """Substitute *query* for every ``{query}`` placeholder in the template."""
    return _get_prompt_template().replace("{query}", query)
# ─── Variant extraction ──────────────────────────────────
def _extract_variants(raw: str, expected_n: int) -> list[str] | None:
    """Turn the raw LLM response text into a list of variants.

    Parsing is delegated to ``parse_json_response``. The only accepted shape
    is ``{"variants": ["...", ...]}`` with exactly *expected_n* non-blank
    strings; each string is returned stripped.

    Returns:
        The cleaned variants, or ``None`` on any shape problem (not a JSON
        object, missing key, wrong count, non-string or blank element). The
        caller treats ``None`` as a parse failure and does not cache it.
    """
    obj = parse_json_response(raw)
    # Guard against any non-object result (None, list, scalar) before .get().
    # NOTE(review): assumes parse_json_response can return non-dict values;
    # its contract is not visible from this file.
    if not isinstance(obj, dict):
        return None
    variants = obj.get("variants")
    if not isinstance(variants, list) or len(variants) != expected_n:
        return None
    cleaned: list[str] = []
    for item in variants:
        if not isinstance(item, str):
            return None
        text = item.strip()
        if not text:
            return None
        cleaned.append(text)
    return cleaned


# ─── LLM call (direct httpx) ─────────────────────────────
async def _call_llm(cfg: dict[str, Any], query: str) -> str:
    """POST one chat/completions request and return the first choice's text.

    Args:
        cfg: a non-baseline ``LLM_BACKEND_MAP`` entry (``endpoint``, ``model``
            and optional ``sampling`` are read).
        query: the user query rendered into the prompt template.

    Raises:
        httpx.HTTPError: transport failure, timeout or non-2xx status.
        KeyError: *cfg* lacks ``model`` or ``endpoint``.
        ValueError: the body is not valid JSON, or does not have the
            ``choices[0].message.content`` string the caller needs.
    """
    payload: dict[str, Any] = {
        "model": cfg["model"],
        "messages": [{"role": "user", "content": _render_prompt(query)}],
    }
    # Sampling keys are merged at the top level of the request body.
    payload.update(cfg.get("sampling") or {})

    timeout_s = LLM_REWRITE_TIMEOUT_MS / 1000.0
    async with httpx.AsyncClient(timeout=timeout_s) as client:
        response = await client.post(cfg["endpoint"], json=payload)
        response.raise_for_status()
        data = response.json()

    # An empty ``choices`` list or a non-dict body would raise IndexError or
    # TypeError, which rewrite() does not translate; fold every shape problem
    # into ValueError so it is reported as a protocol error.
    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise ValueError("unexpected chat/completions response shape") from exc
    if not isinstance(content, str):
        raise ValueError("chat/completions message content is not a string")
    return content


# ─── Public entry: rewrite() ─────────────────────────────
async def rewrite(query: str, backend_slug: str | None) -> list[str] | None:
    """Expand *query* into multiple variants through the selected backend.

    Args:
        query: the original user query.
        backend_slug: an ``LLM_BACKEND_MAP`` key, or ``None``/``"baseline"``.

    Returns:
        ``None`` for the baseline (no LLM call is made). Otherwise the list of
        variants, served from the cache when present. Per the original note,
        the prompt policy makes variant 0 the verbatim original query.

    Raises:
        ValueError: unknown slug (the caller translates this to HTTP 400).
        RuntimeError: LLM call, protocol or parse failure (the caller
            translates this to HTTP 503). Failures are never cached.
    """
    cfg = _resolve_rewrite_backend(backend_slug)
    if cfg is None:
        return None

    slug = backend_slug or "baseline"
    key = _cache_key(query, slug)

    cached = await _get_cached(key)
    if cached is not None:
        logger.info(
            "[rewrite-dispatch] backend=%s n_variants=%d cache_hit=true "
            "llm_endpoint=cached llm_model=cached llm_latency_ms=0 "
            "rewrite_total_ms=0 query_hash=%s",
            slug, len(cached), key[:8],
        )
        return cached

    expected_n = int(cfg.get("n_variants", EXPECTED_N_VARIANTS))
    started = time.monotonic()

    try:
        async with acquire_mlx_gate(Priority.FOREGROUND):
            # Start the clock after the gate is acquired, so the latency
            # excludes time spent waiting for it.
            llm_started = time.monotonic()
            raw = await _call_llm(cfg, query)
            llm_elapsed_ms = int((time.monotonic() - llm_started) * 1000)
    except httpx.HTTPError as e:
        logger.warning(
            "[rewrite-dispatch] backend=%s cache_hit=false error=http "
            "detail=%s query_hash=%s", slug, type(e).__name__, key[:8],
        )
        raise RuntimeError(f"rewrite_llm_unavailable:{slug}:{type(e).__name__}") from e
    except (KeyError, ValueError, json.JSONDecodeError) as e:
        logger.warning(
            "[rewrite-dispatch] backend=%s cache_hit=false error=protocol "
            "detail=%s query_hash=%s", slug, type(e).__name__, key[:8],
        )
        raise RuntimeError(f"rewrite_llm_unavailable:{slug}:protocol") from e

    variants = _extract_variants(raw, expected_n)
    total_ms = int((time.monotonic() - started) * 1000)

    if variants is None:
        logger.warning(
            "[rewrite-dispatch] backend=%s cache_hit=false error=parse "
            "llm_latency_ms=%d rewrite_total_ms=%d query_hash=%s",
            slug, llm_elapsed_ms, total_ms, key[:8],
        )
        raise RuntimeError(f"rewrite_llm_unavailable:{slug}:parse")

    await _set_cached(key, variants)

    logger.info(
        "[rewrite-dispatch] backend=%s n_variants=%d cache_hit=false "
        "llm_endpoint=%s llm_model=%s llm_latency_ms=%d "
        "rewrite_total_ms=%d query_hash=%s",
        slug, len(variants), cfg["endpoint"], cfg["model"],
        llm_elapsed_ms, total_ms, key[:8],
    )
    for idx, text in enumerate(variants):
        # Each logged variant is truncated to 120 characters.
        logger.info(
            "[rewrite-variant] backend=%s query_hash=%s idx=%d text=%r",
            slug, key[:8], idx, text[:120],
        )

    return variants
단 full result dict 리스트 반환.""" url = f"{base_url.rstrip('/')}/api/search/" @@ -864,6 +870,8 @@ async def call_search_full( params["snapshot_chunk_id_max"] = snapshot_chunk_id_max if reranker_backend is not None: params["reranker_backend"] = reranker_backend + if rewrite_backend is not None: + params["rewrite_backend"] = rewrite_backend import time @@ -1322,6 +1330,12 @@ def main() -> int: default=None, help="Phase 2B Diagnose reranker dispatcher slug (baseline | cand_gte_ml_base). 미지정 = production.", ) + parser.add_argument( + "--rewrite-backend", + type=str, + default=None, + help="Phase 2Q Diagnose query rewrite dispatcher slug (baseline | cand_multi_query_macmini | cand_multi_query_macbook). 미지정 = single-query path. Phase 1B scaffold = variants 박제만, retrieval 합성은 Phase 2.", + ) args = parser.parse_args() @@ -1375,21 +1389,21 @@ def main() -> int: if args.base_url: print(f"\n>>> evaluating: {args.base_url}") results = asyncio.run( - evaluate(queries, args.base_url, args.token, "single", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze, embedding_backend=args.embedding_backend, snapshot_doc_id_max=args.snapshot_doc_id_max, snapshot_chunk_id_max=args.snapshot_chunk_id_max, reranker_backend=args.reranker_backend) + evaluate(queries, args.base_url, args.token, "single", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze, embedding_backend=args.embedding_backend, snapshot_doc_id_max=args.snapshot_doc_id_max, snapshot_chunk_id_max=args.snapshot_chunk_id_max, reranker_backend=args.reranker_backend, rewrite_backend=args.rewrite_backend) ) print_summary("single", results, eval_version=args.eval_version) all_results.extend(results) else: print(f"\n>>> baseline: {args.baseline_url}") baseline_results = asyncio.run( - evaluate(queries, args.baseline_url, args.token, "baseline", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze, embedding_backend=args.embedding_backend, 
snapshot_doc_id_max=args.snapshot_doc_id_max, snapshot_chunk_id_max=args.snapshot_chunk_id_max, reranker_backend=args.reranker_backend) + evaluate(queries, args.baseline_url, args.token, "baseline", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze, embedding_backend=args.embedding_backend, snapshot_doc_id_max=args.snapshot_doc_id_max, snapshot_chunk_id_max=args.snapshot_chunk_id_max, reranker_backend=args.reranker_backend, rewrite_backend=args.rewrite_backend) ) baseline_summary = print_summary("baseline", baseline_results, eval_version=args.eval_version) print(f"\n>>> candidate: {args.candidate_url}") candidate_results = asyncio.run( evaluate( - queries, args.candidate_url, args.token, "candidate", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze, embedding_backend=args.embedding_backend, snapshot_doc_id_max=args.snapshot_doc_id_max, snapshot_chunk_id_max=args.snapshot_chunk_id_max, reranker_backend=args.reranker_backend + queries, args.candidate_url, args.token, "candidate", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze, embedding_backend=args.embedding_backend, snapshot_doc_id_max=args.snapshot_doc_id_max, snapshot_chunk_id_max=args.snapshot_chunk_id_max, reranker_backend=args.reranker_backend, rewrite_backend=args.rewrite_backend ) ) candidate_summary = print_summary("candidate", candidate_results, eval_version=args.eval_version) diff --git a/tests/test_query_rewriter.py b/tests/test_query_rewriter.py new file mode 100644 index 0000000..9c328e0 --- /dev/null +++ b/tests/test_query_rewriter.py @@ -0,0 +1,212 @@ +"""Phase 2Q Diagnose Phase 1B — query_rewriter scaffold + dispatcher 단위 테스트. + +가드레일 (plan v6 §5 + §7 Phase 1): + 1. `_resolve_rewrite_backend` — slug resolve, unknown ValueError, baseline → None + 2. `_cache_key` — deterministic + NFKC normalize + backend slug 분리 + 3. `_extract_variants` — valid shape / wrong count / type mismatch / empty / non-list + 4. 
# Original note: logs/llm_gate.log is owned by root (written by the production
# fastapi daemon), so importing the app as an unprivileged user under pytest
# raises PermissionError. The shim below swallows that error.
#
# The replacement stays a real class: substituting a plain function would
# break isinstance()/issubclass() checks and any
# ``class X(logging.FileHandler)`` evaluated after this module is imported.
# NOTE: the assignment is process-wide and is never undone.
_orig_file_handler = logging.FileHandler


class _SafeFileHandler(_orig_file_handler):
    """FileHandler that logs to ``os.devnull`` when the target is not writable."""

    def __init__(self, filename, *args, **kwargs):  # type: ignore[no-untyped-def]
        # Call the original initialiser explicitly rather than via super(), so
        # this also works when invoked unbound as
        # ``logging.FileHandler.__init__(self, ...)`` by handler subclasses
        # created before the patch.
        try:
            _orig_file_handler.__init__(self, filename, *args, **kwargs)
        except PermissionError:
            _orig_file_handler.__init__(self, os.devnull, *args, **kwargs)


# Keep the previous private name available as an alias.
_safe_file_handler = _SafeFileHandler

logging.FileHandler = _SafeFileHandler  # type: ignore[misc,assignment]
def test_resolve_baseline_returns_none():
    """Both spellings of the baseline resolve to "no backend"."""
    for baseline_slug in (None, "baseline"):
        assert _resolve_rewrite_backend(baseline_slug) is None


def test_resolve_known_slugs():
    """Each candidate slug resolves to its pinned config."""
    macmini = _resolve_rewrite_backend("cand_multi_query_macmini")
    assert macmini is not None
    for required in ("endpoint", "model", "sampling"):
        assert required in macmini
    assert macmini["model"] == "gemma-4-26b-a4b-it-8bit"

    macbook = _resolve_rewrite_backend("cand_multi_query_macbook")
    assert macbook is not None
    assert macbook["model"] == "mlx-community/Qwen3.6-27B-8bit"
    # The qwen backend must not send response_format.
    assert "response_format" not in macbook["sampling"]


def test_resolve_unknown_slug_raises():
    """Slugs outside the allowlist are rejected with ValueError."""
    with pytest.raises(ValueError, match="unknown_rewrite_backend"):
        _resolve_rewrite_backend("cand_bogus")
    with pytest.raises(ValueError):
        _resolve_rewrite_backend("cand_multi_query_other")


def test_allowed_slugs_matches_map():
    """allowed_slugs() mirrors the keys of LLM_BACKEND_MAP one to one."""
    slugs = allowed_slugs()
    assert slugs == list(LLM_BACKEND_MAP.keys())
    for expected in (
        "baseline",
        "cand_multi_query_macmini",
        "cand_multi_query_macbook",
    ):
        assert expected in slugs
def test_cache_key_deterministic():
    """The same (query, slug) pair always hashes to the same 32-char key."""
    k1 = _cache_key("산업안전보건법 제6장", "cand_multi_query_macmini")
    k2 = _cache_key("산업안전보건법 제6장", "cand_multi_query_macmini")
    assert k1 == k2
    assert len(k1) == 32  # sha256 hex digest truncated to 32 chars


def test_cache_key_nfkc_normalize_and_strip_lower():
    """Whitespace, letter case and compatibility forms do not change the key."""
    slug = "cand_multi_query_macmini"
    base = _cache_key("ASME Section VIII", slug)
    assert _cache_key(" asme section viii ", slug) == base
    assert _cache_key("ASME SECTION VIII", slug) == base
    # Full-width Latin letters differ from ASCII only by NFKC compatibility
    # mapping, so this assertion is the one that exercises NFKC.
    assert _cache_key("ＡＳＭＥ Section VIII", slug) == base


def test_cache_key_differs_by_backend_slug():
    """The backend slug is part of the key."""
    k_a = _cache_key("query", "cand_multi_query_macmini")
    k_b = _cache_key("query", "cand_multi_query_macbook")
    assert k_a != k_b


def test_cache_key_includes_prompt_version(monkeypatch):
    """Changing PROMPT_VERSION must yield a different key for the same query."""
    assert PROMPT_VERSION == "v1"
    slug = "cand_multi_query_macmini"
    key_v1 = _cache_key("query", slug)
    assert len(key_v1) == 32

    # _cache_key reads the module global at call time, so patching the module
    # attribute is enough to simulate a prompt bump.
    monkeypatch.setattr(query_rewriter, "PROMPT_VERSION", "v2-test")
    assert _cache_key("query", slug) != key_v1
def _rejected(raw: str) -> bool:
    """Return True when _extract_variants refuses *raw* for a 3-variant request."""
    return _extract_variants(raw, expected_n=3) is None


def test_extract_variants_valid_shape():
    """A well-formed object with three strings is returned as-is."""
    payload = '{"variants": ["원본", "한국어 변형", "english"]}'
    assert _extract_variants(payload, expected_n=3) == ["원본", "한국어 변형", "english"]


def test_extract_variants_strips_whitespace():
    """Surrounding whitespace, including a trailing newline, is stripped."""
    payload = '{"variants": [" 원본 ", "한국어\\n", " english "]}'
    assert _extract_variants(payload, expected_n=3) == ["원본", "한국어", "english"]


def test_extract_variants_wrong_count_returns_none():
    """Too few and too many variants are both rejected."""
    assert _rejected('{"variants": ["only_one"]}')
    assert _rejected('{"variants": ["a", "b", "c", "d"]}')


def test_extract_variants_missing_key_returns_none():
    """An object without the "variants" key is rejected."""
    assert _rejected('{"queries": ["a", "b", "c"]}')


def test_extract_variants_non_list_returns_none():
    """A "variants" value that is not a list is rejected."""
    assert _rejected('{"variants": "single string"}')


def test_extract_variants_empty_string_returns_none():
    """A blank element invalidates the whole response."""
    assert _rejected('{"variants": ["a", "", "c"]}')


def test_extract_variants_non_string_element_returns_none():
    """A non-string element invalidates the whole response."""
    assert _rejected('{"variants": ["a", 123, "c"]}')


def test_extract_variants_invalid_json_returns_none():
    """Text that is not JSON at all is rejected."""
    assert _rejected("not json at all")


def test_extract_variants_markdown_fence_fallback():
    """A ```json fenced block is accepted (production parser is reused)."""
    fenced = '```json\n{"variants": ["a", "b", "c"]}\n```'
    assert _extract_variants(fenced, expected_n=3) == ["a", "b", "c"]
@pytest.mark.asyncio
async def test_cache_set_get_roundtrip():
    """A stored entry is returned unchanged by the next lookup."""
    # Reset the module-global cache so other tests cannot interfere.
    query_rewriter._CACHE.clear()
    key = _cache_key("__test_unique_key__", "cand_multi_query_macmini")
    assert await query_rewriter._get_cached(key) is None
    await query_rewriter._set_cached(key, ["v0", "v1", "v2"])
    out = await query_rewriter._get_cached(key)
    assert out == ["v0", "v1", "v2"]


@pytest.mark.asyncio
async def test_cache_ttl_expiry():
    """An expired entry is reported as a miss and deleted lazily."""
    query_rewriter._CACHE.clear()
    key = "ttl_test_key"
    # Insert by hand with an expiry time already in the past.
    query_rewriter._CACHE[key] = (time.time() - 1.0, ["a", "b", "c"])
    assert await query_rewriter._get_cached(key) is None
    assert key not in query_rewriter._CACHE


@pytest.mark.asyncio
async def test_cache_returns_copy_not_reference():
    """Mutating the list returned by _get_cached must not alter the cache."""
    query_rewriter._CACHE.clear()
    key = "copy_test_key"
    await query_rewriter._set_cached(key, ["a", "b", "c"])
    out = await query_rewriter._get_cached(key)
    out.append("mutated")
    out2 = await query_rewriter._get_cached(key)
    assert out2 == ["a", "b", "c"]


@pytest.mark.asyncio
async def test_cache_evicts_oldest_when_full(monkeypatch):
    """At capacity, inserting a new key evicts the oldest inserted entry."""
    query_rewriter._CACHE.clear()
    # Shrink the limit so the eviction path is reachable with three inserts.
    monkeypatch.setattr(query_rewriter, "CACHE_MAXSIZE", 2)
    for key in ("evict_k1", "evict_k2", "evict_k3"):
        await query_rewriter._set_cached(key, ["a", "b", "c"])

    assert await query_rewriter._get_cached("evict_k1") is None
    assert await query_rewriter._get_cached("evict_k2") == ["a", "b", "c"]
    assert await query_rewriter._get_cached("evict_k3") == ["a", "b", "c"]
    assert len(query_rewriter._CACHE) == 2
    query_rewriter._CACHE.clear()


def test_constants_match_plan_v6():
    """Pin the module constants to the values fixed by plan v6."""
    assert PROMPT_VERSION == "v1"
    assert EXPECTED_N_VARIANTS == 3
    assert query_rewriter.LLM_REWRITE_TIMEOUT_MS == 15000
    assert query_rewriter.CACHE_TTL == 86400
    assert query_rewriter.CACHE_MAXSIZE == 1000