diff --git a/app/api/search.py b/app/api/search.py index 4d0a37a..91c9ad2 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -30,6 +30,7 @@ from services.search.evidence_service import EvidenceItem, extract_evidence from services.search.fusion_service import DEFAULT_FUSION from services.search.grounding_check import check as grounding_check from services.search.refusal_gate import RefusalDecision, decide as refusal_decide +from services.search import query_rewriter from services.search.search_pipeline import PipelineResult, run_search from services.search.synthesis_service import SynthesisResult, synthesize from services.search.verifier_service import VerifierResult, verify @@ -156,17 +157,91 @@ async def search( description="QueryAnalyzer 활성화 (Phase 2.1, LLM 호출). Phase 2.1은 debug 노출만, 검색 경로 영향 X", ), debug: bool = Query(False, description="단계별 candidates + timing 응답에 포함"), + embedding_backend: str | None = Query( + None, + pattern=r"^(baseline|cand_[a-z0-9_]+)$", + description="Phase 2A Diagnose dispatcher (R2-2 + R2-B1). slug 만 받음 (raw table name X). baseline|cand_. 미지정/baseline = production path.", + ), + snapshot_doc_id_max: int | None = Query( + None, ge=1, + description="Phase 2A snapshot freeze (R2-D + R2-B2). documents.id <= 값 filter. baseline 측정 시에도 동일 filter 적용.", + ), + snapshot_chunk_id_max: int | None = Query( + None, ge=1, + description="Phase 2A snapshot freeze (R2-D + R2-B2). document_chunks.id <= 값 filter. baseline 측정 시에도 동일 filter 적용.", + ), + reranker_backend: str | None = Query( + None, + pattern=r"^(baseline|cand_[a-z0-9_]+)$", + description="Phase 2B Diagnose reranker dispatcher (R2-B1 slug-based). slug 만 받음 (raw endpoint URL X). baseline|cand_. 미지정/baseline = production reranker.", + ), + rewrite_backend: str | None = Query( + None, + pattern=r"^(baseline|cand_[a-z0-9_]+)$", + description="Phase 2Q Diagnose query rewrite dispatcher (slug-based, no silent fallback). baseline|cand_multi_query_macmini|cand_multi_query_macbook. 
미지정/baseline = single-query path. Phase 2 = variant N 별 retrieval+fusion → unified RRF → reranker 1회.", + ), ): """문서 검색 — FTS + ILIKE + 벡터 결합 (Phase 3.1 이후 run_search wrapper)""" - pr = await run_search( - session, - q, - mode=mode, # type: ignore[arg-type] - limit=limit, - fusion=fusion, - rerank=rerank, - analyze=analyze, - ) + try: + pr = await run_search( + session, + q, + mode=mode, # type: ignore[arg-type] + limit=limit, + fusion=fusion, + rerank=rerank, + analyze=analyze, + embedding_backend=embedding_backend, + snapshot_doc_id_max=snapshot_doc_id_max, + snapshot_chunk_id_max=snapshot_chunk_id_max, + reranker_backend=reranker_backend, + rewrite_backend=rewrite_backend, + ) + except ValueError as e: + # _resolve_backend / _resolve_reranker / _resolve_rewrite_backend 가 unknown slug 시 ValueError → HTTP 400 + msg = str(e) + if msg.startswith("unknown_rewrite_backend"): + return JSONResponse( + status_code=400, + content={ + "error_reason": "unknown_rewrite_backend", + "backend_requested": rewrite_backend, + "allowed": query_rewriter.allowed_slugs(), + "detail": msg, + }, + ) + if msg.startswith("unknown_reranker_backend"): + return JSONResponse( + status_code=400, + content={ + "error_reason": "unknown_reranker_backend", + "backend_requested": reranker_backend, + "allowed": ["baseline", "cand_gte_ml_base"], + "detail": msg, + }, + ) + return JSONResponse( + status_code=400, + content={ + "error_reason": "unknown_embedding_backend", + "backend_requested": embedding_backend, + "allowed": ["baseline", "cand_me5_large_inst", "cand_snowflake_l_v2"], + "detail": msg, + }, + ) + except RuntimeError as e: + # query_rewriter.rewrite() 실패 (LLM unavailable / parse fail) → HTTP 503 + msg = str(e) + if msg.startswith("rewrite_llm_unavailable"): + return JSONResponse( + status_code=503, + content={ + "error_reason": "rewrite_llm_unavailable", + "backend_requested": rewrite_backend, + "detail": msg, + }, + ) + raise # 사용자 feedback: 모든 단계 timing은 debug 응답과 별도로 항상 로그로 남긴다 
timing_str = " ".join(f"{k}={v:.0f}" for k, v in pr.timing_ms.items()) diff --git a/app/prompts/query_rewrite.txt b/app/prompts/query_rewrite.txt new file mode 100644 index 0000000..f11c92d --- /dev/null +++ b/app/prompts/query_rewrite.txt @@ -0,0 +1,12 @@ +You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary). + +Task: given the user's search query, produce 3 search-friendly variants: +- variant 0 = original query (verbatim, no change) +- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형) +- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term) + +Rules: +- Each variant ≤ 80 chars. +- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration. +- Do not invent new entities. +- Output STRICT JSON only (no prose, no markdown, no code fence): {"variants": ["...", "...", "..."]} diff --git a/app/services/search/query_rewriter.py b/app/services/search/query_rewriter.py new file mode 100644 index 0000000..d6636df --- /dev/null +++ b/app/services/search/query_rewriter.py @@ -0,0 +1,286 @@ +"""Query rewriter — multi-query expansion (Phase 2Q Diagnose). + +Phase 2Q Diagnose 의 dispatcher + cache + LLM call layer. retrieval 합성 (search_with_rewrite) +은 Phase 2 별 commit. 본 모듈은 scaffold = slug → variants[3] 변환만 담당. + +## 핵심 룰 (plan v6 영구) +- ``Priority.FOREGROUND`` semaphore (retrieval inline path, user-facing). +- ``LLM_REWRITE_TIMEOUT_MS = 15000`` (fail-fast — background 와 다름). +- LLM 호출 실패 / parse fail / empty variants → cache 저장 X + caller 503 raise. +- baseline (slug=None) 호출은 LLM 우회 = ``None`` 반환. +- prompt template 1종 고정 (``app/prompts/query_rewrite.txt`` v1). +- raw endpoint URL query param X — slug-based allowlist (``LLM_BACKEND_MAP``). 
+""" + +from __future__ import annotations + +import asyncio +import hashlib +import json +import time +import unicodedata +from typing import Any + +import httpx + +from ai.client import _load_prompt, parse_json_response +from core.utils import setup_logger + +from .llm_gate import Priority, acquire_mlx_gate + +logger = setup_logger("query_rewriter") + +# ─── 상수 (plan v6 영구 룰) ────────────────────────────── +PROMPT_VERSION = "v1" # prompts/query_rewrite.txt manual string. 변경 시 cache 자동 분리. +CACHE_TTL = 86400 # 24h +CACHE_MAXSIZE = 1000 +LLM_REWRITE_TIMEOUT_MS = 15000 # retrieval inline path, fail-fast (B-3 background 와 다른 사유) +EXPECTED_N_VARIANTS = 3 # multi-query variant count, prompt v1 hardcoded + +# ─── Backend allowlist (plan v6 §5.1) ──────────────────── +# slug → backend cfg or None (baseline = no rewrite). sampling 박제 = fixture 와 단일 source. +LLM_BACKEND_MAP: dict[str, dict[str, Any] | None] = { + "baseline": None, + "cand_multi_query_macmini": { + "endpoint": "http://100.76.254.116:8801/v1/chat/completions", + "model": "gemma-4-26b-a4b-it-8bit", + "n_variants": 3, + "sampling": { + "temperature": 0.3, + "max_tokens": 256, + "response_format": {"type": "json_object"}, # MLX 호환 (Phase 0 inspect 9 PASS) + }, + "auth": None, + }, + "cand_multi_query_macbook": { + "endpoint": "http://100.118.112.84:8810/v1/chat/completions", + "model": "mlx-community/Qwen3.6-27B-8bit", + "n_variants": 3, + "sampling": { + "temperature": 0.3, + "max_tokens": 256, + # response_format 제거 — mlx-vlm.server json_object 미지원 (120s hang). + # prompt rule "Output STRICT JSON only" 강제 (Phase 0 inspect 9 박제). + }, + "auth": None, + }, +} + + +def _resolve_rewrite_backend(slug: str | None) -> dict[str, Any] | None: + """slug → backend cfg or None (baseline). 
Raises ValueError on unknown slug.""" + if slug is None or slug == "baseline": + return None + if slug not in LLM_BACKEND_MAP: + raise ValueError(f"unknown_rewrite_backend: {slug!r}") + return LLM_BACKEND_MAP[slug] + + +def allowed_slugs() -> list[str]: + """HTTP 400 error 응답의 ``allowed`` 필드용. caller 가 사용.""" + return list(LLM_BACKEND_MAP.keys()) + + +# ─── In-memory cache (query_analyzer.py 패턴 1:1) ──────── +_CACHE: dict[str, tuple[float, list[str]]] = {} # key → (expire_at, variants) +_CACHE_LOCK = asyncio.Lock() + + +def _cache_key(query: str, backend_slug: str) -> str: + canonical = unicodedata.normalize("NFKC", query.strip().lower()) + raw = f"{canonical}|{backend_slug}|{PROMPT_VERSION}" + return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:32] + + +async def _get_cached(key: str) -> list[str] | None: + """TTL 경과 entry 는 lazy delete. 없으면 None.""" + async with _CACHE_LOCK: + entry = _CACHE.get(key) + if entry is None: + return None + expire_at, variants = entry + if expire_at < time.time(): + _CACHE.pop(key, None) + return None + return list(variants) + + +async def _set_cached(key: str, variants: list[str]) -> None: + """LRU evict (FIFO 근사, query_analyzer 패턴).""" + async with _CACHE_LOCK: + if len(_CACHE) >= CACHE_MAXSIZE: + # oldest insert 1 entry evict (insertion order) + try: + oldest = next(iter(_CACHE)) + _CACHE.pop(oldest, None) + except StopIteration: + pass + _CACHE[key] = (time.time() + CACHE_TTL, list(variants)) + + +def cache_stats() -> dict[str, int]: + """diagnostics 용 — current size + maxsize.""" + return {"size": len(_CACHE), "maxsize": CACHE_MAXSIZE} + + +# ─── Prompt loading (lazy, 1회) ────────────────────────── +_PROMPT_TEMPLATE: str | None = None + + +def _get_prompt_template() -> str: + global _PROMPT_TEMPLATE + if _PROMPT_TEMPLATE is None: + _PROMPT_TEMPLATE = _load_prompt("query_rewrite.txt") + return _PROMPT_TEMPLATE + + +def _render_prompt(query: str) -> str: + """[deprecated, fixture-first 패턴 후 unused] ``{query}`` placeholder 치환. 
+ + 실제 LLM 호출은 ``_call_llm`` 에서 system/user 메시지 분리 (fixture invariant). + 본 헬퍼는 호환성만 보존 — prompt template 에 ``{query}`` placeholder 없으면 no-op. + """ + return _get_prompt_template().replace("{query}", query) + + +# ─── Variant extraction (parser fallback) ──────────────── +def _extract_variants(raw: str, expected_n: int) -> list[str] | None: + """LLM 응답 raw text → variants list. parse_json_response (production layer) 재사용. + + valid shape: ``{"variants": ["...", "...", "..."]}``. + 크기 부족 / type mismatch / 빈 string → None (caller 가 cache 저장 X + 503). + """ + obj = parse_json_response(raw) + if obj is None: + return None + variants = obj.get("variants") + if not isinstance(variants, list) or len(variants) != expected_n: + return None + cleaned: list[str] = [] + for v in variants: + if not isinstance(v, str): + return None + v_stripped = v.strip() + if not v_stripped: + return None + cleaned.append(v_stripped) + return cleaned + + +# ─── LLM call (httpx 직접, backends.py 패턴) ───────────── +async def _call_llm(cfg: dict[str, Any], query: str) -> str: + """OpenAI 호환 chat/completions 호출. cfg = LLM_BACKEND_MAP entry. + + 호출 형식 = fixture 단일 source-of-truth: + - system 메시지 = prompt template (instruction) + - user 메시지 = query (rewrite 대상) + + 이전 implementation (user 메시지에 prompt 전체 박음) 은 모델이 actual query 인식 못 함 + → 모든 query 에 동일 response 반환하는 NDCG catastrophic 버그 (Phase 3 cold 측정에서 발견). + fixture 의 request_body 와 일치 = production 호출 형식. + + Returns: raw response text (first choice message content). + Raises: httpx.* / KeyError / ValueError on protocol mismatch. 
+ """ + system_prompt = _get_prompt_template() + payload: dict[str, Any] = { + "model": cfg["model"], + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": query}, + ], + } + sampling = cfg.get("sampling") or {} + payload.update(sampling) + + timeout_s = LLM_REWRITE_TIMEOUT_MS / 1000.0 + async with httpx.AsyncClient(timeout=timeout_s) as client: + response = await client.post(cfg["endpoint"], json=payload) + response.raise_for_status() + data = response.json() + return data["choices"][0]["message"]["content"] + + +# ─── Public entry: rewrite() ───────────────────────────── +async def rewrite(query: str, backend_slug: str | None) -> list[str] | None: + """Multi-query rewrite. 성공 시 variants list, baseline 시 None. + + Args: + query: 원본 사용자 query + backend_slug: ``LLM_BACKEND_MAP`` key 또는 None/baseline + + Returns: + list[str] of EXPECTED_N_VARIANTS items (변형 0번 = 원본 verbatim — prompt 정책) + 또는 None (baseline = no rewrite, retrieval 은 single-query path). 
+ + Raises: + ValueError: unknown slug (caller 가 HTTP 400 으로 translate) + RuntimeError: LLM 호출 실패 / parse fail (caller 가 HTTP 503 으로 translate) + """ + cfg = _resolve_rewrite_backend(backend_slug) + if cfg is None: + return None + + slug = backend_slug or "baseline" + key = _cache_key(query, slug) + + cached = await _get_cached(key) + if cached is not None: + logger.info( + "[rewrite-dispatch] backend=%s n_variants=%d cache_hit=true " + "llm_endpoint=cached llm_model=cached llm_latency_ms=0 " + "rewrite_total_ms=0 query_hash=%s", + slug, len(cached), key[:8], + ) + return cached + + expected_n = int(cfg.get("n_variants", EXPECTED_N_VARIANTS)) + started = time.monotonic() + llm_started = 0.0 + llm_elapsed_ms = 0 + + try: + async with acquire_mlx_gate(Priority.FOREGROUND): + llm_started = time.monotonic() + raw = await _call_llm(cfg, query) + llm_elapsed_ms = int((time.monotonic() - llm_started) * 1000) + except httpx.HTTPError as e: + logger.warning( + "[rewrite-dispatch] backend=%s cache_hit=false error=http " + "detail=%s query_hash=%s", slug, type(e).__name__, key[:8], + ) + raise RuntimeError(f"rewrite_llm_unavailable:{slug}:{type(e).__name__}") from e + except (KeyError, ValueError, json.JSONDecodeError) as e: + logger.warning( + "[rewrite-dispatch] backend=%s cache_hit=false error=protocol " + "detail=%s query_hash=%s", slug, type(e).__name__, key[:8], + ) + raise RuntimeError(f"rewrite_llm_unavailable:{slug}:protocol") from e + + variants = _extract_variants(raw, expected_n) + total_ms = int((time.monotonic() - started) * 1000) + + if variants is None: + logger.warning( + "[rewrite-dispatch] backend=%s cache_hit=false error=parse " + "llm_latency_ms=%d rewrite_total_ms=%d query_hash=%s", + slug, llm_elapsed_ms, total_ms, key[:8], + ) + raise RuntimeError(f"rewrite_llm_unavailable:{slug}:parse") + + await _set_cached(key, variants) + + logger.info( + "[rewrite-dispatch] backend=%s n_variants=%d cache_hit=false " + "llm_endpoint=%s llm_model=%s 
llm_latency_ms=%d " + "rewrite_total_ms=%d query_hash=%s", + slug, len(variants), cfg["endpoint"], cfg["model"], + llm_elapsed_ms, total_ms, key[:8], + ) + for idx, text in enumerate(variants): + logger.info( + "[rewrite-variant] backend=%s query_hash=%s idx=%d text=%r", + slug, key[:8], idx, text[:120], + ) + + return variants diff --git a/app/services/search/rerank_service.py b/app/services/search/rerank_service.py index a35633d..877926a 100644 --- a/app/services/search/rerank_service.py +++ b/app/services/search/rerank_service.py @@ -40,6 +40,49 @@ MAX_CHUNKS_PER_DOC = 2 # Soft timeout (초) RERANK_TIMEOUT = 5.0 +# ─── Phase 2B Diagnose dispatcher (R2-B1 slug-based) ────────────── +# server-side allowlist map. query parameter 가 raw endpoint URL 받지 않음. +RERANKER_BACKEND_MAP: dict[str, dict[str, str] | None] = { + "baseline": None, # production reranker (config.yaml endpoint via AIClient.rerank) + "cand_gte_ml_base": { + "endpoint": "http://rerank-cand-gte-ml-base:80/rerank", + }, + # mxbai_large 후보 (deberta-v2 → TEI 1.7 미지원) Phase 2B-Extended 이관 + # bge_v2_gemma_2b 후보 (LLM-based reranker, 1_Pooling/config.json 부재) Phase 2B-Extended 이관 +} + + +def _resolve_reranker(slug: str | None) -> str | None: + """slug → endpoint URL or None (baseline = config.yaml via AIClient). + + Raises ValueError on unknown slug (caller 가 HTTP 400 으로 translate). + """ + if slug is None or slug == "baseline": + return None + if slug not in RERANKER_BACKEND_MAP: + raise ValueError(f"unknown_reranker_backend: {slug!r}") + cfg = RERANKER_BACKEND_MAP[slug] + return cfg["endpoint"] if cfg else None + + +async def _rerank_via_candidate_endpoint( + endpoint: str, query: str, texts: list[str] +) -> list[dict]: + """후보 TEI reranker endpoint 호출 (cache 미사용). + + Returns: + [{"index": int, "score": float}, ...] sorted score desc. + Raises: + httpx errors — caller 가 timeout/fallback path 로. 
+ """ + async with httpx.AsyncClient(timeout=RERANK_TIMEOUT) as c: + r = await c.post(endpoint, json={"query": query, "texts": texts}) + r.raise_for_status() + data = r.json() + if not isinstance(data, list): + raise ValueError(f"unexpected candidate TEI shape: {type(data).__name__}") + return data + def _extract_window(text: str, query: str, target_chars: int = 800) -> str: """query keyword 위치 중심으로 ±target_chars/2 윈도우 추출. @@ -96,6 +139,10 @@ async def rerank_chunks( query: str, candidates: list["SearchResult"], limit: int, + *, + reranker_backend: str | None = None, + snapshot_doc_id_max: int | None = None, + snapshot_chunk_id_max: int | None = None, ) -> list["SearchResult"]: """RRF 결과 candidates를 bge-reranker로 재정렬. @@ -120,12 +167,28 @@ async def rerank_chunks( candidates = candidates[:MAX_RERANK_INPUT] snippets = [_make_snippet(c, query) for c in candidates] - client = AIClient() + + # Phase 2B dispatcher (R2-B1 + R2-B2): slug → endpoint resolve, snapshot id dispatch log + cand_endpoint = _resolve_reranker(reranker_backend) + logger.info( + "[reranker-dispatch] backend=%s endpoint=%s snapshot_doc_id_max=%s snapshot_chunk_id_max=%s", + reranker_backend or "baseline", + cand_endpoint or "production(config.yaml)", + snapshot_doc_id_max, + snapshot_chunk_id_max, + ) + + client: AIClient | None = AIClient() if cand_endpoint is None else None try: async with asyncio.timeout(RERANK_TIMEOUT): async with RERANK_SEMAPHORE: - results = await client.rerank(query, snippets) + if cand_endpoint is None: + results = await client.rerank(query, snippets) + else: + results = await _rerank_via_candidate_endpoint( + cand_endpoint, query, snippets + ) # results: [{"index": int, "score": float}, ...] 
(이미 정렬됨) reranked: list["SearchResult"] = [] for r in results: @@ -150,7 +213,11 @@ async def rerank_chunks( logger.warning(f"rerank unexpected error → RRF fallback: {type(e).__name__}: {e}") return candidates[:limit] finally: - await client.close() + if client is not None: + try: + await client.close() + except Exception: + pass async def warmup_reranker() -> bool: diff --git a/app/services/search/retrieval_service.py b/app/services/search/retrieval_service.py index 631bf07..8fcf6d1 100644 --- a/app/services/search/retrieval_service.py +++ b/app/services/search/retrieval_service.py @@ -22,6 +22,7 @@ from __future__ import annotations import asyncio import hashlib +import re import time from typing import TYPE_CHECKING, Any @@ -48,6 +49,61 @@ _QUERY_EMBED_CACHE: dict[str, dict[str, Any]] = {} QUERY_EMBED_TTL = 86400 # 24h QUERY_EMBED_MAXSIZE = 500 +# ─── Phase 2A Diagnose dispatcher (R2-2 + R2-B1) ────────────── +# server-side allowlist map. query parameter 가 raw table name 받지 않음. +CANDIDATE_BACKEND_MAP: dict[str, dict[str, str] | None] = { + "baseline": None, + "cand_me5_large_inst": { + "docs_table": "documents_cand_me5_large_inst", + "chunks_table": "document_chunks_cand_me5_large_inst", + "embed_endpoint": "http://embedding-cand-me5-inst:80/embed", + }, + "cand_snowflake_l_v2": { + "docs_table": "documents_cand_snowflake_l_v2", + "chunks_table": "document_chunks_cand_snowflake_l_v2", + "embed_endpoint": "http://embedding-cand-snowflake-l-v2:80/embed", + }, +} + +# 2단계 gate (R2-B1) — SQL string interpolation 직전 final allowlist. +_VALID_DOCS_TABLE = re.compile(r"^(documents|documents_cand_[a-z0-9_]+)$") +_VALID_CHUNKS_TABLE = re.compile(r"^(document_chunks|document_chunks_cand_[a-z0-9_]+)$") + + +def _resolve_backend(slug: str | None) -> dict[str, str] | None: + """slug → (docs_table, chunks_table, embed_endpoint) | None (baseline). + + Raises ValueError on unknown slug (caller 가 HTTP 400 으로 translate). 
+ """ + if slug is None or slug == "baseline": + return None + if slug not in CANDIDATE_BACKEND_MAP: + raise ValueError(f"unknown_embedding_backend: {slug!r}") + cfg = CANDIDATE_BACKEND_MAP[slug] + if cfg is None: + return None + if not all(k in cfg for k in ("docs_table", "chunks_table", "embed_endpoint")): + raise RuntimeError(f"candidate_table_pair_misconfigured: {slug}") + return cfg + + +async def _embed_query_via_tei(endpoint: str, text_: str) -> list[float] | None: + """후보 TEI endpoint 호출 (cache 미사용 — slug 별 다른 모델 분포).""" + if not text_: + return None + import httpx + try: + async with httpx.AsyncClient(timeout=30.0) as c: + r = await c.post(endpoint, json={"inputs": [text_], "truncate": True}) + r.raise_for_status() + data = r.json() + if not isinstance(data, list) or not data or not isinstance(data[0], list): + raise ValueError(f"unexpected TEI shape: {type(data).__name__}") + return data[0] + except Exception as exc: + logger.warning("candidate TEI embed failed endpoint=%s err=%r", endpoint, exc) + return None + def _query_embed_key(text_: str) -> str: return hashlib.sha256(f"{text_}|bge-m3".encode("utf-8")).hexdigest() @@ -183,53 +239,78 @@ async def search_text( async def search_vector( - session: AsyncSession, query: str, limit: int + session: AsyncSession, + query: str, + limit: int, + *, + embedding_backend: str | None = None, + snapshot_doc_id_max: int | None = None, + snapshot_chunk_id_max: int | None = None, ) -> list["SearchResult"]: """Hybrid 벡터 검색 — doc + chunks 동시 retrieval (Phase 1.2-G). - Phase 1.2-C 진단: - chunks-only는 segment 의미 손실로 자연어 query에서 catastrophic recall. - doc embedding은 전체 본문 평균 → recall robust. - → 두 retrieval 동시 사용이 정석. + Phase 2A v4 dispatcher (R2-2 + R2-B1): + embedding_backend=None|"baseline" → production (documents + document_chunks). + snapshot_*_id_max 지정 시 baseline 도 동일 filter (rebaseline measurement). + embedding_backend=cand_ → CANDIDATE_BACKEND_MAP 에서 페어 resolve. 
+ cand 테이블 자체가 snapshot 범위로 INSERT → snapshot filter 무시 (dispatch log 만 박제). 데이터 흐름: - 1. query embedding 1번 (bge-m3) - 2. asyncio.gather로 두 SQL 동시 호출: - - _search_vector_docs: documents.embedding cosine top N - - _search_vector_chunks: document_chunks.embedding window partition (doc당 top 2) - 3. _merge_doc_and_chunk_vectors로 가중치 + dedup: - - chunk score * 1.2 (precision) - - doc score * 1.0 (recall) - - doc_id 기준 dedup, chunks 우선 - - Returns: - list[SearchResult] — doc_id 중복 제거됨. compress_chunks_to_docs는 그대로 동작. - chunks_by_doc은 search.py에서 group_by_doc으로 보존. + 1. query embedding 1번 (baseline=bge-m3 cache / cand=TEI endpoint no-cache) + 2. asyncio.gather 로 두 SQL 동시 호출: + - _search_vector_docs(docs_table, snapshot_doc_id_max) + - _search_vector_chunks(chunks_table, snapshot_chunk_id_max) + 3. _merge_doc_and_chunk_vectors 가중치 + dedup (chunk 1.2 / doc 1.0). """ - client = AIClient() - try: - query_embedding = await _get_query_embedding(client, query) - finally: + cfg = _resolve_backend(embedding_backend) + + if cfg is None: + docs_table = "documents" + chunks_table = "document_chunks" + client = AIClient() try: - await client.close() - except Exception: - pass + query_embedding = await _get_query_embedding(client, query) + finally: + try: + await client.close() + except Exception: + pass + else: + docs_table = cfg["docs_table"] + chunks_table = cfg["chunks_table"] + query_embedding = await _embed_query_via_tei(cfg["embed_endpoint"], query) + + logger.info( + "[embedding-dispatch] backend=%s docs_table=%s chunks_table=%s snapshot_doc_id_max=%s snapshot_chunk_id_max=%s", + embedding_backend or "baseline", + docs_table, + chunks_table, + snapshot_doc_id_max, + snapshot_chunk_id_max, + ) if query_embedding is None: return [] embedding_str = str(query_embedding) - # 두 SQL 병렬 호출 — 각각 별도 session 사용 (asyncpg connection은 statement 단위 직렬) Session = async_sessionmaker(engine) async def _docs_call() -> list["SearchResult"]: async with Session() as s: - return await 
_search_vector_docs(s, embedding_str, limit * 4) + return await _search_vector_docs( + s, embedding_str, limit * 4, + docs_table=docs_table, + snapshot_doc_id_max=snapshot_doc_id_max, + ) async def _chunks_call() -> list["SearchResult"]: async with Session() as s: - return await _search_vector_chunks(s, embedding_str, limit * 4) + return await _search_vector_chunks( + s, embedding_str, limit * 4, + chunks_table=chunks_table, + snapshot_chunk_id_max=snapshot_chunk_id_max, + ) doc_results, chunk_results = await asyncio.gather(_docs_call(), _chunks_call()) @@ -237,93 +318,116 @@ async def search_vector( async def _search_vector_docs( - session: AsyncSession, embedding_str: str, limit: int + session: AsyncSession, + embedding_str: str, + limit: int, + *, + docs_table: str = "documents", + snapshot_doc_id_max: int | None = None, ) -> list["SearchResult"]: - """documents.embedding 직접 검색 — recall robust (자연어 매칭). + """documents (또는 documents_cand_).embedding 직접 검색. - chunks가 없는 doc도 매칭 가능. score는 cosine similarity (1 - distance). - chunk_id/chunk_index/section_title은 None. + docs_table = "documents": production path. snapshot_doc_id_max 지정 시 id <= max filter. + docs_table = "documents_cand_": 후보 path. cand 테이블이 이미 snapshot 범위로 INSERT됨 → + snapshot_doc_id_max 무시. metadata 는 production documents 와 JOIN. + + R2-B1 final gate: docs_table 은 _VALID_DOCS_TABLE allowlist 통과 후 SQL interpolation. 
""" from api.search import SearchResult # 순환 import 회피 - result = await session.execute( - text(""" - SELECT - id, - title, - ai_domain, - ai_summary, - file_format, - (1 - (embedding <=> cast(:embedding AS vector))) AS score, - left(extracted_text, 1200) AS snippet, - 'vector_doc' AS match_reason, - NULL::bigint AS chunk_id, - NULL::integer AS chunk_index, - NULL::text AS section_title + if not _VALID_DOCS_TABLE.match(docs_table): + raise RuntimeError(f"invalid_docs_table: {docs_table!r}") + + params: dict[str, Any] = {"embedding": embedding_str, "limit": limit} + + if docs_table == "documents": + snapshot_clause = "" + if snapshot_doc_id_max is not None: + snapshot_clause = " AND id <= :snapshot_doc_id_max" + params["snapshot_doc_id_max"] = snapshot_doc_id_max + sql = f""" + SELECT id, title, ai_domain, ai_summary, file_format, + (1 - (embedding <=> cast(:embedding AS vector))) AS score, + left(extracted_text, 1200) AS snippet, + 'vector_doc' AS match_reason, + NULL::bigint AS chunk_id, NULL::integer AS chunk_index, NULL::text AS section_title FROM documents - WHERE embedding IS NOT NULL AND deleted_at IS NULL + WHERE embedding IS NOT NULL AND deleted_at IS NULL{snapshot_clause} ORDER BY embedding <=> cast(:embedding AS vector) LIMIT :limit - """), - {"embedding": embedding_str, "limit": limit}, - ) + """ + else: + # candidate: docs_table 은 (doc_id, embed_input, embed_input_hash, embedding) 만 보유 → JOIN documents + sql = f""" + SELECT d.id, d.title, d.ai_domain, d.ai_summary, d.file_format, + (1 - (c.embedding <=> cast(:embedding AS vector))) AS score, + left(d.extracted_text, 1200) AS snippet, + 'vector_doc' AS match_reason, + NULL::bigint AS chunk_id, NULL::integer AS chunk_index, NULL::text AS section_title + FROM {docs_table} c + JOIN documents d ON d.id = c.doc_id + WHERE d.deleted_at IS NULL + ORDER BY c.embedding <=> cast(:embedding AS vector) + LIMIT :limit + """ + result = await session.execute(text(sql), params) return [SearchResult(**row._mapping) for 
row in result] async def _search_vector_chunks( - session: AsyncSession, embedding_str: str, limit: int + session: AsyncSession, + embedding_str: str, + limit: int, + *, + chunks_table: str = "document_chunks", + snapshot_chunk_id_max: int | None = None, ) -> list["SearchResult"]: - """document_chunks.embedding 검색 + window partition (doc당 top 2 chunks). + """document_chunks (또는 document_chunks_cand_).embedding window partition. - SQL 흐름: - 1. inner CTE topk: ivfflat 인덱스로 top-K chunks 추출 - 2. ranked CTE: doc_id PARTITION + ROW_NUMBER (score 내림차순) - 3. outer: rn <= 2 (doc당 max 2 chunks) + JOIN documents + chunks_table = "document_chunks": production path. snapshot_chunk_id_max 지정 시 c.id <= max filter. + chunks_table = "document_chunks_cand_": cand 테이블 (이미 snapshot 범위로 INSERT) → filter 무시. + + R2-B1 final gate: chunks_table 은 _VALID_CHUNKS_TABLE allowlist 통과 후 SQL interpolation. """ from api.search import SearchResult # 순환 import 회피 + if not _VALID_CHUNKS_TABLE.match(chunks_table): + raise RuntimeError(f"invalid_chunks_table: {chunks_table!r}") + inner_k = max(limit * 5, 500) - result = await session.execute( - text(""" - WITH topk AS ( - SELECT - c.id AS chunk_id, - c.doc_id, - c.chunk_index, - c.section_title, - c.text, - c.embedding <=> cast(:embedding AS vector) AS dist - FROM document_chunks c - WHERE c.embedding IS NOT NULL - ORDER BY c.embedding <=> cast(:embedding AS vector) - LIMIT :inner_k - ), - ranked AS ( - SELECT - chunk_id, doc_id, chunk_index, section_title, text, dist, - ROW_NUMBER() OVER (PARTITION BY doc_id ORDER BY dist ASC) AS rn - FROM topk - ) - SELECT - d.id AS id, - d.title AS title, - d.ai_domain AS ai_domain, - d.ai_summary AS ai_summary, - d.file_format AS file_format, - (1 - r.dist) AS score, - left(r.text, 1200) AS snippet, - 'vector_chunk' AS match_reason, - r.chunk_id AS chunk_id, - r.chunk_index AS chunk_index, - r.section_title AS section_title - FROM ranked r - JOIN documents d ON d.id = r.doc_id - WHERE r.rn <= 2 AND d.deleted_at IS 
NULL - ORDER BY r.dist - LIMIT :limit - """), - {"embedding": embedding_str, "inner_k": inner_k, "limit": limit}, - ) + params: dict[str, Any] = {"embedding": embedding_str, "inner_k": inner_k, "limit": limit} + + snapshot_clause = "" + if chunks_table == "document_chunks" and snapshot_chunk_id_max is not None: + snapshot_clause = " AND c.id <= :snapshot_chunk_id_max" + params["snapshot_chunk_id_max"] = snapshot_chunk_id_max + + sql = f""" + WITH topk AS ( + SELECT c.id AS chunk_id, c.doc_id, c.chunk_index, c.section_title, c.text, + c.embedding <=> cast(:embedding AS vector) AS dist + FROM {chunks_table} c + WHERE c.embedding IS NOT NULL{snapshot_clause} + ORDER BY c.embedding <=> cast(:embedding AS vector) + LIMIT :inner_k + ), + ranked AS ( + SELECT chunk_id, doc_id, chunk_index, section_title, text, dist, + ROW_NUMBER() OVER (PARTITION BY doc_id ORDER BY dist ASC) AS rn + FROM topk + ) + SELECT d.id AS id, d.title AS title, d.ai_domain AS ai_domain, + d.ai_summary AS ai_summary, d.file_format AS file_format, + (1 - r.dist) AS score, left(r.text, 1200) AS snippet, + 'vector_chunk' AS match_reason, + r.chunk_id AS chunk_id, r.chunk_index AS chunk_index, r.section_title AS section_title + FROM ranked r + JOIN documents d ON d.id = r.doc_id + WHERE r.rn <= 2 AND d.deleted_at IS NULL + ORDER BY r.dist + LIMIT :limit + """ + result = await session.execute(text(sql), params) return [SearchResult(**row._mapping) for row in result] @@ -369,6 +473,10 @@ async def search_vector_multilingual( session: AsyncSession, normalized_queries: list[dict], limit: int, + *, + embedding_backend: str | None = None, + snapshot_doc_id_max: int | None = None, + snapshot_chunk_id_max: int | None = None, ) -> list["SearchResult"]: """Phase 2.2 — 다국어 normalized_queries 배열로 vector retrieval. @@ -393,18 +501,24 @@ async def search_vector_multilingual( if not normalized_queries: return [] - # 1. 
각 lang별 embedding 병렬 (cache hit 활용) - client = AIClient() - try: - embed_tasks = [ - _get_query_embedding(client, q["text"]) for q in normalized_queries - ] - embeddings = await asyncio.gather(*embed_tasks) - finally: + # 1. 각 lang별 embedding 병렬 (baseline=AIClient.embed cache / cand=TEI endpoint no-cache) + _cfg_for_embed = _resolve_backend(embedding_backend) + if _cfg_for_embed is None: + client = AIClient() try: - await client.close() - except Exception: - pass + embed_tasks = [ + _get_query_embedding(client, q["text"]) for q in normalized_queries + ] + embeddings = await asyncio.gather(*embed_tasks) + finally: + try: + await client.close() + except Exception: + pass + else: + ep = _cfg_for_embed["embed_endpoint"] + embed_tasks = [_embed_query_via_tei(ep, q["text"]) for q in normalized_queries] + embeddings = await asyncio.gather(*embed_tasks) # embedding 실패한 query는 skip (weight 재정규화 없이 조용히 drop) per_query_plan: list[tuple[dict, str]] = [] @@ -417,17 +531,38 @@ async def search_vector_multilingual( if not per_query_plan: return [] - # 2. 각 embedding에 대해 doc + chunks 병렬 retrieval + # 2. multilingual dispatcher resolve (모든 lang query 가 동일 backend 사용) + cfg = _resolve_backend(embedding_backend) + docs_table = cfg["docs_table"] if cfg else "documents" + chunks_table = cfg["chunks_table"] if cfg else "document_chunks" + logger.info( + "[embedding-dispatch] backend=%s docs_table=%s chunks_table=%s snapshot_doc_id_max=%s snapshot_chunk_id_max=%s multilingual=true", + embedding_backend or "baseline", + docs_table, + chunks_table, + snapshot_doc_id_max, + snapshot_chunk_id_max, + ) + + # 3. 
각 embedding에 대해 doc + chunks 병렬 retrieval Session = async_sessionmaker(engine) async def _one_query(q_meta: dict, embedding_str: str) -> list["SearchResult"]: async def _docs() -> list["SearchResult"]: async with Session() as s: - return await _search_vector_docs(s, embedding_str, limit * 4) + return await _search_vector_docs( + s, embedding_str, limit * 4, + docs_table=docs_table, + snapshot_doc_id_max=snapshot_doc_id_max, + ) async def _chunks() -> list["SearchResult"]: async with Session() as s: - return await _search_vector_chunks(s, embedding_str, limit * 4) + return await _search_vector_chunks( + s, embedding_str, limit * 4, + chunks_table=chunks_table, + snapshot_chunk_id_max=snapshot_chunk_id_max, + ) doc_r, chunk_r = await asyncio.gather(_docs(), _chunks()) return _merge_doc_and_chunk_vectors(doc_r, chunk_r) diff --git a/app/services/search/search_pipeline.py b/app/services/search/search_pipeline.py index 29b7980..c59a728 100644 --- a/app/services/search/search_pipeline.py +++ b/app/services/search/search_pipeline.py @@ -25,13 +25,14 @@ byte-level 에 가깝게 일치해야 한다. from __future__ import annotations +import asyncio import time from dataclasses import dataclass, field from typing import TYPE_CHECKING, Literal from sqlalchemy.ext.asyncio import AsyncSession -from . import query_analyzer +from . import query_analyzer, query_rewriter from .fusion_service import ( DEFAULT_FUSION, apply_soft_filter_boost, @@ -68,6 +69,13 @@ ANALYZER_TIER_IGNORE = 0.5 # < 0.5 → analyzer 완전 무시, soft_filter 비 ANALYZER_TIER_ORIGINAL = 0.7 # < 0.7 → original query fallback ANALYZER_TIER_MERGE = 0.85 # < 0.85 → original + analyzed merge +# ─── Phase 2Q multi-query 합성 상수 (plan v6 §5.5 박제) ── +# per-variant top-K = PRODUCTION_TOPK // N (50 // 3 = 16, A1 채택). +# reranker batch ≤ 60 cap → latency 회귀 0. 
+PHASE2Q_PRODUCTION_TOPK = 50 +PHASE2Q_UNIFIED_CAP = 60 # variant 합성 후 reranker 입력 후보 doc cap +PHASE2Q_RRF_K = 60 # production fusion_service.RRFOnly.K 와 동일 + def _analyzer_tier(confidence: float) -> str: """analyzer_confidence → 사용 tier 문자열. Phase 2.2/2.3에서 실제 분기용.""" @@ -121,6 +129,11 @@ async def run_search( fusion: str = DEFAULT_FUSION, rerank: bool = True, analyze: bool = False, + embedding_backend: str | None = None, + snapshot_doc_id_max: int | None = None, + snapshot_chunk_id_max: int | None = None, + reranker_backend: str | None = None, + rewrite_backend: str | None = None, ) -> PipelineResult: """검색 파이프라인 실행. @@ -136,6 +149,9 @@ async def run_search( fusion: legacy | rrf | rrf_boost rerank: bge-reranker-v2-m3 활성화 (hybrid 전용) analyze: QueryAnalyzer 활성화 (cache hit 조건부 멀티링구얼 / soft filter) + rewrite_backend: Phase 2Q multi-query rewrite dispatcher slug. None/baseline = + single-query path (기존 동작). hybrid + cand_ 시 search_with_rewrite() + 로 위임 — variant N retrieval → per-variant fusion → unified RRF → reranker 1회. Returns: PipelineResult @@ -143,6 +159,21 @@ async def run_search( # 로컬 import — circular 방지 (SearchResult 는 api.search 에 inline 선언) from api.search import SearchResult # noqa: F401 — TYPE_CHECKING 실런타임 반영 + # Phase 2Q dispatch — rewrite_backend 활성 + hybrid 만 multi-query path. + # 기타 mode 또는 baseline/None 은 기존 single-query 경로 그대로. 
+ if rewrite_backend not in (None, "baseline") and mode == "hybrid": + return await search_with_rewrite( + session, q, + limit=limit, + fusion=fusion, + rerank=rerank, + embedding_backend=embedding_backend, + snapshot_doc_id_max=snapshot_doc_id_max, + snapshot_chunk_id_max=snapshot_chunk_id_max, + reranker_backend=reranker_backend, + rewrite_backend=rewrite_backend, + ) + timing: dict[str, float] = {} notes: list[str] = [] text_results: list["SearchResult"] = [] @@ -214,9 +245,19 @@ async def run_search( if mode == "vector": t0 = time.perf_counter() if use_multilingual: - raw_chunks = await search_vector_multilingual(session, normalized_queries, limit) + raw_chunks = await search_vector_multilingual( + session, normalized_queries, limit, + embedding_backend=embedding_backend, + snapshot_doc_id_max=snapshot_doc_id_max, + snapshot_chunk_id_max=snapshot_chunk_id_max, + ) else: - raw_chunks = await search_vector(session, q, limit) + raw_chunks = await search_vector( + session, q, limit, + embedding_backend=embedding_backend, + snapshot_doc_id_max=snapshot_doc_id_max, + snapshot_chunk_id_max=snapshot_chunk_id_max, + ) timing["vector_ms"] = (time.perf_counter() - t0) * 1000 if not raw_chunks: notes.append("vector_search_returned_empty (AI client error or no embeddings)") @@ -231,9 +272,19 @@ async def run_search( if mode == "hybrid": t1 = time.perf_counter() if use_multilingual: - raw_chunks = await search_vector_multilingual(session, normalized_queries, limit) + raw_chunks = await search_vector_multilingual( + session, normalized_queries, limit, + embedding_backend=embedding_backend, + snapshot_doc_id_max=snapshot_doc_id_max, + snapshot_chunk_id_max=snapshot_chunk_id_max, + ) else: - raw_chunks = await search_vector(session, q, limit) + raw_chunks = await search_vector( + session, q, limit, + embedding_backend=embedding_backend, + snapshot_doc_id_max=snapshot_doc_id_max, + snapshot_chunk_id_max=snapshot_chunk_id_max, + ) timing["vector_ms"] = (time.perf_counter() - t1) 
# ─── Phase 2Q multi-query retrieval fusion ──────────────────


def _rrf_fuse_variants(
    variant_lists: "list[list[SearchResult]]",
    k: int,
    limit: int,
) -> "list[SearchResult]":
    """Fuse the ranked lists of N query variants with Reciprocal Rank Fusion.

    For every document id the fused score is ``sum(1 / (k + rank_i))`` over
    the variant lists the document appears in (ranks start at 1), so a
    document returned by several variants accumulates score.

    The SearchResult from the first list in which a document appears is kept
    as its representative: id, title, ai_domain, ai_summary, file_format and
    snippet are copied from it, ``score`` is replaced by the RRF score, and
    ``+multi_query_rrf`` is appended to its ``match_reason``. No other
    SearchResult field is passed to the constructor.

    NOTE(review): intended to mirror fusion_service.RRFOnly, which is not
    visible from here — confirm the two stay in sync.

    Args:
        variant_lists: One ranked (best first) result list per query variant.
        k: RRF smoothing constant.
        limit: Maximum number of fused documents to return.

    Returns:
        Fused documents sorted by descending RRF score; ties keep the order
        in which the documents were first seen.
    """
    from api.search import SearchResult  # local import avoids a circular import

    scores: dict[int, float] = {}
    representative: dict[int, "SearchResult"] = {}

    for variant_list in variant_lists:
        for rank, doc in enumerate(variant_list, start=1):
            scores[doc.id] = scores.get(doc.id, 0.0) + 1.0 / (k + rank)
            representative.setdefault(doc.id, doc)

    # sorted() is stable (also with reverse=True), so equal scores keep
    # first-seen order.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)

    fused: list["SearchResult"] = []
    for doc_id, rrf_score in ranked[:limit]:
        doc = representative[doc_id]
        fused.append(SearchResult(
            id=doc.id,
            title=doc.title,
            ai_domain=doc.ai_domain,
            ai_summary=doc.ai_summary,
            file_format=doc.file_format,
            score=rrf_score,
            snippet=doc.snippet,
            match_reason=f"{doc.match_reason}+multi_query_rrf",
        ))
    return fused


async def search_with_rewrite(
    session: AsyncSession,
    q: str,
    *,
    limit: int,
    fusion: str,
    rerank: bool,
    embedding_backend: str | None,
    snapshot_doc_id_max: int | None,
    snapshot_chunk_id_max: int | None,
    reranker_backend: str | None,
    rewrite_backend: str,
) -> PipelineResult:
    """Run the Phase 2Q multi-query (query rewrite) hybrid search path.

    Flow:
        1. ``query_rewriter.rewrite(q, rewrite_backend)`` produces the variants.
        2. Each variant runs text search + vector search with
           ``per_variant_k = PHASE2Q_PRODUCTION_TOPK // len(variants)``.
        3. Each variant's text/vector lists are fused with the requested
           fusion strategy.
        4. The per-variant fused lists are merged by ``_rrf_fuse_variants``
           (k=PHASE2Q_RRF_K, capped at PHASE2Q_UNIFIED_CAP documents).
        5. When ``rerank`` is true the reranker runs once over the unified
           candidate set, using the ORIGINAL query ``q``; otherwise the
           unified documents are used as they are.
        6. Freshness decay and display-score normalisation are applied.

    Variant retrieval runs sequentially: every call receives the caller's
    ``session``, and one SQLAlchemy ``AsyncSession`` must not be used by
    concurrent tasks.

    Args:
        session: Caller-owned database session, used for all queries here.
        q: Original user query (also the reranker query).
        limit: Number of results to return.
        fusion: Fusion strategy name passed to ``get_strategy``.
        rerank: Whether to run the reranker over the unified candidates.
        embedding_backend: Embedding dispatcher slug forwarded to
            ``search_vector``.
        snapshot_doc_id_max: Snapshot filter forwarded to retrieval/rerank.
        snapshot_chunk_id_max: Snapshot filter forwarded to retrieval/rerank.
        reranker_backend: Reranker dispatcher slug forwarded to
            ``rerank_chunks``.
        rewrite_backend: Query rewrite dispatcher slug.

    Returns:
        A PipelineResult with ``mode="hybrid"``. ``text_results`` and
        ``vector_results`` expose variant 0 only, and ``raw_chunks`` is empty.

    Raises:
        RuntimeError: ``rewrite_llm_unavailable:...`` when the rewriter
            returns no variants. The API layer translates this prefix to
            HTTP 503. Errors raised by ``query_rewriter.rewrite`` itself
            propagate unchanged.
    """
    from api.search import SearchResult  # noqa: F401

    timing: dict[str, float] = {}
    notes: list[str] = []
    t_total = time.perf_counter()

    # 1) Variants — LLM call; failures propagate to the caller.
    t_rw = time.perf_counter()
    variants = await query_rewriter.rewrite(q, rewrite_backend)
    timing["rewrite_ms"] = (time.perf_counter() - t_rw) * 1000
    if not variants:
        # Defensive: an empty/None result is reported with the same prefix
        # the API layer maps to 503, instead of silently falling back.
        raise RuntimeError(f"rewrite_llm_unavailable:{rewrite_backend}:empty_variants")

    per_variant_k = max(1, PHASE2Q_PRODUCTION_TOPK // len(variants))
    notes.append(
        f"rewrite={rewrite_backend} n_variants={len(variants)} "
        f"per_variant_k={per_variant_k}"
    )

    # 2) Per-variant retrieval (text + vector), one variant at a time.
    # The shared AsyncSession is not safe for concurrent tasks, so the
    # variants are not fanned out with asyncio.gather.
    # NOTE(review): if search_text/search_vector are changed to open their
    # own sessions, concurrency can be restored with one session per task.
    t_var = time.perf_counter()
    variant_outputs: list[
        "tuple[list[SearchResult], list[SearchResult], dict[int, list[SearchResult]]]"
    ] = []
    for variant in variants:
        text_hits = await search_text(session, variant, per_variant_k)
        raw_chunks = await search_vector(
            session, variant, per_variant_k,
            embedding_backend=embedding_backend,
            snapshot_doc_id_max=snapshot_doc_id_max,
            snapshot_chunk_id_max=snapshot_chunk_id_max,
        )
        vector_hits, chunks_by_doc = compress_chunks_to_docs(raw_chunks, per_variant_k)
        variant_outputs.append((text_hits, vector_hits, chunks_by_doc))
    timing["variant_retrieve_ms"] = (time.perf_counter() - t_var) * 1000

    # 3) Per-variant fusion, reusing the production fusion strategy.
    t_fuse = time.perf_counter()
    strategy = get_strategy(fusion)
    per_variant_fused: list[list["SearchResult"]] = []
    merged_chunks_by_doc: dict[int, list["SearchResult"]] = {}
    seen_chunk_ids: set[int] = set()
    for variant, (text_hits, vector_hits, chunks_by_doc) in zip(variants, variant_outputs):
        per_variant_fused.append(
            strategy.fuse(text_hits, vector_hits, variant, per_variant_k)
        )
        for doc_id, chunks in chunks_by_doc.items():
            bucket = merged_chunks_by_doc.setdefault(doc_id, [])
            for chunk in chunks:
                # The same chunk is often returned for several variants.
                # Keep it once so the per-document slice taken for the
                # reranker is not filled with duplicates.
                chunk_id = getattr(chunk, "chunk_id", None)
                if chunk_id is not None:
                    if chunk_id in seen_chunk_ids:
                        continue
                    seen_chunk_ids.add(chunk_id)
                bucket.append(chunk)
    timing["variant_fusion_ms"] = (time.perf_counter() - t_fuse) * 1000
    notes.append(f"fusion={strategy.name}")

    # 4) Cross-variant RRF — unified candidate set.
    t_rrf = time.perf_counter()
    unified_docs = _rrf_fuse_variants(
        per_variant_fused,
        k=PHASE2Q_RRF_K,
        limit=PHASE2Q_UNIFIED_CAP,
    )
    timing["unified_rrf_ms"] = (time.perf_counter() - t_rrf) * 1000
    notes.append(
        f"unified docs={len(unified_docs)} cap={PHASE2Q_UNIFIED_CAP}"
    )

    # 5) Single reranker pass over the unified set (query = original q).
    if rerank:
        t_re = time.perf_counter()
        rerank_input: list["SearchResult"] = []
        for doc in unified_docs:
            chunks = merged_chunks_by_doc.get(doc.id, [])
            if chunks:
                rerank_input.extend(chunks[:MAX_CHUNKS_PER_DOC])
            else:
                # No chunk-level hit for this document: rerank the
                # document-level result itself.
                rerank_input.append(doc)
            if len(rerank_input) >= MAX_RERANK_INPUT:
                break
        rerank_input = rerank_input[:MAX_RERANK_INPUT]
        notes.append(f"rerank input={len(rerank_input)}")

        reranked = await rerank_chunks(
            q, rerank_input, limit * 3,
            reranker_backend=reranker_backend,
            snapshot_doc_id_max=snapshot_doc_id_max,
            snapshot_chunk_id_max=snapshot_chunk_id_max,
        )
        timing["rerank_ms"] = (time.perf_counter() - t_re) * 1000

        t_div = time.perf_counter()
        results = apply_diversity(reranked, max_per_doc=MAX_CHUNKS_PER_DOC)[:limit]
        timing["diversity_ms"] = (time.perf_counter() - t_div) * 1000
    else:
        results = unified_docs[:limit]

    # 6) Freshness decay + display-score normalisation.
    t_fr = time.perf_counter()
    results = await apply_freshness_decay(results, session)
    timing["freshness_ms"] = (time.perf_counter() - t_fr) * 1000

    normalize_display_scores(results)

    timing["total_ms"] = (time.perf_counter() - t_total) * 1000

    # Confidence: use reranker scores when the reranker ran; otherwise score
    # the unified result list with the "vector" signal.
    if rerank:
        confidence_signal = compute_confidence_reranked(results)
    else:
        confidence_signal = compute_confidence(results, "vector")

    # Only variant 0 is exposed as text/vector results.
    # NOTE(review): assumes variant 0 is the verbatim original query, per the
    # rewriter prompt contract — confirm.
    text_v0, vector_v0, _ = variant_outputs[0]

    return PipelineResult(
        results=results,
        mode="hybrid",
        confidence_signal=confidence_signal,
        text_results=text_v0,
        vector_results=vector_v0,
        raw_chunks=[],  # per-variant raw chunks are not merged for debug output
        chunks_by_doc=merged_chunks_by_doc,
        query_analysis=None,
        analyzer_cache_hit=False,
        analyzer_confidence=0.0,
        analyzer_tier="disabled",
        timing_ms=timing,
        notes=notes,
    )
+# +# 후보 상태 (2026-05-23): +# - me5_large_inst : ✅ smoke PASS (dim 1024) +# - bge_mgemma2 : ❌ Phase 2A-Extended 별 PR 이관 (9B FP16 → VRAM OOM risk + 다운로드 cost) +# - me5_ko : ❌ 폐기 (401 Unauthorized, gated/모델명 부정확) +# - snowflake_l_v2 : 신규 추가 (Snowflake/snowflake-arctic-embed-l-v2.0, 2024-12, multilingual 강화) +# +# 사용: +# docker compose -f docker-compose.yml -f docker-compose.override.cand.yml \ +# --profile embed-cand up -d embedding-cand-me5-inst +# +# 호출 (DS network 내부): +# http://embedding-cand-me5-inst:80/embed +# http://embedding-cand-snowflake-l-v2:80/embed + +services: + embedding-cand-me5-inst: + image: ghcr.io/huggingface/text-embeddings-inference:1.7 + restart: unless-stopped + container_name: hyungi_document_server-embedding-cand-me5-inst-1 + expose: + - "80" + environment: + - MODEL_ID=intfloat/multilingual-e5-large-instruct + - MAX_BATCH_TOKENS=8192 + - MAX_CONCURRENT_REQUESTS=4 + volumes: + - embedding_cand_me5_inst_cache:/data + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 60s + profiles: ["embed-cand"] + + embedding-cand-snowflake-l-v2: + image: ghcr.io/huggingface/text-embeddings-inference:1.7 + restart: unless-stopped + container_name: hyungi_document_server-embedding-cand-snowflake-l-v2-1 + expose: + - "80" + environment: + - MODEL_ID=Snowflake/snowflake-arctic-embed-l-v2.0 + - MAX_BATCH_TOKENS=8192 + - MAX_CONCURRENT_REQUESTS=4 + volumes: + - embedding_cand_snowflake_l_v2_cache:/data + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 60s + profiles: ["embed-cand"] + + # ===== 비활성 후보 (Phase 2A-Extended 별 PR 이관 또는 폐기) ===== + # 진단 박제만 보존. 본 PR scope 외. 
+ + embedding-cand-bge-mgemma2: + image: ghcr.io/huggingface/text-embeddings-inference:1.7 + container_name: hyungi_document_server-embedding-cand-bge-mgemma2-1 + expose: + - "80" + environment: + - MODEL_ID=BAAI/bge-multilingual-gemma2 + - MAX_BATCH_TOKENS=8192 + - MAX_CONCURRENT_REQUESTS=4 + volumes: + - embedding_cand_bge_mgemma2_cache:/data + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 300s + profiles: ["embed-cand-extended"] # 본 PR 미사용. extended 별 profile. + + embedding-cand-me5-ko: + image: ghcr.io/huggingface/text-embeddings-inference:1.7 + container_name: hyungi_document_server-embedding-cand-me5-ko-1 + expose: + - "80" + environment: + - MODEL_ID=dragonkue/multilingual-e5-large-ko + - MAX_BATCH_TOKENS=8192 + - MAX_CONCURRENT_REQUESTS=4 + volumes: + - embedding_cand_me5_ko_cache:/data + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 60s + profiles: ["embed-cand-disabled"] # 401 fail. 사용 X. + +volumes: + embedding_cand_me5_inst_cache: + embedding_cand_snowflake_l_v2_cache: + embedding_cand_bge_mgemma2_cache: + embedding_cand_me5_ko_cache: diff --git a/docker-compose.override.rerank-cand.yml b/docker-compose.override.rerank-cand.yml new file mode 100644 index 0000000..6ffe16e --- /dev/null +++ b/docker-compose.override.rerank-cand.yml @@ -0,0 +1,101 @@ +# Phase 2B — Reranker candidate compose override (Diagnose only) +# +# Profile-isolated: `--profile rerank-cand` 명시 opt-in. default up 시 미기동. +# production fastapi/postgres/reranker(bge-reranker-v2-m3) 에 영향 0. +# 본 PR 종료 후 별 chore (PR-2B-Rerank-Cand-Cleanup-1) 에서 제거. 
+# +# 후보 상태 (2026-05-23): +# - gte_ml_base : Apache 2.0, 305M, smoke 대기 +# - mxbai_large : Apache 2.0, ~435M, safetensors 부재 — TEI smoke risk +# - bge_v2_gemma_2b : Gemma 라이센스, 2.5B FP16 ~5GB, smoke 대기 +# +# 사용: +# docker compose -f docker-compose.yml -f docker-compose.override.rerank-cand.yml \ +# --profile rerank-cand up -d rerank-cand-gte-ml-base + +services: + rerank-cand-gte-ml-base: + image: ghcr.io/huggingface/text-embeddings-inference:1.7 + restart: unless-stopped + container_name: hyungi_document_server-rerank-cand-gte-ml-base-1 + expose: + - "80" + environment: + - MODEL_ID=Alibaba-NLP/gte-multilingual-reranker-base + - MAX_BATCH_TOKENS=8192 + - MAX_CONCURRENT_REQUESTS=4 + volumes: + - rerank_cand_gte_ml_base_cache:/data + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 60s + profiles: ["rerank-cand"] + + rerank-cand-mxbai-large: + image: ghcr.io/huggingface/text-embeddings-inference:1.7 + restart: unless-stopped + container_name: hyungi_document_server-rerank-cand-mxbai-large-1 + expose: + - "80" + environment: + - MODEL_ID=mixedbread-ai/mxbai-rerank-large-v1 + - MAX_BATCH_TOKENS=8192 + - MAX_CONCURRENT_REQUESTS=4 + volumes: + - rerank_cand_mxbai_large_cache:/data + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 60s + profiles: ["rerank-cand"] + + rerank-cand-bge-v2-gemma-2b: + image: ghcr.io/huggingface/text-embeddings-inference:1.7 + restart: unless-stopped + container_name: hyungi_document_server-rerank-cand-bge-v2-gemma-2b-1 + expose: + - "80" + environment: + - MODEL_ID=BAAI/bge-reranker-v2-gemma + - MAX_BATCH_TOKENS=8192 + - MAX_CONCURRENT_REQUESTS=2 + volumes: + - 
rerank_cand_bge_v2_gemma_2b_cache:/data + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 120s + profiles: ["rerank-cand"] + +volumes: + rerank_cand_gte_ml_base_cache: + rerank_cand_mxbai_large_cache: + rerank_cand_bge_v2_gemma_2b_cache: diff --git a/reports/phase_2a_embedding_decision_2026-05-23.md b/reports/phase_2a_embedding_decision_2026-05-23.md new file mode 100644 index 0000000..6b38f9a --- /dev/null +++ b/reports/phase_2a_embedding_decision_2026-05-23.md @@ -0,0 +1,97 @@ +# Phase 2A Embedding Decision Report (2026-05-23) + +> Parent: `phase-2a-embedding-diagnose.md` v4 +> +> Round 2 review: `round-2-review-mighty-starfish.md` 채택 +> +> 본 보고서 = Phase 4 산출물. Decision Tree H1~H4 중 권고 1개 + 후속 PR 후보. + +## 1. Summary + +| | Value | +|---|---| +| baseline (bge-m3, snapshot 범위) | NDCG@10 (graded) **0.659** / mixed 0.39 / korean_only 0.51 / failure 0/5 / p50 464ms / p95 1582ms | +| baseline rebaseline (snapshot filter 적용) | 위와 동일 (snapshot 범위 = corpus 전부와 거의 동일, 측정 가능 확인) | +| 후보 2종 측정 완료 | me5_large_inst (mE5-instruct), snowflake_l_v2 (Snowflake Arctic L v2.0) | +| 이관 (별 PR) | bge_mgemma2 (9B FP16 → 16GB GPU OOM risk → PR-2A-Extended-Bge-Mgemma2) | +| 폐기 | ko_me5 (HF 401 Unauthorized) | + +## 2. 
후보별 Δ NDCG (vs baseline rebaseline) + +| Candidate | overall NDCG | Δ overall | mixed | Δ mixed | korean_only | Δ korean | standards | english_only | exam | failure | p50 ms | p95 ms | +|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:| +| **bge-m3 snapshot rebaseline** | **0.659** | — | **0.39** | — | **0.51** | — | 0.87 | 0.78 | 0.74 | 0/5 | 464 | 1582 | +| mE5-large-instruct | 0.477 | **-0.182** | 0.17 | **-0.22** | 0.47 | -0.04 | 0.54 | 0.63 | 0.62 | 0/5 | 194 | 1348 | +| snowflake-arctic-embed-l-v2.0 | 0.616 | -0.043 | 0.35 | -0.04 | 0.52 | +0.01 | 0.87 | 0.74 | 0.56 | 0/5 | 254 | 1412 | + +**관찰**: +- **mE5-large-instruct**: 전 카테고리 큰 회귀. Δ -0.182 overall. mixed 절반 회귀 (0.39 → 0.17). standards 도 큰 회귀 (0.87 → 0.54). 단 latency p50 270ms 단축 (mE5 의 512 context = 적은 compute). +- **snowflake_l_v2**: 가벼운 회귀 (Δ -0.043). standards / korean_only 거의 동일. mixed 약간 회귀. exam 명확 회귀 (-0.18). latency p50 210ms 단축. + +**ambiguous note** (LLM 단독 결정, [[feedback_user_block_minimize]]): +- mE5-instruct 는 query input 에 `Instruct: \nQuery: ` prefix 권장 (intfloat 모델 카드). 본 PR 측정은 plain query → prefix 효과 미반영. prefix 적용 시 +0.05~0.15 회복 가능성 있으나 측정 외 — 별 PR 후보 `PR-2A-mE5-Prefix-Retry`. +- snowflake_l_v2 의 한국어 specific 벤치마크 공개 부재. 본 측정 = 사실상 한국어 specific 첫 audit. korean_only +0.01 미세 개선 신호 있으나 통계적 의미 없음 (n=9, 0.51 vs 0.52). + +## 3. Latency 영향 + +- mE5 (512 ctx): p50 464 → 194 (**−270ms**), p95 1582 → 1348 (−234ms). 빠름. +- snowflake (8192 ctx): p50 464 → 254 (**−210ms**), p95 1582 → 1412 (−170ms). 빠름. +- 둘 다 baseline 보다 빠르지만 quality 회귀. **trade-off favor quality** ([[feedback_quant_expectation_not_hard_gate]] 룰, 정량 hard gate 없으나 +0 NDCG 회복 시 latency 30% 단축 가치는 별 평가). + +## 4. 
Decision (H3 — bge-m3 유지) + +| | H1 swap 권고 | H2 query rewrite 보완 | **H3 bge-m3 유지 (✅ 선택)** | H4 latency 회귀 | +|---|---|---|---|---| +| 조건 | mixed + korean_only 둘 다 명확 개선 | korean_only 만 개선 / mixed 미개선 | 모든 후보 bge-m3 대비 개선 없음 | latency p95 ≥ 3000ms | +| 결과 | ❌ 둘 다 회귀 | ❌ korean_only 미세 개선 (+0.01) 만, mixed 회귀 | ✅ **확정** | ❌ 둘 다 baseline 보다 빠름 | + +**최종 권고**: **bge-m3 유지** (Apply PR 진입 안 함). + +근거: +- mE5 -0.182 / snowflake -0.043 — 둘 다 net 회귀. +- korean_only 약점 보완 도구로 embedding swap 보다 query rewrite (Phase 2Q) 또는 reranker 튜닝 (Phase 2B) 가 더 유망. +- mE5 prefix retry 는 별 PR 로 분리 — diagnose 본 PR scope 외. + +## 5. Apply / 보완 / 보류 권고 + +- **Apply** (production embedding swap): **하지 않음**. +- **보완** (다른 트랙): **Phase 2B (Reranker)** 또는 **Phase 2Q (Query rewrite)** 우선 — korean_only / mixed 약점 다른 layer 에서 공략. +- **보류** (Phase 2A-Extended): bge_mgemma2 (별 PR), mE5 prefix retry (별 PR), Cloud embedding (Cohere/Voyage) scaffold-only (별 PR). + +## 6. 후보 cleanup 일정 + +- 미선택 후보 4 테이블 (`documents_cand_me5_large_inst` / `document_chunks_cand_me5_large_inst` / `documents_cand_snowflake_l_v2` / `document_chunks_cand_snowflake_l_v2`) = **1주 dormant 유지** (mE5 prefix retry / Phase 2Q 비교 baseline 사용 가능성). +- 1주 후 별 chore `PR-2A-Chunks-Cand-Cleanup-1` 에서 DROP + 컨테이너 docker-compose.override 제거. + +## 7. 후속 PR 후보 (백로그) + +| PR 가칭 | trigger | scope | +|---|---|---| +| `PR-2A-mE5-Prefix-Retry` | 본 PR 결과 + ambiguous note | mE5-instruct query prefix 적용 후 재측정. 페어 reindex 재실행 + 51 case 재측정. 본 PR 의 dispatcher 재사용 (`CANDIDATE_BACKEND_MAP` 에 신규 slug 추가). | +| `PR-2A-Extended-Bge-Mgemma2` | v3 short-list swap 결정 | 9B FP16 OOM 회피 (quantization int8 또는 sentence-transformers). 별 컨테이너 + reindex + 측정. | +| `PR-2A-Cloud-Embedding-Scaffold-1` | (선택) self-hosted 무개선 확정 | Cohere / Voyage scaffold-only (`[[feedback_scaffold_first_for_external_cost_pr]]`). 실비 0. | +| `PR-Search-Query-Rewrite-1` (Phase 2Q) | korean_only / mixed 약점 보완 | 자연어 query → SQL/keyword 강화. 
| +| `PR-Search-Reranker-V2-Diagnose` (Phase 2B) | korean_only / mixed 약점 보완 | bge-reranker-v2-m3 swap 후보 측정. | +| `PR-2A-Chunks-Cand-Cleanup-1` | 본 PR closure 후 1주 | 4 cand 테이블 DROP + 컨테이너 정리. | + +## 8. Closure gate verify (§ 8 본 plan) + +- [x] G0-1 + G0-2 fixture 박제 (Phase 1 closure 시 commit `943ac5f`) +- [x] snapshot json 박제 (`v0_2_phase2a_snapshot_2026-05-23.json`, commit `a67df0a`) +- [x] 2 후보 (me5_large_inst + snowflake_l_v2) 51 case 측정 완료 (`overall.n = 46`, 5 failure 제외) +- [x] baseline rebaseline 51 case 측정 완료 (snapshot filter 적용) +- [x] 후보별 baseline json 2개 + baseline_snapshot json 1개 박제 +- [x] documents_cand_ row count = 21365 verify (2 후보 동일) +- [x] document_chunks_cand_ row count = 30605 verify (2 후보 동일) +- [x] baseline rebaseline 측정도 동일 snapshot_doc/chunk_id_max filter 통과 verify (dispatch log) +- [x] dispatcher 호출 시 unknown slug → HTTP 400 verify (smoke test `cand_invalid` 통과) +- [x] decision md 박제 (본 파일) +- [x] Apply 권고 1줄 작성 (H3) +- [x] production embedding (bge-m3 ollama) 변경 0 verify (`docker compose ps`, `ollama list`, `config.yaml` diff 0) +- [x] production `documents` row count + embedding 변경 0 verify +- [x] production `document_chunks` row count + content 변경 0 verify +- [x] 후보 cleanup 일정 명시 (1주 dormant → `PR-2A-Chunks-Cand-Cleanup-1`) +- [x] dispatch log audit (silent fallback 0, `embedding_backend_unavailable` 0, snapshot id 박제 verify) +- [x] DOCSRV_TOKEN 만료 사고 0 (3 측정 모두 15분 이내 완주) + +**Phase 2A Diagnose PR closure: PASS**. diff --git a/reports/phase_2b_reranker_decision_2026-05-23.md b/reports/phase_2b_reranker_decision_2026-05-23.md new file mode 100644 index 0000000..69c869e --- /dev/null +++ b/reports/phase_2b_reranker_decision_2026-05-23.md @@ -0,0 +1,103 @@ +# Phase 2B Reranker Decision Report (2026-05-23) + +> Parent: `round-2-review-mighty-starfish.md` v2.1 +> +> 본 보고서 = Phase 4 산출물. Decision Tree H1~H4 중 권고 1개 + 후속 PR 후보. + +## 1. 
Summary + +| | Value | +|---|---| +| baseline (bge-reranker-v2-m3, snapshot 범위) | NDCG@10 (graded) **0.659** / mixed 0.39 / korean_only 0.51 / failure 0/5 / p50 454ms / p95 1573ms | +| 측정 후보 (A 그룹 1종) | cand_gte_ml_base (Alibaba-NLP/gte-multilingual-reranker-base, 305M, Apache 2.0) | +| **TEI 1.7 호환성 탈락 후보 (2종 → Phase 2B-Extended)** | (a) `cand_mxbai_large` — deberta-v2 architecture not supported by TEI candle backend. (b) `cand_bge_v2_gemma_2b` — LLM-based reranker, `1_Pooling/config.json` 부재 (FlagEmbedding LayerwiseReranker wrapper 필요) | +| 폐기 (라이센스) | jinaai/jina-reranker-v2-base-multilingual — CC-BY-NC 4.0 (v1.1 결정) | + +본 PR 은 plan 의 closure gate flex 조항 ("후보 탈락 시 N 후보 + baseline 으로 closure 가능, decision md 에 제외 사유 명시") 적용. **3 후보 원칙 → 1 후보 측정** (2 후보 TEI 호환 X). + +## 2. 후보별 Δ NDCG (vs baseline rebaseline, snapshot 범위) + +| Candidate | overall NDCG | Δ overall | mixed | Δ mixed | korean_only | Δ korean | standards | english_only | exam | failure | p50 ms | p95 ms | +|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:| +| **bge-reranker-v2-m3 snapshot rebaseline** | **0.659** | — | **0.39** | — | **0.51** | — | 0.87 | 0.78 | 0.74 | 0/5 | 454 | 1573 | +| cand_gte_ml_base (gte-multilingual-reranker-base) | 0.604 | **-0.055** | 0.38 | -0.01 | 0.41 | **-0.10** | 0.86 | 0.72 | 0.62 | 0/5 | 345 | 1460 | + +**관찰**: +- **overall NDCG**: -0.055 (회귀, 통계적 의미 있는 수준). +- **korean_only**: -0.10 (큰 회귀 — Phase 2B 가 공략 대상이던 약점이 더 악화). +- **mixed**: -0.01 (거의 같음). +- **standards / english_only / exam**: 모두 회귀 (-0.01 ~ -0.12). +- **latency**: p50 -109ms 단축 (gte 305M 가 production 568M 보다 적은 compute), p95 -113ms 단축. + +## 3. 베이스라인 재현성 (Phase 2A 와 비교) + +| | Phase 2A baseline_snapshot (2026-05-23) | Phase 2B baseline_snapshot (2026-05-23) | diff | +|---|---:|---:|---:| +| overall NDCG | 0.659 | 0.659 | 0.000 | +| mixed | 0.39 | 0.39 | 0.000 | +| korean_only | 0.51 | 0.51 | 0.000 | +| p50 ms | 464 | 454 | -10 | + +**Snapshot filter path 안정**. 
dispatcher 추가 후 production 회귀 0 verify. + +## 4. Decision (H3 — bge-reranker-v2-m3 유지) + +| | H1 swap 권고 | H2 부분 개선 | **H3 무개선 (✅ 선택)** | H4 latency 회귀 | +|---|---|---|---|---| +| 조건 | korean_only + mixed 둘 다 명확 개선 | 한쪽만 개선 | 모두 baseline 대비 개선 없음 | p95 ≥ 3000ms | +| 결과 | ❌ 둘 다 회귀 (korean -0.10, mixed -0.01) | ❌ 회귀만 있음 | ✅ **확정** | ❌ p95 1460ms < 3000 | + +**최종 권고**: **bge-reranker-v2-m3 유지** (Apply PR 진입 X). + +근거: +- gte_ml_base 의 reranker quality 가 production bge-reranker-v2-m3 보다 명확 약함 (특히 한국어 -0.10). +- mxbai_large + bge_v2_gemma_2b 의 TEI 1.7 호환 X → A 그룹 측정 culture 다 활용 못함. Extended PR 가능성은 있으나 본 PR scope 외. +- korean_only / mixed 약점 보완은 **다른 layer (Phase 2Q query rewrite / 보다 강력한 reranker 의 native 호출 경로 등)** 가 더 유망. + +## 5. Apply / 보완 / 보류 권고 + +- **Apply** (production reranker swap): **하지 않음**. +- **보완** (다른 트랙): + - **Phase 2Q (Query rewrite)** 우선 권고 — korean_only / mixed query 의 자연어 → 명사구 추출 / multilingual normalize. + - mxbai-rerank-large-v1 sentence-transformers 직접 호출 (TEI 우회) → Phase 2B-Extended. + - bge-reranker-v2-gemma FlagEmbedding LayerwiseReranker wrapper → Phase 2B-Extended. +- **보류** (Phase 2B-Extended): + - cand_mxbai_large (sentence-transformers direct, TEI 우회) + - cand_bge_v2_gemma_2b (FlagEmbedding wrapper) + - cand_jina_v2_ml (CC-BY-NC license 결정 후, 개인 비영리 사용 결정 시) + +## 6. 후보 cleanup 일정 + +- `rerank-cand-gte-ml-base` 컨테이너 = **1주 dormant 유지** (Phase 2Q 또는 Extended PR 비교 baseline 가능성). +- 1주 후 별 chore `PR-2B-Rerank-Cand-Cleanup-1` 에서 컨테이너 정리 + docker-compose.override.rerank-cand.yml 제거. + +## 7. 후속 PR 후보 (백로그) + +| PR 가칭 | trigger | scope | +|---|---|---| +| `PR-Search-Query-Rewrite-1` (Phase 2Q) | korean_only / mixed 약점 보완 | LLM-driven query expansion + multilingual normalize. korean_only 0.51 / mixed 0.39 출발점. | +| `PR-2B-Extended-Mxbai-Large` | (선택) sentence-transformers 트랙 | TEI 우회. sentence-transformers 직접 호출 wrapper. deberta-v2 지원. | +| `PR-2B-Extended-Bge-V2-Gemma` | (선택) FlagEmbedding 트랙 | LayerwiseReranker wrapper. 
9B variant int8 quantization 옵션. | +| `PR-2B-Extended-Jina-V2-ML` | (선택) license 결정 후 | jinaai/jina-reranker-v2-base-multilingual 측정. CC-BY-NC 라이센스 + 개인 비영리 사용 가정. | +| `PR-2B-Cloud-Reranker-Scaffold-1` | (선택) self-hosted 무개선 확정 | Cohere rerank-multilingual-v3.0 scaffold-only ([[feedback_scaffold_first_for_external_cost_pr]]). 실비 0. | +| `PR-2B-Rerank-Cand-Cleanup-1` | 본 PR closure 후 1주 | rerank-cand-gte-ml-base 컨테이너 + override yml 제거. | + +## 8. Closure gate verify (§ 7 본 plan) + +- [x] G0-1 fixture 박제 commit (한국어+영어 mixed sample, sanity ASME>고압가스>weather PASS) +- [x] (flex) gte_ml_base 측정 완주 (`overall.n = 51`, scored 46). bge_v2_gemma_2b + mxbai_large 는 TEI 호환 탈락으로 제외 (사유 § 1 명시) +- [x] baseline rebaseline 51 case 측정 완료 (snapshot filter 적용, NDCG 0.659 Phase 2A 와 동일 = 재현성 PASS) +- [x] 후보 baseline json 1개 + baseline_snapshot json 1개 박제 +- [x] decision md 박제 (본 파일) +- [x] Apply 권고 1줄 (H3 bge-reranker-v2-m3 유지) +- [x] production reranker (bge-reranker-v2-m3) 변경 0 verify (`docker compose ps` reranker UP, `config.yaml` diff 0) +- [x] production documents / document_chunks 변경 0 verify (Phase 2A 결과 보존, 21365 docs / 30605 chunks) +- [x] embedding (bge-m3 ollama) 변경 0 verify +- [x] dispatcher 호출 시 unknown slug → HTTP 400 verify (smoke `cand_invalid` PASS) +- [x] reranker dispatch log audit (silent fallback 0, snapshot id 박제 verify) +- [x] 후보 컨테이너 1주 dormant 후 cleanup chore 등록 (PR-2B-Rerank-Cand-Cleanup-1) +- [x] DOCSRV_TOKEN 만료 사고 0 (3 측정 + smoke 모두 15분 이내) +- [x] Phase 2A 의 후보 컨테이너 (`embedding-cand-*`) 와 충돌 0 (별 profile `rerank-cand`) +- [x] commit 직전 `git branch --show-current` verify ([[feedback_multi_session_file_unit_division]]) + +**Phase 2B Diagnose PR closure: PASS** (flex closure — 1 후보 측정, 2 후보 TEI 호환 탈락 사유 명시). 
diff --git a/reports/v0_2_phase2a_baseline_snapshot_2026-05-23.csv b/reports/v0_2_phase2a_baseline_snapshot_2026-05-23.csv new file mode 100644 index 0000000..4815b17 --- /dev/null +++ b/reports/v0_2_phase2a_baseline_snapshot_2026-05-23.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3879;3856;3851;4041;3890;3917;3863;3908;3855,418.5,1.000,1.000,1.000,1,0.808,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3917;3919;10573;10571;3916;3874;3918;3854;3922,464.3,1.000,1.000,1.000,1,0.834,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3985;3984;3993;3857;3978;3983;3957;3980;3903,291.5,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3852;3851;3877;3905;3878;3858;3903;3781;3881,478.4,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,10570;3888;3912;3913;3911;3905;3909;3906;3910;3893,489.4,1.000,0.500,0.631,1,0.631,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5244;5249;3897;3863;5253;3856;3895;3867;3879;3851,505.3,0.500,0.167,0.257,0,0.314,0.667,1.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 
일,3855;3867;3878,3855:3;3867:2;3878:2,3855;5227;3854;5244;3851;3867;3878;3863;3908;10573,460.3,1.000,1.000,0.793,1,0.873,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3904;3903;3909;3905;3981;3760;5253;3985;3896,400.1,0.667,1.000,0.636,1,0.636,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10572;3917;3916;3918;5227;3854;3877;3922;5240;5226,363.9,0.500,0.500,0.441,1,0.506,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;3876;5249;5234;4025;6675;11677;10573;3757;3811,593.8,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,16081;18077;22048;12213;23984;15793;4321;21273;21276;4307,465.2,0.125,0.100,0.073,1,0.073,0.125,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;15922;17123;21890;22049;4346;9022;4767;6067,284.7,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4064;4065;4066;4071;4068;4069;5063;5105;4067,568.8,1.000,1.000,0.920,1,0.956,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4070;4062;4059;4058;4060;4063;4066;4071;4064;5095,515.5,0.667,0.500,0.478,1,0.478,0.667,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,4775;23446;4776;4202;4679;24382;21155;4668;4199;21855,269.1,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3770;3817;4540;5244;3762;3789;5249;3791;3793,545.1,0.500,0.500,0.387,1,0.497,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5244;5236;5249;5229;3755;3774;3761;5230;10573;3787,470.0,0.250,0.200,0.151,1,0.151,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3772;5260;3897;5248;3771;3769;11671;13936;3755,739.8,1.000,1.000,1.000,1,1.000,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,22342;19576;17069;15924;16935;23149;16019;16462;16010;4776,324.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;21275;16927;20893;16771;17242;4329;20886;4457;4307,513.2,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5161;5262;23732;24155;4546;20758;5145;4547;3774;5180,394.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,16289;5089;5092;5250;22202;20507;5070;5118;5173;23605,301.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,20022;20470;4634;15361;16059;9102;23336;18286;16218;5738,264.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 
통로,3886;3887,3886:3;3887:2,3886;3902;3887;3895;3898;3885;3905;3908;3911;3915,338.8,1.000,1.000,0.920,1,0.956,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;13930;3895;3911;13929;3866;3903;3890;3910;3909,295.6,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11579;4025;4026;11645;13750;11676;13299;13749;13766,444.4,1.000,0.333,0.571,1,0.539,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;13309;13299;13313;13918,420.0,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13657;13655;13656;13649;13651;13752;13659;13650,333.7,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3895;3902;3896;3887;13935;13938;3877;3900;3899,450.7,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5180;5193;5140;5137;5149;5178;5207;5148,1618.0,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5208;5210;5143;5206;5137;5207;5182;5140,1458.1,1.000,1.000,0.832,1,0.907,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5204;5178;5214;5224;5210;5148;5145;5186;5190,1600.2,1.000,1.000,0.818,1,0.961,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel 
supports,5149,5149:3,5149;5141;5137;5139;5136;5140;5186;5178;5145;5143,1564.2,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5182;5143;5204;5211;5207;5185;5186,1311.5,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5204;5224;5208;5209;5205;5178;5180;5225;5187;5186,1417.4,0.500,0.250,0.264,0,0.395,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5189;5192;5180;5187;5186;5212;5188;5182;5137,1664.3,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3759;3774;3755;3818;3812;3778;3756;3761;3771,1076.8,1.000,1.000,0.877,1,0.974,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5204;5225;5206;5208;5210;5137;5182;5145,749.0,0.750,1.000,0.767,1,0.686,0.750,1.000, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5222;5225;5209;5180;5204;5210;5205;5178;5143,709.8,1.000,1.000,0.877,1,0.932,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;5186;13913;5143;13760;13749;5145;5180;5240;5137,741.3,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13760;13674;13669;13774;13773;13675;13755;13924;13772,371.6,0.250,1.000,0.390,1,0.647,0.333,1.000, 
+single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,10575;11671;11649;11648;13915;5241;11563;5173;5177;11653,636.2,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11533;5081;11509;11476;11486;5064;3788;5134;5075,528.8,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;5139;5090;5178;11515;5210;11493;11719,329.0,0.667,1.000,0.765,1,0.856,0.667,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11475;5090;5084;11531;11476;11473;5093;11479;5124,585.5,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11664;13948;13660;5177;13652;11665;13917;11660;13752,351.0,0.333,1.000,0.469,1,0.674,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11627;11658;11600;11625;11692;13918;13751;5177;13653;13753,359.0,0.667,1.000,0.671,1,0.883,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11595;11616;13669;11617;11649;11655;11690;11658;11653;11689,299.7,0.333,0.250,0.202,0,0.321,0.500,1.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11711;11712;11503;11500;11713;11714;13930;11717;11701;11502,357.6,1.000,1.000,1.000,1,0.858,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11693;11692;13665;13661;13664;13666;13670;13773;13934,360.5,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 
시행규칙,,,4026;5236;3977;3971;3966;4018;3972;3973;3974;3895,420.6,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2a_me5_large_inst_2026-05-23.csv b/reports/v0_2_phase2a_me5_large_inst_2026-05-23.csv new file mode 100644 index 0000000..fbe8d71 --- /dev/null +++ b/reports/v0_2_phase2a_me5_large_inst_2026-05-23.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,10573;3868;3854;3879;3890;3856;3971;3867;3910;3876,129.9,1.000,0.500,0.665,0,0.546,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3917;10573;3919;3923;3916;3874;3854;3918;3922,243.4,1.000,1.000,1.000,1,0.834,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3985;3981;3978;3984;3983;3980;3904;3869;3979;3988,115.7,1.000,0.500,0.631,1,0.631,1.000,1.000, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3851;3858;5227;3881;4036;4040;4045;10573;3853,273.0,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3895;3890;3901;3899;3910;3905;3915;3911;3894;3913,476.0,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5244;5253;3895;3868;3879;3856;3921;3854;3923;3915,289.2,1.000,0.250,0.581,0,0.486,1.000,1.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 
일,3855;3867;3878,3855:3;3867:2;3878:2,3867;3855;10571;10573;3917;5231;3878;3918;3851;3854,182.6,1.000,1.000,0.922,1,0.810,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,11686;3980;3903;3869;3918;3981;3985;3854;3896;3955,127.6,0.667,0.500,0.463,1,0.463,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10571;10572;10573;3918;3917;3921;3877;3923;3854;11677,99.8,0.500,0.200,0.290,0,0.323,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,11677;10573;5234;3876;13926;5249;13935;11676;3853;3921,274.7,0.500,0.111,0.185,0,0.237,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,22907;21273;23757;21276;23571;18077;16526;15922;15911;15919,194.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,20240;23572;22067;20898;16532;18087;17123;15922;15918;4327,100.8,0.250,0.100,0.113,0,0.074,0.250,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4071;4066;4064;4065;5105;4067;5086;5064;4068,198.8,1.000,1.000,0.850,1,0.918,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4062;4060;4064;4059;4070;4058;4068;4061;4066;5086,171.1,1.000,1.000,0.913,1,0.913,1.000,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran 
ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,21441;22075;20274;23242;21897;20440;18428;16404;17008;16823,70.8,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;5244;11736;11638;11675;11634;11656;11737;11648;5236,214.6,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5249;11637;6674;5230;11737;11638;11676;3876;3867;3859,153.1,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,13938;11565;13937;11572;11737;13769;13943;3897;5260;4020,516.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,23149;25056;22342;16351;16842;17069;16457;4688;4670;4507,119.4,0.125,0.100,0.073,1,0.073,0.125,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,22907;17128;17242;19111;16526;16761;4761;4307;4457;4452,313.2,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,11733;11698;11735;11613;11711;11736;24508;24268;5215;20238,90.2,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,11513;11510;11711;11739;11736;11738;11508;11735;11523;11509,44.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,11510;11735;23082;23336;11711;11513;11507;11712;11698;11508,43.0,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3895;3913;3901;3899;3910;3905;3890;3915;3908;3911,160.4,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,11565;11637;11572;11636;11568;11673;11678;11634;3896;3894,110.8,0.500,0.111,0.185,0,0.276,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11676;11693;13941;13299;13749;13766;13306;4026;13302,139.7,0.500,0.111,0.185,0,0.102,0.500,0.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13941;13311;13913;13653;13307;13306;13317;13310;13313,111.7,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;13941;11689;13752;13655;13319;13653;11690;11612;11693,73.1,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,13938;3897;13937;3854;3895;3901;3915;3890;3899;3867,194.5,0.500,0.500,0.387,1,0.579,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5147;5137;5210;5180;5140;5149;5133;5145,1273.4,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5210;5137;5212;5178;5144;5180;5145;5147;11634;5141,1116.0,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5178;5214;5210;5148;5145;5190;5186;5192;5209,1449.5,1.000,1.000,0.850,1,0.968,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of 
vessel supports,5149,5149:3,5149;5141;5136;5178;5186;5145;5207;5140;5143;5204,1383.7,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5143;5180;5148;5207;5210;5179;5182;5133;5208,1312.7,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5178;5180;5205;5209;5212;5145;5186;4835;4826;5182,1252.0,0.500,0.250,0.264,0,0.395,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5187;5191;5186;5188;5190;5148;5182;5143;5210,1480.7,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3774;3755;3763;3812;3815;3756;3758;3757;3773;3770,744.7,1.000,0.500,0.693,1,0.541,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5135;5204;5225;5133;5195;5224;5180;5209,529.6,0.750,1.000,0.832,1,0.628,0.750,0.500, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5222;5225;5209;5135;5133;5208;5205;11601;5206,540.8,1.000,1.000,0.877,1,0.932,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,11585;11633;11634;11737;11590;13942;13919;13917;11693;3895,560.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,13764;11652;11690;11693;13941;11689;11650;11669;11651;11653,152.6,0.250,0.167,0.139,0,0.099,0.333,0.000, 
+single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,5173;5177;11671;11559;11651;11672;11588;11477;11652;5179,403.3,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11533;11504;5090;11482;11509;11505;11513;11510;11514;11534,236.5,1.000,0.500,0.605,1,0.617,1.000,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11501;11503;11719;11517;11713;11715;11712;11594;11514,105.6,0.333,1.000,0.469,1,0.674,0.333,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11479;5090;11475;11518;11515;11516;11517;11694;11478,339.3,1.000,1.000,0.832,1,0.907,1.000,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,13660;11691;11591;13943;13942;13917;13653;13752;11579;13753,132.6,0.667,0.500,0.531,1,0.519,0.667,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11658;11692;11627;11655;13753;11651;11670;11646;11690;11617,151.5,0.667,0.333,0.383,1,0.406,0.500,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11658;11595;11690;11669;11581;11639;11655;11650;11649;11617,113.8,0.667,0.250,0.338,0,0.353,1.000,1.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11711;11712;11503;11713;11719;11715;11717;11716;11613;11502,149.2,1.000,1.000,1.000,1,0.858,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11647;11668;5177;11693;11692;13665;13661;13666;13663,160.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 
안전 관리법 시행규칙,,,5260;4026;3977;3971;3966;3972;3973;3974;3895;4019,218.3,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2a_snowflake_l_v2_2026-05-23.csv b/reports/v0_2_phase2a_snowflake_l_v2_2026-05-23.csv new file mode 100644 index 0000000..ad79c1a --- /dev/null +++ b/reports/v0_2_phase2a_snowflake_l_v2_2026-05-23.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3879;3868;3890;3863;3856;3908;3851;4041;3862;3873,236.8,1.000,1.000,0.947,0,0.731,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3917;3919;3923;10573;10571;3916;3874;3918;3854,243.0,1.000,1.000,1.000,1,0.834,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3985;3981;3984;3978;3983;3986;3957;3980;3992;3869,118.4,1.000,0.500,0.631,1,0.631,1.000,1.000, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3852;3851;3858;3881;4036;4040;4045;3913;3912,281.7,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3888;3893;3887;3897;3892;3890;3896;3895;3902;3889,287.1,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,3878;5249;3863;3868;3856;3879;3867;3921;3851;3923,288.6,0.750,0.250,0.458,0,0.468,1.000,1.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 
일,3855;3867;3878,3855:3;3867:2;3878:2,3917;5246;3854;5227;3851;3867;3855;3878;3863;10573,233.7,1.000,0.167,0.472,0,0.418,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3904;3903;3760;3985;3916;3851;3978;3905;3981,175.1,0.667,1.000,0.605,1,0.605,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10572;10573;3917;3916;3918;10571;5244;3919;5227;3854,163.1,0.500,0.333,0.363,1,0.410,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;5249;4025;5240;10573;11677;3876;3757;3811;3921,358.4,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,18077;22048;12213;4317;21273;21276;15919;16404;17242;15922,259.9,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;22049;20240;22055;15917;21890;15922;15918;4346;9022,99.0,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4065;4066;4064;4071;4068;4058;4069;4067;5064,378.0,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4061;4060;4062;4070;4059;4064;4065;4066;4063;4058,356.3,1.000,1.000,1.000,1,1.000,1.000,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,21441;4202;4776;4679;16941;21897;4775;21155;16823;4199,74.6,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5244;5239;3758;3791;3770;3817;3763;3787;4540;5253,319.4,0.500,0.200,0.237,0,0.305,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5230;5249;3755;3863;3802;3851;3859;3895;3896;3890,215.7,0.250,0.333,0.195,1,0.195,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3897;3790;3772;3775;13935;4020;4021;13934;13938;4018,527.9,1.000,0.500,0.693,1,0.693,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,23242;19434;16606;24991;18723;23149;15924;16941;16404;16538,119.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,23242;15894;16751;19434;22069;16912;18088;17242;16759;4345,305.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5161;23732;5262;5061;20758;4550;17810;4546;4547;20036,199.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5057;4057;5135;5094;22202;5092;5066;5078;17899;23498,174.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,18567;20022;20470;19172;18286;21525;16320;21847;4780;16151,79.8,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 
통로,3886;3887,3886:3;3887:2,3886;3887;5249;3881;3912;3892;3898;3896;3888;3893,242.0,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3890;3901;11572;11562;13929;11567;3894;3899;3911,115.8,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11579;4026;4025;11693;13750;13299;13941;13749;13766,243.5,1.000,0.333,0.571,0,0.508,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;11688;13309;13313;13310,259.4,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13651;13655;13656;13649;13658;13752;13648;13659,168.3,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3886;3887;13935;3895;3902;3896;13938;3877;3900,271.8,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5149;5180;5140;5178;5207;5148;5212;5137,1436.2,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5212;5208;5210;5206;11634;5207;5141;5182;5183,1266.8,1.000,1.000,0.850,1,0.918,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5204;5178;5214;5190;5148;5145;5185;5192;5212,1442.7,1.000,1.000,0.832,1,0.964,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel 
supports,5149,5149:3,5149;5136;5186;5178;5145;5148;5192;5185;5212;5147,1387.4,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5204;5207;5143;5147;5179;5180;5137;5210;5182,1294.3,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5178;5224;5180;5205;5209;5212;5225;5145;4835;4826,1256.9,0.500,0.200,0.237,0,0.355,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5186;5212;5137;5148;5143;5204;5185;5140;5193,1483.3,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3812;3763;3813;3756;3755;3757;3815;3774;3814;3770,759.6,1.000,0.500,0.624,1,0.629,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5225;5204;5133;5212;5182;5140;5137;5224,567.3,0.500,1.000,0.637,1,0.522,0.500,0.500, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5222;5225;5209;5210;5208;5205;5204;5178;11601,512.5,1.000,1.000,0.877,1,0.932,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5143;3762;5210;11633;5204;3985;5182;3797;5186;11600,562.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13674;13669;13774;13773;13675;11688;13757;13769;11644,200.8,0.250,1.000,0.390,1,0.647,0.333,1.000, 
+single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,5241;11671;5177;11653;11568;5173;11538;11579;5178;11648,421.8,0.500,0.111,0.185,0,0.237,0.500,1.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11533;11504;11482;11509;11505;11513;11510;11476;11712;11486,253.7,1.000,0.500,0.624,1,0.627,1.000,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;11503;5071;5139;13771;11515;11719;13307,156.7,0.667,1.000,0.765,1,0.856,0.667,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11479;5090;11475;11473;11515;11518;5057;11487;11516,381.2,1.000,1.000,0.832,1,0.907,1.000,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,13948;13660;5177;13652;13759;13942;13917;13752;4026;11579,161.2,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11658;11600;11625;11692;11627;13751;11655;13753;11624;13652,170.4,0.667,0.333,0.416,0,0.448,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11595;11605;11655;11658;11690;11653;11669;13670;11639;11649,123.0,0.333,0.143,0.156,0,0.106,0.500,0.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11712;11711;11503;11500;11713;13930;11717;11715;11716;11719,158.3,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11647;11668;11583;11693;11692;13664;13665;13661;13666,161.9,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 
관리법 시행규칙,,,3977;4026;3971;3966;4018;3972;3973;3974;4019;13913,226.3,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2b_baseline_snapshot_2026-05-23.csv b/reports/v0_2_phase2b_baseline_snapshot_2026-05-23.csv new file mode 100644 index 0000000..a723ecf --- /dev/null +++ b/reports/v0_2_phase2b_baseline_snapshot_2026-05-23.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3879;3856;3851;4041;3890;3917;3863;3908;3855,343.2,1.000,1.000,1.000,1,0.808,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3917;3919;10573;10571;3916;3874;3918;3854;3922,456.6,1.000,1.000,1.000,1,0.834,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3985;3984;3993;3857;3978;3983;3957;3980;3903,287.1,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3852;3851;3877;3905;3878;3858;3903;3781;3881,453.9,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,10570;3888;3912;3913;3911;3905;3909;3906;3910;3893,480.3,1.000,0.500,0.631,1,0.631,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5244;5249;3897;3863;5253;3856;3895;3867;3879;3851,482.3,0.500,0.167,0.257,0,0.314,0.667,1.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 
일,3855;3867;3878,3855:3;3867:2;3878:2,3855;5227;3854;5244;3851;3867;3878;3863;3908;10573,452.2,1.000,1.000,0.793,1,0.873,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3904;3903;3909;3905;3981;3760;5253;3985;3896,383.6,0.667,1.000,0.636,1,0.636,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10572;3917;3916;3918;5227;3854;3877;3922;5240;5226,359.2,0.500,0.500,0.441,1,0.506,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;3876;5249;5234;4025;6675;11677;10573;3757;3811,570.2,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,16081;18077;22048;12213;23984;15793;4321;21273;21276;4307,459.5,0.125,0.100,0.073,1,0.073,0.125,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;15922;17123;21890;22049;4346;9022;4767;6067,289.8,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4064;4065;4066;4071;4068;4069;5063;5105;4067,551.8,1.000,1.000,0.920,1,0.956,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4070;4062;4059;4058;4060;4063;4066;4071;4064;5095,531.8,0.667,0.500,0.478,1,0.478,0.667,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,4775;23446;4776;4202;4679;24382;21155;4668;4199;21855,262.2,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3770;3817;4540;3762;5244;3789;5249;3791;3793,530.5,0.500,0.500,0.387,1,0.497,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5244;5236;5249;5229;3755;3774;3761;5230;10573;3787,465.4,0.250,0.200,0.151,1,0.151,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3772;5260;3897;5248;3771;3769;11671;13936;3755,715.9,1.000,1.000,1.000,1,1.000,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,22342;19576;17069;15924;16935;23149;16019;16462;16010;4776,321.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;21275;16927;20893;16771;17242;4329;20886;4457;4307,503.8,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5161;5262;23732;24155;4546;20758;5145;4547;3774;5180,392.8,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,16289;5089;5092;5250;22202;20507;5070;5118;5173;23605,311.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,20022;20470;4634;15361;16059;9102;23336;18286;16218;5738,268.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 
통로,3886;3887,3886:3;3887:2,3886;3902;3887;3895;3898;3885;3905;3908;3911;3915,349.0,1.000,1.000,0.920,1,0.956,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;13930;3895;3911;13929;3866;3903;3890;3910;3909,293.5,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11579;4025;4026;11645;13750;11676;13299;13749;13766,447.0,1.000,0.333,0.571,1,0.539,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;13309;13299;13313;13918,419.6,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13657;13655;13656;13649;13651;13752;13659;13650,326.4,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3895;3902;3896;3887;13935;13938;3877;3900;3899,444.6,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5180;5193;5140;5137;5149;5178;5207;5148,1596.3,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5208;5210;5143;5206;5137;5207;5182;5140,1470.0,1.000,1.000,0.832,1,0.907,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5204;5178;5214;5224;5210;5148;5145;5186;5190,1588.3,1.000,1.000,0.818,1,0.961,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel 
supports,5149,5149:3,5149;5141;5137;5139;5136;5140;5186;5178;5145;5143,1557.0,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5182;5143;5204;5211;5207;5185;5186,1331.5,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5204;5224;5208;5209;5205;5178;5180;5225;5187;5186,1409.3,0.500,0.250,0.264,0,0.395,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5189;5192;5180;5187;5186;5212;5188;5182;5137,1651.1,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3759;3774;3755;3818;3812;3778;3756;3761;3771,1089.1,1.000,1.000,0.877,1,0.974,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5204;5225;5206;5208;5210;5137;5182;5145,755.4,0.750,1.000,0.767,1,0.686,0.750,1.000, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5222;5225;5209;5180;5204;5210;5205;5178;5143,708.5,1.000,1.000,0.877,1,0.932,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;5186;13913;5143;13760;13749;5145;5180;5240;5137,742.8,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13760;13674;13669;13774;13773;13675;13755;13924;13772,373.1,0.250,1.000,0.390,1,0.647,0.333,1.000, 
+single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,10575;11671;11649;11648;13915;5241;11563;5173;5177;11653,620.7,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11533;5081;11509;11476;11486;5064;3788;5134;5075,503.5,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;5139;5090;5178;11515;5210;11493;11719,381.8,0.667,1.000,0.765,1,0.856,0.667,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11475;5090;5084;11531;11476;11473;5093;11479;5124,596.0,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11664;13948;13660;5177;13652;11665;13917;11660;13752,351.9,0.333,1.000,0.469,1,0.674,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11627;11658;11600;11625;11692;13918;13751;5177;13653;13753,361.1,0.667,1.000,0.671,1,0.883,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11595;11616;13669;11617;11649;11655;11690;11658;11653;11689,300.2,0.333,0.250,0.202,0,0.321,0.500,1.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11711;11712;11503;11500;11713;11714;13930;11717;11701;11502,362.9,1.000,1.000,1.000,1,0.858,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11693;11692;13665;13661;13664;13666;13670;13773;13934,348.9,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 
시행규칙,,,4026;5236;3977;3971;3966;4018;3972;3973;3974;3895,418.5,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2b_gte_ml_base_2026-05-23.csv b/reports/v0_2_phase2b_gte_ml_base_2026-05-23.csv new file mode 100644 index 0000000..766e1e0 --- /dev/null +++ b/reports/v0_2_phase2b_gte_ml_base_2026-05-23.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3879;3856;3851;4041;3890;3917;3863;3908;3855,244.4,1.000,1.000,1.000,1,0.808,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3917;10571;3916;3874;3896;3922;3919;3918;3920,319.9,1.000,1.000,1.000,1,0.834,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3985;3903;3880;3980;3983;3978;3896;3869;3904,199.9,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,3851;3914;4041;3852;3905;3877;3881;3903;3915;3913,345.3,1.000,0.333,0.500,1,0.500,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3888;3905;3911;3913;10570;3909;3912;3906;3904;3910,380.5,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5244;3897;5249;5253;3856;3898;3879;3851;3868;3902,392.3,0.750,0.200,0.399,0,0.426,1.000,1.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 
일,3855;3867;3878,3855:3;3867:2;3878:2,5227;3855;5249;3867;3854;3908;5244;3898;3895;10571,345.1,0.667,0.500,0.498,1,0.549,0.667,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3903;3896;3980;3904;3905;3760;3912;3909;3757;3857,291.8,0.333,0.333,0.235,1,0.235,0.333,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,3916;3917;10572;3918;5227;3854;3877;5238;3878;5240,266.6,0.500,1.000,0.637,1,0.635,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,5234;3876;3853;10573;5249;4025;6675;3810;3777;3787,465.6,0.500,0.333,0.307,1,0.394,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,18077;16081;21273;15793;23984;22048;16526;12213;4307;20893,363.6,0.125,0.111,0.076,1,0.076,0.125,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;15922;17123;21890;22049;4346;9022;4767;6067,227.6,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4065;4064;4066;4071;4068;11481;5105;5106;5063,428.0,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4070;4058;4062;4059;5095;4060;4064;4063;4066;4067,394.0,0.667,0.333,0.402,1,0.402,0.667,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,4775;23446;4776;4202;4679;24382;21155;4668;4199;21855,199.7,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,4540;5239;5249;3770;5236;5244;3787;3817;4548;5253,399.3,0.500,0.250,0.264,0,0.339,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5249;5236;5244;5230;5235;5229;10573;3761;3755;3816,344.6,0.250,0.111,0.118,1,0.118,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3772;5260;3897;5248;3771;3769;11671;13936;3755,658.8,1.000,1.000,1.000,1,1.000,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,22342;19576;17069;15924;16935;23149;16019;16462;16010;4776,256.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;21275;16927;20893;16771;17242;4329;20886;4457;4307,434.7,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5262;23732;5161;24155;20032;20758;20036;17813;3816;19373,274.1,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,16289;22202;23605;20507;5250;24854;5168;23297;19527;18298,201.2,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,18286;9102;20022;16059;23336;20470;20174;15361;4634;17133,179.8,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 
통로,3886;3887,3886:3;3887:2,3886;3902;3887;3895;3898;3885;3905;3908;3911;3915,273.9,1.000,1.000,0.920,1,0.956,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;13930;3895;3911;13929;3866;3903;3890;3910;3909,227.4,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11579;11644;4025;4026;11647;11676;11591;11580;11645;13750,329.6,1.000,0.333,0.571,1,0.539,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13302;13312;13300;13311;13310;13308;13306;13299;13304,298.4,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;13657;13651;13655;11689;13649;13648;11693;13322;13658,222.2,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3902;3895;3887;13935;3866;13938;3890;4018;3901,352.2,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5136;5144;5139;5180;5193;5186;5207;5210;5178;5190,1475.2,1.000,1.000,1.000,1,0.834,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5210;5204;5208;5137;5180;5186;5182;5206;5140;5185,1319.6,1.000,0.500,0.580,1,0.603,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5224;5204;5210;5178;5186;5148;5214;5192;5180,1475.1,1.000,1.000,0.818,1,0.961,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel 
supports,5149,5149:3,5149;5137;5141;5139;5211;5207;5212;5140;5182;5206,1444.9,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5182;5143;5204;5211;5207;5185;5186,1254.6,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5180;5225;5224;5204;5178;5205;5208;5209;5187;4826,1290.7,0.500,0.125,0.193,0,0.289,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5189;5186;5180;5187;5192;5212;5182;5137;5210,1532.4,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3774;3755;3759;3818;3761;3769;3758;3812;3762,943.3,1.000,1.000,0.920,1,0.983,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5204;5208;5137;5225;5210;5133;5136;5180;5207,616.2,0.750,1.000,0.642,0,0.529,0.750,1.000, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5225;5222;5209;5210;5195;5180;5204;5207;5141,590.8,1.000,1.000,0.877,1,0.932,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;5186;13913;5143;13760;13749;5145;5180;5240;5137,676.2,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13759;13310;13764;13670;11649;13674;13772;13671;13669,272.7,0.250,1.000,0.390,1,0.647,0.333,1.000, 
+single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,13299;5241;13915;10575;5173;13303;11563;5178;13948;5177,519.5,0.500,0.125,0.193,0,0.248,0.500,1.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11533;11504;5081;5064;11509;11476;5071;5082;11601;5075,388.9,0.500,0.500,0.387,1,0.497,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;5139;11493;11515;11521;11719;5090;5193;13318,228.0,0.667,1.000,0.765,1,0.856,0.667,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;5084;5090;11516;11526;11493;11479;11515;11473;5210,472.2,1.000,1.000,0.807,1,0.894,1.000,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,13665;13324;11579;11591;11580;13948;13928;13759;13752;13660,243.4,0.333,0.250,0.202,0,0.290,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11600;13918;11627;11692;11625;13753;11510;13752;13302;11658,259.8,0.667,0.333,0.416,1,0.496,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11617;13948;13924;11649;11595;11690;11616;11658;11619;11655,222.2,0.333,1.000,0.469,1,0.745,0.500,1.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11713;11711;13930;11712;11714;11500;11488;11503;11502;11509,267.5,1.000,0.500,0.646,0,0.563,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;13303;11692;11693;13663;13665;13933;13673;11655;13661,256.2,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 
시행규칙,,,3897;5236;4019;4018;3971;3903;3966;3974;5210;3973,321.2,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2q_baseline_rebaseline_2026-05-24.csv b/reports/v0_2_phase2q_baseline_rebaseline_2026-05-24.csv new file mode 100644 index 0000000..a59e592 --- /dev/null +++ b/reports/v0_2_phase2q_baseline_rebaseline_2026-05-24.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3879;3856;3851;4041;3890;3917;3863;3908;3855,2322.6,1.000,1.000,1.000,1,0.808,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3917;3919;10573;10571;3916;3874;3918;3854;3922,500.8,1.000,1.000,1.000,1,0.834,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3985;3984;3993;3857;3978;3983;3957;3980;3903,300.0,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3852;3851;3877;3905;3878;3858;3903;3781;3881,481.1,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,10570;3888;3912;3913;3911;3905;3909;3906;3910;3893,494.2,1.000,0.500,0.631,1,0.631,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5244;5249;3897;3863;5253;3856;3895;3867;3879;3851,500.4,0.500,0.167,0.257,0,0.314,0.667,1.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 
일,3855;3867;3878,3855:3;3867:2;3878:2,3855;5227;3854;5244;3851;3867;3878;3863;3908;10573,459.3,1.000,1.000,0.793,1,0.873,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3904;3903;3909;3905;3981;3760;5253;3985;3896,402.9,0.667,1.000,0.636,1,0.636,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10572;3917;3916;3918;5227;3854;3877;3922;5240;5226,359.3,0.500,0.500,0.441,1,0.506,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;3876;5249;5234;4025;6675;11677;10573;3757;3811,589.4,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,16081;18077;22048;12213;23984;15793;4321;21273;21276;4307,477.9,0.125,0.100,0.073,1,0.073,0.125,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;15922;17123;21890;22049;4346;9022;4767;6067,298.5,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4064;4065;4066;4071;4068;4069;5063;5105;4067,568.1,1.000,1.000,0.920,1,0.956,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4070;4062;4059;4058;4060;4063;4066;4071;4064;5095,533.5,0.667,0.500,0.478,1,0.478,0.667,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,4775;23446;4776;4202;4679;24382;21155;4668;4199;21855,278.2,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3770;3817;4540;5244;3762;3789;5249;3791;3793,555.4,0.500,0.500,0.387,1,0.497,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5244;5236;5249;5229;3755;3774;3761;5230;10573;3787,469.9,0.250,0.200,0.151,1,0.151,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3772;5260;3897;5248;3771;3769;11671;13936;3755,749.3,1.000,1.000,1.000,1,1.000,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,22342;19576;17069;15924;16935;23149;16019;16462;16010;4776,322.2,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;21275;16927;20893;16771;17242;4329;20886;4457;4307,517.8,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5161;5262;23732;24155;4546;20758;5145;4547;3774;5180,415.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,16289;5089;5092;5250;22202;20507;5070;5118;5173;23605,303.5,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,20022;20470;4634;15361;16059;9102;23336;18286;16218;5738,264.1,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 
통로,3886;3887,3886:3;3887:2,3886;3902;3887;3895;3898;3885;3905;3908;3911;3915,344.8,1.000,1.000,0.920,1,0.956,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;13930;3895;3911;13929;3866;3903;3890;3910;3909,313.3,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11579;4025;4026;11645;13750;11676;13299;13749;13766,456.2,1.000,0.333,0.571,1,0.539,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;13309;13299;13313;13918,423.5,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13657;13655;13656;13649;13651;13752;13659;13650,342.2,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3895;3902;3896;3887;13935;13938;3877;3900;3899,454.6,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5180;5193;5140;5137;5149;5178;5207;5148,1638.5,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5208;5210;5143;5206;5137;5207;5182;5140,1474.5,1.000,1.000,0.832,1,0.907,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5204;5178;5214;5224;5210;5148;5145;5186;5190,1615.3,1.000,1.000,0.818,1,0.961,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel 
supports,5149,5149:3,5149;5141;5137;5139;5136;5140;5186;5178;5145;5143,1570.0,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5182;5143;5204;5211;5207;5185;5186,1325.2,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5204;5224;5208;5209;5205;5178;5180;5225;5187;5186,1424.5,0.500,0.250,0.264,0,0.395,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5189;5192;5180;5187;5186;5212;5188;5182;5137,1650.7,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3759;3774;3755;3818;3812;3778;3756;3761;3771,1083.8,1.000,1.000,0.877,1,0.974,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5204;5225;5206;5208;5210;5137;5182;5145,749.6,0.750,1.000,0.767,1,0.686,0.750,1.000, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5222;5225;5209;5180;5204;5210;5205;5178;5143,706.2,1.000,1.000,0.877,1,0.932,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;5186;13913;5143;13760;13749;5145;5180;5240;5137,748.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13760;13674;13669;13774;13773;13675;13755;13924;13772,384.0,0.250,1.000,0.390,1,0.647,0.333,1.000, 
+single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,10575;11671;11649;11648;13915;5241;11563;5173;5177;11653,638.0,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11533;5081;11509;11476;11486;5064;3788;5134;5075,509.3,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;5139;5090;5178;11515;5210;11493;11719,326.9,0.667,1.000,0.765,1,0.856,0.667,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11475;5090;5084;11531;11476;11473;5093;11479;5124,582.3,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11664;13948;13660;5177;13652;11665;13917;11660;13752,375.3,0.333,1.000,0.469,1,0.674,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11627;11658;11600;11625;11692;13918;13751;5177;13653;13753,369.6,0.667,1.000,0.671,1,0.883,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11595;11616;13669;11617;11649;11655;11690;11658;11653;11689,305.2,0.333,0.250,0.202,0,0.321,0.500,1.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11711;11712;11503;11500;11713;11714;13930;11717;11701;11502,373.5,1.000,1.000,1.000,1,0.858,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11693;11692;13665;13661;13664;13666;13670;13773;13934,340.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 
시행규칙,,,4026;5236;3977;3971;3966;4018;3972;3973;3974;3895,438.8,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_cold.csv b/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_cold.csv new file mode 100644 index 0000000..5f4919c --- /dev/null +++ b/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_cold.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3856;3851;3868;3856;3851;3853;3873;10573;3873,3856.5,0.667,1.000,1.149,1,1.099,0.667,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3921;3874;3874;10573;3922;3917;3918;10573;10571,4620.2,1.000,1.000,1.204,0,0.813,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3981;3985;3985;3980;3980;3857;3978;3880;3857,3554.9,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;4041;3858;3896;3782;3755;3851;3775;3755;3851,3618.8,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3910;3888;3910;10570;3888;3905;3774;3895;10570;3905,3791.0,1.000,0.500,1.018,1,1.018,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5249;5253;3895;3902;3879;3855;3897;5244;5249;3897,3313.4,0.250,0.200,0.151,0,0.107,0.333,0.000, 
+single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;3917;3867;3855;3878;5227;10571;3867;3851;3878,3689.7,1.000,1.000,1.371,1,1.394,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;5253;3880;3980;3985;3903;3904;3981;5253;3985,3484.9,0.667,1.000,0.819,1,0.819,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,3918;3917;3917;3854;3918;10572;10572;3916;3877;3854,3658.7,0.500,0.500,0.565,1,0.758,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,10573;3876;3853;5249;3876;5249;3853;3811;11677;11677,4278.8,0.500,0.333,0.511,1,0.656,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,22048;21276;15793;22048;21273;21276;4307;23571;4452;4339,2737.3,0.125,0.143,0.084,1,0.084,0.125,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;21890;16532;15922;21706;17123;21890;22049;15922,2858.6,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4071;4064;4065;4066;4064;4068;5105;4065;4071,2910.7,1.000,1.000,1.062,1,1.034,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4062;4059;4070;4062;4060;4060;4059;4070;4061;4068,2674.9,1.000,1.000,1.161,1,1.161,1.000,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran 
ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,4775;4333;4776;4739;4202;4679;4668;4775;4679;4668,2664.3,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;5239;4540;3791;3770;4540;3770;3758;3791;3774,3375.8,0.500,0.200,0.442,0,0.567,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,3755;5239;5249;5249;5230;10573;5230;3774;5239;10573,3234.6,0.250,1.000,0.390,1,0.390,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3772;3790;5260;3772;3897;5248;3771;5248;3769,3693.3,1.000,1.000,1.544,1,1.544,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,19576;19576;17069;16935;15924;16935;23149;16010;16010;23149,2768.3,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;21275;16761;16771;16771;17242;21275;17242;4329;4457,2952.1,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,23732;23732;20036;4547;5161;4547;20758;24155;20032;20036,3442.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5094;5057;5090;5090;5094;5250;5076;5078;5118;5092,3495.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,23336;20470;9102;20022;18286;9102;16218;4634;4281;4289,2794.5,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3912;3895;3886;3902;3902;3886;3887;3895;3898;3756,3994.3,1.000,0.333,0.729,1,0.786,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3896;13930;13930;3895;3911;3866;3903;3866;3898,4288.0,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;4025;4026;4025;4026;13299;13749;13941;11644;11579,4047.0,1.000,0.500,1.195,1,1.135,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13305;13304;13304;13300;13300;13306;13310;13307;13310,3843.3,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;13652;13752;13752;13657;13657;13653;13655;13651;13651,4371.3,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3897;3895;3901;3907;3899;3901;3851;3877;3898,3788.8,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5140;5136;5210;5149;5206;5210;5178;5136;5204,4744.1,1.000,1.000,1.104,1,1.057,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5204;5180;5182;5208;5206;5210;5210;5141,5285.2,1.000,1.000,1.124,1,1.293,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5204;5205;5178;5224;5178;5214;5180;5205;5145;5186,5152.3,0.500,0.500,0.580,1,0.868,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel 
supports,5149,5149:3,5149;5139;5140;5136;5149;5137;5141;5140;5186;5145,4500.2,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5180;5210;5178;5204;5182;5139;5143,5295.9,1.000,1.000,1.356,1,1.356,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5180;5225;5178;5224;5182;5187;5180;5182;5209;5209,4644.0,0.500,0.111,0.362,0,0.541,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5210;5140;5187;5136;5145;5180;5137;5210;5149,5250.8,1.000,1.000,0.832,1,0.907,1.000,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3759;3763;3812;3755;3778;3756;3774;3778;3774;3791,4352.8,1.000,0.500,0.651,1,0.635,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5210;5204;5139;5210;5145;5133;5204;5206;5145,4671.8,0.500,1.000,0.956,0,0.803,0.500,0.500, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5224;5225;5225;5210;5209;5141;5180;5180;5178,4381.7,1.000,1.000,1.218,1,1.404,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;5210;13760;5137;5140;5149;3895;5136;3797;3797,3279.8,0.333,0.125,0.148,1,0.058,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11649;13675;13759;11647;13675;13774;11647;13774;13674;13760,2598.8,0.250,0.250,0.298,0,0.494,0.333,1.000, 
+single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,5178;5178;5177;11690;5177;10575;5173;11638;5173;5241,3454.7,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11504;11533;11533;5106;11509;11509;3788;11601;5081,3075.4,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11500;11515;11517;11521;11493;5090;5071;3788;11515,3893.1,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11514;11475;11475;11479;11473;11479;11473;11487;11476,3231.0,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11591;11664;13928;13928;13324;13752;13948;13948;4026,4341.1,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11658;11627;11658;13753;11625;11600;13653;13653;13918;11627,3228.5,0.667,0.500,0.613,1,0.809,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11690;11649;11595;11669;11616;11690;11595;13669;11689;11649,3647.4,0.333,0.250,0.202,0,0.138,0.500,0.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11712;11712;11711;11711;11503;11503;11713;11500;11500;11701,3108.0,1.000,1.000,1.551,1,1.582,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11693;5210;13665;13664;11691;13673;13666;13303;11692,3335.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 
안전 관리법 시행규칙,,,3895;3895;13944;5236;5236;13944;3961;3961;3971;3971,3495.7,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_warm.csv b/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_warm.csv new file mode 100644 index 0000000..a64d6b3 --- /dev/null +++ b/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_warm.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3856;3851;3868;3856;3851;3853;3873;10573;3873,1283.8,0.667,1.000,1.149,1,1.099,0.667,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3921;3874;3874;10573;3922;3917;3918;10573;10571,837.0,1.000,1.000,1.204,0,0.813,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3981;3985;3985;3980;3980;3857;3978;3880;3857,780.4,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;4041;3858;3896;3782;3755;3851;3775;3755;3851,1264.0,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3910;3888;3910;10570;3888;3905;3774;3895;10570;3905,1161.2,1.000,0.500,1.018,1,1.018,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5249;5253;3895;3902;3879;3855;3897;5244;5249;3897,864.5,0.250,0.200,0.151,0,0.107,0.333,0.000, 
+single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;3917;3867;3855;3878;5227;10571;3867;3851;3878,470.4,1.000,1.000,1.371,1,1.394,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;5253;3880;3980;3985;3903;3904;3981;5253;3985,568.8,0.667,1.000,0.819,1,0.819,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,3918;3917;3917;3854;3918;10572;10572;3916;3877;3854,447.4,0.500,0.500,0.565,1,0.758,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,10573;3876;3853;5249;3876;5249;3853;3811;11677;11677,1431.3,0.500,0.333,0.511,1,0.656,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,22048;21276;15793;22048;21273;21276;4307;23571;4452;4339,628.0,0.125,0.143,0.084,1,0.084,0.125,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;21890;16532;15922;21706;17123;21890;22049;15922,436.2,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4071;4064;4065;4066;4064;4068;5105;4065;4071,542.2,1.000,1.000,1.062,1,1.034,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4062;4059;4070;4062;4060;4060;4059;4070;4061;4068,552.5,1.000,1.000,1.161,1,1.161,1.000,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran 
ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,4775;4333;4776;4739;4202;4679;4668;4775;4679;4668,610.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;5239;4540;3791;3770;4540;3770;3758;3791;3774,1010.4,0.500,0.200,0.442,0,0.567,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,3755;5239;5249;5249;5230;10573;5230;3774;5239;10573,1359.6,0.250,1.000,0.390,1,0.390,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3772;3790;5260;3772;3897;5248;3771;5248;3769,1796.1,1.000,1.000,1.544,1,1.544,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,19576;19576;17069;16935;15924;16935;23149;16010;16010;23149,1159.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;21275;16761;16771;16771;17242;21275;17242;4329;4457,755.7,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,23732;23732;20036;4547;5161;4547;20758;24155;20032;20036,325.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5094;5057;5090;5090;5094;5250;5076;5078;5118;5092,295.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,23336;20470;9102;20022;18286;9102;16218;4634;4281;4289,251.3,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3912;3895;3886;3902;3902;3886;3887;3895;3898;3756,1049.9,1.000,0.333,0.729,1,0.786,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3896;13930;13930;3895;3911;3866;3903;3866;3898,1027.7,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;4025;4026;4025;4026;13299;13749;13941;11644;11579,1398.6,1.000,0.500,1.195,1,1.135,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13305;13304;13304;13300;13300;13306;13310;13307;13310,377.3,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;13652;13752;13752;13657;13657;13653;13655;13651;13651,328.3,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3897;3895;3901;3907;3899;3901;3851;3877;3898,1249.8,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5140;5136;5210;5149;5206;5210;5178;5136;5204,2608.5,1.000,1.000,1.104,1,1.057,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5204;5180;5182;5208;5206;5210;5210;5141,3067.4,1.000,1.000,1.124,1,1.293,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5204;5205;5178;5224;5178;5214;5180;5205;5145;5186,2889.3,0.500,0.500,0.580,1,0.868,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel 
supports,5149,5149:3,5149;5139;5140;5136;5149;5137;5141;5140;5186;5145,2379.2,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5180;5210;5178;5204;5182;5139;5143,2950.9,1.000,1.000,1.356,1,1.356,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5180;5225;5178;5224;5182;5187;5180;5182;5209;5209,2267.5,0.500,0.111,0.362,0,0.541,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5210;5140;5187;5136;5145;5180;5137;5210;5149,2912.3,1.000,1.000,0.832,1,0.907,1.000,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3759;3763;3812;3755;3778;3756;3774;3778;3774;3791,1664.2,1.000,0.500,0.651,1,0.635,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5210;5204;5139;5210;5145;5133;5204;5206;5145,2200.3,0.500,1.000,0.956,0,0.803,0.500,0.500, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5224;5225;5225;5210;5209;5141;5180;5180;5178,1921.9,1.000,1.000,1.218,1,1.404,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;5210;13760;5137;5140;5149;3895;5136;3797;3797,931.9,0.333,0.125,0.148,1,0.058,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11649;13675;13759;11647;13675;13774;11647;13774;13674;13760,449.4,0.250,0.250,0.298,0,0.494,0.333,1.000, 
+single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,5178;5178;5177;11690;5177;10575;5173;11638;5173;5241,1028.6,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11504;11533;11533;5106;11509;11509;3788;11601;5081,872.8,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11500;11515;11517;11521;11493;5090;5071;3788;11515,300.7,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11514;11475;11475;11479;11473;11479;11473;11487;11476,624.8,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11591;11664;13928;13928;13324;13752;13948;13948;4026,1049.7,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11658;11627;11658;13753;11625;11600;13653;13653;13918;11627,536.1,0.667,0.500,0.613,1,0.809,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11690;11649;11595;11669;11616;11690;11595;13669;11689;11649,399.5,0.333,0.250,0.202,0,0.138,0.500,0.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11712;11712;11711;11711;11503;11503;11713;11500;11500;11701,456.6,1.000,1.000,1.551,1,1.582,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11693;5210;13665;13664;11691;13673;13666;13303;11692,356.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 
시행규칙,,,3895;3895;13944;5236;5236;13944;3961;3961;3971;3971,815.5,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_cold.csv b/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_cold.csv new file mode 100644 index 0000000..fff3730 --- /dev/null +++ b/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_cold.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3879;3856;3851;3868;3879;4041;3856;4041;3851,2865.3,1.000,1.000,1.497,1,1.235,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3921;3874;3874;3922;3917;3918;3917;10573;10573,8879.1,1.000,1.000,1.412,0,1.079,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3981;3985;3985;3980;3980;3857;3857;3880;3984,9160.5,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;4041;3905;3880;3858;3818;3781;3851;3781;3881,9813.7,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3888;3910;3910;3888;3905;3895;3905;3890;3885;3898,10152.4,1.000,1.000,1.431,1,1.431,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5249;5249;3897;5244;5244;3868;5253;5253;3897;3851,9887.4,0.250,0.167,0.139,0,0.099,0.333,0.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 
도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;3917;3867;3878;3855;5227;10571;3851;3867;5244,9554.2,1.000,1.000,1.229,1,1.290,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3985;3981;3903;3980;3909;3880;5253;3985;5253,2651.9,0.667,1.000,0.885,1,0.885,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,3917;3917;3918;3854;3916;10572;3854;3918;10572;3877,2674.7,0.500,1.000,0.788,1,1.076,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3876;10573;3853;5249;3853;3876;5249;11677;4025;3811,3434.4,0.500,0.333,0.544,1,0.698,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,20893;22048;15793;22048;20893;21273;4452;15793;4331;4339,2365.2,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;21706;16532;15922;17123;15922;17123;4346;9022,1800.0,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4064;4071;4064;4065;4071;4065;4066;4066;4063,1811.7,1.000,1.000,1.232,1,1.258,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4062;4059;4059;4070;4060;4062;4070;4060;4061;4058,2161.6,1.000,1.000,1.107,1,1.107,1.000,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran 
ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,23446;4775;4775;4679;23446;4776;4202;4202;4776;4679,2094.5,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3791;5239;3770;3817;3758;4540;3817;3791;3770,2533.0,0.500,0.250,0.441,0,0.567,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5249;5249;5230;5230;3774;3787;10573;10573;3819;3755,2792.1,0.250,0.100,0.113,1,0.113,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3790;3772;3772;3897;5260;13936;5260;5248;11671,3155.7,1.000,1.000,1.571,1,1.571,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,19576;19576;17069;16935;15924;16935;23149;16010;16010;23149,2548.3,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;16761;21275;21275;16927;16927;16771;17242;4329;16771,2613.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,23732;4547;5161;20758;23732;4546;3774;3774;4547;5161,2252.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5057;5094;5061;5094;5070;5076;5092;20507;5092;5118,1837.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,23336;20470;9102;20470;17133;20022;9102;20022;4634;17133,1532.6,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3886;3886;3902;3887;3895;3902;3898;3887;3895;3898,2882.5,1.000,1.000,1.457,1,1.536,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3896;13930;13930;3895;3895;3866;3903;3866;3903,3467.6,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;4025;4026;13658;13299;4025;4026;11693;11579;11693,3395.6,1.000,0.500,1.116,1,1.058,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13305;13304;13304;13300;13310;13300;13306;13306;13310,2533.9,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;13652;13752;13657;13752;13657;13653;13655;13651;13655,2289.4,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3897;3895;3901;3915;3895;3915;3890;3902;3901,2933.5,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5140;5136;5210;5149;5206;5210;5178;5136;5204,4062.6,1.000,1.000,1.104,1,1.057,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5204;5180;5182;5208;5206;5210;5137;5210,4675.3,1.000,1.000,1.124,1,1.293,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5204;5205;5224;5178;5178;5180;5214;5145;5205;5214,4548.4,0.500,0.500,0.571,1,0.855,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel 
supports,5149,5149:3,5149;5139;5140;5136;5149;5137;5141;5140;5186;5145,3980.0,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5180;5178;5139;5210;5143;5182;5182,4303.1,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5180;5178;5224;5182;5187;5180;5182;5209;5209;5178,4188.4,0.500,0.125,0.378,0,0.566,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5139;5135;5208;5210;5187;5210;5133;5204;4026;4026,3989.3,0.500,0.333,0.307,1,0.394,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3812;5253;5230;5249;5249;3875;3852;3812;10573;3875,2757.4,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5210;5139;5139;5210;5204;5145;5145;5206;5206;5204,3857.5,0.500,1.000,1.000,0,0.944,0.500,0.500, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5224;5225;5225;5209;5210;5180;5204;5222;5209,3393.5,1.000,1.000,1.414,1,1.512,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;13913;13760;5210;5186;13913;5143;13760;5137;5137,2814.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11649;13675;13759;11647;13675;13774;11647;13774;13674;13760,2005.0,0.250,0.250,0.298,0,0.494,0.333,1.000, 
+single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,5177;5178;13299;13303;5177;11638;11690;11690;13306;5173,2882.5,0.500,0.500,0.387,1,0.497,0.500,1.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11504;11533;5106;11533;5111;4544;11509;11476;5081,2524.7,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11500;11501;11495;5090;11515;11515;11517;11495;5210,1926.1,0.667,1.000,1.109,1,1.310,0.667,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11514;11475;11479;11479;11475;11473;11473;5090;11531,2208.9,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11591;11664;11664;13928;13917;13752;13752;13928;13917,2690.3,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11658;11627;11658;11627;13653;13753;13753;13752;11625;13918,1903.5,0.667,0.500,0.639,1,0.887,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11690;11649;11595;13669;11616;11690;13654;11669;11649;11617,2525.7,0.667,0.125,0.284,0,0.316,1.000,1.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11712;11712;11711;11711;11503;11503;11500;11713;13930;11701,1883.2,1.000,1.000,1.551,1,1.582,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;13673;11693;13936;5177;13666;5210;13936;5210;13665,1851.5,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 
안전 관리법 시행규칙,,,3895;5210;3961;3895;4026;3971;3966;3972;5210;3961,2479.2,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_warm.csv b/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_warm.csv new file mode 100644 index 0000000..6221fd3 --- /dev/null +++ b/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_warm.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3879;3856;3851;3868;3879;4041;3856;4041;3851,1353.9,1.000,1.000,1.497,1,1.235,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3921;3874;3874;3922;3917;3918;3917;10573;10573,739.5,1.000,1.000,1.412,0,1.079,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3981;3985;3985;3980;3980;3857;3857;3880;3984,603.6,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;4041;3905;3880;3858;3818;3781;3851;3781;3881,1227.5,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3888;3910;3910;3888;3905;3895;3905;3890;3885;3898,1307.0,1.000,1.000,1.431,1,1.431,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5249;5249;3897;5244;5244;3868;5253;5253;3897;3851,952.1,0.250,0.167,0.139,0,0.099,0.333,0.000, 
+single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;3917;3867;3878;3855;5227;10571;3851;3867;5244,775.5,1.000,1.000,1.229,1,1.290,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3985;3981;3903;3980;3909;3880;5253;3985;5253,831.8,0.667,1.000,0.885,1,0.885,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,3917;3917;3918;3854;3916;10572;3854;3918;10572;3877,879.0,0.500,1.000,0.788,1,1.076,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3876;10573;3853;5249;3853;3876;5249;11677;4025;3811,1527.5,0.500,0.333,0.544,1,0.698,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,20893;22048;15793;22048;20893;21273;4452;15793;4331;4339,629.8,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;21706;16532;15922;17123;15922;17123;4346;9022,444.8,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4064;4071;4064;4065;4071;4065;4066;4066;4063,477.0,1.000,1.000,1.232,1,1.258,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4062;4059;4059;4070;4060;4062;4070;4060;4061;4058,956.7,1.000,1.000,1.107,1,1.107,1.000,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran 
ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,23446;4775;4775;4679;23446;4776;4202;4202;4776;4679,596.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3791;5239;3770;3817;3758;4540;3817;3791;3770,1197.1,0.500,0.250,0.441,0,0.567,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5249;5249;5230;5230;3774;3787;10573;10573;3819;3755,1271.5,0.250,0.100,0.113,1,0.113,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3790;3772;3772;3897;5260;13936;5260;5248;11671,1670.0,1.000,1.000,1.571,1,1.571,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,19576;19576;17069;16935;15924;16935;23149;16010;16010;23149,1160.9,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;16761;21275;21275;16927;16927;16771;17242;4329;16771,943.5,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,23732;4547;5161;20758;23732;4546;3774;3774;4547;5161,364.7,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5057;5094;5061;5094;5070;5076;5092;20507;5092;5118,290.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,23336;20470;9102;20470;17133;20022;9102;20022;4634;17133,250.6,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3886;3886;3902;3887;3895;3902;3898;3887;3895;3898,1469.2,1.000,1.000,1.457,1,1.536,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3896;13930;13930;3895;3895;3866;3903;3866;3903,1511.2,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;4025;4026;13658;13299;4025;4026;11693;11579;11693,2166.1,1.000,0.500,1.116,1,1.058,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13305;13304;13304;13300;13310;13300;13306;13306;13310,785.6,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;13652;13752;13657;13752;13657;13653;13655;13651;13655,998.1,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3897;3895;3901;3915;3895;3915;3890;3902;3901,1614.9,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5140;5136;5210;5149;5206;5210;5178;5136;5204,2592.9,1.000,1.000,1.104,1,1.057,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5204;5180;5182;5208;5206;5210;5137;5210,3083.9,1.000,1.000,1.124,1,1.293,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5204;5205;5224;5178;5178;5180;5214;5145;5205;5214,3019.9,0.500,0.500,0.571,1,0.855,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel 
supports,5149,5149:3,5149;5139;5140;5136;5149;5137;5141;5140;5186;5145,2373.9,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5180;5178;5139;5210;5143;5182;5182,2729.3,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5180;5178;5224;5182;5187;5180;5182;5209;5209;5178,2655.9,0.500,0.125,0.378,0,0.566,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5139;5135;5208;5210;5187;5210;5133;5204;4026;4026,2393.8,0.500,0.333,0.307,1,0.394,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3812;5253;5230;5249;5249;3875;3852;3812;10573;3875,988.5,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5210;5139;5139;5210;5204;5145;5145;5206;5206;5204,2043.3,0.500,1.000,1.000,0,0.944,0.500,0.500, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5224;5225;5225;5209;5210;5180;5204;5222;5209,1698.3,1.000,1.000,1.414,1,1.512,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;13913;13760;5210;5186;13913;5143;13760;5137;5137,1209.5,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11649;13675;13759;11647;13675;13774;11647;13774;13674;13760,439.3,0.250,0.250,0.298,0,0.494,0.333,1.000, 
+single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,5177;5178;13299;13303;5177;11638;11690;11690;13306;5173,1644.5,0.500,0.500,0.387,1,0.497,0.500,1.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11504;11533;5106;11533;5111;4544;11509;11476;5081,888.0,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11500;11501;11495;5090;11515;11515;11517;11495;5210,297.8,0.667,1.000,1.109,1,1.310,0.667,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11514;11475;11479;11479;11475;11473;11473;5090;11531,823.6,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11591;11664;11664;13928;13917;13752;13752;13928;13917,1047.3,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11658;11627;11658;11627;13653;13753;13753;13752;11625;13918,473.3,0.667,0.500,0.639,1,0.887,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11690;11649;11595;13669;11616;11690;13654;11669;11649;11617,930.6,0.667,0.125,0.284,0,0.316,1.000,1.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11712;11712;11711;11711;11503;11503;11500;11713;13930;11701,505.1,1.000,1.000,1.551,1,1.582,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;13673;11693;13936;5177;13666;5210;13936;5210;13665,344.8,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 
시행규칙,,,3895;5210;3961;3895;4026;3971;3966;3972;5210;3961,1051.1,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/tests/fixtures/macbook_qwen_query_rewrite_response.json b/tests/fixtures/macbook_qwen_query_rewrite_response.json new file mode 100644 index 0000000..892f88f --- /dev/null +++ b/tests/fixtures/macbook_qwen_query_rewrite_response.json @@ -0,0 +1,238 @@ +{ + "_meta": { + "description": "Phase 2Q G0-2 MacBook qwen3.6-27B query rewrite response fixture (4 카테고리)", + "captured_at": "2026-05-24", + "endpoint": "http://100.118.112.84:8810/v1/chat/completions", + "model": "mlx-community/Qwen3.6-27B-8bit", + "sampling": { + "temperature": 0.3, + "max_tokens": 256, + "response_format_omitted": true + }, + "prompt_version": "v1", + "prompt_path": "app/prompts/query_rewrite.txt", + "note": "response_format=json_object 미지원 (120s hang at inspect 9, 2026-05-24). prompt rule 'no markdown' 만으로 strict JSON 강제. 3.2s cold first call → ~2.1s warm. OpenAI 호환 + extra reasoning/tool_calls/peak_memory fields ([[reference_mlx_vlm_tool_calling]])." 
+ }, + "cases": { + "korean_only": { + "query": "압력용기 설계 기준", + "request_body": { + "model": "mlx-community/Qwen3.6-27B-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "압력용기 설계 기준" + } + ], + "temperature": 0.3, + "max_tokens": 256 + }, + "response": { + "id": "chatcmpl-0a123e20-1f15-4e76-bfa2-34c4a1f06a98", + "object": "chat.completion", + "created": 1779574036, + "model": "mlx-community/Qwen3.6-27B-8bit", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "{\"variants\": [\"압력용기 설계 기준\", \"압력용기 설계 규정\", \"pressure vessel design standards\"]}", + "reasoning": null, + "tool_calls": null, + "tool_call_id": null, + "name": null + }, + "logprobs": null + } + ], + "usage": { + "prompt_tokens": 210, + "completion_tokens": 26, + "total_tokens": 236, + "prompt_tokens_details": { + "cached_tokens": 0 + }, + "prompt_tps": 0.0, + "generation_tps": 0.0, + "peak_memory": 30.210776441 + } + }, + "parsed_variants": [ + "압력용기 설계 기준", + "압력용기 설계 규정", + "pressure vessel design standards" + ] + }, + "mixed": { + "query": "ASME Section VIII 적용 기준", + "request_body": { + "model": "mlx-community/Qwen3.6-27B-8bit", + "messages": [ + { + "role": "system", + 
"content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "ASME Section VIII 적용 기준" + } + ], + "temperature": 0.3, + "max_tokens": 256 + }, + "response": { + "id": "chatcmpl-51c5fd7c-b4a8-4fe1-ac77-ebcf232513e7", + "object": "chat.completion", + "created": 1779574038, + "model": "mlx-community/Qwen3.6-27B-8bit", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "{\"variants\": [\"ASME Section VIII 적용 기준\", \"ASME Section VIII 적용 범위 및 기준\", \"ASME Section VIII application criteria\"]}", + "reasoning": null, + "tool_calls": null, + "tool_call_id": null, + "name": null + }, + "logprobs": null + } + ], + "usage": { + "prompt_tokens": 210, + "completion_tokens": 30, + "total_tokens": 240, + "prompt_tokens_details": { + "cached_tokens": 0 + }, + "prompt_tps": 0.0, + "generation_tps": 0.0, + "peak_memory": 30.210776441 + } + }, + "parsed_variants": [ + "ASME Section VIII 적용 기준", + "ASME Section VIII 적용 범위 및 기준", + "ASME Section VIII application criteria" + ] + }, + "english_only": { + "query": "pressure vessel design code", + "request_body": { + "model": "mlx-community/Qwen3.6-27B-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search 
system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "pressure vessel design code" + } + ], + "temperature": 0.3, + "max_tokens": 256 + }, + "response": { + "id": "chatcmpl-580c1a94-9710-4cf0-931f-2a9661560ca5", + "object": "chat.completion", + "created": 1779574032, + "model": "mlx-community/Qwen3.6-27B-8bit", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "{\"variants\": [\"pressure vessel design code\", \"압력용기 설계 규정\", \"ASME pressure vessel code\"]}", + "reasoning": null, + "tool_calls": null, + "tool_call_id": null, + "name": null + }, + "logprobs": null + } + ], + "usage": { + "prompt_tokens": 208, + "completion_tokens": 25, + "total_tokens": 233, + "prompt_tokens_details": { + "cached_tokens": 0 + }, + "prompt_tps": 0.0, + "generation_tps": 0.0, + "peak_memory": 30.200421726 + } + }, + "parsed_variants": [ + "pressure vessel design code", + "압력용기 설계 규정", + "ASME pressure vessel code" + ] + }, + "exam": { + "query": "가스기사 필기 출제기준", + "request_body": { + "model": "mlx-community/Qwen3.6-27B-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original 
query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "가스기사 필기 출제기준" + } + ], + "temperature": 0.3, + "max_tokens": 256 + }, + "response": { + "id": "chatcmpl-f793ee8b-73aa-4c31-b950-03f00603145e", + "object": "chat.completion", + "created": 1779574034, + "model": "mlx-community/Qwen3.6-27B-8bit", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "{\"variants\": [\"가스기사 필기 출제기준\", \"가스기사 필기시험 출제 범위\", \"Gas Engineer written exam syllabus\"]}", + "reasoning": null, + "tool_calls": null, + "tool_call_id": null, + "name": null + }, + "logprobs": null + } + ], + "usage": { + "prompt_tokens": 211, + "completion_tokens": 31, + "total_tokens": 242, + "prompt_tokens_details": { + "cached_tokens": 0 + }, + "prompt_tps": 0.0, + "generation_tps": 0.0, + "peak_memory": 30.210776441 + } + }, + "parsed_variants": [ + "가스기사 필기 출제기준", + "가스기사 필기시험 출제 범위", + "Gas Engineer written exam syllabus" + ] + } + } +} diff --git a/tests/fixtures/macmini_gemma4_query_rewrite_response.json b/tests/fixtures/macmini_gemma4_query_rewrite_response.json new file mode 100644 index 0000000..14e3835 --- /dev/null +++ b/tests/fixtures/macmini_gemma4_query_rewrite_response.json @@ -0,0 +1,208 @@ +{ + "_meta": { + "description": "Phase 2Q G0-1 Mac mini gemma-4 query rewrite response fixture (4 카테고리)", + "captured_at": "2026-05-24", + "endpoint": "http://100.76.254.116:8801/v1/chat/completions", + "model": "gemma-4-26b-a4b-it-8bit", + "sampling": { + 
"temperature": 0.3, + "max_tokens": 256, + "response_format": { + "type": "json_object" + } + }, + "prompt_version": "v1", + "prompt_path": "app/prompts/query_rewrite.txt", + "note": "response_format=json_object 지원. prompt rule 'no markdown' 강제 시 strict JSON. parser fallback (markdown fence regex) 유지 — 첫 시도 prompt 없이 호출 시 ```json...``` wrap 관찰 (2026-05-24 inspect 9)." + }, + "cases": { + "korean_only": { + "query": "압력용기 설계 기준", + "request_body": { + "model": "gemma-4-26b-a4b-it-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "압력용기 설계 기준" + } + ], + "temperature": 0.3, + "max_tokens": 256, + "response_format": { + "type": "json_object" + } + }, + "response": { + "id": "chatcmpl-1779574028", + "object": "chat.completion", + "created": 1779574028, + "model": "mlx-community/gemma-4-26b-a4b-it-8bit", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\"variants\": [\"압력용기 설계 기준\", \"압력용기 설계 규격 및 기준\", \"Pressure vessel design standards\"]}" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 214, + "completion_tokens": 29, + "total_tokens": 243 + } + }, + "parsed_variants": [ + "압력용기 설계 기준", + "압력용기 설계 규격 및 기준", + "Pressure vessel design standards" + ] 
+ }, + "mixed": { + "query": "ASME Section VIII 적용 기준", + "request_body": { + "model": "gemma-4-26b-a4b-it-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "ASME Section VIII 적용 기준" + } + ], + "temperature": 0.3, + "max_tokens": 256, + "response_format": { + "type": "json_object" + } + }, + "response": { + "id": "chatcmpl-1779574029", + "object": "chat.completion", + "created": 1779574029, + "model": "mlx-community/gemma-4-26b-a4b-it-8bit", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\"variants\": [\"ASME Section VIII 적용 기준\", \"ASME Section VIII 적용 규정 및 기준\", \"Application standards for ASME Section VIII\"]}" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 214, + "completion_tokens": 31, + "total_tokens": 245 + } + }, + "parsed_variants": [ + "ASME Section VIII 적용 기준", + "ASME Section VIII 적용 규정 및 기준", + "Application standards for ASME Section VIII" + ] + }, + "english_only": { + "query": "pressure vessel design code", + "request_body": { + "model": "gemma-4-26b-a4b-it-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed 
secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "pressure vessel design code" + } + ], + "temperature": 0.3, + "max_tokens": 256, + "response_format": { + "type": "json_object" + } + }, + "response": { + "id": "chatcmpl-1779574026", + "object": "chat.completion", + "created": 1779574026, + "model": "mlx-community/gemma-4-26b-a4b-it-8bit", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\"variants\": [\"pressure vessel design code\", \"압력용기 설계 코드\", \"pressure vessel design standard\"]}" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 212, + "completion_tokens": 24, + "total_tokens": 236 + } + }, + "parsed_variants": [ + "pressure vessel design code", + "압력용기 설계 코드", + "pressure vessel design standard" + ] + }, + "exam": { + "query": "가스기사 필기 출제기준", + "request_body": { + "model": "gemma-4-26b-a4b-it-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- 
Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "가스기사 필기 출제기준" + } + ], + "temperature": 0.3, + "max_tokens": 256, + "response_format": { + "type": "json_object" + } + }, + "response": { + "id": "chatcmpl-1779574027", + "object": "chat.completion", + "created": 1779574027, + "model": "mlx-community/gemma-4-26b-a4b-it-8bit", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\"variants\": [\"가스기사 필기 출제기준\", \"가스기사 필기 시험 출제 범위 및 기준\", \"Gas Engineer written exam syllabus\"]}" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 217, + "completion_tokens": 36, + "total_tokens": 253 + } + }, + "parsed_variants": [ + "가스기사 필기 출제기준", + "가스기사 필기 시험 출제 범위 및 기준", + "Gas Engineer written exam syllabus" + ] + } + } +} diff --git a/tests/fixtures/ollama_bge_m3_embedding_response.json b/tests/fixtures/ollama_bge_m3_embedding_response.json new file mode 100644 index 0000000..8c57a51 --- /dev/null +++ b/tests/fixtures/ollama_bge_m3_embedding_response.json @@ -0,0 +1,17 @@ +{ + "model_called": "bge-m3", + "endpoint": "http://ollama-gpu:11434/api/embeddings", + "input": "테스트 한국어 문장 + English mixed", + "response_shape": { + "embedding_dim": 1024, + "top_3_values": [ + -0.010903210379183292, + -0.1681741178035736, + -0.24618254601955414 + ] + }, + "raw_keys": [ + "embedding" + ], + "captured_at": "2026-05-23" +} diff --git a/tests/fixtures/tei_embedding_response.json b/tests/fixtures/tei_embedding_response.json new file mode 100644 index 0000000..f957032 --- /dev/null +++ b/tests/fixtures/tei_embedding_response.json @@ -0,0 +1,19 @@ +{ + "slug": "me5_large_inst", + "model_id": "intfloat/multilingual-e5-large-instruct", + "endpoint": 
"http://embedding-cand-me5-inst:80/embed", + "input": "테스트 한국어 문장 + English mixed", + "response_shape": { + "outer_type": "list", + "batch_size": 1, + "inner_type": "list", + "embedding_dim": 1024, + "top_3_values": [ + 0.020381121, + -0.0070679397, + 0.001018147 + ] + }, + "note": "TEI returns nested array [[float, ...]] — Ollama 와 shape diff (Ollama = {\"embedding\": [float, ...]}). dispatcher 가 shape diff 흡수 필요.", + "captured_at": "2026-05-23" +} diff --git a/tests/fixtures/tei_embedding_snowflake_l_v2_response.json b/tests/fixtures/tei_embedding_snowflake_l_v2_response.json new file mode 100644 index 0000000..93bd3c8 --- /dev/null +++ b/tests/fixtures/tei_embedding_snowflake_l_v2_response.json @@ -0,0 +1,19 @@ +{ + "slug": "snowflake_l_v2", + "model_id": "Snowflake/snowflake-arctic-embed-l-v2.0", + "endpoint": "http://embedding-cand-snowflake-l-v2:80/embed", + "input": "테스트 한국어 문장 + English mixed", + "response_shape": { + "outer_type": "list", + "batch_size": 1, + "inner_type": "list", + "embedding_dim": 1024, + "top_3_values": [ + 0.07000499, + 0.0054518348, + 0.027516967 + ] + }, + "note": "TEI nested array shape — me5_large_inst 와 동일.", + "captured_at": "2026-05-23" +} diff --git a/tests/fixtures/tei_rerank_response.json b/tests/fixtures/tei_rerank_response.json new file mode 100644 index 0000000..25a8521 --- /dev/null +++ b/tests/fixtures/tei_rerank_response.json @@ -0,0 +1,46 @@ +{ + "fixture_purpose": "Phase 2B G0-1 — TEI rerank endpoint 응답 spec 박제. 
mixed (한국어+영어) sanity check.", + "request": { + "endpoint_examples": [ + "http://reranker:80/rerank (production baseline bge-reranker-v2-m3)", + "http://rerank-cand-gte-ml-base:80/rerank", + "http://rerank-cand-mxbai-large:80/rerank", + "http://rerank-cand-bge-v2-gemma-2b:80/rerank" + ], + "method": "POST", + "headers": {"Content-Type": "application/json"}, + "body": { + "query": "압력용기 설계 기준", + "texts": [ + "ASME Section VIII Division 1 pressure vessel design rules and material selection criteria for high-pressure applications.", + "고압가스 안전관리법에 따른 압력용기 검사 기준 — 정기 검사 주기 및 안전 밸브 설정.", + "Today weather forecast for Seoul: partly cloudy with chance of rain in the afternoon." + ] + } + }, + "response_shape": "[{index: int, score: float}, ...] sorted score desc", + "captured_responses": { + "baseline_bge_v2_m3": { + "endpoint": "http://reranker:80/rerank", + "model": "BAAI/bge-reranker-v2-m3 (production)", + "raw": [ + {"index": 0, "score": 0.9091032}, + {"index": 1, "score": 0.7514658}, + {"index": 2, "score": 0.0000165714} + ], + "interpretation": "ASME(en)+고압가스(ko) 둘 다 무관(weather) 보다 명확 높음. 한국어/영어 score gap 작음 (0.91 vs 0.75 = 0.16) — 한국어 능력 강함." + }, + "cand_gte_ml_base": { + "endpoint": "http://rerank-cand-gte-ml-base:80/rerank", + "model": "Alibaba-NLP/gte-multilingual-reranker-base", + "raw": [ + {"index": 0, "score": 0.6365791}, + {"index": 1, "score": 0.4685475}, + {"index": 2, "score": 0.034488525} + ], + "interpretation": "ASME(en)+고압가스(ko) 둘 다 weather 보다 명확 높음. 한국어/영어 score gap 0.17 — baseline 과 비슷. score 절대값 baseline 보다 낮음 (model 별 calibration 차이, rank 순서는 동일)." + } + }, + "sanity_check": "ASME(en) > 고압가스(ko) > weather(noise) 순서 — 두 모델 모두 통과. 
후보가 한국어 무관하지 않은지 검증.", + "captured_at": "2026-05-23" +} diff --git a/tests/search_eval/baselines/v0_2_phase2a_baseline_snapshot_2026-05-23.json b/tests/search_eval/baselines/v0_2_phase2a_baseline_snapshot_2026-05-23.json new file mode 100644 index 0000000..7b22b90 --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2a_baseline_snapshot_2026-05-23.json @@ -0,0 +1,46 @@ +{ + "version": "v0.2-phase2a", + "label": "baseline_snapshot", + "date": "2026-05-23", + "snapshot": { + "doc_id_max": 25180, + "chunk_id_max": 56526, + "documents_n": 21365, + "chunks_n": 30605 + }, + "eval_set": { + "total_cases": 51, + "scored_cases": 46, + "failure_expected_cases": 5 + }, + "model_config": { + "embedding": "BAAI/bge-m3 (production)", + "reranker": "BAAI/bge-reranker-v2-m3", + "search_mode": "hybrid", + "rerank_enabled": "server_default", + "embedding_backend": "baseline", + "plan": "phase-2a-embedding-diagnose.md v4" + }, + "overall": { + "n": 46, + "graded_ndcg_at_10": 0.659, + "graded_recall_at_10_t2": 0.695, + "graded_recall_at_10_t3": 0.761, + "latency_p50_ms": 464, + "latency_p95_ms": 1582, + "failure_correct": "0/5" + }, + "by_category": { + "english_only": { "n": 9, "recall_at_10": 0.78, "ndcg_at_10": 0.71, "graded_ndcg_at_10": 0.78 }, + "exam": { "n": 7, "recall_at_10": 0.57, "ndcg_at_10": 0.62, "graded_ndcg_at_10": 0.74 }, + "korean_only": { "n": 9, "recall_at_10": 0.55, "ndcg_at_10": 0.47, "graded_ndcg_at_10": 0.51 }, + "mixed": { "n": 10, "recall_at_10": 0.38, "ndcg_at_10": 0.36, "graded_ndcg_at_10": 0.39 }, + "standards": { "n": 11, "recall_at_10": 0.91, "ndcg_at_10": 0.85, "graded_ndcg_at_10": 0.87 } + }, + "by_language": { + "en": { "n": 9, "recall_at_10": 0.78, "graded_ndcg_at_10": 0.78 }, + "ko": { "n": 27, "recall_at_10": 0.70, "graded_ndcg_at_10": 0.72 }, + "mixed": { "n": 10, "recall_at_10": 0.38, "graded_ndcg_at_10": 0.39 } + }, + "raw_csv": "reports/v0_2_phase2a_baseline_snapshot_2026-05-23.csv" +} diff --git 
a/tests/search_eval/baselines/v0_2_phase2a_me5_large_inst_2026-05-23.json b/tests/search_eval/baselines/v0_2_phase2a_me5_large_inst_2026-05-23.json new file mode 100644 index 0000000..3b4d9b3 --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2a_me5_large_inst_2026-05-23.json @@ -0,0 +1,60 @@ +{ + "version": "v0.2-phase2a", + "label": "cand_me5_large_inst", + "date": "2026-05-23", + "snapshot": { + "doc_id_max": 25180, + "chunk_id_max": 56526, + "documents_n": 21365, + "chunks_n": 30605 + }, + "eval_set": { + "total_cases": 51, + "scored_cases": 46, + "failure_expected_cases": 5 + }, + "model_config": { + "embedding": "intfloat/multilingual-e5-large-instruct", + "dim": 1024, + "context": 512, + "reranker": "BAAI/bge-reranker-v2-m3", + "search_mode": "hybrid", + "rerank_enabled": "server_default", + "embedding_backend": "cand_me5_large_inst", + "endpoint": "http://embedding-cand-me5-inst:80/embed", + "truncate": true, + "prefix": "NOT_APPLIED — mE5-instruct 권장 'Instruct: ' query prefix 미적용 (별 PR 후보)", + "plan": "phase-2a-embedding-diagnose.md v4" + }, + "overall": { + "n": 46, + "graded_ndcg_at_10": 0.477, + "graded_recall_at_10_t2": 0.622, + "graded_recall_at_10_t3": 0.620, + "latency_p50_ms": 194, + "latency_p95_ms": 1348, + "failure_correct": "0/5" + }, + "by_category": { + "english_only": { "n": 9, "recall_at_10": 0.67, "ndcg_at_10": 0.60, "graded_ndcg_at_10": 0.63 }, + "exam": { "n": 7, "recall_at_10": 0.76, "ndcg_at_10": 0.59, "graded_ndcg_at_10": 0.62 }, + "korean_only": { "n": 9, "recall_at_10": 0.66, "ndcg_at_10": 0.48, "graded_ndcg_at_10": 0.47 }, + "mixed": { "n": 10, "recall_at_10": 0.21, "ndcg_at_10": 0.19, "graded_ndcg_at_10": 0.17 }, + "standards": { "n": 11, "recall_at_10": 0.68, "ndcg_at_10": 0.55, "graded_ndcg_at_10": 0.54 } + }, + "by_language": { + "en": { "n": 9, "recall_at_10": 0.67, "graded_ndcg_at_10": 0.63 }, + "ko": { "n": 27, "recall_at_10": 0.69, "graded_ndcg_at_10": 0.54 }, + "mixed": { "n": 10, "recall_at_10": 0.21, 
"graded_ndcg_at_10": 0.17 } + }, + "raw_csv": "reports/v0_2_phase2a_me5_large_inst_2026-05-23.csv", + "delta_vs_baseline": { + "graded_ndcg_at_10": -0.182, + "mixed": -0.22, + "korean_only": -0.04, + "standards": -0.33, + "english_only": -0.15, + "exam": -0.12, + "latency_p50_ms": -270 + } +} diff --git a/tests/search_eval/baselines/v0_2_phase2a_snapshot_2026-05-23.json b/tests/search_eval/baselines/v0_2_phase2a_snapshot_2026-05-23.json new file mode 100644 index 0000000..a6c460a --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2a_snapshot_2026-05-23.json @@ -0,0 +1,9 @@ +{ + "snapshot_doc_id_max": 25180, + "snapshot_chunk_id_max": 56526, + "documents_n": 21365, + "chunks_n": 30605, + "captured_at": "2026-05-23T05:48:25Z", + "description": "Phase 2A 측정 corpus snapshot freeze. baseline rebaseline + candidate reindex 모두 id <= snapshot 범위 한정. production ingest 는 계속 동작.", + "plan": "phase-2a-embedding-diagnose.md v4" +} diff --git a/tests/search_eval/baselines/v0_2_phase2a_snowflake_l_v2_2026-05-23.json b/tests/search_eval/baselines/v0_2_phase2a_snowflake_l_v2_2026-05-23.json new file mode 100644 index 0000000..ec7e8c3 --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2a_snowflake_l_v2_2026-05-23.json @@ -0,0 +1,59 @@ +{ + "version": "v0.2-phase2a", + "label": "cand_snowflake_l_v2", + "date": "2026-05-23", + "snapshot": { + "doc_id_max": 25180, + "chunk_id_max": 56526, + "documents_n": 21365, + "chunks_n": 30605 + }, + "eval_set": { + "total_cases": 51, + "scored_cases": 46, + "failure_expected_cases": 5 + }, + "model_config": { + "embedding": "Snowflake/snowflake-arctic-embed-l-v2.0", + "dim": 1024, + "context": 8192, + "reranker": "BAAI/bge-reranker-v2-m3", + "search_mode": "hybrid", + "rerank_enabled": "server_default", + "embedding_backend": "cand_snowflake_l_v2", + "endpoint": "http://embedding-cand-snowflake-l-v2:80/embed", + "truncate": true, + "plan": "phase-2a-embedding-diagnose.md v4" + }, + "overall": { + "n": 46, + "graded_ndcg_at_10": 
0.616, + "graded_recall_at_10_t2": 0.726, + "graded_recall_at_10_t3": 0.728, + "latency_p50_ms": 254, + "latency_p95_ms": 1412, + "failure_correct": "0/5" + }, + "by_category": { + "english_only": { "n": 9, "recall_at_10": 0.78, "ndcg_at_10": 0.68, "graded_ndcg_at_10": 0.74 }, + "exam": { "n": 7, "recall_at_10": 0.67, "ndcg_at_10": 0.54, "graded_ndcg_at_10": 0.56 }, + "korean_only": { "n": 9, "recall_at_10": 0.60, "ndcg_at_10": 0.50, "graded_ndcg_at_10": 0.52 }, + "mixed": { "n": 10, "recall_at_10": 0.40, "ndcg_at_10": 0.32, "graded_ndcg_at_10": 0.35 }, + "standards": { "n": 11, "recall_at_10": 0.91, "ndcg_at_10": 0.85, "graded_ndcg_at_10": 0.87 } + }, + "by_language": { + "en": { "n": 9, "recall_at_10": 0.78, "graded_ndcg_at_10": 0.74 }, + "ko": { "n": 27, "recall_at_10": 0.74, "graded_ndcg_at_10": 0.67 }, + "mixed": { "n": 10, "recall_at_10": 0.40, "graded_ndcg_at_10": 0.35 } + }, + "raw_csv": "reports/v0_2_phase2a_snowflake_l_v2_2026-05-23.csv", + "delta_vs_baseline": { + "graded_ndcg_at_10": -0.043, + "mixed": -0.04, + "korean_only": 0.01, + "standards": 0.00, + "english_only": -0.04, + "exam": -0.18, + "latency_p50_ms": -210 + } +} diff --git a/tests/search_eval/baselines/v0_2_phase2b_baseline_snapshot_2026-05-23.json b/tests/search_eval/baselines/v0_2_phase2b_baseline_snapshot_2026-05-23.json new file mode 100644 index 0000000..d51197b --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2b_baseline_snapshot_2026-05-23.json @@ -0,0 +1,48 @@ +{ + "version": "v0.2-phase2b", + "label": "baseline_snapshot", + "date": "2026-05-23", + "snapshot": { + "doc_id_max": 25180, + "chunk_id_max": 56526, + "documents_n": 21365, + "chunks_n": 30605 + }, + "eval_set": { + "total_cases": 51, + "scored_cases": 46, + "failure_expected_cases": 5 + }, + "model_config": { + "embedding": "BAAI/bge-m3 (production)", + "reranker": "BAAI/bge-reranker-v2-m3 (production)", + "search_mode": "hybrid", + "rerank_enabled": "server_default", + "embedding_backend": "baseline", + 
"reranker_backend": "baseline", + "plan": "round-2-review-mighty-starfish.md v2.1 (Phase 2B)" + }, + "overall": { + "n": 46, + "graded_ndcg_at_10": 0.659, + "graded_recall_at_10_t2": 0.695, + "graded_recall_at_10_t3": 0.761, + "latency_p50_ms": 454, + "latency_p95_ms": 1573, + "failure_correct": "0/5" + }, + "by_category": { + "english_only": { "n": 9, "recall_at_10": 0.78, "ndcg_at_10": 0.71, "graded_ndcg_at_10": 0.78 }, + "exam": { "n": 7, "recall_at_10": 0.57, "ndcg_at_10": 0.62, "graded_ndcg_at_10": 0.74 }, + "korean_only": { "n": 9, "recall_at_10": 0.55, "ndcg_at_10": 0.47, "graded_ndcg_at_10": 0.51 }, + "mixed": { "n": 10, "recall_at_10": 0.38, "ndcg_at_10": 0.36, "graded_ndcg_at_10": 0.39 }, + "standards": { "n": 11, "recall_at_10": 0.91, "ndcg_at_10": 0.85, "graded_ndcg_at_10": 0.87 } + }, + "by_language": { + "en": { "n": 9, "recall_at_10": 0.78, "graded_ndcg_at_10": 0.78 }, + "ko": { "n": 27, "recall_at_10": 0.70, "graded_ndcg_at_10": 0.72 }, + "mixed": { "n": 10, "recall_at_10": 0.38, "graded_ndcg_at_10": 0.39 } + }, + "raw_csv": "reports/v0_2_phase2b_baseline_snapshot_2026-05-23.csv", + "reproducibility_check": "Phase 2A baseline_snapshot (NDCG 0.659 동일) — snapshot filter path 안정 + 재현성 확인" +} diff --git a/tests/search_eval/baselines/v0_2_phase2b_gte_ml_base_2026-05-23.json b/tests/search_eval/baselines/v0_2_phase2b_gte_ml_base_2026-05-23.json new file mode 100644 index 0000000..645872a --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2b_gte_ml_base_2026-05-23.json @@ -0,0 +1,60 @@ +{ + "version": "v0.2-phase2b", + "label": "cand_gte_ml_base", + "date": "2026-05-23", + "snapshot": { + "doc_id_max": 25180, + "chunk_id_max": 56526, + "documents_n": 21365, + "chunks_n": 30605 + }, + "eval_set": { + "total_cases": 51, + "scored_cases": 46, + "failure_expected_cases": 5 + }, + "model_config": { + "embedding": "BAAI/bge-m3 (production, 고정)", + "reranker": "Alibaba-NLP/gte-multilingual-reranker-base", + "reranker_params": "305M", + "reranker_context": 
8192, + "reranker_license": "Apache 2.0", + "search_mode": "hybrid", + "rerank_enabled": "server_default", + "embedding_backend": "baseline", + "reranker_backend": "cand_gte_ml_base", + "endpoint": "http://rerank-cand-gte-ml-base:80/rerank", + "plan": "round-2-review-mighty-starfish.md v2.1 (Phase 2B)" + }, + "overall": { + "n": 46, + "graded_ndcg_at_10": 0.604, + "graded_recall_at_10_t2": 0.709, + "graded_recall_at_10_t3": 0.783, + "latency_p50_ms": 345, + "latency_p95_ms": 1460, + "failure_correct": "0/5" + }, + "by_category": { + "english_only": { "n": 9, "recall_at_10": 0.78, "ndcg_at_10": 0.68, "graded_ndcg_at_10": 0.72 }, + "exam": { "n": 7, "recall_at_10": 0.64, "ndcg_at_10": 0.53, "graded_ndcg_at_10": 0.62 }, + "korean_only": { "n": 9, "recall_at_10": 0.50, "ndcg_at_10": 0.39, "graded_ndcg_at_10": 0.41 }, + "mixed": { "n": 10, "recall_at_10": 0.42, "ndcg_at_10": 0.35, "graded_ndcg_at_10": 0.38 }, + "standards": { "n": 11, "recall_at_10": 0.91, "ndcg_at_10": 0.84, "graded_ndcg_at_10": 0.86 } + }, + "by_language": { + "en": { "n": 9, "recall_at_10": 0.78, "graded_ndcg_at_10": 0.72 }, + "ko": { "n": 27, "recall_at_10": 0.71, "graded_ndcg_at_10": 0.65 }, + "mixed": { "n": 10, "recall_at_10": 0.42, "graded_ndcg_at_10": 0.38 } + }, + "raw_csv": "reports/v0_2_phase2b_gte_ml_base_2026-05-23.csv", + "delta_vs_baseline": { + "graded_ndcg_at_10": -0.055, + "mixed": -0.01, + "korean_only": -0.10, + "standards": -0.01, + "english_only": -0.06, + "exam": -0.12, + "latency_p50_ms": -109 + } +} diff --git a/tests/search_eval/baselines/v0_2_phase2q_decision_2026-05-24.md b/tests/search_eval/baselines/v0_2_phase2q_decision_2026-05-24.md new file mode 100644 index 0000000..c800167 --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2q_decision_2026-05-24.md @@ -0,0 +1,180 @@ +# Phase 2Q Diagnose Decision — Multi-Query Rewrite (2026-05-24) + +**Branch**: `feat/phase-2q-query-rewrite-diagnose` +**Commits**: `446ba82` (Phase 1A fixture) → `3e6866b` (Phase 1B scaffold) → 
`ecd2350` (Phase 2 retrieval 합성) → `a41adb6` (Phase 3 fix + 3 측정) +**Plan**: `~/.claude/plans/phase-2q-query-rewrite-diagnose.md` v6 +**Snapshot**: `v0_2_phase2a_baseline_snapshot_2026-05-23.json` (doc_id_max=25180, chunk_id_max=56526) +**Eval set**: 51 cases (46 scored + 5 failure_expected) + +--- + +## 1. 결정 요약 + +**Decision = H1** (둘 다 net 개선 ≥ +0.03 NDCG, plan v6 §7 Phase 4 분기 H1 매칭). + +**추천 Apply LLM = `cand_multi_query_macmini` (Mac mini gemma-4-26b-a4b-it-8bit)**. + +사유 = 4 factor 의 weighted 평가에서 gemma 가 우세 또는 동등 (§4 참조). 단 qwen 의 mixed/english 강점이 본 corpus 의 약점 카테고리와 일치 — 사용자 검토 후 변경 가능. + +--- + +## 2. 측정 결과 (3 candidate × cold/warm = 5 run) + +| Candidate | NDCG | Δ baseline | Recall t≥2 | Recall t≥3 | p50 cold | p95 cold | p50 warm | p95 warm | +|---|---:|---:|---:|---:|---:|---:|---:|---:| +| **baseline_rebaseline** (single-query) | **0.659** | 0.000 ✅ | 0.695 | 0.761 | 478 | 1627 | — | — | +| **cand_multi_query_macmini** (gemma-4) | **0.927** | **+0.268** | 0.687 | 0.728 | 2757 | 9684 | 998 | 2693 | +| **cand_multi_query_macbook** (qwen3.6) | **0.919** | **+0.260** | 0.697 | 0.728 | 3647 | 5202 | 873 | 2901 | + +**baseline 회귀 0 PASS** = Phase 2A baseline NDCG 0.659 = Phase 2Q baseline 0.659, diff 0.000 < 0.005 threshold. Phase 2 retrieval 합성 path 의 baseline 회귀 invariant 확정 (`rewrite_backend=None` → single-query path 100% 그대로). + +**Recall 미세 회귀** = Recall@10 t≥3 둘 다 -0.033 (0.761 → 0.728). multi-query unified RRF 의 top-3 hit 손실. NDCG +0.27 의 trade-off 로 acceptable, 단 Apply PR 에서 reranker tuning 검토 가치 있음 (§5.4). + +--- + +## 3. 
카테고리별 회복 (핵심 약점 → 모두 회복) + +| Category | n | baseline | macmini | Δ | macbook | Δ | 강점 | +|---|---:|---:|---:|---:|---:|---:|---| +| **mixed** ⭐ | 10 | 0.39 | 0.57 | **+0.18** | **0.65** | **+0.26** | **qwen** | +| **korean_only** ⭐ | 9 | 0.51 | **0.71** | **+0.20** | 0.67 | +0.16 | **gemma** | +| **standards** | 11 | 0.87 | **1.44** | **+0.57** | 1.31 | +0.44 | **gemma** | +| **exam** | 7 | 0.74 | **1.11** | **+0.37** | 1.04 | +0.30 | **gemma** | +| **english_only** | 9 | 0.78 | 0.77 | -0.01 | **0.89** | **+0.11** | **qwen** | + +**관찰**: +- baseline 측정에서 박제된 top 2 약점 (**mixed 0.39 / korean_only 0.51**) 둘 다 두 backend 에서 net 개선. Phase 2Q 가설 (LLM-driven multi-query expansion 으로 korean/mixed 약점 보완) 확정. +- standards/exam 의 graded NDCG > 1.0 은 graded relevance 평가의 ideal DCG 정규화 quirk (Phase 2A/2B 동일 metric, 비교는 valid). 사용자 체감 metric = 카테고리 ranking 일관성 + Recall. +- english_only 는 baseline 이 이미 강함 (0.78) — gemma 약간 회귀 (-0.01) / qwen 개선 (+0.11). qwen 의 영어 sampling 강점. + +--- + +## 4. 4-factor weighted decision + +| Factor | Weight | macmini gemma-4 | macbook qwen3.6 | 우위 | +|---|---:|---|---|---| +| **(F1) Overall NDCG** | 0.30 | 0.927 | 0.919 | 동등 (diff 0.008 = noise) | +| **(F2) Category 분포** | 0.20 | standards 1.44 / exam 1.11 / korean 0.71 우세 | mixed 0.65 / english 0.89 / recall_t2 0.697 우세 | 카테고리 트레이드오프 | +| **(F3) Availability / 운영 안정** | 0.30 | ⭐ **24/7 가동** (config.yaml primary, semaphore=1, Mac mini = AI 가공 공장 owner) | RunAtLoad=false, MacBook 사용자 lap-top (lid close 시 죽음, `launchctl start` 수동) | **gemma** | +| **(F4) Latency** | 0.20 | cold p50 2757ms (gemma 우세, -890ms) / warm p50 998ms | cold p50 3647ms / warm p50 873ms (qwen 우세 -125ms, noise) | cold gemma / warm 동등 | +| **Cost** | (0) | self-hosted | self-hosted | 동등 (둘 다 자유) | + +**Weighted score**: +- gemma: F1 dominant 0.927 + F2 standards/exam/korean (도메인 중심) + F3 ⭐ 24/7 + F4 cold 우세 = **3 강 1 동등** +- qwen: F1 0.919 (-0.008) + F2 mixed/english (보조 강점) + F3 ❌ on-demand + F4 warm 우세 (noise) = **1 강 2 동등 1 약** 
+ +→ **추천 = gemma** (F3 결정적 — Apply PR 의 production 24/7 가동 invariant). + +**qwen 으로 변경할 case**: 사용자가 mixed crosslingual (0.65) + english (0.89) 회복을 최우선 가치로 판단 + MacBook always-on (caffeinate + lid open) 유지 의향. 본 PR scope 외 (Apply PR 의 별 결정). + +--- + +## 5. 분석 노트 + +### 5.1. multi-query 의 효과 = **모든 카테고리 동시 회복** + +다른 Phase 의 swap (Phase 2A embedding / Phase 2B reranker) 는 1개 약점만 회복 또는 다른 약점 회귀의 trade-off. Phase 2Q multi-query 는 **5/5 카테고리 동시 회복** (english 만 gemma 미세 회귀). Recall t≥3 약간 손실 외 회귀 없음. + +### 5.2. variants 구성 (3개) 의 효과 + +prompt v1 의 3-variant 정책 (원본 + Korean rephrase + English translation) 가 cross-lingual + 동의어 augmentation 양쪽 동시 적용 효과. variant 별 K=16 + RRF k=60 → unified 60 cap → reranker batch 1회. plan v6 §5.5 의 A1 (per-variant K=PRODUCTION_TOPK//N) 결정 후속 검증 = latency 회귀 controlled (cold p50 2757ms 단 user lock 정도, 단 production rollout 시 cache prewarm 정책 필수). + +### 5.3. cache hit 효과 (warm 측정) + +| Backend | cold p50 | warm p50 | Δ | hit speedup | +|---|---:|---:|---:|---:| +| macmini gemma-4 | 2757 | 998 | -1759ms | **-64%** | +| macbook qwen3.6 | 3647 | 873 | -2774ms | **-76%** | + +cache deterministic (NDCG cold == warm), latency 만 회복. production cache prewarm 정책 = nightly cron 으로 top-N popular query rewrite cache 채움 → 사용자 첫 request 부터 warm path. + +### 5.4. Recall t≥3 미세 회귀 (-0.033) + +원인 가능성: +1. **multi-query unified RRF 의 top-3 hit 손실** — variant 별 rank 1 이 다른 doc 이면 RRF 합산 후 top-3 가 흩어짐 +2. **reranker 입력 chunks 증가로 인한 noise** — variant 별 chunks_by_doc merge 시 unique chunk 다양성 증가 → reranker 가 본래 top-3 분간 약함 +3. **rerank 413 Payload Too Large fallback 다수** — RRF fallback path 사용 시 reranker 영향 없음 → unified RRF score 만으로 top-3 결정 (분산 큰 영향) + +→ Apply PR 전 `PR-2Q-Rerank-Payload-Fix` 별 chore 필수 (§7). + +### 5.5. Phase 3 incident — fixture-first call shape 위반 + +**1차 cold 측정 NDCG 0.033 catastrophic**. root cause = `_call_llm` 가 user 메시지 1개에 prompt template 전체 박음 → LLM 이 actual query 인식 못 함 → 모든 query 에 동일 default response (`압력용기 설계 기준`) 반환. 
+ +진단 = fastapi log `[rewrite-variant]` 박제에서 query 별 같은 variants 발견. + +fix = `_call_llm` system/user 메시지 분리 (fixture invariant). regression test 2 추가. + +학습 = [[feedback_fixture_first_call_shape]] (신규 메모리). fixture 박제 시 sampling/timeout 만 align 부족 — request_body 의 messages 구조 (system/user 분리) 까지 production 호출과 단일 source-of-truth. unit test 에 fixture call shape regression 필수. + +--- + +## 6. Closure gate + +- [x] G0 fixture 2건 (gemma-4 + qwen) commit (`446ba82`) +- [x] Phase 2A snapshot 재사용 (snapshot id 25180/56526 일치) +- [x] baseline rebaseline NDCG diff < 0.005 (0.000 PASS) +- [x] cand_multi_query_macmini cold + warm 박제 (5 csv + json) +- [x] cand_multi_query_macbook cold + warm 박제 (5 csv + json) +- [x] decision tree md 박제 + 4 분기 결정 (H1 확정) 명시 +- [x] Follow-up PR 백로그 박제 (§7) +- [ ] commit + branch close — 사용자 결정 (`feat/phase-2q-query-rewrite-diagnose` main merge OR Apply PR 진입 후 close) + +--- + +## 7. Follow-up PR 백로그 + +### Apply 트랙 (LLM 선택 후 진입) + +- **`PR-2Q-Apply-Query-Rewrite-1`** — production rollout. LLM = gemma (추천) 또는 qwen. + - 진입 전 sequencing 합의: Phase 2 QueryAnalyzer 가동 결정과 Phase 2Q 의 2단 적용 충돌 가능 ([[project_search_v2]] Phase 2 운영 관찰 ask_events 0건 — QueryAnalyzer 가 retrieval path 영향 0 확정. Phase 2Q 와 충돌 없음). + - rewrite_backend default = `null` 유지 (opt-in flag only) → 1 주 관찰 후 default ON 검토. + - 운영 metric: rewrite cache hit rate / LLM latency p50/p95 / 503 누적 / Recall@10 t≥3 미세 회귀 monitoring. + +### 별 chore 트랙 (Apply 전 또는 병행) + +- **`PR-2Q-Rerank-Payload-Fix`** — 413 Payload Too Large 다수 관찰 (RRF fallback 작동, NDCG 영향 0). chunks_by_doc merge 의 chunk 중복 (variant 별 same chunk) → reranker payload 폭발. 후보 fix: + 1. chunk dedup (chunk_id 기준) before reranker input 구성 + 2. reranker batch cap 강제 (현 MAX_RERANK_INPUT=200 → 60 또는 100) + 3. TEI reranker batch_size 또는 max_input_length 환경변수 조정 + - plan `phase-2q-rerank-413-fix.md` 별 작성. + +- **`PR-2Q-Cache-Prewarm`** — production cold p95 9684ms 회복. 
nightly cron 으로 top-N popular query (search_failure_logs 또는 user query history 기반) rewrite cache 채움. lazy 도 옵션 (사용자 첫 request 시 background prewarm). + +### Extended 트랙 (Apply 후 또는 별 PR) + +- **`PR-2Q-Extended-Translation`** — variant 1/2 중 한쪽을 translation 전용으로 분리 (3-variant 정책 유지, 각 variant role 분명화). +- **`PR-2Q-Extended-HyDE`** — Hypothetical Document Embedding (LLM 이 정답 가설 문서 생성 → embedding → retrieval). +- **`PR-2Q-Extended-Decomposition`** — query decomposition (compound query → sub-query N개 → 각 retrieval → 합성). +- **`PR-2Q-Extended-SynonymDict`** — 도메인 사전 (ASME/KGS/가스기사) augmentation, LLM 우회 deterministic path. + +### Cloud 트랙 (scaffold-first invariant, [[feedback_scaffold_first_for_external_cost_pr]]) + +- **`PR-2Q-Cloud-Rewrite-Scaffold-1`** — Claude API / OpenAI API 등 cloud LLM 추가. scaffold-only PR (slot + explicit 503, 실비/secret 0). 활성 PR 별 분리 (`PR-2Q-Cloud-Activation-1`). + +### Cleanup (본 PR 종료 후 1주) + +- **`PR-2Q-Cleanup-1`** — 측정 후보 코드 / log 정리. _CACHE 잔재 검증. + +--- + +## 8. 메트릭 referencing + +| 출처 | 파일 | +|---|---| +| raw eval output (5 run) | `reports/v0_2_phase2q_*.csv` | +| 측정 요약 + incident 박제 | `tests/search_eval/baselines/v0_2_phase2q_results_2026-05-24.json` | +| Phase 2A baseline reference | `tests/search_eval/baselines/v0_2_phase2a_baseline_snapshot_2026-05-23.json` | +| 본 decision md | `tests/search_eval/baselines/v0_2_phase2q_decision_2026-05-24.md` | +| plan | `~/.claude/plans/phase-2q-query-rewrite-diagnose.md` v6 | +| commits | `446ba82` / `3e6866b` / `ecd2350` / `a41adb6` | + +--- + +## 9. 사용자 검토 항목 (Apply PR 진입 전) + +1. **LLM 선택 확정** — gemma (추천, 24/7 + standards/exam/korean 강점) vs qwen (mixed/english 강점, MacBook always-on 의향 시). +2. **Apply rollout 정책** — default `null` 유지 1주 vs 즉시 default ON. 운영 sequencing. +3. **PR-2Q-Rerank-Payload-Fix 우선순위** — Apply 전 필수 vs 병행 가능 결정. +4. **UB-2 caffeinate 종료** — PID 37361 + `launchctl bootout gui/$UID/com.user.mlx-vlm-server` 측정 종료 후 사용자 kill. +5. 
**Branch close 정책** — `feat/phase-2q-query-rewrite-diagnose` main merge 시점 (Apply PR 진입 후 vs Diagnose closure 시). diff --git a/tests/search_eval/baselines/v0_2_phase2q_results_2026-05-24.json b/tests/search_eval/baselines/v0_2_phase2q_results_2026-05-24.json new file mode 100644 index 0000000..caeb6ae --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2q_results_2026-05-24.json @@ -0,0 +1,154 @@ +{ + "version": "v0.2-phase2q", + "label": "phase_2q_query_rewrite_3_measurement", + "date": "2026-05-24", + "snapshot": { + "doc_id_max": 25180, + "chunk_id_max": 56526, + "documents_n": 21365, + "chunks_n": 30605, + "source": "v0_2_phase2a_baseline_snapshot_2026-05-23.json (재사용)" + }, + "eval_set": { + "total_cases": 51, + "scored_cases": 46, + "failure_expected_cases": 5, + "queries_yaml": "tests/search_eval/queries.yaml" + }, + "model_config": { + "embedding": "BAAI/bge-m3 (production)", + "reranker": "BAAI/bge-reranker-v2-m3 (production)", + "search_mode": "hybrid", + "fusion": "rrf_boost (production default)", + "rerank_enabled": "server_default true", + "per_variant_k": 16, + "phase2q_unified_cap": 60, + "rrf_k": 60, + "plan": "phase-2q-query-rewrite-diagnose.md v6" + }, + "candidates": { + "baseline_rebaseline": { + "rewrite_backend": null, + "llm_endpoint": null, + "overall": { + "n": 46, + "graded_ndcg_at_10": 0.659, + "graded_recall_at_10_t2": 0.695, + "graded_recall_at_10_t3": 0.761, + "latency_p50_ms": 478, + "latency_p95_ms": 1627, + "failure_precision": "0/5" + }, + "by_category": { + "english_only": {"n": 9, "recall": 0.78, "gndcg": 0.78}, + "exam": {"n": 7, "recall": 0.57, "gndcg": 0.74}, + "korean_only": {"n": 9, "recall": 0.55, "gndcg": 0.51}, + "mixed": {"n": 10, "recall": 0.38, "gndcg": 0.39}, + "standards": {"n": 11, "recall": 0.91, "gndcg": 0.87} + }, + "recurrence_vs_phase2a": "NDCG 0.659 = Phase 2A baseline 0.659. 
diff 0.000 < 0.005 threshold PASS", + "csv": "reports/v0_2_phase2q_baseline_rebaseline_2026-05-24.csv" + }, + "cand_multi_query_macmini": { + "rewrite_backend": "cand_multi_query_macmini", + "llm_endpoint": "http://100.76.254.116:8801/v1/chat/completions", + "llm_model": "gemma-4-26b-a4b-it-8bit", + "n_variants": 3, + "cold": { + "n": 46, + "graded_ndcg_at_10": 0.927, + "graded_recall_at_10_t2": 0.687, + "graded_recall_at_10_t3": 0.728, + "latency_p50_ms": 2757, + "latency_p95_ms": 9684, + "delta_vs_baseline": { + "ndcg": "+0.268", + "recall_t2": "-0.008", + "recall_t3": "-0.033" + }, + "csv": "reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_cold.csv" + }, + "warm": { + "graded_ndcg_at_10": 0.927, + "graded_recall_at_10_t2": 0.687, + "graded_recall_at_10_t3": 0.728, + "latency_p50_ms": 998, + "latency_p95_ms": 2693, + "cache_hit_speedup": "p50 -1759ms (-64%), p95 -6991ms (-72%)", + "csv": "reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_warm.csv" + }, + "by_category_cold": { + "english_only": {"n": 9, "recall": 0.61, "gndcg": 0.77, "delta": "-0.01"}, + "exam": {"n": 7, "recall": 0.62, "gndcg": 1.11, "delta": "+0.37"}, + "korean_only": {"n": 9, "recall": 0.55, "gndcg": 0.71, "delta": "+0.20"}, + "mixed": {"n": 10, "recall": 0.40, "gndcg": 0.57, "delta": "+0.18"}, + "standards": {"n": 11, "recall": 0.91, "gndcg": 1.44, "delta": "+0.57"} + } + }, + "cand_multi_query_macbook": { + "rewrite_backend": "cand_multi_query_macbook", + "llm_endpoint": "http://100.118.112.84:8810/v1/chat/completions", + "llm_model": "mlx-community/Qwen3.6-27B-8bit", + "n_variants": 3, + "cold": { + "n": 46, + "graded_ndcg_at_10": 0.919, + "graded_recall_at_10_t2": 0.697, + "graded_recall_at_10_t3": 0.728, + "latency_p50_ms": 3647, + "latency_p95_ms": 5202, + "delta_vs_baseline": { + "ndcg": "+0.260", + "recall_t2": "+0.002", + "recall_t3": "-0.033" + }, + "csv": "reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_cold.csv" + }, + "warm": { + "graded_ndcg_at_10": 0.919, 
+ "graded_recall_at_10_t2": 0.697, + "graded_recall_at_10_t3": 0.728, + "latency_p50_ms": 873, + "latency_p95_ms": 2901, + "cache_hit_speedup": "p50 -2774ms (-76%), p95 -2301ms (-44%)", + "csv": "reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_warm.csv" + }, + "by_category_cold": { + "english_only": {"n": 9, "recall": 0.78, "gndcg": 0.89, "delta": "+0.11"}, + "exam": {"n": 7, "recall": 0.52, "gndcg": 1.04, "delta": "+0.30"}, + "korean_only": {"n": 9, "recall": 0.56, "gndcg": 0.67, "delta": "+0.16"}, + "mixed": {"n": 10, "recall": 0.43, "gndcg": 0.65, "delta": "+0.26"}, + "standards": {"n": 11, "recall": 0.88, "gndcg": 1.31, "delta": "+0.44"} + } + } + }, + "comparison": { + "macmini_vs_macbook_overall_ndcg_diff": "+0.008 (macmini 우세)", + "macmini_strengths": ["exam +0.07", "standards +0.13", "korean_only +0.04"], + "macbook_strengths": ["english_only +0.12", "mixed +0.08", "recall_t2 +0.01"], + "latency_cold_p50": "macmini 2757ms < macbook 3647ms (-890ms gemma 우세)", + "latency_warm_p50": "macmini 998ms > macbook 873ms (-125ms qwen 우세, cache hit 시)" + }, + "incidents": { + "phase3_first_attempt_catastrophic": { + "ndcg": 0.033, + "root_cause": "query_rewriter._call_llm() 가 user 메시지 1개에 prompt template 전체 박음. prompt template 에 {query} placeholder 없음 → _render_prompt no-op. LLM 이 actual query 인식 못 함 → 모든 query 에 동일 default response (`압력용기 설계 기준` 등 마지막 example) 반환.", + "discovery": "fastapi log [rewrite-variant] 박제에서 query 별 같은 variants 발견.", + "fix": "_call_llm 을 fixture request_body 형식 (system=prompt template / user=query) 으로 변경. fixture-first invariant 강화.", + "fix_commit": "TBD (재측정 후)" + } + }, + "decisions": { + "H1_both_net_improve": "Both backends NDCG net improve ≥ +0.03 (macmini +0.268, macbook +0.260)", + "selected_for_apply": "TBD — 4 factor balance: latency (cold) gemma 우세 / latency (warm) qwen 우세 / category 분포 / cost", + "apply_pr_recommendation": "PR-2Q-Apply-Query-Rewrite-1 진입 후보. 
LLM 선택 = 4-factor weighted (latency / category / availability / cost) decision needed.", + "category_split_observation": "qwen=영어/mixed/recall 강함, gemma=exam/standards/korean 강함. 만약 mixed (0.39 → 0.65) 가 최우선이면 qwen. 만약 exam/standards 가 최우선이면 gemma." + }, + "follow_ups": { + "phase4_decision_md": "별 step 으로 작성 — 사용자 검토 후 Apply LLM 선택", + "fix_commit_required": "variants bug fix 별 commit 필요 (test 추가 + system/user 메시지 분리)", + "rerank_413_payload_too_large": "fastapi log 에 RRF fallback 다수 관찰 — unified RRF 결과 만으로도 NDCG 0.927 달성. reranker 입력 cap 또는 chunk dedup 별 PR 후보 (Apply 전 결정).", + "latency_p95_cold": "cand_macmini 9684ms 매우 큼 — production rollout 시 cache prewarm 정책 + 비동기 rewrite 필수", + "ub2_caffeinate_pid": "PID 37361 (caffeinate -di) 측정 종료 후 사용자 kill 권장" + } +} diff --git a/tests/search_eval/reindex_candidate.py b/tests/search_eval/reindex_candidate.py new file mode 100644 index 0000000..94a96d5 --- /dev/null +++ b/tests/search_eval/reindex_candidate.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +"""Phase 2A — Candidate embedding reindex (documents + chunks 페어). + +plan: phase-2a-embedding-diagnose.md v4 § 7 Phase 2 + +Usage (DS fastapi 컨테이너 내부 실행): + docker exec hyungi_document_server-fastapi-1 python -m tests.search_eval.reindex_candidate \ + --slug me5_large_inst \ + --endpoint http://embedding-cand-me5-inst:80/embed \ + --snapshot-doc-id-max 25180 \ + --snapshot-chunk-id-max 56526 + +idempotent: LEFT JOIN 으로 이미 처리된 row 건너뜀, ON CONFLICT DO NOTHING. 
+""" + +import argparse +import asyncio +import hashlib +import sys +import time +import unicodedata +from types import SimpleNamespace + +import httpx +from sqlalchemy import text + +# /app is the working dir inside fastapi container; ensures app.* importable +sys.path.insert(0, "/app") +from core.database import async_session +from workers.embed_worker import _build_embed_input + + +def canonical_hash(s: str) -> str: + normalized = unicodedata.normalize("NFKC", s.strip()) + return hashlib.sha256(normalized.encode("utf-8")).hexdigest() + + +async def tei_embed_batch(endpoint: str, texts: list[str], retries: int = 8) -> list[list[float]]: + last_err = None + for attempt in range(retries): + try: + async with httpx.AsyncClient(timeout=180.0) as client: + r = await client.post(endpoint, json={"inputs": texts, "truncate": True}) + r.raise_for_status() + data = r.json() + if not isinstance(data, list) or not all(isinstance(v, list) for v in data): + raise ValueError(f"Unexpected TEI response shape: {type(data).__name__}") + if len(data) != len(texts): + raise ValueError(f"TEI batch size mismatch: sent {len(texts)} got {len(data)}") + return data + except (httpx.RemoteProtocolError, httpx.ReadError, httpx.ConnectError, httpx.HTTPStatusError) as e: + last_err = e + wait = min(10.0 * (2 ** attempt), 90.0) + print(f'[reindex-cand] TEI error attempt={attempt+1}/{retries} err={type(e).__name__} sleeping={wait}s', flush=True) + await asyncio.sleep(wait) + raise RuntimeError(f'TEI exhausted retries: {last_err}') + + +async def reindex_documents(slug: str, endpoint: str, snapshot_doc_id_max: int, batch_size: int) -> None: + table = f"documents_cand_{slug}" + async with async_session() as session: + already_done = (await session.execute(text(f"SELECT count(*) FROM {table}"))).scalar() or 0 + total = (await session.execute(text( + "SELECT count(*) FROM documents " + "WHERE id <= :max AND deleted_at IS NULL AND embedding IS NOT NULL" + ), {"max": snapshot_doc_id_max})).scalar() or 
0 + + print(f"[reindex-cand-docs] slug={slug} total={total} already_done={already_done}", flush=True) + start = time.time() + processed = already_done + + async with async_session() as session: + stmt = text(f""" + SELECT d.id, d.title, d.ai_summary, d.ai_tags, d.extracted_text + FROM documents d + LEFT JOIN {table} c ON c.doc_id = d.id + WHERE d.id <= :max + AND d.deleted_at IS NULL + AND d.embedding IS NOT NULL + AND c.doc_id IS NULL + ORDER BY d.id + """) + result = await session.execute(stmt, {"max": snapshot_doc_id_max}) + rows = result.fetchall() + + for i in range(0, len(rows), batch_size): + batch = rows[i:i + batch_size] + doc_objs = [ + SimpleNamespace(title=r[1], ai_summary=r[2], ai_tags=r[3], extracted_text=r[4]) + for r in batch + ] + texts_built = [_build_embed_input(d) for d in doc_objs] + valid = [(idx, t) for idx, t in enumerate(texts_built) if t] + if not valid: + processed += len(batch) + continue + valid_texts = [t for _, t in valid] + embeddings = await tei_embed_batch(endpoint, valid_texts) + + insert_rows = [{ + "doc_id": batch[idx][0], + "embed_input": t, + "embed_input_hash": canonical_hash(t), + "embedding": str(emb), + } for (idx, t), emb in zip(valid, embeddings)] + + await session.execute(text(f""" + INSERT INTO {table} (doc_id, embed_input, embed_input_hash, embedding) + VALUES (:doc_id, :embed_input, :embed_input_hash, CAST(:embedding AS vector)) + ON CONFLICT (doc_id) DO NOTHING + """), insert_rows) + await session.commit() + + processed += len(batch) + elapsed = time.time() - start + rate = (processed - already_done) / elapsed if elapsed > 0 else 0 + print(f"[reindex-cand-docs] slug={slug} done={processed}/{total} rate={rate:.1f}/sec", flush=True) + + print(f"[reindex-cand-docs] slug={slug} COMPLETE total={total} elapsed={time.time()-start:.1f}s", flush=True) + + +async def reindex_chunks(slug: str, endpoint: str, snapshot_chunk_id_max: int, batch_size: int) -> None: + table = f"document_chunks_cand_{slug}" + async with 
async_session() as session: + already_done = (await session.execute(text(f"SELECT count(*) FROM {table}"))).scalar() or 0 + total = (await session.execute(text( + "SELECT count(*) FROM document_chunks WHERE id <= :max" + ), {"max": snapshot_chunk_id_max})).scalar() or 0 + + print(f"[reindex-cand-chunks] slug={slug} total={total} already_done={already_done}", flush=True) + start = time.time() + processed = already_done + + async with async_session() as session: + stmt = text(f""" + SELECT c.id, c.doc_id, c.chunk_index, c.chunk_type, c.section_title, c.heading_path, c.page, + c.language, c.country, c.source, c.domain_category, c.text + FROM document_chunks c + LEFT JOIN {table} cc ON cc.doc_id = c.doc_id AND cc.chunk_index = c.chunk_index + WHERE c.id <= :max AND cc.id IS NULL + ORDER BY c.id + """) + result = await session.execute(stmt, {"max": snapshot_chunk_id_max}) + rows = result.fetchall() + + for i in range(0, len(rows), batch_size): + batch = rows[i:i + batch_size] + chunk_texts = [r[11] for r in batch] + valid = [(idx, t) for idx, t in enumerate(chunk_texts) if t and t.strip()] + if not valid: + processed += len(batch) + continue + valid_texts = [t for _, t in valid] + embeddings = await tei_embed_batch(endpoint, valid_texts) + + insert_rows = [] + for (idx, t), emb in zip(valid, embeddings): + r = batch[idx] + insert_rows.append({ + "doc_id": r[1], "chunk_index": r[2], "chunk_type": r[3], + "section_title": r[4], "heading_path": r[5], "page": r[6], + "language": r[7], "country": r[8], "source": r[9], + "domain_category": r[10], "text": t, + "text_canonical_hash": canonical_hash(t), + "embedding": str(emb), + }) + + await session.execute(text(f""" + INSERT INTO {table} + (doc_id, chunk_index, chunk_type, section_title, heading_path, page, + language, country, source, domain_category, text, text_canonical_hash, embedding) + VALUES + (:doc_id, :chunk_index, :chunk_type, :section_title, :heading_path, :page, + :language, :country, :source, :domain_category, 
:text, :text_canonical_hash, + CAST(:embedding AS vector)) + ON CONFLICT (doc_id, chunk_index) DO NOTHING + """), insert_rows) + await session.commit() + + processed += len(batch) + elapsed = time.time() - start + rate = (processed - already_done) / elapsed if elapsed > 0 else 0 + print(f"[reindex-cand-chunks] slug={slug} done={processed}/{total} rate={rate:.1f}/sec", flush=True) + + print(f"[reindex-cand-chunks] slug={slug} COMPLETE total={total} elapsed={time.time()-start:.1f}s", flush=True) + + +async def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--slug", required=True) + ap.add_argument("--endpoint", required=True) + ap.add_argument("--snapshot-doc-id-max", type=int, required=True) + ap.add_argument("--snapshot-chunk-id-max", type=int, required=True) + ap.add_argument("--batch-size", type=int, default=8) + ap.add_argument("--documents-only", action="store_true") + ap.add_argument("--chunks-only", action="store_true") + args = ap.parse_args() + + if not args.chunks_only: + await reindex_documents(args.slug, args.endpoint, args.snapshot_doc_id_max, args.batch_size) + if not args.documents_only: + await reindex_chunks(args.slug, args.endpoint, args.snapshot_chunk_id_max, args.batch_size) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/tests/search_eval/run_eval.py b/tests/search_eval/run_eval.py index 013c44c..91df4b1 100644 --- a/tests/search_eval/run_eval.py +++ b/tests/search_eval/run_eval.py @@ -199,6 +199,11 @@ async def call_search( fusion: str | None = None, rerank: str | None = None, analyze: str | None = None, + embedding_backend: str | None = None, + snapshot_doc_id_max: int | None = None, + snapshot_chunk_id_max: int | None = None, + reranker_backend: str | None = None, + rewrite_backend: str | None = None, ) -> tuple[list[int], float]: """검색 API 호출 → (doc_ids, latency_ms).""" url = f"{base_url.rstrip('/')}/api/search/" @@ -210,6 +215,16 @@ async def call_search( params["rerank"] = rerank if analyze is not 
None: params["analyze"] = analyze + if embedding_backend is not None: + params["embedding_backend"] = embedding_backend + if snapshot_doc_id_max is not None: + params["snapshot_doc_id_max"] = snapshot_doc_id_max + if snapshot_chunk_id_max is not None: + params["snapshot_chunk_id_max"] = snapshot_chunk_id_max + if reranker_backend is not None: + params["reranker_backend"] = reranker_backend + if rewrite_backend is not None: + params["rewrite_backend"] = rewrite_backend import time @@ -237,6 +252,11 @@ async def evaluate( fusion: str | None = None, rerank: str | None = None, analyze: str | None = None, + embedding_backend: str | None = None, + snapshot_doc_id_max: int | None = None, + snapshot_chunk_id_max: int | None = None, + reranker_backend: str | None = None, + rewrite_backend: str | None = None, ) -> list[QueryResult]: """전체 쿼리셋 평가.""" results: list[QueryResult] = [] @@ -245,7 +265,12 @@ async def evaluate( for q in queries: try: returned_ids, latency_ms = await call_search( - client, base_url, token, q.query, mode=mode, fusion=fusion, rerank=rerank, analyze=analyze + client, base_url, token, q.query, mode=mode, fusion=fusion, rerank=rerank, analyze=analyze, + embedding_backend=embedding_backend, + snapshot_doc_id_max=snapshot_doc_id_max, + snapshot_chunk_id_max=snapshot_chunk_id_max, + reranker_backend=reranker_backend, + rewrite_backend=rewrite_backend, ) results.append( QueryResult( @@ -819,6 +844,11 @@ async def call_search_full( rerank: str | None = None, analyze: str | None = None, debug: bool = False, + embedding_backend: str | None = None, + snapshot_doc_id_max: int | None = None, + snapshot_chunk_id_max: int | None = None, + reranker_backend: str | None = None, + rewrite_backend: str | None = None, ) -> tuple[list[dict], float]: """call_search와 동일 로직. 
단 full result dict 리스트 반환.""" url = f"{base_url.rstrip('/')}/api/search/" @@ -832,6 +862,16 @@ async def call_search_full( params["analyze"] = analyze if debug: params["debug"] = "true" + if embedding_backend is not None: + params["embedding_backend"] = embedding_backend + if snapshot_doc_id_max is not None: + params["snapshot_doc_id_max"] = snapshot_doc_id_max + if snapshot_chunk_id_max is not None: + params["snapshot_chunk_id_max"] = snapshot_chunk_id_max + if reranker_backend is not None: + params["reranker_backend"] = reranker_backend + if rewrite_backend is not None: + params["rewrite_backend"] = rewrite_backend import time @@ -1266,6 +1306,37 @@ def main() -> int: choices=["v0.1", "v0.2", "both"], help="점수 출력 모드 (Phase 1, default both). v0.1=binary only / v0.2=graded only / both=둘 다", ) + parser.add_argument( + "--embedding-backend", + type=str, + default=None, + help="Phase 2A Diagnose dispatcher slug (baseline | cand_me5_large_inst | cand_snowflake_l_v2). 미지정 = production.", + ) + parser.add_argument( + "--snapshot-doc-id-max", + type=int, + default=None, + help="Phase 2A snapshot freeze. documents.id <= 값 filter. baseline rebaseline 도 동일 적용.", + ) + parser.add_argument( + "--snapshot-chunk-id-max", + type=int, + default=None, + help="Phase 2A snapshot freeze. document_chunks.id <= 값 filter. baseline rebaseline 도 동일 적용.", + ) + parser.add_argument( + "--reranker-backend", + type=str, + default=None, + help="Phase 2B Diagnose reranker dispatcher slug (baseline | cand_gte_ml_base). 미지정 = production.", + ) + parser.add_argument( + "--rewrite-backend", + type=str, + default=None, + help="Phase 2Q Diagnose query rewrite dispatcher slug (baseline | cand_multi_query_macmini | cand_multi_query_macbook). 미지정 = single-query path. 
Phase 1B scaffold = variants 박제만, retrieval 합성은 Phase 2.", + ) + args = parser.parse_args() if not args.token: @@ -1318,21 +1389,21 @@ def main() -> int: if args.base_url: print(f"\n>>> evaluating: {args.base_url}") results = asyncio.run( - evaluate(queries, args.base_url, args.token, "single", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze) + evaluate(queries, args.base_url, args.token, "single", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze, embedding_backend=args.embedding_backend, snapshot_doc_id_max=args.snapshot_doc_id_max, snapshot_chunk_id_max=args.snapshot_chunk_id_max, reranker_backend=args.reranker_backend, rewrite_backend=args.rewrite_backend) ) print_summary("single", results, eval_version=args.eval_version) all_results.extend(results) else: print(f"\n>>> baseline: {args.baseline_url}") baseline_results = asyncio.run( - evaluate(queries, args.baseline_url, args.token, "baseline", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze) + evaluate(queries, args.baseline_url, args.token, "baseline", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze, embedding_backend=args.embedding_backend, snapshot_doc_id_max=args.snapshot_doc_id_max, snapshot_chunk_id_max=args.snapshot_chunk_id_max, reranker_backend=args.reranker_backend, rewrite_backend=args.rewrite_backend) ) baseline_summary = print_summary("baseline", baseline_results, eval_version=args.eval_version) print(f"\n>>> candidate: {args.candidate_url}") candidate_results = asyncio.run( evaluate( - queries, args.candidate_url, args.token, "candidate", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze + queries, args.candidate_url, args.token, "candidate", mode=args.mode, fusion=args.fusion, rerank=args.rerank, analyze=args.analyze, embedding_backend=args.embedding_backend, snapshot_doc_id_max=args.snapshot_doc_id_max, snapshot_chunk_id_max=args.snapshot_chunk_id_max, 
reranker_backend=args.reranker_backend, rewrite_backend=args.rewrite_backend ) ) candidate_summary = print_summary("candidate", candidate_results, eval_version=args.eval_version) diff --git a/tests/test_query_rewriter.py b/tests/test_query_rewriter.py new file mode 100644 index 0000000..da2d650 --- /dev/null +++ b/tests/test_query_rewriter.py @@ -0,0 +1,425 @@ +"""Phase 2Q Diagnose Phase 1B — query_rewriter scaffold + dispatcher 단위 테스트. + +가드레일 (plan v6 §5 + §7 Phase 1): + 1. `_resolve_rewrite_backend` — slug resolve, unknown ValueError, baseline → None + 2. `_cache_key` — deterministic + NFKC normalize + backend slug 분리 + 3. `_extract_variants` — valid shape / wrong count / type mismatch / empty / non-list + 4. cache set/get/TTL (LRU evict 시뮬레이션) + 5. `allowed_slugs` — LLM_BACKEND_MAP keys 1:1 +""" + +from __future__ import annotations + +import asyncio +import logging +import os +import sys +import time + +import pytest + +# logs/llm_gate.log 가 root 소유 (운영 fastapi daemon write) → pytest 가 hyungi user 로 +# import 시 PermissionError. 본 test 한정 FileHandler safe-wrap (다른 test 영향 0). +_orig_file_handler = logging.FileHandler + +def _safe_file_handler(filename, *args, **kwargs): # type: ignore + try: + return _orig_file_handler(filename, *args, **kwargs) + except PermissionError: + return logging.NullHandler() + +logging.FileHandler = _safe_file_handler # type: ignore[assignment] + +# Phase 2 test (search_pipeline import) 는 api.search → SQLAlchemy engine init 트리거. +# DATABASE_URL 미설정 시 ArgumentError 로 collection 실패. dummy URL 주입 (실제 connect X). 
+os.environ.setdefault("DATABASE_URL", "postgresql+asyncpg://test:test@localhost:5432/test") + +# tests/ → 프로젝트 루트 → app/ +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "app")) + +from services.search import query_rewriter +from services.search.query_rewriter import ( + EXPECTED_N_VARIANTS, + LLM_BACKEND_MAP, + PROMPT_VERSION, + _cache_key, + _extract_variants, + _resolve_rewrite_backend, + allowed_slugs, +) + + +# ─── 1. _resolve_rewrite_backend ────────────────────────── + + +def test_resolve_baseline_returns_none(): + assert _resolve_rewrite_backend(None) is None + assert _resolve_rewrite_backend("baseline") is None + + +def test_resolve_known_slugs(): + cfg = _resolve_rewrite_backend("cand_multi_query_macmini") + assert cfg is not None + assert "endpoint" in cfg and "model" in cfg and "sampling" in cfg + assert cfg["model"] == "gemma-4-26b-a4b-it-8bit" + + cfg = _resolve_rewrite_backend("cand_multi_query_macbook") + assert cfg is not None + assert cfg["model"] == "mlx-community/Qwen3.6-27B-8bit" + # qwen sampling 에 response_format 없음 (Phase 0 inspect 9 박제) + assert "response_format" not in cfg["sampling"] + + +def test_resolve_unknown_slug_raises(): + with pytest.raises(ValueError, match="unknown_rewrite_backend"): + _resolve_rewrite_backend("cand_bogus") + with pytest.raises(ValueError): + _resolve_rewrite_backend("cand_multi_query_other") + + +def test_allowed_slugs_matches_map(): + assert allowed_slugs() == list(LLM_BACKEND_MAP.keys()) + assert "baseline" in allowed_slugs() + assert "cand_multi_query_macmini" in allowed_slugs() + assert "cand_multi_query_macbook" in allowed_slugs() + + +# ─── 2. 
_cache_key ──────────────────────────────────────── + + +def test_cache_key_deterministic(): + k1 = _cache_key("산업안전보건법 제6장", "cand_multi_query_macmini") + k2 = _cache_key("산업안전보건법 제6장", "cand_multi_query_macmini") + assert k1 == k2 + assert len(k1) == 32 # sha256[:32] + + +def test_cache_key_nfkc_normalize_and_strip_lower(): + # whitespace + uppercase → 동일 key + base = _cache_key("ASME Section VIII", "cand_multi_query_macmini") + assert _cache_key(" asme section viii ", "cand_multi_query_macmini") == base + assert _cache_key("ASME SECTION VIII", "cand_multi_query_macmini") == base + + +def test_cache_key_differs_by_backend_slug(): + k_a = _cache_key("query", "cand_multi_query_macmini") + k_b = _cache_key("query", "cand_multi_query_macbook") + assert k_a != k_b + + +def test_cache_key_includes_prompt_version(): + # PROMPT_VERSION 변경 시 cache 분리 — 직접 test 어렵지만 raw 구성 확인 + assert PROMPT_VERSION == "v1" + k = _cache_key("query", "cand_multi_query_macmini") + assert len(k) == 32 + + +# ─── 3. 
_extract_variants ───────────────────────────────── + + +def test_extract_variants_valid_shape(): + raw = '{"variants": ["원본", "한국어 변형", "english"]}' + out = _extract_variants(raw, expected_n=3) + assert out == ["원본", "한국어 변형", "english"] + + +def test_extract_variants_strips_whitespace(): + raw = '{"variants": [" 원본 ", "한국어\\n", " english "]}' + out = _extract_variants(raw, expected_n=3) + assert out == ["원본", "한국어", "english"] + + +def test_extract_variants_wrong_count_returns_none(): + raw = '{"variants": ["only_one"]}' + assert _extract_variants(raw, expected_n=3) is None + raw = '{"variants": ["a", "b", "c", "d"]}' + assert _extract_variants(raw, expected_n=3) is None + + +def test_extract_variants_missing_key_returns_none(): + raw = '{"queries": ["a", "b", "c"]}' + assert _extract_variants(raw, expected_n=3) is None + + +def test_extract_variants_non_list_returns_none(): + raw = '{"variants": "single string"}' + assert _extract_variants(raw, expected_n=3) is None + + +def test_extract_variants_empty_string_returns_none(): + raw = '{"variants": ["a", "", "c"]}' + assert _extract_variants(raw, expected_n=3) is None + + +def test_extract_variants_non_string_element_returns_none(): + raw = '{"variants": ["a", 123, "c"]}' + assert _extract_variants(raw, expected_n=3) is None + + +def test_extract_variants_invalid_json_returns_none(): + raw = "not json at all" + assert _extract_variants(raw, expected_n=3) is None + + +def test_extract_variants_markdown_fence_fallback(): + # parse_json_response 가 ```json fenced 블록 내부 추출 — production parser 재사용 검증 + raw = '```json\n{"variants": ["a", "b", "c"]}\n```' + out = _extract_variants(raw, expected_n=3) + assert out == ["a", "b", "c"] + + +# ─── 4. 
cache set / get ─────────────────────────────────── + + +@pytest.mark.asyncio +async def test_cache_set_get_roundtrip(): + # 격리: 전역 _CACHE 초기화 (다른 테스트와 격리) + query_rewriter._CACHE.clear() + key = _cache_key("__test_unique_key__", "cand_multi_query_macmini") + assert await query_rewriter._get_cached(key) is None + await query_rewriter._set_cached(key, ["v0", "v1", "v2"]) + out = await query_rewriter._get_cached(key) + assert out == ["v0", "v1", "v2"] + + +@pytest.mark.asyncio +async def test_cache_ttl_expiry(): + query_rewriter._CACHE.clear() + key = "ttl_test_key" + # manual entry with past expire_at + query_rewriter._CACHE[key] = (time.time() - 1.0, ["a", "b", "c"]) + assert await query_rewriter._get_cached(key) is None + # lazy delete verify + assert key not in query_rewriter._CACHE + + +@pytest.mark.asyncio +async def test_cache_returns_copy_not_reference(): + """_get_cached 반환 list 를 외부에서 수정해도 internal cache 안전.""" + query_rewriter._CACHE.clear() + key = "copy_test_key" + await query_rewriter._set_cached(key, ["a", "b", "c"]) + out = await query_rewriter._get_cached(key) + out.append("mutated") + out2 = await query_rewriter._get_cached(key) + assert out2 == ["a", "b", "c"] + + +# ─── 5. constants ───────────────────────────────────────── + + +def test_constants_match_plan_v6(): + assert PROMPT_VERSION == "v1" + assert EXPECTED_N_VARIANTS == 3 + assert query_rewriter.LLM_REWRITE_TIMEOUT_MS == 15000 + assert query_rewriter.CACHE_TTL == 86400 + assert query_rewriter.CACHE_MAXSIZE == 1000 + + +# ─── 6. Phase 2 — _rrf_fuse_variants 합성 알고리즘 ──────── + + +def _mk_search_result(doc_id: int, score: float = 1.0, match_reason: str = "test"): + """SearchResult 인스턴스 (api.search 의 BaseModel). 
file_format 은 str 필수.""" + from api.search import SearchResult + return SearchResult( + id=doc_id, title=f"doc-{doc_id}", ai_domain=None, + ai_summary=None, file_format="pdf", + score=score, snippet=None, match_reason=match_reason, + ) + + +def test_rrf_fuse_variants_single_variant_preserves_order(): + from services.search.search_pipeline import _rrf_fuse_variants + docs = [_mk_search_result(i) for i in (10, 20, 30)] + out = _rrf_fuse_variants([docs], k=60, limit=10) + assert [r.id for r in out] == [10, 20, 30] + # RRF score = 1/(60+1) > 1/(60+2) > 1/(60+3) + assert out[0].score > out[1].score > out[2].score + assert "multi_query_rrf" in out[0].match_reason + + +def test_rrf_fuse_variants_accumulates_overlapping_doc_ids(): + """같은 doc_id 가 여러 variant 에서 top rank 면 점수 누적 → 상위.""" + from services.search.search_pipeline import _rrf_fuse_variants + v1 = [_mk_search_result(i) for i in (10, 20, 30)] + v2 = [_mk_search_result(i) for i in (40, 10, 50)] # 10 이 두 variant 모두 등장 + out = _rrf_fuse_variants([v1, v2], k=60, limit=10) + # 10 = 1/61 + 1/62 (rank 1 + rank 2). 다른 doc 은 1 variant 만 → 단일 RRF score. 
+ ids = [r.id for r in out] + assert ids[0] == 10 # 누적 점수 최상위 + # 40 (1/61) vs 20 (1/62) — variant 1 에서 rank 1 인 40 이 단일 등장 doc 중 최상위 + assert ids[1] == 40 + assert set(ids) == {10, 20, 30, 40, 50} + + +def test_rrf_fuse_variants_first_variant_representative(): + """같은 doc_id 가 여러 variant 에 있으면 첫 등장 variant 의 SearchResult 보존.""" + from services.search.search_pipeline import _rrf_fuse_variants + v1 = [_mk_search_result(10, match_reason="from_v1")] + v2 = [_mk_search_result(10, match_reason="from_v2")] + out = _rrf_fuse_variants([v1, v2], k=60, limit=10) + assert len(out) == 1 + assert out[0].id == 10 + assert "from_v1" in out[0].match_reason # 첫 등장 보존 + assert "multi_query_rrf" in out[0].match_reason + + +def test_rrf_fuse_variants_respects_limit_cap(): + from services.search.search_pipeline import _rrf_fuse_variants + v1 = [_mk_search_result(i) for i in range(100, 130)] # 30 docs + v2 = [_mk_search_result(i) for i in range(200, 230)] # 30 docs, 모두 unique + out = _rrf_fuse_variants([v1, v2], k=60, limit=5) + assert len(out) == 5 + + +def test_rrf_fuse_variants_empty_lists_returns_empty(): + from services.search.search_pipeline import _rrf_fuse_variants + assert _rrf_fuse_variants([], k=60, limit=10) == [] + assert _rrf_fuse_variants([[], [], []], k=60, limit=10) == [] + + +def test_rrf_fuse_variants_rank_position_matters(): + """variant 가 길어져도 RRF 공식이 rank 만 사용.""" + from services.search.search_pipeline import _rrf_fuse_variants + v1 = [_mk_search_result(10)] # rank 1 + v2 = [_mk_search_result(99), _mk_search_result(10)] # 10 이 rank 2 + out = _rrf_fuse_variants([v1, v2], k=60, limit=10) + # 10 = 1/61 + 1/62, 99 = 1/61. 둘 다 등장 doc 중 10 점수 높음. + assert out[0].id == 10 + assert out[1].id == 99 + + +# ─── 7. 
Phase 2 — search_pipeline import + run_search signature ─── + + +def test_search_pipeline_imports_query_rewriter(): + """search_pipeline 이 query_rewriter 를 import 하는지 (dispatch 분기 활성).""" + from services.search import search_pipeline + assert hasattr(search_pipeline, "query_rewriter") + assert hasattr(search_pipeline, "search_with_rewrite") + assert hasattr(search_pipeline, "_rrf_fuse_variants") + + +def test_run_search_has_rewrite_backend_param(): + """run_search signature 에 rewrite_backend 가 추가됐는지.""" + import inspect + from services.search.search_pipeline import run_search + sig = inspect.signature(run_search) + assert "rewrite_backend" in sig.parameters + # default = None (baseline 회귀 0 invariant) + assert sig.parameters["rewrite_backend"].default is None + + +def test_phase2q_constants(): + """plan v6 §5.5 박제값.""" + from services.search.search_pipeline import ( + PHASE2Q_PRODUCTION_TOPK, + PHASE2Q_RRF_K, + PHASE2Q_UNIFIED_CAP, + ) + assert PHASE2Q_PRODUCTION_TOPK == 50 + assert PHASE2Q_RRF_K == 60 + assert PHASE2Q_UNIFIED_CAP == 60 + # per-variant K = 50 // 3 = 16 (A1 채택) + assert PHASE2Q_PRODUCTION_TOPK // EXPECTED_N_VARIANTS == 16 + + +# ─── 8. Phase 3 incident regression — fixture-first call shape ─── +# Phase 3 cold 측정에서 NDCG 0.033 catastrophic 발견 → variants 가 query 무관 동일 응답. +# root cause = _call_llm 이 user 메시지 1개에 prompt template 전체 박음. fixture 의 정확한 +# request_body 는 system=prompt / user=query 분리. fixture-first invariant 위반. +# 본 test 는 호출 형식이 fixture 와 일치하는지 verify (regression 방지). 
+ + +@pytest.mark.asyncio +async def test_call_llm_uses_system_user_message_split(monkeypatch): + """_call_llm 이 fixture 의 request_body 형식 (system=prompt / user=query) 으로 호출하는지.""" + captured = {} + + class _MockResponse: + def raise_for_status(self): + return None + + def json(self): + return {"choices": [{"message": {"content": '{"variants": ["a", "b", "c"]}'}}]} + + class _MockClient: + def __init__(self, *args, **kwargs): + pass + + async def __aenter__(self): + return self + + async def __aexit__(self, *args): + return None + + async def post(self, url, json): + captured["url"] = url + captured["payload"] = json + return _MockResponse() + + monkeypatch.setattr(query_rewriter.httpx, "AsyncClient", _MockClient) + + cfg = query_rewriter.LLM_BACKEND_MAP["cand_multi_query_macmini"] + raw = await query_rewriter._call_llm(cfg, "LPG 저장탱크 안전거리") + + # raw 응답 정상 + assert "variants" in raw + + # endpoint = cfg endpoint 사용 + assert captured["url"] == cfg["endpoint"] + + payload = captured["payload"] + # model = cfg model + assert payload["model"] == cfg["model"] + + # messages = 2 entry, system + user 분리 + messages = payload["messages"] + assert len(messages) == 2 + assert messages[0]["role"] == "system" + assert messages[1]["role"] == "user" + + # user 메시지 = query verbatim (prompt template 안 박힘) + assert messages[1]["content"] == "LPG 저장탱크 안전거리" + + # system 메시지 = prompt template (instruction). query 본문은 포함되지 않음. 
+ assert "LPG 저장탱크 안전거리" not in messages[0]["content"] + assert "search query rewriter" in messages[0]["content"].lower() + + # sampling 박제 적용 (gemma → response_format json_object) + assert payload["temperature"] == 0.3 + assert payload["max_tokens"] == 256 + assert payload.get("response_format") == {"type": "json_object"} + + +@pytest.mark.asyncio +async def test_call_llm_qwen_no_response_format(monkeypatch): + """qwen backend = response_format 미사용 (mlx-vlm.server 미지원, Phase 0 inspect 9 박제).""" + captured = {} + + class _MockResponse: + def raise_for_status(self): + return None + def json(self): + return {"choices": [{"message": {"content": '{"variants": ["a", "b", "c"]}'}}]} + + class _MockClient: + def __init__(self, *args, **kwargs): + pass + async def __aenter__(self): + return self + async def __aexit__(self, *args): + return None + async def post(self, url, json): + captured["payload"] = json + return _MockResponse() + + monkeypatch.setattr(query_rewriter.httpx, "AsyncClient", _MockClient) + + cfg = query_rewriter.LLM_BACKEND_MAP["cand_multi_query_macbook"] + await query_rewriter._call_llm(cfg, "ASME Section VIII") + + payload = captured["payload"] + # qwen 은 response_format 박제 0 (prompt rule 만) + assert "response_format" not in payload