From a41adb63a070d4608da6a0eb00c7ca4233e256c5 Mon Sep 17 00:00:00 2001 From: hyungi Date: Sun, 24 May 2026 00:51:56 +0000 Subject: [PATCH] =?UTF-8?q?fix(search):=20Phase=202Q=20variants=20bug=20fi?= =?UTF-8?q?x=20+=20Phase=203=203=20measurement=20=EB=B0=95=EC=A0=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 3 cold 측정 1차에서 NDCG 0.033 catastrophic 발견 — 모든 query 에 동일 variants 반환. root cause = _call_llm 이 user 메시지 1개에 prompt template 전체 박음. LLM 이 actual query 인식 못 함. fixture request_body 형식 (system=prompt / user=query) 과 mismatch. fixture-first invariant 위반. fix: - app/services/search/query_rewriter.py _call_llm — system/user 메시지 분리. fixture request_body 와 단일 source-of-truth. _render_prompt 는 [deprecated] 유지. - tests/test_query_rewriter.py — Phase 3 regression test 2: · _call_llm 가 system + user 분리 호출 verify (httpx.AsyncClient monkeypatch) · qwen backend = response_format 미사용 verify - 32/32 unit test PASS. Phase 3 측정 (fix 후 재측정, 51 case × 3 candidate × cold/warm = 5 run): - baseline_rebaseline (rewrite_backend=null): NDCG 0.659 = Phase 2A 0.659, diff 0.000 PASS - cand_multi_query_macmini cold: NDCG 0.927 (Δ +0.268), p50 2757ms / p95 9684ms - cand_multi_query_macmini warm: NDCG 0.927 동일, p50 998ms (cache hit -64%) - cand_multi_query_macbook cold: NDCG 0.919 (Δ +0.260), p50 3647ms / p95 5202ms - cand_multi_query_macbook warm: NDCG 0.919 동일, p50 873ms (cache hit -76%) 핵심 약점 회복 (gemma / qwen): - mixed 0.39 → 0.57 / 0.65 - korean_only 0.51 → 0.71 / 0.67 - standards 0.87 → 1.44 / 1.31 - exam 0.74 → 1.11 / 1.04 decision = H1 (both backends 유의미 net 개선). LLM 선택 = Phase 4 decision md 별 step. 산출물: - reports/v0_2_phase2q_*.csv (5 raw run_eval output) - tests/search_eval/baselines/v0_2_phase2q_results_2026-05-24.json (요약 + incident 박제) follow-up: - rerank 413 Payload Too Large 다수 관찰 (RRF fallback 작동, NDCG 영향 없음). Apply PR 전 별 chore — chunk dedup 또는 reranker batch cap 검토. - [review] candidate run 의 NDCG@10 이 정의상 상한인 1.0 을 초과 (예: macbook cold kw_003 = 1.631, category 평균 standards 1.44 / exam 1.11) — returned_ids_top10 에 동일 doc id 가 중복 포함되어 DCG 가 이중 계상된 것으로 보임 (baseline csv 에서는 중복 미관찰). multi-query fusion 결과 dedup 후 재측정 전까지 Δ +0.268 / +0.260 은 과대 추정 가능. - p95 cold 9684ms 매우 큼.
production rollout 시 cache prewarm 정책 필수. Co-Authored-By: Claude Opus 4.7 (1M context) --- app/services/search/query_rewriter.py | 21 ++- ...phase2q_baseline_rebaseline_2026-05-24.csv | 52 ++++++ ...nd_multi_query_macbook_2026-05-24_cold.csv | 52 ++++++ ...nd_multi_query_macbook_2026-05-24_warm.csv | 52 ++++++ ...nd_multi_query_macmini_2026-05-24_cold.csv | 52 ++++++ ...nd_multi_query_macmini_2026-05-24_warm.csv | 52 ++++++ .../v0_2_phase2q_results_2026-05-24.json | 154 ++++++++++++++++++ tests/test_query_rewriter.py | 100 ++++++++++++ 8 files changed, 532 insertions(+), 3 deletions(-) create mode 100644 reports/v0_2_phase2q_baseline_rebaseline_2026-05-24.csv create mode 100644 reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_cold.csv create mode 100644 reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_warm.csv create mode 100644 reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_cold.csv create mode 100644 reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_warm.csv create mode 100644 tests/search_eval/baselines/v0_2_phase2q_results_2026-05-24.json diff --git a/app/services/search/query_rewriter.py b/app/services/search/query_rewriter.py index fcd8261..d6636df 100644 --- a/app/services/search/query_rewriter.py +++ b/app/services/search/query_rewriter.py @@ -135,7 +135,11 @@ def _get_prompt_template() -> str: def _render_prompt(query: str) -> str: - """``{query}`` placeholder 치환.""" + """[deprecated, fixture-first 패턴 후 unused] ``{query}`` placeholder 치환. + + 실제 LLM 호출은 ``_call_llm`` 에서 system/user 메시지 분리 (fixture invariant). + 본 헬퍼는 호환성만 보존 — prompt template 에 ``{query}`` placeholder 없으면 no-op. + """ return _get_prompt_template().replace("{query}", query) @@ -167,13 +171,24 @@ def _extract_variants(raw: str, expected_n: int) -> list[str] | None: async def _call_llm(cfg: dict[str, Any], query: str) -> str: """OpenAI 호환 chat/completions 호출. cfg = LLM_BACKEND_MAP entry. 
+ 호출 형식 = fixture 단일 source-of-truth: + - system 메시지 = prompt template (instruction) + - user 메시지 = query (rewrite 대상) + + 이전 implementation (user 메시지에 prompt 전체 박음) 은 모델이 actual query 인식 못 함 + → 모든 query 에 동일 response 반환하는 NDCG catastrophic 버그 (Phase 3 cold 측정에서 발견). + fixture 의 request_body 와 일치 = production 호출 형식. + Returns: raw response text (first choice message content). Raises: httpx.* / KeyError / ValueError on protocol mismatch. """ - prompt = _render_prompt(query) + system_prompt = _get_prompt_template() payload: dict[str, Any] = { "model": cfg["model"], - "messages": [{"role": "user", "content": prompt}], + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": query}, + ], } sampling = cfg.get("sampling") or {} payload.update(sampling) diff --git a/reports/v0_2_phase2q_baseline_rebaseline_2026-05-24.csv b/reports/v0_2_phase2q_baseline_rebaseline_2026-05-24.csv new file mode 100644 index 0000000..a59e592 --- /dev/null +++ b/reports/v0_2_phase2q_baseline_rebaseline_2026-05-24.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3879;3856;3851;4041;3890;3917;3863;3908;3855,2322.6,1.000,1.000,1.000,1,0.808,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3917;3919;10573;10571;3916;3874;3918;3854;3922,500.8,1.000,1.000,1.000,1,0.834,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3985;3984;3993;3857;3978;3983;3957;3980;3903,300.0,1.000,1.000,1.000,1,1.000,1.000,1.000, 
+single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3852;3851;3877;3905;3878;3858;3903;3781;3881,481.1,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,10570;3888;3912;3913;3911;3905;3909;3906;3910;3893,494.2,1.000,0.500,0.631,1,0.631,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5244;5249;3897;3863;5253;3856;3895;3867;3879;3851,500.4,0.500,0.167,0.257,0,0.314,0.667,1.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;5227;3854;5244;3851;3867;3878;3863;3908;10573,459.3,1.000,1.000,0.793,1,0.873,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3904;3903;3909;3905;3981;3760;5253;3985;3896,402.9,0.667,1.000,0.636,1,0.636,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10572;3917;3916;3918;5227;3854;3877;3922;5240;5226,359.3,0.500,0.500,0.441,1,0.506,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;3876;5249;5234;4025;6675;11677;10573;3757;3811,589.4,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,16081;18077;22048;12213;23984;15793;4321;21273;21276;4307,477.9,0.125,0.100,0.073,1,0.073,0.125,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 
봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;15922;17123;21890;22049;4346;9022;4767;6067,298.5,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4064;4065;4066;4071;4068;4069;5063;5105;4067,568.1,1.000,1.000,0.920,1,0.956,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4070;4062;4059;4058;4060;4063;4066;4071;4064;5095,533.5,0.667,0.500,0.478,1,0.478,0.667,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,4775;23446;4776;4202;4679;24382;21155;4668;4199;21855,278.2,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3770;3817;4540;5244;3762;3789;5249;3791;3793,555.4,0.500,0.500,0.387,1,0.497,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5244;5236;5249;5229;3755;3774;3761;5230;10573;3787,469.9,0.250,0.200,0.151,1,0.151,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3772;5260;3897;5248;3771;3769;11671;13936;3755,749.3,1.000,1.000,1.000,1,1.000,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,22342;19576;17069;15924;16935;23149;16019;16462;16010;4776,322.2,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;21275;16927;20893;16771;17242;4329;20886;4457;4307,517.8,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5161;5262;23732;24155;4546;20758;5145;4547;3774;5180,415.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,16289;5089;5092;5250;22202;20507;5070;5118;5173;23605,303.5,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,20022;20470;4634;15361;16059;9102;23336;18286;16218;5738,264.1,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3886;3902;3887;3895;3898;3885;3905;3908;3911;3915,344.8,1.000,1.000,0.920,1,0.956,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;13930;3895;3911;13929;3866;3903;3890;3910;3909,313.3,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11579;4025;4026;11645;13750;11676;13299;13749;13766,456.2,1.000,0.333,0.571,1,0.539,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;13309;13299;13313;13918,423.5,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13657;13655;13656;13649;13651;13752;13659;13650,342.2,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3895;3902;3896;3887;13935;13938;3877;3900;3899,454.6,0.500,1.000,0.613,1,0.917,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange 
design,5144;5136,5136:2;5144:3,5144;5136;5180;5193;5140;5137;5149;5178;5207;5148,1638.5,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5208;5210;5143;5206;5137;5207;5182;5140,1474.5,1.000,1.000,0.832,1,0.907,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5204;5178;5214;5224;5210;5148;5145;5186;5190,1615.3,1.000,1.000,0.818,1,0.961,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5141;5137;5139;5136;5140;5186;5178;5145;5143,1570.0,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5182;5143;5204;5211;5207;5185;5186,1325.2,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5204;5224;5208;5209;5205;5178;5180;5225;5187;5186,1424.5,0.500,0.250,0.264,0,0.395,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5189;5192;5180;5187;5186;5212;5188;5182;5137,1650.7,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3759;3774;3755;3818;3812;3778;3756;3761;3771,1083.8,1.000,1.000,0.877,1,0.974,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5204;5225;5206;5208;5210;5137;5182;5145,749.6,0.750,1.000,0.767,1,0.686,0.750,1.000, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 
해설,5224;5209,5209:2;5224:3,5224;5222;5225;5209;5180;5204;5210;5205;5178;5143,706.2,1.000,1.000,0.877,1,0.932,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;5186;13913;5143;13760;13749;5145;5180;5240;5137,748.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13760;13674;13669;13774;13773;13675;13755;13924;13772,384.0,0.250,1.000,0.390,1,0.647,0.333,1.000, +single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,10575;11671;11649;11648;13915;5241;11563;5173;5177;11653,638.0,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11533;5081;11509;11476;11486;5064;3788;5134;5075,509.3,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;5139;5090;5178;11515;5210;11493;11719,326.9,0.667,1.000,0.765,1,0.856,0.667,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11475;5090;5084;11531;11476;11473;5093;11479;5124,582.3,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11664;13948;13660;5177;13652;11665;13917;11660;13752,375.3,0.333,1.000,0.469,1,0.674,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11627;11658;11600;11625;11692;13918;13751;5177;13653;13753,369.6,0.667,1.000,0.671,1,0.883,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 
분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11595;11616;13669;11617;11649;11655;11690;11658;11653;11689,305.2,0.333,0.250,0.202,0,0.321,0.500,1.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11711;11712;11503;11500;11713;11714;13930;11717;11701;11502,373.5,1.000,1.000,1.000,1,0.858,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11693;11692;13665;13661;13664;13666;13670;13773;13934,340.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,4026;5236;3977;3971;3966;4018;3972;3973;3974;3895,438.8,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_cold.csv b/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_cold.csv new file mode 100644 index 0000000..5f4919c --- /dev/null +++ b/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_cold.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3856;3851;3868;3856;3851;3853;3873;10573;3873,3856.5,0.667,1.000,1.149,1,1.099,0.667,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3921;3874;3874;10573;3922;3917;3918;10573;10571,4620.2,1.000,1.000,1.204,0,0.813,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3981;3985;3985;3980;3980;3857;3978;3880;3857,3554.9,1.000,1.000,1.631,1,1.631,1.000,1.000, 
+single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;4041;3858;3896;3782;3755;3851;3775;3755;3851,3618.8,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3910;3888;3910;10570;3888;3905;3774;3895;10570;3905,3791.0,1.000,0.500,1.018,1,1.018,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5249;5253;3895;3902;3879;3855;3897;5244;5249;3897,3313.4,0.250,0.200,0.151,0,0.107,0.333,0.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;3917;3867;3855;3878;5227;10571;3867;3851;3878,3689.7,1.000,1.000,1.371,1,1.394,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;5253;3880;3980;3985;3903;3904;3981;5253;3985,3484.9,0.667,1.000,0.819,1,0.819,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,3918;3917;3917;3854;3918;10572;10572;3916;3877;3854,3658.7,0.500,0.500,0.565,1,0.758,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,10573;3876;3853;5249;3876;5249;3853;3811;11677;11677,4278.8,0.500,0.333,0.511,1,0.656,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,22048;21276;15793;22048;21273;21276;4307;23571;4452;4339,2737.3,0.125,0.143,0.084,1,0.084,0.125,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 
봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;21890;16532;15922;21706;17123;21890;22049;15922,2858.6,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4071;4064;4065;4066;4064;4068;5105;4065;4071,2910.7,1.000,1.000,1.062,1,1.034,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4062;4059;4070;4062;4060;4060;4059;4070;4061;4068,2674.9,1.000,1.000,1.161,1,1.161,1.000,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,4775;4333;4776;4739;4202;4679;4668;4775;4679;4668,2664.3,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;5239;4540;3791;3770;4540;3770;3758;3791;3774,3375.8,0.500,0.200,0.442,0,0.567,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,3755;5239;5249;5249;5230;10573;5230;3774;5239;10573,3234.6,0.250,1.000,0.390,1,0.390,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3772;3790;5260;3772;3897;5248;3771;5248;3769,3693.3,1.000,1.000,1.544,1,1.544,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,19576;19576;17069;16935;15924;16935;23149;16010;16010;23149,2768.3,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;21275;16761;16771;16771;17242;21275;17242;4329;4457,2952.1,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,23732;23732;20036;4547;5161;4547;20758;24155;20032;20036,3442.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5094;5057;5090;5090;5094;5250;5076;5078;5118;5092,3495.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,23336;20470;9102;20022;18286;9102;16218;4634;4281;4289,2794.5,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3912;3895;3886;3902;3902;3886;3887;3895;3898;3756,3994.3,1.000,0.333,0.729,1,0.786,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3896;13930;13930;3895;3911;3866;3903;3866;3898,4288.0,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;4025;4026;4025;4026;13299;13749;13941;11644;11579,4047.0,1.000,0.500,1.195,1,1.135,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13305;13304;13304;13300;13300;13306;13310;13307;13310,3843.3,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;13652;13752;13752;13657;13657;13653;13655;13651;13651,4371.3,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3897;3895;3901;3907;3899;3901;3851;3877;3898,3788.8,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange 
design,5144;5136,5136:2;5144:3,5144;5140;5136;5210;5149;5206;5210;5178;5136;5204,4744.1,1.000,1.000,1.104,1,1.057,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5204;5180;5182;5208;5206;5210;5210;5141,5285.2,1.000,1.000,1.124,1,1.293,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5204;5205;5178;5224;5178;5214;5180;5205;5145;5186,5152.3,0.500,0.500,0.580,1,0.868,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5139;5140;5136;5149;5137;5141;5140;5186;5145,4500.2,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5180;5210;5178;5204;5182;5139;5143,5295.9,1.000,1.000,1.356,1,1.356,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5180;5225;5178;5224;5182;5187;5180;5182;5209;5209,4644.0,0.500,0.111,0.362,0,0.541,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5210;5140;5187;5136;5145;5180;5137;5210;5149,5250.8,1.000,1.000,0.832,1,0.907,1.000,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3759;3763;3812;3755;3778;3756;3774;3778;3774;3791,4352.8,1.000,0.500,0.651,1,0.635,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5210;5204;5139;5210;5145;5133;5204;5206;5145,4671.8,0.500,1.000,0.956,0,0.803,0.500,0.500, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 
해설,5224;5209,5209:2;5224:3,5224;5224;5225;5225;5210;5209;5141;5180;5180;5178,4381.7,1.000,1.000,1.218,1,1.404,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;5210;13760;5137;5140;5149;3895;5136;3797;3797,3279.8,0.333,0.125,0.148,1,0.058,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11649;13675;13759;11647;13675;13774;11647;13774;13674;13760,2598.8,0.250,0.250,0.298,0,0.494,0.333,1.000, +single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,5178;5178;5177;11690;5177;10575;5173;11638;5173;5241,3454.7,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11504;11533;11533;5106;11509;11509;3788;11601;5081,3075.4,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11500;11515;11517;11521;11493;5090;5071;3788;11515,3893.1,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11514;11475;11475;11479;11473;11479;11473;11487;11476,3231.0,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11591;11664;13928;13928;13324;13752;13948;13948;4026,4341.1,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11658;11627;11658;13753;11625;11600;13653;13653;13918;11627,3228.5,0.667,0.500,0.613,1,0.809,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 
분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11690;11649;11595;11669;11616;11690;11595;13669;11689;11649,3647.4,0.333,0.250,0.202,0,0.138,0.500,0.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11712;11712;11711;11711;11503;11503;11713;11500;11500;11701,3108.0,1.000,1.000,1.551,1,1.582,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11693;5210;13665;13664;11691;13673;13666;13303;11692,3335.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,3895;3895;13944;5236;5236;13944;3961;3961;3971;3971,3495.7,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_warm.csv b/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_warm.csv new file mode 100644 index 0000000..a64d6b3 --- /dev/null +++ b/reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_warm.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3856;3851;3868;3856;3851;3853;3873;10573;3873,1283.8,0.667,1.000,1.149,1,1.099,0.667,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3921;3874;3874;10573;3922;3917;3918;10573;10571,837.0,1.000,1.000,1.204,0,0.813,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3981;3985;3985;3980;3980;3857;3978;3880;3857,780.4,1.000,1.000,1.631,1,1.631,1.000,1.000, 
+single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;4041;3858;3896;3782;3755;3851;3775;3755;3851,1264.0,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3910;3888;3910;10570;3888;3905;3774;3895;10570;3905,1161.2,1.000,0.500,1.018,1,1.018,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5249;5253;3895;3902;3879;3855;3897;5244;5249;3897,864.5,0.250,0.200,0.151,0,0.107,0.333,0.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;3917;3867;3855;3878;5227;10571;3867;3851;3878,470.4,1.000,1.000,1.371,1,1.394,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;5253;3880;3980;3985;3903;3904;3981;5253;3985,568.8,0.667,1.000,0.819,1,0.819,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,3918;3917;3917;3854;3918;10572;10572;3916;3877;3854,447.4,0.500,0.500,0.565,1,0.758,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,10573;3876;3853;5249;3876;5249;3853;3811;11677;11677,1431.3,0.500,0.333,0.511,1,0.656,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,22048;21276;15793;22048;21273;21276;4307;23571;4452;4339,628.0,0.125,0.143,0.084,1,0.084,0.125,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 
봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;21890;16532;15922;21706;17123;21890;22049;15922,436.2,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4071;4064;4065;4066;4064;4068;5105;4065;4071,542.2,1.000,1.000,1.062,1,1.034,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4062;4059;4070;4062;4060;4060;4059;4070;4061;4068,552.5,1.000,1.000,1.161,1,1.161,1.000,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,4775;4333;4776;4739;4202;4679;4668;4775;4679;4668,610.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;5239;4540;3791;3770;4540;3770;3758;3791;3774,1010.4,0.500,0.200,0.442,0,0.567,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,3755;5239;5249;5249;5230;10573;5230;3774;5239;10573,1359.6,0.250,1.000,0.390,1,0.390,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3772;3790;5260;3772;3897;5248;3771;5248;3769,1796.1,1.000,1.000,1.544,1,1.544,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,19576;19576;17069;16935;15924;16935;23149;16010;16010;23149,1159.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;21275;16761;16771;16771;17242;21275;17242;4329;4457,755.7,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,23732;23732;20036;4547;5161;4547;20758;24155;20032;20036,325.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5094;5057;5090;5090;5094;5250;5076;5078;5118;5092,295.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,23336;20470;9102;20022;18286;9102;16218;4634;4281;4289,251.3,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3912;3895;3886;3902;3902;3886;3887;3895;3898;3756,1049.9,1.000,0.333,0.729,1,0.786,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3896;13930;13930;3895;3911;3866;3903;3866;3898,1027.7,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;4025;4026;4025;4026;13299;13749;13941;11644;11579,1398.6,1.000,0.500,1.195,1,1.135,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13305;13304;13304;13300;13300;13306;13310;13307;13310,377.3,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;13652;13752;13752;13657;13657;13653;13655;13651;13651,328.3,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3897;3895;3901;3907;3899;3901;3851;3877;3898,1249.8,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange 
design,5144;5136,5136:2;5144:3,5144;5140;5136;5210;5149;5206;5210;5178;5136;5204,2608.5,1.000,1.000,1.104,1,1.057,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5204;5180;5182;5208;5206;5210;5210;5141,3067.4,1.000,1.000,1.124,1,1.293,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5204;5205;5178;5224;5178;5214;5180;5205;5145;5186,2889.3,0.500,0.500,0.580,1,0.868,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5139;5140;5136;5149;5137;5141;5140;5186;5145,2379.2,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5180;5210;5178;5204;5182;5139;5143,2950.9,1.000,1.000,1.356,1,1.356,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5180;5225;5178;5224;5182;5187;5180;5182;5209;5209,2267.5,0.500,0.111,0.362,0,0.541,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5210;5140;5187;5136;5145;5180;5137;5210;5149,2912.3,1.000,1.000,0.832,1,0.907,1.000,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3759;3763;3812;3755;3778;3756;3774;3778;3774;3791,1664.2,1.000,0.500,0.651,1,0.635,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5210;5204;5139;5210;5145;5133;5204;5206;5145,2200.3,0.500,1.000,0.956,0,0.803,0.500,0.500, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 
해설,5224;5209,5209:2;5224:3,5224;5224;5225;5225;5210;5209;5141;5180;5180;5178,1921.9,1.000,1.000,1.218,1,1.404,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;5210;13760;5137;5140;5149;3895;5136;3797;3797,931.9,0.333,0.125,0.148,1,0.058,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11649;13675;13759;11647;13675;13774;11647;13774;13674;13760,449.4,0.250,0.250,0.298,0,0.494,0.333,1.000, +single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,5178;5178;5177;11690;5177;10575;5173;11638;5173;5241,1028.6,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11504;11533;11533;5106;11509;11509;3788;11601;5081,872.8,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11500;11515;11517;11521;11493;5090;5071;3788;11515,300.7,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11514;11475;11475;11479;11473;11479;11473;11487;11476,624.8,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11591;11664;13928;13928;13324;13752;13948;13948;4026,1049.7,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11658;11627;11658;13753;11625;11600;13653;13653;13918;11627,536.1,0.667,0.500,0.613,1,0.809,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 
분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11690;11649;11595;11669;11616;11690;11595;13669;11689;11649,399.5,0.333,0.250,0.202,0,0.138,0.500,0.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11712;11712;11711;11711;11503;11503;11713;11500;11500;11701,456.6,1.000,1.000,1.551,1,1.582,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11693;5210;13665;13664;11691;13673;13666;13303;11692,356.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,3895;3895;13944;5236;5236;13944;3961;3961;3971;3971,815.5,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_cold.csv b/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_cold.csv new file mode 100644 index 0000000..fff3730 --- /dev/null +++ b/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_cold.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3879;3856;3851;3868;3879;4041;3856;4041;3851,2865.3,1.000,1.000,1.497,1,1.235,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3921;3874;3874;3922;3917;3918;3917;10573;10573,8879.1,1.000,1.000,1.412,0,1.079,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3981;3985;3985;3980;3980;3857;3857;3880;3984,9160.5,1.000,1.000,1.631,1,1.631,1.000,1.000, 
+single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;4041;3905;3880;3858;3818;3781;3851;3781;3881,9813.7,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3888;3910;3910;3888;3905;3895;3905;3890;3885;3898,10152.4,1.000,1.000,1.431,1,1.431,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5249;5249;3897;5244;5244;3868;5253;5253;3897;3851,9887.4,0.250,0.167,0.139,0,0.099,0.333,0.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;3917;3867;3878;3855;5227;10571;3851;3867;5244,9554.2,1.000,1.000,1.229,1,1.290,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3985;3981;3903;3980;3909;3880;5253;3985;5253,2651.9,0.667,1.000,0.885,1,0.885,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,3917;3917;3918;3854;3916;10572;3854;3918;10572;3877,2674.7,0.500,1.000,0.788,1,1.076,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3876;10573;3853;5249;3853;3876;5249;11677;4025;3811,3434.4,0.500,0.333,0.544,1,0.698,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,20893;22048;15793;22048;20893;21273;4452;15793;4331;4339,2365.2,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 
봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;21706;16532;15922;17123;15922;17123;4346;9022,1800.0,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4064;4071;4064;4065;4071;4065;4066;4066;4063,1811.7,1.000,1.000,1.232,1,1.258,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4062;4059;4059;4070;4060;4062;4070;4060;4061;4058,2161.6,1.000,1.000,1.107,1,1.107,1.000,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,23446;4775;4775;4679;23446;4776;4202;4202;4776;4679,2094.5,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3791;5239;3770;3817;3758;4540;3817;3791;3770,2533.0,0.500,0.250,0.441,0,0.567,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5249;5249;5230;5230;3774;3787;10573;10573;3819;3755,2792.1,0.250,0.100,0.113,1,0.113,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3790;3772;3772;3897;5260;13936;5260;5248;11671,3155.7,1.000,1.000,1.571,1,1.571,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,19576;19576;17069;16935;15924;16935;23149;16010;16010;23149,2548.3,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;16761;21275;21275;16927;16927;16771;17242;4329;16771,2613.6,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,23732;4547;5161;20758;23732;4546;3774;3774;4547;5161,2252.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5057;5094;5061;5094;5070;5076;5092;20507;5092;5118,1837.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,23336;20470;9102;20470;17133;20022;9102;20022;4634;17133,1532.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3886;3886;3902;3887;3895;3902;3898;3887;3895;3898,2882.5,1.000,1.000,1.457,1,1.536,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3896;13930;13930;3895;3895;3866;3903;3866;3903,3467.6,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;4025;4026;13658;13299;4025;4026;11693;11579;11693,3395.6,1.000,0.500,1.116,1,1.058,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13305;13304;13304;13300;13310;13300;13306;13306;13310,2533.9,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;13652;13752;13657;13752;13657;13653;13655;13651;13655,2289.4,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3897;3895;3901;3915;3895;3915;3890;3902;3901,2933.5,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange 
design,5144;5136,5136:2;5144:3,5144;5140;5136;5210;5149;5206;5210;5178;5136;5204,4062.6,1.000,1.000,1.104,1,1.057,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5204;5180;5182;5208;5206;5210;5137;5210,4675.3,1.000,1.000,1.124,1,1.293,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5204;5205;5224;5178;5178;5180;5214;5145;5205;5214,4548.4,0.500,0.500,0.571,1,0.855,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5139;5140;5136;5149;5137;5141;5140;5186;5145,3980.0,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5180;5178;5139;5210;5143;5182;5182,4303.1,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5180;5178;5224;5182;5187;5180;5182;5209;5209;5178,4188.4,0.500,0.125,0.378,0,0.566,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5139;5135;5208;5210;5187;5210;5133;5204;4026;4026,3989.3,0.500,0.333,0.307,1,0.394,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3812;5253;5230;5249;5249;3875;3852;3812;10573;3875,2757.4,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5210;5139;5139;5210;5204;5145;5145;5206;5206;5204,3857.5,0.500,1.000,1.000,0,0.944,0.500,0.500, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 
해설,5224;5209,5209:2;5224:3,5224;5224;5225;5225;5209;5210;5180;5204;5222;5209,3393.5,1.000,1.000,1.414,1,1.512,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;13913;13760;5210;5186;13913;5143;13760;5137;5137,2814.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11649;13675;13759;11647;13675;13774;11647;13774;13674;13760,2005.0,0.250,0.250,0.298,0,0.494,0.333,1.000, +single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,5177;5178;13299;13303;5177;11638;11690;11690;13306;5173,2882.5,0.500,0.500,0.387,1,0.497,0.500,1.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11504;11533;5106;11533;5111;4544;11509;11476;5081,2524.7,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11500;11501;11495;5090;11515;11515;11517;11495;5210,1926.1,0.667,1.000,1.109,1,1.310,0.667,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11514;11475;11479;11479;11475;11473;11473;5090;11531,2208.9,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11591;11664;11664;13928;13917;13752;13752;13928;13917,2690.3,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11658;11627;11658;11627;13653;13753;13753;13752;11625;13918,1903.5,0.667,0.500,0.639,1,0.887,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 
분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11690;11649;11595;13669;11616;11690;13654;11669;11649;11617,2525.7,0.667,0.125,0.284,0,0.316,1.000,1.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11712;11712;11711;11711;11503;11503;11500;11713;13930;11701,1883.2,1.000,1.000,1.551,1,1.582,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;13673;11693;13936;5177;13666;5210;13936;5210;13665,1851.5,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,3895;5210;3961;3895;4026;3971;3966;3972;5210;3961,2479.2,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_warm.csv b/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_warm.csv new file mode 100644 index 0000000..6221fd3 --- /dev/null +++ b/reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_warm.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3879;3856;3851;3868;3879;4041;3856;4041;3851,1353.9,1.000,1.000,1.497,1,1.235,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3921;3874;3874;3922;3917;3918;3917;10573;10573,739.5,1.000,1.000,1.412,0,1.079,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3981;3985;3985;3980;3980;3857;3857;3880;3984,603.6,1.000,1.000,1.631,1,1.631,1.000,1.000, 
+single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;4041;3905;3880;3858;3818;3781;3851;3781;3881,1227.5,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3888;3910;3910;3888;3905;3895;3905;3890;3885;3898,1307.0,1.000,1.000,1.431,1,1.431,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5249;5249;3897;5244;5244;3868;5253;5253;3897;3851,952.1,0.250,0.167,0.139,0,0.099,0.333,0.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;3917;3867;3878;3855;5227;10571;3851;3867;5244,775.5,1.000,1.000,1.229,1,1.290,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3985;3981;3903;3980;3909;3880;5253;3985;5253,831.8,0.667,1.000,0.885,1,0.885,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,3917;3917;3918;3854;3916;10572;3854;3918;10572;3877,879.0,0.500,1.000,0.788,1,1.076,0.500,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3876;10573;3853;5249;3853;3876;5249;11677;4025;3811,1527.5,0.500,0.333,0.544,1,0.698,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,20893;22048;15793;22048;20893;21273;4452;15793;4331;4339,629.8,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 
봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;21706;16532;15922;17123;15922;17123;4346;9022,444.8,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4064;4071;4064;4065;4071;4065;4066;4066;4063,477.0,1.000,1.000,1.232,1,1.258,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4062;4059;4059;4070;4060;4062;4070;4060;4061;4058,956.7,1.000,1.000,1.107,1,1.107,1.000,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,23446;4775;4775;4679;23446;4776;4202;4202;4776;4679,596.0,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3791;5239;3770;3817;3758;4540;3817;3791;3770,1197.1,0.500,0.250,0.441,0,0.567,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5249;5249;5230;5230;3774;3787;10573;10573;3819;3755,1271.5,0.250,0.100,0.113,1,0.113,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3790;3772;3772;3897;5260;13936;5260;5248;11671,1670.0,1.000,1.000,1.571,1,1.571,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,19576;19576;17069;16935;15924;16935;23149;16010;16010;23149,1160.9,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;16761;21275;21275;16927;16927;16771;17242;4329;16771,943.5,0.000,0.000,0.000,1,0.000,0.000,0.000, 
+single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,23732;4547;5161;20758;23732;4546;3774;3774;4547;5161,364.7,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5057;5094;5061;5094;5070;5076;5092;20507;5092;5118,290.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,23336;20470;9102;20470;17133;20022;9102;20022;4634;17133,250.6,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3886;3886;3902;3887;3895;3902;3898;3887;3895;3898,1469.2,1.000,1.000,1.457,1,1.536,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3896;13930;13930;3895;3895;3866;3903;3866;3903,1511.2,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;4025;4026;13658;13299;4025;4026;11693;11579;11693,2166.1,1.000,0.500,1.116,1,1.058,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13305;13304;13304;13300;13310;13300;13306;13306;13310,785.6,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;13652;13752;13657;13752;13657;13653;13655;13651;13655,998.1,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3897;3895;3901;3915;3895;3915;3890;3902;3901,1614.9,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange 
design,5144;5136,5136:2;5144:3,5144;5140;5136;5210;5149;5206;5210;5178;5136;5204,2592.9,1.000,1.000,1.104,1,1.057,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5204;5180;5182;5208;5206;5210;5137;5210,3083.9,1.000,1.000,1.124,1,1.293,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5204;5205;5224;5178;5178;5180;5214;5145;5205;5214,3019.9,0.500,0.500,0.571,1,0.855,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5139;5140;5136;5149;5137;5141;5140;5186;5145,2373.9,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5180;5178;5139;5210;5143;5182;5182,2729.3,1.000,1.000,1.387,1,1.387,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5180;5178;5224;5182;5187;5180;5182;5209;5209;5178,2655.9,0.500,0.125,0.378,0,0.566,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5139;5135;5208;5210;5187;5210;5133;5204;4026;4026,2393.8,0.500,0.333,0.307,1,0.394,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3812;5253;5230;5249;5249;3875;3852;3812;10573;3875,988.5,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5210;5139;5139;5210;5204;5145;5145;5206;5206;5204,2043.3,0.500,1.000,1.000,0,0.944,0.500,0.500, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 
해설,5224;5209,5209:2;5224:3,5224;5224;5225;5225;5209;5210;5180;5204;5222;5209,1698.3,1.000,1.000,1.414,1,1.512,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;13913;13760;5210;5186;13913;5143;13760;5137;5137,1209.5,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11649;13675;13759;11647;13675;13774;11647;13774;13674;13760,439.3,0.250,0.250,0.298,0,0.494,0.333,1.000, +single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,5177;5178;13299;13303;5177;11638;11690;11690;13306;5173,1644.5,0.500,0.500,0.387,1,0.497,0.500,1.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11504;11533;5106;11533;5111;4544;11509;11476;5081,888.0,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11500;11501;11495;5090;11515;11515;11517;11495;5210,297.8,0.667,1.000,1.109,1,1.310,0.667,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11514;11475;11479;11479;11475;11473;11473;5090;11531,823.6,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11591;11664;11664;13928;13917;13752;13752;13928;13917,1047.3,0.333,1.000,0.765,1,1.099,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11658;11627;11658;11627;13653;13753;13753;13752;11625;13918,473.3,0.667,0.500,0.639,1,0.887,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 
분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11690;11649;11595;13669;11616;11690;13654;11669;11649;11617,930.6,0.667,0.125,0.284,0,0.316,1.000,1.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11712;11712;11711;11711;11503;11503;11500;11713;13930;11701,505.1,1.000,1.000,1.551,1,1.582,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;13673;11693;13936;5177;13666;5210;13936;5210;13665,344.8,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,3895;5210;3961;3895;4026;3971;3966;3972;5210;3961,1051.1,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/tests/search_eval/baselines/v0_2_phase2q_results_2026-05-24.json b/tests/search_eval/baselines/v0_2_phase2q_results_2026-05-24.json new file mode 100644 index 0000000..caeb6ae --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2q_results_2026-05-24.json @@ -0,0 +1,154 @@ +{ + "version": "v0.2-phase2q", + "label": "phase_2q_query_rewrite_3_measurement", + "date": "2026-05-24", + "snapshot": { + "doc_id_max": 25180, + "chunk_id_max": 56526, + "documents_n": 21365, + "chunks_n": 30605, + "source": "v0_2_phase2a_baseline_snapshot_2026-05-23.json (재사용)" + }, + "eval_set": { + "total_cases": 51, + "scored_cases": 46, + "failure_expected_cases": 5, + "queries_yaml": "tests/search_eval/queries.yaml" + }, + "model_config": { + "embedding": "BAAI/bge-m3 (production)", + "reranker": "BAAI/bge-reranker-v2-m3 (production)", + "search_mode": "hybrid", + "fusion": "rrf_boost (production default)", + "rerank_enabled": "server_default true", + "per_variant_k": 16, + "phase2q_unified_cap": 60, + "rrf_k": 60, + "plan": "phase-2q-query-rewrite-diagnose.md v6" + }, + "candidates": { + "baseline_rebaseline": { + "rewrite_backend": null, + "llm_endpoint": null, + "overall": { + "n": 46, + "graded_ndcg_at_10": 0.659, + 
"graded_recall_at_10_t2": 0.695, + "graded_recall_at_10_t3": 0.761, + "latency_p50_ms": 478, + "latency_p95_ms": 1627, + "failure_precision": "0/5" + }, + "by_category": { + "english_only": {"n": 9, "recall": 0.78, "gndcg": 0.78}, + "exam": {"n": 7, "recall": 0.57, "gndcg": 0.74}, + "korean_only": {"n": 9, "recall": 0.55, "gndcg": 0.51}, + "mixed": {"n": 10, "recall": 0.38, "gndcg": 0.39}, + "standards": {"n": 11, "recall": 0.91, "gndcg": 0.87} + }, + "recurrence_vs_phase2a": "NDCG 0.659 = Phase 2A baseline 0.659. diff 0.000 < 0.005 threshold PASS", + "csv": "reports/v0_2_phase2q_baseline_rebaseline_2026-05-24.csv" + }, + "cand_multi_query_macmini": { + "rewrite_backend": "cand_multi_query_macmini", + "llm_endpoint": "http://100.76.254.116:8801/v1/chat/completions", + "llm_model": "gemma-4-26b-a4b-it-8bit", + "n_variants": 3, + "cold": { + "n": 46, + "graded_ndcg_at_10": 0.927, + "graded_recall_at_10_t2": 0.687, + "graded_recall_at_10_t3": 0.728, + "latency_p50_ms": 2757, + "latency_p95_ms": 9684, + "delta_vs_baseline": { + "ndcg": "+0.268", + "recall_t2": "-0.008", + "recall_t3": "-0.033" + }, + "csv": "reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_cold.csv" + }, + "warm": { + "graded_ndcg_at_10": 0.927, + "graded_recall_at_10_t2": 0.687, + "graded_recall_at_10_t3": 0.728, + "latency_p50_ms": 998, + "latency_p95_ms": 2693, + "cache_hit_speedup": "p50 -1759ms (-64%), p95 -6991ms (-72%)", + "csv": "reports/v0_2_phase2q_cand_multi_query_macmini_2026-05-24_warm.csv" + }, + "by_category_cold": { + "english_only": {"n": 9, "recall": 0.61, "gndcg": 0.77, "delta": "-0.01"}, + "exam": {"n": 7, "recall": 0.62, "gndcg": 1.11, "delta": "+0.37"}, + "korean_only": {"n": 9, "recall": 0.55, "gndcg": 0.71, "delta": "+0.20"}, + "mixed": {"n": 10, "recall": 0.40, "gndcg": 0.57, "delta": "+0.18"}, + "standards": {"n": 11, "recall": 0.91, "gndcg": 1.44, "delta": "+0.57"} + } + }, + "cand_multi_query_macbook": { + "rewrite_backend": "cand_multi_query_macbook", + 
"llm_endpoint": "http://100.118.112.84:8810/v1/chat/completions", + "llm_model": "mlx-community/Qwen3.6-27B-8bit", + "n_variants": 3, + "cold": { + "n": 46, + "graded_ndcg_at_10": 0.919, + "graded_recall_at_10_t2": 0.697, + "graded_recall_at_10_t3": 0.728, + "latency_p50_ms": 3647, + "latency_p95_ms": 5202, + "delta_vs_baseline": { + "ndcg": "+0.260", + "recall_t2": "+0.002", + "recall_t3": "-0.033" + }, + "csv": "reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_cold.csv" + }, + "warm": { + "graded_ndcg_at_10": 0.919, + "graded_recall_at_10_t2": 0.697, + "graded_recall_at_10_t3": 0.728, + "latency_p50_ms": 873, + "latency_p95_ms": 2901, + "cache_hit_speedup": "p50 -2774ms (-76%), p95 -2301ms (-44%)", + "csv": "reports/v0_2_phase2q_cand_multi_query_macbook_2026-05-24_warm.csv" + }, + "by_category_cold": { + "english_only": {"n": 9, "recall": 0.78, "gndcg": 0.89, "delta": "+0.11"}, + "exam": {"n": 7, "recall": 0.52, "gndcg": 1.04, "delta": "+0.30"}, + "korean_only": {"n": 9, "recall": 0.56, "gndcg": 0.67, "delta": "+0.16"}, + "mixed": {"n": 10, "recall": 0.43, "gndcg": 0.65, "delta": "+0.26"}, + "standards": {"n": 11, "recall": 0.88, "gndcg": 1.31, "delta": "+0.44"} + } + } + }, + "comparison": { + "macmini_vs_macbook_overall_ndcg_diff": "+0.008 (macmini 우세)", + "macmini_strengths": ["exam +0.07", "standards +0.13", "korean_only +0.04"], + "macbook_strengths": ["english_only +0.12", "mixed +0.08", "recall_t2 +0.01"], + "latency_cold_p50": "macmini 2757ms < macbook 3647ms (-890ms gemma 우세)", + "latency_warm_p50": "macmini 998ms > macbook 873ms (-125ms qwen 우세, cache hit 시)" + }, + "incidents": { + "phase3_first_attempt_catastrophic": { + "ndcg": 0.033, + "root_cause": "query_rewriter._call_llm() 가 user 메시지 1개에 prompt template 전체 박음. prompt template 에 {query} placeholder 없음 → _render_prompt no-op. 
LLM 이 actual query 인식 못 함 → 모든 query 에 동일 default response (`압력용기 설계 기준` 등 마지막 example) 반환.", + "discovery": "fastapi log [rewrite-variant] 박제에서 query 별 같은 variants 발견.", + "fix": "_call_llm 을 fixture request_body 형식 (system=prompt template / user=query) 으로 변경. fixture-first invariant 강화.", + "fix_commit": "TBD (재측정 후)" + } + }, + "decisions": { + "H1_both_net_improve": "Both backends NDCG net improve ≥ +0.03 (macmini +0.268, macbook +0.260)", + "selected_for_apply": "TBD — 4 factor balance: latency (cold) gemma 우세 / latency (warm) qwen 우세 / category 분포 / cost", + "apply_pr_recommendation": "PR-2Q-Apply-Query-Rewrite-1 진입 후보. LLM 선택 = 4-factor weighted (latency / category / availability / cost) decision needed.", + "category_split_observation": "qwen=영어/mixed/recall 강함, gemma=exam/standards/korean 강함. 만약 mixed (0.39 → 0.65) 가 최우선이면 qwen. 만약 exam/standards 가 최우선이면 gemma." + }, + "follow_ups": { + "phase4_decision_md": "별 step 으로 작성 — 사용자 검토 후 Apply LLM 선택", + "fix_commit_required": "variants bug fix 별 commit 필요 (test 추가 + system/user 메시지 분리)", + "rerank_413_payload_too_large": "fastapi log 에 RRF fallback 다수 관찰 — unified RRF 결과 만으로도 NDCG 0.927 달성. reranker 입력 cap 또는 chunk dedup 별 PR 후보 (Apply 전 결정).", + "latency_p95_cold": "cand_macmini 9684ms 매우 큼 — production rollout 시 cache prewarm 정책 + 비동기 rewrite 필수", + "ub2_caffeinate_pid": "PID 37361 (caffeinate -di) 측정 종료 후 사용자 kill 권장" + } +} diff --git a/tests/test_query_rewriter.py b/tests/test_query_rewriter.py index 03669e8..da2d650 100644 --- a/tests/test_query_rewriter.py +++ b/tests/test_query_rewriter.py @@ -323,3 +323,103 @@ def test_phase2q_constants(): assert PHASE2Q_UNIFIED_CAP == 60 # per-variant K = 50 // 3 = 16 (A1 채택) assert PHASE2Q_PRODUCTION_TOPK // EXPECTED_N_VARIANTS == 16 + + +# ─── 8. Phase 3 incident regression — fixture-first call shape ─── +# Phase 3 cold 측정에서 NDCG 0.033 catastrophic 발견 → variants 가 query 무관 동일 응답. +# root cause = _call_llm 이 user 메시지 1개에 prompt template 전체 박음. 
# The fixture's exact request_body separates system=prompt / user=query, so the
# old single-user-message call shape violated the fixture-first invariant.
# The tests below verify that the call shape matches the fixture (regression guard).


def _install_mock_async_client(monkeypatch):
    """Swap ``query_rewriter.httpx.AsyncClient`` for a capturing stub.

    The stub accepts any constructor arguments, works as an async context
    manager, and answers every ``post`` with a canned chat-completions body
    whose message content is ``{"variants": ["a", "b", "c"]}``.

    Args:
        monkeypatch: the pytest ``monkeypatch`` fixture; the patch is undone
            automatically when the calling test finishes.

    Returns:
        A dict that is filled in on ``post`` with the keys ``"url"`` and
        ``"payload"`` (the ``json`` argument of the request).
    """
    captured = {}

    class _MockResponse:
        def raise_for_status(self):
            return None

        def json(self):
            return {"choices": [{"message": {"content": '{"variants": ["a", "b", "c"]}'}}]}

    class _MockClient:
        def __init__(self, *args, **kwargs):
            pass

        async def __aenter__(self):
            return self

        async def __aexit__(self, *args):
            return None

        # The parameter must stay named ``json`` so a ``json=payload`` keyword
        # call from the code under test binds correctly.
        async def post(self, url, json):
            captured["url"] = url
            captured["payload"] = json
            return _MockResponse()

    monkeypatch.setattr(query_rewriter.httpx, "AsyncClient", _MockClient)
    return captured


@pytest.mark.asyncio
async def test_call_llm_uses_system_user_message_split(monkeypatch):
    """_call_llm sends the fixture's request_body shape (system=prompt / user=query)."""
    captured = _install_mock_async_client(monkeypatch)

    cfg = query_rewriter.LLM_BACKEND_MAP["cand_multi_query_macmini"]
    raw = await query_rewriter._call_llm(cfg, "LPG 저장탱크 안전거리")

    # The raw response is passed through intact.
    assert "variants" in raw

    # The endpoint comes from the backend config.
    assert captured["url"] == cfg["endpoint"]

    payload = captured["payload"]
    # The model comes from the backend config.
    assert payload["model"] == cfg["model"]

    # Exactly two messages: system first, then user.
    messages = payload["messages"]
    assert len(messages) == 2
    assert messages[0]["role"] == "system"
    assert messages[1]["role"] == "user"

    # The user message is the query verbatim (no prompt template baked in).
    assert messages[1]["content"] == "LPG 저장탱크 안전거리"

    # The system message is the prompt template (instructions) and must not
    # contain the query text.
    assert "LPG 저장탱크 안전거리" not in messages[0]["content"]
    assert "search query rewriter" in messages[0]["content"].lower()

    # Pinned sampling parameters are applied (gemma -> response_format json_object).
    assert payload["temperature"] == 0.3
    assert payload["max_tokens"] == 256
    assert payload.get("response_format") == {"type": "json_object"}


@pytest.mark.asyncio
async def test_call_llm_qwen_no_response_format(monkeypatch):
    """The qwen backend sends no response_format.

    Per the original note: mlx-vlm.server does not support it (recorded in
    Phase 0 inspect 9).
    """
    captured = _install_mock_async_client(monkeypatch)

    cfg = query_rewriter.LLM_BACKEND_MAP["cand_multi_query_macbook"]
    await query_rewriter._call_llm(cfg, "ASME Section VIII")

    payload = captured["payload"]
    # qwen relies on the prompt rule only; response_format must be absent.
    assert "response_format" not in payload