merge(eval): PR-Eval-GradedNDCG-Dedup closed — Phase 2Q 측정 inflation 정정 (NDCG 0.876 → 0.641, multi-query 실제 효과 ≈ 0)

This commit is contained in:
hyungi
2026-05-24 04:35:33 +00:00
5 changed files with 396 additions and 3 deletions
@@ -0,0 +1,52 @@
label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,dedup_count,error
single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3879;3868;3890;3863;3856;3908;3851;4041;10573;3895,474.5,1.000,1.000,0.947,0,0.731,1.000,1.000,0,
single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3874;3922;3917;3918;10573;3854;10571;3877;3920,860.9,1.000,1.000,0.877,0,0.676,1.000,1.000,0,
single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3985;3980;3857;3984;3880;3993;3903;3988;3869,344.3,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3858;3852;3851;3881;3905;3913;3915;3877;3903,327.5,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3910;3888;3905;3890;3885;3913;3895;3908;3894;3898,351.9,1.000,0.500,0.631,1,0.631,1.000,1.000,0,
single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5249;3897;5244;3868;5253;3851;3895;3856;3874;3879,431.5,0.750,0.250,0.404,0,0.404,1.000,1.000,0,
single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;3867;3878;5227;10571;5244;3854;10573;3896;5249,351.3,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;5253;3985;3760;3917;5227;3757;5238;3904;3903,290.6,0.333,1.000,0.469,1,0.469,0.333,0.000,0,
single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,3917;3854;3918;10572;3916;3877;5227;5226;3759;3859,291.8,0.500,1.000,0.541,1,0.698,0.500,1.000,0,
single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;3876;5249;3811;11677;3778;3810;10573;6675;3757,489.8,0.500,1.000,0.613,1,0.787,0.500,1.000,0,
single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,20893;22048;21276;22054;15793;16081;18088;15922;16526;21273,373.7,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,16532;21706;15922;17123;21890;22049;4346;9022;4767;6067,247.4,0.000,0.000,0.000,0,0.000,0.000,0.000,0,
single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4071;4064;4065;4066;4068;5105;4058;11481;4067,321.8,1.000,1.000,0.877,1,0.932,1.000,1.000,0,
single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4062;4059;4070;4060;4061;4066;4068;4058;4063;5113,292.4,1.000,1.000,0.853,1,0.853,1.000,0.000,0,
single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,4775;23446;4776;4202;4679;24382;21155;4668;4199;21855,216.9,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3817;3791;3770;4540;3758;4548;3787;3789;5249,399.0,0.500,0.250,0.264,0,0.339,0.500,1.000,0,
single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5249;5230;3817;10573;3755;3787;3815;3802;5235;3774,324.7,0.250,0.200,0.151,1,0.151,0.250,0.000,0,
single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3790;3772;5260;3897;5248;3771;3769;11671;13936;3755,959.2,1.000,1.000,1.000,1,1.000,1.000,0.000,0,
single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,22342;19576;17069;15924;16935;23149;16019;16462;16010;4776,299.2,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,16761;21275;16927;20893;16771;17242;4329;20886;4457;4307,482.1,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,23732;3774;5262;4547;5161;19373;20758;5174;24155;20032,236.1,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,16289;5089;5092;5250;22202;20507;5070;5118;5173;23605,258.1,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,20022;20470;4634;15361;16059;9102;23336;18286;16218;5738,214.7,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,5236;3886;3900;3895;3887;3881;3894;3908;3889;3912,377.8,1.000,0.500,0.624,1,0.627,1.000,1.000,0,
single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3895;3903;13930;11562;13931;13929;3900;3886;3902,353.0,0.500,1.000,0.613,1,0.917,1.000,1.000,0,
single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11579;4025;4026;11645;13750;11676;13299;13749;13766,419.8,1.000,0.333,0.571,1,0.539,1.000,1.000,0,
single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;13309;13299;13313;13918,387.5,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13657;13655;13656;13649;13651;13752;13659;13650,278.8,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3895;3902;3896;3887;13935;13938;3877;3900;3899,411.5,0.500,1.000,0.613,1,0.917,1.000,1.000,0,
single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5180;5193;5140;5137;5149;5178;5207;5148,2266.0,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5182;5208;5206;5210;5141;5137;5145;5183,2217.5,1.000,1.000,0.850,1,0.918,1.000,1.000,0,
single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5178;5214;5205;5186;5145;5204;5148;5180;5192;5190,1960.3,1.000,0.333,0.511,1,0.502,1.000,1.000,0,
single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5140;5136;5137;5141;5186;5145;5182;5190;5185,1399.1,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5210;5180;5182;5143;5204;5211;5207;5185;5186,1482.0,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5180;5178;5186;5187;5182;5209;5224;5188;5204;4835,1336.9,0.500,0.167,0.218,0,0.327,1.000,1.000,0,
single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5189;5192;5180;5187;5186;5212;5188;5182;5137,1902.0,0.500,1.000,0.613,1,0.787,0.500,1.000,0,
single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3759;3774;3755;3818;3812;3778;3756;3761;3771,1402.9,1.000,1.000,0.877,1,0.974,1.000,1.000,0,
single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5204;5225;5206;5208;5210;5137;5182;5145,961.2,0.750,1.000,0.767,1,0.686,0.750,1.000,0,
single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5222;5225;5209;5180;5204;5210;5205;5178;5143,781.8,1.000,1.000,0.877,1,0.932,1.000,1.000,0,
single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,4026;5145;5182;5143;5210;13749;5204;5186;13760;13671,895.6,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13760;13674;13669;13774;13773;13675;13755;13924;13772,362.2,0.250,1.000,0.390,1,0.647,0.333,1.000,0,
single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,10575;11671;11649;11648;13915;5241;11563;5173;5177;11653,608.8,0.000,0.000,0.000,0,0.000,0.000,0.000,0,
single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11533;5081;11509;11476;11486;5064;3788;5134;5075,496.4,0.500,1.000,0.613,1,0.787,0.500,1.000,0,
single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;5139;5090;5178;11515;5210;11493;11719,293.0,0.667,1.000,0.765,1,0.856,0.667,1.000,0,
single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11475;5090;5084;11531;11476;11473;5093;11479;5124,546.2,0.500,1.000,0.613,1,0.787,0.500,1.000,0,
single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11591;11664;13948;13660;5177;13652;11665;13917;11660;13752,301.3,0.333,1.000,0.469,1,0.674,0.333,1.000,0,
single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11627;11658;11600;11625;11692;13918;13751;5177;13653;13753,308.9,0.667,1.000,0.671,1,0.883,1.000,1.000,0,
single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11595;11616;13669;11617;11649;11655;11690;11658;11653;11689,254.7,0.333,0.250,0.202,0,0.321,0.500,1.000,0,
single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11711;11712;11503;11500;11713;11714;13930;11717;11701;11502,309.1,1.000,1.000,1.000,1,0.858,1.000,1.000,0,
single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11693;11692;13665;13661;13664;13666;13670;13773;13934,309.6,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,4026;5236;3977;3971;3966;4018;3972;3973;3974;3895,382.5,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
1 label id category legacy_category intent domain_hint language ocr_derived failure_expected query relevant_ids graded_relevance returned_ids_top10 latency_ms recall_at_10 mrr_at_10 ndcg_at_10 top3_hit graded_ndcg_at_10 graded_recall_at_10_t2 graded_recall_at_10_t3 dedup_count error
2 single kw_001 standards exact_keyword fact_lookup document ko 0 0 산업안전보건법 제6장 3856;3868;3879 3856:3;3868:2;3879:2 3879;3868;3890;3863;3856;3908;3851;4041;10573;3895 474.5 1.000 1.000 0.947 0 0.731 1.000 1.000 0
3 single kw_002 standards exact_keyword fact_lookup document ko 0 0 중대재해 처벌 등에 관한 법률 제2장 중대산업재해 3917;3921 3917:3;3921:2 3921;3874;3922;3917;3918;10573;3854;10571;3877;3920 860.9 1.000 1.000 0.877 0 0.676 1.000 1.000 0
4 single kw_003 standards exact_keyword fact_lookup document ko 0 0 화학물질관리법 유해화학물질 영업자 3981 3981:3 3981;3985;3980;3857;3984;3880;3993;3903;3988;3869 344.3 1.000 1.000 1.000 1 1.000 1.000 1.000 0
5 single kw_004 standards exact_keyword fact_lookup document ko 0 0 근로기준법 안전과 보건 4041 4041:3 4041;3858;3852;3851;3881;3905;3913;3915;3877;3903 327.5 1.000 1.000 1.000 1 1.000 1.000 1.000 0
6 single kw_005 standards exact_keyword fact_lookup document ko 0 0 산업안전보건기준에 관한 규칙 보호구 3888 3888:3 3910;3888;3905;3890;3885;3913;3895;3908;3894;3898 351.9 1.000 0.500 0.631 1 0.631 1.000 1.000 0
7 single nl_001 korean_only natural_language_ko semantic_search document ko 0 0 기계로 인한 산업재해 관련 법령 3856;3868;3879;3854 3854:1;3856:3;3868:2;3879:2 5249;3897;5244;3868;5253;3851;3895;3856;3874;3879 431.5 0.750 0.250 0.404 0 0.404 1.000 1.000 0
8 single nl_002 korean_only natural_language_ko semantic_search document ko 0 0 사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일 3855;3867;3878 3855:3;3867:2;3878:2 3855;3867;3878;5227;10571;5244;3854;10573;3896;5249 351.3 1.000 1.000 1.000 1 1.000 1.000 1.000 0
9 single nl_003 korean_only natural_language_ko semantic_search document ko 0 0 유해화학물질을 다루는 회사가 지켜야 할 안전 의무 3980;3981;3982 3980:2;3981:2;3982:2 3980;5253;3985;3760;3917;5227;3757;5238;3904;3903 290.6 0.333 1.000 0.469 1 0.469 0.333 0.000 0
10 single nl_004 korean_only natural_language_ko semantic_search document ko 0 0 중대재해가 발생했을 때 경영책임자가 처벌받는 기준 3916;3917;3920;3921 3916:2;3917:3;3920:2;3921:2 3917;3854;3918;10572;3916;3877;5227;5226;3759;3859 291.8 0.500 1.000 0.541 1 0.698 0.500 1.000 0
11 single nl_005 korean_only natural_language_ko semantic_search document ko 0 0 안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가 3853;3865 3853:3;3865:2 3853;3876;5249;3811;11677;3778;3810;10573;6675;3757 489.8 0.500 1.000 0.613 1 0.787 0.500 1.000 0
12 single news_001 korean_only news_ko semantic_search news ko 0 0 이란과 미국의 군사 충돌 4303;4304;4307;4316;4322;4323;4327;4335 4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2 20893;22048;21276;22054;15793;16081;18088;15922;16526;21273 373.7 0.000 0.000 0.000 1 0.000 0.000 0.000 0
13 single news_002 korean_only news_ko semantic_search news ko 0 0 호르무즈 해협 봉쇄 4316;4320;4322;4327 4316:3;4320:2;4322:2;4327:2 16532;21706;15922;17123;21890;22049;4346;9022;4767;6067 247.4 0.000 0.000 0.000 0 0.000 0.000 0.000 0
14 single misc_001 korean_only other_domain fact_lookup document ko 0 0 강체의 평면 운동학 4063;4065 4063:3;4065:2 4063;4071;4064;4065;4066;4068;5105;4058;11481;4067 321.8 1.000 1.000 0.877 1 0.932 1.000 1.000 0
15 single misc_002 korean_only other_domain semantic_search document ko 0 0 질점의 운동역학 4060;4061;4062 4060:2;4061:2;4062:2 4062;4059;4070;4060;4061;4066;4068;4058;4063;5113 292.4 1.000 1.000 0.853 1 0.853 1.000 0.000 0
16 single news_003 english_only news_en semantic_search news en 0 0 Trump Iran ultimatum 4258;4260;4262 4258:2;4260:2;4262:2 4775;23446;4776;4202;4679;24382;21155;4668;4199;21855 216.9 0.000 0.000 0.000 1 0.000 0.000 0.000 0
17 single cl_001 mixed crosslingual_ko_en semantic_search document mixed 0 0 기계 안전 가드 설계 원리 3770;3856 3770:3;3856:2 5239;3817;3791;3770;4540;3758;4548;3787;3789;5249 399.0 0.500 0.250 0.264 0 0.339 0.500 1.000 0
18 single cl_002 mixed crosslingual_ko_en semantic_search document mixed 0 0 산업 안전 입문서 3755;3775;3776;3777 3755:2;3775:2;3776:2;3777:2 5249;5230;3817;10573;3755;3787;3815;3802;5235;3774 324.7 0.250 0.200 0.151 1 0.151 0.250 0.000 0
19 single cl_003 mixed crosslingual_ko_en semantic_search document mixed 0 0 전기 안전 위험 3772;3790 3772:2;3790:2 3790;3772;5260;3897;5248;3771;3769;11671;13936;3755 959.2 1.000 1.000 1.000 1 1.000 1.000 0.000 0
20 single news_004 mixed news_fr semantic_search news mixed 0 0 guerre en Iran 4199;4202;4210;4361;4363;4507;4519;4521 4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2 22342;19576;17069;15924;16935;23149;16019;16462;16010;4776 299.2 0.000 0.000 0.000 1 0.000 0.000 0.000 0
21 single news_005 mixed news_crosslingual semantic_search news mixed 0 0 이란 미국 전쟁 글로벌 반응 4202;4258;4262;4536;4303;4304;4316 4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2 16761;21275;16927;20893;16771;17242;4329;20886;4457;4307 482.1 0.000 0.000 0.000 1 0.000 0.000 0.000 0
22 single fail_001 failure_expected failure_expected semantic_search document mixed 0 1 Rust async runtime tokio scheduler 내부 구조 23732;3774;5262;4547;5161;19373;20758;5174;24155;20032 236.1 0.000 0.000 0.000 1 0.000 0.000 0.000 0
23 single fail_002 failure_expected failure_expected semantic_search document ko 0 1 양자컴퓨터 큐비트 디코히어런스 16289;5089;5092;5250;22202;20507;5070;5118;5173;23605 258.1 0.000 0.000 0.000 1 0.000 0.000 0.000 0
24 single fail_003 failure_expected failure_expected semantic_search news ko 0 1 재즈 보컬리스트 빌리 홀리데이 20022;20470;4634;15361;16059;9102;23336;18286;16218;5738 214.7 0.000 0.000 0.000 1 0.000 0.000 0.000 0
25 single kw_006 standards standards fact_lookup document ko 0 0 산업안전보건기준에 관한 규칙 작업장 통로 3886;3887 3886:3;3887:2 5236;3886;3900;3895;3887;3881;3894;3908;3889;3912 377.8 1.000 0.500 0.624 1 0.627 1.000 1.000 0
26 single kw_007 standards standards fact_lookup document ko 0 0 산업안전보건기준 폭발 화재 위험물 누출 방지 3896;3766 3766:1;3896:3 3896;3895;3903;13930;11562;13931;13929;3900;3886;3902 353.0 0.500 1.000 0.613 1 0.917 1.000 1.000 0
27 single kw_008 standards standards fact_lookup document ko 0 0 고압가스 안전관리법 전문 4025;4026 4025:3;4026:2 11644;11579;4025;4026;11645;13750;11676;13299;13749;13766 419.8 1.000 0.333 0.571 1 0.539 1.000 1.000 0
28 single kw_009 standards standards fact_lookup document ko 0 0 KGS FP111 가스설비 배관설비 기준 13305 13305:3 13305;13311;13306;13312;13302;13304;13309;13299;13313;13918 387.5 1.000 1.000 1.000 1 1.000 1.000 1.000 0
29 single kw_010 standards standards fact_lookup document ko 0 0 KGS FU551 가스설비 압력조정기 가스계량기 13652 13652:3 13652;11689;13657;13655;13656;13649;13651;13752;13659;13650 278.8 1.000 1.000 1.000 1 1.000 1.000 1.000 0
30 single kw_011 standards standards fact_lookup document ko 0 0 산업안전보건기준 전기로 인한 위험 방지 3897;3772 3772:1;3897:3 3897;3895;3902;3896;3887;13935;13938;3877;3900;3899 411.5 0.500 1.000 0.613 1 0.917 1.000 1.000 0
31 single en_001 english_only english_only semantic_search document en 0 0 pressure vessel flange design 5144;5136 5136:2;5144:3 5144;5136;5180;5193;5140;5137;5149;5178;5207;5148 2266.0 1.000 1.000 1.000 1 1.000 1.000 1.000 0
32 single en_002 english_only english_only semantic_search document en 0 0 ASME Section VIII introduction 5204;5206 5204:3;5206:2 5204;5180;5182;5208;5206;5210;5141;5137;5145;5183 2217.5 1.000 1.000 0.850 1 0.918 1.000 1.000 0
33 single en_003 english_only english_only semantic_search document en 0 0 impact test requirements ASME 5205;5148 5148:1;5205:3 5178;5214;5205;5186;5145;5204;5148;5180;5192;5190 1960.3 1.000 0.333 0.511 1 0.502 1.000 1.000 0
34 single en_004 english_only english_only semantic_search document en 0 0 design of vessel supports 5149 5149:3 5149;5140;5136;5137;5141;5186;5145;5182;5190;5185 1399.1 1.000 1.000 1.000 1 1.000 1.000 1.000 0
35 single en_005 english_only english_only semantic_search document en 0 0 hydrogen piping ASME code 5178 5178:3 5178;5210;5180;5182;5143;5204;5211;5207;5185;5186 1482.0 1.000 1.000 1.000 1 1.000 1.000 1.000 0
36 single en_006 english_only english_only semantic_search document en 0 0 ASME welding qualification requirements 5209;3771 3771:1;5209:3 5180;5178;5186;5187;5182;5209;5224;5188;5204;4835 1336.9 0.500 0.167 0.218 0 0.327 1.000 1.000 0
37 single en_007 english_only english_only semantic_search document en 0 0 pressure vessel fabrication and inspection 5208;5145 5145:2;5208:3 5208;5189;5192;5180;5187;5186;5212;5188;5182;5137 1902.0 0.500 1.000 0.613 1 0.787 0.500 1.000 0
38 single en_008 english_only english_only semantic_search document en 0 0 Industrial Safety and Health Management ergonomics 3763;3755 3755:1;3763:3 3763;3759;3774;3755;3818;3812;3778;3756;3761;3771 1402.9 1.000 1.000 0.877 1 0.974 1.000 1.000 0
39 single cl_004 mixed mixed semantic_search document mixed 0 0 ASME 압력용기 설계 실무 5207;5210;5139;5135 5135:2;5139:2;5207:3;5210:3 5139;5207;5204;5225;5206;5208;5210;5137;5182;5145 961.2 0.750 1.000 0.767 1 0.686 0.750 1.000 0
40 single cl_005 mixed mixed semantic_search document mixed 0 0 ASME 용접 코드 해설 5224;5209 5209:2;5224:3 5224;5222;5225;5209;5180;5204;5210;5205;5178;5143 781.8 1.000 1.000 0.877 1 0.932 1.000 1.000 0
41 single cl_006 mixed mixed semantic_search document mixed 0 0 pressure vessel Korean industrial safety regulation 4025;3856;5136 3856:2;4025:2;5136:1 4026;5145;5182;5143;5210;13749;5204;5186;13760;13671 895.6 0.000 0.000 0.000 1 0.000 0.000 0.000 0
42 single cl_007 mixed mixed semantic_search document mixed 0 0 KGS 코드 LPG safety standard 11647;11689;11645;4025 4025:1;11645:2;11647:3;11689:2 11647;13760;13674;13669;13774;13773;13675;13755;13924;13772 362.2 0.250 1.000 0.390 1 0.647 0.333 1.000 0
43 single cl_008 mixed mixed semantic_search document mixed 0 0 수소 가스 안전 기준 hydrogen safety 5178;5169 5169:2;5178:3 10575;11671;11649;11648;13915;5241;11563;5173;5177;11653 608.8 0.000 0.000 0.000 0 0.000 0.000 0.000 0
44 single exam_001 exam exam fact_lookup document ko 0 0 레이놀즈수 정의 공식 11504;11505 11504:3;11505:2 11504;11533;5081;11509;11476;11486;5064;3788;5134;5075 496.4 0.500 1.000 0.613 1 0.787 0.500 1.000 0
45 single exam_002 exam exam fact_lookup document ko 0 0 탱크 바닥 구멍 유체 유속 토리첼리 11500;11495;11496 11495:2;11496:2;11500:3 11500;11495;11501;5139;5090;5178;11515;5210;11493;11719 293.0 0.667 1.000 0.765 1 0.856 0.667 1.000 0
46 single exam_003 exam exam fact_lookup document ko 0 0 이상기체 음속 마하수 11514;11515 11514:3;11515:2 11514;11475;5090;5084;11531;11476;11473;5093;11479;5124 546.2 0.500 1.000 0.613 1 0.787 0.500 1.000 0
47 single exam_004 exam exam fact_lookup document ko 0 0 고압가스 용기 내압시험 영구증가량 11591;11644;11691 11591:3;11644:2;11691:2 11591;11664;13948;13660;5177;13652;11665;13917;11660;13752 301.3 0.333 1.000 0.469 1 0.674 0.333 1.000 0
48 single exam_005 exam exam fact_lookup document ko 0 0 도시가스 배관 매설 이격거리 11627;11625;11646 11625:2;11627:3;11646:1 11627;11658;11600;11625;11692;13918;13751;5177;13653;13753 308.9 0.667 1.000 0.671 1 0.883 1.000 1.000 0
49 single exam_006 exam exam fact_lookup document ko 0 0 LPG 저장탱크 안전거리 분말소화기 11617;11669;11620 11617:3;11620:1;11669:2 11595;11616;13669;11617;11649;11655;11690;11658;11653;11689 254.7 0.333 0.250 0.202 0 0.321 0.500 1.000 0
50 single exam_007 exam exam fact_lookup document ko 0 0 오리피스 차압식 유량계 11712;11711;11503 11503:2;11711:2;11712:3 11711;11712;11503;11500;11713;11714;13930;11717;11701;11502 309.1 1.000 1.000 1.000 1 0.858 1.000 1.000 0
51 single fail_004 failure_expected failure_expected fact_lookup document ko 0 1 KGS AC999 임의 가스 코드 11691;11693;11692;13665;13661;13664;13666;13670;13773;13934 309.6 0.000 0.000 0.000 1 0.000 0.000 0.000 0
52 single fail_005 failure_expected failure_expected fact_lookup document ko 0 1 초전도 안전 관리법 시행규칙 4026;5236;3977;3971;3966;4018;3972;3973;3974;3895 382.5 0.000 0.000 0.000 1 0.000 0.000 0.000 0
@@ -0,0 +1,52 @@
label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,dedup_count,error
single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3879;3868;3890;3863;3856;3908;3851;4041;10573;3895,1690.9,1.000,1.000,0.947,0,0.731,1.000,1.000,0,
single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3917;3917;10573;3923;3919;3916;3919;3918;10573,1277.5,1.000,1.000,1.000,1,0.834,1.000,1.000,3,
single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3981;3985;3980;3984;3869;3984;3993;3857;3978,1045.5,1.000,1.000,1.000,1,1.000,1.000,1.000,2,
single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3852;3851;3851;3915;3775;3905;3904;3777;3903,2418.1,1.000,1.000,1.000,1,1.000,1.000,1.000,1,
single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,10570;3888;3888;3911;3905;3889;3890;3910;3902;3893,1742.0,1.000,0.500,0.631,1,0.631,1.000,1.000,1,
single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5244;3878;5249;3855;3897;3863;3867;3868;3874;5253,1178.5,0.250,0.125,0.123,0,0.087,0.333,0.000,0,
single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;5227;3867;3855;5236;3878;3917;3854;3851;5244,1025.7,1.000,1.000,0.885,1,0.930,1.000,1.000,1,
single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3980;3903;3904;3896;3903;3909;3985;3981;3904,1109.9,0.667,1.000,0.626,1,0.626,0.667,0.000,3,
single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10572;10573;3917;3916;3917;3923;3921;3918;3923;3919,1134.2,0.750,0.333,0.502,1,0.502,0.750,1.000,2,
single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;3876;5249;5234;4025;11677;6675;10573;4842;11677,1792.1,0.500,1.000,0.613,1,0.787,0.500,1.000,1,
single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,14813;15924;15924;15976;16378;16081;18077;22048;12213;16019,759.8,0.000,0.000,0.000,1,0.000,0.000,0.000,1,
single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,22049;17123;9022;11945;5391;6396;6829;9105;6774;6314,541.3,0.000,0.000,0.000,0,0.000,0.000,0.000,0,
single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4065;4064;4066;4065;4066;4063;4071;4071;4068,898.0,1.000,1.000,1.000,1,1.000,1.000,1.000,4,
single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4070;4060;4062;4059;4059;4061;4064;4062;4058;4065,1382.8,1.000,0.500,0.712,1,0.712,1.000,0.000,2,
single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,21186;4775;4202;4776;4679;4199;4519;4668;4515;22069,711.9,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3758;3770;3791;3770;3817;3763;4540;4540;3787,1546.4,0.500,0.333,0.307,1,0.394,0.500,1.000,2,
single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5244;5249;5249;5229;3774;3755;3755;3767;3756;3758,1706.2,0.250,0.167,0.151,1,0.151,0.250,0.000,2,
single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3772;3790;5260;3897;3897;3772;3755;10574;13936;13937,2118.3,1.000,1.000,1.000,1,1.000,1.000,0.000,2,
single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,5840;16010;16457;6945;5398;4199;6996;23149;4776;17069,793.2,0.125,0.167,0.090,1,0.090,0.125,0.000,0,
single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,21848;8381;16823;7473;21275;4262;9545;16927;16378;15924,1547.6,0.143,0.167,0.098,1,0.098,0.143,0.000,0,
single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5161;5070;5262;23732;5262;4546;24155;4546;5092;20758,748.5,0.000,0.000,0.000,1,0.000,0.000,0.000,2,
single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5057;5090;5090;5068;5063;5103;5066;5066;5076;24955,743.1,0.000,0.000,0.000,1,0.000,0.000,0.000,2,
single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,18567;18567;20022;20022;20470;20470;4634;20066;15361;15984,439.8,0.000,0.000,0.000,1,0.000,0.000,0.000,3,
single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3886;3887;3895;3902;3887;3895;3894;3889;3892;3890,1747.6,1.000,1.000,1.000,1,1.000,1.000,1.000,2,
single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3896;3895;3903;13930;3897;3772;3766;3766;13931,1222.0,1.000,1.000,0.818,1,0.961,1.000,1.000,2,
single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11644;11579;11579;4025;4025;4026;11693;4026;13299,2097.6,1.000,0.200,0.571,0,0.539,1.000,1.000,4,
single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;13299;13313;13310;13303,893.7,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13657;13655;13651;13656;13649;13651;13752;13658,560.5,1.000,1.000,1.000,1,1.000,1.000,1.000,1,
single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3897;3895;3902;3758;3886;3755;3896;3887;13935,2015.1,0.500,1.000,0.613,1,0.917,1.000,1.000,1,
single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5178;5180;5136;5207;5140;5137;5140;5149,3164.7,1.000,1.000,1.000,1,1.000,1.000,1.000,2,
single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5204;5180;5208;5210;5143;5206;5137;5207;5182,3527.0,1.000,1.000,0.832,1,0.907,1.000,1.000,1,
single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5204;5178;5214;5214;5224;5210;5148;5145;5186,4178.2,1.000,1.000,0.818,1,0.961,1.000,1.000,1,
single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5149;5141;5136;5137;5186;5139;5136;5140;5186,3640.1,1.000,1.000,1.000,1,1.000,1.000,1.000,3,
single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5178;5139;5180;5210;5179;5180;5210;5143;5182,3293.5,1.000,1.000,1.000,1,1.000,1.000,1.000,3,
single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5204;5224;5208;5209;5205;5178;5180;5178;5225;5208,3715.8,0.500,0.250,0.264,0,0.395,1.000,1.000,2,
single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5139;5135;5208;5210;5187;5210;5133;5204;4026;5204,3376.9,0.500,0.333,0.307,1,0.394,0.500,1.000,2,
single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3812;3755;3760;5253;5230;5249;3812;3858;10573,1391.2,1.000,1.000,0.920,1,0.983,1.000,1.000,1,
single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5210;5145;5137;5180;5204;5140;5206;5145;5206,2204.9,0.500,1.000,0.637,0,0.522,0.500,0.500,2,
single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5225;5225;5224;5210;5180;5204;5209;5209;5178,1763.9,1.000,1.000,0.832,1,0.907,1.000,1.000,3,
single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,5210;5210;5186;13913;5143;13760;13749;5145;5137;5140,1494.0,0.000,0.000,0.000,1,0.000,0.000,0.000,1,
single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13760;13674;13669;13774;13773;13675;11688;13757;11689,602.1,0.500,1.000,0.503,1,0.727,0.667,1.000,0,
single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,10575;5177;10572;11671;11653;11649;5173;5177;11653;13946,1940.6,0.000,0.000,0.000,0,0.000,0.000,0.000,2,
single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11533;11504;5090;4544;5081;11509;5140;5089;11476,1233.9,0.500,1.000,0.613,1,0.787,0.500,1.000,1,
single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;3788;5071;5090;5139;11486;5106;5090,701.2,0.667,1.000,0.765,1,0.856,0.667,1.000,1,
single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11479;11516;11475;5090;5084;11515;11531;11476;11514,1587.7,1.000,1.000,0.818,1,0.900,1.000,1.000,1,
single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11665;11664;11591;11591;13660;11664;13948;13660;11665;13942,1795.3,0.333,0.333,0.235,1,0.337,0.333,1.000,4,
single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11658;11627;13753;11625;13918;13752;13653;13918;13751;13915,1122.2,0.667,0.500,0.498,1,0.608,1.000,1.000,1,
single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,13916;13752;11595;13669;11616;11690;11649;11617;11658;11655,602.1,0.333,0.125,0.148,0,0.235,0.500,1.000,0,
single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11712;11711;11503;11500;11711;11701;11502;11713;13930;11717,517.9,1.000,1.000,1.000,1,1.000,1.000,1.000,1,
single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11693;5210;13936;5210;13665;13664;11691;13673;11693,364.7,0.000,0.000,0.000,1,0.000,0.000,0.000,3,
single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,3895;4026;3875;3966;5210;5210;3961;3971;4025;4026,1106.7,0.000,0.000,0.000,1,0.000,0.000,0.000,2,
1 label id category legacy_category intent domain_hint language ocr_derived failure_expected query relevant_ids graded_relevance returned_ids_top10 latency_ms recall_at_10 mrr_at_10 ndcg_at_10 top3_hit graded_ndcg_at_10 graded_recall_at_10_t2 graded_recall_at_10_t3 dedup_count error
2 single kw_001 standards exact_keyword fact_lookup document ko 0 0 산업안전보건법 제6장 3856;3868;3879 3856:3;3868:2;3879:2 3879;3868;3890;3863;3856;3908;3851;4041;10573;3895 1690.9 1.000 1.000 0.947 0 0.731 1.000 1.000 0
3 single kw_002 standards exact_keyword fact_lookup document ko 0 0 중대재해 처벌 등에 관한 법률 제2장 중대산업재해 3917;3921 3917:3;3921:2 3921;3917;3917;10573;3923;3919;3916;3919;3918;10573 1277.5 1.000 1.000 1.000 1 0.834 1.000 1.000 3
4 single kw_003 standards exact_keyword fact_lookup document ko 0 0 화학물질관리법 유해화학물질 영업자 3981 3981:3 3981;3981;3985;3980;3984;3869;3984;3993;3857;3978 1045.5 1.000 1.000 1.000 1 1.000 1.000 1.000 2
5 single kw_004 standards exact_keyword fact_lookup document ko 0 0 근로기준법 안전과 보건 4041 4041:3 4041;3852;3851;3851;3915;3775;3905;3904;3777;3903 2418.1 1.000 1.000 1.000 1 1.000 1.000 1.000 1
6 single kw_005 standards exact_keyword fact_lookup document ko 0 0 산업안전보건기준에 관한 규칙 보호구 3888 3888:3 10570;3888;3888;3911;3905;3889;3890;3910;3902;3893 1742.0 1.000 0.500 0.631 1 0.631 1.000 1.000 1
7 single nl_001 korean_only natural_language_ko semantic_search document ko 0 0 기계로 인한 산업재해 관련 법령 3856;3868;3879;3854 3854:1;3856:3;3868:2;3879:2 5244;3878;5249;3855;3897;3863;3867;3868;3874;5253 1178.5 0.250 0.125 0.123 0 0.087 0.333 0.000 0
8 single nl_002 korean_only natural_language_ko semantic_search document ko 0 0 사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일 3855;3867;3878 3855:3;3867:2;3878:2 3855;5227;3867;3855;5236;3878;3917;3854;3851;5244 1025.7 1.000 1.000 0.885 1 0.930 1.000 1.000 1
9 single nl_003 korean_only natural_language_ko semantic_search document ko 0 0 유해화학물질을 다루는 회사가 지켜야 할 안전 의무 3980;3981;3982 3980:2;3981:2;3982:2 3980;3980;3903;3904;3896;3903;3909;3985;3981;3904 1109.9 0.667 1.000 0.626 1 0.626 0.667 0.000 3
10 single nl_004 korean_only natural_language_ko semantic_search document ko 0 0 중대재해가 발생했을 때 경영책임자가 처벌받는 기준 3916;3917;3920;3921 3916:2;3917:3;3920:2;3921:2 10572;10573;3917;3916;3917;3923;3921;3918;3923;3919 1134.2 0.750 0.333 0.502 1 0.502 0.750 1.000 2
11 single nl_005 korean_only natural_language_ko semantic_search document ko 0 0 안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가 3853;3865 3853:3;3865:2 3853;3876;5249;5234;4025;11677;6675;10573;4842;11677 1792.1 0.500 1.000 0.613 1 0.787 0.500 1.000 1
12 single news_001 korean_only news_ko semantic_search news ko 0 0 이란과 미국의 군사 충돌 4303;4304;4307;4316;4322;4323;4327;4335 4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2 14813;15924;15924;15976;16378;16081;18077;22048;12213;16019 759.8 0.000 0.000 0.000 1 0.000 0.000 0.000 1
13 single news_002 korean_only news_ko semantic_search news ko 0 0 호르무즈 해협 봉쇄 4316;4320;4322;4327 4316:3;4320:2;4322:2;4327:2 22049;17123;9022;11945;5391;6396;6829;9105;6774;6314 541.3 0.000 0.000 0.000 0 0.000 0.000 0.000 0
14 single misc_001 korean_only other_domain fact_lookup document ko 0 0 강체의 평면 운동학 4063;4065 4063:3;4065:2 4063;4065;4064;4066;4065;4066;4063;4071;4071;4068 898.0 1.000 1.000 1.000 1 1.000 1.000 1.000 4
15 single misc_002 korean_only other_domain semantic_search document ko 0 0 질점의 운동역학 4060;4061;4062 4060:2;4061:2;4062:2 4070;4060;4062;4059;4059;4061;4064;4062;4058;4065 1382.8 1.000 0.500 0.712 1 0.712 1.000 0.000 2
16 single news_003 english_only news_en semantic_search news en 0 0 Trump Iran ultimatum 4258;4260;4262 4258:2;4260:2;4262:2 21186;4775;4202;4776;4679;4199;4519;4668;4515;22069 711.9 0.000 0.000 0.000 1 0.000 0.000 0.000 0
17 single cl_001 mixed crosslingual_ko_en semantic_search document mixed 0 0 기계 안전 가드 설계 원리 3770;3856 3770:3;3856:2 5239;3758;3770;3791;3770;3817;3763;4540;4540;3787 1546.4 0.500 0.333 0.307 1 0.394 0.500 1.000 2
18 single cl_002 mixed crosslingual_ko_en semantic_search document mixed 0 0 산업 안전 입문서 3755;3775;3776;3777 3755:2;3775:2;3776:2;3777:2 5244;5249;5249;5229;3774;3755;3755;3767;3756;3758 1706.2 0.250 0.167 0.151 1 0.151 0.250 0.000 2
19 single cl_003 mixed crosslingual_ko_en semantic_search document mixed 0 0 전기 안전 위험 3772;3790 3772:2;3790:2 3772;3790;5260;3897;3897;3772;3755;10574;13936;13937 2118.3 1.000 1.000 1.000 1 1.000 1.000 0.000 2
20 single news_004 mixed news_fr semantic_search news mixed 0 0 guerre en Iran 4199;4202;4210;4361;4363;4507;4519;4521 4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2 5840;16010;16457;6945;5398;4199;6996;23149;4776;17069 793.2 0.125 0.167 0.090 1 0.090 0.125 0.000 0
21 single news_005 mixed news_crosslingual semantic_search news mixed 0 0 이란 미국 전쟁 글로벌 반응 4202;4258;4262;4536;4303;4304;4316 4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2 21848;8381;16823;7473;21275;4262;9545;16927;16378;15924 1547.6 0.143 0.167 0.098 1 0.098 0.143 0.000 0
22 single fail_001 failure_expected failure_expected semantic_search document mixed 0 1 Rust async runtime tokio scheduler 내부 구조 5161;5070;5262;23732;5262;4546;24155;4546;5092;20758 748.5 0.000 0.000 0.000 1 0.000 0.000 0.000 2
23 single fail_002 failure_expected failure_expected semantic_search document ko 0 1 양자컴퓨터 큐비트 디코히어런스 5057;5090;5090;5068;5063;5103;5066;5066;5076;24955 743.1 0.000 0.000 0.000 1 0.000 0.000 0.000 2
24 single fail_003 failure_expected failure_expected semantic_search news ko 0 1 재즈 보컬리스트 빌리 홀리데이 18567;18567;20022;20022;20470;20470;4634;20066;15361;15984 439.8 0.000 0.000 0.000 1 0.000 0.000 0.000 3
25 single kw_006 standards standards fact_lookup document ko 0 0 산업안전보건기준에 관한 규칙 작업장 통로 3886;3887 3886:3;3887:2 3886;3887;3895;3902;3887;3895;3894;3889;3892;3890 1747.6 1.000 1.000 1.000 1 1.000 1.000 1.000 2
26 single kw_007 standards standards fact_lookup document ko 0 0 산업안전보건기준 폭발 화재 위험물 누출 방지 3896;3766 3766:1;3896:3 3896;3896;3895;3903;13930;3897;3772;3766;3766;13931 1222.0 1.000 1.000 0.818 1 0.961 1.000 1.000 2
27 single kw_008 standards standards fact_lookup document ko 0 0 고압가스 안전관리법 전문 4025;4026 4025:3;4026:2 11644;11644;11579;11579;4025;4025;4026;11693;4026;13299 2097.6 1.000 0.200 0.571 0 0.539 1.000 1.000 4
28 single kw_009 standards standards fact_lookup document ko 0 0 KGS FP111 가스설비 배관설비 기준 13305 13305:3 13305;13311;13306;13312;13302;13304;13299;13313;13310;13303 893.7 1.000 1.000 1.000 1 1.000 1.000 1.000 0
29 single kw_010 standards standards fact_lookup document ko 0 0 KGS FU551 가스설비 압력조정기 가스계량기 13652 13652:3 13652;11689;13657;13655;13651;13656;13649;13651;13752;13658 560.5 1.000 1.000 1.000 1 1.000 1.000 1.000 1
30 single kw_011 standards standards fact_lookup document ko 0 0 산업안전보건기준 전기로 인한 위험 방지 3897;3772 3772:1;3897:3 3897;3897;3895;3902;3758;3886;3755;3896;3887;13935 2015.1 0.500 1.000 0.613 1 0.917 1.000 1.000 1
31 single en_001 english_only english_only semantic_search document en 0 0 pressure vessel flange design 5144;5136 5136:2;5144:3 5144;5136;5178;5180;5136;5207;5140;5137;5140;5149 3164.7 1.000 1.000 1.000 1 1.000 1.000 1.000 2
32 single en_002 english_only english_only semantic_search document en 0 0 ASME Section VIII introduction 5204;5206 5204:3;5206:2 5204;5204;5180;5208;5210;5143;5206;5137;5207;5182 3527.0 1.000 1.000 0.832 1 0.907 1.000 1.000 1
33 single en_003 english_only english_only semantic_search document en 0 0 impact test requirements ASME 5205;5148 5148:1;5205:3 5205;5204;5178;5214;5214;5224;5210;5148;5145;5186 4178.2 1.000 1.000 0.818 1 0.961 1.000 1.000 1
34 single en_004 english_only english_only semantic_search document en 0 0 design of vessel supports 5149 5149:3 5149;5149;5141;5136;5137;5186;5139;5136;5140;5186 3640.1 1.000 1.000 1.000 1 1.000 1.000 1.000 3
35 single en_005 english_only english_only semantic_search document en 0 0 hydrogen piping ASME code 5178 5178:3 5178;5178;5139;5180;5210;5179;5180;5210;5143;5182 3293.5 1.000 1.000 1.000 1 1.000 1.000 1.000 3
36 single en_006 english_only english_only semantic_search document en 0 0 ASME welding qualification requirements 5209;3771 3771:1;5209:3 5204;5224;5208;5209;5205;5178;5180;5178;5225;5208 3715.8 0.500 0.250 0.264 0 0.395 1.000 1.000 2
37 single en_007 english_only english_only semantic_search document en 0 0 pressure vessel fabrication and inspection 5208;5145 5145:2;5208:3 5139;5135;5208;5210;5187;5210;5133;5204;4026;5204 3376.9 0.500 0.333 0.307 1 0.394 0.500 1.000 2
38 single en_008 english_only english_only semantic_search document en 0 0 Industrial Safety and Health Management ergonomics 3763;3755 3755:1;3763:3 3763;3812;3755;3760;5253;5230;5249;3812;3858;10573 1391.2 1.000 1.000 0.920 1 0.983 1.000 1.000 1
39 single cl_004 mixed mixed semantic_search document mixed 0 0 ASME 압력용기 설계 실무 5207;5210;5139;5135 5135:2;5139:2;5207:3;5210:3 5139;5210;5145;5137;5180;5204;5140;5206;5145;5206 2204.9 0.500 1.000 0.637 0 0.522 0.500 0.500 2
40 single cl_005 mixed mixed semantic_search document mixed 0 0 ASME 용접 코드 해설 5224;5209 5209:2;5224:3 5224;5225;5225;5224;5210;5180;5204;5209;5209;5178 1763.9 1.000 1.000 0.832 1 0.907 1.000 1.000 3
41 single cl_006 mixed mixed semantic_search document mixed 0 0 pressure vessel Korean industrial safety regulation 4025;3856;5136 3856:2;4025:2;5136:1 5210;5210;5186;13913;5143;13760;13749;5145;5137;5140 1494.0 0.000 0.000 0.000 1 0.000 0.000 0.000 1
42 single cl_007 mixed mixed semantic_search document mixed 0 0 KGS 코드 LPG safety standard 11647;11689;11645;4025 4025:1;11645:2;11647:3;11689:2 11647;13760;13674;13669;13774;13773;13675;11688;13757;11689 602.1 0.500 1.000 0.503 1 0.727 0.667 1.000 0
43 single cl_008 mixed mixed semantic_search document mixed 0 0 수소 가스 안전 기준 hydrogen safety 5178;5169 5169:2;5178:3 10575;5177;10572;11671;11653;11649;5173;5177;11653;13946 1940.6 0.000 0.000 0.000 0 0.000 0.000 0.000 2
44 single exam_001 exam exam fact_lookup document ko 0 0 레이놀즈수 정의 공식 11504;11505 11504:3;11505:2 11504;11533;11504;5090;4544;5081;11509;5140;5089;11476 1233.9 0.500 1.000 0.613 1 0.787 0.500 1.000 1
45 single exam_002 exam exam fact_lookup document ko 0 0 탱크 바닥 구멍 유체 유속 토리첼리 11500;11495;11496 11495:2;11496:2;11500:3 11500;11495;11501;3788;5071;5090;5139;11486;5106;5090 701.2 0.667 1.000 0.765 1 0.856 0.667 1.000 1
46 single exam_003 exam exam fact_lookup document ko 0 0 이상기체 음속 마하수 11514;11515 11514:3;11515:2 11514;11479;11516;11475;5090;5084;11515;11531;11476;11514 1587.7 1.000 1.000 0.818 1 0.900 1.000 1.000 1
47 single exam_004 exam exam fact_lookup document ko 0 0 고압가스 용기 내압시험 영구증가량 11591;11644;11691 11591:3;11644:2;11691:2 11665;11664;11591;11591;13660;11664;13948;13660;11665;13942 1795.3 0.333 0.333 0.235 1 0.337 0.333 1.000 4
48 single exam_005 exam exam fact_lookup document ko 0 0 도시가스 배관 매설 이격거리 11627;11625;11646 11625:2;11627:3;11646:1 11658;11627;13753;11625;13918;13752;13653;13918;13751;13915 1122.2 0.667 0.500 0.498 1 0.608 1.000 1.000 1
49 single exam_006 exam exam fact_lookup document ko 0 0 LPG 저장탱크 안전거리 분말소화기 11617;11669;11620 11617:3;11620:1;11669:2 13916;13752;11595;13669;11616;11690;11649;11617;11658;11655 602.1 0.333 0.125 0.148 0 0.235 0.500 1.000 0
50 single exam_007 exam exam fact_lookup document ko 0 0 오리피스 차압식 유량계 11712;11711;11503 11503:2;11711:2;11712:3 11712;11711;11503;11500;11711;11701;11502;11713;13930;11717 517.9 1.000 1.000 1.000 1 1.000 1.000 1.000 1
51 single fail_004 failure_expected failure_expected fact_lookup document ko 0 1 KGS AC999 임의 가스 코드 11691;11693;5210;13936;5210;13665;13664;11691;13673;11693 364.7 0.000 0.000 0.000 1 0.000 0.000 0.000 3
52 single fail_005 failure_expected failure_expected fact_lookup document ko 0 1 초전도 안전 관리법 시행규칙 3895;4026;3875;3966;5210;5210;3961;3971;4025;4026 1106.7 0.000 0.000 0.000 1 0.000 0.000 0.000 2
@@ -0,0 +1,91 @@
{
"version": "v0.2-phase2q-eval-dedup",
"label": "phase_2q_eval_graded_ndcg_dedup_invariant_recovery",
"date": "2026-05-24",
"plan": "pr-eval-graded-ndcg-dedup-stormy-tide.md",
"main_head_pre": "b00d9f5",
"critical_finding": {
"summary": "Phase 2Q multi-query 의 실제 net 효과 = 거의 0 (NDCG -0.003 vs baseline). 모든 박제 측정 (0.927/0.876/+0.217) 은 inflation 결과.",
"root_cause": "_rrf_fuse_variants 의 representative 보존 logic 이 같은 doc_id 의 여러 SearchResult 를 unique 가정 — 실제로는 multi-query path 에서 doc_id 중복 박제. chunk_id dedup (Rerank-Fix) 으로 chunk-level inflation 만 해결, doc-level inflation 잔재.",
"evidence": "dedup audit = baseline 0/51 정상 vs gemma 42/51 cases with 81 chunks dedup applied",
"rollout_impact": "Apply opt-in 1주 관찰 결정 재검토 필요. multi-query 의 net 개선 sub-noise level + latency 회귀 (p50 약 3.7x, p95 약 1.9x)."
},
"snapshot": {
"doc_id_max": 25180,
"chunk_id_max": 56526
},
"eval_set": {"total_cases": 51, "scored_cases": 46},
"measurements": {
"baseline_rewrite_null": {
"graded_ndcg_at_10": 0.644,
"graded_recall_at_10_t2": 0.699,
"graded_recall_at_10_t3": 0.761,
"latency_p50_ms": 378,
"latency_p95_ms": 1931,
"dedup_audit": {
"cases_with_dedup": "0/51",
"total_dedup_chunks": 0,
"status": "✓ 정상 — single-query path 의 retrieval 가 doc unique 박제"
},
"by_category": {
"english_only": {"recall": 0.78, "gndcg": 0.72},
"exam": {"recall": 0.57, "gndcg": 0.74},
"korean_only": {"recall": 0.56, "gndcg": 0.57},
"mixed": {"recall": 0.38, "gndcg": 0.38},
"standards": {"recall": 0.91, "gndcg": 0.82}
},
"csv": "reports/v0_2_phase2q_eval_dedup_baseline_2026-05-24.csv"
},
"cand_multi_query_macmini": {
"graded_ndcg_at_10": 0.641,
"graded_recall_at_10_t2": 0.716,
"graded_recall_at_10_t3": 0.728,
"latency_p50_ms": 1383,
"latency_p95_ms": 3584,
"dedup_audit": {
"cases_with_dedup": "42/51",
"total_dedup_chunks": 81,
"status": "⚠️ inflation — _rrf_fuse_variants representative 의 doc_id 중복 박제 (chunk_id dedup 후 잔재)"
},
"by_category": {
"english_only": {"recall": 0.78, "gndcg": 0.74},
"exam": {"recall": 0.64, "gndcg": 0.67},
"korean_only": {"recall": 0.57, "gndcg": 0.52},
"mixed": {"recall": 0.40, "gndcg": 0.39},
"standards": {"recall": 0.95, "gndcg": 0.87}
},
"csv": "reports/v0_2_phase2q_eval_dedup_gemma_2026-05-24.csv"
},
"delta_vs_baseline": {
"overall_ndcg": "-0.003 (사실상 동일, noise level)",
"recall_t2": "+0.017",
"recall_t3": "-0.033 (회귀)",
"english_only": "+0.02",
"exam": "-0.07",
"korean_only": "-0.05",
"mixed": "+0.01",
"standards": "+0.05",
"latency_p50": "+1005ms (+266%)",
"latency_p95": "+1653ms (+86%)"
}
},
"previous_inflated_records": [
{"source": "Phase 3 commit a41adb6", "ndcg": 0.927, "inflation_source": "chunk_id 중복 (chunks_per_doc cap 만, dedup 0)"},
{"source": "Rerank-Fix commit b734fc5", "ndcg": 0.876, "inflation_source": "chunk_id dedup 적용 단 doc_id 중복 잔재"},
{"source": "Category-Analysis commit b00d9f5", "ndcg": 0.876, "note": "Rerank-Fix 측정값 재사용 — 본 정정 후 0.641"}
],
"recommendations": {
"immediate_user_decision": [
"Apply rollback 검토 — multi-query 의 실제 net 효과 ≈ 0 + latency 회귀 (p50 약 3.7x, p95 약 1.9x) + LLM endpoint 의존",
"또는 PR-2Q-Search-Result-Dedup 진입 (real fix) 후 재측정 → 실제 multi-query 효과 측정 후 Apply 결정"
],
"next_pr": "PR-2Q-Search-Result-Dedup — _rrf_fuse_variants representative 가 doc_id unique 가정 invariant 강제. dedup audit 가 42/51 → 0/51 회복 + NDCG 실제 효과 측정 가치"
},
"changes_summary": {
"files_changed": [
"tests/search_eval/run_eval.py — _dedup_returned_ids helper + count_dedup wrapper + ndcg_at_k/graded_ndcg_at_k 진입 시 dedup + print_summary dedup audit stats + QueryResult.dedup_count + csv schema column",
"tests/search_eval/test_eval_graded_ndcg_dedup.py — 13 신규 test (dedup helper + invariant + Phase 2Q 실측 case regression)"
],
"test_results": "13 신규 PASS + 38 기존 PASS = 51/51, retrieval path 영향 0"
}
}
+59 -3
View File
@@ -90,6 +90,8 @@ class QueryResult:
graded_ndcg_at_10: float = 0.0
graded_recall_at_10_t2: float = 0.0
graded_recall_at_10_t3: float = 0.0
# PR-Eval-GradedNDCG-Dedup: returned[:k] 의 중복 doc 수 박제. inflation 검출 audit.
dedup_count: int = 0
error: str | None = None
@@ -98,6 +100,34 @@ class QueryResult:
# ─────────────────────────────────────────────────────────
def _dedup_returned_ids(returned: list[int], k: int) -> tuple[list[int], int]:
    """Drop repeated doc ids from ``returned[:k]``, keeping first occurrences in order.

    PR-Eval-GradedNDCG-Dedup ([[feedback_graded_ndcg_dedup_invariant]]). Both the
    graded and the binary NDCG computations assume the top-N holds unique docs.
    When the retrieval path can emit the same doc more than once, the actual DCG
    can exceed the ideal DCG, i.e. NDCG > 1.0, which violates the metric's
    invariant. The original author records this as the origin of the
    Phase 2Q Phase 3 NDCG 0.927 inflation.

    The slice to ``k`` happens first; ids that only appear beyond position ``k``
    are never considered, so the result may be shorter than ``k``.

    Returns:
        ``(deduped_top_k, dedup_count)`` where ``dedup_count`` is the number of
        duplicate entries removed from the top-k window.
    """
    window = returned[:k]
    # dict keys keep insertion order, so this retains the first occurrence of each id.
    unique_ids = list(dict.fromkeys(window))
    return unique_ids, len(window) - len(unique_ids)
def count_dedup(returned: list[int], k: int = 10) -> int:
    """Return the number of duplicate doc entries inside ``returned[:k]`` (audit helper)."""
    # Only the removal count (second tuple element) matters here.
    return _dedup_returned_ids(returned, k)[1]
def recall_at_k(returned: list[int], relevant: list[int], k: int = 10) -> float:
"""top-k 안에 들어간 정답 비율. 정답 0개면 1.0(빈 케이스는 별도 fail metric)."""
if not relevant:
@@ -119,12 +149,16 @@ def mrr_at_k(returned: list[int], relevant: list[int], k: int = 10) -> float:
def ndcg_at_k(returned: list[int], relevant: list[int], k: int = 10) -> float:
"""binary relevance 기반 NDCG@k. top3_ids 같은 가중치는 v0.1에선 무시."""
"""binary relevance 기반 NDCG@k. top3_ids 같은 가중치는 v0.1에선 무시.
PR-Eval-GradedNDCG-Dedup: returned[:k] 진입 직전 dedup (중복 doc inflation 방지).
"""
if not relevant:
return 0.0
deduped, _ = _dedup_returned_ids(returned, k)
relevant_set = set(relevant)
dcg = 0.0
for rank, doc_id in enumerate(returned[:k], start=1):
for rank, doc_id in enumerate(deduped, start=1):
if doc_id in relevant_set:
# binary gain = 1, DCG = 1 / log2(rank+1)
dcg += 1.0 / math.log2(rank + 1)
@@ -140,11 +174,16 @@ def graded_ndcg_at_k(returned: list[int], grades: dict[int, int], k: int = 10) -
gain = 2^grade - 1 (grade=0 → gain=0, grade=3 → gain=7).
ideal DCG = grades 를 grade 내림차순으로 top-k 채운 경우.
grades 비어 있으면 0.0 (failure_expected 케이스는 별도 처리).
PR-Eval-GradedNDCG-Dedup: returned[:k] 진입 직전 dedup. Phase 2Q Phase 3 NDCG 0.927
inflation (top-N doc 중복 박제) 같은 invariant 위반 회피.
[[feedback_graded_ndcg_dedup_invariant]].
"""
if not grades:
return 0.0
deduped, _ = _dedup_returned_ids(returned, k)
dcg = 0.0
for rank, doc_id in enumerate(returned[:k], start=1):
for rank, doc_id in enumerate(deduped, start=1):
grade = grades.get(doc_id, 0)
if grade > 0:
dcg += (2 ** grade - 1) / math.log2(rank + 1)
@@ -272,6 +311,12 @@ async def evaluate(
reranker_backend=reranker_backend,
rewrite_backend=rewrite_backend,
)
dedup_count = count_dedup(returned_ids, 10)
if dedup_count > 0:
print(
f" [dedup] {q.id}: top-10 에 중복 doc {dedup_count}개 (inflation 회피)",
file=sys.stderr,
)
results.append(
QueryResult(
query=q,
@@ -289,6 +334,7 @@ async def evaluate(
graded_recall_at_10_t3=graded_recall_at_k(
returned_ids, q.graded_relevance, threshold=3, k=10
),
dedup_count=dedup_count,
)
)
except Exception as exc:
@@ -403,6 +449,14 @@ def print_summary(
f" Failure-case precision: {failure_correct}/{len(failure_cases)}"
f" ({failure_precision:.2f}) — empty result expected"
)
# PR-Eval-GradedNDCG-Dedup: dedup audit stats (inflation 검출).
dedup_cases = [r for r in results if r.dedup_count > 0]
dedup_total = sum(r.dedup_count for r in dedup_cases)
print(
f" Dedup audit: {len(dedup_cases)}/{len(results)} cases with dedup applied"
f" (totaling {dedup_total} chunks). "
+ ("⚠️ inflation 의심 — retrieval path 검증" if dedup_cases else "✓ 정상 (top-N unique doc invariant)")
)
# 카테고리별
by_cat: dict[str, list[QueryResult]] = {}
@@ -539,6 +593,7 @@ def write_csv(results: list[QueryResult], output_path: Path) -> None:
"graded_ndcg_at_10",
"graded_recall_at_10_t2",
"graded_recall_at_10_t3",
"dedup_count",
"error",
]
)
@@ -569,6 +624,7 @@ def write_csv(results: list[QueryResult], output_path: Path) -> None:
f"{r.graded_ndcg_at_10:.3f}",
f"{r.graded_recall_at_10_t2:.3f}",
f"{r.graded_recall_at_10_t3:.3f}",
str(r.dedup_count),
r.error or "",
]
)
@@ -0,0 +1,142 @@
"""PR-Eval-GradedNDCG-Dedup — run_eval.py 의 graded NDCG dedup invariant 테스트.
[[feedback_graded_ndcg_dedup_invariant]] regression. graded NDCG 는 top-N unique doc 가정
— retrieval path 가 doc 중복 박제 시 inflation (NDCG > 1.0). dedup helper + 함수 진입
시 dedup 으로 invariant 복원.
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
import pytest
# tests/search_eval/ → 프로젝트 루트
THIS_DIR = Path(__file__).resolve().parent
sys.path.insert(0, str(THIS_DIR)) # run_eval 직접 import 위해
from run_eval import (
_dedup_returned_ids,
count_dedup,
graded_ndcg_at_k,
graded_recall_at_k,
ndcg_at_k,
)
# ─── 1. _dedup_returned_ids helper ─────────────────────────
def test_dedup_empty_returns_empty():
    """An empty input dedups to an empty list with a duplicate count of zero."""
    unique_ids, removed = _dedup_returned_ids([], 10)
    assert (unique_ids, removed) == ([], 0)
def test_dedup_no_duplicates_passthrough():
    """A list without repeated ids comes back unchanged with a duplicate count of zero."""
    distinct_ids = [100, 200, 300]
    unique_ids, removed = _dedup_returned_ids(distinct_ids, 10)
    assert unique_ids == [100, 200, 300]
    assert removed == 0
def test_dedup_with_duplicates_first_only():
    """A repeated id (3868) keeps only its first occurrence; order is preserved."""
    with_repeat = [3868, 3879, 3856, 3851, 3868, 4041, 3890]
    expected_unique = [3868, 3879, 3856, 3851, 4041, 3890]
    unique_ids, removed = _dedup_returned_ids(with_repeat, 10)
    assert unique_ids == expected_unique
    assert removed == 1
def test_dedup_k_limit_applied_before_dedup():
    """Only returned[:k] is deduplicated; ids past position k are ignored."""
    # With k=3 the window is [1, 2, 1]: the second 1 is dropped and the
    # trailing 3 is never examined, giving [1, 2] and a count of 1.
    unique_ids, removed = _dedup_returned_ids([1, 2, 1, 3], 3)
    assert (unique_ids, removed) == ([1, 2], 1)
def test_dedup_count_helper():
    """The count_dedup wrapper reports the expected duplicate count."""
    cases = (
        ([10, 10, 20, 30], 1),
        ([10, 20, 30], 0),
    )
    for doc_ids, expected_count in cases:
        assert count_dedup(doc_ids, 10) == expected_count
def test_dedup_phase2q_kw_001_case():
    """Measured kw_001 case from Phase 2Q Phase 3: 3868 repeats, so one entry is deduped."""
    # Phase 3 returned 3868 at position 1 and again at position 5.
    phase3_ids = [3868, 3879, 3856, 3851, 3868, 3858, 3878, 3859, 3850, 3863]
    unique_ids, removed = _dedup_returned_ids(phase3_ids, 10)
    assert removed == 1
    # The first 3868 is kept and the second is removed, so 3858 moves up to position 5.
    leading_five = unique_ids[:5]
    assert leading_five == [3868, 3879, 3856, 3851, 3858]
# ─── 2. graded_ndcg_at_k: zero regression (dedup leaves normal cases unaffected) ─────
def test_graded_ndcg_baseline_no_duplicates_unchanged():
    """With only unique docs, dedup has no effect and the earlier score is reproduced."""
    grades = {3856: 3, 3868: 2, 3879: 2}
    unique_ids = [3868, 3879, 3856, 4041, 3851, 3890, 3917, 3863, 3908, 3855]
    score = graded_ndcg_at_k(unique_ids, grades, 10)
    # 0.808 is the baseline kw_001 measurement recorded by the original author.
    assert score >= 0.80 and score <= 0.82
def test_graded_ndcg_with_duplicates_no_longer_inflated():
    """Duplicated docs must not push NDCG above 1.0 (possible before dedup was applied)."""
    # 3856 (grade 3) appears twice; per the original note this inflated the
    # score when dedup was not applied.
    out = graded_ndcg_at_k(
        [3856, 3856, 3868, 3879, 3851, 3890, 3917, 3863, 3908, 3855],
        {3856: 3, 3868: 2, 3879: 2},
        10,
    )
    assert 1.0 >= out, f"NDCG > 1.0 = invariant 위반: {out}"
def test_graded_ndcg_all_duplicates_invariant():
    """A top-N made of one repeated doc is scored as a single entry at rank 1."""
    same_doc_repeated = [11504 for _ in range(10)]
    score = graded_ndcg_at_k(same_doc_repeated, {11504: 3, 11505: 2}, 10)
    # Only rank 1 is a hit (grade 3): DCG = (2^3 - 1) / log2(2) = 7
    # iDCG = (2^3 - 1) / log2(2) + (2^2 - 1) / log2(3) = 7 + 3 / log2(3) ≈ 8.893
    # NDCG ≈ 7 / 8.893 ≈ 0.787
    lower, upper = 0.78, 0.80
    assert lower <= score <= upper
def test_graded_ndcg_phase2q_exam_001_inflation_fix():
    """Measured exam_001 case from Phase 2Q Phase 3: duplicated 11504 is normalized by dedup."""
    # Phase 3 returned both 11504 and 11533 several times within the top 10.
    grades = {11504: 3, 11505: 2}
    returned = [11504, 11504, 11533, 5106, 11533, 11504, 11479, 11475, 11533, 5090]
    out = graded_ndcg_at_k(returned, grades, 10)
    # After dedup only 11504 at rank 1 is a hit, so NDCG ≈ 0.787 — the same
    # value as in test_graded_ndcg_all_duplicates_invariant.
    assert 1.0 >= out, f"inflation 잔재: {out}"
    assert out >= 0.78 and out <= 0.80, f"예상 0.787 ± noise, 실제 {out}"
def test_graded_ndcg_empty_grades_returns_zero():
    """With no graded judgments the score is exactly 0.0."""
    no_grades = {}
    score = graded_ndcg_at_k([1, 2, 3], no_grades, 10)
    assert score == 0.0
# ─── 3. ndcg_at_k (binary) also applies dedup ──────────────────
def test_ndcg_binary_with_duplicates_invariant():
    """Binary NDCG obeys the same invariant: duplicates must not push it above 1.0."""
    out = ndcg_at_k([100, 100, 200, 300], [100, 200], 10)
    assert 1.0 >= out, f"binary NDCG > 1.0 = invariant 위반: {out}"
# ─── 4. graded_recall_at_k: unaffected (set-conversion invariant) ────
def test_graded_recall_unaffected_by_duplicates():
    """Recall must be identical with and without duplicates.

    Per the original note, recall converts the returned ids to a set, so the
    dedup invariant holds automatically.
    """
    grades = {100: 3, 200: 2}
    with_dupes = graded_recall_at_k([100, 100, 200, 300], grades, threshold=2, k=10)
    without_dupes = graded_recall_at_k([100, 200, 300], grades, threshold=2, k=10)
    # Both 100 and 200 are hits, so recall is 1.0 either way.
    assert with_dupes == without_dupes
    assert without_dupes == 1.0