merge(search): PR-2Q-Search-Result-Dedup closed — 진짜 multi-query 효과 측정 (NDCG +0.019 / latency +876% cold)
This commit is contained in:
@@ -452,6 +452,31 @@ def _rrf_fuse_variants(
|
||||
return fused[:limit]
|
||||
|
||||
|
||||
def _dedup_results_by_doc_id(results: "list[SearchResult]") -> "list[SearchResult]":
|
||||
"""API response 의 results 를 doc.id 기준 first-only dedup.
|
||||
|
||||
PR-2Q-Search-Result-Dedup. multi-query path 의 reranker output → apply_diversity 가
|
||||
top_score ≥ 0.90 시 diversity 제약 해제 (unlimited path) → 같은 doc 의 N chunks 가
|
||||
results 에 박제 → returned_ids 에 doc.id 중복 → graded NDCG inflation 직접 원인.
|
||||
|
||||
baseline (single-query) path 의 reranker 는 자연스럽게 doc 분산 score → dedup
|
||||
audit 0/51 정상. multi-query 의 variants 가 같은 doc 의 정답 chunks 집중 retrieval
|
||||
→ unified RRF + reranker score 합산 → 0.90+ 다수 → unlimited path → 중복.
|
||||
|
||||
본 helper = first-only (top score 보존). [[feedback_graded_ndcg_dedup_invariant]] +
|
||||
measurement chain (Phase 3 0.927 → Rerank-Fix 0.876 → eval-dedup 0.641) 의 마지막
|
||||
cleanup.
|
||||
"""
|
||||
seen: set[int] = set()
|
||||
out: list["SearchResult"] = []
|
||||
for r in results:
|
||||
if r.id in seen:
|
||||
continue
|
||||
seen.add(r.id)
|
||||
out.append(r)
|
||||
return out
|
||||
|
||||
|
||||
def _dedup_chunks_by_id(chunks: "list[SearchResult]") -> "list[SearchResult]":
|
||||
"""chunk_id 기준 dedup. chunk_id None 인 doc-level result 는 doc.id 기준 first-only.
|
||||
|
||||
@@ -608,10 +633,18 @@ async def search_with_rewrite(
|
||||
timing["rerank_ms"] = (time.perf_counter() - t_re) * 1000
|
||||
|
||||
t_div = time.perf_counter()
|
||||
results = apply_diversity(reranked, max_per_doc=MAX_CHUNKS_PER_DOC)[:limit]
|
||||
# PR-2Q-Search-Result-Dedup:
|
||||
# (a) top_score_threshold=2.0 강제 — apply_diversity 의 unlimited path 우회
|
||||
# (top_score ≥ 0.90 다수 case 에서 같은 doc chunks 중복 박제 원인).
|
||||
# (b) _dedup_results_by_doc_id — apply_diversity 후에도 max_per_doc 가 2 라서
|
||||
# 같은 doc 2 chunks 가능. doc.id 기준 first-only dedup (top score 보존).
|
||||
# baseline (run_search) path 는 변경 0 — multi-query 전용 invariant.
|
||||
diversified = apply_diversity(reranked, max_per_doc=MAX_CHUNKS_PER_DOC,
|
||||
top_score_threshold=2.0)
|
||||
results = _dedup_results_by_doc_id(diversified)[:limit]
|
||||
timing["diversity_ms"] = (time.perf_counter() - t_div) * 1000
|
||||
else:
|
||||
results = unified_docs[:limit]
|
||||
results = _dedup_results_by_doc_id(unified_docs)[:limit]
|
||||
|
||||
# 6) freshness + display 정규화 (run_search 동일 마무리)
|
||||
t_fr = time.perf_counter()
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,dedup_count,error
|
||||
single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3879;3868;3890;3863;3856;3908;3851;4041;10573;3971,5395.7,1.000,1.000,0.947,0,0.731,1.000,1.000,0,
|
||||
single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3917;10573;3923;3919;3916;3918;10571;3922;3874,8606.3,1.000,1.000,1.000,1,0.834,1.000,1.000,0,
|
||||
single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3985;3984;3993;3869;3980;3857;3978;3983;3982,8914.5,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3852;3851;3915;3775;3905;3904;3777;3858;3903,9605.0,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,10570;3888;3911;3905;3889;3890;3910;3902;3893;3887,12229.0,1.000,0.500,0.631,1,0.631,1.000,1.000,0,
|
||||
single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5244;3878;5249;3855;3897;3863;3868;3874;5253;3856,8389.9,0.500,0.143,0.243,0,0.279,0.667,1.000,0,
|
||||
single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;5227;3867;5236;3917;3854;3878;3851;5244;10573,8367.0,1.000,1.000,0.860,1,0.914,1.000,1.000,0,
|
||||
single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3903;3904;3896;3909;3985;3981;3760;5253;3774,9972.2,0.667,1.000,0.626,1,0.626,0.667,0.000,0,
|
||||
single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10572;10573;3917;3916;3923;3921;3918;3919;10571;3854,9556.8,0.750,0.333,0.502,1,0.502,0.750,1.000,0,
|
||||
single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;3876;5249;5234;4025;11677;6675;10573;4842;3867,10012.5,0.500,1.000,0.613,1,0.787,0.500,1.000,0,
|
||||
single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,14813;15924;15976;16378;16081;18077;22048;12213;16019;15793,7345.5,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,22049;17123;9022;11945;5391;6396;6829;9105;6774;6314,8375.8,0.000,0.000,0.000,0,0.000,0.000,0.000,0,
|
||||
single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4065;4064;4066;4071;4068;4058;4067;4069;5063,10420.2,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4070;4060;4062;4059;4061;4058;4064;4065;4066;4068,9299.9,1.000,0.500,0.712,1,0.712,1.000,0.000,0,
|
||||
single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,21186;4775;4202;4776;4679;4199;4519;4668;21276;4658,7345.4,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3758;3770;3791;3817;3763;4540;3787;3762;5244,3342.9,0.500,0.333,0.307,1,0.394,0.500,1.000,0,
|
||||
single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5236;5249;3756;5229;3774;3755;3767;3758;5230;3773,4051.7,0.250,0.167,0.139,1,0.139,0.250,0.000,0,
|
||||
single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3772;3790;5260;3897;3755;10574;13936;13937;10568;11669,3469.6,1.000,1.000,1.000,1,1.000,1.000,0.000,0,
|
||||
single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,5840;4678;16010;16457;6945;5398;4199;6996;23149;4776,3350.5,0.125,0.143,0.084,1,0.084,0.125,0.000,0,
|
||||
single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,21848;8381;16823;7473;21275;4262;9545;16927;16378;15924,2735.1,0.143,0.167,0.098,1,0.098,0.143,0.000,0,
|
||||
single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5161;5070;5262;23732;4546;24155;5092;20758;4547;3774,2386.0,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5057;5090;5068;5063;5103;5066;5076;24955;16289;5094,2204.5,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,24576;16689;23336;16059;20470;9102;17133;20022;18286;16454,2139.9,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3886;3902;3887;3895;3898;3885;3775;3911;3910;3783,3406.5,1.000,1.000,0.920,1,0.956,1.000,1.000,0,
|
||||
single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;13930;3895;3755;3866;3903;3890;3794;3898;3854,3534.0,0.500,1.000,0.613,1,0.917,1.000,1.000,0,
|
||||
single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11579;4025;4026;11693;13299;11676;13749;13941;13766,4234.8,1.000,0.333,0.571,1,0.539,1.000,1.000,0,
|
||||
single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;13299;13313;13918;13310,2638.8,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13657;13655;13651;13656;13649;13752;13658;13648,2581.5,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3895;3902;3758;3886;3755;3896;3887;13935;3772,3207.5,1.000,1.000,0.790,1,0.955,1.000,1.000,0,
|
||||
single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5178;5180;5207;5140;5137;5149;5186;5212,4496.6,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5208;5210;5143;5206;5137;5207;5182;5212,6334.0,1.000,1.000,0.832,1,0.907,1.000,1.000,0,
|
||||
single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5204;5178;5214;5224;5210;5148;5145;5186;5190,5200.1,1.000,1.000,0.818,1,0.961,1.000,1.000,0,
|
||||
single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5141;5137;5139;5136;5140;5186;5178;5145;5143,4471.4,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5139;5180;5210;5179;5143;5182;5186;5222;5133,4612.3,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5204;5224;5208;5209;5205;5178;5180;5225;5187;5186,5665.3,0.500,0.250,0.264,0,0.395,1.000,1.000,0,
|
||||
single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5189;5180;5187;5186;5188;5182;5137;5185;5136,4672.7,0.500,1.000,0.613,1,0.787,0.500,1.000,0,
|
||||
single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3774;3755;3756;3812;3760;3778;3761;3769;3815,3160.9,1.000,1.000,0.920,1,0.983,1.000,1.000,0,
|
||||
single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5204;5149;5225;5206;5210;5178;5137;5182,4939.9,0.750,1.000,0.767,1,0.686,0.750,1.000,0,
|
||||
single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5209;5222;5225;5208;5180;5204;5210;5211;5205,3692.2,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,4026;5145;13651;3895;5210;5143;13749;5139;5186;13315,3530.3,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13760;13674;13669;13774;13773;13675;11688;13757;11689,2403.7,0.500,1.000,0.503,1,0.727,0.667,1.000,0,
|
||||
single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,10575;5177;10572;11671;11653;11649;5173;13946;13669;11655,3204.9,0.000,0.000,0.000,0,0.000,0.000,0.000,0,
|
||||
single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11533;5090;4544;5081;11509;5140;5089;11476;5106,3281.3,0.500,1.000,0.613,1,0.787,0.500,1.000,0,
|
||||
single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;3788;5090;5139;11503;11486;5106;5178,2689.1,0.667,1.000,0.765,1,0.856,0.667,1.000,0,
|
||||
single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11479;11516;11475;5090;5084;11515;11531;11476;11473,3003.3,1.000,1.000,0.818,1,0.900,1.000,1.000,0,
|
||||
single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11665;11664;11591;13660;13948;5177;13942;13917;11660;13752,3346.0,0.333,0.333,0.235,1,0.337,0.333,1.000,0,
|
||||
single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11627;11658;11600;11625;11692;13653;13918;13305;13751;5177,2660.4,0.667,1.000,0.671,1,0.883,1.000,1.000,0,
|
||||
single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11595;11616;13669;11617;11655;11649;13304;11690;13756;11658,2229.0,0.333,0.250,0.202,0,0.321,0.500,1.000,0,
|
||||
single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11711;11712;11503;11500;11713;11714;13930;11717;11701;11502,2354.1,1.000,1.000,1.000,1,0.858,1.000,1.000,0,
|
||||
single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11647;5177;11693;11692;13664;13665;13661;13666;13670,2520.7,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,4026;3971;5236;3966;3977;3972;4025;3973;3974;3895,2986.4,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
|
@@ -0,0 +1,52 @@
|
||||
label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,dedup_count,error
|
||||
single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3879;3856;3851;4041;3781;3775;3756;3783;3782,1826.7,1.000,1.000,1.000,1,0.808,1.000,1.000,0,
|
||||
single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3874;3922;3917;3918;10573;3854;10571;3757;3877,1089.5,1.000,1.000,0.877,0,0.676,1.000,1.000,0,
|
||||
single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3985;3980;3857;3984;3880;3993;3787;3903;3988,633.9,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3858;3852;3781;3851;3881;3905;3818;3912;3880,1289.0,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3910;3888;3905;3890;3885;3895;3894;3898;3911;3902,1368.3,1.000,0.500,0.631,1,0.631,1.000,1.000,0,
|
||||
single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5249;3897;3785;5244;3868;3784;3782;5253;3758;3851,1973.8,0.250,0.200,0.151,0,0.107,0.333,0.000,0,
|
||||
single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;5227;3867;5236;3917;3854;3878;3851;5244;10573,1757.7,1.000,1.000,0.860,1,0.914,1.000,1.000,0,
|
||||
single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3903;3904;3896;3909;3985;3981;3760;5253;3774,1866.7,0.667,1.000,0.626,1,0.626,0.667,0.000,0,
|
||||
single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10572;10573;3917;3916;3923;3921;3918;3919;10571;3854,1867.7,0.750,0.333,0.502,1,0.502,0.750,1.000,0,
|
||||
single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;3876;5249;5234;4025;11677;6675;10573;4842;3867,2554.0,0.500,1.000,0.613,1,0.787,0.500,1.000,0,
|
||||
single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,14813;15924;15976;16378;16081;18077;22048;12213;16019;15793,1304.6,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,22049;17123;9022;11945;5391;6396;6829;9105;6774;6314,952.6,0.000,0.000,0.000,0,0.000,0.000,0.000,0,
|
||||
single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4065;4064;4066;4071;4068;4058;4067;4069;5063,1424.4,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4070;4060;4062;4059;4061;4058;4064;4065;4066;4068,1730.9,1.000,0.500,0.712,1,0.712,1.000,0.000,0,
|
||||
single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,21186;4775;4202;4776;4679;4199;4519;4668;21276;4658,468.5,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3758;3770;3791;3817;3763;4540;3787;3762;5244,1477.9,0.500,0.333,0.307,1,0.394,0.500,1.000,0,
|
||||
single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5236;5249;3756;5229;3774;3755;3767;3758;5230;3773,2087.3,0.250,0.167,0.139,1,0.139,0.250,0.000,0,
|
||||
single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3772;3790;5260;3897;3755;10574;13936;13937;10568;11669,1929.1,1.000,1.000,1.000,1,1.000,1.000,0.000,0,
|
||||
single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,5840;4678;16010;16457;6945;5398;4199;6996;23149;4776,1740.2,0.125,0.143,0.084,1,0.084,0.125,0.000,0,
|
||||
single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,21848;8381;16823;7473;21275;4262;9545;16927;16378;15924,1561.1,0.143,0.167,0.098,1,0.098,0.143,0.000,0,
|
||||
single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5161;5070;5262;23732;4546;24155;5092;20758;4547;3774,735.7,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5057;5090;5068;5063;5103;5066;5076;24955;16289;5094,758.5,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,18567;20022;20470;4634;20066;15361;15984;3801;16059;19172,462.8,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3886;3887;3895;3902;3894;3889;3892;3890;3888;3893,1786.3,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3895;3903;13930;3897;13931;3755;3766;13929;3894,1442.8,1.000,1.000,0.807,1,0.959,1.000,1.000,0,
|
||||
single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11579;4025;4026;11693;13299;11676;13749;13941;13766,1992.2,1.000,0.333,0.571,1,0.539,1.000,1.000,0,
|
||||
single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;13299;13313;13918;13310,840.8,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13657;13655;13651;13656;13649;13752;13658;13648,546.2,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3895;3902;3758;3886;3755;3896;3887;13935;3772,2082.8,1.000,1.000,0.790,1,0.955,1.000,1.000,0,
|
||||
single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5178;5180;5207;5140;5137;5149;5186;5212,3028.2,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5208;5210;5143;5206;5137;5207;5182;5212,3419.1,1.000,1.000,0.832,1,0.907,1.000,1.000,0,
|
||||
single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5204;5178;5214;5224;5210;5148;5145;5186;5190,4455.4,1.000,1.000,0.818,1,0.961,1.000,1.000,0,
|
||||
single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5141;5137;5139;5136;5140;5186;5178;5145;5143,3496.9,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5139;5180;5210;5179;5143;5182;5186;5222;5133,3531.3,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5204;5224;5208;5209;5205;5178;5180;5225;5187;5186,3554.1,0.500,0.250,0.264,0,0.395,1.000,1.000,0,
|
||||
single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5189;5180;5187;5186;5188;5182;5137;5185;5136,3005.6,0.500,1.000,0.613,1,0.787,0.500,1.000,0,
|
||||
single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3774;3755;3756;3812;3760;3778;3761;3769;3815,1279.7,1.000,1.000,0.920,1,0.983,1.000,1.000,0,
|
||||
single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5204;5149;5225;5206;5210;5178;5137;5182,2407.0,0.750,1.000,0.767,1,0.686,0.750,1.000,0,
|
||||
single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5209;5222;5225;5208;5180;5204;5210;5211;5205,2768.2,1.000,1.000,1.000,1,1.000,1.000,1.000,0,
|
||||
single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,4026;5145;13651;3895;5210;5143;13749;5139;5186;13315,1955.3,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13760;13674;13669;13774;13773;13675;11688;13757;11689,775.1,0.500,1.000,0.503,1,0.727,0.667,1.000,0,
|
||||
single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,10575;5177;10572;11671;11653;11649;5173;13946;13669;11655,2120.0,0.000,0.000,0.000,0,0.000,0.000,0.000,0,
|
||||
single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11533;5090;4544;5081;11509;5140;5089;11476;5106,1253.4,0.500,1.000,0.613,1,0.787,0.500,1.000,0,
|
||||
single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;3788;5090;5139;11503;11486;5106;5178,655.5,0.667,1.000,0.765,1,0.856,0.667,1.000,0,
|
||||
single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11479;11516;11475;5090;5084;11515;11531;11476;11473,1587.9,1.000,1.000,0.818,1,0.900,1.000,1.000,0,
|
||||
single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11665;11664;11591;13660;13948;5177;13942;13917;11660;13752,1728.3,0.333,0.333,0.235,1,0.337,0.333,1.000,0,
|
||||
single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11627;11658;11600;11625;11692;13653;13918;13305;13751;5177,662.3,0.667,1.000,0.671,1,0.883,1.000,1.000,0,
|
||||
single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11595;11616;13669;11617;11655;11649;13304;11690;13756;11658,657.8,0.333,0.250,0.202,0,0.321,0.500,1.000,0,
|
||||
single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11711;11712;11503;11500;11713;11714;13930;11717;11701;11502,710.2,1.000,1.000,1.000,1,0.858,1.000,1.000,0,
|
||||
single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11647;5177;11693;11692;13664;13665;13661;13666;13670,523.6,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,4026;3971;5236;3966;3977;3972;4025;3973;3974;3895,1376.1,0.000,0.000,0.000,1,0.000,0.000,0.000,0,
|
||||
|
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"version": "v0.2-phase2q-result-dedup",
|
||||
"label": "phase_2q_search_result_dedup_real_effect_measurement",
|
||||
"date": "2026-05-24",
|
||||
"plan": "본 chore inline",
|
||||
"snapshot": {"doc_id_max": 25180, "chunk_id_max": 56526},
|
||||
"fix_summary": {
|
||||
"root_cause": "apply_diversity 의 top_score ≥ 0.90 시 unlimited path → 같은 doc 의 N chunks 가 results 에 박제. multi-query 의 reranker score 가 자주 0.90+ → 다수 case 에서 doc.id 중복.",
|
||||
"code_changes": [
|
||||
"app/services/search/search_pipeline.py — _dedup_results_by_doc_id() helper 신규 (doc.id first-only)",
|
||||
"search_with_rewrite() — apply_diversity(top_score_threshold=2.0) 강제 + 후속 _dedup_results_by_doc_id 적용 + rerank=False path 도 _dedup",
|
||||
"baseline (run_search) path 변경 0 — multi-query 전용 invariant"
|
||||
],
|
||||
"test_changes": [
|
||||
"tests/test_query_rewriter.py — _dedup_results_by_doc_id 4 신규 test (55/55 PASS)"
|
||||
]
|
||||
},
|
||||
"measurements": {
|
||||
"baseline_rewrite_null_eval_dedup_reference": {
|
||||
"label": "baseline (eval dedup 적용, fix 전과 동일 — single-query path 영향 0)",
|
||||
"graded_ndcg_at_10": 0.644,
|
||||
"graded_recall_at_10_t2": 0.699,
|
||||
"graded_recall_at_10_t3": 0.761,
|
||||
"latency_p50_ms": 378,
|
||||
"latency_p95_ms": 1931,
|
||||
"dedup_audit": "0/51 ✓ 정상"
|
||||
},
|
||||
"gemma_cold": {
|
||||
"label": "Result-Dedup fix 후 cold (cache empty)",
|
||||
"graded_ndcg_at_10": 0.663,
|
||||
"graded_recall_at_10_t2": 0.729,
|
||||
"graded_recall_at_10_t3": 0.761,
|
||||
"latency_p50_ms": 3692,
|
||||
"latency_p95_ms": 9992,
|
||||
"dedup_audit": "0/51 ✓ 정상 (fix 작동)",
|
||||
"by_category": {
|
||||
"english_only": {"recall": 0.78, "gndcg": 0.78},
|
||||
"exam": {"recall": 0.64, "gndcg": 0.71},
|
||||
"korean_only": {"recall": 0.60, "gndcg": 0.54},
|
||||
"mixed": {"recall": 0.43, "gndcg": 0.41},
|
||||
"standards": {"recall": 0.95, "gndcg": 0.87}
|
||||
},
|
||||
"csv": "reports/v0_2_phase2q_result_dedup_gemma_2026-05-24.csv"
|
||||
},
|
||||
"gemma_warm": {
|
||||
"label": "Result-Dedup fix 후 warm (cache hit)",
|
||||
"graded_ndcg_at_10": 0.659,
|
||||
"graded_recall_at_10_t2": 0.721,
|
||||
"graded_recall_at_10_t3": 0.739,
|
||||
"latency_p50_ms": 1588,
|
||||
"latency_p95_ms": 3514,
|
||||
"dedup_audit": "0/51 ✓ 정상",
|
||||
"csv": "reports/v0_2_phase2q_result_dedup_gemma_warm_2026-05-24.csv"
|
||||
}
|
||||
},
|
||||
"real_effect_delta_vs_baseline": {
|
||||
"ndcg_overall": "+0.019 (cold) / +0.015 (warm) — sub-noise level 개선",
|
||||
"recall_t2": "+0.030 (cold) / +0.022 (warm) — 소량 개선",
|
||||
"recall_t3": "0.000 (cold) / -0.022 (warm) — 동등~약간 회귀",
|
||||
"category_winners": "english_only +0.06 / standards +0.05 / mixed +0.03",
|
||||
"category_losers": "exam -0.03 / korean_only -0.03",
|
||||
"latency_p50_cold": "+3314ms (+876%) — 사용자 UX 비현실적",
|
||||
"latency_p50_warm": "+1210ms (+320%) — cache prewarm 후에도 무거움"
|
||||
},
|
||||
"measurement_chain_정정_history": [
|
||||
{"commit": "a41adb6", "phase": "Phase 3 (chunk dedup 0)", "ndcg": 0.927, "inflation": "chunk_id 중복"},
|
||||
{"commit": "b734fc5", "phase": "Rerank-Fix (chunk dedup)", "ndcg": 0.876, "inflation": "doc_id 중복 잔재"},
|
||||
{"commit": "3553573", "phase": "Eval-Dedup (eval layer dedup)", "ndcg": 0.641, "inflation": "production path 의 doc 중복 (단 eval 측정만 정정)"},
|
||||
{"commit": "TBD (본 PR)", "phase": "Result-Dedup (production dedup)", "ndcg": 0.663, "inflation": "0 ✓ 정상", "note": "이번이 정확한 production 측정값"}
|
||||
],
|
||||
"user_decision_required": {
|
||||
"context": "multi-query 의 진짜 net 효과 = NDCG +0.019 + Recall +0.030 (sub-noise) / latency cold +876% warm +320%",
|
||||
"options": [
|
||||
{
|
||||
"label": "rollback (opt-in flag 제거 또는 영구 default null)",
|
||||
"rationale": "marginal quality 개선 (NDCG +0.019) 이 latency cost 정당화 X. 시스템 복잡도 비용 큼.",
|
||||
"action": "search.py 의 ?rewrite_backend= 제거 또는 description 갱신 (deprecated 표시). docs/phase_2q_apply_opt_in.md 의 metric 목표 정정 (NDCG ≥ 0.74 → ≥ 0.66 acceptable)."
|
||||
},
|
||||
{
|
||||
"label": "opt-in 유지 + cache prewarm (PR-2Q-Cache-Prewarm) 진입",
|
||||
"rationale": "warm path latency 1.6s = production 가용. nightly cron 으로 top-N query prewarm 시 사용자 cold path 회피 가능.",
|
||||
"action": "PR-2Q-Cache-Prewarm 별 plan + nightly cron + 1주 운영 후 metric 재평가."
|
||||
},
|
||||
{
|
||||
"label": "현 상태 유지 + 1주 운영 관찰 종료 후 (2026-05-31) 재결정",
|
||||
"rationale": "실제 사용자 query 분포에서 어떤 카테고리가 dominant 인지 확인 후 결정. opt-in 이라 production 영향 0.",
|
||||
"action": "현 상태 유지. 2026-05-31 metric 분석 후 별 PR."
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -517,3 +517,45 @@ def test_phase2q_rerank_input_cap_constants():
|
||||
)
|
||||
assert PHASE2Q_RERANK_INPUT_CAP == 60
|
||||
assert PHASE2Q_CHUNKS_PER_DOC == 2
|
||||
|
||||
|
||||
# ─── 10. PR-2Q-Search-Result-Dedup — results doc_id dedup ───
|
||||
# multi-query path 의 reranker output → apply_diversity unlimited path 시 같은 doc 의
|
||||
# N chunks 박제 → returned_ids inflation 직접 원인. _dedup_results_by_doc_id helper 로
|
||||
# API response invariant 강제.
|
||||
|
||||
|
||||
def test_dedup_results_empty_returns_empty():
    """An empty result list dedups to an empty list."""
    from services.search.search_pipeline import _dedup_results_by_doc_id

    assert _dedup_results_by_doc_id([]) == []
|
||||
|
||||
|
||||
def test_dedup_results_no_duplicates_passthrough():
    """Results with distinct ids pass through unchanged and in order."""
    from services.search.search_pipeline import _dedup_results_by_doc_id

    # Assumes _mk_search_result(i) builds a result whose ``id`` is ``i``;
    # the factory is defined elsewhere in this test module.
    docs = [_mk_search_result(i) for i in (10, 20, 30)]
    out = _dedup_results_by_doc_id(docs)
    assert [r.id for r in out] == [10, 20, 30]
|
||||
|
||||
|
||||
def test_dedup_results_first_only_preserves_top_score():
    """When an id repeats, the first entry is kept (the reranker's best chunk)."""
    from services.search.search_pipeline import _dedup_results_by_doc_id

    # Assumes _mk_search_result(i, score=s) sets ``id`` and ``score``;
    # the factory is defined elsewhere in this test module.
    docs = [
        _mk_search_result(10, score=0.95),  # rank 1, keep
        _mk_search_result(20, score=0.85),  # keep
        _mk_search_result(10, score=0.70),  # duplicate, dropped (lower score)
        _mk_search_result(30, score=0.60),  # keep
        _mk_search_result(20, score=0.50),  # duplicate, dropped
    ]
    out = _dedup_results_by_doc_id(docs)
    assert [r.id for r in out] == [10, 20, 30]
    assert [r.score for r in out] == [0.95, 0.85, 0.60]  # first-seen scores kept
|
||||
|
||||
|
||||
def test_dedup_results_phase2q_kw_001_case():
    """Measured Phase 2Q case: id 3868 appears twice; only the first is kept."""
    from services.search.search_pipeline import _dedup_results_by_doc_id

    # Assumes _mk_search_result(i) builds a result whose ``id`` is ``i``;
    # the factory is defined elsewhere in this test module.
    docs = [_mk_search_result(i) for i in [3868, 3879, 3856, 3851, 3868, 3858]]
    out = _dedup_results_by_doc_id(docs)
    assert [r.id for r in out] == [3868, 3879, 3856, 3851, 3858]
    assert len(out) == 5  # 6 -> 5 (one duplicate removed)
|
||||
|
||||
Reference in New Issue
Block a user