From 3b753f18d62b8c4de061ab08679be7937b135d7e Mon Sep 17 00:00:00 2001 From: hyungi Date: Sun, 24 May 2026 04:48:50 +0000 Subject: [PATCH] =?UTF-8?q?fix(search):=20Phase=202Q=20result=20dedup=20?= =?UTF-8?q?=E2=80=94=20apply=5Fdiversity=20unlimited=20path=20doc=5Fid=20i?= =?UTF-8?q?nflation=20=EC=B0=A8=EB=8B=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-2Q-Search-Result-Dedup. measurement chain 의 마지막 cleanup. plan inline. root cause: apply_diversity 의 top_score ≥ 0.90 → unlimited path (diversity 제약 해제) → 같은 doc 의 N chunks 가 results 에 박제 → returned_ids 에 doc.id 중복 → 모든 graded metric inflation. multi-query 의 reranker score 가 자주 0.90+ → 다수 case 영향. 변경 (baseline path 영향 0, multi-query 전용 invariant): - app/services/search/search_pipeline.py: · _dedup_results_by_doc_id() helper 신규 (doc.id first-only, top score 보존) · search_with_rewrite() 의 rerank path 에 apply_diversity(top_score_threshold=2.0) 강제 + 후속 _dedup_results_by_doc_id 적용 · rerank=False path 도 _dedup_results_by_doc_id(unified_docs) 적용 - tests/test_query_rewriter.py — 신규 4 test (55/55 PASS) 🎯 진짜 측정값 (모든 dedup layer 적용, 51 case gemma): cold: NDCG 0.663 / Recall t≥2 0.729 / Recall t≥3 0.761 / p50 3692ms / p95 9992ms warm: NDCG 0.659 / Recall t≥2 0.721 / Recall t≥3 0.739 / p50 1588ms / p95 3514ms baseline (rewrite_backend=null): NDCG 0.644 / Recall t≥2 0.699 / Recall t≥3 0.761 / p50 378ms Dedup audit: gemma 0/51 ✓ 정상 (fix 작동, eval-dedup 42/51 → 0/51 회복) Δ vs baseline (진짜 multi-query 효과): NDCG +0.019 (cold) / +0.015 (warm) — sub-noise level Recall t≥2 +0.030 (cold) / +0.022 (warm) — 소량 개선 Recall t≥3 0.000 / -0.022 — 동등~약간 회귀 latency p50 +876% (cold) / +320% (warm) — major cost category: english/standards/mixed 약간 우세 / exam/korean 약간 회귀 measurement chain 정정 history: Phase 3 (a41adb6) 0.927 — chunk_id 중복 inflation Rerank-Fix (b734fc5) 0.876 — doc_id 중복 잔재 Eval-Dedup (3553573) 0.641 — eval layer 만 dedup Result-Dedup (본 PR) 0.663 — production + eval 둘 다 dedup ← 정확값 사용자 결정 필요 (3 path, json 박제): (a) rollback — marginal 개선이 latency cost 정당화 X (b) opt-in 유지 + PR-2Q-Cache-Prewarm 진입 (warm path 만 노출) (c) 1주 관찰 종료 후 (2026-05-31) 재결정 (현 상태 유지) 산출물: reports/v0_2_phase2q_result_dedup_gemma_{cold,warm}_2026-05-24.csv tests/search_eval/baselines/v0_2_phase2q_result_dedup_2026-05-24.json (요약 + 사용자 결정 옵션) Co-Authored-By: Claude Opus 4.7 (1M context) --- app/services/search/search_pipeline.py | 37 +++++++- ..._phase2q_result_dedup_gemma_2026-05-24.csv | 52 +++++++++++ ...e2q_result_dedup_gemma_warm_2026-05-24.csv | 52 +++++++++++ .../v0_2_phase2q_result_dedup_2026-05-24.json | 91 +++++++++++++++++++ tests/test_query_rewriter.py | 42 +++++++++ 5 files changed, 272 insertions(+), 2 deletions(-) create mode 100644 reports/v0_2_phase2q_result_dedup_gemma_2026-05-24.csv create mode 100644 reports/v0_2_phase2q_result_dedup_gemma_warm_2026-05-24.csv create mode 100644 tests/search_eval/baselines/v0_2_phase2q_result_dedup_2026-05-24.json diff --git a/app/services/search/search_pipeline.py b/app/services/search/search_pipeline.py index 6693ecf..26698b5 100644 --- a/app/services/search/search_pipeline.py +++ b/app/services/search/search_pipeline.py @@ -452,6 +452,31 @@ def _rrf_fuse_variants( return fused[:limit] +def _dedup_results_by_doc_id(results: "list[SearchResult]") -> "list[SearchResult]": + """API response 의 results 를 doc.id 기준 first-only dedup. + + PR-2Q-Search-Result-Dedup. multi-query path 의 reranker output → apply_diversity 가 + top_score ≥ 0.90 시 diversity 제약 해제 (unlimited path) → 같은 doc 의 N chunks 가 + results 에 박제 → returned_ids 에 doc.id 중복 → graded NDCG inflation 직접 원인. + + baseline (single-query) path 의 reranker 는 자연스럽게 doc 분산 score → dedup + audit 0/51 정상. multi-query 의 variants 가 같은 doc 의 정답 chunks 집중 retrieval + → unified RRF + reranker score 합산 → 0.90+ 다수 → unlimited path → 중복. + + 본 helper = first-only (top score 보존). [[feedback_graded_ndcg_dedup_invariant]] + + measurement chain (Phase 3 0.927 → Rerank-Fix 0.876 → eval-dedup 0.641) 의 마지막 + cleanup. + """ + seen: set[int] = set() + out: list["SearchResult"] = [] + for r in results: + if r.id in seen: + continue + seen.add(r.id) + out.append(r) + return out + + def _dedup_chunks_by_id(chunks: "list[SearchResult]") -> "list[SearchResult]": """chunk_id 기준 dedup. chunk_id None 인 doc-level result 는 doc.id 기준 first-only. @@ -608,10 +633,18 @@ async def search_with_rewrite( timing["rerank_ms"] = (time.perf_counter() - t_re) * 1000 t_div = time.perf_counter() - results = apply_diversity(reranked, max_per_doc=MAX_CHUNKS_PER_DOC)[:limit] + # PR-2Q-Search-Result-Dedup: + # (a) top_score_threshold=2.0 강제 — apply_diversity 의 unlimited path 우회 + # (top_score ≥ 0.90 다수 case 에서 같은 doc chunks 중복 박제 원인). + # (b) _dedup_results_by_doc_id — apply_diversity 후에도 max_per_doc 가 2 라서 + # 같은 doc 2 chunks 가능. doc.id 기준 first-only dedup (top score 보존). + # baseline (run_search) path 는 변경 0 — multi-query 전용 invariant. + diversified = apply_diversity(reranked, max_per_doc=MAX_CHUNKS_PER_DOC, + top_score_threshold=2.0) + results = _dedup_results_by_doc_id(diversified)[:limit] timing["diversity_ms"] = (time.perf_counter() - t_div) * 1000 else: - results = unified_docs[:limit] + results = _dedup_results_by_doc_id(unified_docs)[:limit] # 6) freshness + display 정규화 (run_search 동일 마무리) t_fr = time.perf_counter() diff --git a/reports/v0_2_phase2q_result_dedup_gemma_2026-05-24.csv b/reports/v0_2_phase2q_result_dedup_gemma_2026-05-24.csv new file mode 100644 index 0000000..898b86b --- /dev/null +++ b/reports/v0_2_phase2q_result_dedup_gemma_2026-05-24.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,dedup_count,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3879;3868;3890;3863;3856;3908;3851;4041;10573;3971,5395.7,1.000,1.000,0.947,0,0.731,1.000,1.000,0, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3917;10573;3923;3919;3916;3918;10571;3922;3874,8606.3,1.000,1.000,1.000,1,0.834,1.000,1.000,0, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3985;3984;3993;3869;3980;3857;3978;3983;3982,8914.5,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3852;3851;3915;3775;3905;3904;3777;3858;3903,9605.0,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,10570;3888;3911;3905;3889;3890;3910;3902;3893;3887,12229.0,1.000,0.500,0.631,1,0.631,1.000,1.000,0, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5244;3878;5249;3855;3897;3863;3868;3874;5253;3856,8389.9,0.500,0.143,0.243,0,0.279,0.667,1.000,0, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;5227;3867;5236;3917;3854;3878;3851;5244;10573,8367.0,1.000,1.000,0.860,1,0.914,1.000,1.000,0, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3903;3904;3896;3909;3985;3981;3760;5253;3774,9972.2,0.667,1.000,0.626,1,0.626,0.667,0.000,0, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10572;10573;3917;3916;3923;3921;3918;3919;10571;3854,9556.8,0.750,0.333,0.502,1,0.502,0.750,1.000,0, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;3876;5249;5234;4025;11677;6675;10573;4842;3867,10012.5,0.500,1.000,0.613,1,0.787,0.500,1.000,0, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,14813;15924;15976;16378;16081;18077;22048;12213;16019;15793,7345.5,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,22049;17123;9022;11945;5391;6396;6829;9105;6774;6314,8375.8,0.000,0.000,0.000,0,0.000,0.000,0.000,0, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4065;4064;4066;4071;4068;4058;4067;4069;5063,10420.2,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4070;4060;4062;4059;4061;4058;4064;4065;4066;4068,9299.9,1.000,0.500,0.712,1,0.712,1.000,0.000,0, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,21186;4775;4202;4776;4679;4199;4519;4668;21276;4658,7345.4,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3758;3770;3791;3817;3763;4540;3787;3762;5244,3342.9,0.500,0.333,0.307,1,0.394,0.500,1.000,0, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5236;5249;3756;5229;3774;3755;3767;3758;5230;3773,4051.7,0.250,0.167,0.139,1,0.139,0.250,0.000,0, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3772;3790;5260;3897;3755;10574;13936;13937;10568;11669,3469.6,1.000,1.000,1.000,1,1.000,1.000,0.000,0, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,5840;4678;16010;16457;6945;5398;4199;6996;23149;4776,3350.5,0.125,0.143,0.084,1,0.084,0.125,0.000,0, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,21848;8381;16823;7473;21275;4262;9545;16927;16378;15924,2735.1,0.143,0.167,0.098,1,0.098,0.143,0.000,0, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5161;5070;5262;23732;4546;24155;5092;20758;4547;3774,2386.0,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5057;5090;5068;5063;5103;5066;5076;24955;16289;5094,2204.5,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,24576;16689;23336;16059;20470;9102;17133;20022;18286;16454,2139.9,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3886;3902;3887;3895;3898;3885;3775;3911;3910;3783,3406.5,1.000,1.000,0.920,1,0.956,1.000,1.000,0, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;13930;3895;3755;3866;3903;3890;3794;3898;3854,3534.0,0.500,1.000,0.613,1,0.917,1.000,1.000,0, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11579;4025;4026;11693;13299;11676;13749;13941;13766,4234.8,1.000,0.333,0.571,1,0.539,1.000,1.000,0, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;13299;13313;13918;13310,2638.8,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13657;13655;13651;13656;13649;13752;13658;13648,2581.5,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3895;3902;3758;3886;3755;3896;3887;13935;3772,3207.5,1.000,1.000,0.790,1,0.955,1.000,1.000,0, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5178;5180;5207;5140;5137;5149;5186;5212,4496.6,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5208;5210;5143;5206;5137;5207;5182;5212,6334.0,1.000,1.000,0.832,1,0.907,1.000,1.000,0, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5204;5178;5214;5224;5210;5148;5145;5186;5190,5200.1,1.000,1.000,0.818,1,0.961,1.000,1.000,0, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5141;5137;5139;5136;5140;5186;5178;5145;5143,4471.4,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5139;5180;5210;5179;5143;5182;5186;5222;5133,4612.3,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5204;5224;5208;5209;5205;5178;5180;5225;5187;5186,5665.3,0.500,0.250,0.264,0,0.395,1.000,1.000,0, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5189;5180;5187;5186;5188;5182;5137;5185;5136,4672.7,0.500,1.000,0.613,1,0.787,0.500,1.000,0, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3774;3755;3756;3812;3760;3778;3761;3769;3815,3160.9,1.000,1.000,0.920,1,0.983,1.000,1.000,0, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5204;5149;5225;5206;5210;5178;5137;5182,4939.9,0.750,1.000,0.767,1,0.686,0.750,1.000,0, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5209;5222;5225;5208;5180;5204;5210;5211;5205,3692.2,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,4026;5145;13651;3895;5210;5143;13749;5139;5186;13315,3530.3,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13760;13674;13669;13774;13773;13675;11688;13757;11689,2403.7,0.500,1.000,0.503,1,0.727,0.667,1.000,0, +single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,10575;5177;10572;11671;11653;11649;5173;13946;13669;11655,3204.9,0.000,0.000,0.000,0,0.000,0.000,0.000,0, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11533;5090;4544;5081;11509;5140;5089;11476;5106,3281.3,0.500,1.000,0.613,1,0.787,0.500,1.000,0, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;3788;5090;5139;11503;11486;5106;5178,2689.1,0.667,1.000,0.765,1,0.856,0.667,1.000,0, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11479;11516;11475;5090;5084;11515;11531;11476;11473,3003.3,1.000,1.000,0.818,1,0.900,1.000,1.000,0, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11665;11664;11591;13660;13948;5177;13942;13917;11660;13752,3346.0,0.333,0.333,0.235,1,0.337,0.333,1.000,0, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11627;11658;11600;11625;11692;13653;13918;13305;13751;5177,2660.4,0.667,1.000,0.671,1,0.883,1.000,1.000,0, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11595;11616;13669;11617;11655;11649;13304;11690;13756;11658,2229.0,0.333,0.250,0.202,0,0.321,0.500,1.000,0, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11711;11712;11503;11500;11713;11714;13930;11717;11701;11502,2354.1,1.000,1.000,1.000,1,0.858,1.000,1.000,0, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11647;5177;11693;11692;13664;13665;13661;13666;13670,2520.7,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,4026;3971;5236;3966;3977;3972;4025;3973;3974;3895,2986.4,0.000,0.000,0.000,1,0.000,0.000,0.000,0, diff --git a/reports/v0_2_phase2q_result_dedup_gemma_warm_2026-05-24.csv b/reports/v0_2_phase2q_result_dedup_gemma_warm_2026-05-24.csv new file mode 100644 index 0000000..b0ec833 --- /dev/null +++ b/reports/v0_2_phase2q_result_dedup_gemma_warm_2026-05-24.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,dedup_count,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3868;3879;3856;3851;4041;3781;3775;3756;3783;3782,1826.7,1.000,1.000,1.000,1,0.808,1.000,1.000,0, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3874;3922;3917;3918;10573;3854;10571;3757;3877,1089.5,1.000,1.000,0.877,0,0.676,1.000,1.000,0, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3985;3980;3857;3984;3880;3993;3787;3903;3988,633.9,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3858;3852;3781;3851;3881;3905;3818;3912;3880,1289.0,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,3910;3888;3905;3890;3885;3895;3894;3898;3911;3902,1368.3,1.000,0.500,0.631,1,0.631,1.000,1.000,0, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5249;3897;3785;5244;3868;3784;3782;5253;3758;3851,1973.8,0.250,0.200,0.151,0,0.107,0.333,0.000,0, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;5227;3867;5236;3917;3854;3878;3851;5244;10573,1757.7,1.000,1.000,0.860,1,0.914,1.000,1.000,0, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3903;3904;3896;3909;3985;3981;3760;5253;3774,1866.7,0.667,1.000,0.626,1,0.626,0.667,0.000,0, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10572;10573;3917;3916;3923;3921;3918;3919;10571;3854,1867.7,0.750,0.333,0.502,1,0.502,0.750,1.000,0, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;3876;5249;5234;4025;11677;6675;10573;4842;3867,2554.0,0.500,1.000,0.613,1,0.787,0.500,1.000,0, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,14813;15924;15976;16378;16081;18077;22048;12213;16019;15793,1304.6,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,22049;17123;9022;11945;5391;6396;6829;9105;6774;6314,952.6,0.000,0.000,0.000,0,0.000,0.000,0.000,0, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4065;4064;4066;4071;4068;4058;4067;4069;5063,1424.4,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4070;4060;4062;4059;4061;4058;4064;4065;4066;4068,1730.9,1.000,0.500,0.712,1,0.712,1.000,0.000,0, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,21186;4775;4202;4776;4679;4199;4519;4668;21276;4658,468.5,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3758;3770;3791;3817;3763;4540;3787;3762;5244,1477.9,0.500,0.333,0.307,1,0.394,0.500,1.000,0, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5236;5249;3756;5229;3774;3755;3767;3758;5230;3773,2087.3,0.250,0.167,0.139,1,0.139,0.250,0.000,0, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3772;3790;5260;3897;3755;10574;13936;13937;10568;11669,1929.1,1.000,1.000,1.000,1,1.000,1.000,0.000,0, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,5840;4678;16010;16457;6945;5398;4199;6996;23149;4776,1740.2,0.125,0.143,0.084,1,0.084,0.125,0.000,0, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,21848;8381;16823;7473;21275;4262;9545;16927;16378;15924,1561.1,0.143,0.167,0.098,1,0.098,0.143,0.000,0, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5161;5070;5262;23732;4546;24155;5092;20758;4547;3774,735.7,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5057;5090;5068;5063;5103;5066;5076;24955;16289;5094,758.5,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,18567;20022;20470;4634;20066;15361;15984;3801;16059;19172,462.8,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3886;3887;3895;3902;3894;3889;3892;3890;3888;3893,1786.3,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3895;3903;13930;3897;13931;3755;3766;13929;3894,1442.8,1.000,1.000,0.807,1,0.959,1.000,1.000,0, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11579;4025;4026;11693;13299;11676;13749;13941;13766,1992.2,1.000,0.333,0.571,1,0.539,1.000,1.000,0, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;13299;13313;13918;13310,840.8,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13657;13655;13651;13656;13649;13752;13658;13648,546.2,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3895;3902;3758;3886;3755;3896;3887;13935;3772,2082.8,1.000,1.000,0.790,1,0.955,1.000,1.000,0, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5178;5180;5207;5140;5137;5149;5186;5212,3028.2,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5180;5208;5210;5143;5206;5137;5207;5182;5212,3419.1,1.000,1.000,0.832,1,0.907,1.000,1.000,0, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5204;5178;5214;5224;5210;5148;5145;5186;5190,4455.4,1.000,1.000,0.818,1,0.961,1.000,1.000,0, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5141;5137;5139;5136;5140;5186;5178;5145;5143,3496.9,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5139;5180;5210;5179;5143;5182;5186;5222;5133,3531.3,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5204;5224;5208;5209;5205;5178;5180;5225;5187;5186,3554.1,0.500,0.250,0.264,0,0.395,1.000,1.000,0, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5189;5180;5187;5186;5188;5182;5137;5185;5136,3005.6,0.500,1.000,0.613,1,0.787,0.500,1.000,0, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3774;3755;3756;3812;3760;3778;3761;3769;3815,1279.7,1.000,1.000,0.920,1,0.983,1.000,1.000,0, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5204;5149;5225;5206;5210;5178;5137;5182,2407.0,0.750,1.000,0.767,1,0.686,0.750,1.000,0, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5209;5222;5225;5208;5180;5204;5210;5211;5205,2768.2,1.000,1.000,1.000,1,1.000,1.000,1.000,0, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,4026;5145;13651;3895;5210;5143;13749;5139;5186;13315,1955.3,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13760;13674;13669;13774;13773;13675;11688;13757;11689,775.1,0.500,1.000,0.503,1,0.727,0.667,1.000,0, +single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,10575;5177;10572;11671;11653;11649;5173;13946;13669;11655,2120.0,0.000,0.000,0.000,0,0.000,0.000,0.000,0, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11533;5090;4544;5081;11509;5140;5089;11476;5106,1253.4,0.500,1.000,0.613,1,0.787,0.500,1.000,0, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;3788;5090;5139;11503;11486;5106;5178,655.5,0.667,1.000,0.765,1,0.856,0.667,1.000,0, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11479;11516;11475;5090;5084;11515;11531;11476;11473,1587.9,1.000,1.000,0.818,1,0.900,1.000,1.000,0, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11665;11664;11591;13660;13948;5177;13942;13917;11660;13752,1728.3,0.333,0.333,0.235,1,0.337,0.333,1.000,0, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11627;11658;11600;11625;11692;13653;13918;13305;13751;5177,662.3,0.667,1.000,0.671,1,0.883,1.000,1.000,0, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11595;11616;13669;11617;11655;11649;13304;11690;13756;11658,657.8,0.333,0.250,0.202,0,0.321,0.500,1.000,0, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11711;11712;11503;11500;11713;11714;13930;11717;11701;11502,710.2,1.000,1.000,1.000,1,0.858,1.000,1.000,0, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11647;5177;11693;11692;13664;13665;13661;13666;13670,523.6,0.000,0.000,0.000,1,0.000,0.000,0.000,0, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,4026;3971;5236;3966;3977;3972;4025;3973;3974;3895,1376.1,0.000,0.000,0.000,1,0.000,0.000,0.000,0, diff --git a/tests/search_eval/baselines/v0_2_phase2q_result_dedup_2026-05-24.json b/tests/search_eval/baselines/v0_2_phase2q_result_dedup_2026-05-24.json new file mode 100644 index 0000000..40754b7 --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2q_result_dedup_2026-05-24.json @@ -0,0 +1,91 @@ +{ + "version": "v0.2-phase2q-result-dedup", + "label": "phase_2q_search_result_dedup_real_effect_measurement", + "date": "2026-05-24", + "plan": "본 chore inline", + "snapshot": {"doc_id_max": 25180, "chunk_id_max": 56526}, + "fix_summary": { + "root_cause": "apply_diversity 의 top_score ≥ 0.90 시 unlimited path → 같은 doc 의 N chunks 가 results 에 박제. multi-query 의 reranker score 가 자주 0.90+ → 다수 case 에서 doc.id 중복.", + "code_changes": [ + "app/services/search/search_pipeline.py — _dedup_results_by_doc_id() helper 신규 (doc.id first-only)", + "search_with_rewrite() — apply_diversity(top_score_threshold=2.0) 강제 + 후속 _dedup_results_by_doc_id 적용 + rerank=False path 도 _dedup", + "baseline (run_search) path 변경 0 — multi-query 전용 invariant" + ], + "test_changes": [ + "tests/test_query_rewriter.py — _dedup_results_by_doc_id 4 신규 test (55/55 PASS)" + ] + }, + "measurements": { + "baseline_rewrite_null_eval_dedup_reference": { + "label": "baseline (eval dedup 적용, fix 전과 동일 — single-query path 영향 0)", + "graded_ndcg_at_10": 0.644, + "graded_recall_at_10_t2": 0.699, + "graded_recall_at_10_t3": 0.761, + "latency_p50_ms": 378, + "latency_p95_ms": 1931, + "dedup_audit": "0/51 ✓ 정상" + }, + "gemma_cold": { + "label": "Result-Dedup fix 후 cold (cache empty)", + "graded_ndcg_at_10": 0.663, + "graded_recall_at_10_t2": 0.729, + "graded_recall_at_10_t3": 0.761, + "latency_p50_ms": 3692, + "latency_p95_ms": 9992, + "dedup_audit": "0/51 ✓ 정상 (fix 작동)", + "by_category": { + "english_only": {"recall": 0.78, "gndcg": 0.78}, + "exam": {"recall": 0.64, "gndcg": 0.71}, + "korean_only": {"recall": 0.60, "gndcg": 0.54}, + "mixed": {"recall": 0.43, "gndcg": 0.41}, + "standards": {"recall": 0.95, "gndcg": 0.87} + }, + "csv": "reports/v0_2_phase2q_result_dedup_gemma_2026-05-24.csv" + }, + "gemma_warm": { + "label": "Result-Dedup fix 후 warm (cache hit)", + "graded_ndcg_at_10": 0.659, + "graded_recall_at_10_t2": 0.721, + "graded_recall_at_10_t3": 0.739, + "latency_p50_ms": 1588, + "latency_p95_ms": 3514, + "dedup_audit": "0/51 ✓ 정상", + "csv": "reports/v0_2_phase2q_result_dedup_gemma_warm_2026-05-24.csv" + } + }, + "real_effect_delta_vs_baseline": { + "ndcg_overall": "+0.019 (cold) / +0.015 (warm) — sub-noise level 개선", + "recall_t2": "+0.030 (cold) / +0.022 (warm) — 소량 개선", + "recall_t3": "0.000 (cold) / -0.022 (warm) — 동등~약간 회귀", + "category_winners": "english_only +0.06 / standards +0.05 / mixed +0.03", + "category_losers": "exam -0.03 / korean_only -0.03", + "latency_p50_cold": "+3314ms (+876%) — 사용자 UX 비현실적", + "latency_p50_warm": "+1210ms (+320%) — cache prewarm 후에도 무거움" + }, + "measurement_chain_정정_history": [ + {"commit": "a41adb6", "phase": "Phase 3 (chunk dedup 0)", "ndcg": 0.927, "inflation": "chunk_id 중복"}, + {"commit": "b734fc5", "phase": "Rerank-Fix (chunk dedup)", "ndcg": 0.876, "inflation": "doc_id 중복 잔재"}, + {"commit": "3553573", "phase": "Eval-Dedup (eval layer dedup)", "ndcg": 0.641, "inflation": "production path 의 doc 중복 (단 eval 측정만 정정)"}, + {"commit": "TBD (본 PR)", "phase": "Result-Dedup (production dedup)", "ndcg": 0.663, "inflation": "0 ✓ 정상", "note": "이번이 정확한 production 측정값"} + ], + "user_decision_required": { + "context": "multi-query 의 진짜 net 효과 = NDCG +0.019 + Recall +0.030 (sub-noise) / latency cold +876% warm +320%", + "options": [ + { + "label": "rollback (opt-in flag 제거 또는 영구 default null)", + "rationale": "marginal quality 개선 (NDCG +0.019) 이 latency cost 정당화 X. 시스템 복잡도 비용 큼.", + "action": "search.py 의 ?rewrite_backend= 제거 또는 description 갱신 (deprecated 표시). docs/phase_2q_apply_opt_in.md 의 metric 목표 정정 (NDCG ≥ 0.74 → ≥ 0.66 acceptable)." + }, + { + "label": "opt-in 유지 + cache prewarm (PR-2Q-Cache-Prewarm) 진입", + "rationale": "warm path latency 1.6s = production 가용. nightly cron 으로 top-N query prewarm 시 사용자 cold path 회피 가능.", + "action": "PR-2Q-Cache-Prewarm 별 plan + nightly cron + 1주 운영 후 metric 재평가." + }, + { + "label": "현 상태 유지 + 1주 운영 관찰 종료 후 (2026-05-31) 재결정", + "rationale": "실제 사용자 query 분포에서 어떤 카테고리가 dominant 인지 확인 후 결정. opt-in 이라 production 영향 0.", + "action": "현 상태 유지. 2026-05-31 metric 분석 후 별 PR." + } + ] + } +} diff --git a/tests/test_query_rewriter.py b/tests/test_query_rewriter.py index 63b4f1a..56283f0 100644 --- a/tests/test_query_rewriter.py +++ b/tests/test_query_rewriter.py @@ -517,3 +517,45 @@ def test_phase2q_rerank_input_cap_constants(): ) assert PHASE2Q_RERANK_INPUT_CAP == 60 assert PHASE2Q_CHUNKS_PER_DOC == 2 + + +# ─── 10. PR-2Q-Search-Result-Dedup — results doc_id dedup ─── +# multi-query path 의 reranker output → apply_diversity unlimited path 시 같은 doc 의 +# N chunks 박제 → returned_ids inflation 직접 원인. _dedup_results_by_doc_id helper 로 +# API response invariant 강제. + + +def test_dedup_results_empty_returns_empty(): + from services.search.search_pipeline import _dedup_results_by_doc_id + assert _dedup_results_by_doc_id([]) == [] + + +def test_dedup_results_no_duplicates_passthrough(): + from services.search.search_pipeline import _dedup_results_by_doc_id + docs = [_mk_search_result(i) for i in (10, 20, 30)] + out = _dedup_results_by_doc_id(docs) + assert [r.id for r in out] == [10, 20, 30] + + +def test_dedup_results_first_only_preserves_top_score(): + """같은 doc.id 등장 시 첫 entry 보존 — reranker 의 best chunk (top score) 우선.""" + from services.search.search_pipeline import _dedup_results_by_doc_id + docs = [ + _mk_search_result(10, score=0.95), # rank 1, keep + _mk_search_result(20, score=0.85), # keep + _mk_search_result(10, score=0.70), # 중복 drop (lower score) + _mk_search_result(30, score=0.60), # keep + _mk_search_result(20, score=0.50), # 중복 drop + ] + out = _dedup_results_by_doc_id(docs) + assert [r.id for r in out] == [10, 20, 30] + assert [r.score for r in out] == [0.95, 0.85, 0.60] # 첫 등장 score 보존 + + +def test_dedup_results_phase2q_kw_001_case(): + """Phase 2Q 실측 case — 3868 두 번 등장 시 first-only 보존.""" + from services.search.search_pipeline import _dedup_results_by_doc_id + docs = [_mk_search_result(i) for i in [3868, 3879, 3856, 3851, 3868, 3858]] + out = _dedup_results_by_doc_id(docs) + assert [r.id for r in out] == [3868, 3879, 3856, 3851, 3858] + assert len(out) == 5 # 6 → 5 (1 중복 제거)