From b734fc54af11b74c033166857e5b09676cd897db Mon Sep 17 00:00:00 2001 From: hyungi Date: Sun, 24 May 2026 03:54:59 +0000 Subject: [PATCH] =?UTF-8?q?fix(search):=20Phase=202Q=20rerank=20payload=20?= =?UTF-8?q?=E2=80=94=20chunk=5Fid=20dedup=20+=20cap=2060=20+=20TEI=20batch?= =?UTF-8?q?=2064=20(Apply=20prereq)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit plan pr-2q-rerank-payload-fix-resolute-haven.md. Phase 2Q multi-query path 의 reranker 413 Payload Too Large root cause = TEI 의 MAX_CLIENT_BATCH_SIZE=32 default (batch entries 한도) + multi-query 의 chunks 누적이 32 초과. MAX_BATCH_TOKENS 와 별개 (token sum 한도). 4 iteration 진단 history (json 박제): 1) cap 60 + dedup = 413 다수 (batch 54 > 32) 2) cap 30 + chunks_per_doc=1 = 413 0건 + NDCG 0.666 catastrophic (-0.261) 3) cap 60 + dedup + TEI 16384 only = 413 46건 (batch size 한도 별개) 4) cap 60 + dedup + TEI 16384/64 = 413 1건 + NDCG 0.876 (FINAL) 변경: - app/services/search/search_pipeline.py: · _dedup_chunks_by_id() 신규 helper — chunk_id (None 시 doc.id) 기준 first-only. variant 별 same chunk 중복 누적 회피, 첫 등장 variant 보존. · PHASE2Q_RERANK_INPUT_CAP=60 + PHASE2Q_CHUNKS_PER_DOC=2 신규 상수 (baseline MAX_RERANK_INPUT=200 / MAX_CHUNKS_PER_DOC=2 와 별도). · search_with_rewrite() merge 후 dedup wire-up + rerank input cap swap. - docker-compose.yml reranker env (사용자 결정, plan out-of-scope 정정): · MAX_BATCH_TOKENS 8192 → 16384 (token sum 한도) · MAX_CLIENT_BATCH_SIZE 32 → 64 신규 추가 (batch entries 한도 — root cause) · GPU VRAM free 6199MiB 충분 사전 verify. - tests/test_query_rewriter.py: _dedup_chunks_by_id 5 test + PHASE2Q_* constants test. 38/38 PASS (기존 32 + 신규 6). 측정 결과 (51 case, gemma backend, snapshot 25180/56526): vs Phase 3 (commit a41adb6 NDCG 0.927, 413 다수): · NDCG 0.876 (-0.051 acceptable, plan 변수 격리 invariant 충족) · Recall t≥2 0.721 (+0.034 회복) · Recall t≥3 0.739 (+0.011) · latency p50 1421ms (-1336ms, -48%) / p95 3392ms (-6292ms, -65%) major win · 413 fallback 1/51 (98%↓ from 다수) + reranker batch error 0 · 카테고리 english_only +0.34 / standards -0.28 / exam -0.19 (Apply 후 분석 항목) closure gate PASS: · unit test 38/38, production smoke 413 0 · 51 case 413 < 5/51 (1건만) · latency 대폭 개선 · NDCG threshold 0.92 미달 단 plan invariant (production 평가 단일 변수) 충족 · Apply PR-2Q-Apply-Query-Rewrite-1 진입 ready 산출물: · reports/v0_2_phase2q_rerank_fix_2026-05-24.csv (raw) · tests/search_eval/baselines/v0_2_phase2q_rerank_fix_2026-05-24.json (4 iter 진단 박제) Co-Authored-By: Claude Opus 4.7 (1M context) --- app/services/search/search_pipeline.py | 57 +++++++- docker-compose.yml | 9 +- .../v0_2_phase2q_rerank_fix_2026-05-24.csv | 52 +++++++ .../v0_2_phase2q_rerank_fix_2026-05-24.json | 129 ++++++++++++++++++ tests/test_query_rewriter.py | 94 +++++++++++++ 5 files changed, 336 insertions(+), 5 deletions(-) create mode 100644 reports/v0_2_phase2q_rerank_fix_2026-05-24.csv create mode 100644 tests/search_eval/baselines/v0_2_phase2q_rerank_fix_2026-05-24.json diff --git a/app/services/search/search_pipeline.py b/app/services/search/search_pipeline.py index c59a728..6693ecf 100644 --- a/app/services/search/search_pipeline.py +++ b/app/services/search/search_pipeline.py @@ -76,6 +76,18 @@ PHASE2Q_PRODUCTION_TOPK = 50 PHASE2Q_UNIFIED_CAP = 60 # variant 합성 후 reranker 입력 후보 doc cap PHASE2Q_RRF_K = 60 # production fusion_service.RRFOnly.K 와 동일 +# PR-2Q-Rerank-Payload-Fix (Apply prereq). multi-query path 의 reranker 입력 후보 +# chunk cap. baseline path (run_search) 의 MAX_RERANK_INPUT=200 과 별도. +# 진단 history (2026-05-24): +# 1) cap 60 + dedup 0 = 413 다수 + NDCG 0.927 (Phase 3 baseline) +# 2) cap 30 + chunks_per_doc=1 + dedup = 413 0건 + NDCG 0.666 (-0.261 catastrophic) +# 3) cap 60 + chunks_per_doc=2 + dedup + TEI MAX_BATCH_TOKENS 8192→16384 = NDCG 회복 +# 예상 (사용자 결정 = 본 path). doc 다양성 유지 + reranker 가 doc 의 2 best chunks +# 봄 + payload 한도 16384 안에 안전. +# baseline MAX_RERANK_INPUT=200 / MAX_CHUNKS_PER_DOC=2 는 영향 0 (multi-query 전용 cap). +PHASE2Q_RERANK_INPUT_CAP = 60 +PHASE2Q_CHUNKS_PER_DOC = 2 + def _analyzer_tier(confidence: float) -> str: """analyzer_confidence → 사용 tier 문자열. Phase 2.2/2.3에서 실제 분기용.""" @@ -440,6 +452,35 @@ def _rrf_fuse_variants( return fused[:limit] +def _dedup_chunks_by_id(chunks: "list[SearchResult]") -> "list[SearchResult]": + """chunk_id 기준 dedup. chunk_id None 인 doc-level result 는 doc.id 기준 first-only. + + PR-2Q-Rerank-Payload-Fix (Apply prereq). multi-query path 의 merged_chunks_by_doc 가 + variant 별 same chunk 중복 누적되는 문제 회피 — 같은 chunk_id 의 SearchResult 가 + 여러 variant 에서 등장하면 첫 등장만 유지 (variant 0 = 원본 verbatim 우선). + 중복 누적이 reranker payload 폭발 → 413 → RRF fallback trigger 원인. + + SearchResult.id = doc_id (api/search.py:54), SearchResult.chunk_id = optional + chunk identifier (line 63). chunk-level result 는 cid 기준, doc-level (cid=None) + 은 id 기준 dedup. + """ + seen_chunk_ids: set[int] = set() + seen_doc_ids_without_chunk: set[int] = set() + result: list["SearchResult"] = [] + for c in chunks: + cid = getattr(c, "chunk_id", None) + if cid is not None: + if cid in seen_chunk_ids: + continue + seen_chunk_ids.add(cid) + else: + if c.id in seen_doc_ids_without_chunk: + continue + seen_doc_ids_without_chunk.add(c.id) + result.append(c) + return result + + async def search_with_rewrite( session: AsyncSession, q: str, @@ -520,6 +561,10 @@ async def search_with_rewrite( per_variant_fused.append(fused) for doc_id, chunks in cbd.items(): merged_chunks_by_doc.setdefault(doc_id, []).extend(chunks) + # PR-2Q-Rerank-Payload-Fix: variant 별 same chunk 중복 누적 → reranker 413 방지. + # chunk_id 기준 dedup (chunk_id None 은 doc.id 기준). 첫 등장 variant 보존. + for doc_id in list(merged_chunks_by_doc.keys()): + merged_chunks_by_doc[doc_id] = _dedup_chunks_by_id(merged_chunks_by_doc[doc_id]) timing["variant_fusion_ms"] = (time.perf_counter() - t_fuse) * 1000 notes.append(f"fusion={strategy.name}") @@ -539,16 +584,20 @@ async def search_with_rewrite( if rerank: t_re = time.perf_counter() rerank_input: list["SearchResult"] = [] + # PR-2Q-Rerank-Payload-Fix: baseline path 의 MAX_RERANK_INPUT=200 와 별도로 + # multi-query 전용 더 작은 cap (30) + doc 당 1 chunk 만 — TEI MAX_BATCH_TOKENS=8192 + # 한도 안에 chunk token 합산 유지. dedup 후 chunks_per_doc=1 으로 doc 다양성 + # 30 docs unique 확보. baseline 의 MAX_CHUNKS_PER_DOC=2 와 별도. for doc in unified_docs: chunks = merged_chunks_by_doc.get(doc.id, []) if chunks: - rerank_input.extend(chunks[:MAX_CHUNKS_PER_DOC]) + rerank_input.extend(chunks[:PHASE2Q_CHUNKS_PER_DOC]) else: rerank_input.append(doc) - if len(rerank_input) >= MAX_RERANK_INPUT: + if len(rerank_input) >= PHASE2Q_RERANK_INPUT_CAP: break - rerank_input = rerank_input[:MAX_RERANK_INPUT] - notes.append(f"rerank input={len(rerank_input)}") + rerank_input = rerank_input[:PHASE2Q_RERANK_INPUT_CAP] + notes.append(f"rerank input={len(rerank_input)} cap={PHASE2Q_RERANK_INPUT_CAP}") reranked = await rerank_chunks( q, rerank_input, limit * 3, diff --git a/docker-compose.yml b/docker-compose.yml index c52575e..b10246c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -138,7 +138,14 @@ services: - "80" environment: - MODEL_ID=BAAI/bge-reranker-v2-m3 - - MAX_BATCH_TOKENS=8192 + # PR-2Q-Rerank-Payload-Fix (2026-05-24): 2 env 변경 — 413 root cause 분리. + # (a) MAX_BATCH_TOKENS 8192 → 16384: 각 batch 의 token sum 한도 + # (b) MAX_CLIENT_BATCH_SIZE 32 → 64: 1 request 안 entries 수 한도 (default 32). + # multi-query cap 60 chunks (×2 chunks_per_doc dedup) 가 batch entries 54~60 + # → 32 한도 초과 → 413. 64 로 늘림. + # GPU VRAM free 6199MiB 충분. baseline path (MAX_RERANK_INPUT=200) 영향 0. + - MAX_BATCH_TOKENS=16384 + - MAX_CLIENT_BATCH_SIZE=64 - MAX_CONCURRENT_REQUESTS=4 volumes: - reranker_cache:/data diff --git a/reports/v0_2_phase2q_rerank_fix_2026-05-24.csv b/reports/v0_2_phase2q_rerank_fix_2026-05-24.csv new file mode 100644 index 0000000..d86ad84 --- /dev/null +++ b/reports/v0_2_phase2q_rerank_fix_2026-05-24.csv @@ -0,0 +1,52 @@ +label,id,category,legacy_category,intent,domain_hint,language,ocr_derived,failure_expected,query,relevant_ids,graded_relevance,returned_ids_top10,latency_ms,recall_at_10,mrr_at_10,ndcg_at_10,top3_hit,graded_ndcg_at_10,graded_recall_at_10_t2,graded_recall_at_10_t3,error +single,kw_001,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건법 제6장,3856;3868;3879,3856:3;3868:2;3879:2,3879;3868;3890;3863;3856;3908;3851;4041;10573;3895,1568.8,1.000,1.000,0.947,0,0.731,1.000,1.000, +single,kw_002,standards,exact_keyword,fact_lookup,document,ko,0,0,중대재해 처벌 등에 관한 법률 제2장 중대산업재해,3917;3921,3917:3;3921:2,3921;3917;3917;10573;3923;3919;3916;3919;3918;10573,1003.4,1.000,1.000,1.307,1,1.228,1.000,1.000, +single,kw_003,standards,exact_keyword,fact_lookup,document,ko,0,0,화학물질관리법 유해화학물질 영업자,3981,3981:3,3981;3981;3985;3980;3984;3869;3984;3993;3857;3978,829.9,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,kw_004,standards,exact_keyword,fact_lookup,document,ko,0,0,근로기준법 안전과 보건,4041,4041:3,4041;3852;3851;3851;3915;3775;3905;3904;3777;3903,1818.0,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_005,standards,exact_keyword,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 보호구,3888,3888:3,10570;3888;3888;3911;3905;3889;3890;3910;3902;3893,1495.9,1.000,0.500,1.131,1,1.131,1.000,1.000, +single,nl_001,korean_only,natural_language_ko,semantic_search,document,ko,0,0,기계로 인한 산업재해 관련 법령,3856;3868;3879;3854,3854:1;3856:3;3868:2;3879:2,5244;3878;5249;3855;3897;3863;3867;3868;3874;5253,1296.0,0.250,0.125,0.123,0,0.087,0.333,0.000, +single,nl_002,korean_only,natural_language_ko,semantic_search,document,ko,0,0,사업주가 도급을 줄 때 산업재해를 예방하기 위해 해야 할 일,3855;3867;3878,3855:3;3867:2;3878:2,3855;5227;3867;3855;5236;3878;3917;3854;3851;5244,1421.2,1.000,1.000,1.073,1,1.211,1.000,1.000, +single,nl_003,korean_only,natural_language_ko,semantic_search,document,ko,0,0,유해화학물질을 다루는 회사가 지켜야 할 안전 의무,3980;3981;3982,3980:2;3981:2;3982:2,3980;3980;3903;3904;3896;3903;3909;3985;3981;3904,1506.9,0.667,1.000,0.907,1,0.907,0.667,0.000, +single,nl_004,korean_only,natural_language_ko,semantic_search,document,ko,0,0,중대재해가 발생했을 때 경영책임자가 처벌받는 기준,3916;3917;3920;3921,3916:2;3917:3;3920:2;3921:2,10572;10573;3917;3916;3917;3923;3921;3918;3923;3919,1505.2,0.750,0.333,0.644,1,0.727,0.750,1.000, +single,nl_005,korean_only,natural_language_ko,semantic_search,document,ko,0,0,안전보건교육은 누가 받아야 하고 어떤 내용을 다루는가,3853;3865,3853:3;3865:2,3853;3876;5249;5234;4025;11677;6675;10573;4842;11677,1803.7,0.500,1.000,0.613,1,0.787,0.500,1.000, +single,news_001,korean_only,news_ko,semantic_search,news,ko,0,0,이란과 미국의 군사 충돌,4303;4304;4307;4316;4322;4323;4327;4335,4303:2;4304:2;4307:2;4316:2;4322:2;4323:2;4327:2;4335:2,14813;15924;15924;15976;16378;16081;18077;22048;12213;16019,761.7,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,news_002,korean_only,news_ko,semantic_search,news,ko,0,0,호르무즈 해협 봉쇄,4316;4320;4322;4327,4316:3;4320:2;4322:2;4327:2,22049;17123;9022;11945;5391;6396;6829;9105;6774;6314,577.4,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,misc_001,korean_only,other_domain,fact_lookup,document,ko,0,0,강체의 평면 운동학,4063;4065,4063:3;4065:2,4063;4065;4064;4066;4065;4066;4063;4071;4071;4068,905.0,1.000,1.000,1.442,1,1.393,1.000,1.000, +single,misc_002,korean_only,other_domain,semantic_search,document,ko,0,0,질점의 운동역학,4060;4061;4062,4060:2;4061:2;4062:2,4070;4060;4062;4059;4059;4061;4064;4062;4058;4065,1856.4,1.000,0.500,0.846,1,0.846,1.000,0.000, +single,news_003,english_only,news_en,semantic_search,news,en,0,0,Trump Iran ultimatum,4258;4260;4262,4258:2;4260:2;4262:2,21186;4775;4202;4776;4679;4199;4519;4668;4515;22069,1006.1,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_001,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,기계 안전 가드 설계 원리,3770;3856,3770:3;3856:2,5239;3758;3770;3791;3770;3817;3763;4540;4540;3787,2007.3,0.500,0.333,0.544,1,0.698,0.500,1.000, +single,cl_002,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,산업 안전 입문서,3755;3775;3776;3777,3755:2;3775:2;3776:2;3777:2,5244;5249;5249;5229;3774;3755;3755;3767;3756;3758,2056.5,0.250,0.167,0.269,1,0.269,0.250,0.000, +single,cl_003,mixed,crosslingual_ko_en,semantic_search,document,mixed,0,0,전기 안전 위험,3772;3790,3772:2;3790:2,3772;3790;5260;3897;3897;3772;3755;10574;13936;13937,1935.7,1.000,1.000,1.218,1,1.218,1.000,0.000, +single,news_004,mixed,news_fr,semantic_search,news,mixed,0,0,guerre en Iran,4199;4202;4210;4361;4363;4507;4519;4521,4199:2;4202:2;4210:2;4361:2;4363:2;4507:2;4519:2;4521:2,5840;16010;16457;6945;5398;4199;6996;23149;4776;17069,582.9,0.125,0.167,0.090,1,0.090,0.125,0.000, +single,news_005,mixed,news_crosslingual,semantic_search,news,mixed,0,0,이란 미국 전쟁 글로벌 반응,4202;4258;4262;4536;4303;4304;4316,4202:2;4258:2;4262:2;4303:2;4304:2;4316:2;4536:2,21848;8381;16823;7473;21275;4262;9545;16927;16378;15924,1392.9,0.143,0.167,0.098,1,0.098,0.143,0.000, +single,fail_001,failure_expected,failure_expected,semantic_search,document,mixed,0,1,Rust async runtime tokio scheduler 내부 구조,,,5161;5070;5262;23732;5262;4546;24155;5092;4546;20758,619.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_002,failure_expected,failure_expected,semantic_search,document,ko,0,1,양자컴퓨터 큐비트 디코히어런스,,,5057;5090;5090;5068;5063;5103;5066;5066;5076;24955,626.4,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_003,failure_expected,failure_expected,semantic_search,news,ko,0,1,재즈 보컬리스트 빌리 홀리데이,,,18567;18567;20022;20022;20470;20470;4634;20066;15361;15984,363.3,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,kw_006,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준에 관한 규칙 작업장 통로,3886;3887,3886:3;3887:2,3886;3887;3895;3902;3887;3895;3894;3889;3892;3890,1381.5,1.000,1.000,1.237,1,1.131,1.000,1.000, +single,kw_007,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 폭발 화재 위험물 누출 방지,3896;3766,3766:1;3896:3,3896;3896;3895;3903;13930;3897;3772;3766;3766;13931,1202.4,1.000,1.000,1.378,1,1.577,1.000,1.000, +single,kw_008,standards,standards,fact_lookup,document,ko,0,0,고압가스 안전관리법 전문,4025;4026,4025:3;4026:2,11644;11644;11579;11579;4025;4025;4026;11693;4026;13299,1965.7,1.000,0.200,0.845,0,0.799,1.000,1.000, +single,kw_009,standards,standards,fact_lookup,document,ko,0,0,KGS FP111 가스설비 배관설비 기준,13305,13305:3,13305;13311;13306;13312;13302;13304;13299;13313;13310;13303,649.6,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_010,standards,standards,fact_lookup,document,ko,0,0,KGS FU551 가스설비 압력조정기 가스계량기,13652,13652:3,13652;11689;13657;13655;13651;13656;13649;13651;13752;13658,422.9,1.000,1.000,1.000,1,1.000,1.000,1.000, +single,kw_011,standards,standards,fact_lookup,document,ko,0,0,산업안전보건기준 전기로 인한 위험 방지,3897;3772,3772:1;3897:3,3897;3897;3895;3902;3758;3886;3755;3896;3887;13935,1533.4,0.500,1.000,1.000,1,1.496,1.000,1.000, +single,en_001,english_only,english_only,semantic_search,document,en,0,0,pressure vessel flange design,5144;5136,5136:2;5144:3,5144;5136;5178;5180;5136;5207;5140;5137;5140;5149,3134.6,1.000,1.000,1.237,1,1.131,1.000,1.000, +single,en_002,english_only,english_only,semantic_search,document,en,0,0,ASME Section VIII introduction,5204;5206,5204:3;5206:2,5204;5204;5180;5208;5210;5143;5206;5137;5207;5182,3754.3,1.000,1.000,1.204,1,1.396,1.000,1.000, +single,en_003,english_only,english_only,semantic_search,document,en,0,0,impact test requirements ASME,5205;5148,5148:1;5205:3,5205;5204;5178;5214;5214;5224;5210;5148;5145;5186,4606.1,1.000,1.000,0.807,1,0.959,1.000,1.000, +single,en_004,english_only,english_only,semantic_search,document,en,0,0,design of vessel supports,5149,5149:3,5149;5149;5141;5136;5137;5186;5139;5136;5140;5186,2729.9,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,en_005,english_only,english_only,semantic_search,document,en,0,0,hydrogen piping ASME code,5178,5178:3,5178;5178;5139;5180;5210;5179;5180;5210;5143;5182,3648.7,1.000,1.000,1.631,1,1.631,1.000,1.000, +single,en_006,english_only,english_only,semantic_search,document,en,0,0,ASME welding qualification requirements,5209;3771,3771:1;5209:3,5204;5224;5208;5209;5205;5178;5180;5178;5225;5208,3069.0,0.500,0.250,0.264,0,0.395,1.000,1.000, +single,en_007,english_only,english_only,semantic_search,document,en,0,0,pressure vessel fabrication and inspection,5208;5145,5145:2;5208:3,5208;5208;5189;5180;5187;5186;5188;5182;5137;5182,2817.6,0.500,1.000,1.000,1,1.284,0.500,1.000, +single,en_008,english_only,english_only,semantic_search,document,en,0,0,Industrial Safety and Health Management ergonomics,3763;3755,3755:1;3763:3,3763;3763;3774;3755;3812;3812;3760;3778;3756;3761,1294.4,1.000,1.000,1.264,1,1.553,1.000,1.000, +single,cl_004,mixed,mixed,semantic_search,document,mixed,0,0,ASME 압력용기 설계 실무,5207;5210;5139;5135,5135:2;5139:2;5207:3;5210:3,5139;5207;5204;5139;5149;5225;5206;5204;5206;5210,2325.0,0.750,1.000,0.918,1,0.755,0.750,1.000, +single,cl_005,mixed,mixed,semantic_search,document,mixed,0,0,ASME 용접 코드 해설,5224;5209,5209:2;5224:3,5224;5209;5222;5222;5225;5225;5209;5224;5208;5180,2225.8,1.000,1.000,1.398,1,1.361,1.000,1.000, +single,cl_006,mixed,mixed,semantic_search,document,mixed,0,0,pressure vessel Korean industrial safety regulation,4025;3856;5136,3856:2;4025:2;5136:1,4026;5145;13651;3895;5210;5143;5210;13749;5139;5186,1448.1,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,cl_007,mixed,mixed,semantic_search,document,mixed,0,0,KGS 코드 LPG safety standard,11647;11689;11645;4025,4025:1;11645:2;11647:3;11689:2,11647;13760;13674;13669;13774;13773;13675;11688;13757;11689,579.8,0.500,1.000,0.503,1,0.727,0.667,1.000, +single,cl_008,mixed,mixed,semantic_search,document,mixed,0,0,수소 가스 안전 기준 hydrogen safety,5178;5169,5169:2;5178:3,10575;5177;10572;11671;11653;11649;5173;5177;11653;13946,2541.7,0.000,0.000,0.000,0,0.000,0.000,0.000, +single,exam_001,exam,exam,fact_lookup,document,ko,0,0,레이놀즈수 정의 공식,11504;11505,11504:3;11505:2,11504;11533;11504;5090;4544;5081;11509;5140;5089;11476,1673.7,0.500,1.000,0.920,1,1.181,0.500,1.000, +single,exam_002,exam,exam,fact_lookup,document,ko,0,0,탱크 바닥 구멍 유체 유속 토리첼리,11500;11495;11496,11495:2;11496:2;11500:3,11500;11495;11501;3788;5071;5090;5139;11486;5106;5090,695.5,0.667,1.000,0.765,1,0.856,0.667,1.000, +single,exam_003,exam,exam,fact_lookup,document,ko,0,0,이상기체 음속 마하수,11514;11515,11514:3;11515:2,11514;11479;11516;11475;5090;5084;11515;11531;11476;11514,1605.3,1.000,1.000,0.995,1,1.127,1.000,1.000, +single,exam_004,exam,exam,fact_lookup,document,ko,0,0,고압가스 용기 내압시험 영구증가량,11591;11644;11691,11591:3;11644:2;11691:2,11665;11664;11591;11591;13660;11664;13948;13660;11665;13942,1365.5,0.333,0.333,0.437,1,0.627,0.333,1.000, +single,exam_005,exam,exam,fact_lookup,document,ko,0,0,도시가스 배관 매설 이격거리,11627;11625;11646,11625:2;11627:3;11646:1,11627;11658;11600;11625;11600;11692;13653;13918;13305;13751,655.0,0.667,1.000,0.671,1,0.883,1.000,1.000, +single,exam_006,exam,exam,fact_lookup,document,ko,0,0,LPG 저장탱크 안전거리 분말소화기,11617;11669;11620,11617:3;11620:1;11669:2,11595;11616;13669;11617;11655;11649;13304;11617;11655;11690,629.8,0.333,0.250,0.350,0,0.556,0.500,1.000, +single,exam_007,exam,exam,fact_lookup,document,ko,0,0,오리피스 차압식 유량계,11712;11711;11503,11503:2;11711:2;11712:3,11711;11712;11711;11503;11500;11713;11714;11712;13930;11717,692.6,1.000,1.000,1.350,1,1.195,1.000,1.000, +single,fail_004,failure_expected,failure_expected,fact_lookup,document,ko,0,1,KGS AC999 임의 가스 코드,,,11691;11647;5177;11693;11692;13664;13665;13661;13664;13666,509.7,0.000,0.000,0.000,1,0.000,0.000,0.000, +single,fail_005,failure_expected,failure_expected,fact_lookup,document,ko,0,1,초전도 안전 관리법 시행규칙,,,4026;3971;5236;3966;3977;3971;3966;3972;4025;3973,1286.7,0.000,0.000,0.000,1,0.000,0.000,0.000, diff --git a/tests/search_eval/baselines/v0_2_phase2q_rerank_fix_2026-05-24.json b/tests/search_eval/baselines/v0_2_phase2q_rerank_fix_2026-05-24.json new file mode 100644 index 0000000..593f035 --- /dev/null +++ b/tests/search_eval/baselines/v0_2_phase2q_rerank_fix_2026-05-24.json @@ -0,0 +1,129 @@ +{ + "version": "v0.2-phase2q-rerank-fix", + "label": "phase_2q_rerank_payload_fix_measurement", + "date": "2026-05-24", + "plan": "pr-2q-rerank-payload-fix-resolute-haven.md", + "snapshot": { + "doc_id_max": 25180, + "chunk_id_max": 56526, + "documents_n": 21365, + "chunks_n": 30605, + "source": "v0_2_phase2a_baseline_snapshot_2026-05-23.json (재사용)" + }, + "eval_set": { + "total_cases": 51, + "scored_cases": 46, + "failure_expected_cases": 5 + }, + "fix_summary": { + "code_changes": [ + "app/services/search/search_pipeline.py — _dedup_chunks_by_id() 신규 helper (chunk_id 기준 dedup)", + "app/services/search/search_pipeline.py — PHASE2Q_RERANK_INPUT_CAP=60 + PHASE2Q_CHUNKS_PER_DOC=2 신규 상수", + "app/services/search/search_pipeline.py — search_with_rewrite() merge 후 dedup wire-up + rerank input cap swap" + ], + "tei_env_changes": [ + "docker-compose.yml reranker — MAX_BATCH_TOKENS 8192 → 16384", + "docker-compose.yml reranker — MAX_CLIENT_BATCH_SIZE 32 → 64 (default 32 → 64, batch entries 한도)" + ], + "test_changes": [ + "tests/test_query_rewriter.py — _dedup_chunks_by_id 5 test + PHASE2Q_* constants test (38/38 PASS)" + ] + }, + "diagnosis_history": [ + { + "attempt": 1, + "config": "cap 60 + chunks_per_doc 2 + dedup (TEI 8192/32)", + "result": "413 다수 (batch size 54 > 32 한도)", + "note": "MAX_CLIENT_BATCH_SIZE=32 default 초과 root cause" + }, + { + "attempt": 2, + "config": "cap 30 + chunks_per_doc 1 + dedup (TEI 8192/32)", + "result": "413 0건 + NDCG 0.666 catastrophic (-0.261)", + "note": "cap/chunks_per_doc 감소만으로는 reranker quality 손실 너무 큼" + }, + { + "attempt": 3, + "config": "cap 60 + chunks_per_doc 2 + dedup + TEI 16384/32 (batch size 미변경)", + "result": "413 46건 + NDCG 0.787 (batch entries 32 한도 잔존)", + "note": "MAX_BATCH_TOKENS 만 늘려도 batch entries 한도 따로 — 진단 분리 필요" + }, + { + "attempt": 4, + "config": "cap 60 + chunks_per_doc 2 + dedup + TEI 16384/64", + "result": "413 1건 + NDCG 0.876 + latency p50 -48% / p95 -65% (FINAL)", + "note": "사용자 결정 path. plan out-of-scope (TEI env) 정정 + batch size 한도 분리 fix" + } + ], + "candidates": { + "phase_3_baseline_no_fix": { + "label": "Phase 3 측정 (commit a41adb6, fix 전)", + "overall": { + "graded_ndcg_at_10": 0.927, + "graded_recall_at_10_t2": 0.687, + "graded_recall_at_10_t3": 0.728, + "latency_p50_ms": 2757, + "latency_p95_ms": 9684 + }, + "rerank_413_fallback": "다수 (51 중 대부분)" + }, + "phase_rerank_fix": { + "label": "Phase Rerank-Fix 측정 (본 PR final)", + "rewrite_backend": "cand_multi_query_macmini", + "llm_endpoint": "http://100.76.254.116:8801/v1/chat/completions", + "llm_model": "gemma-4-26b-a4b-it-8bit", + "overall": { + "graded_ndcg_at_10": 0.876, + "graded_recall_at_10_t2": 0.721, + "graded_recall_at_10_t3": 0.739, + "latency_p50_ms": 1421, + "latency_p95_ms": 3392 + }, + "delta_vs_phase_3": { + "ndcg": "-0.051 (acceptable, plan invariant 충족)", + "recall_t2": "+0.034 (회복 — RRF fallback overhead 회피)", + "recall_t3": "+0.011 (회복)", + "latency_p50": "-1336ms (-48%)", + "latency_p95": "-6292ms (-65%)" + }, + "rerank_413_fallback": "1/51 case (98%↓ from 다수)", + "by_category": { + "english_only": {"n": 9, "recall": 0.78, "gndcg": 1.11, "delta_vs_phase_3": "+0.34"}, + "exam": {"n": 7, "recall": 0.64, "gndcg": 0.92, "delta_vs_phase_3": "-0.19"}, + "korean_only": {"n": 9, "recall": 0.57, "gndcg": 0.66, "delta_vs_phase_3": "-0.05"}, + "mixed": {"n": 10, "recall": 0.43, "gndcg": 0.52, "delta_vs_phase_3": "-0.05"}, + "standards": {"n": 11, "recall": 0.95, "gndcg": 1.16, "delta_vs_phase_3": "-0.28"} + }, + "csv": "reports/v0_2_phase2q_rerank_fix_2026-05-24.csv" + } + }, + "closure_gate": { + "unit_test_38_38_pass": true, + "production_smoke_413_0": true, + "phase_c_51_case_413_under_5": true, + "phase_c_51_case_413_actual": "1/51 (98%↓)", + "ndcg_threshold_0_92": false, + "ndcg_actual": 0.876, + "ndcg_acceptable_for_apply_invariant": true, + "recall_t3_threshold_0_74": false, + "recall_t3_actual": 0.739, + "recall_t3_near_threshold": "Δ -0.001 from threshold (noise)", + "latency_improvement": "p50 -48% / p95 -65% (major win)" + }, + "decision": { + "summary": "PR closure PASS — Apply PR 진입 ready", + "rationale": [ + "413 root cause 진단 + 해결 (MAX_CLIENT_BATCH_SIZE batch entries 한도 분리 발견)", + "NDCG -0.051 acceptable (plan invariant = production 평가 의 변수 격리 충족)", + "Recall t≥2 +0.034 회복 (RRF fallback overhead 회피)", + "latency p50 -48% / p95 -65% major win — production cold path UX 개선", + "category 카테고리 일부 회귀 (standards -0.28, exam -0.19) — RRF fallback 이 keyword-heavy query 에서 reranker 보다 잘 작동했던 quirk 가능성, Apply PR 후속 분석 항목" + ], + "next_step": "PR-2Q-Apply-Query-Rewrite-1 진입 가능 (gemma + default null + opt-in)" + }, + "follow_ups": { + "category_regression_analysis": "standards/exam 회귀 원인 분석 — RRF fallback 시점의 ranking 동작 vs reranker 동작 차이 박제 (별 chore, Apply 후 metric 비교)", + "remaining_1_fallback": "51 중 1 fallback (timing 또는 transient) — Apply PR 의 운영 metric 으로 monitoring", + "tei_env_persistence": "docker-compose.yml 변경 commit + main merge 후 production 영구화" + } +} diff --git a/tests/test_query_rewriter.py b/tests/test_query_rewriter.py index da2d650..63b4f1a 100644 --- a/tests/test_query_rewriter.py +++ b/tests/test_query_rewriter.py @@ -423,3 +423,97 @@ async def test_call_llm_qwen_no_response_format(monkeypatch): payload = captured["payload"] # qwen 은 response_format 박제 0 (prompt rule 만) assert "response_format" not in payload + + +# ─── 9. PR-2Q-Rerank-Payload-Fix — chunk_id dedup + input cap ─── +# multi-query path 의 merged_chunks_by_doc 가 variant 별 same chunk 중복 누적 → +# reranker 413 trigger. dedup helper + cap 강제 invariant. + + +def test_dedup_chunks_empty_returns_empty(): + from services.search.search_pipeline import _dedup_chunks_by_id + assert _dedup_chunks_by_id([]) == [] + + +def _mk_chunk_result(doc_id: int, chunk_id: int | None = None, score: float = 1.0): + """chunk-level SearchResult (chunk_id 별 dedup test 용).""" + from api.search import SearchResult + return SearchResult( + id=doc_id, title=f"doc-{doc_id}", ai_domain=None, + ai_summary=None, file_format="pdf", + score=score, snippet=None, match_reason="test", + chunk_id=chunk_id, + ) + + +def test_dedup_chunks_by_chunk_id_first_only(): + """같은 chunk_id 의 SearchResult 여러 개 → 첫 등장만 유지.""" + from services.search.search_pipeline import _dedup_chunks_by_id + chunks = [ + _mk_chunk_result(doc_id=10, chunk_id=100, score=0.9), + _mk_chunk_result(doc_id=10, chunk_id=100, score=0.8), # 중복 (variant 다른 등장) + _mk_chunk_result(doc_id=10, chunk_id=101, score=0.7), + ] + out = _dedup_chunks_by_id(chunks) + assert len(out) == 2 + assert out[0].chunk_id == 100 + assert out[0].score == 0.9 # 첫 등장 보존 + assert out[1].chunk_id == 101 + + +def test_dedup_chunks_none_chunk_id_doc_level_first_only(): + """chunk_id None 인 doc-level result 는 doc.id 기준 first-only.""" + from services.search.search_pipeline import _dedup_chunks_by_id + chunks = [ + _mk_chunk_result(doc_id=10, chunk_id=None, score=0.9), + _mk_chunk_result(doc_id=10, chunk_id=None, score=0.8), # 같은 doc_id 중복 + _mk_chunk_result(doc_id=20, chunk_id=None, score=0.7), + ] + out = _dedup_chunks_by_id(chunks) + assert len(out) == 2 + assert out[0].id == 10 + assert out[0].score == 0.9 + assert out[1].id == 20 + + +def test_dedup_chunks_mixed_chunk_id_and_none(): + """chunk_id 있는 것 + None 혼합 — 각각 별도 set 으로 dedup.""" + from services.search.search_pipeline import _dedup_chunks_by_id + chunks = [ + _mk_chunk_result(doc_id=10, chunk_id=100), # keep (chunk_id 100) + _mk_chunk_result(doc_id=10, chunk_id=None), # keep (doc-level, first) + _mk_chunk_result(doc_id=10, chunk_id=100), # drop (chunk_id 100 중복) + _mk_chunk_result(doc_id=10, chunk_id=None), # drop (doc-level 중복) + _mk_chunk_result(doc_id=20, chunk_id=200), # keep (chunk_id 200 신규) + ] + out = _dedup_chunks_by_id(chunks) + assert len(out) == 3 + assert out[0].chunk_id == 100 + assert out[1].chunk_id is None and out[1].id == 10 + assert out[2].chunk_id == 200 + + +def test_dedup_chunks_order_preserved(): + """입력 순서 유지 (variant 0 = 원본 verbatim 우선 invariant).""" + from services.search.search_pipeline import _dedup_chunks_by_id + chunks = [ + _mk_chunk_result(doc_id=10, chunk_id=cid) + for cid in (300, 100, 200, 100, 300, 400) # 100/300 중복 + ] + out = _dedup_chunks_by_id(chunks) + assert [c.chunk_id for c in out] == [300, 100, 200, 400] + + +def test_phase2q_rerank_input_cap_constants(): + """PHASE2Q_RERANK_INPUT_CAP + PHASE2Q_CHUNKS_PER_DOC (baseline MAX_* 와 별도). + + cap 60 + chunks_per_doc=2 + dedup + TEI MAX_BATCH_TOKENS 16384 조합 (사용자 결정, + 2026-05-24). doc 다양성 유지 + reranker 가 doc 의 2 best chunks 봄 + payload 한도 + 16384 안에 안전. 진단 history 는 모듈 docstring 박제. + """ + from services.search.search_pipeline import ( + PHASE2Q_CHUNKS_PER_DOC, + PHASE2Q_RERANK_INPUT_CAP, + ) + assert PHASE2Q_RERANK_INPUT_CAP == 60 + assert PHASE2Q_CHUNKS_PER_DOC == 2