diff --git a/app/api/search.py b/app/api/search.py index d9a3235..a0e8f54 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -185,19 +185,33 @@ async def search( + (" (bg triggered)" if triggered else " (bg inflight)") ) - # Phase 2.2: multilingual vector search 활성 조건 + # Phase 2.2: multilingual vector search 활성 조건 (보수적) # - cache hit + analyzer_tier == "analyzed" (≥0.85 고신뢰) # - normalized_queries 2개 이상 (lang 다양성 있음) - # 그 외 케이스는 기존 single-query search_vector 그대로 사용 (회귀 0). + # - domain_hint == "news" 또는 language_scope == "global" + # ↑ 1차 측정 결과: document 도메인에서 multilingual이 natural_language_ko + # -0.10 악화시킴. 영어 번역이 한국어 법령 검색에서 noise로 작용. + # news / global 영역에서만 multilingual 활성 (news_crosslingual +0.10 개선 확인). use_multilingual: bool = False normalized_queries: list[dict] = [] if analyzer_cache_hit and analyzer_tier == "analyzed" and query_analysis: - raw_nq = query_analysis.get("normalized_queries") or [] - if isinstance(raw_nq, list) and len(raw_nq) >= 2: - normalized_queries = [nq for nq in raw_nq if isinstance(nq, dict) and nq.get("text")] - if len(normalized_queries) >= 2: - use_multilingual = True - notes.append(f"multilingual langs={[nq.get('lang') for nq in normalized_queries]}") + domain_hint = query_analysis.get("domain_hint", "mixed") + language_scope = query_analysis.get("language_scope", "limited") + is_multilingual_candidate = ( + domain_hint == "news" or language_scope == "global" + ) + if is_multilingual_candidate: + raw_nq = query_analysis.get("normalized_queries") or [] + if isinstance(raw_nq, list) and len(raw_nq) >= 2: + normalized_queries = [ + nq for nq in raw_nq if isinstance(nq, dict) and nq.get("text") + ] + if len(normalized_queries) >= 2: + use_multilingual = True + notes.append( + f"multilingual langs={[nq.get('lang') for nq in normalized_queries]}" + f" hint={domain_hint}/{language_scope}" + ) if mode == "vector": t0 = time.perf_counter()