diff --git a/app/services/search/retrieval_service.py b/app/services/search/retrieval_service.py index ac8ae9d..fd9d6ca 100644 --- a/app/services/search/retrieval_service.py +++ b/app/services/search/retrieval_service.py @@ -40,9 +40,19 @@ async def search_text( JOIN documents d ON d.id = c.id ORDER BY 5컬럼 similarity 가중 합산 + ts_rank * 2.0 가중치: title 3.0 / ai_tags 2.5 / user_note 2.0 / ai_summary 1.5 / extracted_text 1.0 + + threshold: + pg_trgm.similarity_threshold default = 0.3 + → multi-token 한국어 뉴스 쿼리(예: "이란 미국 전쟁 글로벌 반응")에서 + candidates를 못 모음 → recall 감소 (0.788 → 0.750) + → set_limit(0.15)으로 낮춰 recall 회복. precision은 ORDER BY similarity 합산이 보정. """ from api.search import SearchResult # 순환 import 회피 + # trigram threshold를 0.15로 낮춰 multi-token query recall 회복 + # SQLAlchemy async session 내 두 execute는 같은 connection 사용 + await session.execute(text("SELECT set_limit(0.15)")) + result = await session.execute( text(""" WITH candidates AS (