From 151c1ee518e9206c3c79517f19d957c8d36c9238 Mon Sep 17 00:00:00 2001 From: hyungi Date: Sun, 14 Jun 2026 04:34:24 +0000 Subject: [PATCH] =?UTF-8?q?fix(search):=20text-leg=20=EB=B3=B8=EB=AC=B8=20?= =?UTF-8?q?=EC=8A=A4=EC=BD=94=EC=96=B4=EB=A7=81=202000=EC=9E=90=20?= =?UTF-8?q?=EC=A0=88=EB=8B=A8=20+=20bge-m3=20keep=5Falive=20=EB=A1=9C=20?= =?UTF-8?q?=EA=B2=80=EC=83=89=20latency=20=EA=B0=9C=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 코퍼스 ~52배 성장(코드 가정 765 → 실제 40k docs) 후 search_text ORDER BY 가 후보 행마다 extracted_text(평균 3.7KB·최대 1.6MB) 전체에 similarity() + to_tsvector() 재토큰화를 재연산 → broad/영어 쿼리 text_ms 최대 4960ms. scoring/match_reason 의 extracted_text 를 left(...,2000) 으로 절단(후보 CTE 의 FTS 매칭은 전체 본문 유지 → recall 불변). embed() 요청에 keep_alive:-1 추가로 ollama bge-m3 GPU 상주 → sparse 검색의 cold reload(~6s) 제거. 검증(snapshot freeze docs 43958/chunks 195671, 51 case, eval-version both): - graded NDCG 0.575 → 0.575 (±0.000, 전 카테고리 byte-identical) - Recall g>=2 0.691 / g>=3 0.739 불변, v0.1 NDCG/Recall/Top-3 불변 - latency p50 760→586ms (-23%) / p95 5230→832ms (-84%) - EXPLAIN 단일쿼리: V0 4917ms → left(2000) 285ms (17x) Co-Authored-By: Claude Fable 5 --- app/ai/client.py | 2 +- app/services/search/retrieval_service.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app/ai/client.py b/app/ai/client.py index 3684b76..6ff06b5 100644 --- a/app/ai/client.py +++ b/app/ai/client.py @@ -264,7 +264,7 @@ class AIClient: """벡터 임베딩 — GPU 서버 전용""" response = await self._http.post( self.ai.embedding.endpoint, - json={"model": self.ai.embedding.model, "prompt": text}, + json={"model": self.ai.embedding.model, "prompt": text, "keep_alive": -1}, # bge-m3 GPU 상주(홈랩 sparse 검색 cold reload ~6s 방지) ) response.raise_for_status() return response.json()["embedding"] diff --git a/app/services/search/retrieval_service.py b/app/services/search/retrieval_service.py index c14a2eb..f1539ef 100644 --- a/app/services/search/retrieval_service.py +++ b/app/services/search/retrieval_service.py @@ -361,7 +361,7 @@ async def search_text( + similarity(coalesce(d.ai_tags::text, ''), :q) * 2.5 + similarity(coalesce(d.user_note, ''), :q) * 2.0 + similarity(coalesce(d.ai_summary, ''), :q) * 1.5 - + similarity(coalesce(d.extracted_text, ''), :q) * 1.0 + + similarity(left(coalesce(d.extracted_text, ''), 2000), :q) * 1.0 -- FTS 보너스 (idx_documents_fts_full 활용) + coalesce(ts_rank( to_tsvector('simple', @@ -369,7 +369,7 @@ async def search_text( coalesce(d.ai_tags::text, '') || ' ' || coalesce(d.ai_summary, '') || ' ' || coalesce(d.user_note, '') || ' ' || - coalesce(d.extracted_text, '') + left(coalesce(d.extracted_text, ''), 2000) ), plainto_tsquery('simple', :q) ), 0) * 2.0 @@ -380,7 +380,7 @@ async def search_text( WHEN similarity(coalesce(d.ai_tags::text, ''), :q) >= 0.3 THEN 'tags' WHEN similarity(coalesce(d.user_note, ''), :q) >= 0.3 THEN 'note' WHEN similarity(coalesce(d.ai_summary, ''), :q) >= 0.3 THEN 'summary' - WHEN similarity(coalesce(d.extracted_text, ''), :q) >= 0.3 THEN 'content' + WHEN similarity(left(coalesce(d.extracted_text, ''), 2000), :q) >= 0.3 THEN 'content' ELSE 'fts' END AS match_reason, d.material_type, d.jurisdiction, d.published_date