From e7cd710e69494de319c1cd7441700afafb605149 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Fri, 3 Apr 2026 14:16:36 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20hybrid=20=EA=B2=80=EC=83=89=20=EB=8B=A8?= =?UTF-8?q?=EC=88=9C=ED=99=94=20=E2=80=94=20FTS=20+=20ILIKE=20(vector/trgm?= =?UTF-8?q?=20=EB=B3=B5=EC=9E=A1=20=EC=BF=BC=EB=A6=AC=20=EC=A0=9C=EA=B1=B0?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit asyncpg 파라미터 바인딩 충돌 문제 근본 해결. 한국어 검색: ILIKE fallback으로 안정 동작. Co-Authored-By: Claude Opus 4.6 (1M context) --- app/api/search.py | 66 ++++++++++++++--------------------------------- 1 file changed, 19 insertions(+), 47 deletions(-) diff --git a/app/api/search.py b/app/api/search.py index 4a67ac7..2a09fb2 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -143,55 +143,27 @@ async def _search_vector(session: AsyncSession, query: str, limit: int) -> list[ async def _search_hybrid(session: AsyncSession, query: str, limit: int) -> list[SearchResult]: - """하이브리드 검색 — FTS + 트리그램 + 벡터 가중 합산""" - # 벡터 임베딩 생성 (실패 시 FTS+트리그램만) - query_embedding = None - try: - client = AIClient() - query_embedding = await client.embed(query) - await client.close() - except Exception: - pass - - vector_clause = "" - vector_score = "0" - params = {"query": query, "limit": limit, "w_fts": W_FTS, "w_trgm": W_TRGM, "w_vector": W_VECTOR} - - if query_embedding: - vector_clause = "LEFT JOIN LATERAL (SELECT 1 - (d.embedding <=> :embedding::vector) AS vscore) v ON true" - vector_score = "coalesce(v.vscore, 0)" - params["embedding"] = str(query_embedding) - else: - # 벡터 없으면 FTS+트리그램만 사용 - params["w_fts"] = 0.6 - params["w_trgm"] = 0.4 - params["w_vector"] = 0.0 - + """하이브리드 검색 — FTS + ILIKE (안정적 한국어 지원)""" result = await session.execute( - text(f""" - SELECT * FROM ( - SELECT d.id, d.title, d.ai_domain, d.ai_summary, d.file_format, - ( - :w_fts * coalesce(ts_rank( - to_tsvector('simple', coalesce(d.title, '') || ' ' || coalesce(d.extracted_text, '')), - plainto_tsquery('simple', :query) - ), 0) - + :w_trgm * coalesce(similarity( - coalesce(d.title, '') || ' ' || coalesce(d.extracted_text, ''), - :query - ), 0) - + :w_vector * {vector_score} - ) AS score, - left(d.extracted_text, 200) AS snippet - FROM documents d - {vector_clause} - WHERE coalesce(d.extracted_text, '') != '' - OR (coalesce(d.title, '') || ' ' || coalesce(d.extracted_text, '')) ILIKE '%%' || :query || '%%' - ) sub - WHERE sub.score > 0.001 - ORDER BY sub.score DESC + text(""" + SELECT id, title, ai_domain, ai_summary, file_format, + GREATEST( + coalesce(ts_rank( + to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, '')), + plainto_tsquery('simple', :query) + ), 0), + CASE WHEN (coalesce(title, '') || ' ' || coalesce(extracted_text, '')) + ILIKE '%%' || :query || '%%' THEN 0.5 ELSE 0 END + ) AS score, + left(extracted_text, 200) AS snippet + FROM documents + WHERE to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, '')) + @@ plainto_tsquery('simple', :query) + OR (coalesce(title, '') || ' ' || coalesce(extracted_text, '')) + ILIKE '%%' || :query || '%%' + ORDER BY score DESC LIMIT :limit """), - params, + {"query": query, "limit": limit}, ) return [SearchResult(**row._mapping) for row in result]