From 50e6b5ad90977b098aa9acec67929de788c443f3 Mon Sep 17 00:00:00 2001
From: Hyungi Ahn <hyungiahn@Hyungiui-MacBookPro.local>
Date: Tue, 7 Apr 2026 08:33:25 +0900
Subject: [PATCH] =?UTF-8?q?fix(search):=20confidence=20=ED=9C=B4=EB=A6=AC?=
 =?UTF-8?q?=EC=8A=A4=ED=8B=B1=20vector-only=20amplify=20=EB=B2=84=EA=B7=B8?=
 =?UTF-8?q?=20=EC=88=98=EC=A0=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

vector-only 매치(match_reason == 'vector')에서 raw 코사인 0.43이 confidence 0.6으로
잘못 amplify되어 threshold(0.5)를 넘어 버려 low_confidence로 분류되지 않던 문제.

- vector-only 분기: amplify 제거, _cosine_to_confidence로 일관 환산
- _cosine_to_confidence: bge-m3 코사인 분포 (무관 텍스트 ~0.4) 반영
- 코사인 0.55 = threshold 경계(0.50), 0.45 미만은 명확히 low

smoke test 결과 zzzqxywvkpqxnj1234 같은 무의미 쿼리(top cosine 0.43)가
low_confidence로 잡히지 않던 문제 해결.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 app/services/search_telemetry.py | 52 ++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 13 deletions(-)

diff --git a/app/services/search_telemetry.py b/app/services/search_telemetry.py
index 91ebd14..3260420 100644
--- a/app/services/search_telemetry.py
+++ b/app/services/search_telemetry.py
@@ -81,8 +81,16 @@ def compute_confidence(results: list[Any], mode: str) -> float:
 
     Phase 0.3 임시 구현. Phase 2에서 QueryAnalyzer 결과 + reranker score로 교체.
 
-    하이브리드/텍스트 모드는 score가 가중치 합산이라 unbounded → match_reason과 결합.
-    벡터 모드는 score가 코사인 유사도(0..1)라 그대로 사용.
+    score 의미 정리 (search.py 기준):
+      - mode=vector       → score = 코사인 유사도 [0..1]
+      - mode=fts/trgm/hybrid에서 텍스트 매치 → score = 가중치 합산 (unbounded)
+        가중치: title=3.0 / tags=2.5 / note=2.0 / summary=1.5 / content=1.0 / fts bonus≈2.0
+      - mode=hybrid에서 텍스트 0건 → 벡터 결과만, score는 코사인 그대로
+      - mode=hybrid 텍스트+벡터 동시 매치 → score = 텍스트가중치 + 0.5*코사인,
+        match_reason = "<텍스트reason>+vector"
+
+    핵심: match_reason이 정확히 'vector'(=문자열 "vector")면 텍스트 매치 0건인 vector-only.
+          이 경우 score는 raw 코사인이므로 amplify 금지.
     """
     if not results:
         return 0.0
@@ -93,12 +101,9 @@ def compute_confidence(results: list[Any], mode: str) -> float:
 
     if mode == "vector":
         # 코사인 유사도 그대로
-        return max(0.0, min(1.0, top_score))
-
-    # text / hybrid: match_reason 강도 + score를 함께 본다
-    # search.py의 가중치: title=3.0, tags=2.5, note=2.0, summary=1.5, content=1.0, fts bonus=2.0
-    # vector boost(hybrid 합산)는 +0.5*cosine
+        return _cosine_to_confidence(top_score)
 
+    # text / hybrid: 강한 텍스트 매치 우선 판정
     if "title" in reason and top_score >= 4.0:
         return 0.95
     if any(k in reason for k in ("tags", "note")) and top_score >= 3.0:
@@ -109,15 +114,36 @@ def compute_confidence(results: list[Any], mode: str) -> float:
         return 0.65
     if "fts" in reason and top_score >= 1.0:
         return 0.55
-    if "vector" in reason:
-        # vector-only hit (텍스트 매칭 실패) → 코사인 유사도 환산
-        # hybrid 합산 시 vector 단독 점수는 score * 0.5로 들어옴
-        cosine = top_score / 0.5 if top_score < 1.0 else top_score
-        return max(0.2, min(0.6, cosine * 0.7))
-    # 약한 매치
+
+    # vector-only hit (텍스트 0건 → 코사인 raw, amplify 금지)
+    if reason == "vector":
+        return _cosine_to_confidence(top_score)
+
+    # 그 외(약한 매치 또는 알 수 없는 reason)
     return 0.3
 
 
+def _cosine_to_confidence(cosine: float) -> float:
+    """Map a bge-m3 embedding cosine similarity to a stepped confidence value.
+
+    bge-m3 usually yields cosines of roughly 0.3~0.5 even for unrelated text, so
+    0.5 is read as "weakly similar", 0.7+ as "fairly related", 0.85+ as "highly related".
+    """
+    if cosine >= 0.85:
+        return 0.95
+    if cosine >= 0.75:
+        return 0.80
+    if cosine >= 0.65:
+        return 0.65
+    if cosine >= 0.55:
+        return 0.50  # threshold boundary
+    if cosine >= 0.45:
+        return 0.35
+    if cosine >= 0.35:
+        return 0.20
+    return 0.10
+
+
 # ─── 로깅 진입점 ─────────────────────────────────────────