From 46ba9dd231d73b6ed4e234b56f453983057c817f Mon Sep 17 00:00:00 2001
From: Hyungi Ahn <hyungiahn@Hyungiui-MacBookPro.local>
Date: Thu, 9 Apr 2026 08:00:43 +0900
Subject: [PATCH] =?UTF-8?q?fix(digest/loader):=20raw=20SQL=20pgvector=20st?=
 =?UTF-8?q?ring=20=ED=98=95=ED=83=9C=20=ED=8C=8C=EC=8B=B1=20=EC=A7=80?=
 =?UTF-8?q?=EC=9B=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

raw text() SQL + asyncpg 조합에서는 pgvector Vector(1024) 컬럼이
'[0.087,0.305,...]' 형태의 string 으로 반환되며 numpy 변환이 실패함
(ORM 을 쓰면 type 등록되지만 raw SQL 은 안 됨).

_to_numpy_embedding 에서 string 이면 json.loads 로 먼저 파싱한 뒤
numpy.asarray. 변환 실패 시 None 반환 (해당 doc 자동 drop).

Phase 4 deploy 워커 첫 실행 검증 중 발견.
---
 app/services/digest/loader.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/app/services/digest/loader.py b/app/services/digest/loader.py
index ccbc627..1f1e6be 100644
--- a/app/services/digest/loader.py
+++ b/app/services/digest/loader.py
@@ -50,10 +50,24 @@ _SOURCE_COUNTRY_SQL = text("""
 
 
 def _to_numpy_embedding(raw: Any) -> np.ndarray | None:
-    """pgvector 컬럼을 numpy array(float32)로 정규화."""
+    """Normalize a pgvector column value into a float32 numpy array.
+
+    With raw SQL + asyncpg, when the pgvector type is not registered, the
+    embedding comes back as a string such as '[0.1,0.2,...]'. No ORM is used
+    here, so the string is parsed by hand; unparsable or empty input gives None.
+    """
     if raw is None:
         return None
-    arr = np.asarray(raw, dtype=np.float32)
+    if isinstance(raw, str):
+        import json
+        try:
+            raw = json.loads(raw)
+        except json.JSONDecodeError:
+            return None
+    try:
+        arr = np.asarray(raw, dtype=np.float32)
+    except (TypeError, ValueError):
+        return None
     if arr.size == 0:
         return None
     return arr