feat(search): Phase 2A E-1 — Qwen 후보 3종 백필 CLI + eval 디스패처 확장 (마이그 328~333)
- 후보 섀도 테이블 6종(전부 vector 타입 — eval=exact scan 이라 인덱스 불요, halfvec 은 C-1 소관)
- workers/phase2a_cand_backfill: resumable(NOT EXISTS)·배치 커밋·동결셋 한정(--doc/chunk-id-max), 문서/청크 입력 = production 경로 동일 구성 + plain
- CANDIDATE_BACKEND_MAP += cand_qwen06/qwen4/qwen4m (embed_kind=ollama, 쿼리측 instruct prefix G-1 핀 문자열, qwen4m = dimensions 1024 MRL)
- qwen4m 적재는 qwen4 에서 SQL 파생(subvector+l2_normalize) — 본 CLI 비대상

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
"""Phase 2A (embedding-phase2a-1) — Qwen 후보 디스패처/쿼리 임베딩 단위 테스트."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from services.search import retrieval_service as rs
|
||||
|
||||
|
||||
def test_resolve_qwen_backends():
    """Check that each Qwen candidate slug resolves to a valid backend config.

    Also checks that "baseline" resolves to None and that an unknown
    candidate slug raises ValueError.
    """
    qwen_slugs = ("cand_qwen06", "cand_qwen4", "cand_qwen4m")
    for name in qwen_slugs:
        backend = rs._resolve_backend(name)

        docs_table = backend["docs_table"]
        assert docs_table.startswith("documents_cand_qwen")
        chunks_table = backend["chunks_table"]
        assert chunks_table.startswith("document_chunks_cand_qwen")
        assert backend["embed_kind"] == "ollama"

        # The table names must also pass the second-stage SQL allowlist (R2-B1).
        assert rs._VALID_DOCS_TABLE.match(docs_table)
        assert rs._VALID_CHUNKS_TABLE.match(chunks_table)

    assert rs._resolve_backend("baseline") is None

    with pytest.raises(ValueError):
        rs._resolve_backend("cand_unknown")
|
||||
|
||||
|
||||
def test_qwen4m_has_mrl_dimensions():
    """cand_qwen4m carries embed_dimensions == 1024; cand_qwen4 has no such key."""
    mrl_backend = rs._resolve_backend("cand_qwen4m")
    assert mrl_backend["embed_dimensions"] == 1024

    plain_backend = rs._resolve_backend("cand_qwen4")
    assert "embed_dimensions" not in plain_backend
|
||||
|
||||
|
||||
class _FakeResp:
|
||||
def __init__(self, embs):
|
||||
self._embs = embs
|
||||
|
||||
def raise_for_status(self):
|
||||
return None
|
||||
|
||||
def json(self):
|
||||
return {"embeddings": self._embs}
|
||||
|
||||
|
||||
class _FakeClient:
|
||||
"""httpx.AsyncClient 대역 — post body 캡처."""
|
||||
|
||||
captured: dict = {}
|
||||
|
||||
def __init__(self, *a, **k):
|
||||
pass
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def __aexit__(self, *a):
|
||||
return False
|
||||
|
||||
async def post(self, url, json=None):
|
||||
_FakeClient.captured = {"url": url, "json": json}
|
||||
dim = (json or {}).get("dimensions") or 1024
|
||||
return _FakeResp([[0.1] * dim])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ollama_query_embed_applies_instruct_prefix(monkeypatch):
    """The cand_qwen06 query request is instruct-prefixed and omits "dimensions"."""
    import httpx

    monkeypatch.setattr(httpx, "AsyncClient", _FakeClient)

    backend = rs._resolve_backend("cand_qwen06")
    vector = await rs._embed_query_via_ollama(backend, "압력용기 수압시험")
    assert vector is not None and len(vector) == 1024

    payload = _FakeClient.captured["json"]
    assert payload["model"] == "qwen3-embedding:0.6b"

    # The first input must be the instruct prefix followed by the raw query.
    first_input = payload["input"][0]
    assert first_input.startswith(rs.QWEN3_QUERY_INSTRUCT)
    assert first_input.endswith("압력용기 수압시험")

    assert "dimensions" not in payload
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ollama_query_embed_mrl_dimensions(monkeypatch):
    """The cand_qwen4m query request carries dimensions=1024 and yields 1024 floats."""
    import httpx

    monkeypatch.setattr(httpx, "AsyncClient", _FakeClient)
    # The capture lives on the class and survives across tests; clear it so
    # the assertion below can only be satisfied by this test's own request.
    _FakeClient.captured = {}

    cfg = rs._resolve_backend("cand_qwen4m")
    out = await rs._embed_query_via_ollama(cfg, "q")

    assert _FakeClient.captured["json"]["dimensions"] == 1024
    # Guard against None first so a failed embed reports as an assertion
    # failure rather than a TypeError from len(None).
    assert out is not None and len(out) == 1024
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ollama_query_embed_failure_returns_none(monkeypatch):
    """A connection error raised by the client's post() results in None."""
    import httpx

    class _Boom(_FakeClient):
        """Client whose post() always fails with a connection error."""

        async def post(self, url, json=None):
            raise httpx.ConnectError("down")

    monkeypatch.setattr(httpx, "AsyncClient", _Boom)

    backend = rs._resolve_backend("cand_qwen06")
    result = await rs._embed_query_via_ollama(backend, "q")
    assert result is None
|
||||
Reference in New Issue
Block a user