feat(search): /ask backend dispatcher (qwen-macbook opt-in, no silent fallback)

PR-MacBook-RAG-Backend-1 — /api/search/ask 의 명시 backend 선택 진입점.

핵심 invariant (정정 4):
- backend 미지정 = Gemma Mac mini default, 응답 contract 변동 0
- backend="qwen-macbook" 명시 opt-in 만 MacBook M5 Max mlx-vlm.server 호출
- MacBook unavailable 시 HTTP 503 + error_reason=macbook_unavailable
- 자동 fallback 절대 금지 — 실패 path 에서 Gemma backend.generate() 호출 0

backend dispatcher (services/llm/):
- BackendBase / GemmaMacMiniBackend / QwenMacBookBackend / BackendUnavailable
- Qwen backend 는 Mac mini llm_gate 를 점유하지 않고 별도 Semaphore(1) 사용 —
  llm_gate docstring 의 single-inference 영구 룰은 같은 endpoint 한정임을 scope 로 명시
- httpx Connect/Read/Pool/Timeout/5xx → BackendUnavailable, 4xx 전파

synthesis_service.py:
- backend 인자 추가, status="backend_unavailable" 신규
- cache key 에 backend_name 포함 (qwen ↔ gemma 캐시 충돌 차단)

config:
- search.ask.backend.{macmini_url, macbook_url, macbook_model,
  timeout_connect_s=1, timeout_read_s=30}
- MacBook endpoint = http://100.118.112.84:8810 (M5 Max Tailscale bind)

tests (14 신규):
- tests/services/test_backend_dispatcher.py (9): dispatcher 정합성 + Qwen
  generate path (mock 200 / dead port / 5xx / 4xx) + cache identity
- tests/api/test_search_ask_macbook_503.py (5): 정정 4 핵심 invariant.
  backend=qwen-macbook 비가용 시 gemma.generate.assert_not_called()

기존 ask 회귀 0 (test_ask_eval_auth 9건 등 85건 모두 PASS).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
hyungi
2026-05-22 12:38:48 +00:00
parent 224843ba25
commit a7b8f15870
9 changed files with 910 additions and 42 deletions
+197
View File
@@ -0,0 +1,197 @@
"""PR-MacBook-RAG-Backend-1: backend dispatcher 단위 테스트.
- get_backend(None) / get_backend("gemma-macmini") → GemmaMacMiniBackend
- get_backend("qwen-macbook") → QwenMacBookBackend (config 값 반영)
- get_backend("unknown") → ValueError
- QwenMacBookBackend.generate() — mock httpx 200 OK → content 반환
- QwenMacBookBackend.generate() — dead port → BackendUnavailable("ConnectError")
목적: 정정 4 (자동 fallback 부재) 의 핵심 빌딩블럭 검증. dispatcher 자체 무결성.
"""
from __future__ import annotations
import asyncio
import os
import sys
from unittest.mock import AsyncMock, patch
import httpx
import pytest
# Prepend "<this file's directory>/../../app" to the import search path.
# NOTE(review): presumably this is what lets the tests below import the
# `services` and `core` packages without installing the app — confirm layout.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "app"))
@pytest.fixture(autouse=True)
def _reset_dispatcher():
    """Isolate each test by resetting the backend instance cache around it.

    The reset helper is called once before the test body runs and once
    again after it finishes.
    """
    from services.llm import reset_backends_for_test as clear_backend_cache

    # Setup: start the test from a freshly reset dispatcher.
    clear_backend_cache()
    yield
    # Teardown: do not leak instances created by this test into the next one.
    clear_backend_cache()
def test_get_backend_default_is_gemma():
    """Omitting the backend (passing None) selects the Gemma Mac mini default."""
    from services.llm import get_backend

    default_backend = get_backend(None)

    assert default_backend.name == "gemma-macmini"
def test_get_backend_explicit_gemma():
    """Requesting "gemma-macmini" by name yields a backend with that name."""
    from services.llm import get_backend

    explicit_backend = get_backend("gemma-macmini")

    assert explicit_backend.name == "gemma-macmini"
def test_get_backend_qwen_macbook_uses_config():
    """The qwen-macbook backend mirrors the values in settings.search.ask.backend."""
    from core.config import settings
    from services.llm import QwenMacBookBackend, get_backend

    qwen = get_backend("qwen-macbook")

    assert isinstance(qwen, QwenMacBookBackend)
    assert qwen.name == "qwen-macbook"

    # The backend is expected to hold the configured URL with trailing
    # slashes removed, and the model / connect timeout unchanged.
    expected_url = settings.search.ask.backend.macbook_url.rstrip("/")
    assert qwen.base_url == expected_url
    assert qwen.model == settings.search.ask.backend.macbook_model
    assert qwen.timeout_connect_s == settings.search.ask.backend.timeout_connect_s
def test_get_backend_unknown_raises_value_error():
    """An unsupported backend name raises ValueError mentioning "unknown backend".

    NOTE(review): the original docstring says the caller maps this error to
    HTTP 400 — that mapping is not exercised here.
    """
    from services.llm import get_backend

    with pytest.raises(ValueError) as raised:
        get_backend("claude-opus")

    # Same regex search against the exception text that ``match=`` performs.
    raised.match("unknown backend")
def test_get_backend_cached_returns_same_instance():
    """Two lookups of the same backend name return the identical object."""
    from services.llm import get_backend

    first_lookup = get_backend("qwen-macbook")
    second_lookup = get_backend("qwen-macbook")

    assert first_lookup is second_lookup
def test_qwen_generate_success_mocked():
    """A mocked 200 OK makes generate() return choices[0].message.content."""
    from services.llm import QwenMacBookBackend

    class _OkResponse:
        """Minimal stand-in for a successful HTTP response object."""

        status_code = 200

        def raise_for_status(self):
            # 2xx: nothing to raise.
            return None

        def json(self):
            return {
                "choices": [{"message": {"content": "hello from qwen"}}],
            }

    async def _stub_post(client, url, json=None):
        # Replaces httpx.AsyncClient.post; ignores the request entirely.
        return _OkResponse()

    qwen = QwenMacBookBackend(
        base_url="http://test:8810",
        model="test-model",
        timeout_connect_s=1,
    )

    with patch.object(httpx.AsyncClient, "post", new=_stub_post):
        answer = asyncio.run(qwen.generate("hi", timeout_read_s=2))

    assert answer == "hello from qwen"
def test_qwen_generate_dead_port_raises_backend_unavailable():
    """A real call to a dead port (127.0.0.1:1) raises BackendUnavailable.

    This is the core of the "no silent fallback" rule: when an explicitly
    requested Qwen call fails, the exception must propagate to the caller.
    NOTE(review): per the original docstring, synthesis_service is expected to
    catch it and map it to status="backend_unavailable" — not verified here.
    """
    from services.llm import BackendUnavailable, QwenMacBookBackend

    unreachable = QwenMacBookBackend(
        base_url="http://127.0.0.1:1",
        model="test-model",
        timeout_connect_s=1,
    )

    with pytest.raises(BackendUnavailable) as raised:
        asyncio.run(unreachable.generate("hi", timeout_read_s=2))

    failure = raised.value
    assert failure.backend_name == "qwen-macbook"
    # Either a refused connection or a connect timeout is acceptable.
    assert any(marker in failure.reason for marker in ("ConnectError", "Timeout"))
def test_qwen_generate_http_5xx_raises_backend_unavailable():
    """A 5xx response is also reported as BackendUnavailable."""
    from services.llm import BackendUnavailable, QwenMacBookBackend

    class _ServerErrorResponse:
        """Stand-in response whose raise_for_status() fails with HTTP 503."""

        status_code = 503

        def raise_for_status(self):
            failed_request = httpx.Request(
                "POST", "http://test:8810/v1/chat/completions"
            )
            raise httpx.HTTPStatusError(
                "service unavailable",
                request=failed_request,
                response=httpx.Response(503),
            )

        def json(self):
            return {}

    async def _stub_post(client, url, json=None):
        # Replaces httpx.AsyncClient.post; always answers with the 503 stub.
        return _ServerErrorResponse()

    qwen = QwenMacBookBackend(
        base_url="http://test:8810",
        model="test-model",
        timeout_connect_s=1,
    )

    with patch.object(httpx.AsyncClient, "post", new=_stub_post), pytest.raises(
        BackendUnavailable
    ) as raised:
        asyncio.run(qwen.generate("hi", timeout_read_s=2))

    failure = raised.value
    assert failure.backend_name == "qwen-macbook"
    assert "503" in failure.reason
def test_qwen_generate_http_4xx_not_backend_unavailable():
    """A 4xx (caller error) propagates as httpx.HTTPStatusError, not BackendUnavailable.

    The stubbed response raises HTTPStatusError(400) from raise_for_status().
    generate() must let that error through unchanged rather than converting it
    into BackendUnavailable, which is reserved for connectivity and 5xx
    failures.
    """
    from services.llm import BackendUnavailable, QwenMacBookBackend

    class _Resp:
        """Stand-in response whose raise_for_status() fails with HTTP 400."""

        status_code = 400

        def raise_for_status(self):
            raise httpx.HTTPStatusError(
                "bad request",
                request=httpx.Request("POST", "http://test:8810/v1/chat/completions"),
                response=httpx.Response(400),
            )

        def json(self):
            return {}

    async def _fake_post(self, url, json=None):
        # Replaces httpx.AsyncClient.post; always answers with the 400 stub.
        return _Resp()

    backend = QwenMacBookBackend(
        base_url="http://test:8810",
        model="test-model",
        timeout_connect_s=1,
    )

    with patch.object(httpx.AsyncClient, "post", new=_fake_post):
        with pytest.raises(httpx.HTTPStatusError) as exc_info:
            asyncio.run(backend.generate("hi", timeout_read_s=2))

    # Pin the invariant from the test name explicitly: even if the exception
    # hierarchy changes later, a 4xx must never surface as BackendUnavailable.
    assert not isinstance(exc_info.value, BackendUnavailable)
    # The propagated error is still the original 400 from the stub.
    assert exc_info.value.response.status_code == 400