a7b8f15870
PR-MacBook-RAG-Backend-1 — /api/search/ask 의 명시 backend 선택 진입점.
핵심 invariant (정정 4):
- backend 미지정 = Gemma Mac mini default, 응답 contract 변동 0
- backend="qwen-macbook" 명시 opt-in 만 MacBook M5 Max mlx-vlm.server 호출
- MacBook unavailable 시 HTTP 503 + error_reason=macbook_unavailable
- 자동 fallback 절대 금지 — 실패 path 에서 Gemma backend.generate() 호출 0
backend dispatcher (services/llm/):
- BackendBase / GemmaMacMiniBackend / QwenMacBookBackend / BackendUnavailable
- Qwen backend 는 Mac mini llm_gate 점유 X, 별 Semaphore(1) — llm_gate
docstring 의 single-inference 영구 룰은 같은 endpoint 한정으로 scope 명시
- httpx Connect/Read/Pool/Timeout/5xx → BackendUnavailable, 4xx 전파
synthesis_service.py:
- backend 인자 추가, status="backend_unavailable" 신규
- cache key 에 backend_name 포함 (qwen ↔ gemma 캐시 충돌 차단)
config:
- search.ask.backend.{macmini_url, macbook_url, macbook_model,
timeout_connect_s=1, timeout_read_s=30}
- MacBook endpoint = http://100.118.112.84:8810 (M5 Max Tailscale bind)
tests (14 신규):
- tests/services/test_backend_dispatcher.py (9): dispatcher 정합성 + Qwen
generate path (mock 200 / dead port / 5xx / 4xx) + cache identity
- tests/api/test_search_ask_macbook_503.py (5): 정정 4 핵심 invariant.
backend=qwen-macbook 비가용 시 gemma.generate.assert_not_called()
기존 ask 회귀 0 (test_ask_eval_auth 9건 등 85건 모두 PASS).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
198 lines
6.0 KiB
Python
198 lines
6.0 KiB
Python
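"""PR-MacBook-RAG-Backend-1: unit tests for the backend dispatcher.

- get_backend(None) / get_backend("gemma-macmini") → GemmaMacMiniBackend
- get_backend("qwen-macbook") → QwenMacBookBackend (reflects the config values)
- get_backend("unknown") → ValueError
- get_backend("qwen-macbook") called twice → the same cached instance
- QwenMacBookBackend.generate() — mocked httpx 200 OK → returns the content
- QwenMacBookBackend.generate() — dead port → BackendUnavailable("ConnectError")
- QwenMacBookBackend.generate() — mocked 5xx → BackendUnavailable
- QwenMacBookBackend.generate() — mocked 4xx → httpx.HTTPStatusError propagates

Purpose: verify the core building block of correction 4 (no automatic
fallback), i.e. the integrity of the dispatcher itself.
"""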
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
from unittest.mock import AsyncMock, patch
|
|
|
|
import httpx
|
|
import pytest
|
|
|
|
# Prepend "<this file's directory>/../../app" to the import search path.
# NOTE(review): presumably this is what makes the `services` and `core`
# packages imported inside the tests below resolvable — confirm against the
# repository layout.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "app"))
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _reset_dispatcher():
    """Isolate each test by resetting the dispatcher's backend instance cache.

    The reset helper is invoked once before the test body runs and once more
    after it, so no backend instance is carried from one test to the next.
    """
    from services.llm import reset_backends_for_test as clear_backend_cache

    # Setup: start the test from a clean dispatcher.
    clear_backend_cache()
    yield
    # Teardown: drop whatever the test created.
    clear_backend_cache()
|
|
|
|
|
|
def test_get_backend_default_is_gemma():
    """Passing ``None`` (no backend given) resolves to the Gemma Mac mini default."""
    from services.llm import get_backend

    default_backend = get_backend(None)

    assert default_backend.name == "gemma-macmini"
|
|
|
|
|
|
def test_get_backend_explicit_gemma():
    """Naming ``gemma-macmini`` explicitly resolves to the same-named backend."""
    from services.llm import get_backend

    requested_name = "gemma-macmini"

    assert get_backend(requested_name).name == "gemma-macmini"
|
|
|
|
|
|
def test_get_backend_qwen_macbook_uses_config():
    """The qwen-macbook backend mirrors the ``settings.search.ask.backend`` values."""
    from core.config import settings
    from services.llm import QwenMacBookBackend, get_backend

    qwen = get_backend("qwen-macbook")

    assert isinstance(qwen, QwenMacBookBackend)
    assert qwen.name == "qwen-macbook"

    backend_cfg = settings.search.ask.backend
    # The configured macbook_url is compared with any trailing "/" stripped.
    expected_url = backend_cfg.macbook_url.rstrip("/")
    assert qwen.base_url == expected_url
    assert qwen.model == backend_cfg.macbook_model
    assert qwen.timeout_connect_s == backend_cfg.timeout_connect_s
|
|
|
|
|
|
def test_get_backend_unknown_raises_value_error():
    """An unsupported backend name raises ``ValueError``.

    Per the original author's note, the caller is expected to map this error
    to an HTTP 400.
    """
    from services.llm import get_backend

    expect_rejection = pytest.raises(ValueError, match="unknown backend")
    with expect_rejection:
        get_backend("claude-opus")
|
|
|
|
|
|
def test_get_backend_cached_returns_same_instance():
    """Asking for the same backend twice returns the identical cached object."""
    from services.llm import get_backend

    first_lookup, second_lookup = (
        get_backend("qwen-macbook"),
        get_backend("qwen-macbook"),
    )

    assert first_lookup is second_lookup
|
|
|
|
|
|
def test_qwen_generate_success_mocked():
    """A mocked 200 OK reply makes generate() return ``choices[0].message.content``."""
    from services.llm import QwenMacBookBackend

    canned_body = {"choices": [{"message": {"content": "hello from qwen"}}]}

    class _OkResponse:
        """Minimal stand-in for a successful HTTP response object."""

        status_code = 200

        def raise_for_status(self):
            # Success: nothing to raise.
            return None

        def json(self):
            return canned_body

    async def _stub_post(self, url, json=None):
        # Replaces httpx.AsyncClient.post for the duration of the patch.
        return _OkResponse()

    qwen = QwenMacBookBackend(
        base_url="http://test:8810", model="test-model", timeout_connect_s=1
    )

    with patch.object(httpx.AsyncClient, "post", new=_stub_post):
        answer = asyncio.run(qwen.generate("hi", timeout_read_s=2))

    assert answer == "hello from qwen"
|
|
|
|
|
|
def test_qwen_generate_dead_port_raises_backend_unavailable():
    """A real dead port (127.0.0.1:1) surfaces as ``BackendUnavailable``.

    Core of correction 4: when an explicitly requested Qwen call fails, the
    exception has to propagate. Per the original author's note,
    synthesis_service catches it and maps it to status="backend_unavailable".
    """
    from services.llm import BackendUnavailable, QwenMacBookBackend

    qwen = QwenMacBookBackend(
        base_url="http://127.0.0.1:1", model="test-model", timeout_connect_s=1
    )

    with pytest.raises(BackendUnavailable) as caught:
        asyncio.run(qwen.generate("hi", timeout_read_s=2))

    failure = caught.value
    assert failure.backend_name == "qwen-macbook"
    # Either a connect failure or a timeout is an acceptable reason here.
    assert any(marker in failure.reason for marker in ("ConnectError", "Timeout"))
|
|
|
|
|
|
def test_qwen_generate_http_5xx_raises_backend_unavailable():
    """A 5xx response is also mapped to ``BackendUnavailable``."""
    from services.llm import BackendUnavailable, QwenMacBookBackend

    class _ServerErrorResponse:
        """Stand-in response whose raise_for_status() reports a 503."""

        status_code = 503

        def raise_for_status(self):
            upstream_request = httpx.Request(
                "POST", "http://test:8810/v1/chat/completions"
            )
            upstream_response = httpx.Response(503)
            raise httpx.HTTPStatusError(
                "service unavailable",
                request=upstream_request,
                response=upstream_response,
            )

        def json(self):
            return {}

    async def _stub_post(self, url, json=None):
        # Replaces httpx.AsyncClient.post for the duration of the patch.
        return _ServerErrorResponse()

    qwen = QwenMacBookBackend(
        base_url="http://test:8810", model="test-model", timeout_connect_s=1
    )

    with patch.object(httpx.AsyncClient, "post", new=_stub_post):
        with pytest.raises(BackendUnavailable) as caught:
            asyncio.run(qwen.generate("hi", timeout_read_s=2))

    failure = caught.value
    assert failure.backend_name == "qwen-macbook"
    assert "503" in failure.reason
|
|
|
|
|
|
def test_qwen_generate_http_4xx_not_backend_unavailable():
    """A 4xx (caller's fault) is not ``BackendUnavailable``; it propagates as-is.

    The mocked response raises ``httpx.HTTPStatusError`` with status 400 from
    ``raise_for_status()``. The test checks that generate() lets an
    ``HTTPStatusError`` escape, that the escaped exception is not a
    ``BackendUnavailable`` (which would also cover a subclass relationship),
    and that it still carries the upstream 400 status.
    """
    from services.llm import BackendUnavailable, QwenMacBookBackend

    class _Resp:
        """Stand-in response whose raise_for_status() reports a 400."""

        status_code = 400

        def raise_for_status(self):
            raise httpx.HTTPStatusError(
                "bad request",
                request=httpx.Request("POST", "http://test:8810/v1/chat/completions"),
                response=httpx.Response(400),
            )

        def json(self):
            return {}

    async def _fake_post(self, url, json=None):
        # Replaces httpx.AsyncClient.post for the duration of the patch.
        return _Resp()

    backend = QwenMacBookBackend(
        base_url="http://test:8810",
        model="test-model",
        timeout_connect_s=1,
    )

    with patch.object(httpx.AsyncClient, "post", new=_fake_post):
        with pytest.raises(httpx.HTTPStatusError) as exc_info:
            asyncio.run(backend.generate("hi", timeout_read_s=2))

    # Pin the negative the test name promises: a client error must not be
    # reported as backend unavailability.
    assert not isinstance(exc_info.value, BackendUnavailable)
    # The propagated error still carries the upstream client-error status.
    assert exc_info.value.response.status_code == 400
|