"""PR-DocSrv-Ask-ToolCalling-ReAct-1: /api/search/ask/react endpoint integration. 검증 항목 (G0-3 trace exposure + 정정 4 invariant): - backend unavailable → HTTP 503 + error_reason=macbook_unavailable + ★ `run_search` mock 호출 횟수 == 0 (search 단계 진입 자체 차단) - 정상 응답 → 200 + final_answer + sources + debug_trace=null (default) - debug=true → debug_trace 채워짐 - max rounds 도달 → iterations=2 + partial=false (final content 정상) endpoint 함수 (`api.search.ask_react`) 를 직접 호출하는 lightweight 패턴. TestClient 없이 FastAPI deps 를 MagicMock 으로 우회. (priority_gate / backend_dispatcher test 와 동일 service-layer 패턴.) """ from __future__ import annotations import asyncio import json import os import sys from unittest.mock import AsyncMock, MagicMock import pytest sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "app")) # ── helpers ──────────────────────────────────────────────────────────────── def _msg_with_tool_call(q: str, tc_id: str = "tc-1") -> dict: return { "role": "assistant", "content": None, "tool_calls": [ { "id": tc_id, "type": "function", "function": { "name": "search", "arguments": json.dumps({"q": q}, ensure_ascii=False), }, } ], } def _msg_with_content(text: str) -> dict: return {"role": "assistant", "content": text, "tool_calls": None} def _fake_chunk(chunk_id: int, doc_id: int = 100): m = MagicMock() m.id = chunk_id m.chunk_id = chunk_id m.doc_id = doc_id m.title = f"doc {doc_id}" m.score = 0.9 m.snippet = f"snippet {chunk_id}" m.text = None return m def _fake_pr(chunks: list): pr = MagicMock() pr.results = chunks return pr @pytest.fixture def patched_backend_and_search(monkeypatch): """get_backend + run_search 둘 다 mock. backend 의 generate_with_tools 는 각 테스트가 side_effect 설정. Returns: (backend_mock, run_search_mock, set_backend_unavailable_fn). """ from services.llm.backends import BackendUnavailable, QwenMacBookBackend from services.llm import backends as backends_mod from services.search import react_loop backend = MagicMock(spec=QwenMacBookBackend) backend.name = "qwen-macbook" backend.generate_with_tools = AsyncMock() def _fake_get_backend(name): # endpoint 가 qwen-macbook 만 호출하므로 단일 backend 반환 return backend monkeypatch.setattr(backends_mod, "get_backend", _fake_get_backend) # search.py 의 ask_react 안에서 `from services.llm.backends import ... get_backend` # 로 import 하므로 module-level patch 만으로 충분 (지연 import 라 매번 fresh). run_search_mock = AsyncMock(return_value=_fake_pr([_fake_chunk(1)])) monkeypatch.setattr(react_loop, "run_search", run_search_mock) def _make_unavailable(): backend.generate_with_tools.side_effect = BackendUnavailable( "qwen-macbook", "ConnectError" ) return backend, run_search_mock, _make_unavailable def _call_endpoint(payload): """ask_react 를 직접 호출. user/session 은 MagicMock 으로 우회.""" from api.search import ask_react user = MagicMock() session = MagicMock() return asyncio.run(ask_react(payload, user=user, session=session)) # ── ★ 정정 4 invariant: backend unavailable → 503 + run_search 호출 0 ────── def test_qwen_unavailable_returns_503(patched_backend_and_search): """backend BackendUnavailable → HTTP 503 + error_reason=macbook_unavailable.""" from api.search import AskReactRequest backend, run_search_mock, make_unavailable = patched_backend_and_search make_unavailable() response = _call_endpoint(AskReactRequest(query="Q")) # JSONResponse instance assert response.status_code == 503 body = json.loads(response.body) assert body["error_reason"] == "macbook_unavailable" assert body["backend_used"] is None assert body["backend_requested"] == "qwen-macbook" # ★ run_search 호출 0 (search 진입 자체 차단) assert run_search_mock.call_count == 0 # ── 정상 200 + G0-3 default debug_trace=null ────────────────────────────── def test_successful_response_default_no_debug_trace(patched_backend_and_search): """debug 미지정 (default false) → 200 + debug_trace == null.""" from api.search import AskReactRequest, AskReactResponse backend, run_search_mock, _ = patched_backend_and_search backend.generate_with_tools.side_effect = [ _msg_with_tool_call("q1"), _msg_with_content("최종 답입니다"), ] response = _call_endpoint(AskReactRequest(query="Q")) # Pydantic instance (FastAPI response_model 적용 전 raw return) assert isinstance(response, AskReactResponse) assert response.final_answer == "최종 답입니다" assert response.iterations == 2 assert response.partial is False assert response.debug_trace is None # ★ G0-3 assert len(response.sources) == 1 # ── G0-3: debug=true → debug_trace 채워짐 ────────────────────────────────── def test_debug_true_populates_trace(patched_backend_and_search): from api.search import AskReactRequest backend, run_search_mock, _ = patched_backend_and_search backend.generate_with_tools.side_effect = [ _msg_with_content("바로 답"), ] response = _call_endpoint(AskReactRequest(query="Q", debug=True)) assert response.debug_trace is not None assert isinstance(response.debug_trace, list) assert len(response.debug_trace) >= 1 # ── max rounds → final content 정상 → partial=false ────────────────────── def test_max_rounds_with_final_content(patched_backend_and_search): from api.search import AskReactRequest backend, run_search_mock, _ = patched_backend_and_search backend.generate_with_tools.side_effect = [ _msg_with_tool_call("q1"), _msg_with_tool_call("q2", tc_id="tc-2"), _msg_with_content("정리된 최종 답"), ] response = _call_endpoint(AskReactRequest(query="Q")) assert response.iterations == 2 assert response.partial is False assert response.final_answer == "정리된 최종 답" # LLM 호출 3회, search 2회 (G0-2 cap) assert backend.generate_with_tools.call_count == 3 assert run_search_mock.call_count == 2 # ── max rounds + final content 빈 string → partial=true ────────────────── def test_max_rounds_with_empty_final_partial(patched_backend_and_search): from api.search import AskReactRequest backend, run_search_mock, _ = patched_backend_and_search backend.generate_with_tools.side_effect = [ _msg_with_tool_call("q1"), _msg_with_tool_call("q2", tc_id="tc-2"), _msg_with_content(""), ] response = _call_endpoint(AskReactRequest(query="Q")) assert response.iterations == 2 assert response.partial is True assert response.final_answer == ""