"""PR-DocSrv-Ask-ToolCalling-ReAct-1: agentic_ask_loop unit tests. 검증 invariant: - G0-1: tests/fixtures/qwen_tool_call_response.json 의 shape 이 parsing 가정과 일치. - G0-2 counter semantics: * LLM 호출 횟수 ≤ max_llm_calls (= max_tool_rounds + 1) * search 실행 횟수 ≤ search_exec_max (= max_tool_rounds) * 마지막 LLM 호출의 tool_choice == "none" * partial=true 조건: max rounds 후 final content 비어 있을 때 - G0-3 trace exposure: debug=False → debug_trace=None, debug=True → list[dict]. - BackendUnavailable 은 호출자에게 그대로 전파 (정정 4 의 연장). """ from __future__ import annotations import asyncio import json import os import sys from pathlib import Path from unittest.mock import AsyncMock, MagicMock import pytest sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "app")) FIXTURE_PATH = ( Path(__file__).resolve().parents[1] / "fixtures" / "qwen_tool_call_response.json" ) # ── helpers ──────────────────────────────────────────────────────────────── def _msg_with_tool_call(q: str, tc_id: str = "tc-1") -> dict: """G0-1 fixture shape 그대로 — assistant message with one tool_call.""" return { "role": "assistant", "content": None, "tool_calls": [ { "id": tc_id, "type": "function", "function": { "name": "search", "arguments": json.dumps({"q": q}, ensure_ascii=False), }, } ], } def _msg_with_content(text: str) -> dict: return {"role": "assistant", "content": text, "tool_calls": None} def _fake_chunk(chunk_id: int, doc_id: int = 100, score: float = 0.9): m = MagicMock() m.id = chunk_id m.chunk_id = chunk_id m.doc_id = doc_id m.title = f"doc {doc_id}" m.score = score m.snippet = f"snippet for chunk {chunk_id}" m.text = None return m def _fake_pr(chunks: list): pr = MagicMock() pr.results = chunks return pr @pytest.fixture def mock_backend(): """services.llm.backends.QwenMacBookBackend instance mock (generate_with_tools).""" from services.llm.backends import QwenMacBookBackend b = MagicMock(spec=QwenMacBookBackend) b.name = "qwen-macbook" b.generate_with_tools = AsyncMock() return b @pytest.fixture 
def mock_run_search(monkeypatch): """services.search.react_loop.run_search 를 monkeypatch — chunk 1건 반환 default.""" from services.search import react_loop mock = AsyncMock(return_value=_fake_pr([_fake_chunk(1)])) monkeypatch.setattr(react_loop, "run_search", mock) return mock # ── G0-1: fixture shape 검증 ─────────────────────────────────────────────── def test_fixture_shape_matches_parser_assumptions(): """G0-1: fixture 의 shape 이 react_loop 의 parsing 가정과 일치.""" assert FIXTURE_PATH.exists(), f"fixture missing at {FIXTURE_PATH}" fixture = json.loads(FIXTURE_PATH.read_text(encoding="utf-8")) assert "choices" in fixture and len(fixture["choices"]) >= 1 msg = fixture["choices"][0]["message"] assert msg["role"] == "assistant" assert "tool_calls" in msg tcs = msg["tool_calls"] assert isinstance(tcs, list) and len(tcs) >= 1 tc = tcs[0] assert tc["type"] == "function" assert tc["function"]["name"] # non-empty str # arguments 가 JSON string 임 (G0-1 핵심 발견) args_str = tc["function"]["arguments"] assert isinstance(args_str, str) args = json.loads(args_str) assert isinstance(args, dict) # ── early exit (LLM call #1 에 tool_calls 없음) ───────────────────────────── def test_no_tool_calls_early_exit(mock_backend, mock_run_search): """첫 LLM 호출이 tool_calls 없이 content 반환 → iterations=1, partial=false.""" from services.search.react_loop import agentic_ask_loop mock_backend.generate_with_tools.side_effect = [ _msg_with_content("바로 답입니다"), ] session = MagicMock() result = asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend)) assert result.iterations == 1 assert result.partial is False assert result.final_answer == "바로 답입니다" assert result.sources == [] assert mock_backend.generate_with_tools.call_count == 1 assert mock_run_search.call_count == 0 # ── 1 round + early exit ─────────────────────────────────────────────────── def test_one_round_then_final_content(mock_backend, mock_run_search): """round 1 tool_call → search → round 2 content (early exit).""" from 
services.search.react_loop import agentic_ask_loop mock_backend.generate_with_tools.side_effect = [ _msg_with_tool_call("query A"), _msg_with_content("두 번째 호출 종합문"), ] session = MagicMock() result = asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend)) assert result.iterations == 2 assert result.partial is False assert result.final_answer == "두 번째 호출 종합문" assert len(result.sources) == 1 assert mock_backend.generate_with_tools.call_count == 2 assert mock_run_search.call_count == 1 # ── max rounds 도달 + final call ──────────────────────────────────────────── def test_max_rounds_reached_final_with_content(mock_backend, mock_run_search): """round 1, 2 둘 다 tool_call → final call → content 정상 → partial=false.""" from services.search.react_loop import agentic_ask_loop mock_backend.generate_with_tools.side_effect = [ _msg_with_tool_call("q1"), _msg_with_tool_call("q2", tc_id="tc-2"), _msg_with_content("최종 답입니다"), ] session = MagicMock() result = asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend)) assert result.iterations == 2 # = max_tool_rounds assert result.partial is False assert result.final_answer == "최종 답입니다" assert mock_backend.generate_with_tools.call_count == 3 assert mock_run_search.call_count == 2 # ── G0-2: 마지막 호출 tool_choice="none" ───────────────────────────────────── def test_final_call_uses_tool_choice_none(mock_backend, mock_run_search): """G0-2 invariant: max_tool_rounds 도달 시 final call 의 tool_choice == 'none'.""" from services.search.react_loop import agentic_ask_loop mock_backend.generate_with_tools.side_effect = [ _msg_with_tool_call("q1"), _msg_with_tool_call("q2", tc_id="tc-2"), _msg_with_content("종합"), ] session = MagicMock() asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend)) last_call = mock_backend.generate_with_tools.call_args_list[-1] assert last_call.kwargs.get("tool_choice") == "none" # final call 은 tools=[] 를 keyword 로 넘김 (positional 아님) assert last_call.kwargs.get("tools") == [] # ── G0-2: max LLM 
# calls + search exec cap ──────────────────────────────────


def test_max_llm_calls_capped_at_three(mock_backend, mock_run_search):
    """Number of LLM calls ≤ 3 (= max_tool_rounds + 1)."""
    from services.search.react_loop import agentic_ask_loop

    scripted_replies = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("종합"),
    ]
    mock_backend.generate_with_tools.side_effect = scripted_replies

    asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    llm_calls = mock_backend.generate_with_tools.call_count
    assert llm_calls <= 3


def test_search_exec_capped_at_two(mock_backend, mock_run_search):
    """Number of actual search executions ≤ max_tool_rounds (=2)."""
    from services.search.react_loop import agentic_ask_loop

    scripted_replies = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("종합"),
    ]
    mock_backend.generate_with_tools.side_effect = scripted_replies

    asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    search_runs = mock_run_search.call_count
    assert search_runs <= 2


# ── G0-2: partial=true (final content empty) ───────────────────────────────


def test_partial_when_final_content_empty(mock_backend, mock_run_search):
    """Max rounds reached + final call content empty → partial=true."""
    from services.search.react_loop import agentic_ask_loop

    empty_final = _msg_with_content("")  # empty content
    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        empty_final,
    ]

    outcome = asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    assert outcome.iterations == 2
    assert outcome.partial is True
    assert outcome.final_answer == ""


# ── sources dedup ──────────────────────────────────────────────────────────


def test_sources_dedup_by_id(mock_backend, monkeypatch):
    """A chunk id returned in two rounds appears only once in sources."""
    from services.search import react_loop
    from services.search.react_loop import agentic_ask_loop

    # round 1 → chunk id=1, round 2 → chunk id=1 + id=2
    first_round = _fake_pr([_fake_chunk(1)])
    second_round = _fake_pr([_fake_chunk(1), _fake_chunk(2)])
    search_stub = AsyncMock(side_effect=[first_round, second_round])
    monkeypatch.setattr(react_loop, "run_search", search_stub)

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("종합"),
    ]

    outcome = asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    seen_ids = [source["id"] for source in outcome.sources]
    assert seen_ids == [1, 2]  # id=1 is not duplicated
    assert len(outcome.sources) == 2


# ── G0-3: trace exposure ───────────────────────────────────────────────────


def test_debug_trace_none_when_debug_false(mock_backend, mock_run_search):
    """G0-3: debug=False (default) → debug_trace=None."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [_msg_with_content("바로 답")]

    loop_call = agentic_ask_loop(MagicMock(), "Q", backend=mock_backend, debug=False)
    outcome = asyncio.run(loop_call)

    assert outcome.debug_trace is None


def test_debug_trace_populated_when_debug_true(mock_backend, mock_run_search):
    """G0-3: debug=True → debug_trace is a list[dict]."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_content("종합"),
    ]

    loop_call = agentic_ask_loop(MagicMock(), "Q", backend=mock_backend, debug=True)
    outcome = asyncio.run(loop_call)

    trace = outcome.debug_trace
    assert isinstance(trace, list)
    assert len(trace) >= 1
    # the first entry is a tool_round
    assert trace[0].get("phase") == "tool_round"


# ── BackendUnavailable propagation ─────────────────────────────────────────


def test_backend_unavailable_propagates(mock_backend, mock_run_search):
    """BackendUnavailable is raised as-is — the caller (search.py) maps it to 503."""
    from services.llm.backends import BackendUnavailable
    from services.search.react_loop import agentic_ask_loop

    outage = BackendUnavailable("qwen-macbook", "ConnectError")
    mock_backend.generate_with_tools.side_effect = outage

    with pytest.raises(BackendUnavailable):
        asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    # ★ run_search was never called (zero search attempts)
    assert mock_run_search.call_count == 0
def test_backend_unavailable_in_final_call_propagates(mock_backend, mock_run_search):
    """BackendUnavailable raised during the final call is also re-raised as-is."""
    from services.llm.backends import BackendUnavailable
    from services.search.react_loop import agentic_ask_loop

    # Two tool-call rounds succeed; the third (final) LLM call then fails.
    scripted_replies = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        BackendUnavailable("qwen-macbook", "ReadTimeout"),
    ]
    mock_backend.generate_with_tools.side_effect = scripted_replies

    with pytest.raises(BackendUnavailable):
        asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))