51c3f6df10
PR-DocSrv-Ask-ToolCalling-ReAct-1 — Qwen3.6-27B-8bit 의 native tool calling
으로 ReAct loop 도입. 기존 /api/search/ask 무수정. 트랙 B (frontend /ask SSE)
와 파일 단위 충돌 0 (search.py 의 ask() 함수 line diff = 0, 순수 추가).
핵심 invariant:
- 별 endpoint /api/search/ask/react (qwen-macbook only, implicit opt-in)
- MacBook unavailable 시 HTTP 503 + error_reason=macbook_unavailable.
Gemma 자동 fallback X (정정 4 의 연장)
G0 (구현 전 hard gate, plan b-velvety-hare.md):
- G0-1 fixture (tests/fixtures/qwen_tool_call_response.json): 실제 mlx-vlm
응답 박제. shape = OpenAI 표준 호환 (choices[0].message.tool_calls +
function.arguments JSON string). generate_with_tools() 가 본 shape 기준 구현.
- G0-2 counter semantics: max_tool_rounds=2 + max_llm_calls=3 + search_exec_max=2.
마지막 LLM 호출은 tool_choice="none" + system instruction 으로 final 강제.
- G0-3 trace exposure: default response 의 debug_trace=null. debug=true 시만
채움. server log 에는 항상 round 기록.
backends.py (193 → 261줄):
- QwenMacBookBackend.generate_with_tools(messages, tools, tool_choice)
신규 method. 기존 generate() 무수정. BackendUnavailable 처리 동일.
react_loop.py 신규 (275줄):
- agentic_ask_loop(session, query, *, backend, max_tool_rounds, debug)
- tool round 안에서 run_search 호출, results dedup by id, final round 강제,
partial=True 조건 (final content 빈 경우)
search.py (+82줄):
- POST /api/search/ask/react + AskReactRequest/Response schema
- BackendUnavailable → JSONResponse(503, error_reason=macbook_unavailable)
config.yaml + config.py:
- search.ask.react: { enabled, max_tool_rounds=2, search_tool_limit=5,
search_tool_mode=hybrid }
tests (566줄, 18 신규 + 23 회귀 모두 PASS):
- test_react_loop.py 13건: G0-1 fixture shape / G0-2 counter cap / G0-3 trace
exposure / BackendUnavailable propagation / sources dedup
- test_search_ask_react_endpoint.py 5건: 503 + run_search 호출 0 / 정상 200 /
debug=true trace 노출 / max rounds partial
- 회귀 (test_ask_eval_auth 9 + test_search_ask_macbook_503 5 +
test_backend_dispatcher 9) 모두 PASS
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
349 lines
13 KiB
Python
349 lines
13 KiB
Python
"""PR-DocSrv-Ask-ToolCalling-ReAct-1: agentic_ask_loop unit tests.
|
|
|
|
검증 invariant:
|
|
- G0-1: tests/fixtures/qwen_tool_call_response.json 의 shape 이 parsing 가정과 일치.
|
|
- G0-2 counter semantics:
|
|
* LLM 호출 횟수 ≤ max_llm_calls (= max_tool_rounds + 1)
|
|
* search 실행 횟수 ≤ search_exec_max (= max_tool_rounds)
|
|
* 마지막 LLM 호출의 tool_choice == "none"
|
|
* partial=true 조건: max rounds 후 final content 비어 있을 때
|
|
- G0-3 trace exposure: debug=False → debug_trace=None, debug=True → list[dict].
|
|
- BackendUnavailable 은 호출자에게 그대로 전파 (정정 4 의 연장).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
# Put "<this dir>/../../app" at the front of sys.path before any test imports
# ``services.*`` (presumably the directory that holds the ``services`` package
# -- confirm against the repository layout).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "app"))

# Recorded tool-call response used by the G0-1 shape test; resolved relative
# to the parent of the directory containing this file.
FIXTURE_PATH = (
    Path(__file__).resolve().parents[1] / "fixtures" / "qwen_tool_call_response.json"
)
|
|
|
|
|
|
# ── helpers ────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def _msg_with_tool_call(q: str, tc_id: str = "tc-1") -> dict:
    """Build an assistant message carrying exactly one ``search`` tool call.

    Follows the G0-1 fixture shape: ``content`` is ``None`` and
    ``function.arguments`` is a JSON-encoded string rather than a dict.

    Args:
        q: Query stored under the ``"q"`` key of the tool-call arguments.
        tc_id: Identifier assigned to the tool call.

    Returns:
        The assistant message as a plain dict.
    """
    return {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": tc_id,
                "type": "function",
                "function": {
                    "name": "search",
                    # Arguments are serialized to a string on purpose;
                    # ensure_ascii=False keeps non-ASCII queries unescaped.
                    "arguments": json.dumps({"q": q}, ensure_ascii=False),
                },
            }
        ],
    }
|
|
|
|
|
|
def _msg_with_content(text: str) -> dict:
    """Build an assistant message that carries only text (``tool_calls`` is ``None``).

    Args:
        text: Value stored under the ``"content"`` key.

    Returns:
        The assistant message as a plain dict.
    """
    return {"role": "assistant", "content": text, "tool_calls": None}
|
|
|
|
|
|
def _fake_chunk(chunk_id: int, doc_id: int = 100, score: float = 0.9) -> MagicMock:
    """Create a ``MagicMock`` standing in for a single search-result chunk.

    Args:
        chunk_id: Value assigned to both ``id`` and ``chunk_id``.
        doc_id: Value assigned to ``doc_id``; also embedded in ``title``.
        score: Value assigned to ``score``.

    Returns:
        A mock with ``id``, ``chunk_id``, ``doc_id``, ``title``, ``score``,
        ``snippet`` set and ``text`` explicitly ``None``.
    """
    m = MagicMock()
    m.id = chunk_id
    m.chunk_id = chunk_id
    m.doc_id = doc_id
    m.title = f"doc {doc_id}"
    m.score = score
    m.snippet = f"snippet for chunk {chunk_id}"
    # Set explicitly: an unset MagicMock attribute would be a truthy child mock.
    m.text = None
    return m
|
|
|
|
|
|
def _fake_pr(chunks: list) -> MagicMock:
    """Create a ``MagicMock`` whose ``results`` attribute is ``chunks``.

    Used as the return value of the patched ``run_search``.

    Args:
        chunks: Objects to expose as ``results`` (the same list object is stored).

    Returns:
        The mock wrapping ``chunks``.
    """
    pr = MagicMock()
    pr.results = chunks
    return pr
|
|
|
|
|
|
@pytest.fixture
def mock_backend():
    """Provide a mock of ``services.llm.backends.QwenMacBookBackend``.

    The mock is spec'd against the real class, reports ``name`` as
    ``"qwen-macbook"``, and has ``generate_with_tools`` replaced by an
    ``AsyncMock`` so each test can script responses via ``side_effect``.
    """
    # Imported lazily, inside the fixture, like every other ``services`` import
    # in this module.
    from services.llm.backends import QwenMacBookBackend

    b = MagicMock(spec=QwenMacBookBackend)
    b.name = "qwen-macbook"
    b.generate_with_tools = AsyncMock()
    return b
|
|
|
|
|
|
@pytest.fixture
def mock_run_search(monkeypatch):
    """Monkeypatch ``services.search.react_loop.run_search`` with an ``AsyncMock``.

    By default the mock returns a result object holding a single chunk
    (id 1). The mock itself is returned so tests can inspect ``call_count``.
    """
    from services.search import react_loop

    mock = AsyncMock(return_value=_fake_pr([_fake_chunk(1)]))
    # Patch the name inside react_loop's namespace; monkeypatch restores it
    # after the test.
    monkeypatch.setattr(react_loop, "run_search", mock)
    return mock
|
|
|
|
|
|
# ── G0-1: fixture shape verification ──────────────────────────────────────
|
|
|
|
|
|
def test_fixture_shape_matches_parser_assumptions():
    """G0-1: the fixture's shape matches react_loop's parsing assumptions."""
    assert FIXTURE_PATH.exists(), f"fixture missing at {FIXTURE_PATH}"
    fixture = json.loads(FIXTURE_PATH.read_text(encoding="utf-8"))
    assert "choices" in fixture and len(fixture["choices"]) >= 1
    msg = fixture["choices"][0]["message"]
    assert msg["role"] == "assistant"
    assert "tool_calls" in msg
    tcs = msg["tool_calls"]
    assert isinstance(tcs, list) and len(tcs) >= 1
    tc = tcs[0]
    assert tc["type"] == "function"
    assert tc["function"]["name"]  # non-empty str
    # ``arguments`` is a JSON string, not a dict (the key G0-1 finding).
    args_str = tc["function"]["arguments"]
    assert isinstance(args_str, str)
    args = json.loads(args_str)
    assert isinstance(args, dict)
|
|
|
|
|
|
# ── early exit (no tool_calls in LLM call #1) ──────────────────────────────
|
|
|
|
|
|
def test_no_tool_calls_early_exit(mock_backend, mock_run_search):
    """First LLM call returns content without tool_calls → iterations=1, partial=False."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_content("바로 답입니다"),
    ]
    session = MagicMock()

    result = asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend))

    assert result.iterations == 1
    assert result.partial is False
    assert result.final_answer == "바로 답입니다"
    assert result.sources == []
    assert mock_backend.generate_with_tools.call_count == 1
    assert mock_run_search.call_count == 0
|
|
|
|
|
|
# ── 1 round + early exit ───────────────────────────────────────────────────
|
|
|
|
|
|
def test_one_round_then_final_content(mock_backend, mock_run_search):
    """Round 1 tool_call → search → round 2 returns content (early exit)."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("query A"),
        _msg_with_content("두 번째 호출 종합문"),
    ]
    session = MagicMock()

    result = asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend))

    assert result.iterations == 2
    assert result.partial is False
    assert result.final_answer == "두 번째 호출 종합문"
    assert len(result.sources) == 1
    assert mock_backend.generate_with_tools.call_count == 2
    assert mock_run_search.call_count == 1
|
|
|
|
|
|
# ── max rounds reached + final call ────────────────────────────────────────
|
|
|
|
|
|
def test_max_rounds_reached_final_with_content(mock_backend, mock_run_search):
    """Rounds 1 and 2 both issue a tool_call → final call returns content → partial=False."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("최종 답입니다"),
    ]
    session = MagicMock()

    result = asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend))

    assert result.iterations == 2  # = max_tool_rounds
    assert result.partial is False
    assert result.final_answer == "최종 답입니다"
    assert mock_backend.generate_with_tools.call_count == 3
    assert mock_run_search.call_count == 2
|
|
|
|
|
|
# ── G0-2: last call uses tool_choice="none" ────────────────────────────────
|
|
|
|
|
|
def test_final_call_uses_tool_choice_none(mock_backend, mock_run_search):
    """G0-2 invariant: once max_tool_rounds is reached, the final call uses tool_choice == 'none'."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("종합"),
    ]
    session = MagicMock()

    asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend))

    last_call = mock_backend.generate_with_tools.call_args_list[-1]
    assert last_call.kwargs.get("tool_choice") == "none"
    # The final call passes tools=[] as a keyword argument (not positionally).
    assert last_call.kwargs.get("tools") == []
|
|
|
|
|
|
# ── G0-2: max LLM calls + search exec cap ──────────────────────────────────
|
|
|
|
|
|
def test_max_llm_calls_capped_at_three(mock_backend, mock_run_search):
    """Number of LLM calls ≤ 3 (= max_tool_rounds + 1)."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("종합"),
    ]
    asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))
    assert mock_backend.generate_with_tools.call_count <= 3
|
|
|
|
|
|
def test_search_exec_capped_at_two(mock_backend, mock_run_search):
    """Number of actual search executions ≤ max_tool_rounds (= 2)."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("종합"),
    ]
    asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))
    assert mock_run_search.call_count <= 2
|
|
|
|
|
|
# ── G0-2: partial=true (final content is empty) ────────────────────────────
|
|
|
|
|
|
def test_partial_when_final_content_empty(mock_backend, mock_run_search):
    """Max rounds reached and the final call's content is empty → partial=True."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content(""),  # empty content
    ]
    result = asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    assert result.iterations == 2
    assert result.partial is True
    assert result.final_answer == ""
|
|
|
|
|
|
# ── sources dedup ──────────────────────────────────────────────────────────
|
|
|
|
|
|
def test_sources_dedup_by_id(mock_backend, monkeypatch):
    """A chunk id returned in two rounds appears only once in ``sources``."""
    from services.search import react_loop
    from services.search.react_loop import agentic_ask_loop

    # round 1 → chunk id=1, round 2 → chunk id=1 + id=2
    run_search_mock = AsyncMock(side_effect=[
        _fake_pr([_fake_chunk(1)]),
        _fake_pr([_fake_chunk(1), _fake_chunk(2)]),
    ])
    monkeypatch.setattr(react_loop, "run_search", run_search_mock)

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("종합"),
    ]
    result = asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    src_ids = [s["id"] for s in result.sources]
    assert src_ids == [1, 2]  # id=1 is not duplicated
    assert len(result.sources) == 2
|
|
|
|
|
|
# ── G0-3: trace exposure ───────────────────────────────────────────────────
|
|
|
|
|
|
def test_debug_trace_none_when_debug_false(mock_backend, mock_run_search):
    """G0-3: debug=False (the default) → debug_trace is None."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_content("바로 답"),
    ]
    result = asyncio.run(
        agentic_ask_loop(MagicMock(), "Q", backend=mock_backend, debug=False)
    )
    assert result.debug_trace is None
|
|
|
|
|
|
def test_debug_trace_populated_when_debug_true(mock_backend, mock_run_search):
    """G0-3: debug=True → debug_trace is a list[dict]."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_content("종합"),
    ]
    result = asyncio.run(
        agentic_ask_loop(MagicMock(), "Q", backend=mock_backend, debug=True)
    )
    assert isinstance(result.debug_trace, list)
    assert len(result.debug_trace) >= 1
    # The first entry is the tool round.
    assert result.debug_trace[0].get("phase") == "tool_round"
|
|
|
|
|
|
# ── BackendUnavailable propagation ─────────────────────────────────────────
|
|
|
|
|
|
def test_backend_unavailable_propagates(mock_backend, mock_run_search):
    """BackendUnavailable is raised unchanged — the caller (search.py) maps it to 503."""
    from services.llm.backends import BackendUnavailable
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = BackendUnavailable(
        "qwen-macbook", "ConnectError"
    )

    with pytest.raises(BackendUnavailable):
        asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    # run_search must never have been called (zero search attempts). Kept
    # outside the ``with`` block so it actually executes after the raise.
    assert mock_run_search.call_count == 0
|
|
|
|
|
|
def test_backend_unavailable_in_final_call_propagates(mock_backend, mock_run_search):
    """BackendUnavailable raised during the final call also propagates unchanged."""
    from services.llm.backends import BackendUnavailable
    from services.search.react_loop import agentic_ask_loop

    # Two tool rounds succeed; the third (final) LLM call raises.
    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        BackendUnavailable("qwen-macbook", "ReadTimeout"),
    ]
    with pytest.raises(BackendUnavailable):
        asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))
|