# hyungi_document_server/tests/services/test_react_loop.py

"""PR-DocSrv-Ask-ToolCalling-ReAct-1: agentic_ask_loop unit tests.

검증 invariant:
- G0-1: tests/fixtures/qwen_tool_call_response.json 의 shape 이 parsing 가정과 일치.
- G0-2 counter semantics:
    * LLM 호출 횟수 ≤ max_llm_calls (= max_tool_rounds + 1)
    * search 실행 횟수 ≤ search_exec_max (= max_tool_rounds)
    * 마지막 LLM 호출의 tool_choice == "none"
    * partial=true 조건: max rounds 후 final content 비어 있을 때
- G0-3 trace exposure: debug=False → debug_trace=None, debug=True → list[dict].
- BackendUnavailable 은 호출자에게 그대로 전파 (정정 4 의 연장).
"""

from __future__ import annotations

import asyncio
import json
import os
import sys
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock

import pytest

# Put the `app` directory (two levels above this test directory) at the front
# of sys.path. Presumably this is what lets the `services.*` imports used by
# the tests resolve without installing the project -- confirm against the
# repository layout.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "app"))


# Location of the recorded tool-call response checked by the G0-1 shape test:
# `<parent of this file's directory>/fixtures/qwen_tool_call_response.json`.
FIXTURE_PATH = (
    Path(__file__).resolve().parents[1] / "fixtures" / "qwen_tool_call_response.json"
)


# ── helpers ────────────────────────────────────────────────────────────────


def _msg_with_tool_call(q: str, tc_id: str = "tc-1") -> dict:
    """G0-1 fixture shape 그대로 — assistant message with one tool_call."""
    return {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "id": tc_id,
                "type": "function",
                "function": {
                    "name": "search",
                    "arguments": json.dumps({"q": q}, ensure_ascii=False),
                },
            }
        ],
    }


def _msg_with_content(text: str) -> dict:
    return {"role": "assistant", "content": text, "tool_calls": None}


def _fake_chunk(chunk_id: int, doc_id: int = 100, score: float = 0.9):
    m = MagicMock()
    m.id = chunk_id
    m.chunk_id = chunk_id
    m.doc_id = doc_id
    m.title = f"doc {doc_id}"
    m.score = score
    m.snippet = f"snippet for chunk {chunk_id}"
    m.text = None
    return m


def _fake_pr(chunks: list):
    pr = MagicMock()
    pr.results = chunks
    return pr


@pytest.fixture
def mock_backend():
    """Provide a ``QwenMacBookBackend``-spec'd mock with an async ``generate_with_tools``.

    Tests script the LLM replies by assigning ``generate_with_tools.side_effect``.
    """
    from services.llm.backends import QwenMacBookBackend

    backend = MagicMock(spec=QwenMacBookBackend)
    # ``name`` cannot be passed to the Mock constructor as an attribute, so it
    # is set after creation together with the async entry point.
    backend.configure_mock(name="qwen-macbook", generate_with_tools=AsyncMock())
    return backend


@pytest.fixture
def mock_run_search(monkeypatch):
    """Replace ``react_loop.run_search`` with an ``AsyncMock``.

    By default the stub returns a result holding a single fake chunk (id 1).
    The stub itself is returned so tests can inspect its call count.
    """
    from services.search import react_loop

    single_chunk_result = _fake_pr([_fake_chunk(1)])
    search_stub = AsyncMock(return_value=single_chunk_result)
    monkeypatch.setattr(react_loop, "run_search", search_stub)
    return search_stub


# ── G0-1: fixture shape 검증 ───────────────────────────────────────────────


def test_fixture_shape_matches_parser_assumptions():
    """G0-1: the recorded fixture has the shape the react_loop parsing assumes."""
    assert FIXTURE_PATH.exists(), f"fixture missing at {FIXTURE_PATH}"
    with FIXTURE_PATH.open(encoding="utf-8") as handle:
        payload = json.load(handle)

    assert "choices" in payload and len(payload["choices"]) >= 1
    message = payload["choices"][0]["message"]
    assert message["role"] == "assistant"
    assert "tool_calls" in message

    tool_calls = message["tool_calls"]
    assert isinstance(tool_calls, list) and len(tool_calls) >= 1
    first_call = tool_calls[0]
    assert first_call["type"] == "function"
    function_part = first_call["function"]
    assert function_part["name"]  # non-empty str

    # Key G0-1 finding: ``arguments`` arrives as a JSON string that decodes
    # to a dict, not as a dict itself.
    raw_arguments = function_part["arguments"]
    assert isinstance(raw_arguments, str)
    decoded_arguments = json.loads(raw_arguments)
    assert isinstance(decoded_arguments, dict)


# ── early exit (LLM call #1 에 tool_calls 없음) ─────────────────────────────


def test_no_tool_calls_early_exit(mock_backend, mock_run_search):
    """A content-only first reply ends the loop at once.

    Expected: ``iterations == 1``, ``partial`` is false, no sources, exactly
    one LLM call and no search execution.
    """
    from services.search.react_loop import agentic_ask_loop

    llm = mock_backend.generate_with_tools
    llm.side_effect = [_msg_with_content("바로 답입니다")]

    outcome = asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    assert outcome.iterations == 1
    assert outcome.partial is False
    assert outcome.final_answer == "바로 답입니다"
    assert outcome.sources == []
    assert llm.call_count == 1
    assert mock_run_search.call_count == 0


# ── 1 round + early exit ───────────────────────────────────────────────────


def test_one_round_then_final_content(mock_backend, mock_run_search):
    """Round 1 requests a search; round 2 answers with content (early exit)."""
    from services.search.react_loop import agentic_ask_loop

    scripted_replies = [
        _msg_with_tool_call("query A"),
        _msg_with_content("두 번째 호출 종합문"),
    ]
    llm = mock_backend.generate_with_tools
    llm.side_effect = scripted_replies

    db_session = MagicMock()
    outcome = asyncio.run(agentic_ask_loop(db_session, "Q", backend=mock_backend))

    assert outcome.iterations == 2
    assert outcome.partial is False
    assert outcome.final_answer == "두 번째 호출 종합문"
    assert len(outcome.sources) == 1
    assert llm.call_count == 2
    assert mock_run_search.call_count == 1


# ── max rounds 도달 + final call ────────────────────────────────────────────


def test_max_rounds_reached_final_with_content(mock_backend, mock_run_search):
    """Two tool-call rounds followed by a final call that returns content.

    Expected: ``partial`` is false, three LLM calls, two search executions.
    """
    from services.search.react_loop import agentic_ask_loop

    llm = mock_backend.generate_with_tools
    llm.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("최종 답입니다"),
    ]

    db_session = MagicMock()
    outcome = asyncio.run(agentic_ask_loop(db_session, "Q", backend=mock_backend))

    assert outcome.iterations == 2  # = max_tool_rounds
    assert outcome.partial is False
    assert outcome.final_answer == "최종 답입니다"
    assert llm.call_count == 3
    assert mock_run_search.call_count == 2


# ── G0-2: 마지막 호출 tool_choice="none" ─────────────────────────────────────


def test_final_call_uses_tool_choice_none(mock_backend, mock_run_search):
    """G0-2 invariant: once max_tool_rounds is hit, the final call uses tool_choice 'none'."""
    from services.search.react_loop import agentic_ask_loop

    llm = mock_backend.generate_with_tools
    llm.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("종합"),
    ]

    asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    final_kwargs = llm.call_args_list[-1].kwargs
    assert final_kwargs.get("tool_choice") == "none"
    # The final call passes tools=[] as a keyword argument, not positionally.
    assert final_kwargs.get("tools") == []


# ── G0-2: max LLM calls + search exec cap ──────────────────────────────────


def test_max_llm_calls_capped_at_three(mock_backend, mock_run_search):
    """The LLM is called at most 3 times (= max_tool_rounds + 1)."""
    from services.search.react_loop import agentic_ask_loop

    llm = mock_backend.generate_with_tools
    llm.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("종합"),
    ]

    asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    assert llm.call_count <= 3


def test_search_exec_capped_at_two(mock_backend, mock_run_search):
    """Search is actually executed at most max_tool_rounds (= 2) times."""
    from services.search.react_loop import agentic_ask_loop

    scripted_replies = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("종합"),
    ]
    mock_backend.generate_with_tools.side_effect = scripted_replies

    loop_coro = agentic_ask_loop(MagicMock(), "Q", backend=mock_backend)
    asyncio.run(loop_coro)

    assert mock_run_search.call_count <= 2


# ── G0-2: partial=true (final content 비어 있음) ───────────────────────────


def test_partial_when_final_content_empty(mock_backend, mock_run_search):
    """Max rounds reached and the final call returns empty content -> partial is true."""
    from services.search.react_loop import agentic_ask_loop

    empty_final_reply = _msg_with_content("")
    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        empty_final_reply,
    ]

    outcome = asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    assert outcome.iterations == 2
    assert outcome.partial is True
    assert outcome.final_answer == ""


# ── sources dedup ──────────────────────────────────────────────────────────


def test_sources_dedup_by_id(mock_backend, monkeypatch):
    """A chunk id returned in both rounds appears only once in ``sources``."""
    from services.search import react_loop
    from services.search.react_loop import agentic_ask_loop

    # Round 1 yields chunk 1; round 2 yields chunk 1 again plus chunk 2.
    first_round = _fake_pr([_fake_chunk(1)])
    second_round = _fake_pr([_fake_chunk(1), _fake_chunk(2)])
    search_stub = AsyncMock(side_effect=[first_round, second_round])
    monkeypatch.setattr(react_loop, "run_search", search_stub)

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        _msg_with_content("종합"),
    ]

    outcome = asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    collected_ids = [source["id"] for source in outcome.sources]
    assert collected_ids == [1, 2]  # id 1 is not duplicated
    assert len(outcome.sources) == 2


# ── G0-3: trace exposure ───────────────────────────────────────────────────


def test_debug_trace_none_when_debug_false(mock_backend, mock_run_search):
    """G0-3: with debug=False the result's ``debug_trace`` is ``None``."""
    from services.search.react_loop import agentic_ask_loop

    llm = mock_backend.generate_with_tools
    llm.side_effect = [_msg_with_content("바로 답")]

    loop_coro = agentic_ask_loop(MagicMock(), "Q", backend=mock_backend, debug=False)
    outcome = asyncio.run(loop_coro)

    assert outcome.debug_trace is None


def test_debug_trace_populated_when_debug_true(mock_backend, mock_run_search):
    """G0-3: with debug=True the result's ``debug_trace`` is a non-empty list."""
    from services.search.react_loop import agentic_ask_loop

    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_content("종합"),
    ]

    loop_coro = agentic_ask_loop(MagicMock(), "Q", backend=mock_backend, debug=True)
    outcome = asyncio.run(loop_coro)

    trace = outcome.debug_trace
    assert isinstance(trace, list)
    assert len(trace) >= 1
    # The first trace entry describes a tool round.
    first_entry = trace[0]
    assert first_entry.get("phase") == "tool_round"


# ── BackendUnavailable propagation ─────────────────────────────────────────


def test_backend_unavailable_propagates(mock_backend, mock_run_search):
    """``BackendUnavailable`` from the first LLM call is re-raised unchanged.

    The module notes say the caller (search.py) maps it to a 503.
    """
    from services.llm.backends import BackendUnavailable
    from services.search.react_loop import agentic_ask_loop

    outage = BackendUnavailable("qwen-macbook", "ConnectError")
    mock_backend.generate_with_tools.side_effect = outage

    with pytest.raises(BackendUnavailable):
        asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))

    # run_search must never have been invoked (zero search attempts).
    assert mock_run_search.call_count == 0


def test_backend_unavailable_in_final_call_propagates(mock_backend, mock_run_search):
    """``BackendUnavailable`` raised by the final call is also re-raised unchanged."""
    from services.llm.backends import BackendUnavailable
    from services.search.react_loop import agentic_ask_loop

    # An exception instance inside side_effect is raised when its turn comes.
    final_call_outage = BackendUnavailable("qwen-macbook", "ReadTimeout")
    mock_backend.generate_with_tools.side_effect = [
        _msg_with_tool_call("q1"),
        _msg_with_tool_call("q2", tc_id="tc-2"),
        final_call_outage,
    ]

    with pytest.raises(BackendUnavailable):
        asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))