diff --git a/app/api/search.py b/app/api/search.py
index 1877291..4d0a37a 100644
--- a/app/api/search.py
+++ b/app/api/search.py
@@ -932,3 +932,104 @@ async def ask(
         backend_used=backend_used_val,
         debug=debug_obj,
     )
+
+
+# ─── PR-DocSrv-Ask-ToolCalling-ReAct-1 ────────────────────────────────────
+# /api/search/ask/react — ReAct loop driven by Qwen native tool calling.
+# This endpoint is qwen-macbook only (calling it is the implicit opt-in).
+# MacBook unavailable → 503 + error_reason=macbook_unavailable. No automatic Gemma fallback.
+# G0-2 counter semantics: max_tool_rounds=2, max LLM calls=3, search exec ≤ 2.
+# G0-3 trace exposure: debug_trace is None by default, filled only when debug=True.
+
+
+class AskReactRequest(BaseModel):
+    query: str
+    debug: bool = False
+
+
+class AskReactResponse(BaseModel):
+    final_answer: str
+    iterations: int
+    partial: bool
+    sources: list[dict]
+    debug_trace: list[dict] | None = None
+
+
+@router.post("/ask/react", response_model=AskReactResponse)
+async def ask_react(
+    payload: AskReactRequest,
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """Run the ReAct loop for one query (qwen-macbook only, no fallback).
+
+    Calling this endpoint is the caller's explicit opt-in. When the loop raises
+    `BackendUnavailable` the handler answers HTTP 503 with the body
+    `{error_reason: "macbook_unavailable", backend_requested: "qwen-macbook",
+    backend_used: null, detail: <reason>}` and never falls back to Gemma.
+    When `search.ask.react.enabled` is false it answers HTTP 404 with
+    `{error_reason: "react_disabled"}`.
+
+    request body:
+      - query: str (the user's original question)
+      - debug: bool (default false; true fills `debug_trace` in the response)
+
+    response body (200):
+      - final_answer: str (answer text; empty when partial)
+      - iterations: int (round count reported by the loop)
+      - partial: bool (true when the loop produced no answer text)
+      - sources: list[dict] (search evidence metadata, deduplicated by id)
+      - debug_trace: list[dict] | null (per-round trace when debug=true)
+
+    Raises:
+        TypeError: if the "qwen-macbook" backend is not a QwenMacBookBackend.
+    """
+    # Lazy imports — function-local to avoid a circular import.
+    from core.config import settings
+    from services.llm.backends import BackendUnavailable, QwenMacBookBackend, get_backend
+    from services.search.react_loop import agentic_ask_loop
+
+    # Honor the `enabled` switch; otherwise the config knob has no effect.
+    if not settings.search.ask.react.enabled:
+        return JSONResponse(
+            status_code=404,
+            content={"error_reason": "react_disabled"},
+        )
+
+    backend_inst = get_backend("qwen-macbook")
+    # Explicit check rather than `assert`, which is stripped under `python -O`.
+    if not isinstance(backend_inst, QwenMacBookBackend):
+        raise TypeError(
+            "ask_react requires QwenMacBookBackend, got "
+            f"{type(backend_inst).__name__}"
+        )
+
+    try:
+        result = await agentic_ask_loop(
+            session,
+            payload.query,
+            backend=backend_inst,
+            debug=payload.debug,
+        )
+    except BackendUnavailable as exc:
+        logger.warning(
+            "ask_react backend unavailable backend=%s reason=%s",
+            exc.backend_name, exc.reason,
+        )
+        return JSONResponse(
+            status_code=503,
+            content={
+                "error_reason": "macbook_unavailable",
+                "backend_requested": "qwen-macbook",
+                "backend_used": None,
+                "detail": exc.reason,
+            },
+        )
+
+    return AskReactResponse(
+        final_answer=result.final_answer,
+        iterations=result.iterations,
+        partial=result.partial,
+        sources=result.sources,
+        debug_trace=result.debug_trace,
+    )
diff --git a/app/core/config.py b/app/core/config.py
index 2d9b0af..d2f765c 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -50,8 +50,22 @@ class SearchAskBackendConfig(BaseModel):
     timeout_read_s: int = 30
 
 
+class SearchAskReactConfig(BaseModel):
+    """PR-DocSrv-Ask-ToolCalling-ReAct-1: /api/search/ask/react ReAct loop.
+
+    qwen-macbook only (the endpoint itself is the implicit opt-in). G0-2 counter semantics:
+    max_tool_rounds=2 → at most 3 LLM calls (2 tool rounds + 1 final), at most 2 search runs.
+    """
+
+    enabled: bool = True
+    max_tool_rounds: int = 2
+    search_tool_limit: int = 5
+    search_tool_mode: str = "hybrid"
+
+
 class SearchAskConfig(BaseModel):
     backend: SearchAskBackendConfig = SearchAskBackendConfig()
+    react: SearchAskReactConfig = SearchAskReactConfig()
 
 
 class SearchConfig(BaseModel):
@@ -199,9 +213,14 @@ def load_settings() -> Settings:
 
     search_cfg = SearchConfig()
     if config_path.exists() and raw and "search" in raw:
-        sb = (raw.get("search") or {}).get("ask", {}).get("backend", {}) or {}
+        ask_raw = (raw.get("search") or {}).get("ask", {}) or {}
+        sb = ask_raw.get("backend", {}) or {}
+        sr = ask_raw.get("react", {}) or {}
         search_cfg = SearchConfig(
-            ask=SearchAskConfig(backend=SearchAskBackendConfig(**sb))
+            ask=SearchAskConfig(
+                backend=SearchAskBackendConfig(**sb),
+                react=SearchAskReactConfig(**sr),
+            )
         )
 
     taxonomy = raw.get("taxonomy", {}) if config_path.exists() and raw else {}
diff --git a/app/prompts/react_ask.txt b/app/prompts/react_ask.txt
new file mode 100644
index 0000000..0197621
--- /dev/null
+++ b/app/prompts/react_ask.txt
@@ -0,0 +1,10 @@
+당신은 사내 문서 자료를 기반으로 정확한 한국어 답변을 제공하는 비서입니다.
+
+작업 원칙:
+1. 사용자 질문에 답하려면 사내 문서를 검색해야 한다면, `search` 도구를 호출하세요.
+2. 첫 검색 결과가 부족하다고 판단되면 (관련도 낮음 또는 핵심 정보 누락), 다른 키워드로 한 번 더 검색하세요.
+3. 검색 결과가 충분하면 그 evidence 만으로 한국어 최종 답을 작성하세요.
+4. 근거 없는 추측은 하지 마세요. 자료에서 확인되지 않으면 "확인된 자료가 없습니다" 라고 답하세요.
+5. 검색 도구는 최대 2회까지만 호출 가능합니다. 그 이후에는 모은 정보로 답을 마무리해야 합니다.
+
+답변 시 출처를 본문에 따로 표시할 필요는 없습니다. sources 필드로 별도 노출됩니다.
diff --git a/app/services/llm/backends.py b/app/services/llm/backends.py
index 7af868d..004a621 100644
--- a/app/services/llm/backends.py
+++ b/app/services/llm/backends.py
@@ -149,6 +149,92 @@ class QwenMacBookBackend(BackendBase):
                     ) from exc
                 raise
 
+    async def generate_with_tools(
+        self,
+        messages: list[dict],
+        tools: list[dict],
+        *,
+        tool_choice: str = "auto",
+        timeout_read_s: int,
+        max_tokens: int = 4096,
+    ) -> dict:
+        """OpenAI-compatible chat completion with tool calling (for the ReAct loop).
+
+        Returns the `choices[0].message` dict as received; it carries both
+        `content` (str or None) and `tool_calls` (list or None).
+
+        The response shape follows the G0-1 fixture
+        `tests/fixtures/qwen_tool_call_response.json` (mlx-vlm, OpenAI-compatible).
+        `tool_calls[].function.arguments` arrives as a **JSON string**, so the
+        caller has to `json.loads` it.
+
+        - `tool_choice="auto"`: the model decides whether to call a tool
+        - `tool_choice="none"`: tool calls forbidden, only content is returned (final round)
+        - `tools=[]` + `tool_choice="none"`: force a final answer without tool definitions
+        - `max_tokens`: completion token cap sent in the request (default 4096)
+
+        Raises:
+            BackendUnavailable: on connect/timeout/protocol errors, HTTP 5xx,
+                or a 2xx body that has no `choices[0].message` object.
+            httpx.HTTPStatusError: on error statuses outside the 5xx range.
+        """
+        gate = self._get_gate()
+        timeout = httpx.Timeout(
+            connect=float(self.timeout_connect_s),
+            read=float(timeout_read_s),
+            write=10.0,
+            pool=5.0,
+        )
+        url = f"{self.base_url}/v1/chat/completions"
+        payload: dict = {
+            "model": self.model,
+            "messages": messages,
+            "max_tokens": max_tokens,
+        }
+        if tools:
+            payload["tools"] = tools
+        if tool_choice in ("auto", "none"):
+            payload["tool_choice"] = tool_choice
+        async with gate:
+            try:
+                async with httpx.AsyncClient(timeout=timeout) as client:
+                    resp = await client.post(url, json=payload)
+                    resp.raise_for_status()
+            except (
+                httpx.ConnectError,
+                httpx.ConnectTimeout,
+                httpx.ReadTimeout,
+                httpx.PoolTimeout,
+                httpx.WriteTimeout,
+                httpx.RemoteProtocolError,
+            ) as exc:
+                logger.warning(
+                    "qwen-macbook(tools) unavailable url=%s exc=%s",
+                    url, type(exc).__name__,
+                )
+                raise BackendUnavailable(self.name, type(exc).__name__) from exc
+            except httpx.HTTPStatusError as exc:
+                if 500 <= exc.response.status_code < 600:
+                    logger.warning(
+                        "qwen-macbook(tools) 5xx status=%d", exc.response.status_code,
+                    )
+                    raise BackendUnavailable(
+                        self.name, f"http_{exc.response.status_code}"
+                    ) from exc
+                raise
+        # A 2xx body that is not JSON or lacks choices[0].message is a backend
+        # failure too: report it as BackendUnavailable, not a bare KeyError.
+        try:
+            message = resp.json()["choices"][0]["message"]
+            if not isinstance(message, dict):
+                raise TypeError("choices[0].message is not an object")
+        except (KeyError, IndexError, TypeError, ValueError) as exc:
+            logger.warning(
+                "qwen-macbook(tools) malformed response exc=%s", type(exc).__name__,
+            )
+            raise BackendUnavailable(self.name, "malformed_response") from exc
+        return message
+
 
 
 # ── dispatcher ─────────────────────────────────────────────────────────────
diff --git a/app/services/search/react_loop.py b/app/services/search/react_loop.py
new file mode 100644
index 0000000..7405574
--- /dev/null
+++ b/app/services/search/react_loop.py
@@ -0,0 +1,307 @@
+"""PR-DocSrv-Ask-ToolCalling-ReAct-1: ReAct loop on Qwen native tool calling.
+
+G0-2 counter semantics ([[b-velvety-hare]] § Pre-Implementation Gate):
+- max_tool_rounds = 2 (cap on tool-calling rounds)
+- max_llm_calls = 3 (= max_tool_rounds + 1, final round included)
+- search_exec_max = max_tool_rounds (the model may request several searches in one
+  round; requests beyond the budget get an error reply instead of a search)
+- the last LLM call forces a final answer: tool_choice="none" + a system instruction
+
+Parsing follows the G0-1 fixture (tests/fixtures/qwen_tool_call_response.json):
+mlx-vlm is OpenAI-compatible and `tool_calls[].function.arguments` is a JSON string.
+
+G0-3 trace exposure:
+- `debug_trace` is filled only when `debug=True`; rounds are always logged server-side.
+- default response = `debug_trace=None`.
+
+Invariant (natural extension of correction 4):
+- backend = `QwenMacBookBackend` only. Automatic Gemma fallback is forbidden.
+- `BackendUnavailable` is mapped by the caller (search.py) to
+  503 + `error_reason=macbook_unavailable`.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from core.config import settings
+from core.utils import setup_logger
+from services.llm.backends import QwenMacBookBackend
+from services.search.search_pipeline import run_search
+
+logger = setup_logger("react_loop")
+
+_PROMPT_PATH = Path(__file__).resolve().parents[2] / "prompts" / "react_ask.txt"
+_FINAL_INSTRUCTION = (
+    "이제는 검색 도구를 더 이상 호출하지 마시고, 위 evidence 만으로 "
+    "한국어 최종 답을 작성하세요."
+)
+_TOOLS = [
+    {
+        "type": "function",
+        "function": {
+            "name": "search",
+            "description": "사내 문서 청크 검색. q 만 넘기면 hybrid 모드로 limit 건 반환.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "q": {
+                        "type": "string",
+                        "description": "검색 질의문 (한국어 가능)",
+                    },
+                },
+                "required": ["q"],
+            },
+        },
+    }
+]
+
+
+@dataclass
+class ReactResult:
+    """Outcome of one `agentic_ask_loop` run."""
+
+    final_answer: str
+    iterations: int
+    partial: bool
+    sources: list[dict[str, Any]] = field(default_factory=list)
+    debug_trace: list[dict[str, Any]] | None = None
+
+
+def _load_system_prompt() -> str:
+    """Return the text of react_ask.txt, or a built-in fallback if it is unreadable."""
+    try:
+        return _PROMPT_PATH.read_text(encoding="utf-8")
+    except OSError:
+        logger.warning("react_ask.txt missing path=%s — fallback prompt", _PROMPT_PATH)
+        return (
+            "당신은 사내 문서 자료를 기반으로 정확한 한국어 답변을 제공하는 비서입니다. "
+            "필요하면 `search` 도구를 호출해 evidence 를 모으고, 충분하다 판단되면 "
+            "최종 답을 작성하세요. 근거 없는 추측은 피하세요."
+        )
+
+
+def _result_payload(pr, *, limit: int) -> tuple[str, list[dict[str, Any]]]:
+    """Convert a run_search() result into (LLM-side JSON string, source dicts).
+
+    LLM side: id / doc_id / title / score plus a snippet cut to 600 characters.
+    Sources side: id / doc_id / title / score only (no snippet).
+    """
+    items_llm: list[dict[str, Any]] = []
+    items_src: list[dict[str, Any]] = []
+    for r in (pr.results or [])[:limit]:
+        rid = getattr(r, "id", None)
+        if rid is None:
+            # `is None` rather than `or`, so a valid id of 0 is not discarded.
+            rid = getattr(r, "chunk_id", None)
+        doc_id = getattr(r, "doc_id", None)
+        title = getattr(r, "title", "") or ""
+        score = getattr(r, "score", None)
+        snippet = (getattr(r, "snippet", "") or getattr(r, "text", "") or "")[:600]
+        items_llm.append(
+            {
+                "id": rid,
+                "doc_id": doc_id,
+                "title": title,
+                "snippet": snippet,
+                "score": score,
+            }
+        )
+        items_src.append(
+            {"id": rid, "doc_id": doc_id, "title": title, "score": score}
+        )
+    return (
+        json.dumps({"results": items_llm, "count": len(items_llm)}, ensure_ascii=False),
+        items_src,
+    )
+
+
+def _parse_search_query(raw_arguments: Any, fallback: str) -> str:
+    """Return the `q` argument of a search tool call, or *fallback*.
+
+    `raw_arguments` is normally a JSON string. Anything that does not decode to
+    an object holding a non-blank string `q` yields *fallback*.
+    """
+    if isinstance(raw_arguments, dict):
+        args: Any = raw_arguments
+    else:
+        try:
+            args = json.loads(raw_arguments or "{}")
+        except (TypeError, ValueError):  # JSONDecodeError is a ValueError
+            return fallback
+    q = args.get("q") if isinstance(args, dict) else None
+    if isinstance(q, str) and q.strip():
+        return q.strip()
+    return fallback
+
+
+def _tool_reply(tool_call_id: str, content: str) -> dict[str, Any]:
+    """Build the `role="tool"` message answering one tool call."""
+    return {"role": "tool", "tool_call_id": tool_call_id, "content": content}
+
+
+async def agentic_ask_loop(
+    session: AsyncSession,
+    query: str,
+    *,
+    backend: QwenMacBookBackend,
+    max_tool_rounds: int | None = None,
+    debug: bool = False,
+) -> ReactResult:
+    """Run the ReAct loop for *query* and return the answer with its sources.
+
+    Args:
+        session: AsyncSession (caller-managed)
+        query: the user's original question
+        backend: QwenMacBookBackend instance (qwen-macbook only, Gemma unsupported)
+        max_tool_rounds: None reads config.search.ask.react.max_tool_rounds;
+            negative values are treated as 0
+        debug: True fills `debug_trace`
+
+    Raises:
+        BackendUnavailable: propagated unchanged from `backend.generate_with_tools`.
+    """
+    cfg = settings.search.ask.react
+    if max_tool_rounds is None:
+        max_tool_rounds = cfg.max_tool_rounds
+    max_tool_rounds = max(0, max_tool_rounds)
+    timeout_read_s = settings.search.ask.backend.timeout_read_s
+    limit = cfg.search_tool_limit
+    mode = cfg.search_tool_mode
+
+    messages: list[dict] = [
+        {"role": "system", "content": _load_system_prompt()},
+        {"role": "user", "content": query},
+    ]
+    sources: list[dict[str, Any]] = []
+    seen_ids: set[Any] = set()
+    trace: list[dict[str, Any]] = []
+    # G0-2: search executions are capped at max_tool_rounds in total, even if
+    # the model emits several tool calls within one round.
+    searches_left = max_tool_rounds
+
+    # Tool rounds — at most max_tool_rounds (LLM call #1 .. #max_tool_rounds)
+    for round_idx in range(max_tool_rounds):
+        msg = await backend.generate_with_tools(
+            messages,
+            _TOOLS,
+            tool_choice="auto",
+            timeout_read_s=timeout_read_s,
+        )
+        tool_calls = msg.get("tool_calls") or []
+        trace.append(
+            {
+                "phase": "tool_round",
+                "round": round_idx,
+                "tool_call_count": len(tool_calls),
+                "content_present": bool(msg.get("content")),
+            }
+        )
+        logger.info(
+            "react_loop round=%d tool_calls=%d content=%s",
+            round_idx,
+            len(tool_calls),
+            "yes" if msg.get("content") else "no",
+        )
+
+        if not tool_calls:
+            # No tool call → the model answered directly (early exit)
+            content = msg.get("content") or ""
+            return ReactResult(
+                final_answer=content,
+                iterations=round_idx + 1,
+                partial=not bool(content),
+                sources=sources,
+                debug_trace=trace if debug else None,
+            )
+
+        # Echo the assistant message (with its tool_calls) into the history
+        messages.append(
+            {
+                "role": "assistant",
+                "content": msg.get("content"),
+                "tool_calls": tool_calls,
+            }
+        )
+
+        # Execute each tool call
+        for tc in tool_calls:
+            fn = tc.get("function") or {}
+            tc_id = tc.get("id") or ""
+            fn_name = fn.get("name")
+            if fn_name != "search":
+                error = {"error": f"unknown tool {fn_name!r}"}
+                messages.append(_tool_reply(tc_id, json.dumps(error, ensure_ascii=False)))
+                trace.append({"phase": "tool_unknown", "name": fn_name})
+                continue
+            if searches_left <= 0:
+                # Answer the call with an error (as for unknown tools) instead of
+                # running a search beyond the budget.
+                error = {"error": "search budget exhausted"}
+                messages.append(_tool_reply(tc_id, json.dumps(error, ensure_ascii=False)))
+                trace.append({"phase": "search_skipped", "reason": "budget_exhausted"})
+                continue
+            searches_left -= 1
+            q_arg = _parse_search_query(fn.get("arguments"), query)
+            pr = await run_search(
+                session,
+                q_arg,
+                mode=mode,
+                limit=limit,
+                rerank=True,
+                analyze=False,
+            )
+            tool_content, round_sources = _result_payload(pr, limit=limit)
+            for s in round_sources:
+                sid = s.get("id")
+                if sid is not None:
+                    if sid in seen_ids:
+                        continue
+                    seen_ids.add(sid)
+                sources.append(s)
+            messages.append(_tool_reply(tc_id, tool_content))
+            trace.append(
+                {
+                    "phase": "search",
+                    "q": q_arg,
+                    "result_count": len(pr.results or []),
+                }
+            )
+
+    # Final round — LLM call #(max_tool_rounds + 1), tool_choice="none" forced.
+    # NOTE(review): the instruction goes in as a second `system` message; confirm
+    # the served chat template accepts a system role that is not first.
+    messages.append({"role": "system", "content": _FINAL_INSTRUCTION})
+    final_msg = await backend.generate_with_tools(
+        messages,
+        tools=[],
+        tool_choice="none",
+        timeout_read_s=timeout_read_s,
+    )
+    final_content = final_msg.get("content") or ""
+    ignored_tool_calls = len(final_msg.get("tool_calls") or [])
+    trace.append(
+        {
+            "phase": "final",
+            "content_present": bool(final_content),
+            "tool_calls_ignored": ignored_tool_calls,
+        }
+    )
+    logger.info(
+        "react_loop final content=%s tool_calls_ignored=%d",
+        "yes" if final_content else "no",
+        ignored_tool_calls,
+    )
+
+    return ReactResult(
+        final_answer=final_content,
+        iterations=max_tool_rounds,
+        partial=not bool(final_content),
+        sources=sources,
+        debug_trace=trace if debug else None,
+    )
diff --git a/config.yaml b/config.yaml
index f970bf9..690dd88 100644
--- a/config.yaml
+++ b/config.yaml
@@ -82,6 +82,12 @@ search:
       macbook_model: "mlx-community/Qwen3.6-27B-8bit"
       timeout_connect_s: 1       # MacBook sleep/wake 빠른 감지 (자동 fallback 부재 → 빠른 503)
       timeout_read_s: 30         # synthesis_service.LLM_TIMEOUT_MS=30000 와 align
+    # PR-DocSrv-Ask-ToolCalling-ReAct-1:
/api/search/ask/react ReAct loop (qwen-macbook only)
+    react:
+      enabled: true
+      max_tool_rounds: 2         # G0-2: at most 3 LLM calls (2 tool rounds + 1 final), at most 2 search runs
+      search_tool_limit: 5
+      search_tool_mode: "hybrid"
 
 nas:
   mount_path: "/documents"
diff --git a/tests/api/test_search_ask_react_endpoint.py b/tests/api/test_search_ask_react_endpoint.py
new file mode 100644
index 0000000..380d70f
--- /dev/null
+++ b/tests/api/test_search_ask_react_endpoint.py
@@ -0,0 +1,218 @@
+"""PR-DocSrv-Ask-ToolCalling-ReAct-1: /api/search/ask/react endpoint integration.
+
+Checks (G0-3 trace exposure + correction-4 invariant):
+- backend unavailable → HTTP 503 + error_reason=macbook_unavailable
+  + ★ `run_search` mock call count == 0 (the search stage is never entered)
+- normal response → 200 + final_answer + sources + debug_trace=null (default)
+- debug=true → debug_trace is filled
+- max rounds reached → iterations=2 + partial=false (final content present)
+
+Lightweight pattern that calls the endpoint function (`api.search.ask_react`) directly.
+FastAPI deps are bypassed with MagicMock, no TestClient. (Same service-layer
+pattern as the priority_gate / backend_dispatcher tests.)
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import sys
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "app"))
+
+
+# ── helpers ────────────────────────────────────────────────────────────────
+
+
+def _msg_with_tool_call(q: str, tc_id: str = "tc-1") -> dict:
+    return {
+        "role": "assistant",
+        "content": None,
+        "tool_calls": [
+            {
+                "id": tc_id,
+                "type": "function",
+                "function": {
+                    "name": "search",
+                    "arguments": json.dumps({"q": q}, ensure_ascii=False),
+                },
+            }
+        ],
+    }
+
+
+def _msg_with_content(text: str) -> dict:
+    return {"role": "assistant", "content": text, "tool_calls": None}
+
+
+def _fake_chunk(chunk_id: int, doc_id: int = 100):
+    m = MagicMock()
+    m.id = chunk_id
+    m.chunk_id = chunk_id
+    m.doc_id = doc_id
+    m.title = f"doc {doc_id}"
+    m.score = 0.9
+    m.snippet = f"snippet {chunk_id}"
+    m.text = None
+    return m
+
+
+def _fake_pr(chunks: list):
+    pr = MagicMock()
+    pr.results = chunks
+    return pr
+
+
+@pytest.fixture
+def patched_backend_and_search(monkeypatch):
+    """Mock both get_backend and run_search. Each test sets the side_effect of
+    the backend's generate_with_tools.
+
+    Returns: (backend_mock, run_search_mock, set_backend_unavailable_fn).
+    """
+    from services.llm.backends import BackendUnavailable, QwenMacBookBackend
+    from services.llm import backends as backends_mod
+    from services.search import react_loop
+
+    backend = MagicMock(spec=QwenMacBookBackend)
+    backend.name = "qwen-macbook"
+    backend.generate_with_tools = AsyncMock()
+
+    def _fake_get_backend(name):
+        # the endpoint only asks for qwen-macbook, so return the single backend
+        return backend
+
+    monkeypatch.setattr(backends_mod, "get_backend", _fake_get_backend)
+    # ask_react in search.py runs `from services.llm.backends import ... get_backend`
+    # lazily on every call, so patching at module level is enough.
+
+    run_search_mock = AsyncMock(return_value=_fake_pr([_fake_chunk(1)]))
+    monkeypatch.setattr(react_loop, "run_search", run_search_mock)
+
+    def _make_unavailable():
+        backend.generate_with_tools.side_effect = BackendUnavailable(
+            "qwen-macbook", "ConnectError"
+        )
+
+    return backend, run_search_mock, _make_unavailable
+
+
+def _call_endpoint(payload):
+    """Call ask_react directly; user/session are bypassed with MagicMock."""
+    from api.search import ask_react
+
+    user = MagicMock()
+    session = MagicMock()
+    return asyncio.run(ask_react(payload, user=user, session=session))
+
+
+# ── ★ correction-4 invariant: backend unavailable → 503 + zero run_search calls ──────
+
+
+def test_qwen_unavailable_returns_503(patched_backend_and_search):
+    """backend BackendUnavailable → HTTP 503 + error_reason=macbook_unavailable."""
+    from api.search import AskReactRequest
+
+    backend, run_search_mock, make_unavailable = patched_backend_and_search
+    make_unavailable()
+
+    response = _call_endpoint(AskReactRequest(query="Q"))
+
+    # JSONResponse instance
+    assert response.status_code == 503
+    body = json.loads(response.body)
+    assert body["error_reason"] == "macbook_unavailable"
+    assert body["backend_used"] is None
+    assert body["backend_requested"] == "qwen-macbook"
+
+    # ★ zero run_search calls (the search stage is never entered)
+    assert run_search_mock.call_count == 0
+
+
+# ── normal 200 + G0-3 default debug_trace=null ────────────────────────────
+
+
+def test_successful_response_default_no_debug_trace(patched_backend_and_search):
+    """debug unspecified (default false) → 200 + debug_trace == null."""
+    from api.search import AskReactRequest, AskReactResponse
+
+    backend, run_search_mock, _ = patched_backend_and_search
+    backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("q1"),
+        _msg_with_content("최종 답입니다"),
+    ]
+
+    response = _call_endpoint(AskReactRequest(query="Q"))
+
+    # Pydantic instance (raw return, before FastAPI applies response_model)
+    assert isinstance(response, AskReactResponse)
+    assert response.final_answer == "최종 답입니다"
+    assert response.iterations == 2
+    assert response.partial is False
+    assert response.debug_trace is None  # ★ G0-3
+    assert len(response.sources) == 1
+
+
+# ── G0-3: debug=true → debug_trace is filled ──────────────────────────────
+
+
+def test_debug_true_populates_trace(patched_backend_and_search):
+    from api.search import AskReactRequest
+
+    backend, run_search_mock, _ = patched_backend_and_search
+    backend.generate_with_tools.side_effect = [
+        _msg_with_content("바로 답"),
+    ]
+
+    response = _call_endpoint(AskReactRequest(query="Q", debug=True))
+
+    assert response.debug_trace is not None
+    assert isinstance(response.debug_trace, list)
+    assert len(response.debug_trace) >= 1
+
+
+# ── max rounds → final content present → partial=false ────────────────────
+
+
+def test_max_rounds_with_final_content(patched_backend_and_search):
+    from api.search import AskReactRequest
+
+    backend, run_search_mock, _ = patched_backend_and_search
+    backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("q1"),
+        _msg_with_tool_call("q2", tc_id="tc-2"),
+        _msg_with_content("정리된 최종 답"),
+    ]
+
+    response = _call_endpoint(AskReactRequest(query="Q"))
+
+    assert response.iterations == 2
+    assert response.partial is False
+    assert response.final_answer == "정리된 최종 답"
+    # 3 LLM calls, 2 searches (G0-2 cap)
+    assert backend.generate_with_tools.call_count == 3
+    assert run_search_mock.call_count == 2
+
+
+# ── max rounds + empty final content → partial=true ───────────────────────
+
+
+def test_max_rounds_with_empty_final_partial(patched_backend_and_search):
+    from api.search import AskReactRequest
+
+    backend, run_search_mock, _ = patched_backend_and_search
+    backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("q1"),
+        _msg_with_tool_call("q2", tc_id="tc-2"),
+        _msg_with_content(""),
+    ]
+
+    response = _call_endpoint(AskReactRequest(query="Q"))
+
+    assert response.iterations == 2
+    assert response.partial is True
+    assert response.final_answer == ""
diff --git
a/tests/fixtures/qwen_tool_call_response.json b/tests/fixtures/qwen_tool_call_response.json
new file mode 100644
index 0000000..c6069cc
--- /dev/null
+++ b/tests/fixtures/qwen_tool_call_response.json
@@ -0,0 +1 @@
+{"id":"chatcmpl-d72e8a01-83d8-4b63-8fe2-e83df37b730b","object":"chat.completion","created":1779456681,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":"tool_calls","message":{"role":"assistant","content":null,"reasoning":null,"tool_calls":[{"type":"function","index":0,"id":"6f30c959-c730-4901-82d2-28ff9b5967de","function":{"name":"search","arguments":"{\"q\": \"가스기사 14회 1번 문제\"}"}}],"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":285,"completion_tokens":33,"total_tokens":318,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":30.390329063}}
\ No newline at end of file
diff --git a/tests/services/test_react_loop.py b/tests/services/test_react_loop.py
new file mode 100644
index 0000000..2f31e7f
--- /dev/null
+++ b/tests/services/test_react_loop.py
@@ -0,0 +1,348 @@
+"""PR-DocSrv-Ask-ToolCalling-ReAct-1: agentic_ask_loop unit tests.
+
+Invariants checked:
+- G0-1: the shape of tests/fixtures/qwen_tool_call_response.json matches the parsing assumptions.
+- G0-2 counter semantics:
+  * LLM call count ≤ max_llm_calls (= max_tool_rounds + 1)
+  * search execution count ≤ search_exec_max (= max_tool_rounds)
+  * tool_choice of the last LLM call == "none"
+  * partial=true condition: final content is empty after max rounds
+- G0-3 trace exposure: debug=False → debug_trace=None, debug=True → list[dict].
+- BackendUnavailable propagates to the caller unchanged (extension of correction 4).
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import sys
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "app"))
+
+
+FIXTURE_PATH = (
+    Path(__file__).resolve().parents[1] / "fixtures" / "qwen_tool_call_response.json"
+)
+
+
+# ── helpers ────────────────────────────────────────────────────────────────
+
+
+def _msg_with_tool_call(q: str, tc_id: str = "tc-1") -> dict:
+    """Same shape as the G0-1 fixture — assistant message with one tool_call."""
+    return {
+        "role": "assistant",
+        "content": None,
+        "tool_calls": [
+            {
+                "id": tc_id,
+                "type": "function",
+                "function": {
+                    "name": "search",
+                    "arguments": json.dumps({"q": q}, ensure_ascii=False),
+                },
+            }
+        ],
+    }
+
+
+def _msg_with_content(text: str) -> dict:
+    return {"role": "assistant", "content": text, "tool_calls": None}
+
+
+def _fake_chunk(chunk_id: int, doc_id: int = 100, score: float = 0.9):
+    m = MagicMock()
+    m.id = chunk_id
+    m.chunk_id = chunk_id
+    m.doc_id = doc_id
+    m.title = f"doc {doc_id}"
+    m.score = score
+    m.snippet = f"snippet for chunk {chunk_id}"
+    m.text = None
+    return m
+
+
+def _fake_pr(chunks: list):
+    pr = MagicMock()
+    pr.results = chunks
+    return pr
+
+
+@pytest.fixture
+def mock_backend():
+    """services.llm.backends.QwenMacBookBackend instance mock (generate_with_tools)."""
+    from services.llm.backends import QwenMacBookBackend
+
+    b = MagicMock(spec=QwenMacBookBackend)
+    b.name = "qwen-macbook"
+    b.generate_with_tools = AsyncMock()
+    return b
+
+
+@pytest.fixture
+def mock_run_search(monkeypatch):
+    """Monkeypatch services.search.react_loop.run_search — returns one chunk by default."""
+    from services.search import react_loop
+
+    mock = AsyncMock(return_value=_fake_pr([_fake_chunk(1)]))
+    monkeypatch.setattr(react_loop, "run_search", mock)
+    return mock
+
+
+# ── G0-1: fixture shape check ──────────────────────────────────────────────
+
+
+def test_fixture_shape_matches_parser_assumptions():
+    """G0-1: the fixture shape matches react_loop's parsing assumptions."""
+    assert FIXTURE_PATH.exists(), f"fixture missing at {FIXTURE_PATH}"
+    fixture = json.loads(FIXTURE_PATH.read_text(encoding="utf-8"))
+    assert "choices" in fixture and len(fixture["choices"]) >= 1
+    msg = fixture["choices"][0]["message"]
+    assert msg["role"] == "assistant"
+    assert "tool_calls" in msg
+    tcs = msg["tool_calls"]
+    assert isinstance(tcs, list) and len(tcs) >= 1
+    tc = tcs[0]
+    assert tc["type"] == "function"
+    assert tc["function"]["name"]  # non-empty str
+    # arguments is a JSON string (the key G0-1 finding)
+    args_str = tc["function"]["arguments"]
+    assert isinstance(args_str, str)
+    args = json.loads(args_str)
+    assert isinstance(args, dict)
+
+
+# ── early exit (no tool_calls in LLM call #1) ──────────────────────────────
+
+
+def test_no_tool_calls_early_exit(mock_backend, mock_run_search):
+    """First LLM call returns content without tool_calls → iterations=1, partial=false."""
+    from services.search.react_loop import agentic_ask_loop
+
+    mock_backend.generate_with_tools.side_effect = [
+        _msg_with_content("바로 답입니다"),
+    ]
+    session = MagicMock()
+
+    result = asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend))
+
+    assert result.iterations == 1
+    assert result.partial is False
+    assert result.final_answer == "바로 답입니다"
+    assert result.sources == []
+    assert mock_backend.generate_with_tools.call_count == 1
+    assert mock_run_search.call_count == 0
+
+
+# ── 1 round + early exit ───────────────────────────────────────────────────
+
+
+def test_one_round_then_final_content(mock_backend, mock_run_search):
+    """round 1 tool_call → search → round 2 content (early exit)."""
+    from services.search.react_loop import agentic_ask_loop
+
+    mock_backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("query A"),
+        _msg_with_content("두 번째 호출 종합문"),
+    ]
+    session = MagicMock()
+
+    result = asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend))
+
+    assert result.iterations == 2
+    assert result.partial is False
+    assert result.final_answer == "두 번째 호출 종합문"
+    assert len(result.sources) == 1
+    assert mock_backend.generate_with_tools.call_count == 2
+    assert mock_run_search.call_count == 1
+
+
+# ── max rounds reached + final call ────────────────────────────────────────
+
+
+def test_max_rounds_reached_final_with_content(mock_backend, mock_run_search):
+    """Rounds 1 and 2 both issue a tool_call → final call → content present → partial=false."""
+    from services.search.react_loop import agentic_ask_loop
+
+    mock_backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("q1"),
+        _msg_with_tool_call("q2", tc_id="tc-2"),
+        _msg_with_content("최종 답입니다"),
+    ]
+    session = MagicMock()
+
+    result = asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend))
+
+    assert result.iterations == 2  # = max_tool_rounds
+    assert result.partial is False
+    assert result.final_answer == "최종 답입니다"
+    assert mock_backend.generate_with_tools.call_count == 3
+    assert mock_run_search.call_count == 2
+
+
+# ── G0-2: last call uses tool_choice="none" ────────────────────────────────
+
+
+def test_final_call_uses_tool_choice_none(mock_backend, mock_run_search):
+    """G0-2 invariant: once max_tool_rounds is reached the final call has tool_choice == 'none'."""
+    from services.search.react_loop import agentic_ask_loop
+
+    mock_backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("q1"),
+        _msg_with_tool_call("q2", tc_id="tc-2"),
+        _msg_with_content("종합"),
+    ]
+    session = MagicMock()
+
+    asyncio.run(agentic_ask_loop(session, "Q", backend=mock_backend))
+
+    last_call = mock_backend.generate_with_tools.call_args_list[-1]
+    assert last_call.kwargs.get("tool_choice") == "none"
+    # the final call passes tools=[] as a keyword (not positionally)
+    assert last_call.kwargs.get("tools") == []
+
+
+# ── G0-2: max LLM calls + search exec cap ──────────────────────────────────
+
+
+def test_max_llm_calls_capped_at_three(mock_backend, mock_run_search):
+    """LLM call count ≤ 3 (= max_tool_rounds + 1)."""
+    from services.search.react_loop import agentic_ask_loop
+
+    mock_backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("q1"),
+        _msg_with_tool_call("q2", tc_id="tc-2"),
+        _msg_with_content("종합"),
+    ]
+    asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))
+    assert mock_backend.generate_with_tools.call_count <= 3
+
+
+def test_search_exec_capped_at_two(mock_backend, mock_run_search):
+    """Actual search execution count ≤ max_tool_rounds (=2)."""
+    from services.search.react_loop import agentic_ask_loop
+
+    mock_backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("q1"),
+        _msg_with_tool_call("q2", tc_id="tc-2"),
+        _msg_with_content("종합"),
+    ]
+    asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))
+    assert mock_run_search.call_count <= 2
+
+
+# ── G0-2: partial=true (final content empty) ───────────────────────────────
+
+
+def test_partial_when_final_content_empty(mock_backend, mock_run_search):
+    """max rounds reached + final call content empty → partial=true."""
+    from services.search.react_loop import agentic_ask_loop
+
+    mock_backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("q1"),
+        _msg_with_tool_call("q2", tc_id="tc-2"),
+        _msg_with_content(""),  # empty content
+    ]
+    result = asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))
+
+    assert result.iterations == 2
+    assert result.partial is True
+    assert result.final_answer == ""
+
+
+# ── sources dedup ──────────────────────────────────────────────────────────
+
+
+def test_sources_dedup_by_id(mock_backend, monkeypatch):
+    """A chunk id returned in both rounds appears only once in sources."""
+    from services.search import react_loop
+    from services.search.react_loop import agentic_ask_loop
+
+    # round 1 → chunk id=1, round 2 → chunk id=1 + id=2
+    run_search_mock = AsyncMock(side_effect=[
+        _fake_pr([_fake_chunk(1)]),
+        _fake_pr([_fake_chunk(1), _fake_chunk(2)]),
+    ])
+    monkeypatch.setattr(react_loop, "run_search", run_search_mock)
+
+    mock_backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("q1"),
+        _msg_with_tool_call("q2", tc_id="tc-2"),
+        _msg_with_content("종합"),
+    ]
+    result = asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))
+
+    src_ids = [s["id"] for s in result.sources]
+    assert src_ids == [1, 2]  # id=1 is not duplicated
+    assert len(result.sources) == 2
+
+
+# ── G0-3: trace exposure ───────────────────────────────────────────────────
+
+
+def test_debug_trace_none_when_debug_false(mock_backend, mock_run_search):
+    """G0-3: debug=False (default) → debug_trace=None."""
+    from services.search.react_loop import agentic_ask_loop
+
+    mock_backend.generate_with_tools.side_effect = [
+        _msg_with_content("바로 답"),
+    ]
+    result = asyncio.run(
+        agentic_ask_loop(MagicMock(), "Q", backend=mock_backend, debug=False)
+    )
+    assert result.debug_trace is None
+
+
+def test_debug_trace_populated_when_debug_true(mock_backend, mock_run_search):
+    """G0-3: debug=True → debug_trace is a list[dict]."""
+    from services.search.react_loop import agentic_ask_loop
+
+    mock_backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("q1"),
+        _msg_with_content("종합"),
+    ]
+    result = asyncio.run(
+        agentic_ask_loop(MagicMock(), "Q", backend=mock_backend, debug=True)
+    )
+    assert isinstance(result.debug_trace, list)
+    assert len(result.debug_trace) >= 1
+    # the first entry is a tool_round
+    assert result.debug_trace[0].get("phase") == "tool_round"
+
+
+# ── BackendUnavailable propagation ─────────────────────────────────────────
+
+
+def test_backend_unavailable_propagates(mock_backend, mock_run_search):
+    """BackendUnavailable is raised as-is — the caller (search.py) maps it to 503."""
+    from services.llm.backends import BackendUnavailable
+    from services.search.react_loop import agentic_ask_loop
+
+    mock_backend.generate_with_tools.side_effect = BackendUnavailable(
+        "qwen-macbook", "ConnectError"
+    )
+
+    with pytest.raises(BackendUnavailable):
+        asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))
+
+    # ★ run_search is never called (zero search attempts)
+    assert mock_run_search.call_count == 0
+
+
+def test_backend_unavailable_in_final_call_propagates(mock_backend, mock_run_search):
+    """Unavailability raised by the final call is propagated as-is too."""
+    from services.llm.backends import BackendUnavailable
+    from services.search.react_loop import agentic_ask_loop
+
+    mock_backend.generate_with_tools.side_effect = [
+        _msg_with_tool_call("q1"),
+        _msg_with_tool_call("q2", tc_id="tc-2"),
+        BackendUnavailable("qwen-macbook", "ReadTimeout"),
+    ]
+    with pytest.raises(BackendUnavailable):
+        asyncio.run(agentic_ask_loop(MagicMock(), "Q", backend=mock_backend))