Files
gpu-services/nanoclaude/tests/test_worker_fallback.py
T
Hyungi Ahn 86c076fcf9 fix(nanoclaude): prevent classifier router JSON leak in fallback path
When the classifier (gemma4:e4b) timed out or returned unparseable
output, the worker's "direct" branch re-called backend_registry.classifier
with the original user message. The classifier still had CLASSIFIER_PROMPT
attached, so it dutifully emitted router JSON like
{"action": "route", "response": "추론 모델에게 전달할게요!", ...}
which was streamed verbatim to Synology Chat as the bot's answer.
The reasoning model (Gemma 26B on Mac mini) was never actually invoked.

Changes:
- New services/classifier_io.py with parse_classification (returns explicit
  classification_failed instead of silently morphing to direct) and
  looks_like_router_json (defense-in-depth guard on any user-facing output).
- New BackendRegistry.chat_fallback adapter — same physical model as the
  classifier but with CHAT_FALLBACK_PROMPT (no JSON, no routing meta).
  This is what the worker now uses for failed-classification recovery.
- worker.py direct branch split into two:
    * elif action=="direct" and response_text and not router_json → push as-is
    * else → _fetch_fallback_text via chat_fallback (never the classifier),
      with leak guard suppressing router-shaped output.
- Belt-and-suspenders leak check on the final concatenated answer before
  _send_callback fires.
- Static safe message ("분류기가 응답을 제대로 만들지 못했어요...") when the
  fallback path produces nothing usable.

Tests:
- 28 unit tests in tests/test_classifier_io.py covering parser failure
  modes and the leak guard (incl. verbatim production payload).
- Integration tests in tests/test_worker_fallback.py asserting
  backend_registry.classifier is NOT called by the fallback path,
  chat_fallback IS called, router JSON output is suppressed, and the
  chat_fallback adapter system_prompt != CLASSIFIER_PROMPT.

Out of scope: long-input pre-routing optimization, EXAONE_* env rename,
full model routing redesign.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 08:33:54 +09:00

153 lines
5.3 KiB
Python

"""Integration-style tests for the worker's fallback path.
Pinned behaviors:
- When classification fails, the direct/fallback path must NOT call
``backend_registry.classifier`` (its CLASSIFIER_PROMPT would leak
router JSON to the user).
- The fallback path must call ``backend_registry.chat_fallback``.
- Router-shaped JSON returned from chat_fallback is suppressed.
- When the chat_fallback adapter is None or raises, the fallback returns "".
These tests touch ``services.worker``, which imports the rest of the app
(config, db, tools). Run them in the nanoclaude venv where all deps live;
locally without those deps, pytest will skip the module.
"""
from __future__ import annotations
import asyncio
from unittest.mock import AsyncMock, MagicMock
import pytest
# Skip the whole module if heavy deps (aiosqlite, caldav, ...) aren't
# available. Prevents local devs from seeing a wall of ImportError.
pytest.importorskip("aiosqlite")
pytest.importorskip("caldav")
from services import worker # noqa: E402
from services.backend_registry import backend_registry # noqa: E402
def _make_job(message: str = "안녕"):
job = MagicMock()
job.id = "test-job"
job.message = message
job.callback = "synology"
job.status = "processing"
job.response_sent = False
return job
@pytest.fixture(autouse=True)
def _patch_state_stream(monkeypatch):
    """Swap ``worker.state_stream`` for a stub with awaitable no-op methods.

    ``state_stream.push`` is awaited from inside ``_fetch_fallback_text``, so
    both ``push`` and ``push_done`` are ``AsyncMock`` instances; no real events
    are sent. The stub is returned so a test can inspect recorded calls.
    """
    stream_stub = MagicMock(push=AsyncMock(), push_done=AsyncMock())
    monkeypatch.setattr(worker, "state_stream", stream_stub)
    return stream_stub
@pytest.fixture(autouse=True)
def _bypass_heartbeat(monkeypatch):
    """Replace ``worker._complete_with_heartbeat`` with a direct pass-through.

    The real helper normally sleeps 2s between heartbeats; the shim installed
    here only awaits ``adapter.complete_chat`` and returns its result.
    """

    async def _passthrough(adapter, message, job_id, *, messages=None, beat_msg=""):
        # job_id and beat_msg are accepted for signature compatibility only.
        reply = await adapter.complete_chat(message, messages=messages)
        return reply

    monkeypatch.setattr(worker, "_complete_with_heartbeat", _passthrough)
@pytest.mark.asyncio
async def test_fallback_uses_chat_fallback_not_classifier(monkeypatch):
    """The fallback text comes from chat_fallback; the classifier is not called."""
    expected_reply = "자연스러운 답변입니다."
    classifier_stub = MagicMock(
        complete_chat=AsyncMock(return_value="this should never reach the user")
    )
    fallback_stub = MagicMock(complete_chat=AsyncMock(return_value=expected_reply))
    replacements = (
        ("classifier", classifier_stub),
        ("chat_fallback", fallback_stub),
    )
    for attr_name, stub in replacements:
        monkeypatch.setattr(backend_registry, attr_name, stub)

    reply = await worker._fetch_fallback_text(_make_job())

    assert reply == expected_reply
    fallback_stub.complete_chat.assert_awaited_once()
    classifier_stub.complete_chat.assert_not_called()
@pytest.mark.asyncio
async def test_fallback_suppresses_router_json_output(monkeypatch):
    """Router-shaped JSON returned by chat_fallback must come back as ``""``.

    The test also asserts that the adapter was actually awaited. An empty
    result is what the fallback returns when the adapter is missing or raises,
    so without that check the test could pass without the leak guard ever
    seeing the router JSON.
    """
    chat_fallback_mock = MagicMock()
    # The exact bug shape from production: chat-side model emits router JSON.
    chat_fallback_mock.complete_chat = AsyncMock(
        return_value=(
            '{"action": "route", "response": "사용자님의 깊은 감정이 담긴 글이네요.", '
            '"prompt": "노래 가사를 분석해주세요."}'
        )
    )
    monkeypatch.setattr(backend_registry, "chat_fallback", chat_fallback_mock)

    result = await worker._fetch_fallback_text(_make_job())

    assert result == ""  # Suppressed by leak guard
    # Prove the router JSON was really produced and then suppressed, rather
    # than the call being skipped before the adapter was reached.
    chat_fallback_mock.complete_chat.assert_awaited()
@pytest.mark.asyncio
async def test_fallback_returns_empty_when_adapter_missing(monkeypatch):
    """With ``backend_registry.chat_fallback`` set to None the result is ``""``."""
    monkeypatch.setattr(backend_registry, "chat_fallback", None)

    reply = await worker._fetch_fallback_text(_make_job())

    assert reply == ""
@pytest.mark.asyncio
async def test_fallback_returns_empty_on_adapter_exception(monkeypatch):
    """An exception raised by chat_fallback is absorbed and ``""`` is returned.

    The adapter is also asserted to have been awaited, so the empty result is
    known to come from the exception path rather than from the call being
    skipped altogether.
    """
    chat_fallback_mock = MagicMock()
    chat_fallback_mock.complete_chat = AsyncMock(
        side_effect=RuntimeError("connection refused")
    )
    monkeypatch.setattr(backend_registry, "chat_fallback", chat_fallback_mock)

    result = await worker._fetch_fallback_text(_make_job())

    assert result == ""
    # AsyncMock records the await before raising its side_effect, so this
    # confirms the failing adapter was actually invoked.
    chat_fallback_mock.complete_chat.assert_awaited()
@pytest.mark.asyncio
async def test_fallback_strips_whitespace(monkeypatch):
    """Leading and trailing whitespace in the adapter reply is removed."""
    padded_reply = " 안녕하세요!\n "
    fallback_stub = MagicMock(complete_chat=AsyncMock(return_value=padded_reply))
    monkeypatch.setattr(backend_registry, "chat_fallback", fallback_stub)

    reply = await worker._fetch_fallback_text(_make_job())

    assert reply == "안녕하세요!"
@pytest.mark.asyncio
async def test_chat_fallback_adapter_uses_chat_prompt_not_classifier_prompt():
    """Spec contract: chat_fallback carries CHAT_FALLBACK_PROMPT, never CLASSIFIER_PROMPT.

    A fresh ``BackendRegistry`` is initialised from the real settings; if
    ``init_from_settings`` ever wires the wrong prompt onto ``chat_fallback``,
    this test fails before the leak can reach production.
    """
    from config import settings
    from services.backend_registry import (
        CHAT_FALLBACK_PROMPT,
        CLASSIFIER_PROMPT,
        BackendRegistry,
    )

    fresh_registry = BackendRegistry()
    fresh_registry.init_from_settings(settings)

    adapter = fresh_registry.chat_fallback
    assert adapter is not None
    wired_prompt = adapter.system_prompt
    assert wired_prompt == CHAT_FALLBACK_PROMPT
    assert wired_prompt != CLASSIFIER_PROMPT