"""Unit tests for services.classifier_io — parser and router-JSON guard. These are pure-function tests. They run without bootstrapping the rest of the worker's imports. """ from __future__ import annotations from services.classifier_io import looks_like_router_json, parse_classification # ---------- parse_classification ---------- def test_empty_raw_returns_classification_failed(): result = parse_classification("") assert result["action"] == "classification_failed" assert result["response"] == "" assert result["prompt"] == "" def test_whitespace_only_returns_classification_failed(): assert parse_classification(" \n\t ")["action"] == "classification_failed" def test_none_returns_classification_failed(): assert parse_classification(None)["action"] == "classification_failed" # type: ignore[arg-type] def test_non_json_text_returns_classification_failed(): assert parse_classification("그냥 평범한 답변입니다.")["action"] == "classification_failed" def test_json_without_action_key_returns_classification_failed(): assert parse_classification('{"foo": "bar", "baz": 1}')["action"] == "classification_failed" def test_invalid_json_returns_classification_failed(): assert parse_classification('{"action": "direct", broken')["action"] == "classification_failed" def test_non_dict_json_returns_classification_failed(): assert parse_classification("[1, 2, 3]")["action"] == "classification_failed" def test_valid_direct_returns_unchanged(): raw = '{"action": "direct", "response": "안녕!", "prompt": ""}' result = parse_classification(raw) assert result["action"] == "direct" assert result["response"] == "안녕!" 
def test_valid_route_returns_unchanged():
    """A well-formed `route` payload keeps its action and prompt."""
    payload = '{"action": "route", "response": "분석 중", "prompt": "양자역학 설명"}'
    outcome = parse_classification(payload)
    assert outcome["action"] == "route"
    assert outcome["prompt"] == "양자역학 설명"


def test_valid_tools_returns_unchanged():
    """A well-formed `tools` payload keeps its action and tool name."""
    payload = '{"action": "tools", "tool": "calendar", "operation": "today", "params": {}}'
    outcome = parse_classification(payload)
    assert outcome["action"] == "tools"
    assert outcome["tool"] == "calendar"


def test_json_embedded_in_surrounding_text_extracts():
    """A JSON object wrapped in extra prose is still parsed as `direct`."""
    noisy = 'Sure! {"action": "direct", "response": "hi", "prompt": ""} Done.'
    outcome = parse_classification(noisy)
    assert outcome["action"] == "direct"


def test_empty_does_not_become_direct():
    """Regression: empty classifier output must NOT silently become direct.

    The old behavior caused the worker's direct branch to re-call the
    classifier with the user's message, leaking router JSON to chat.
    """
    outcome = parse_classification("")
    assert outcome["action"] != "direct"


def test_non_json_text_does_not_become_direct():
    """Regression: raw natural-language text must NOT become a direct action.

    Otherwise the raw text would be streamed to the user. The classifier
    prompt biases the model toward JSON output, so non-JSON output is a
    malfunction signal.
    """
    plain_text = "이건 평문 답변이야."
    outcome = parse_classification(plain_text)
    assert outcome["action"] != "direct"


# ---------- looks_like_router_json ----------


def test_router_json_with_action_route_detected():
    """A `route` router payload is flagged."""
    payload = '{"action": "route", "response": "...", "prompt": "..."}'
    assert looks_like_router_json(payload) is True


def test_router_json_with_action_direct_detected():
    """A `direct` router payload is flagged."""
    payload = '{"action": "direct", "response": "hi", "prompt": ""}'
    assert looks_like_router_json(payload) is True


def test_router_json_with_action_tools_detected():
    """A `tools` router payload is flagged."""
    payload = '{"action": "tools", "tool": "calendar", "operation": "today", "params": {}}'
    flagged = looks_like_router_json(payload)
    assert flagged is True


def test_router_json_with_action_clarify_detected():
    """A `clarify` router payload is flagged."""
    payload = '{"action": "clarify", "response": "어떤 의미?", "prompt": ""}'
    assert looks_like_router_json(payload) is True


def test_router_json_with_action_classification_failed_detected():
    """A bare `classification_failed` payload is flagged."""
    payload = '{"action": "classification_failed"}'
    assert looks_like_router_json(payload) is True


def test_natural_text_not_detected():
    """Ordinary natural-language text is not flagged."""
    greeting = "안녕하세요! 무엇을 도와드릴까요?"
    assert looks_like_router_json(greeting) is False


def test_empty_string_not_detected():
    """The empty string is not flagged."""
    flagged = looks_like_router_json("")
    assert flagged is False


def test_whitespace_only_not_detected():
    """Whitespace-only text is not flagged."""
    blank = " \n "
    assert looks_like_router_json(blank) is False


def test_text_with_braces_but_no_json_not_detected():
    """Prose that merely contains braces is not flagged."""
    prose = "이건 {중괄호 가} 있는 자연어 답변이에요."
    assert looks_like_router_json(prose) is False


def test_unrelated_json_not_detected():
    """JSON with no router-shaped keys is not flagged."""
    payload = '{"name": "이드", "version": "1.0"}'
    assert looks_like_router_json(payload) is False


def test_json_with_two_router_keys_detected():
    """Even without an `action` field, two router-shaped keys signal leakage."""
    payload = '{"prompt": "do x", "tool": "calendar"}'
    assert looks_like_router_json(payload) is True


def test_json_with_one_router_key_not_detected():
    """A single router-like key in otherwise-normal JSON should not trip the guard."""
    payload = '{"prompt": "사용자가 입력한 prompt"}'
    assert looks_like_router_json(payload) is False


def test_code_fenced_router_json_detected():
    """Router JSON wrapped in a Markdown ```json fence is still flagged."""
    fenced = '```json\n{"action": "route", "response": "...", "prompt": "..."}\n```'
    flagged = looks_like_router_json(fenced)
    assert flagged is True


def test_unknown_action_value_with_other_router_keys_detected():
    """Unknown action value but other router keys present -> still leaks shape."""
    payload = '{"action": "weird", "tool": "calendar", "operation": "today"}'
    flagged = looks_like_router_json(payload)
    assert flagged is True


def test_actual_bug_payload_detected():
    """Verbatim production leak (Synology Chat 2026-05-02 08:16:25)."""
    leaked = (
        '{"action": "route", "response": "사용자님의 깊은 감정이 담긴 글이네요. '
        '이 글을 바탕으로 질문에 답하기 위해서는 자세한 분석이 필요해요. '
        '추론 모델에게 전달할게요!", "prompt": "노래 가사를 분석해주세요."}'
    )
    flagged = looks_like_router_json(leaked)
    assert flagged is True


# ---------- combined: normal direct path stays out of fallback ----------


def test_normal_direct_response_takes_direct_path():
    """Spec contract for a normal direct response with non-empty natural text.

    It must be classified `direct` AND survive the leak guard, so the
    worker's direct-path elif fires (not the chat_fallback branch).
    """
    payload = '{"action": "direct", "response": "안녕하세요! 무엇을 도와드릴까요?", "prompt": ""}'
    outcome = parse_classification(payload)
    assert outcome["action"] == "direct"
    assert outcome["response"]
    assert looks_like_router_json(outcome["response"]) is False
    # The three assertions above together replicate the elif condition in
    # worker.py: action=="direct" and response_text and not router-shaped.