"""Pure helpers for classifier I/O — parsing classifier output and detecting router-shaped JSON that must never reach the user. Kept dependency-free so unit tests can import without bootstrapping settings, DB, or HTTP clients. """ from __future__ import annotations import json ROUTER_ACTIONS = frozenset( {"route", "direct", "clarify", "tools", "system_status", "classification_failed"} ) ROUTER_KEYS = frozenset({"action", "tool", "operation", "params", "prompt"}) _CLASSIFICATION_FAILED: dict = { "action": "classification_failed", "response": "", "prompt": "", } def parse_classification(raw: str) -> dict: """Parse classifier output into a dict that always carries an `action`. Returns the parsed JSON when: - the payload contains a JSON object with an `action` key. Returns ``{"action": "classification_failed", ...}`` when: - input is empty or whitespace only, - no JSON object is present, - the JSON cannot be decoded, - the decoded value is not a dict, - the decoded dict has no `action` key. The failure path used to be `{"action": "direct", "response": }`, which let a downstream branch re-call the classifier and stream classifier-prompt JSON straight to the user. Returning an explicit failure sentinel forces callers to take a non-classifier code path. """ if raw is None: return dict(_CLASSIFICATION_FAILED) stripped = raw.strip() if not stripped: return dict(_CLASSIFICATION_FAILED) start = stripped.find("{") end = stripped.rfind("}") if start < 0 or end <= start: return dict(_CLASSIFICATION_FAILED) candidate = stripped[start : end + 1] try: result = json.loads(candidate) except json.JSONDecodeError: return dict(_CLASSIFICATION_FAILED) if not isinstance(result, dict) or "action" not in result: return dict(_CLASSIFICATION_FAILED) return result def looks_like_router_json(text: str) -> bool: """Return True if ``text`` carries router/classifier JSON. The classifier system prompt forces the model to emit JSON of shape ``{"action": "...", "response": "...", "prompt": "..."}`` (and several tool variants). When the fallback path accidentally streams that JSON to the user, the chat surface displays the raw routing decision instead of an answer. This guard lets us drop such output before send. Detection rules (any one is enough): - contains a JSON object whose ``action`` value is a known router action, - contains a JSON object with two or more known router-shaped keys. Returns False for empty input, plain text, or unrelated JSON. """ if not text: return False stripped = text.strip() if not stripped: return False # Strip leading/trailing markdown fences, if any. for fence in ("```json", "```"): if stripped.startswith(fence): stripped = stripped[len(fence) :].lstrip() break if stripped.endswith("```"): stripped = stripped[: -len("```")].rstrip() start = stripped.find("{") end = stripped.rfind("}") if start < 0 or end <= start: return False candidate = stripped[start : end + 1] try: obj = json.loads(candidate) except json.JSONDecodeError: return False if not isinstance(obj, dict): return False action = obj.get("action") if isinstance(action, str) and action in ROUTER_ACTIONS: return True matching = ROUTER_KEYS & set(obj.keys()) if len(matching) >= 2: return True return False