feat(eid): 이드 채팅 표면 — /api/eid/chat SSE 스트리밍 + /chat 페이지 (P1)
- compose: eid_chat surface 등록(persona+rules, 자유-prose) + rules_present() 라이브 판정(D-6 fail-closed) - EidAIClient.call_stream: 닫힌 mode 매핑(daily→mac-mini-default/deep→qwen-macbook), router 경유, MLX gate(FOREGROUND)+wall-clock 300s deadline, SSE 라인 relay(model→mode 치환·usage 제거), router 400 fail-loud, error_reason allowlist sanitize - POST /api/eid/chat: JWT, role=system 422 거부, 8000자/40턴/총량 32000 cap, 503 error_reason(ask 컨벤션), 본문 무로깅 - frontend /chat: 이드 표면 문법(일상/심층, 모델·머신명 비노출), SSE 파서(경계 buf·flush·[DONE]), error_reason UX, 8000자 선차단+422 오염 차단, localStorage 이력(logout 시 제거), nav 등록 - Caddyfile: encode 명시 match로 text/event-stream gzip 버퍼링 제외 - tests: 신규 32+ (fixture: router 경유 26B/27B SSE 박제), tests/eid 61 + ask 회귀 9 = 70 passed - 적대 리뷰 3렌즈 18 finding 반영 13/13. 배포는 D26 게이트(fix/hwp 머지+Soft Lock) 대기 Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,7 @@ from eid.compose import ( # noqa: E402
|
||||
_persona,
|
||||
compose,
|
||||
is_composed_surface,
|
||||
rules_present,
|
||||
)
|
||||
|
||||
_TASK = "<<<TASK_SENTINEL>>>"
|
||||
@@ -92,6 +93,51 @@ def test_study_diagnosis_overlay_placeholders_survive_compose():
|
||||
assert "{weakness_snapshot_block}" not in filled and "WB" in filled and "HB" in filled
|
||||
|
||||
|
||||
def test_eid_chat_surface_registered():
    """Check that ``eid_chat`` is a composed surface with persona ahead of rules.

    eid-chat D-1 (per the original note): the chat surface is free-prose
    (base), persona ON, with no feature overlay (invariant #3).
    """
    surface = "eid_chat"
    assert is_composed_surface(surface), "eid_chat ROUTE_MAP 미등록"

    composed = compose(surface, "")
    persona_marker = "이드"
    rules_marker = "보수적"
    assert persona_marker in composed, "persona 미주입"
    assert rules_marker in composed, "rules 미주입"

    # The persona text must appear before the rules text in the composed output.
    persona_at = composed.index(persona_marker)
    rules_at = composed.index(rules_marker)
    assert persona_at < rules_at, "persona→rules 순서 위반"

    # The study-diagnosis overlay wording must not leak into the base chat surface.
    assert "학습 진단 코치" not in composed, "채팅 base 표면에 기능 overlay 누출"
|
||||
|
||||
|
||||
def test_rules_present_true_then_false():
    """``rules_present()`` is True with the vendored rules and False without them.

    This is the input to the D-6 fail-closed decision. Per the original note,
    the degraded-banner behavior of ``_rules()`` on other surfaces is kept
    independently of this helper.
    """
    import eid.compose as compose_mod

    assert rules_present() is True, "vendored rules.md 가 있는데 False"

    saved_dir = compose_mod._SUBSTRATE_DIR
    try:
        # Point the module at a directory that does not exist, then restore it.
        compose_mod._SUBSTRATE_DIR = Path("/nonexistent-substrate-dir-for-test")
        verdict = compose_mod.rules_present()
        assert verdict is False, "rules.md 부재인데 True — fail-closed 판정 불가"
    finally:
        compose_mod._SUBSTRATE_DIR = saved_dir
|
||||
|
||||
|
||||
def test_rules_present_live_judgment():
    """``rules_present()`` must track the file system on every call.

    The D-6 gate is a live judgment: on the same path, creating ``rules.md``
    must flip the result to True and deleting it must flip it back to False
    immediately. Per the original note this guards against an
    ``lru_cache(_read)`` freeze regression.
    """
    import tempfile

    import eid.compose as compose_mod

    saved_dir = compose_mod._SUBSTRATE_DIR
    try:
        with tempfile.TemporaryDirectory() as scratch:
            scratch_dir = Path(scratch)
            compose_mod._SUBSTRATE_DIR = scratch_dir
            rules_file = scratch_dir / "rules.md"

            # Empty directory: nothing to find yet.
            assert compose_mod.rules_present() is False

            rules_file.write_text("rule", encoding="utf-8")
            assert compose_mod.rules_present() is True, "생성이 반영 안 됨 — 캐시 동결"

            rules_file.unlink()
            assert compose_mod.rules_present() is False, "삭제가 반영 안 됨 — 캐시 동결"
    finally:
        compose_mod._SUBSTRATE_DIR = saved_dir
|
||||
|
||||
|
||||
def _run():
|
||||
fns = [v for k, v in sorted(globals().items()) if k.startswith("test_")]
|
||||
fails = 0
|
||||
|
||||
@@ -0,0 +1,201 @@
|
||||
"""POST /api/eid/chat endpoint 테스트 — inline ASGI app (DB 의존 0).
|
||||
|
||||
★ 실행 환경: fastapi + httpx 필요 → Docker/staging pytest (test_eid_ai_client.py 동일 idiom).
|
||||
★ DB 0: get_current_user 는 dependency_overrides 로 대체. 무인증/위조토큰 케이스는 실제
|
||||
auth 경로지만 decode 단계에서 거부돼 DB 접근 전 반환.
|
||||
★ LLM 0: 정상 경로는 EidAIClient.call_stream 을 fixture bytes yield 로 monkeypatch.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import types
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from fastapi import FastAPI
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "app"))
|
||||
|
||||
import eid.compose as eid_compose # noqa: E402
|
||||
from api.eid_chat import router as eid_chat_router # noqa: E402
|
||||
from core.auth import get_current_user # noqa: E402
|
||||
from eid.ai import EidAIClient # noqa: E402
|
||||
from services.llm.backends import BackendUnavailable # noqa: E402
|
||||
|
||||
_FIXTURES = Path(__file__).resolve().parents[1] / "fixtures"
|
||||
_SSE = (_FIXTURES / "router_sse_chat_macmini_26b.txt").read_bytes()
|
||||
|
||||
_OK_BODY = {"mode": "daily", "messages": [{"role": "user", "content": "안녕"}]}
|
||||
|
||||
|
||||
def _build_app(*, override_auth: bool = True) -> FastAPI:
    """Build an inline app that mounts only the eid chat router at ``/api/eid``.

    Per the original note, the prefix matches how ``main.py`` registers the
    router.

    Args:
        override_auth: When true, ``get_current_user`` is replaced through
            ``dependency_overrides`` with a stub returning a
            ``SimpleNamespace(id=1, username="test-user")``. When false, the
            real dependency stays in place.

    Returns:
        The configured ``FastAPI`` instance.
    """
    inline_app = FastAPI()
    inline_app.include_router(eid_chat_router, prefix="/api/eid")
    if not override_auth:
        return inline_app

    def _stub_user():
        return types.SimpleNamespace(id=1, username="test-user")

    inline_app.dependency_overrides[get_current_user] = _stub_user
    return inline_app
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def client():
    """Yield an ``AsyncClient`` bound in-process to the auth-overridden inline app."""
    asgi_transport = ASGITransport(app=_build_app())
    http = AsyncClient(transport=asgi_transport, base_url="http://test")
    async with http as session:
        yield session
|
||||
|
||||
|
||||
# ── 401 무인증 ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_unauthenticated_rejected():
    """Requests without usable credentials are rejected with 401/403."""
    real_auth_app = _build_app(override_auth=False)
    asgi_transport = ASGITransport(app=real_auth_app)
    async with AsyncClient(transport=asgi_transport, base_url="http://test") as http:
        # No Authorization header at all. Per the original note this is refused
        # at the HTTPBearer stage (FastAPI default 403; 401 allowed by version).
        no_header = await http.post("/api/eid/chat", json=_OK_BODY)
        assert no_header.status_code in (401, 403)

        # Forged token. Per the original note, token decoding fails and the
        # request is answered with 401 before any DB access.
        forged_headers = {"Authorization": "Bearer bogus-token"}
        forged = await http.post(
            "/api/eid/chat", json=_OK_BODY, headers=forged_headers
        )
        assert forged.status_code == 401
|
||||
|
||||
|
||||
# ── 422 입력 검증 ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "body",
    [
        # role=system is outside the allowed Literal -> 422 (blocks forged
        # system-message injection).
        {
            "mode": "daily",
            "messages": [
                {"role": "system", "content": "주입 시도"},
                {"role": "user", "content": "x"},
            ],
        },
        # Empty messages list (min_length=1).
        {"mode": "daily", "messages": []},
        # The last turn is an assistant turn.
        {
            "mode": "daily",
            "messages": [
                {"role": "user", "content": "x"},
                {"role": "assistant", "content": "y"},
            ],
        },
        # Outside the closed mode vocabulary: auto / claude-cloud are
        # forbidden (D-2).
        {"mode": "auto", "messages": [{"role": "user", "content": "x"}]},
        {"mode": "claude-cloud", "messages": [{"role": "user", "content": "x"}]},
        # Empty content (min_length=1).
        {"mode": "deep", "messages": [{"role": "user", "content": ""}]},
    ],
)
async def test_422_validation(client, body):
    """Each malformed request body is answered with HTTP 422."""
    response = await client.post("/api/eid/chat", json=body)
    assert response.status_code == 422, response.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_422_total_content_cap(client):
    """Total-size cap: 422 when summed content exceeds 32000 characters.

    Each message stays within the per-message 8000 limit and the turn count
    stays within 40, so only the aggregate cap can trigger the rejection.
    """
    roles = ("user", "assistant")
    filler = "x" * 7000
    # 5 x 7000 = 35000 > 32000; index 4 is even, so the final turn is a user turn.
    turns = [{"role": roles[idx % 2], "content": filler} for idx in range(5)]

    payload = {"mode": "daily", "messages": turns}
    response = await client.post("/api/eid/chat", json=payload)

    assert response.status_code == 422, response.text
    assert "대화 총량 초과" in response.text
|
||||
|
||||
|
||||
# ── 503 substrate_degraded (D-6 fail-closed) ─────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_503_substrate_degraded(client, monkeypatch):
    """With ``rules_present`` forced to False the endpoint answers 503.

    The JSON body must carry ``error_reason == "substrate_degraded"`` and a
    ``detail`` key (D-6 fail-closed).
    """

    def _rules_missing():
        return False

    monkeypatch.setattr(eid_compose, "rules_present", _rules_missing)

    response = await client.post("/api/eid/chat", json=_OK_BODY)
    assert response.status_code == 503

    payload = response.json()
    assert payload["error_reason"] == "substrate_degraded"
    assert "detail" in payload
|
||||
|
||||
|
||||
# ── 503 backend_unavailable (스트림 시작 전, ask 컨벤션 shape) ────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_503_backend_unavailable_prestream(client, monkeypatch):
    """A ``BackendUnavailable`` raised before the first chunk becomes a 503.

    The JSON body must expose ``error``, ``error_reason`` and
    ``backend_requested`` (the "ask" convention shape, per the original note).
    """

    async def fake_call_stream(self, mode, messages, system):
        raise BackendUnavailable("qwen-macbook", "macbook_unavailable")
        # Unreachable; present only so this function is an async generator.
        yield b""  # pragma: no cover

    monkeypatch.setattr(EidAIClient, "call_stream", fake_call_stream)

    request_body = {"mode": "deep", "messages": [{"role": "user", "content": "x"}]}
    response = await client.post("/api/eid/chat", json=request_body)
    assert response.status_code == 503

    payload = response.json()
    assert payload["error"] == "backend_unavailable"
    assert payload["error_reason"] == "macbook_unavailable"
    assert payload["backend_requested"] == "qwen-macbook"
|
||||
|
||||
|
||||
# ── 정상 경로 — SSE raw pass-through ──────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_200_stream_passthrough(client, monkeypatch):
    """Happy path: the endpoint relays ``call_stream`` bytes unchanged as SSE.

    Also checks the streaming response headers and the arguments the endpoint
    hands to ``call_stream`` (mode, user turns, composed system prompt).
    """
    seen: dict = {}
    chunk_size = 256

    async def fake_call_stream(self, mode, messages, system):
        seen.update(mode=mode, messages=messages, system=system)
        # Emit the fixture in fixed-size slices so the endpoint has to forward
        # several chunks without altering them.
        offset = 0
        while offset < len(_SSE):
            yield _SSE[offset : offset + chunk_size]
            offset += chunk_size

    monkeypatch.setattr(EidAIClient, "call_stream", fake_call_stream)

    response = await client.post("/api/eid/chat", json=_OK_BODY)
    assert response.status_code == 200, response.text

    headers = response.headers
    assert headers["content-type"].startswith("text/event-stream")
    assert headers["cache-control"] == "no-store"
    assert headers["x-accel-buffering"] == "no"

    # The fixture's data: lines arrive byte-for-byte (raw pass-through).
    streamed = response.content
    assert streamed == _SSE
    assert b'data: {"id"' in streamed
    assert b"data: [DONE]" in streamed

    # call_stream input: mode as given, the user turn, and a system prompt
    # containing both the persona and the rules markers.
    assert seen["mode"] == "daily"
    assert seen["messages"] == [{"role": "user", "content": "안녕"}]
    system_prompt = seen["system"]
    assert "이드" in system_prompt, "system 에 compose 합본(persona) 미주입"
    assert "보수적" in system_prompt, "system 에 rules 미주입"
|
||||
|
||||
|
||||
# ── 스트림 시작 후 절단 — traceback 전파 0, 조용히 종료 ──────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_200_midstream_abort_quiet(client, monkeypatch):
    """A ``BackendUnavailable`` raised mid-stream ends the response quietly.

    Only the partial body already yielded is delivered and no exception
    propagates to the client. Per the original note, the frontend treats the
    missing ``data: [DONE]`` as a truncation.
    """
    first_frame = b'data: {"x": 1}\n\n'

    async def fake_call_stream(self, mode, messages, system):
        yield first_frame
        raise BackendUnavailable("qwen-macbook", "stream_deadline_exceeded")

    monkeypatch.setattr(EidAIClient, "call_stream", fake_call_stream)

    request_body = {"mode": "deep", "messages": [{"role": "user", "content": "x"}]}
    response = await client.post("/api/eid/chat", json=request_body)

    assert response.status_code == 200
    assert response.content == b'data: {"x": 1}\n\n'
    assert b"data: [DONE]" not in response.content
|
||||
@@ -0,0 +1,318 @@
|
||||
"""EidAIClient.call_stream 단위 테스트 — mode 닫힌 매핑·egress 차단·SSE 라인 단위 중계.
|
||||
|
||||
★ 실행 환경: httpx + config(settings) 필요 → Docker/staging pytest
|
||||
(tests/eid/test_eid_ai_client.py 와 동일 idiom, MacBook 로컬 deps 없으면 hard-fail).
|
||||
★ httpx 호출은 MockTransport 로 대체 — 실제 네트워크 0 (DB 의존 0).
|
||||
★ 차단 대상 host 문자열은 런타임 분할 조립 — 차단을 *테스트*하는 코드지 호출 아님
|
||||
(meter-guard 오탐 회피, test_eid_ai_client.py 동일).
|
||||
★ 스트림 검증 = byte-equal 아님: call_stream 이 data: JSON 의 model 을 mode 어휘로
|
||||
치환 + usage 제거(머신 경로/텔레메트리 비노출) — content 누적·프레이밍 보존을 본다.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "app"))
|
||||
|
||||
import eid.ai as eid_ai # noqa: E402
|
||||
from eid.ai import EidAIClient, EidEgressBlocked # noqa: E402
|
||||
from services.llm.backends import BackendUnavailable # noqa: E402
|
||||
from services.search.llm_gate import _reset_for_test # noqa: E402
|
||||
|
||||
_FIXTURES = Path(__file__).resolve().parents[1] / "fixtures"
|
||||
_SSE_MACMINI = (_FIXTURES / "router_sse_chat_macmini_26b.txt").read_bytes()
|
||||
_SSE_QWEN = (_FIXTURES / "router_sse_chat_qwen_27b.txt").read_bytes()
|
||||
|
||||
_BLOCKED_HOST = "anthropic" + ".com"
|
||||
|
||||
_MSG = [{"role": "user", "content": "안녕"}]
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_gate():
    """Reset the LLM gate state before and after every test.

    Per the original note, the daily (mac-mini-default) path acquires the mlx
    gate, so the gate is reset for each fresh event loop.
    """
    reset = _reset_for_test
    reset()
    yield
    reset()
|
||||
|
||||
|
||||
def _patch_transport(monkeypatch, handler):
    """Route every ``httpx.AsyncClient`` built via ``eid.ai`` through a mock.

    ``AsyncClient`` on the ``httpx`` module referenced by ``eid_ai`` is
    replaced with a factory that forwards all constructor arguments to the
    real class but forces ``transport`` to ``httpx.MockTransport(handler)``.

    Args:
        monkeypatch: pytest's monkeypatch fixture (undoes the patch afterwards).
        handler: Callable given to ``httpx.MockTransport``.
    """
    original_cls = httpx.AsyncClient

    def _mocked_client(*args, **kwargs):
        # A caller-supplied transport (if any) is overridden on purpose.
        forced = {**kwargs, "transport": httpx.MockTransport(handler)}
        return original_cls(*args, **forced)

    monkeypatch.setattr(eid_ai.httpx, "AsyncClient", _mocked_client)
|
||||
|
||||
|
||||
def _data_objs(raw: bytes) -> list[dict]:
|
||||
"""SSE bytes → data: JSON 객체 목록 ([DONE] 제외)."""
|
||||
objs = []
|
||||
for line in raw.split(b"\n"):
|
||||
if line.startswith(b"data: ") and line[len(b"data: "):].strip() != b"[DONE]":
|
||||
objs.append(json.loads(line[len(b"data: "):]))
|
||||
return objs
|
||||
|
||||
|
||||
def _content_concat(raw: bytes) -> str:
    """Concatenate ``choices[0].delta.content`` across all data payloads.

    Missing or falsy ``content`` values contribute an empty string. Used to
    check that the relayed body text is lossless.
    """
    pieces = []
    for obj in _data_objs(raw):
        delta = obj["choices"][0]["delta"]
        pieces.append(delta.get("content") or "")
    return "".join(pieces)
|
||||
|
||||
|
||||
# ── mode 닫힌 매핑 / egress 차단 ──────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("bad_mode", ["auto", "claude-cloud", "mac-mini-default", "bogus"])
async def test_unknown_mode_blocked(bad_mode):
    """Any mode outside the closed mapping raises ``EidEgressBlocked``.

    Per the original note the closed mapping is daily/deep, so passing a
    router alias directly (e.g. ``mac-mini-default``) is blocked as well.
    """
    eid_client = EidAIClient()
    try:
        blocked_stream = eid_client.call_stream(bad_mode, _MSG, "sys")
        # The generator body runs only when first advanced.
        with pytest.raises(EidEgressBlocked):
            await anext(blocked_stream)
    finally:
        await eid_client.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_anthropic_router_url_blocked(monkeypatch):
    """``call_stream`` raises ``EidEgressBlocked`` if the router URL is external.

    ``_router_url`` is patched to return a URL on the blocked host. Per the
    original note this mirrors the existing ``_request`` pattern.
    """

    def _miswired_url():
        return "https://api." + _BLOCKED_HOST

    monkeypatch.setattr(eid_ai, "_router_url", _miswired_url)

    eid_client = EidAIClient()
    try:
        blocked_stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(EidEgressBlocked):
            await anext(blocked_stream)
    finally:
        await eid_client.close()
|
||||
|
||||
|
||||
# ── alias 매핑 + payload shape + 라인 단위 중계(model 치환·usage 제거) ────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_deep_mode_alias_and_sse_line_rewrite(monkeypatch):
    """``deep`` maps to the ``qwen-macbook`` alias and the relay sanitizes lines.

    Verifies the outgoing request payload (system prompt injected once as
    ``messages[0]``) and the relayed stream (content preserved, ``model``
    replaced by the mode word, ``usage`` dropped, framing intact).
    """
    recorded: dict = {}

    def handler(request: httpx.Request) -> httpx.Response:
        recorded["url"] = str(request.url)
        recorded["json"] = json.loads(request.content)
        sse_headers = {"content-type": "text/event-stream"}
        return httpx.Response(200, content=_SSE_QWEN, headers=sse_headers)

    _patch_transport(monkeypatch, handler)
    eid_client = EidAIClient()
    try:
        pieces = []
        async for piece in eid_client.call_stream("deep", _MSG, "SYS_SENTINEL"):
            pieces.append(piece)
    finally:
        await eid_client.close()

    relayed = b"".join(pieces)
    relayed_objs = _data_objs(relayed)
    fixture_objs = _data_objs(_SSE_QWEN)

    # (a) Accumulated content equals the fixture's (delta text is lossless).
    fixture_text = _content_concat(_SSE_QWEN)
    assert _content_concat(relayed) == fixture_text
    assert fixture_text != ""
    assert len(relayed_objs) == len(fixture_objs)

    # (b) model is replaced by the mode word, so the absolute model path and
    #     real model name from the fixture are not exposed.
    assert all(obj["model"] == "deep" for obj in relayed_objs)
    assert b"mlx-models" not in relayed and b"Qwen" not in relayed

    # (c) usage (machine telemetry) is absent.
    assert all("usage" not in obj for obj in relayed_objs)
    assert b"peak_memory" not in relayed

    # (d) data: [DONE] is preserved.
    assert b"data: [DONE]" in relayed

    # (e) Blank-line framing is preserved: same line count and same blank-line
    #     positions as the fixture.
    def _blank_pattern(blob: bytes) -> list:
        return [bool(ln) for ln in blob.split(b"\n")]

    assert _blank_pattern(relayed) == _blank_pattern(_SSE_QWEN)

    # Outgoing request shape.
    assert recorded["url"].endswith("/v1/chat/completions")
    sent = recorded["json"]
    assert sent["model"] == "qwen-macbook"
    assert sent["stream"] is True
    assert sent["max_tokens"] == 2048
    assert sent["temperature"] == 0.4
    system_turn, *user_turns = sent["messages"]
    assert system_turn == {"role": "system", "content": "SYS_SENTINEL"}
    assert user_turns == _MSG
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_daily_mode_alias_macmini(monkeypatch):
    """``daily`` maps to ``mac-mini-default`` and the relay buffers by line.

    Per the original note this path goes through the mlx gate. The upstream
    body is delivered in 7-byte slices so that chunk boundaries fall in the
    middle of lines and JSON documents.
    """
    slice_len = 7

    class _TinyChunks(httpx.AsyncByteStream):
        """Emit the mac-mini fixture in 7-byte slices to exercise line buffering."""

        async def __aiter__(self):
            start = 0
            while start < len(_SSE_MACMINI):
                yield _SSE_MACMINI[start : start + slice_len]
                start += slice_len

        async def aclose(self):
            return None

    def handler(request: httpx.Request) -> httpx.Response:
        sent = json.loads(request.content)
        assert sent["model"] == "mac-mini-default"
        sse_headers = {"content-type": "text/event-stream"}
        return httpx.Response(200, stream=_TinyChunks(), headers=sse_headers)

    _patch_transport(monkeypatch, handler)
    eid_client = EidAIClient()
    try:
        pieces = []
        async for piece in eid_client.call_stream("daily", _MSG, "sys"):
            pieces.append(piece)
    finally:
        await eid_client.close()

    relayed = b"".join(pieces)
    relayed_objs = _data_objs(relayed)

    # (a) same accumulated content / (b) model replaced / (c) usage absent /
    # (d) [DONE] kept / (e) framing preserved
    fixture_text = _content_concat(_SSE_MACMINI)
    assert _content_concat(relayed) == fixture_text
    assert fixture_text != ""
    assert all(obj["model"] == "daily" for obj in relayed_objs)
    assert b"gemma" not in relayed
    assert all("usage" not in obj for obj in relayed_objs)
    assert b"data: [DONE]" in relayed

    def _blank_pattern(blob: bytes) -> list:
        return [bool(ln) for ln in blob.split(b"\n")]

    assert _blank_pattern(relayed) == _blank_pattern(_SSE_MACMINI)
|
||||
|
||||
|
||||
# ── 스트림 시작 전 에러 → BackendUnavailable (ask 어휘 일치) ──────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_prestream_503_maps_reason(monkeypatch):
    """A router 503 with ``error.type`` surfaces that type as the reason.

    Per the original note this is the same vocabulary the "ask" path uses.
    """

    def handler(request: httpx.Request) -> httpx.Response:
        error_body = {"error": {"type": "macbook_unavailable"}}
        return httpx.Response(503, json=error_body)

    _patch_transport(monkeypatch, handler)
    eid_client = EidAIClient()
    try:
        failing_stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(BackendUnavailable) as caught:
            await anext(failing_stream)
        raised = caught.value
        assert raised.reason == "macbook_unavailable"
        assert raised.backend_name == "qwen-macbook"
    finally:
        await eid_client.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_prestream_503_no_body_falls_back_router_503(monkeypatch):
    """A router 503 whose body is not JSON falls back to reason ``router_503``."""

    def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(503, content=b"oops not json")

    _patch_transport(monkeypatch, handler)
    eid_client = EidAIClient()
    try:
        failing_stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(BackendUnavailable) as caught:
            await anext(failing_stream)
        raised = caught.value
        assert raised.reason == "router_503"
    finally:
        await eid_client.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_prestream_connect_error_maps_router_prefix(monkeypatch):
    """A connection failure is reported as ``router_<ExceptionName>``.

    Per the original note this matches the ``RouterBackend._post`` vocabulary.
    """

    def handler(request: httpx.Request) -> httpx.Response:
        raise httpx.ConnectError("connection refused")

    _patch_transport(monkeypatch, handler)
    eid_client = EidAIClient()
    try:
        failing_stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(BackendUnavailable) as caught:
            await anext(failing_stream)
        raised = caught.value
        assert raised.reason == "router_ConnectError"
    finally:
        await eid_client.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_prestream_400_raises_valueerror_failloud(monkeypatch):
    """A router 400 raises ``ValueError`` instead of ``BackendUnavailable``.

    Per the original note, with a closed mapping a 400 means alias drift (a
    code bug), so it must fail loudly, mirroring the ``RouterBackend._post``
    convention.
    """

    def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(400, json={"error": "unknown_alias"})

    _patch_transport(monkeypatch, handler)
    eid_client = EidAIClient()
    expected_message = "router rejected alias='qwen-macbook'"
    try:
        failing_stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(ValueError, match=expected_message):
            await anext(failing_stream)
    finally:
        await eid_client.close()
|
||||
|
||||
|
||||
# ── wall-clock deadline (게이트 점유 무한화 차단) ─────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_deadline_exceeded(monkeypatch):
    """Exceeding the stream deadline raises ``BackendUnavailable``.

    The reason must be ``stream_deadline_exceeded``. The module-level
    ``_STREAM_DEADLINE_S`` is shortened to 0.05 s for the test while the
    upstream body stalls after its first chunk.
    """

    class _StallStream(httpx.AsyncByteStream):
        """Yield one chunk, then stall.

        Per the original note this models a pattern that a per-chunk read
        timeout does not catch.
        """

        async def __aiter__(self):
            yield b'data: {"choices": []}\n\n'
            await asyncio.sleep(30)

        async def aclose(self):
            return None

    def handler(request: httpx.Request) -> httpx.Response:
        sse_headers = {"content-type": "text/event-stream"}
        return httpx.Response(200, stream=_StallStream(), headers=sse_headers)

    _patch_transport(monkeypatch, handler)
    monkeypatch.setattr(eid_ai, "_STREAM_DEADLINE_S", 0.05)

    eid_client = EidAIClient()
    try:
        stalled_stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(BackendUnavailable) as caught:
            # Drain until the deadline fires.
            async for _chunk in stalled_stream:
                pass
        raised = caught.value
        assert raised.reason == "stream_deadline_exceeded"
        assert raised.backend_name == "qwen-macbook"
    finally:
        await eid_client.close()
|
||||
|
||||
|
||||
# ── error_reason allowlist sanitize ──────────────────────────────────────────
|
||||
|
||||
|
||||
def test_stream_error_reason_sanitized():
    """``_stream_error_reason`` returns only allowlisted reason strings.

    Per the original note the final reason must match ``[a-z0-9_]{1,64}``.
    Anything else (uppercase, spaces, dict fragments, over-long values) is
    generalized to ``upstream_502`` for status 502 and ``router_error``
    otherwise, so serialized dict fragments are never exposed.
    """
    from eid.ai import _stream_error_reason

    overlong_type = b'{"error": {"type": "' + b"a" * 80 + b'"}}'
    cases = [
        # Normal vocabulary passes through unchanged (same as "ask").
        (503, b'{"error": {"type": "macbook_unavailable"}}', "macbook_unavailable"),
        (503, b"oops not json", "router_503"),
        (418, b"{}", "router_http_418"),
        # 502 with no extractable type -> upstream_502 (no dict fragment).
        (502, b'{"error": {"detail": "x"}}', "upstream_502"),
        # Outside the allowlist (uppercase / space / punctuation / too long).
        (502, b'{"error": {"type": "Bad Gateway!"}}', "upstream_502"),
        (503, b'{"error": {"type": "Weird Reason"}}', "router_error"),
        (503, overlong_type, "router_error"),
    ]
    for status, body, expected in cases:
        assert _stream_error_reason(status, body) == expected
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139860, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 1, "total_tokens": 29, "prompt_tps": 183.51595345126498, "generation_tps": 140349.23521338476, "peak_memory": 34.66827434}}
|
||||
|
||||
|
||||
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 2, "total_tokens": 30, "prompt_tps": 183.51595345126498, "generation_tps": 93.60885515563795, "peak_memory": 34.66827434}}
|
||||
|
||||
|
||||
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "안녕하세요,", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 3, "total_tokens": 31, "prompt_tps": 183.51595345126498, "generation_tps": 70.37263329290622, "peak_memory": 34.66827434}}
|
||||
|
||||
|
||||
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 4, "total_tokens": 32, "prompt_tps": 183.51595345126498, "generation_tps": 62.61454940315543, "peak_memory": 34.66827434}}
|
||||
|
||||
|
||||
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": " 만나서", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 5, "total_tokens": 33, "prompt_tps": 183.51595345126498, "generation_tps": 58.7098801868211, "peak_memory": 34.66827434}}
|
||||
|
||||
|
||||
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 6, "total_tokens": 34, "prompt_tps": 183.51595345126498, "generation_tps": 56.35974757228211, "peak_memory": 34.66827434}}
|
||||
|
||||
|
||||
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": " 반갑습니다!", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 7, "total_tokens": 35, "prompt_tps": 183.51595345126498, "generation_tps": 54.81880127112613, "peak_memory": 34.66827434}}
|
||||
|
||||
|
||||
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": "stop", "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 7, "total_tokens": 35, "prompt_tps": 183.51595345126498, "generation_tps": 54.81880127112613, "peak_memory": 34.66827434}}
|
||||
|
||||
|
||||
data: [DONE]
|
||||
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139880,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":1,"total_tokens":26,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}}
|
||||
|
||||
data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139880,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":2,"total_tokens":27,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}}
|
||||
|
||||
data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139880,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":3,"total_tokens":28,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}}
|
||||
|
||||
data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139881,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":4,"total_tokens":29,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}}
|
||||
|
||||
data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139881,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":"stop","delta":{"role":"assistant","content":"안녕하세요!","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":5,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}}
|
||||
|
||||
data: [DONE]
|
||||
|
||||
Reference in New Issue
Block a user