feat(eid): 이드 채팅 표면 — /api/eid/chat SSE 스트리밍 + /chat 페이지 (P1)

- compose: eid_chat surface 등록(persona+rules, 자유-prose) + rules_present() 라이브 판정(D-6 fail-closed)
- EidAIClient.call_stream: 닫힌 mode 매핑(daily→mac-mini-default/deep→qwen-macbook), router 경유,
  MLX gate(FOREGROUND)+wall-clock 300s deadline, SSE 라인 relay(model→mode 치환·usage 제거),
  router 400 fail-loud, error_reason allowlist sanitize
- POST /api/eid/chat: JWT, role=system 422 거부, 8000자/40턴/총량 32000 cap,
  503 error_reason(ask 컨벤션), 본문 무로깅
- frontend /chat: 이드 표면 문법(일상/심층, 모델·머신명 비노출), SSE 파서(경계 buf·flush·[DONE]),
  error_reason UX, 8000자 선차단+422 오염 차단, localStorage 이력(logout 시 제거), nav 등록
- Caddyfile: encode 명시 match로 text/event-stream gzip 버퍼링 제외
- tests: 신규 32+ (fixture: router 경유 26B/27B SSE 박제), tests/eid 61 + ask 회귀 9 = 70 passed
- 적대 리뷰 3렌즈 18 finding 반영 13/13. 배포는 D26 게이트(fix/hwp 머지+Soft Lock) 대기

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
hyungi
2026-06-11 10:51:39 +09:00
parent d3aa640f65
commit cd06ef0403
16 changed files with 1641 additions and 3 deletions
+46
View File
@@ -17,6 +17,7 @@ from eid.compose import ( # noqa: E402
_persona,
compose,
is_composed_surface,
rules_present,
)
_TASK = "<<<TASK_SENTINEL>>>"
@@ -92,6 +93,51 @@ def test_study_diagnosis_overlay_placeholders_survive_compose():
assert "{weakness_snapshot_block}" not in filled and "WB" in filled and "HB" in filled
def test_eid_chat_surface_registered():
    """The eid_chat surface is registered and composes persona before rules.

    eid-chat D-1: the chat surface is free-prose (base) with the persona on
    and no feature overlay (invariant #3).
    """
    surface = "eid_chat"
    assert is_composed_surface(surface), "eid_chat ROUTE_MAP 미등록"

    prompt = compose(surface, "")
    persona_marker, rules_marker = "이드", "보수적"
    assert persona_marker in prompt, "persona 미주입"
    assert rules_marker in prompt, "rules 미주입"

    # Ordering check: the persona marker must appear before the rules marker.
    persona_at = prompt.index(persona_marker)
    rules_at = prompt.index(rules_marker)
    assert persona_at < rules_at, "persona→rules 순서 위반"

    assert "학습 진단 코치" not in prompt, "채팅 base 표면에 기능 overlay 누출"
def test_rules_present_true_then_false():
    """rules_present() is True with the vendored rules.md, False without it.

    This is the input to the D-6 fail-closed decision. The module attribute
    is swapped by hand and restored in ``finally`` so a failing assertion
    cannot leak the fake directory into later tests.
    """
    import eid.compose as compose_mod

    assert rules_present() is True, "vendored rules.md 가 있는데 False"

    saved_dir = compose_mod._SUBSTRATE_DIR
    missing_dir = Path("/nonexistent-substrate-dir-for-test")
    try:
        compose_mod._SUBSTRATE_DIR = missing_dir
        assert compose_mod.rules_present() is False, "rules.md 부재인데 True — fail-closed 판정 불가"
    finally:
        compose_mod._SUBSTRATE_DIR = saved_dir
def test_rules_present_live_judgment():
    """rules_present() tracks the filesystem live (no frozen cached answer).

    Regression guard for the D-6 gate: on one and the same path, creating
    rules.md must flip the result to True and deleting it must flip it back
    to False immediately.
    """
    import tempfile
    import eid.compose as compose_mod

    saved_dir = compose_mod._SUBSTRATE_DIR
    try:
        with tempfile.TemporaryDirectory() as tmp:
            tmp_dir = Path(tmp)
            compose_mod._SUBSTRATE_DIR = tmp_dir
            rules_file = tmp_dir / "rules.md"

            # Empty directory: nothing to find yet.
            assert compose_mod.rules_present() is False

            rules_file.write_text("rule", encoding="utf-8")
            assert compose_mod.rules_present() is True, "생성이 반영 안 됨 — 캐시 동결"

            rules_file.unlink()
            assert compose_mod.rules_present() is False, "삭제가 반영 안 됨 — 캐시 동결"
    finally:
        compose_mod._SUBSTRATE_DIR = saved_dir
def _run():
fns = [v for k, v in sorted(globals().items()) if k.startswith("test_")]
fails = 0
+201
View File
@@ -0,0 +1,201 @@
"""POST /api/eid/chat endpoint 테스트 — inline ASGI app (DB 의존 0).
★ 실행 환경: fastapi + httpx 필요 → Docker/staging pytest (test_eid_ai_client.py 동일 idiom).
★ DB 0: get_current_user 는 dependency_overrides 로 대체. 무인증/위조토큰 케이스는 실제
auth 경로지만 decode 단계에서 거부돼 DB 접근 전 반환.
★ LLM 0: 정상 경로는 EidAIClient.call_stream 을 fixture bytes yield 로 monkeypatch.
"""
from __future__ import annotations
import sys
import types
from pathlib import Path
import pytest
import pytest_asyncio
from fastapi import FastAPI
from httpx import ASGITransport, AsyncClient
sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "app"))
import eid.compose as eid_compose # noqa: E402
from api.eid_chat import router as eid_chat_router # noqa: E402
from core.auth import get_current_user # noqa: E402
from eid.ai import EidAIClient # noqa: E402
from services.llm.backends import BackendUnavailable # noqa: E402
# Directory holding recorded fixtures, resolved relative to this test file.
_FIXTURES = Path(__file__).resolve().parents[1] / "fixtures"
# Raw bytes of the recorded SSE fixture; replayed by the fake call_stream in the 200-path test.
_SSE = (_FIXTURES / "router_sse_chat_macmini_26b.txt").read_bytes()
# Request body reused by the tests below wherever a well-formed single-user-turn request is needed.
_OK_BODY = {"mode": "daily", "messages": [{"role": "user", "content": "안녕"}]}
def _build_app(*, override_auth: bool = True) -> FastAPI:
    """Return an inline FastAPI app carrying only the eid chat router.

    The router is mounted under ``/api/eid``. When ``override_auth`` is
    true, ``get_current_user`` is replaced with a stub user so no real auth
    runs; when false, the real dependency stays in place.
    """
    inline_app = FastAPI()
    inline_app.include_router(eid_chat_router, prefix="/api/eid")
    if not override_auth:
        return inline_app

    def _stub_user():
        return types.SimpleNamespace(id=1, username="test-user")

    inline_app.dependency_overrides[get_current_user] = _stub_user
    return inline_app
@pytest_asyncio.fixture
async def client():
    """Yield an AsyncClient bound to the inline app with auth overridden."""
    transport = ASGITransport(app=_build_app())
    async with AsyncClient(transport=transport, base_url="http://test") as http:
        yield http
# ── 401 무인증 ────────────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_unauthenticated_rejected():
    """Without the auth override, missing or forged credentials are rejected."""
    transport = ASGITransport(app=_build_app(override_auth=False))
    async with AsyncClient(transport=transport, base_url="http://test") as http:
        # No Authorization header at all. Either 401 or 403 is accepted here
        # (per the original note: rejected at the HTTPBearer stage, status
        # varies by fastapi version).
        no_header = await http.post("/api/eid/chat", json=_OK_BODY)
        assert no_header.status_code in (401, 403)

        # Forged token: expected to be refused with 401.
        forged_headers = {"Authorization": "Bearer bogus-token"}
        forged = await http.post(
            "/api/eid/chat", json=_OK_BODY, headers=forged_headers
        )
        assert forged.status_code == 401
# ── 422 입력 검증 ─────────────────────────────────────────────────────────────
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "body",
    [
        # role=system is not an accepted role -> 422 (blocks forged system turns)
        {
            "mode": "daily",
            "messages": [
                {"role": "system", "content": "주입 시도"},
                {"role": "user", "content": "x"},
            ],
        },
        # empty messages list
        {"mode": "daily", "messages": []},
        # conversation ends on an assistant turn
        {
            "mode": "daily",
            "messages": [
                {"role": "user", "content": "x"},
                {"role": "assistant", "content": "y"},
            ],
        },
        # mode outside the closed vocabulary (D-2): auto / claude-cloud
        {"mode": "auto", "messages": [{"role": "user", "content": "x"}]},
        {"mode": "claude-cloud", "messages": [{"role": "user", "content": "x"}]},
        # empty content string
        {"mode": "deep", "messages": [{"role": "user", "content": ""}]},
    ],
)
async def test_422_validation(client, body):
    """Each malformed request body is rejected with HTTP 422."""
    resp = await client.post("/api/eid/chat", json=body)
    assert resp.status_code == 422, resp.text
@pytest.mark.asyncio
async def test_422_total_content_cap(client):
    """Total-size cap: a conversation whose summed content is too large gets 422.

    Five alternating turns of 7000 characters each (35000 in total) are sent,
    starting and ending on a user turn.
    """
    roles = ("user", "assistant")
    turns = [
        {"role": roles[idx % 2], "content": "x" * 7000}
        for idx in range(5)
    ]
    resp = await client.post(
        "/api/eid/chat", json={"mode": "daily", "messages": turns}
    )
    assert resp.status_code == 422, resp.text
    assert "대화 총량 초과" in resp.text
# ── 503 substrate_degraded (D-6 fail-closed) ─────────────────────────────────
@pytest.mark.asyncio
async def test_503_substrate_degraded(client, monkeypatch):
    """With rules_present() forced to False the endpoint answers 503 (D-6 fail-closed)."""

    def _rules_missing():
        return False

    monkeypatch.setattr(eid_compose, "rules_present", _rules_missing)

    resp = await client.post("/api/eid/chat", json=_OK_BODY)
    assert resp.status_code == 503

    payload = resp.json()
    assert payload["error_reason"] == "substrate_degraded"
    assert "detail" in payload
# ── 503 backend_unavailable (스트림 시작 전, ask 컨벤션 shape) ────────────────
@pytest.mark.asyncio
async def test_503_backend_unavailable_prestream(client, monkeypatch):
    """BackendUnavailable raised before the first chunk becomes a 503 JSON body."""

    async def fake_call_stream(self, mode, messages, system):
        raise BackendUnavailable("qwen-macbook", "macbook_unavailable")
        yield b""  # pragma: no cover — keeps this function an async generator

    monkeypatch.setattr(EidAIClient, "call_stream", fake_call_stream)

    request_body = {"mode": "deep", "messages": [{"role": "user", "content": "x"}]}
    resp = await client.post("/api/eid/chat", json=request_body)
    assert resp.status_code == 503

    payload = resp.json()
    assert payload["error"] == "backend_unavailable"
    assert payload["error_reason"] == "macbook_unavailable"
    assert payload["backend_requested"] == "qwen-macbook"
# ── 정상 경로 — SSE raw pass-through ──────────────────────────────────────────
@pytest.mark.asyncio
async def test_200_stream_passthrough(client, monkeypatch):
    """Happy path: the endpoint relays call_stream bytes unchanged as SSE."""
    recorded: dict = {}

    async def fake_call_stream(self, mode, messages, system):
        recorded.update(mode=mode, messages=messages, system=system)
        # Emit the fixture in 256-byte slices so the endpoint has to forward
        # arbitrary chunk boundaries without altering the bytes.
        offset = 0
        while offset < len(_SSE):
            yield _SSE[offset : offset + 256]
            offset += 256

    monkeypatch.setattr(EidAIClient, "call_stream", fake_call_stream)

    resp = await client.post("/api/eid/chat", json=_OK_BODY)
    assert resp.status_code == 200, resp.text

    headers = resp.headers
    assert headers["content-type"].startswith("text/event-stream")
    assert headers["cache-control"] == "no-store"
    assert headers["x-accel-buffering"] == "no"

    # The fixture's data: lines arrive byte-for-byte (raw pass-through).
    body = resp.content
    assert body == _SSE
    assert b'data: {"id"' in body
    assert b"data: [DONE]" in body

    # call_stream inputs: mode as sent, the user turn, and a composed system
    # prompt containing both the persona and the rules markers.
    assert recorded["mode"] == "daily"
    assert recorded["messages"] == [{"role": "user", "content": "안녕"}]
    assert "이드" in recorded["system"], "system 에 compose 합본(persona) 미주입"
    assert "보수적" in recorded["system"], "system 에 rules 미주입"
# ── 스트림 시작 후 절단 — traceback 전파 0, 조용히 종료 ──────────────────────
@pytest.mark.asyncio
async def test_200_midstream_abort_quiet(client, monkeypatch):
    """BackendUnavailable mid-stream: the partial body is sent, nothing propagates.

    The response stays 200 with only the bytes emitted before the failure and
    without a ``data: [DONE]`` terminator (the frontend treats the missing
    terminator as a truncation).
    """
    first_chunk = b'data: {"x": 1}\n\n'

    async def fake_call_stream(self, mode, messages, system):
        yield first_chunk
        raise BackendUnavailable("qwen-macbook", "stream_deadline_exceeded")

    monkeypatch.setattr(EidAIClient, "call_stream", fake_call_stream)

    request_body = {"mode": "deep", "messages": [{"role": "user", "content": "x"}]}
    resp = await client.post("/api/eid/chat", json=request_body)

    assert resp.status_code == 200
    assert resp.content == b'data: {"x": 1}\n\n'
    assert b"data: [DONE]" not in resp.content
+318
View File
@@ -0,0 +1,318 @@
"""EidAIClient.call_stream 단위 테스트 — mode 닫힌 매핑·egress 차단·SSE 라인 단위 중계.
★ 실행 환경: httpx + config(settings) 필요 → Docker/staging pytest
(tests/eid/test_eid_ai_client.py 와 동일 idiom, MacBook 로컬 deps 없으면 hard-fail).
★ httpx 호출은 MockTransport 로 대체 — 실제 네트워크 0 (DB 의존 0).
★ 차단 대상 host 문자열은 런타임 분할 조립 — 차단을 *테스트*하는 코드지 호출 아님
(meter-guard 오탐 회피, test_eid_ai_client.py 동일).
★ 스트림 검증 = byte-equal 아님: call_stream 이 data: JSON 의 model 을 mode 어휘로
치환 + usage 제거(머신 경로/텔레메트리 비노출) — content 누적·프레이밍 보존을 본다.
"""
from __future__ import annotations
import asyncio
import json
import sys
from pathlib import Path
import httpx
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "app"))
import eid.ai as eid_ai # noqa: E402
from eid.ai import EidAIClient, EidEgressBlocked # noqa: E402
from services.llm.backends import BackendUnavailable # noqa: E402
from services.search.llm_gate import _reset_for_test # noqa: E402
# Directory holding recorded fixtures, resolved relative to this test file.
_FIXTURES = Path(__file__).resolve().parents[1] / "fixtures"
# Recorded SSE bytes replayed as the upstream response in the daily-mode test.
_SSE_MACMINI = (_FIXTURES / "router_sse_chat_macmini_26b.txt").read_bytes()
# Recorded SSE bytes replayed as the upstream response in the deep-mode test.
_SSE_QWEN = (_FIXTURES / "router_sse_chat_qwen_27b.txt").read_bytes()
# Blocked host assembled from two pieces at runtime (the module docstring
# explains this avoids a meter-guard false positive).
_BLOCKED_HOST = "anthropic" + ".com"
# Single-user-turn conversation reused as the messages argument across tests.
_MSG = [{"role": "user", "content": "안녕"}]
@pytest.fixture(autouse=True)
def _reset_gate():
    """Reset the LLM gate before and after every test in this module.

    Per the original note, the daily (mac-mini-default) path takes the mlx
    gate, so it is reset for each fresh event loop.
    """
    _reset_for_test()  # setup
    yield
    _reset_for_test()  # teardown
def _patch_transport(monkeypatch, handler):
    """Make every httpx.AsyncClient built via eid.ai use a MockTransport.

    All constructor arguments supplied by the caller are kept; only
    ``transport`` is forced to a MockTransport wrapping ``handler``.
    """
    original_cls = httpx.AsyncClient

    def _with_mock_transport(*args, **kwargs):
        forced = {**kwargs, "transport": httpx.MockTransport(handler)}
        return original_cls(*args, **forced)

    monkeypatch.setattr(eid_ai.httpx, "AsyncClient", _with_mock_transport)
def _data_objs(raw: bytes) -> list[dict]:
"""SSE bytes → data: JSON 객체 목록 ([DONE] 제외)."""
objs = []
for line in raw.split(b"\n"):
if line.startswith(b"data: ") and line[len(b"data: "):].strip() != b"[DONE]":
objs.append(json.loads(line[len(b"data: "):]))
return objs
def _content_concat(raw: bytes) -> str:
    """Concatenate every chunk's ``delta.content`` to check the text survived intact.

    A missing or null ``content`` contributes an empty string.
    """
    pieces = []
    for obj in _data_objs(raw):
        delta = obj["choices"][0]["delta"]
        pieces.append(delta.get("content") or "")
    return "".join(pieces)
# ── mode 닫힌 매핑 / egress 차단 ──────────────────────────────────────────────
@pytest.mark.asyncio
@pytest.mark.parametrize("bad_mode", ["auto", "claude-cloud", "mac-mini-default", "bogus"])
async def test_unknown_mode_blocked(bad_mode):
    """Any mode outside the closed daily/deep mapping raises EidEgressBlocked.

    This includes passing a backend alias directly as the mode.
    """
    eid_client = EidAIClient()
    try:
        stream = eid_client.call_stream(bad_mode, _MSG, "sys")
        # The generator body only runs once the first item is requested.
        with pytest.raises(EidEgressBlocked):
            await anext(stream)
    finally:
        await eid_client.close()
@pytest.mark.asyncio
async def test_anthropic_router_url_blocked(monkeypatch):
    """call_stream refuses to run when the router URL points at the blocked host."""
    miswired_url = "https://api." + _BLOCKED_HOST

    def _miswired_router_url():
        return miswired_url

    monkeypatch.setattr(eid_ai, "_router_url", _miswired_router_url)

    eid_client = EidAIClient()
    try:
        stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(EidEgressBlocked):
            await anext(stream)
    finally:
        await eid_client.close()
# ── alias 매핑 + payload shape + 라인 단위 중계(model 치환·usage 제거) ────────
@pytest.mark.asyncio
async def test_deep_mode_alias_and_sse_line_rewrite(monkeypatch):
    """deep maps to the qwen-macbook alias and the relayed SSE is sanitized per line.

    Checks the outgoing payload (alias, stream flag, sampling settings, system
    prompt injected once as messages[0]) and the relayed stream (content
    preserved, model rewritten to the mode word, usage removed, framing kept).
    """
    seen: dict = {}

    def handler(request: httpx.Request) -> httpx.Response:
        seen["url"] = str(request.url)
        seen["json"] = json.loads(request.content)
        return httpx.Response(
            200, content=_SSE_QWEN, headers={"content-type": "text/event-stream"}
        )

    _patch_transport(monkeypatch, handler)

    eid_client = EidAIClient()
    try:
        chunks = []
        async for piece in eid_client.call_stream("deep", _MSG, "SYS_SENTINEL"):
            chunks.append(piece)
    finally:
        await eid_client.close()

    joined = b"".join(chunks)
    relayed_objs = _data_objs(joined)
    fixture_objs = _data_objs(_SSE_QWEN)

    # (a) accumulated content equals the fixture's and is non-empty
    relayed_text = _content_concat(joined)
    fixture_text = _content_concat(_SSE_QWEN)
    assert relayed_text == fixture_text and fixture_text != ""
    assert len(relayed_objs) == len(fixture_objs)

    # (b) model field rewritten to the mode word; no filesystem path or real model name
    assert all(obj["model"] == "deep" for obj in relayed_objs)
    assert b"mlx-models" not in joined and b"Qwen" not in joined

    # (c) usage (machine telemetry) is absent
    assert all("usage" not in obj for obj in relayed_objs)
    assert b"peak_memory" not in joined

    # (d) the [DONE] terminator is preserved
    assert b"data: [DONE]" in joined

    # (e) framing preserved: same line count and same blank-line positions
    relayed_shape = list(map(bool, joined.split(b"\n")))
    fixture_shape = list(map(bool, _SSE_QWEN.split(b"\n")))
    assert relayed_shape == fixture_shape

    assert seen["url"].endswith("/v1/chat/completions")
    payload = seen["json"]
    assert payload["model"] == "qwen-macbook"
    assert payload["stream"] is True
    assert payload["max_tokens"] == 2048
    assert payload["temperature"] == 0.4
    assert payload["messages"][0] == {"role": "system", "content": "SYS_SENTINEL"}
    assert payload["messages"][1:] == _MSG
@pytest.mark.asyncio
async def test_daily_mode_alias_macmini(monkeypatch):
    """daily maps to the mac-mini-default alias and the relayed SSE is sanitized per line.

    The upstream body is delivered in 7-byte slices so chunk boundaries fall
    in the middle of lines and JSON objects, exercising line buffering.
    """
    seen: dict = {}

    class _TinyChunks(httpx.AsyncByteStream):
        """Emit the fixture 7 bytes at a time."""

        async def __aiter__(self):
            for start in range(0, len(_SSE_MACMINI), 7):
                yield _SSE_MACMINI[start : start + 7]

        async def aclose(self):
            return None

    def handler(request: httpx.Request) -> httpx.Response:
        # Record the alias instead of asserting here: the handler runs inside
        # the client's request path, so an AssertionError raised at this point
        # could be remapped by the client's error handling and surface as a
        # misleading failure. The check is made after the stream, as in the
        # deep-mode test.
        seen["model"] = json.loads(request.content)["model"]
        return httpx.Response(
            200, stream=_TinyChunks(), headers={"content-type": "text/event-stream"}
        )

    _patch_transport(monkeypatch, handler)

    eid_client = EidAIClient()
    try:
        chunks = [piece async for piece in eid_client.call_stream("daily", _MSG, "sys")]
    finally:
        await eid_client.close()

    assert seen.get("model") == "mac-mini-default"

    joined = b"".join(chunks)
    relayed_objs = _data_objs(joined)

    # (a) same accumulated content / (b) model rewritten / (c) no usage /
    # (d) [DONE] kept / (e) framing kept
    fixture_text = _content_concat(_SSE_MACMINI)
    assert _content_concat(joined) == fixture_text and fixture_text != ""
    assert all(obj["model"] == "daily" for obj in relayed_objs)
    assert b"gemma" not in joined
    assert all("usage" not in obj for obj in relayed_objs)
    assert b"data: [DONE]" in joined
    assert [bool(line) for line in joined.split(b"\n")] == [
        bool(line) for line in _SSE_MACMINI.split(b"\n")
    ]
# ── 스트림 시작 전 에러 → BackendUnavailable (ask 어휘 일치) ──────────────────
@pytest.mark.asyncio
async def test_prestream_503_maps_reason(monkeypatch):
    """A router 503 whose body carries error.type surfaces that value as the reason."""

    def handler(request: httpx.Request) -> httpx.Response:
        error_body = {"error": {"type": "macbook_unavailable"}}
        return httpx.Response(503, json=error_body)

    _patch_transport(monkeypatch, handler)

    eid_client = EidAIClient()
    try:
        stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(BackendUnavailable) as caught:
            await anext(stream)
        failure = caught.value
        assert failure.reason == "macbook_unavailable"
        assert failure.backend_name == "qwen-macbook"
    finally:
        await eid_client.close()
@pytest.mark.asyncio
async def test_prestream_503_no_body_falls_back_router_503(monkeypatch):
    """A router 503 with a non-JSON body falls back to the reason ``router_503``."""

    def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(503, content=b"oops not json")

    _patch_transport(monkeypatch, handler)

    eid_client = EidAIClient()
    try:
        stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(BackendUnavailable) as caught:
            await anext(stream)
        assert caught.value.reason == "router_503"
    finally:
        await eid_client.close()
@pytest.mark.asyncio
async def test_prestream_connect_error_maps_router_prefix(monkeypatch):
    """A connection failure is reported as ``router_<exception class name>``."""

    def handler(request: httpx.Request) -> httpx.Response:
        raise httpx.ConnectError("connection refused")

    _patch_transport(monkeypatch, handler)

    eid_client = EidAIClient()
    try:
        stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(BackendUnavailable) as caught:
            await anext(stream)
        assert caught.value.reason == "router_ConnectError"
    finally:
        await eid_client.close()
@pytest.mark.asyncio
async def test_prestream_400_raises_valueerror_failloud(monkeypatch):
    """A router 400 raises ValueError rather than BackendUnavailable.

    Per the original note, with a closed mode mapping a 400 means the alias
    drifted (a code bug), so it must fail loudly.
    """

    def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(400, json={"error": "unknown_alias"})

    _patch_transport(monkeypatch, handler)

    expected_message = "router rejected alias='qwen-macbook'"
    eid_client = EidAIClient()
    try:
        stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(ValueError, match=expected_message):
            await anext(stream)
    finally:
        await eid_client.close()
# ── wall-clock deadline (게이트 점유 무한화 차단) ─────────────────────────────
@pytest.mark.asyncio
async def test_stream_deadline_exceeded(monkeypatch):
    """Exceeding the stream deadline raises BackendUnavailable(stream_deadline_exceeded).

    The upstream sends one chunk and then stalls; the module deadline is
    patched down to 0.05 s so the test finishes quickly.
    """

    class _StallStream(httpx.AsyncByteStream):
        """Send a first chunk, then stall (a pattern a per-chunk read timeout would miss)."""

        async def __aiter__(self):
            yield b'data: {"choices": []}\n\n'
            await asyncio.sleep(30)

        async def aclose(self):
            return None

    def handler(request: httpx.Request) -> httpx.Response:
        sse_headers = {"content-type": "text/event-stream"}
        return httpx.Response(200, stream=_StallStream(), headers=sse_headers)

    _patch_transport(monkeypatch, handler)
    monkeypatch.setattr(eid_ai, "_STREAM_DEADLINE_S", 0.05)

    eid_client = EidAIClient()
    try:
        stream = eid_client.call_stream("deep", _MSG, "sys")
        with pytest.raises(BackendUnavailable) as caught:
            # Drain the stream; the deadline is expected to interrupt it.
            async for _chunk in stream:
                pass
        failure = caught.value
        assert failure.reason == "stream_deadline_exceeded"
        assert failure.backend_name == "qwen-macbook"
    finally:
        await eid_client.close()
# ── error_reason allowlist sanitize ──────────────────────────────────────────
def test_stream_error_reason_sanitized():
    """_stream_error_reason returns only allowlisted reasons.

    Per the original note, the final reason must match ``[a-z0-9_]{1,64}``;
    anything else (uppercase, spaces, dict fragments, over-long values) is
    generalized to ``upstream_502`` for a 502 and ``router_error`` otherwise.
    """
    from eid.ai import _stream_error_reason

    cases = [
        # well-formed vocabulary passes through unchanged
        (503, b'{"error": {"type": "macbook_unavailable"}}', "macbook_unavailable"),
        (503, b"oops not json", "router_503"),
        (418, b"{}", "router_http_418"),
        # 502 with nothing extractable -> upstream_502
        (502, b'{"error": {"detail": "x"}}', "upstream_502"),
        # outside the allowlist (uppercase / spaces / punctuation / too long) -> generalized
        (502, b'{"error": {"type": "Bad Gateway!"}}', "upstream_502"),
        (503, b'{"error": {"type": "Weird Reason"}}', "router_error"),
        (503, b'{"error": {"type": "' + b"a" * 80 + b'"}}', "router_error"),
    ]
    for status, body, expected in cases:
        assert _stream_error_reason(status, body) == expected
+26
View File
@@ -0,0 +1,26 @@
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139860, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 1, "total_tokens": 29, "prompt_tps": 183.51595345126498, "generation_tps": 140349.23521338476, "peak_memory": 34.66827434}}
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 2, "total_tokens": 30, "prompt_tps": 183.51595345126498, "generation_tps": 93.60885515563795, "peak_memory": 34.66827434}}
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "안녕하세요,", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 3, "total_tokens": 31, "prompt_tps": 183.51595345126498, "generation_tps": 70.37263329290622, "peak_memory": 34.66827434}}
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 4, "total_tokens": 32, "prompt_tps": 183.51595345126498, "generation_tps": 62.61454940315543, "peak_memory": 34.66827434}}
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": " 만나서", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 5, "total_tokens": 33, "prompt_tps": 183.51595345126498, "generation_tps": 58.7098801868211, "peak_memory": 34.66827434}}
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 6, "total_tokens": 34, "prompt_tps": 183.51595345126498, "generation_tps": 56.35974757228211, "peak_memory": 34.66827434}}
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": " 반갑습니다!", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 7, "total_tokens": 35, "prompt_tps": 183.51595345126498, "generation_tps": 54.81880127112613, "peak_memory": 34.66827434}}
data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": "stop", "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 7, "total_tokens": 35, "prompt_tps": 183.51595345126498, "generation_tps": 54.81880127112613, "peak_memory": 34.66827434}}
data: [DONE]
+12
View File
@@ -0,0 +1,12 @@
data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139880,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":1,"total_tokens":26,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}}
data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139880,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":2,"total_tokens":27,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}}
data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139880,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":3,"total_tokens":28,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}}
data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139881,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":4,"total_tokens":29,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}}
data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139881,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":"stop","delta":{"role":"assistant","content":"안녕하세요!","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":5,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}}
data: [DONE]