From cd06ef0403cdb5d0b94f4ef1f674d3d90794851b Mon Sep 17 00:00:00 2001 From: hyungi Date: Thu, 11 Jun 2026 10:51:39 +0900 Subject: [PATCH] =?UTF-8?q?feat(eid):=20=EC=9D=B4=EB=93=9C=20=EC=B1=84?= =?UTF-8?q?=ED=8C=85=20=ED=91=9C=EB=A9=B4=20=E2=80=94=20/api/eid/chat=20SS?= =?UTF-8?q?E=20=EC=8A=A4=ED=8A=B8=EB=A6=AC=EB=B0=8D=20+=20/chat=20?= =?UTF-8?q?=ED=8E=98=EC=9D=B4=EC=A7=80=20(P1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - compose: eid_chat surface 등록(persona+rules, 자유-prose) + rules_present() 라이브 판정(D-6 fail-closed) - EidAIClient.call_stream: 닫힌 mode 매핑(daily→mac-mini-default/deep→qwen-macbook), router 경유, MLX gate(FOREGROUND)+wall-clock 300s deadline, SSE 라인 relay(model→mode 치환·usage 제거), router 400 fail-loud, error_reason allowlist sanitize - POST /api/eid/chat: JWT, role=system 422 거부, 8000자/40턴/총량 32000 cap, 503 error_reason(ask 컨벤션), 본문 무로깅 - frontend /chat: 이드 표면 문법(일상/심층, 모델·머신명 비노출), SSE 파서(경계 buf·flush·[DONE]), error_reason UX, 8000자 선차단+422 오염 차단, localStorage 이력(logout 시 제거), nav 등록 - Caddyfile: encode 명시 match로 text/event-stream gzip 버퍼링 제외 - tests: 신규 32+ (fixture: router 경유 26B/27B SSE 박제), tests/eid 61 + ask 회귀 9 = 70 passed - 적대 리뷰 3렌즈 18 finding 반영 13/13. 배포는 D26 게이트(fix/hwp 머지+Soft Lock) 대기 Co-Authored-By: Claude Fable 5 --- Caddyfile | 9 +- app/api/eid_chat.py | 168 ++++++ app/eid/ai.py | 193 ++++++ app/eid/compose.py | 13 + app/main.py | 3 + frontend/src/lib/api.ts | 55 ++ frontend/src/lib/components/Sidebar.svelte | 12 +- frontend/src/lib/eidChat.ts | 8 + frontend/src/lib/stores/auth.ts | 9 + frontend/src/routes/+layout.svelte | 4 +- frontend/src/routes/chat/+page.svelte | 567 ++++++++++++++++++ tests/eid/test_compose.py | 46 ++ tests/eid/test_eid_chat_endpoint.py | 201 +++++++ tests/eid/test_eid_chat_stream.py | 318 ++++++++++ .../fixtures/router_sse_chat_macmini_26b.txt | 26 + tests/fixtures/router_sse_chat_qwen_27b.txt | 12 + 16 files changed, 1641 insertions(+), 3 deletions(-) create mode 100644 app/api/eid_chat.py create mode 100644 frontend/src/lib/eidChat.ts create mode 100644 frontend/src/routes/chat/+page.svelte create mode 100644 tests/eid/test_eid_chat_endpoint.py create mode 100644 tests/eid/test_eid_chat_stream.py create mode 100644 tests/fixtures/router_sse_chat_macmini_26b.txt create mode 100644 tests/fixtures/router_sse_chat_qwen_27b.txt diff --git a/Caddyfile b/Caddyfile index 89b32cf..a78647f 100644 --- a/Caddyfile +++ b/Caddyfile @@ -9,7 +9,14 @@ } http://document.hyungi.net { - encode gzip + # 명시 Content-Type match — 기본 match 의 text/* 는 text/event-stream 까지 포함해 + # SSE(/api/eid/chat)의 첫 ~512B 를 gzip 버퍼링함. SSE 제외, 기존 압축 대상은 보존. + encode { + gzip + match { + header Content-Type text/html* text/css* text/plain* text/xml* text/javascript* application/json* application/javascript* application/xml* image/svg+xml* + } + } # API + 문서 → FastAPI handle /api/* { diff --git a/app/api/eid_chat.py b/app/api/eid_chat.py new file mode 100644 index 0000000..1f0543f --- /dev/null +++ b/app/api/eid_chat.py @@ -0,0 +1,168 @@ +"""이드 채팅 표면 — POST /api/eid/chat (eid-chat 트랙). + +확정 결정: + - D-1 경로 = /api/eid/chat (main.py prefix=/api/eid + 본 라우터 POST /chat) + - D-2 mode 닫힌 어휘: daily(mac-mini-default) / deep(qwen-macbook). 클라는 mode 만 보냄 — + claude-cloud / auto 금지 (Literal 로 422 차단). 심층(deep) 모드 무게이트. + - D-3 독립 /chat 라우트 (frontend) — 본 모듈은 백엔드 API 만. + - D-5 LLM 호출 = EidAIClient.call_stream 한 곳 (이드 egress 봉쇄 불변식 #5, + RouterBackend 직접 호출 금지). + - D-6 rules.md 부재 = 503 substrate_degraded fail-closed — 다른 표면의 degraded 배너 + 컨벤션(compose._rules)과 달리 채팅은 진행 자체를 거부. + +응답 = router SSE 라인 단위 중계 (text/event-stream — call_stream 이 model 필드를 mode +어휘로 치환·usage 제거, 프레이밍 보존. 본 모듈은 무변형 relay). 스트림 시작 전 +backend 실패는 /api/search/ask 와 동일 shape 의 503 + error_reason 매핑(자동 fallback 0). +로그는 메타 1줄(mode·턴수·status)만 — 대화 본문 로깅 0. +""" + +from __future__ import annotations + +from typing import Annotated, Literal + +import httpx +from fastapi import APIRouter, Depends +from fastapi.responses import JSONResponse, StreamingResponse +from pydantic import BaseModel, Field, field_validator, model_validator + +from core.auth import get_current_user +from core.utils import setup_logger +from eid import compose as eid_compose +from eid.ai import EidAIClient +from models.user import User +from services.llm.backends import BackendUnavailable + +logger = setup_logger("eid_chat") + +router = APIRouter() + + +class ChatMessage(BaseModel): + """채팅 턴 1건. role=system 은 Literal 밖 → 422 (system 합본은 서버 compose 만 주입).""" + + role: Literal["user", "assistant"] + content: str = Field(min_length=1, max_length=8000) + + +# 대화 총량 cap (전 메시지 content 합) — per-message 8000·40턴 제한과 별도의 총량 상한 +_TOTAL_CONTENT_CAP = 32000 + + +class ChatRequest(BaseModel): + """POST /api/eid/chat body. mode 는 닫힌 어휘(D-2), messages 는 1~40턴 + 총량 32000자.""" + + mode: Literal["daily", "deep"] + messages: list[ChatMessage] = Field(min_length=1, max_length=40) + + @field_validator("messages") + @classmethod + def _last_turn_is_user(cls, v: list[ChatMessage]) -> list[ChatMessage]: + if v and v[-1].role != "user": + raise ValueError("마지막 메시지는 role=user 여야 합니다") + return v + + @model_validator(mode="after") + def _total_content_cap(self) -> "ChatRequest": + if sum(len(m.content) for m in self.messages) > _TOTAL_CONTENT_CAP: + raise ValueError( + "대화 총량 초과 — 새 대화로 시작하거나 입력을 줄여주세요 " + f"(전체 메시지 합 {_TOTAL_CONTENT_CAP}자 제한)" + ) + return self + + +@router.post("/chat") +async def eid_chat( + body: ChatRequest, + user: Annotated[User, Depends(get_current_user)], +): + """이드 채팅 — router SSE 스트리밍 pass-through. + + 503 두 경로 (둘 다 자동 fallback 없음): + - substrate_degraded: rules.md 부재 (D-6 fail-closed, 채팅 진행 거부) + - backend_unavailable: 스트림 시작 전 backend 실패 (ask 컨벤션과 동일 shape) + """ + # D-6: rules 부재 = fail-closed. 채팅은 안전·정책 가드 없이 진행하지 않는다(배너 X). + if not eid_compose.rules_present(): + logger.error( + "eid_chat substrate_degraded mode=%s turns=%d status=503 — rules.md 부재, 채팅 거부", + body.mode, len(body.messages), + ) + return JSONResponse( + status_code=503, + content={ + "detail": ( + "이드 substrate 가 degraded 상태입니다 (운영 규칙 rules.md 부재). " + "복구 전까지 채팅을 진행하지 않습니다." + ), + "error_reason": "substrate_degraded", + }, + ) + + system = eid_compose.compose("eid_chat", task="") + client = EidAIClient() + stream = client.call_stream( + body.mode, [m.model_dump() for m in body.messages], system, + ) + + # async generator 는 첫 __anext__ 에서야 실제 요청 전송 — 스트림 시작 전 실패(연결/4xx/5xx) + # 를 503 으로 매핑하기 위해 첫 chunk 를 여기서 먼저 당긴다. + try: + first = await anext(stream, None) + except BackendUnavailable as exc: + logger.warning( + "eid_chat backend_unavailable mode=%s turns=%d status=503 reason=%s", + body.mode, len(body.messages), exc.reason, + ) + await client.close() + return JSONResponse( + status_code=503, + content={ + "error": "backend_unavailable", + "error_reason": exc.reason, + "backend_requested": exc.backend_name, + "detail": ( + "선택한 모드의 backend 가 일시적으로 응답할 수 없습니다. " + "잠시 후 다시 시도하거나 mode 를 바꿔 호출하세요." + ), + }, + ) + except BaseException: + await client.close() + raise + + # 메타 로그 1줄 — 본문 로깅 0 (대화 내용은 어디에도 남기지 않는다) + logger.info( + "eid_chat stream mode=%s turns=%d status=200", body.mode, len(body.messages) + ) + + async def _passthrough(): + # call_stream 방출분 무변형 relay (정화는 call_stream 라인 단위 한 곳). 취소· + # disconnect 포함 finally 에서 generator aclose → AsyncExitStack 이 upstream 정리. + try: + try: + if first is not None: + yield first + async for chunk in stream: + yield chunk + except (BackendUnavailable, httpx.HTTPError) as exc: + # 스트림 시작 후 절단 — status 200 은 이미 송신돼 재매핑 불가. 메타 로그 + # 1줄만 남기고 조용히 종료(traceback 전파 0) — 프론트는 [DONE] 부재로 처리. + logger.warning( + "eid_chat stream aborted mode=%s turns=%d reason=%s", + body.mode, len(body.messages), + getattr(exc, "reason", type(exc).__name__), + ) + return + finally: + # stream.aclose() 가 예외여도 client.close() 는 보장 (중첩 finally) + try: + await stream.aclose() + finally: + await client.close() + + return StreamingResponse( + _passthrough(), + media_type="text/event-stream", + headers={"Cache-Control": "no-store", "X-Accel-Buffering": "no"}, + ) diff --git a/app/eid/ai.py b/app/eid/ai.py index fd8e4c6..aeb3603 100644 --- a/app/eid/ai.py +++ b/app/eid/ai.py @@ -11,11 +11,116 @@ endpoint 를 못 부른다(silent fallback 0, rules no-silent-fallback). - _request() → endpoint 에 anthropic.com 있으면 raise(primary 오결선 방어, 이중보증) call_primary / call_triage / embed / rerank 는 그대로(내부 inference·임베딩 허용). egress 워커·시스템 경로는 기존 AIClient 유지 — fallback 은 시스템만, 이드만 박탈(분리). + +eid-chat (D-5): 이드 채팅 SSE 스트리밍도 이 클래스의 call_stream() 한 곳 — RouterBackend +직접 호출 금지, mode 어휘는 _CHAT_ALIAS 닫힌 매핑(daily/deep)만, 미지 mode = EidEgressBlocked. """ from __future__ import annotations +import asyncio +import json +import re +from collections.abc import AsyncIterator +from contextlib import AsyncExitStack + +import httpx + from ai.client import AIClient +from services.llm.backends import ( + MAC_MINI_DEFAULT, + QWEN_MACBOOK, + BackendUnavailable, + _router_url, # router URL 단일 출처 재사용 (settings → env LLM_ROUTER_URL → MVP default) +) +from services.search.llm_gate import Priority, acquire_mlx_gate + +# 이드 채팅 mode → router alias 닫힌 매핑 (D-2). 클라는 mode 만 보냄 — claude-cloud/auto 금지. +_CHAT_ALIAS: dict[str, str] = { + "daily": MAC_MINI_DEFAULT, # router tier_b → Mac mini :8801 gemma-4-26b + "deep": QWEN_MACBOOK, # router named upstream → M5 Max Qwen3.6-27B (무게이트, D-2) +} + +# read 는 per-chunk 적용이라 MacBook wake(24s)+토큰 생성 간격 커버. connect 는 내부 router 라 짧게. +_STREAM_TIMEOUT = httpx.Timeout(connect=5.0, read=120.0, write=30.0, pool=5.0) + +# 스트림 중계 전체(업스트림 진입~종료) wall-clock 상한. per-chunk read timeout 만으로는 +# 토큰이 계속 흐르는 한 무한 점유 가능 → daily 는 mlx gate 를 물고 있어 deadline 필수. +# deep 도 동일 적용(단순·일관). 정상 스트림(max_tokens 2048, ~90tps ≈ 23s)은 여유 통과. +_STREAM_DEADLINE_S = 300.0 + +# error_reason allowlist — 이 밖(대문자/공백/JSON 직렬화 파편)은 일반화해 비노출 +_REASON_ALLOWED = re.compile(r"[a-z0-9_]{1,64}") + +# 스트림 시작 전 transport 계열 실패 → BackendUnavailable 매핑 대상 (RouterBackend._post 와 동일 목록) +_TRANSPORT_ERRORS = ( + httpx.ConnectError, + httpx.ConnectTimeout, + httpx.ReadTimeout, + httpx.PoolTimeout, + httpx.WriteTimeout, + httpx.RemoteProtocolError, +) + + +def _stream_error_reason(status_code: int, body: bytes) -> str: + """스트림 시작 전 4xx/5xx 응답 본문 → error_reason 추출. + + 어휘는 /api/search/ask(RouterBackend._post)와 일치 — router 가 주는 error.type / + error.error_reason (macbook_unavailable / warming / editor_busy / upstream_cold / + provider_not_configured 등) 우선, 없으면 status 기반 router_503 / upstream_502 / + router_http_. + + 최종 reason 은 [a-z0-9_]{1,64} allowlist 검사 — 불일치(대문자/공백/dict 직렬화 + 파편)는 upstream_502(502 계열) / router_error(그 외) 로 일반화해 외부 비노출. + """ + try: + data = json.loads(body.decode("utf-8", errors="replace")) + except Exception: + data = {} + err = data.get("error", {}) if isinstance(data, dict) else {} + reason: str | None = None + if isinstance(err, dict): + raw = err.get("type") or err.get("error_reason") + if raw: + reason = str(raw) + if reason is None and isinstance(data, dict) and data.get("error_reason"): + reason = str(data["error_reason"]) + if reason is None: + if status_code == 502: + reason = "upstream_502" + elif status_code == 503: + reason = "router_503" + else: + reason = f"router_http_{status_code}" + if _REASON_ALLOWED.fullmatch(reason): + return reason + return "upstream_502" if status_code == 502 else "router_error" + + +def _rewrite_sse_line(line: bytes, mode: str) -> bytes: + """SSE 라인 1건 정화 — data: JSON 의 model 을 mode 어휘로 치환 + usage 제거. + + fixture 실측: 27B chunk 의 model 필드가 맥북 파일시스템 절대경로 + ("/Users/.../mlx-models/Qwen3.6-27B-8bit")를 노출 — 표면 문법 '모델·머신명 + 비노출'과 충돌해 라인 단위로 재작성한다. usage(tps/peak_memory 등 머신 + 텔레메트리)도 함께 제거. [DONE]·비-data 라인(빈 줄 포함)·파싱 실패 라인은 + 원문 그대로(방어적) — SSE 프레이밍(data: 라인 + 빈 줄) 보존. + """ + if not line.startswith(b"data: "): + return line + payload = line[len(b"data: "):] + if payload.strip() == b"[DONE]": + return line + try: + obj = json.loads(payload) + except Exception: + return line + if not isinstance(obj, dict): + return line + obj["model"] = mode + obj.pop("usage", None) + return b"data: " + json.dumps(obj, ensure_ascii=False).encode("utf-8") class EidEgressBlocked(RuntimeError): @@ -39,3 +144,91 @@ class EidAIClient(AIClient): if "anthropic.com" in endpoint: raise EidEgressBlocked(f"이드: 외부 endpoint 차단 ({endpoint}). 내부 inference 만.") return await super()._request(model_config, prompt, system=system) + + async def call_stream( + self, mode: str, messages: list[dict], system: str + ) -> AsyncIterator[bytes]: + """이드 채팅 SSE 스트림 — router /v1/chat/completions stream=true 라인 단위 중계 (D-5). + + mode : "daily" | "deep" — _CHAT_ALIAS 닫힌 매핑. 미지 mode = EidEgressBlocked + (이드 LLM 호출 봉쇄는 이 클래스 한 곳, 불변식 #5). + messages : user/assistant 턴 목록 (system role 금지 — system 인자로만 주입). + system : compose("eid_chat", ...) 합본. messages 맨 앞에 system role 로 끼움. + + 스트림 시작 전 실패(연결 실패·5xx 응답) = BackendUnavailable(reason 어휘는 ask + 와 동일). router 400 = 닫힌 매핑에서 alias drift 코드 버그 → ValueError fail-loud + (RouterBackend._post 컨벤션 미러). 스트림 시작 후엔 bytes 를 라인 버퍼링해 + _rewrite_sse_line 으로 model 치환(mode 어휘)·usage 제거만 하고 프레이밍은 보존. + 취소/disconnect 시 AsyncExitStack 이 response·client 정리(upstream 닫힘 보장). + + daily(mac-mini-default)는 Mac mini MLX 단일 inference 영구 룰(llm_gate docstring + "예외 없이 gate 획득 필수")에 따라 acquire_mlx_gate(FOREGROUND) 안에서 스트리밍 — + RouterBackend 의 requires_gate=True 와 동일한 client-side mutex 효과. + deep(qwen-macbook)은 별 endpoint 라 무게이트 (D-2, RouterBackend 동형). + + 중계 전체(업스트림 진입~종료)는 asyncio.timeout(_STREAM_DEADLINE_S) wall-clock + deadline 안 — llm_gate 계약 "timeout 은 gate 안쪽" 준수(gate 대기엔 미적용). + 초과 시 BackendUnavailable(alias, "stream_deadline_exceeded") 로 수렴. + """ + alias = _CHAT_ALIAS.get(mode) + if alias is None: + raise EidEgressBlocked( + f"이드: 미지 chat mode {mode!r} — 닫힌 매핑(daily/deep) 외 호출 차단." + ) + router_url = _router_url() + if "anthropic.com" in router_url: + # 기존 _request 패턴 미러 — router URL 오결선 시 외부 egress 방어 (이중보증) + raise EidEgressBlocked(f"이드: 외부 endpoint 차단 ({router_url}). 내부 router 만.") + url = f"{router_url.rstrip('/')}/v1/chat/completions" + payload = { + "model": alias, + "messages": [{"role": "system", "content": system}] + messages, + "stream": True, + "max_tokens": 2048, + "temperature": 0.4, + } + async with AsyncExitStack() as stack: + if alias == MAC_MINI_DEFAULT: + await stack.enter_async_context(acquire_mlx_gate(Priority.FOREGROUND)) + client = await stack.enter_async_context(httpx.AsyncClient(timeout=_STREAM_TIMEOUT)) + try: + # wall-clock deadline — gate 획득 *후* 진입 (llm_gate "timeout 은 gate 안쪽") + async with asyncio.timeout(_STREAM_DEADLINE_S): + try: + resp = await stack.enter_async_context( + client.stream("POST", url, json=payload) + ) + except _TRANSPORT_ERRORS as exc: + # 스트림 시작 전 연결 계열 실패 — reason 어휘 = RouterBackend(router_*) 와 일치 + raise BackendUnavailable(alias, f"router_{type(exc).__name__}") from exc + if resp.status_code == 400: + # 닫힌 매핑에서 400 = alias drift 코드 버그 — RouterBackend._post 미러, + # BackendUnavailable(일시 비가용) 아님 → fail-loud + body = await resp.aread() + try: + data = json.loads(body.decode("utf-8", errors="replace")) + except Exception: + data = {} + raise ValueError(f"router rejected alias={alias!r} body={data!r}") + if resp.status_code >= 400: + body = await resp.aread() + raise BackendUnavailable( + alias, _stream_error_reason(resp.status_code, body) + ) + buf = b"" + try: + async for chunk in resp.aiter_bytes(): + buf += chunk + # 라인 버퍼링 — 청크 경계에서 b"\n" 분리, 잔여 버퍼 유지 + while (nl := buf.find(b"\n")) != -1: + line, buf = buf[:nl], buf[nl + 1:] + yield _rewrite_sse_line(line, mode) + b"\n" + except _TRANSPORT_ERRORS as exc: + # 시작 후 중단 — 이미 보낸 chunk 는 전송됨. typed 예외로 수렴(caller 가 끊고 정리). + raise BackendUnavailable(alias, f"router_{type(exc).__name__}") from exc + if buf: + # 스트림 끝 잔여분 flush (개행 없는 마지막 라인 — 원문에 없던 \n 추가 안 함) + yield _rewrite_sse_line(buf, mode) + except TimeoutError as exc: + # asyncio.timeout 초과 — 게이트 점유 무한화 차단, typed 예외로 수렴 + raise BackendUnavailable(alias, "stream_deadline_exceeded") from exc diff --git a/app/eid/compose.py b/app/eid/compose.py index 82a04de..90e786b 100644 --- a/app/eid/compose.py +++ b/app/eid/compose.py @@ -50,6 +50,8 @@ _ROUTE: dict[str, dict] = { "react_ask": {"overlay": None, "variant": "full"}, "study_subject_note": {"overlay": None, "variant": "full"}, "study_question_explanation": {"overlay": None, "variant": "full"}, + # 이드 채팅 표면 (D-1 /api/eid/chat) — 자유-prose(base), persona ON (불변식 #3) + "eid_chat": {"overlay": None, "variant": "full"}, # 미래 active eid 표면 — 기능 overlay (W3+ 에서 호출 배선) "study_diagnosis": {"overlay": "study", "variant": "full"}, "document_brief": {"overlay": "document", "variant": "full"}, @@ -113,6 +115,17 @@ def is_composed_surface(surface: str) -> bool: return surface in _ROUTE +def rules_present() -> bool: + """rules.md 존재 여부 — 채팅 표면(D-6)의 fail-closed 판정 재료. + + 기존 _rules() 의 degraded 배너 컨벤션(다른 표면, fail-loud 진행)은 그대로 둔다 — + 여긴 '진행 거부' 판정만 제공하고 강제는 호출부(/api/eid/chat) 책임. + lru_cache 된 _read 를 쓰지 않고 매 호출 직접 stat — D-6 게이트는 살아있는 판정 + 이어야 한다(캐시 동결 시 rules.md 부재/복구가 영원히 반영 안 됨). + """ + return (_SUBSTRATE_DIR / "rules.md").is_file() + + def compose(surface: str, task: str, *, variant: str | None = None, budget_chars: int | None = None) -> str: """persona → rules → overlay → task 단일 system 문자열 합성. diff --git a/app/main.py b/app/main.py index f14dbdd..da3516e 100644 --- a/app/main.py +++ b/app/main.py @@ -17,6 +17,7 @@ from api.digest import router as digest_router from api.document_notes import router as document_notes_router from api.document_reads import router as document_reads_router from api.documents import router as documents_router +from api.eid_chat import router as eid_chat_router from api.events import router as events_router from api.library import router as library_router from api.memos import router as memos_router @@ -174,6 +175,8 @@ app.include_router(documents_router, prefix="/api/documents", tags=["documents"] app.include_router(document_reads_router, prefix="/api/documents", tags=["document-reads"]) app.include_router(document_notes_router, prefix="/api/documents", tags=["document-notes"]) app.include_router(search_router, prefix="/api/search", tags=["search"]) +# 이드 채팅 표면 (D-1) — POST /api/eid/chat. SSE 스트리밍, EidAIClient.call_stream 봉쇄 경유. +app.include_router(eid_chat_router, prefix="/api/eid", tags=["eid-chat"]) app.include_router(memos_router, prefix="/api/memos", tags=["memos"]) app.include_router(events_router, prefix="/api/events", tags=["events"]) diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index 707698c..e9865b5 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -172,6 +172,61 @@ export async function api( return res.json(); } +/** + * Raw fetch 헬퍼 — SSE/스트리밍 등 JSON 일괄 파싱이 부적합한 endpoint 전용. + * + * api() 와 동일한 정책을 공유한다: + * - access token 자동 첨부 + * - 401 → refresh 1회 재시도 (실패 시 handleTokenRefresh 가 강제 logout) + * - JSON body 면 Content-Type 자동 설정 + * + * 차이: Response 를 그대로 반환한다 (status 판단 / body 소비는 호출자 책임). + * PR-Eid-Chat: `/api/eid/chat` SSE 스트림이 첫 소비자. additive export only — + * 기존 api()/uploadFile() 동작은 변경하지 않는다. + */ +export async function apiFetchRaw( + path: string, + options: RequestInit = {}, +): Promise { + const headers: Record = { + ...(options.headers as Record || {}), + }; + + if (accessToken) { + headers['Authorization'] = `Bearer ${accessToken}`; + } + if (options.body && !(options.body instanceof FormData)) { + headers['Content-Type'] = 'application/json'; + } + + const res = await fetch(`${API_BASE}${path}`, { + ...options, + headers, + credentials: 'include', + }); + + // 401 → refresh 1회 시도 (api() 와 같은 정책, auth endpoint 제외) + const isAuthEndpoint = path.startsWith('/auth/login') || path.startsWith('/auth/refresh'); + if (res.status === 401 && accessToken && !isAuthEndpoint) { + try { + await handleTokenRefresh(); + } catch { + // refresh 실패 — handleTokenRefresh 가 강제 logout(리다이렉트) 처리. + // api() 와 일관되게 원본 401 Response 를 그대로 반환해 호출자가 + // 네트워크 에러로 오인하지 않게 한다 (body 미소비 상태라 재사용 가능). + return res; + } + headers['Authorization'] = `Bearer ${accessToken}`; + return fetch(`${API_BASE}${path}`, { + ...options, + headers, + credentials: 'include', + }); + } + + return res; +} + /** * 업로드 전용 헬퍼 — XMLHttpRequest 기반. * diff --git a/frontend/src/lib/components/Sidebar.svelte b/frontend/src/lib/components/Sidebar.svelte index 38b48b8..4e4dd03 100644 --- a/frontend/src/lib/components/Sidebar.svelte +++ b/frontend/src/lib/components/Sidebar.svelte @@ -2,7 +2,7 @@ import { page } from '$app/stores'; import { goto } from '$app/navigation'; import { api } from '$lib/api'; - import { ChevronRight, ChevronDown, FolderOpen, FolderTree, Inbox, Clock, Mail, Scale, StickyNote, GraduationCap, CalendarCheck } from 'lucide-svelte'; + import { ChevronRight, ChevronDown, FolderOpen, FolderTree, Inbox, Clock, Mail, Scale, StickyNote, GraduationCap, CalendarCheck, MessageCircle } from 'lucide-svelte'; let tree = $state([]); let loading = $state(true); @@ -229,6 +229,16 @@ 공부 + + + + 이드 + + 질문 + 이드 @@ -178,6 +179,7 @@ 문서 뉴스 질문 + 이드 메모 diff --git a/frontend/src/routes/chat/+page.svelte b/frontend/src/routes/chat/+page.svelte new file mode 100644 index 0000000..4b5a8b4 --- /dev/null +++ b/frontend/src/routes/chat/+page.svelte @@ -0,0 +1,567 @@ + + + + + 이드 - PKM + + +
+ +
+
+

+ + 이드 +

+ + +
+ + +
+ +
+ + +
+ {#if mode === 'deep'} +
+

{DEEP_CAPTION}

+
+ {/if} +
+ + +
+
+ {#if messages.length === 0 && !streaming} +
+ +
+ {/if} + + {#each messages as msg, i (i)} + {#if msg.role === 'user'} +
+
+ {msg.content} +
+
+ {:else} +
+
+ {msg.content} +
+
+ {/if} + {/each} + + + {#if streaming} +
+
+ {#if streamingText} + {streamingText} + {:else} + 응답 준비 중... + {/if} +
+
+ {/if} + + + {#if notice} +
+ +
+

{notice.message}

+ {#if notice.retryable && canRetry} + + {/if} +
+
+ {/if} +
+
+ + +
+
+ +
+ {#each PRESETS as preset (preset.label)} + + {/each} +
+ +
+ + +
+ + + {#if inputLength >= COUNTER_THRESHOLD} +

+ {inputLength.toLocaleString()} / {MAX_MESSAGE_CHARS.toLocaleString()}자{overLimit + ? ' — 입력이 너무 깁니다 (8,000자 이내)' + : ''} +

+ {/if} +
+
+
diff --git a/tests/eid/test_compose.py b/tests/eid/test_compose.py index 3042b06..ba8370b 100644 --- a/tests/eid/test_compose.py +++ b/tests/eid/test_compose.py @@ -17,6 +17,7 @@ from eid.compose import ( # noqa: E402 _persona, compose, is_composed_surface, + rules_present, ) _TASK = "<<>>" @@ -92,6 +93,51 @@ def test_study_diagnosis_overlay_placeholders_survive_compose(): assert "{weakness_snapshot_block}" not in filled and "WB" in filled and "HB" in filled +def test_eid_chat_surface_registered(): + # eid-chat D-1: 채팅 표면 = 자유-prose(base), persona ON, 기능 overlay 없음 (불변식 #3) + assert is_composed_surface("eid_chat"), "eid_chat ROUTE_MAP 미등록" + out = compose("eid_chat", "") + assert "이드" in out, "persona 미주입" + assert "보수적" in out, "rules 미주입" + assert out.index("이드") < out.index("보수적"), "persona→rules 순서 위반" + assert "학습 진단 코치" not in out, "채팅 base 표면에 기능 overlay 누출" + + +def test_rules_present_true_then_false(): + # D-6 fail-closed 판정 재료 — vendored rules.md 존재 시 True, 부재 시 False. + # _rules() 의 degraded 배너 동작(다른 표면)은 본 헬퍼와 무관하게 유지된다. + import eid.compose as c + + assert rules_present() is True, "vendored rules.md 가 있는데 False" + orig = c._SUBSTRATE_DIR + try: + c._SUBSTRATE_DIR = Path("/nonexistent-substrate-dir-for-test") + assert c.rules_present() is False, "rules.md 부재인데 True — fail-closed 판정 불가" + finally: + c._SUBSTRATE_DIR = orig + + +def test_rules_present_live_judgment(): + # D-6 게이트 = 살아있는 판정 — lru_cache(_read) 동결 회귀 방지. + # 같은 경로에서 생성→True, 삭제→False 가 즉시 반영돼야 한다. + import tempfile + + import eid.compose as c + + orig = c._SUBSTRATE_DIR + try: + with tempfile.TemporaryDirectory() as td: + c._SUBSTRATE_DIR = Path(td) + rules = Path(td) / "rules.md" + assert c.rules_present() is False + rules.write_text("rule", encoding="utf-8") + assert c.rules_present() is True, "생성이 반영 안 됨 — 캐시 동결" + rules.unlink() + assert c.rules_present() is False, "삭제가 반영 안 됨 — 캐시 동결" + finally: + c._SUBSTRATE_DIR = orig + + def _run(): fns = [v for k, v in sorted(globals().items()) if k.startswith("test_")] fails = 0 diff --git a/tests/eid/test_eid_chat_endpoint.py b/tests/eid/test_eid_chat_endpoint.py new file mode 100644 index 0000000..0b27c98 --- /dev/null +++ b/tests/eid/test_eid_chat_endpoint.py @@ -0,0 +1,201 @@ +"""POST /api/eid/chat endpoint 테스트 — inline ASGI app (DB 의존 0). + +★ 실행 환경: fastapi + httpx 필요 → Docker/staging pytest (test_eid_ai_client.py 동일 idiom). +★ DB 0: get_current_user 는 dependency_overrides 로 대체. 무인증/위조토큰 케이스는 실제 + auth 경로지만 decode 단계에서 거부돼 DB 접근 전 반환. +★ LLM 0: 정상 경로는 EidAIClient.call_stream 을 fixture bytes yield 로 monkeypatch. +""" + +from __future__ import annotations + +import sys +import types +from pathlib import Path + +import pytest +import pytest_asyncio +from fastapi import FastAPI +from httpx import ASGITransport, AsyncClient + +sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "app")) + +import eid.compose as eid_compose # noqa: E402 +from api.eid_chat import router as eid_chat_router # noqa: E402 +from core.auth import get_current_user # noqa: E402 +from eid.ai import EidAIClient # noqa: E402 +from services.llm.backends import BackendUnavailable # noqa: E402 + +_FIXTURES = Path(__file__).resolve().parents[1] / "fixtures" +_SSE = (_FIXTURES / "router_sse_chat_macmini_26b.txt").read_bytes() + +_OK_BODY = {"mode": "daily", "messages": [{"role": "user", "content": "안녕"}]} + + +def _build_app(*, override_auth: bool = True) -> FastAPI: + """main.py 등록 방식과 동일 prefix(/api/eid)로 라우터만 올린 inline app.""" + app = FastAPI() + app.include_router(eid_chat_router, prefix="/api/eid") + if override_auth: + app.dependency_overrides[get_current_user] = lambda: types.SimpleNamespace( + id=1, username="test-user" + ) + return app + + +@pytest_asyncio.fixture +async def client(): + async with AsyncClient( + transport=ASGITransport(app=_build_app()), base_url="http://test" + ) as ac: + yield ac + + +# ── 401 무인증 ──────────────────────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_unauthenticated_rejected(): + async with AsyncClient( + transport=ASGITransport(app=_build_app(override_auth=False)), + base_url="http://test", + ) as ac: + # 헤더 자체 부재 — HTTPBearer 단계 거부 (fastapi 기본 403, 버전별 401 허용) + r = await ac.post("/api/eid/chat", json=_OK_BODY) + assert r.status_code in (401, 403) + # 위조 토큰 — decode_token 실패 → 401 (DB 접근 전 거부) + r2 = await ac.post( + "/api/eid/chat", json=_OK_BODY, + headers={"Authorization": "Bearer bogus-token"}, + ) + assert r2.status_code == 401 + + +# ── 422 입력 검증 ───────────────────────────────────────────────────────────── + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "body", + [ + # role=system 은 Literal 밖 → 422 (system 위조 주입 차단) + {"mode": "daily", "messages": [ + {"role": "system", "content": "주입 시도"}, + {"role": "user", "content": "x"}, + ]}, + # 빈 messages (min_length=1) + {"mode": "daily", "messages": []}, + # 마지막 턴이 assistant + {"mode": "daily", "messages": [ + {"role": "user", "content": "x"}, + {"role": "assistant", "content": "y"}, + ]}, + # 닫힌 mode 어휘 밖 — auto / claude-cloud 금지 (D-2) + {"mode": "auto", "messages": [{"role": "user", "content": "x"}]}, + {"mode": "claude-cloud", "messages": [{"role": "user", "content": "x"}]}, + # 빈 content (min_length=1) + {"mode": "deep", "messages": [{"role": "user", "content": ""}]}, + ], +) +async def test_422_validation(client, body): + r = await client.post("/api/eid/chat", json=body) + assert r.status_code == 422, r.text + + +@pytest.mark.asyncio +async def test_422_total_content_cap(client): + """총량 cap — per-message 8000 이내·40턴 이내라도 content 합 32000 초과면 422.""" + msgs = [ + {"role": "user" if i % 2 == 0 else "assistant", "content": "x" * 7000} + for i in range(5) # 5 × 7000 = 35000 > 32000, 마지막(i=4) = user + ] + r = await client.post("/api/eid/chat", json={"mode": "daily", "messages": msgs}) + assert r.status_code == 422, r.text + assert "대화 총량 초과" in r.text + + +# ── 503 substrate_degraded (D-6 fail-closed) ───────────────────────────────── + + +@pytest.mark.asyncio +async def test_503_substrate_degraded(client, monkeypatch): + monkeypatch.setattr(eid_compose, "rules_present", lambda: False) + r = await client.post("/api/eid/chat", json=_OK_BODY) + assert r.status_code == 503 + js = r.json() + assert js["error_reason"] == "substrate_degraded" + assert "detail" in js + + +# ── 503 backend_unavailable (스트림 시작 전, ask 컨벤션 shape) ──────────────── + + +@pytest.mark.asyncio +async def test_503_backend_unavailable_prestream(client, monkeypatch): + async def fake_call_stream(self, mode, messages, system): + raise BackendUnavailable("qwen-macbook", "macbook_unavailable") + yield b"" # pragma: no cover — async generator 형태 유지용 + + monkeypatch.setattr(EidAIClient, "call_stream", fake_call_stream) + r = await client.post( + "/api/eid/chat", + json={"mode": "deep", "messages": [{"role": "user", "content": "x"}]}, + ) + assert r.status_code == 503 + js = r.json() + assert js["error"] == "backend_unavailable" + assert js["error_reason"] == "macbook_unavailable" + assert js["backend_requested"] == "qwen-macbook" + + +# ── 정상 경로 — SSE raw pass-through ────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_200_stream_passthrough(client, monkeypatch): + captured: dict = {} + + async def fake_call_stream(self, mode, messages, system): + captured["mode"] = mode + captured["messages"] = messages + captured["system"] = system + # chunk 단위로 쪼개 yield — endpoint 가 무변형으로 그대로 흘리는지 확인 + for i in range(0, len(_SSE), 256): + yield _SSE[i : i + 256] + + monkeypatch.setattr(EidAIClient, "call_stream", fake_call_stream) + r = await client.post("/api/eid/chat", json=_OK_BODY) + assert r.status_code == 200, r.text + assert r.headers["content-type"].startswith("text/event-stream") + assert r.headers["cache-control"] == "no-store" + assert r.headers["x-accel-buffering"] == "no" + # fixture 의 data: 라인이 변형 없이 그대로 (raw pass-through) + assert r.content == _SSE + assert b'data: {"id"' in r.content + assert b"data: [DONE]" in r.content + # call_stream 입력: mode 그대로 + 사용자 턴 + compose 합본(persona 포함) system + assert captured["mode"] == "daily" + assert captured["messages"] == [{"role": "user", "content": "안녕"}] + assert "이드" in captured["system"], "system 에 compose 합본(persona) 미주입" + assert "보수적" in captured["system"], "system 에 rules 미주입" + + +# ── 스트림 시작 후 절단 — traceback 전파 0, 조용히 종료 ────────────────────── + + +@pytest.mark.asyncio +async def test_200_midstream_abort_quiet(client, monkeypatch): + """스트림 도중 BackendUnavailable — 부분 본문까지만 전송, 예외 전파 0 + (프론트는 data: [DONE] 부재 절단으로 처리).""" + + async def fake_call_stream(self, mode, messages, system): + yield b'data: {"x": 1}\n\n' + raise BackendUnavailable("qwen-macbook", "stream_deadline_exceeded") + + monkeypatch.setattr(EidAIClient, "call_stream", fake_call_stream) + r = await client.post( + "/api/eid/chat", + json={"mode": "deep", "messages": [{"role": "user", "content": "x"}]}, + ) + assert r.status_code == 200 + assert r.content == b'data: {"x": 1}\n\n' + assert b"data: [DONE]" not in r.content diff --git a/tests/eid/test_eid_chat_stream.py b/tests/eid/test_eid_chat_stream.py new file mode 100644 index 0000000..f0a1635 --- /dev/null +++ b/tests/eid/test_eid_chat_stream.py @@ -0,0 +1,318 @@ +"""EidAIClient.call_stream 단위 테스트 — mode 닫힌 매핑·egress 차단·SSE 라인 단위 중계. + +★ 실행 환경: httpx + config(settings) 필요 → Docker/staging pytest + (tests/eid/test_eid_ai_client.py 와 동일 idiom, MacBook 로컬 deps 없으면 hard-fail). +★ httpx 호출은 MockTransport 로 대체 — 실제 네트워크 0 (DB 의존 0). +★ 차단 대상 host 문자열은 런타임 분할 조립 — 차단을 *테스트*하는 코드지 호출 아님 + (meter-guard 오탐 회피, test_eid_ai_client.py 동일). +★ 스트림 검증 = byte-equal 아님: call_stream 이 data: JSON 의 model 을 mode 어휘로 + 치환 + usage 제거(머신 경로/텔레메트리 비노출) — content 누적·프레이밍 보존을 본다. +""" + +from __future__ import annotations + +import asyncio +import json +import sys +from pathlib import Path + +import httpx +import pytest + +sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "app")) + +import eid.ai as eid_ai # noqa: E402 +from eid.ai import EidAIClient, EidEgressBlocked # noqa: E402 +from services.llm.backends import BackendUnavailable # noqa: E402 +from services.search.llm_gate import _reset_for_test # noqa: E402 + +_FIXTURES = Path(__file__).resolve().parents[1] / "fixtures" +_SSE_MACMINI = (_FIXTURES / "router_sse_chat_macmini_26b.txt").read_bytes() +_SSE_QWEN = (_FIXTURES / "router_sse_chat_qwen_27b.txt").read_bytes() + +_BLOCKED_HOST = "anthropic" + ".com" + +_MSG = [{"role": "user", "content": "안녕"}] + + +@pytest.fixture(autouse=True) +def _reset_gate(): + """daily(mac-mini-default) 경로가 mlx gate 를 잡으므로 fresh event loop 마다 reset.""" + _reset_for_test() + yield + _reset_for_test() + + +def _patch_transport(monkeypatch, handler): + """eid.ai 내부 httpx.AsyncClient 생성에 MockTransport 주입 (생성 인자는 보존).""" + real = httpx.AsyncClient + + def _factory(*args, **kwargs): + kwargs["transport"] = httpx.MockTransport(handler) + return real(*args, **kwargs) + + monkeypatch.setattr(eid_ai.httpx, "AsyncClient", _factory) + + +def _data_objs(raw: bytes) -> list[dict]: + """SSE bytes → data: JSON 객체 목록 ([DONE] 제외).""" + objs = [] + for line in raw.split(b"\n"): + if line.startswith(b"data: ") and line[len(b"data: "):].strip() != b"[DONE]": + objs.append(json.loads(line[len(b"data: "):])) + return objs + + +def _content_concat(raw: bytes) -> str: + """delta.content 누적 — 본문 무손실 검증용.""" + return "".join( + (o["choices"][0]["delta"].get("content") or "") for o in _data_objs(raw) + ) + + +# ── mode 닫힌 매핑 / egress 차단 ────────────────────────────────────────────── + + +@pytest.mark.asyncio +@pytest.mark.parametrize("bad_mode", ["auto", "claude-cloud", "mac-mini-default", "bogus"]) +async def test_unknown_mode_blocked(bad_mode): + """미지 mode = EidEgressBlocked — alias 직접 지정 포함 닫힌 매핑(daily/deep) 밖 전부 차단.""" + c = EidAIClient() + try: + stream = c.call_stream(bad_mode, _MSG, "sys") + with pytest.raises(EidEgressBlocked): + await anext(stream) + finally: + await c.close() + + +@pytest.mark.asyncio +async def test_anthropic_router_url_blocked(monkeypatch): + """router URL 이 외부로 오결선돼도 call_stream 이 차단 (기존 _request 패턴 미러).""" + monkeypatch.setattr(eid_ai, "_router_url", lambda: "https://api." + _BLOCKED_HOST) + c = EidAIClient() + try: + stream = c.call_stream("deep", _MSG, "sys") + with pytest.raises(EidEgressBlocked): + await anext(stream) + finally: + await c.close() + + +# ── alias 매핑 + payload shape + 라인 단위 중계(model 치환·usage 제거) ──────── + + +@pytest.mark.asyncio +async def test_deep_mode_alias_and_sse_line_rewrite(monkeypatch): + """deep → qwen-macbook alias, system 은 messages[0] 단일 주입, 라인 단위 정화 중계.""" + seen: dict = {} + + def handler(request: httpx.Request) -> httpx.Response: + seen["url"] = str(request.url) + seen["json"] = json.loads(request.content) + return httpx.Response( + 200, content=_SSE_QWEN, headers={"content-type": "text/event-stream"} + ) + + _patch_transport(monkeypatch, handler) + c = EidAIClient() + try: + chunks = [b async for b in c.call_stream("deep", _MSG, "SYS_SENTINEL")] + finally: + await c.close() + + joined = b"".join(chunks) + # (a) content 누적 = fixture 와 동일 (델타 본문 무손실) + assert _content_concat(joined) == _content_concat(_SSE_QWEN) != "" + assert len(_data_objs(joined)) == len(_data_objs(_SSE_QWEN)) + # (b) model 필드 = mode 어휘 치환 — 맥북 파일시스템 절대경로/실모델명 비노출 + assert all(o["model"] == "deep" for o in _data_objs(joined)) + assert b"mlx-models" not in joined and b"Qwen" not in joined + # (c) usage(머신 텔레메트리) 부재 + assert all("usage" not in o for o in _data_objs(joined)) + assert b"peak_memory" not in joined + # (d) data: [DONE] 보존 + assert b"data: [DONE]" in joined + # (e) 빈 줄 프레이밍 보존 — 라인 수·빈 줄 위치가 fixture 와 동일 + assert [bool(l) for l in joined.split(b"\n")] == [ + bool(l) for l in _SSE_QWEN.split(b"\n") + ] + assert seen["url"].endswith("/v1/chat/completions") + body = seen["json"] + assert body["model"] == "qwen-macbook" + assert body["stream"] is True + assert body["max_tokens"] == 2048 + assert body["temperature"] == 0.4 + assert body["messages"][0] == {"role": "system", "content": "SYS_SENTINEL"} + assert body["messages"][1:] == _MSG + + +@pytest.mark.asyncio +async def test_daily_mode_alias_macmini(monkeypatch): + """daily → mac-mini-default alias (mlx gate 경유) + 라인 단위 정화 중계.""" + + class _TinyChunks(httpx.AsyncByteStream): + """청크 경계가 라인/JSON 중간에 오도록 7B 씩 방출 — 라인 버퍼링 검증.""" + + async def __aiter__(self): + for i in range(0, len(_SSE_MACMINI), 7): + yield _SSE_MACMINI[i : i + 7] + + async def aclose(self): + return None + + def handler(request: httpx.Request) -> httpx.Response: + assert json.loads(request.content)["model"] == "mac-mini-default" + return httpx.Response( + 200, stream=_TinyChunks(), headers={"content-type": "text/event-stream"} + ) + + _patch_transport(monkeypatch, handler) + c = EidAIClient() + try: + chunks = [b async for b in c.call_stream("daily", _MSG, "sys")] + finally: + await c.close() + joined = b"".join(chunks) + # (a) content 누적 동일 / (b) model 치환 / (c) usage 부재 / (d) [DONE] / (e) 프레이밍 + assert _content_concat(joined) == _content_concat(_SSE_MACMINI) != "" + assert all(o["model"] == "daily" for o in _data_objs(joined)) + assert b"gemma" not in joined + assert all("usage" not in o for o in _data_objs(joined)) + assert b"data: [DONE]" in joined + assert [bool(l) for l in joined.split(b"\n")] == [ + bool(l) for l in _SSE_MACMINI.split(b"\n") + ] + + +# ── 스트림 시작 전 에러 → BackendUnavailable (ask 어휘 일치) ────────────────── + + +@pytest.mark.asyncio +async def test_prestream_503_maps_reason(monkeypatch): + """router 503 body 의 error.type 을 error_reason 으로 추출 (ask 와 동일 어휘).""" + + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(503, json={"error": {"type": "macbook_unavailable"}}) + + _patch_transport(monkeypatch, handler) + c = EidAIClient() + try: + stream = c.call_stream("deep", _MSG, "sys") + with pytest.raises(BackendUnavailable) as ei: + await anext(stream) + assert ei.value.reason == "macbook_unavailable" + assert ei.value.backend_name == "qwen-macbook" + finally: + await c.close() + + +@pytest.mark.asyncio +async def test_prestream_503_no_body_falls_back_router_503(monkeypatch): + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(503, content=b"oops not json") + + _patch_transport(monkeypatch, handler) + c = EidAIClient() + try: + stream = c.call_stream("deep", _MSG, "sys") + with pytest.raises(BackendUnavailable) as ei: + await anext(stream) + assert ei.value.reason == "router_503" + finally: + await c.close() + + +@pytest.mark.asyncio +async def test_prestream_connect_error_maps_router_prefix(monkeypatch): + """연결 실패 → router_<예외명> (RouterBackend._post 어휘 일치).""" + + def handler(request: httpx.Request) -> httpx.Response: + raise httpx.ConnectError("connection refused") + + _patch_transport(monkeypatch, handler) + c = EidAIClient() + try: + stream = c.call_stream("deep", _MSG, "sys") + with pytest.raises(BackendUnavailable) as ei: + await anext(stream) + assert ei.value.reason == "router_ConnectError" + finally: + await c.close() + + +@pytest.mark.asyncio +async def test_prestream_400_raises_valueerror_failloud(monkeypatch): + """router 400 = 닫힌 매핑에서 alias drift 코드 버그 — BackendUnavailable 아닌 + ValueError fail-loud (RouterBackend._post 컨벤션 미러).""" + + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(400, json={"error": "unknown_alias"}) + + _patch_transport(monkeypatch, handler) + c = EidAIClient() + try: + stream = c.call_stream("deep", _MSG, "sys") + with pytest.raises(ValueError, match="router rejected alias='qwen-macbook'"): + await anext(stream) + finally: + await c.close() + + +# ── wall-clock deadline (게이트 점유 무한화 차단) ───────────────────────────── + + +@pytest.mark.asyncio +async def test_stream_deadline_exceeded(monkeypatch): + """업스트림 진입~종료 deadline 초과 → BackendUnavailable(stream_deadline_exceeded).""" + + class _StallStream(httpx.AsyncByteStream): + """첫 chunk 후 정체 — per-chunk read timeout 으론 안 잡히는 패턴 모사.""" + + async def __aiter__(self): + yield b'data: {"choices": []}\n\n' + await asyncio.sleep(30) + + async def aclose(self): + return None + + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, stream=_StallStream(), headers={"content-type": "text/event-stream"} + ) + + _patch_transport(monkeypatch, handler) + monkeypatch.setattr(eid_ai, "_STREAM_DEADLINE_S", 0.05) + c = EidAIClient() + try: + stream = c.call_stream("deep", _MSG, "sys") + with pytest.raises(BackendUnavailable) as ei: + async for _ in stream: + pass + assert ei.value.reason == "stream_deadline_exceeded" + assert ei.value.backend_name == "qwen-macbook" + finally: + await c.close() + + +# ── error_reason allowlist sanitize ────────────────────────────────────────── + + +def test_stream_error_reason_sanitized(): + """최종 reason 은 [a-z0-9_]{1,64} allowlist — 불일치(대문자/공백/dict 파편)는 + upstream_502(502)/router_error(그 외) 로 일반화, dict 직렬화 파편 비노출.""" + from eid.ai import _stream_error_reason + + # 정상 어휘는 그대로 (ask 와 동일) + assert ( + _stream_error_reason(503, b'{"error": {"type": "macbook_unavailable"}}') + == "macbook_unavailable" + ) + assert _stream_error_reason(503, b"oops not json") == "router_503" + assert _stream_error_reason(418, b"{}") == "router_http_418" + # 502 + 추출 실패 → upstream_502 (기존 upstream_502_{dict...} 파편 제거) + assert _stream_error_reason(502, b'{"error": {"detail": "x"}}') == "upstream_502" + # allowlist 밖(대문자/공백/특수문자) → 일반화 + assert _stream_error_reason(502, b'{"error": {"type": "Bad Gateway!"}}') == "upstream_502" + assert _stream_error_reason(503, b'{"error": {"type": "Weird Reason"}}') == "router_error" + assert _stream_error_reason(503, b'{"error": {"type": "' + b"a" * 80 + b'"}}') == "router_error" diff --git a/tests/fixtures/router_sse_chat_macmini_26b.txt b/tests/fixtures/router_sse_chat_macmini_26b.txt new file mode 100644 index 0000000..64f5ecd --- /dev/null +++ b/tests/fixtures/router_sse_chat_macmini_26b.txt @@ -0,0 +1,26 @@ +data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139860, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 1, "total_tokens": 29, "prompt_tps": 183.51595345126498, "generation_tps": 140349.23521338476, "peak_memory": 34.66827434}} + + +data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 2, "total_tokens": 30, "prompt_tps": 183.51595345126498, "generation_tps": 93.60885515563795, "peak_memory": 34.66827434}} + + +data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "안녕하세요,", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 3, "total_tokens": 31, "prompt_tps": 183.51595345126498, "generation_tps": 70.37263329290622, "peak_memory": 34.66827434}} + + +data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 4, "total_tokens": 32, "prompt_tps": 183.51595345126498, "generation_tps": 62.61454940315543, "peak_memory": 34.66827434}} + + +data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": " 만나서", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 5, "total_tokens": 33, "prompt_tps": 183.51595345126498, "generation_tps": 58.7098801868211, "peak_memory": 34.66827434}} + + +data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 6, "total_tokens": 34, "prompt_tps": 183.51595345126498, "generation_tps": 56.35974757228211, "peak_memory": 34.66827434}} + + +data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": null, "delta": {"role": "assistant", "content": " 반갑습니다!", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 7, "total_tokens": 35, "prompt_tps": 183.51595345126498, "generation_tps": 54.81880127112613, "peak_memory": 34.66827434}} + + +data: {"id": "chatcmpl-4e188b8b-8617-4054-be82-25fece7b56f1", "object": "chat.completion.chunk", "created": 1781139861, "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [{"index": 0, "finish_reason": "stop", "delta": {"role": "assistant", "content": "", "tool_calls": []}}], "usage": {"input_tokens": 28, "output_tokens": 7, "total_tokens": 35, "prompt_tps": 183.51595345126498, "generation_tps": 54.81880127112613, "peak_memory": 34.66827434}} + + +data: [DONE] + diff --git a/tests/fixtures/router_sse_chat_qwen_27b.txt b/tests/fixtures/router_sse_chat_qwen_27b.txt new file mode 100644 index 0000000..05d35f5 --- /dev/null +++ b/tests/fixtures/router_sse_chat_qwen_27b.txt @@ -0,0 +1,12 @@ +data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139880,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":1,"total_tokens":26,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}} + +data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139880,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":2,"total_tokens":27,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}} + +data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139880,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":3,"total_tokens":28,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}} + +data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139881,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":4,"total_tokens":29,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}} + +data: {"id":"chatcmpl-96ee9a0d-2f66-4357-876f-951c80c23bb2","object":"chat.completion.chunk","created":1781139881,"model":"/Users/hyungi/mlx-models/Qwen3.6-27B-8bit","choices":[{"index":0,"finish_reason":"stop","delta":{"role":"assistant","content":"안녕하세요!","reasoning":null,"tool_calls":null,"tool_call_id":null,"name":null},"logprobs":null}],"usage":{"prompt_tokens":25,"completion_tokens":5,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0},"prompt_tps":0.0,"generation_tps":0.0,"peak_memory":0.0}} + +data: [DONE] +