feat: NanoClaude Phase 1 — 비동기 job 기반 AI Gateway 코어 구현

POST /chat → job_id ACK, GET /chat/{job_id}/stream → SSE 스트리밍, EXAONE Ollama adapter, JobManager, StateStream, Worker 구조

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 11:12:04 +09:00
parent 4917fd568f
commit d946b769e5
13 changed files with 450 additions and 0 deletions
--- a/nanoclaude/services/exaone_adapter.py
+++ b/nanoclaude/services/exaone_adapter.py
@@ -0,0 +1,90 @@
+"""EXAONE Adapter — Ollama OpenAI-compat endpoint를 통한 EXAONE 호출."""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import AsyncGenerator
+
+import httpx
+
+from config import settings
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+# System prompt (written in Korean) that this module sends as the "system"
+# message ahead of the user's message in each chat request it builds.
+# English gloss: "You are NanoClaude, an AI assistant that understands and
+# organizes the user's question and provides a clear answer. Identify the
+# intent of the user's question, tidy up the wording, and produce a
+# structured response."
+SYSTEM_PROMPT = (
+    "너는 NanoClaude, 사용자의 질문을 이해하고 정리하여 명확한 답변을 제공하는 AI 어시스턴트다. "
+    "사용자의 질문 의도를 파악하고, 문장을 정리하며, 구조화된 응답을 생성한다."
+)
+
+
+async def stream_chat(message: str) -> AsyncGenerator[str, None]:
+    """EXAONE 스트리밍 호출. OpenAI-compat SSE를 chunk 단위로 yield."""
+    payload = {
+        "model": settings.exaone_model,
+        "messages": [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": message},
+        ],
+        "stream": True,
+        "temperature": settings.exaone_temperature,
+    }
+
+    async with httpx.AsyncClient(timeout=settings.exaone_timeout) as client:
+        try:
+            async with client.stream(
+                "POST",
+                f"{settings.exaone_base_url}/v1/chat/completions",
+                json=payload,
+            ) as resp:
+                if resp.status_code != 200:
+                    body = await resp.aread()
+                    logger.error("EXAONE error %d: %s", resp.status_code, body.decode())
+                    yield f"[Error] EXAONE 응답 실패 ({resp.status_code})"
+                    return
+
+                async for line in resp.aiter_lines():
+                    line = line.strip()
+                    if not line or not line.startswith("data: "):
+                        continue
+                    payload_str = line[len("data: "):]
+                    if payload_str == "[DONE]":
+                        return
+                    # Extract content delta from OpenAI-format chunk
+                    try:
+                        import json
+                        chunk = json.loads(payload_str)
+                        delta = chunk.get("choices", [{}])[0].get("delta", {})
+                        content = delta.get("content", "")
+                        if content:
+                            yield content
+                    except (json.JSONDecodeError, IndexError, KeyError):
+                        continue
+
+        except httpx.ConnectError:
+            logger.error("EXAONE connection failed: %s", settings.exaone_base_url)
+            yield "[Error] EXAONE 서버에 연결할 수 없습니다."
+        except httpx.ReadTimeout:
+            logger.error("EXAONE read timeout")
+            yield "[Error] EXAONE 응답 시간이 초과되었습니다."
+
+
+async def complete_chat(message: str) -> str:
+    """EXAONE 비스트리밍 호출. 전체 응답 텍스트 반환."""
+    payload = {
+        "model": settings.exaone_model,
+        "messages": [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": message},
+        ],
+        "stream": False,
+        "temperature": settings.exaone_temperature,
+    }
+
+    async with httpx.AsyncClient(timeout=settings.exaone_timeout) as client:
+        resp = await client.post(
+            f"{settings.exaone_base_url}/v1/chat/completions",
+            json=payload,
+        )
+        resp.raise_for_status()
+        data = resp.json()
+        return data["choices"][0]["message"]["content"]