diff --git a/.env.example b/.env.example index 1603e55..b5aa0e1 100644 --- a/.env.example +++ b/.env.example @@ -14,3 +14,7 @@ BACKENDS_CONFIG=/app/config/backends.json # DB path (in Docker) DB_PATH=/app/data/gateway.db + +# NanoClaude +EXAONE_MODEL=exaone3.5:7.8b-instruct-q8_0 +NANOCLAUDE_API_KEY= diff --git a/CLAUDE.md b/CLAUDE.md index 42974e2..67211de 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,13 +9,14 @@ GPU 서버(RTX 4070 Ti Super)에서 운영하는 중앙 AI 라우팅 서비스. |--------|----------|------|------| | Caddy | caddy/ | Caddy 2 | 80/443 | | hub-api | hub-api/ | FastAPI + aiosqlite | 8000 | -| hub-web | hub-web/ | Vite + React + shadcn/ui | 3000 (Phase 2) | +| hub-web | hub-web/ | Vite + React + shadcn/ui | 3000 | +| NanoClaude | nanoclaude/ | FastAPI + aiosqlite | 8100 | ## 외부 연결 - GPU Ollama: host.docker.internal:11434 - 맥미니 Ollama: 100.115.153.119:11434 -- NanoClaude: 100.115.153.119:PORT (Phase 1.5) +- NanoClaude: localhost:8100 (비동기 job 기반 AI Gateway) ## 개발 @@ -37,6 +38,11 @@ OpenAI 호환: `/v1/chat/completions`, `/v1/models`, `/v1/embeddings` 인증: `/auth/login` → Cookie 또는 Bearer 토큰 모니터링: `/health`, `/gpu` +## NanoClaude API + +비동기 job 기반: `POST /nano/chat` → `{ job_id }`, `GET /nano/chat/{job_id}/stream` → SSE +취소: `POST /nano/chat/{job_id}/cancel` + ## 백엔드 설정 `backends.json`에서 백엔드 추가/제거. 서비스 재시작 필요. diff --git a/caddy/Caddyfile b/caddy/Caddyfile index 42170ec..3203bde 100644 --- a/caddy/Caddyfile +++ b/caddy/Caddyfile @@ -16,6 +16,12 @@ handle /gpu { reverse_proxy hub-api:8000 } + handle /nano/* { + uri strip_prefix /nano + reverse_proxy nanoclaude:8100 { + flush_interval -1 + } + } handle { reverse_proxy hub-web:80 } diff --git a/docker-compose.yml b/docker-compose.yml index 8ab0723..04a25a8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,7 @@ services: depends_on: - hub-api - hub-web + - nanoclaude networks: - gateway-net @@ -46,9 +47,31 @@ services: networks: - gateway-net + nanoclaude: + build: ./nanoclaude + container_name: gpu-nanoclaude + restart: unless-stopped + environment: + - EXAONE_BASE_URL=http://host.docker.internal:11434 + - EXAONE_MODEL=${EXAONE_MODEL:-exaone3.5:7.8b-instruct-q8_0} + - DB_PATH=/app/data/nanoclaude.db + - API_KEY=${NANOCLAUDE_API_KEY:-} + volumes: + - nano_data:/app/data + extra_hosts: + - "host.docker.internal:host-gateway" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8100/health"] + interval: 15s + timeout: 5s + retries: 3 + networks: + - gateway-net + volumes: caddy_data: hub_data: + nano_data: networks: gateway-net: diff --git a/nanoclaude/config.py b/nanoclaude/config.py index 7269d4c..2295dad 100644 --- a/nanoclaude/config.py +++ b/nanoclaude/config.py @@ -12,6 +12,9 @@ class Settings(BaseSettings): host: str = "0.0.0.0" port: int = 8100 + # DB + db_path: str = "/app/data/nanoclaude.db" + # Optional API key (empty = disabled) api_key: str = "" diff --git a/nanoclaude/db/__init__.py b/nanoclaude/db/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nanoclaude/db/database.py b/nanoclaude/db/database.py new file mode 100644 index 0000000..a95dbc9 --- /dev/null +++ b/nanoclaude/db/database.py @@ -0,0 +1,47 @@ +"""aiosqlite DB — 요청/응답 로깅 및 메트릭.""" + +import aiosqlite + +from config import settings + +SCHEMA = """ +CREATE TABLE IF NOT EXISTS request_logs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id TEXT NOT NULL, + message TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'queued', + model TEXT NOT NULL, + response_chars INTEGER DEFAULT 0, + latency_ms REAL DEFAULT 0, + created_at REAL NOT NULL, + completed_at REAL +); + +CREATE INDEX IF NOT EXISTS idx_logs_job ON request_logs(job_id); +CREATE INDEX IF NOT EXISTS idx_logs_created ON request_logs(created_at); +""" + + +async def init_db(): + async with aiosqlite.connect(settings.db_path) as db: + await db.execute("PRAGMA journal_mode=WAL") + await db.executescript(SCHEMA) + await db.commit() + + +async def log_request(job_id: str, message: str, model: str, created_at: float): + async with aiosqlite.connect(settings.db_path) as db: + await db.execute( + "INSERT INTO request_logs (job_id, message, model, created_at) VALUES (?, ?, ?, ?)", + (job_id, message, model, created_at), + ) + await db.commit() + + +async def log_completion(job_id: str, status: str, response_chars: int, latency_ms: float, completed_at: float): + async with aiosqlite.connect(settings.db_path) as db: + await db.execute( + "UPDATE request_logs SET status=?, response_chars=?, latency_ms=?, completed_at=? WHERE job_id=?", + (status, response_chars, latency_ms, completed_at, job_id), + ) + await db.commit() diff --git a/nanoclaude/main.py b/nanoclaude/main.py index 846830d..273220d 100644 --- a/nanoclaude/main.py +++ b/nanoclaude/main.py @@ -3,12 +3,14 @@ from __future__ import annotations import logging +from contextlib import asynccontextmanager from fastapi import FastAPI, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from config import settings +from db.database import init_db from routers import chat logging.basicConfig( @@ -16,10 +18,17 @@ logging.basicConfig( format="%(asctime)s %(levelname)s %(name)s — %(message)s", ) +@asynccontextmanager +async def lifespan(app: FastAPI): + await init_db() + yield + + app = FastAPI( title="NanoClaude", version="0.1.0", description="비동기 job 기반 AI Gateway — Phase 1", + lifespan=lifespan, ) app.add_middleware( diff --git a/nanoclaude/requirements.txt b/nanoclaude/requirements.txt index 6a40415..41b767e 100644 --- a/nanoclaude/requirements.txt +++ b/nanoclaude/requirements.txt @@ -2,3 +2,4 @@ fastapi==0.115.0 uvicorn[standard]==0.30.0 httpx==0.27.0 pydantic-settings==2.5.0 +aiosqlite==0.20.0 diff --git a/nanoclaude/services/worker.py b/nanoclaude/services/worker.py index 14abb1e..5d5c4f2 100644 --- a/nanoclaude/services/worker.py +++ b/nanoclaude/services/worker.py @@ -4,7 +4,10 @@ from __future__ import annotations import asyncio import logging +from time import time +from config import settings +from db.database import log_completion, log_request from models.schemas import JobStatus from services.exaone_adapter import stream_chat from services.job_manager import Job, job_manager @@ -18,6 +21,14 @@ HEARTBEAT_INTERVAL = 4.0 async def run(job: Job) -> None: """EXAONE 호출 → SSE 이벤트 발행.""" + start_time = time() + + # DB 로깅: 요청 기록 + try: + await log_request(job.id, job.message, settings.exaone_model, job.created_at) + except Exception: + logger.warning("Failed to log request for job %s", job.id, exc_info=True) + try: # --- ACK --- await state_stream.push(job.id, "ack", {"message": "요청을 확인했습니다. 분석을 시작합니다."}) @@ -50,9 +61,19 @@ async def run(job: Job) -> None: if not collected: job_manager.set_status(job.id, JobStatus.failed) await state_stream.push(job.id, "error", {"message": "EXAONE으로부터 응답을 받지 못했습니다."}) + status = "failed" else: job_manager.set_status(job.id, JobStatus.completed) await state_stream.push(job.id, "done", {"message": "완료"}) + status = "completed" + + # DB 로깅: 완료 기록 + latency_ms = (time() - start_time) * 1000 + response_text = "".join(collected) + try: + await log_completion(job.id, status, len(response_text), latency_ms, time()) + except Exception: + logger.warning("Failed to log completion for job %s", job.id, exc_info=True) except asyncio.CancelledError: job_manager.set_status(job.id, JobStatus.cancelled)