feat: NanoClaude 프로덕션 통합 — Docker, Caddy, aiosqlite 로깅
- docker-compose에 nanoclaude 서비스 추가 (포트 8100)
- Caddy /nano/* → nanoclaude 리버스 프록시 (SSE flush)
- aiosqlite 요청/응답 로깅 (request_logs 테이블)
- .env.example, CLAUDE.md 업데이트

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,3 +14,7 @@ BACKENDS_CONFIG=/app/config/backends.json
|
||||
|
||||
# DB path (in Docker)
|
||||
DB_PATH=/app/data/gateway.db
|
||||
|
||||
# NanoClaude
|
||||
EXAONE_MODEL=exaone3.5:7.8b-instruct-q8_0
|
||||
NANOCLAUDE_API_KEY=
|
||||
|
||||
10
CLAUDE.md
10
CLAUDE.md
@@ -9,13 +9,14 @@ GPU 서버(RTX 4070 Ti Super)에서 운영하는 중앙 AI 라우팅 서비스.
|
||||
|--------|----------|------|------|
|
||||
| Caddy | caddy/ | Caddy 2 | 80/443 |
|
||||
| hub-api | hub-api/ | FastAPI + aiosqlite | 8000 |
|
||||
| hub-web | hub-web/ | Vite + React + shadcn/ui | 3000 (Phase 2) |
|
||||
| hub-web | hub-web/ | Vite + React + shadcn/ui | 3000 |
|
||||
| NanoClaude | nanoclaude/ | FastAPI + aiosqlite | 8100 |
|
||||
|
||||
## 외부 연결
|
||||
|
||||
- GPU Ollama: host.docker.internal:11434
|
||||
- 맥미니 Ollama: 100.115.153.119:11434
|
||||
- NanoClaude: 100.115.153.119:PORT (Phase 1.5)
|
||||
- NanoClaude: localhost:8100 (비동기 job 기반 AI Gateway)
|
||||
|
||||
## 개발
|
||||
|
||||
@@ -37,6 +38,11 @@ OpenAI 호환: `/v1/chat/completions`, `/v1/models`, `/v1/embeddings`
|
||||
인증: `/auth/login` → Cookie 또는 Bearer 토큰
|
||||
모니터링: `/health`, `/gpu`
|
||||
|
||||
## NanoClaude API
|
||||
|
||||
비동기 job 기반: `POST /nano/chat` → `{ job_id }`, `GET /nano/chat/{job_id}/stream` → SSE
|
||||
취소: `POST /nano/chat/{job_id}/cancel`
|
||||
|
||||
## 백엔드 설정
|
||||
|
||||
`backends.json`에서 백엔드 추가/제거. 서비스 재시작 필요.
|
||||
|
||||
@@ -16,6 +16,12 @@
|
||||
handle /gpu {
|
||||
reverse_proxy hub-api:8000
|
||||
}
|
||||
handle /nano/* {
|
||||
uri strip_prefix /nano
|
||||
reverse_proxy nanoclaude:8100 {
|
||||
flush_interval -1
|
||||
}
|
||||
}
|
||||
handle {
|
||||
reverse_proxy hub-web:80
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ services:
|
||||
depends_on:
|
||||
- hub-api
|
||||
- hub-web
|
||||
- nanoclaude
|
||||
networks:
|
||||
- gateway-net
|
||||
|
||||
@@ -46,9 +47,31 @@ services:
|
||||
networks:
|
||||
- gateway-net
|
||||
|
||||
nanoclaude:
|
||||
build: ./nanoclaude
|
||||
container_name: gpu-nanoclaude
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- EXAONE_BASE_URL=http://host.docker.internal:11434
|
||||
- EXAONE_MODEL=${EXAONE_MODEL:-exaone3.5:7.8b-instruct-q8_0}
|
||||
- DB_PATH=/app/data/nanoclaude.db
|
||||
- API_KEY=${NANOCLAUDE_API_KEY:-}
|
||||
volumes:
|
||||
- nano_data:/app/data
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8100/health"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
networks:
|
||||
- gateway-net
|
||||
|
||||
volumes:
|
||||
caddy_data:
|
||||
hub_data:
|
||||
nano_data:
|
||||
|
||||
networks:
|
||||
gateway-net:
|
||||
|
||||
@@ -12,6 +12,9 @@ class Settings(BaseSettings):
|
||||
host: str = "0.0.0.0"
|
||||
port: int = 8100
|
||||
|
||||
# DB
|
||||
db_path: str = "/app/data/nanoclaude.db"
|
||||
|
||||
# Optional API key (empty = disabled)
|
||||
api_key: str = ""
|
||||
|
||||
|
||||
0
nanoclaude/db/__init__.py
Normal file
0
nanoclaude/db/__init__.py
Normal file
47
nanoclaude/db/database.py
Normal file
47
nanoclaude/db/database.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""aiosqlite DB — 요청/응답 로깅 및 메트릭."""
|
||||
|
||||
import aiosqlite
|
||||
|
||||
from config import settings
|
||||
|
||||
SCHEMA = """
|
||||
CREATE TABLE IF NOT EXISTS request_logs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_id TEXT NOT NULL,
|
||||
message TEXT NOT NULL,
|
||||
status TEXT NOT NULL DEFAULT 'queued',
|
||||
model TEXT NOT NULL,
|
||||
response_chars INTEGER DEFAULT 0,
|
||||
latency_ms REAL DEFAULT 0,
|
||||
created_at REAL NOT NULL,
|
||||
completed_at REAL
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_logs_job ON request_logs(job_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_logs_created ON request_logs(created_at);
|
||||
"""
|
||||
|
||||
|
||||
async def init_db():
    """Create the request-log schema, switching the database to WAL first.

    The directory that will hold the SQLite file is created if it is
    missing: SQLite creates an absent database file on connect, but not an
    absent parent directory, so without this step startup fails with
    "unable to open database file" whenever ``settings.db_path`` points
    into a directory that has not been provisioned yet.

    Every statement in ``SCHEMA`` uses ``IF NOT EXISTS``, so running this
    on each startup is safe.
    """
    # Function-scope import keeps this block self-contained.
    from pathlib import Path

    # For special names such as ":memory:" the parent resolves to ".",
    # which already exists, so this is a no-op in that case.
    Path(settings.db_path).parent.mkdir(parents=True, exist_ok=True)

    async with aiosqlite.connect(settings.db_path) as db:
        await db.execute("PRAGMA journal_mode=WAL")
        await db.executescript(SCHEMA)
        await db.commit()
|
||||
|
||||
|
||||
async def log_request(job_id: str, message: str, model: str, created_at: float):
    """Insert a new ``request_logs`` row for an incoming job.

    Only ``job_id``, ``message``, ``model`` and ``created_at`` are written;
    the other columns take their schema defaults (``status`` starts as
    ``'queued'``).
    """
    insert_sql = "INSERT INTO request_logs (job_id, message, model, created_at) VALUES (?, ?, ?, ?)"
    row = (job_id, message, model, created_at)

    # A fresh connection is opened per call and closed on exit.
    async with aiosqlite.connect(settings.db_path) as conn:
        await conn.execute(insert_sql, row)
        await conn.commit()
|
||||
|
||||
|
||||
async def log_completion(job_id: str, status: str, response_chars: int, latency_ms: float, completed_at: float):
    """Record the outcome of a job on its existing ``request_logs`` row(s).

    Updates ``status``, ``response_chars``, ``latency_ms`` and
    ``completed_at`` for every row whose ``job_id`` matches. If no row
    matches, the UPDATE simply affects nothing.
    """
    update_sql = "UPDATE request_logs SET status=?, response_chars=?, latency_ms=?, completed_at=? WHERE job_id=?"
    # Parameter order follows the placeholders: SET values first, key last.
    params = (status, response_chars, latency_ms, completed_at, job_id)

    async with aiosqlite.connect(settings.db_path) as conn:
        await conn.execute(update_sql, params)
        await conn.commit()
|
||||
@@ -3,12 +3,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from config import settings
|
||||
from db.database import init_db
|
||||
from routers import chat
|
||||
|
||||
logging.basicConfig(
|
||||
@@ -16,10 +18,17 @@ logging.basicConfig(
|
||||
format="%(asctime)s %(levelname)s %(name)s — %(message)s",
|
||||
)
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan hook: initialise the database, then hand control to the app.

    Nothing follows the ``yield``, so no shutdown-time cleanup is performed.
    """
    # Startup phase: make sure the logging schema exists before serving.
    await init_db()
    # The application runs while this generator is suspended here.
    yield
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="NanoClaude",
|
||||
version="0.1.0",
|
||||
description="비동기 job 기반 AI Gateway — Phase 1",
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
app.add_middleware(
|
||||
|
||||
@@ -2,3 +2,4 @@ fastapi==0.115.0
|
||||
uvicorn[standard]==0.30.0
|
||||
httpx==0.27.0
|
||||
pydantic-settings==2.5.0
|
||||
aiosqlite==0.20.0
|
||||
|
||||
@@ -4,7 +4,10 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from time import time
|
||||
|
||||
from config import settings
|
||||
from db.database import log_completion, log_request
|
||||
from models.schemas import JobStatus
|
||||
from services.exaone_adapter import stream_chat
|
||||
from services.job_manager import Job, job_manager
|
||||
@@ -18,6 +21,14 @@ HEARTBEAT_INTERVAL = 4.0
|
||||
|
||||
async def run(job: Job) -> None:
|
||||
"""EXAONE 호출 → SSE 이벤트 발행."""
|
||||
start_time = time()
|
||||
|
||||
# DB 로깅: 요청 기록
|
||||
try:
|
||||
await log_request(job.id, job.message, settings.exaone_model, job.created_at)
|
||||
except Exception:
|
||||
logger.warning("Failed to log request for job %s", job.id, exc_info=True)
|
||||
|
||||
try:
|
||||
# --- ACK ---
|
||||
await state_stream.push(job.id, "ack", {"message": "요청을 확인했습니다. 분석을 시작합니다."})
|
||||
@@ -50,9 +61,19 @@ async def run(job: Job) -> None:
|
||||
if not collected:
|
||||
job_manager.set_status(job.id, JobStatus.failed)
|
||||
await state_stream.push(job.id, "error", {"message": "EXAONE으로부터 응답을 받지 못했습니다."})
|
||||
status = "failed"
|
||||
else:
|
||||
job_manager.set_status(job.id, JobStatus.completed)
|
||||
await state_stream.push(job.id, "done", {"message": "완료"})
|
||||
status = "completed"
|
||||
|
||||
# DB 로깅: 완료 기록
|
||||
latency_ms = (time() - start_time) * 1000
|
||||
response_text = "".join(collected)
|
||||
try:
|
||||
await log_completion(job.id, status, len(response_text), latency_ms, time())
|
||||
except Exception:
|
||||
logger.warning("Failed to log completion for job %s", job.id, exc_info=True)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
job_manager.set_status(job.id, JobStatus.cancelled)
|
||||
|
||||
Reference in New Issue
Block a user