feat: NanoClaude 프로덕션 통합 — Docker, Caddy, aiosqlite 로깅

- docker-compose에 nanoclaude 서비스 추가 (포트 8100)
- Caddy /nano/* → nanoclaude 리버스 프록시 (SSE flush)
- aiosqlite 요청/응답 로깅 (request_logs 테이블)
- .env.example, CLAUDE.md 업데이트

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-06 11:19:15 +09:00
parent 1e427bc98a
commit e970ebdbea
10 changed files with 122 additions and 2 deletions

View File

@@ -14,3 +14,7 @@ BACKENDS_CONFIG=/app/config/backends.json
# DB path (in Docker)
DB_PATH=/app/data/gateway.db
# NanoClaude
EXAONE_MODEL=exaone3.5:7.8b-instruct-q8_0
NANOCLAUDE_API_KEY=

View File

@@ -9,13 +9,14 @@ GPU 서버(RTX 4070 Ti Super)에서 운영하는 중앙 AI 라우팅 서비스.
|--------|----------|------|------|
| Caddy | caddy/ | Caddy 2 | 80/443 |
| hub-api | hub-api/ | FastAPI + aiosqlite | 8000 |
| hub-web | hub-web/ | Vite + React + shadcn/ui | 3000 (Phase 2) |
| hub-web | hub-web/ | Vite + React + shadcn/ui | 3000 |
| NanoClaude | nanoclaude/ | FastAPI + aiosqlite | 8100 |
## 외부 연결
- GPU Ollama: host.docker.internal:11434
- 맥미니 Ollama: 100.115.153.119:11434
- NanoClaude: 100.115.153.119:PORT (Phase 1.5)
- NanoClaude: localhost:8100 (비동기 job 기반 AI Gateway)
## 개발
@@ -37,6 +38,11 @@ OpenAI 호환: `/v1/chat/completions`, `/v1/models`, `/v1/embeddings`
인증: `/auth/login` → Cookie 또는 Bearer 토큰
모니터링: `/health`, `/gpu`
## NanoClaude API
비동기 job 기반: `POST /nano/chat` → `{ job_id }`, `GET /nano/chat/{job_id}/stream` → SSE
취소: `POST /nano/chat/{job_id}/cancel`
## 백엔드 설정
`backends.json`에서 백엔드 추가/제거. 서비스 재시작 필요.

View File

@@ -16,6 +16,12 @@
handle /gpu {
reverse_proxy hub-api:8000
}
handle /nano/* {
uri strip_prefix /nano
reverse_proxy nanoclaude:8100 {
flush_interval -1
}
}
handle {
reverse_proxy hub-web:80
}

View File

@@ -12,6 +12,7 @@ services:
depends_on:
- hub-api
- hub-web
- nanoclaude
networks:
- gateway-net
@@ -46,9 +47,31 @@ services:
networks:
- gateway-net
nanoclaude:
build: ./nanoclaude
container_name: gpu-nanoclaude
restart: unless-stopped
environment:
- EXAONE_BASE_URL=http://host.docker.internal:11434
- EXAONE_MODEL=${EXAONE_MODEL:-exaone3.5:7.8b-instruct-q8_0}
- DB_PATH=/app/data/nanoclaude.db
- API_KEY=${NANOCLAUDE_API_KEY:-}
volumes:
- nano_data:/app/data
extra_hosts:
- "host.docker.internal:host-gateway"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8100/health"]
interval: 15s
timeout: 5s
retries: 3
networks:
- gateway-net
volumes:
caddy_data:
hub_data:
nano_data:
networks:
gateway-net:

View File

@@ -12,6 +12,9 @@ class Settings(BaseSettings):
host: str = "0.0.0.0"
port: int = 8100
# DB
db_path: str = "/app/data/nanoclaude.db"
# Optional API key (empty = disabled)
api_key: str = ""

View File

47
nanoclaude/db/database.py Normal file
View File

@@ -0,0 +1,47 @@
"""aiosqlite DB — 요청/응답 로깅 및 메트릭."""
import aiosqlite
from config import settings
SCHEMA = """
CREATE TABLE IF NOT EXISTS request_logs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id TEXT NOT NULL,
message TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'queued',
model TEXT NOT NULL,
response_chars INTEGER DEFAULT 0,
latency_ms REAL DEFAULT 0,
created_at REAL NOT NULL,
completed_at REAL
);
CREATE INDEX IF NOT EXISTS idx_logs_job ON request_logs(job_id);
CREATE INDEX IF NOT EXISTS idx_logs_created ON request_logs(created_at);
"""
async def init_db():
    """Create the request_logs schema if absent and switch the DB to WAL mode.

    Safe to call on every startup: the schema uses IF NOT EXISTS throughout.
    """
    db = await aiosqlite.connect(settings.db_path)
    try:
        # WAL lets concurrent readers proceed while a write is in flight.
        await db.execute("PRAGMA journal_mode=WAL")
        await db.executescript(SCHEMA)
        await db.commit()
    finally:
        await db.close()
async def log_request(job_id: str, message: str, model: str, created_at: float):
    """Record an incoming chat job.

    The row's status starts at the schema default ('queued'); completion data
    is filled in later by log_completion().
    """
    insert_sql = (
        "INSERT INTO request_logs (job_id, message, model, created_at) VALUES (?, ?, ?, ?)"
    )
    row = (job_id, message, model, created_at)
    async with aiosqlite.connect(settings.db_path) as db:
        await db.execute(insert_sql, row)
        await db.commit()
async def log_completion(job_id: str, status: str, response_chars: int, latency_ms: float, completed_at: float):
    """Finalize the log row for *job_id* with its outcome and timing metrics.

    No-op if the job_id was never logged (UPDATE matches zero rows).
    """
    update_sql = (
        "UPDATE request_logs SET status=?, response_chars=?, latency_ms=?, completed_at=? WHERE job_id=?"
    )
    params = (status, response_chars, latency_ms, completed_at, job_id)
    async with aiosqlite.connect(settings.db_path) as db:
        await db.execute(update_sql, params)
        await db.commit()

View File

@@ -3,12 +3,14 @@
from __future__ import annotations
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from config import settings
from db.database import init_db
from routers import chat
logging.basicConfig(
@@ -16,10 +18,17 @@ logging.basicConfig(
format="%(asctime)s %(levelname)s %(name)s%(message)s",
)
@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: make sure the SQLite schema exists before any request is served.
    await init_db()
    yield
    # Shutdown: nothing to clean up — each DB helper opens/closes its own connection.
# Application instance; the lifespan hook above runs DB initialization on startup.
app = FastAPI(
    title="NanoClaude",
    version="0.1.0",
    description="비동기 job 기반 AI Gateway — Phase 1",
    lifespan=lifespan,
)
app.add_middleware(

View File

@@ -2,3 +2,4 @@ fastapi==0.115.0
uvicorn[standard]==0.30.0
httpx==0.27.0
pydantic-settings==2.5.0
aiosqlite==0.20.0

View File

@@ -4,7 +4,10 @@ from __future__ import annotations
import asyncio
import logging
from time import time
from config import settings
from db.database import log_completion, log_request
from models.schemas import JobStatus
from services.exaone_adapter import stream_chat
from services.job_manager import Job, job_manager
@@ -18,6 +21,14 @@ HEARTBEAT_INTERVAL = 4.0
async def run(job: Job) -> None:
"""EXAONE 호출 → SSE 이벤트 발행."""
start_time = time()
# DB 로깅: 요청 기록
try:
await log_request(job.id, job.message, settings.exaone_model, job.created_at)
except Exception:
logger.warning("Failed to log request for job %s", job.id, exc_info=True)
try:
# --- ACK ---
await state_stream.push(job.id, "ack", {"message": "요청을 확인했습니다. 분석을 시작합니다."})
@@ -50,9 +61,19 @@ async def run(job: Job) -> None:
if not collected:
job_manager.set_status(job.id, JobStatus.failed)
await state_stream.push(job.id, "error", {"message": "EXAONE으로부터 응답을 받지 못했습니다."})
status = "failed"
else:
job_manager.set_status(job.id, JobStatus.completed)
await state_stream.push(job.id, "done", {"message": "완료"})
status = "completed"
# DB 로깅: 완료 기록
latency_ms = (time() - start_time) * 1000
response_text = "".join(collected)
try:
await log_completion(job.id, status, len(response_text), latency_ms, time())
except Exception:
logger.warning("Failed to log completion for job %s", job.id, exc_info=True)
except asyncio.CancelledError:
job_manager.set_status(job.id, JobStatus.cancelled)