feat: NanoClaude 프로덕션 통합 — Docker, Caddy, aiosqlite 로깅

- docker-compose에 nanoclaude 서비스 추가 (포트 8100)
- Caddy /nano/* → nanoclaude 리버스 프록시 (SSE flush)
- aiosqlite 요청/응답 로깅 (request_logs 테이블)
- .env.example, CLAUDE.md 업데이트

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-06 11:19:15 +09:00
parent 1e427bc98a
commit e970ebdbea
10 changed files with 122 additions and 2 deletions

View File

@@ -14,3 +14,7 @@ BACKENDS_CONFIG=/app/config/backends.json
# DB path (in Docker) # DB path (in Docker)
DB_PATH=/app/data/gateway.db DB_PATH=/app/data/gateway.db
# NanoClaude
EXAONE_MODEL=exaone3.5:7.8b-instruct-q8_0
NANOCLAUDE_API_KEY=

View File

@@ -9,13 +9,14 @@ GPU 서버(RTX 4070 Ti Super)에서 운영하는 중앙 AI 라우팅 서비스.
|--------|----------|------|------| |--------|----------|------|------|
| Caddy | caddy/ | Caddy 2 | 80/443 | | Caddy | caddy/ | Caddy 2 | 80/443 |
| hub-api | hub-api/ | FastAPI + aiosqlite | 8000 | | hub-api | hub-api/ | FastAPI + aiosqlite | 8000 |
| hub-web | hub-web/ | Vite + React + shadcn/ui | 3000 (Phase 2) | | hub-web | hub-web/ | Vite + React + shadcn/ui | 3000 |
| NanoClaude | nanoclaude/ | FastAPI + aiosqlite | 8100 |
## 외부 연결 ## 외부 연결
- GPU Ollama: host.docker.internal:11434 - GPU Ollama: host.docker.internal:11434
- 맥미니 Ollama: 100.115.153.119:11434 - 맥미니 Ollama: 100.115.153.119:11434
- NanoClaude: 100.115.153.119:PORT (Phase 1.5) - NanoClaude: localhost:8100 (비동기 job 기반 AI Gateway)
## 개발 ## 개발
@@ -37,6 +38,11 @@ OpenAI 호환: `/v1/chat/completions`, `/v1/models`, `/v1/embeddings`
인증: `/auth/login` → Cookie 또는 Bearer 토큰 인증: `/auth/login` → Cookie 또는 Bearer 토큰
모니터링: `/health`, `/gpu` 모니터링: `/health`, `/gpu`
## NanoClaude API
비동기 job 기반: `POST /nano/chat` → `{ job_id }`, `GET /nano/chat/{job_id}/stream` → SSE
취소: `POST /nano/chat/{job_id}/cancel`
## 백엔드 설정 ## 백엔드 설정
`backends.json`에서 백엔드 추가/제거. 서비스 재시작 필요. `backends.json`에서 백엔드 추가/제거. 서비스 재시작 필요.

View File

@@ -16,6 +16,12 @@
handle /gpu { handle /gpu {
reverse_proxy hub-api:8000 reverse_proxy hub-api:8000
} }
handle /nano/* {
uri strip_prefix /nano
reverse_proxy nanoclaude:8100 {
flush_interval -1
}
}
handle { handle {
reverse_proxy hub-web:80 reverse_proxy hub-web:80
} }

View File

@@ -12,6 +12,7 @@ services:
depends_on: depends_on:
- hub-api - hub-api
- hub-web - hub-web
- nanoclaude
networks: networks:
- gateway-net - gateway-net
@@ -46,9 +47,31 @@ services:
networks: networks:
- gateway-net - gateway-net
nanoclaude:
build: ./nanoclaude
container_name: gpu-nanoclaude
restart: unless-stopped
environment:
- EXAONE_BASE_URL=http://host.docker.internal:11434
- EXAONE_MODEL=${EXAONE_MODEL:-exaone3.5:7.8b-instruct-q8_0}
- DB_PATH=/app/data/nanoclaude.db
- API_KEY=${NANOCLAUDE_API_KEY:-}
volumes:
- nano_data:/app/data
extra_hosts:
- "host.docker.internal:host-gateway"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8100/health"]
interval: 15s
timeout: 5s
retries: 3
networks:
- gateway-net
volumes: volumes:
caddy_data: caddy_data:
hub_data: hub_data:
nano_data:
networks: networks:
gateway-net: gateway-net:

View File

@@ -12,6 +12,9 @@ class Settings(BaseSettings):
host: str = "0.0.0.0" host: str = "0.0.0.0"
port: int = 8100 port: int = 8100
# DB
db_path: str = "/app/data/nanoclaude.db"
# Optional API key (empty = disabled) # Optional API key (empty = disabled)
api_key: str = "" api_key: str = ""

View File

47
nanoclaude/db/database.py Normal file
View File

@@ -0,0 +1,47 @@
"""aiosqlite DB — 요청/응답 로깅 및 메트릭."""
import aiosqlite
from config import settings
SCHEMA = """
CREATE TABLE IF NOT EXISTS request_logs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id TEXT NOT NULL,
message TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'queued',
model TEXT NOT NULL,
response_chars INTEGER DEFAULT 0,
latency_ms REAL DEFAULT 0,
created_at REAL NOT NULL,
completed_at REAL
);
CREATE INDEX IF NOT EXISTS idx_logs_job ON request_logs(job_id);
CREATE INDEX IF NOT EXISTS idx_logs_created ON request_logs(created_at);
"""
async def init_db() -> None:
    """Create the request_logs schema if missing and enable WAL journaling.

    Called once at application startup (FastAPI lifespan). WAL mode is
    persisted in the database file, so setting it here covers later
    short-lived connections as well.
    """
    conn = await aiosqlite.connect(settings.db_path)
    try:
        await conn.execute("PRAGMA journal_mode=WAL")
        await conn.executescript(SCHEMA)
        await conn.commit()
    finally:
        await conn.close()
async def log_request(job_id: str, message: str, model: str, created_at: float) -> None:
    """Record a newly submitted chat job.

    Inserts a request_logs row; `status` falls back to the schema default
    ('queued') and completion columns are filled later by log_completion.
    """
    sql = "INSERT INTO request_logs (job_id, message, model, created_at) VALUES (?, ?, ?, ?)"
    params = (job_id, message, model, created_at)
    async with aiosqlite.connect(settings.db_path) as conn:
        await conn.execute(sql, params)
        await conn.commit()
async def log_completion(
    job_id: str,
    status: str,
    response_chars: int,
    latency_ms: float,
    completed_at: float,
) -> None:
    """Finalize the log row for *job_id* with outcome and timing metrics.

    Matching is by job_id; if no row was written by log_request the UPDATE
    silently affects zero rows (best-effort logging).
    """
    sql = "UPDATE request_logs SET status=?, response_chars=?, latency_ms=?, completed_at=? WHERE job_id=?"
    params = (status, response_chars, latency_ms, completed_at, job_id)
    async with aiosqlite.connect(settings.db_path) as conn:
        await conn.execute(sql, params)
        await conn.commit()

View File

@@ -3,12 +3,14 @@
from __future__ import annotations from __future__ import annotations
import logging import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from config import settings from config import settings
from db.database import init_db
from routers import chat from routers import chat
logging.basicConfig( logging.basicConfig(
@@ -16,10 +18,17 @@ logging.basicConfig(
format="%(asctime)s %(levelname)s %(name)s%(message)s", format="%(asctime)s %(levelname)s %(name)s%(message)s",
) )
@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: create the SQLite schema before serving requests;
    # no shutdown work is needed, so nothing follows the yield.
    await init_db()
    yield
app = FastAPI( app = FastAPI(
title="NanoClaude", title="NanoClaude",
version="0.1.0", version="0.1.0",
description="비동기 job 기반 AI Gateway — Phase 1", description="비동기 job 기반 AI Gateway — Phase 1",
lifespan=lifespan,
) )
app.add_middleware( app.add_middleware(

View File

@@ -2,3 +2,4 @@ fastapi==0.115.0
uvicorn[standard]==0.30.0 uvicorn[standard]==0.30.0
httpx==0.27.0 httpx==0.27.0
pydantic-settings==2.5.0 pydantic-settings==2.5.0
aiosqlite==0.20.0

View File

@@ -4,7 +4,10 @@ from __future__ import annotations
import asyncio import asyncio
import logging import logging
from time import time
from config import settings
from db.database import log_completion, log_request
from models.schemas import JobStatus from models.schemas import JobStatus
from services.exaone_adapter import stream_chat from services.exaone_adapter import stream_chat
from services.job_manager import Job, job_manager from services.job_manager import Job, job_manager
@@ -18,6 +21,14 @@ HEARTBEAT_INTERVAL = 4.0
async def run(job: Job) -> None: async def run(job: Job) -> None:
"""EXAONE 호출 → SSE 이벤트 발행.""" """EXAONE 호출 → SSE 이벤트 발행."""
start_time = time()
# DB 로깅: 요청 기록
try:
await log_request(job.id, job.message, settings.exaone_model, job.created_at)
except Exception:
logger.warning("Failed to log request for job %s", job.id, exc_info=True)
try: try:
# --- ACK --- # --- ACK ---
await state_stream.push(job.id, "ack", {"message": "요청을 확인했습니다. 분석을 시작합니다."}) await state_stream.push(job.id, "ack", {"message": "요청을 확인했습니다. 분석을 시작합니다."})
@@ -50,9 +61,19 @@ async def run(job: Job) -> None:
if not collected: if not collected:
job_manager.set_status(job.id, JobStatus.failed) job_manager.set_status(job.id, JobStatus.failed)
await state_stream.push(job.id, "error", {"message": "EXAONE으로부터 응답을 받지 못했습니다."}) await state_stream.push(job.id, "error", {"message": "EXAONE으로부터 응답을 받지 못했습니다."})
status = "failed"
else: else:
job_manager.set_status(job.id, JobStatus.completed) job_manager.set_status(job.id, JobStatus.completed)
await state_stream.push(job.id, "done", {"message": "완료"}) await state_stream.push(job.id, "done", {"message": "완료"})
status = "completed"
# DB 로깅: 완료 기록
latency_ms = (time() - start_time) * 1000
response_text = "".join(collected)
try:
await log_completion(job.id, status, len(response_text), latency_ms, time())
except Exception:
logger.warning("Failed to log completion for job %s", job.id, exc_info=True)
except asyncio.CancelledError: except asyncio.CancelledError:
job_manager.set_status(job.id, JobStatus.cancelled) job_manager.set_status(job.id, JobStatus.cancelled)