feat: NanoClaude 프로덕션 통합 — Docker, Caddy, aiosqlite 로깅
- docker-compose에 nanoclaude 서비스 추가 (포트 8100)
- Caddy /nano/* → nanoclaude 리버스 프록시 (SSE flush)
- aiosqlite 요청/응답 로깅 (request_logs 테이블)
- .env.example, CLAUDE.md 업데이트

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,3 +14,7 @@ BACKENDS_CONFIG=/app/config/backends.json
|
|||||||
|
|
||||||
# DB path (in Docker)
|
# DB path (in Docker)
|
||||||
DB_PATH=/app/data/gateway.db
|
DB_PATH=/app/data/gateway.db
|
||||||
|
|
||||||
|
# NanoClaude
|
||||||
|
EXAONE_MODEL=exaone3.5:7.8b-instruct-q8_0
|
||||||
|
NANOCLAUDE_API_KEY=
|
||||||
|
|||||||
10
CLAUDE.md
10
CLAUDE.md
@@ -9,13 +9,14 @@ GPU 서버(RTX 4070 Ti Super)에서 운영하는 중앙 AI 라우팅 서비스.
|
|||||||
|--------|----------|------|------|
|
|--------|----------|------|------|
|
||||||
| Caddy | caddy/ | Caddy 2 | 80/443 |
|
| Caddy | caddy/ | Caddy 2 | 80/443 |
|
||||||
| hub-api | hub-api/ | FastAPI + aiosqlite | 8000 |
|
| hub-api | hub-api/ | FastAPI + aiosqlite | 8000 |
|
||||||
| hub-web | hub-web/ | Vite + React + shadcn/ui | 3000 (Phase 2) |
|
| hub-web | hub-web/ | Vite + React + shadcn/ui | 3000 |
|
||||||
|
| NanoClaude | nanoclaude/ | FastAPI + aiosqlite | 8100 |
|
||||||
|
|
||||||
## 외부 연결
|
## 외부 연결
|
||||||
|
|
||||||
- GPU Ollama: host.docker.internal:11434
|
- GPU Ollama: host.docker.internal:11434
|
||||||
- 맥미니 Ollama: 100.115.153.119:11434
|
- 맥미니 Ollama: 100.115.153.119:11434
|
||||||
- NanoClaude: 100.115.153.119:PORT (Phase 1.5)
|
- NanoClaude: localhost:8100 (비동기 job 기반 AI Gateway)
|
||||||
|
|
||||||
## 개발
|
## 개발
|
||||||
|
|
||||||
@@ -37,6 +38,11 @@ OpenAI 호환: `/v1/chat/completions`, `/v1/models`, `/v1/embeddings`
|
|||||||
인증: `/auth/login` → Cookie 또는 Bearer 토큰
|
인증: `/auth/login` → Cookie 또는 Bearer 토큰
|
||||||
모니터링: `/health`, `/gpu`
|
모니터링: `/health`, `/gpu`
|
||||||
|
|
||||||
|
## NanoClaude API
|
||||||
|
|
||||||
|
비동기 job 기반: `POST /nano/chat` → `{ job_id }`, `GET /nano/chat/{job_id}/stream` → SSE
|
||||||
|
취소: `POST /nano/chat/{job_id}/cancel`
|
||||||
|
|
||||||
## 백엔드 설정
|
## 백엔드 설정
|
||||||
|
|
||||||
`backends.json`에서 백엔드 추가/제거. 서비스 재시작 필요.
|
`backends.json`에서 백엔드 추가/제거. 서비스 재시작 필요.
|
||||||
|
|||||||
@@ -16,6 +16,12 @@
|
|||||||
handle /gpu {
|
handle /gpu {
|
||||||
reverse_proxy hub-api:8000
|
reverse_proxy hub-api:8000
|
||||||
}
|
}
|
||||||
|
handle /nano/* {
|
||||||
|
uri strip_prefix /nano
|
||||||
|
reverse_proxy nanoclaude:8100 {
|
||||||
|
flush_interval -1
|
||||||
|
}
|
||||||
|
}
|
||||||
handle {
|
handle {
|
||||||
reverse_proxy hub-web:80
|
reverse_proxy hub-web:80
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ services:
|
|||||||
depends_on:
|
depends_on:
|
||||||
- hub-api
|
- hub-api
|
||||||
- hub-web
|
- hub-web
|
||||||
|
- nanoclaude
|
||||||
networks:
|
networks:
|
||||||
- gateway-net
|
- gateway-net
|
||||||
|
|
||||||
@@ -46,9 +47,31 @@ services:
|
|||||||
networks:
|
networks:
|
||||||
- gateway-net
|
- gateway-net
|
||||||
|
|
||||||
|
nanoclaude:
|
||||||
|
build: ./nanoclaude
|
||||||
|
container_name: gpu-nanoclaude
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
- EXAONE_BASE_URL=http://host.docker.internal:11434
|
||||||
|
- EXAONE_MODEL=${EXAONE_MODEL:-exaone3.5:7.8b-instruct-q8_0}
|
||||||
|
- DB_PATH=/app/data/nanoclaude.db
|
||||||
|
- API_KEY=${NANOCLAUDE_API_KEY:-}
|
||||||
|
volumes:
|
||||||
|
- nano_data:/app/data
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:8100/health"]
|
||||||
|
interval: 15s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
networks:
|
||||||
|
- gateway-net
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
caddy_data:
|
caddy_data:
|
||||||
hub_data:
|
hub_data:
|
||||||
|
nano_data:
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
gateway-net:
|
gateway-net:
|
||||||
|
|||||||
@@ -12,6 +12,9 @@ class Settings(BaseSettings):
|
|||||||
host: str = "0.0.0.0"
|
host: str = "0.0.0.0"
|
||||||
port: int = 8100
|
port: int = 8100
|
||||||
|
|
||||||
|
# DB
|
||||||
|
db_path: str = "/app/data/nanoclaude.db"
|
||||||
|
|
||||||
# Optional API key (empty = disabled)
|
# Optional API key (empty = disabled)
|
||||||
api_key: str = ""
|
api_key: str = ""
|
||||||
|
|
||||||
|
|||||||
0
nanoclaude/db/__init__.py
Normal file
0
nanoclaude/db/__init__.py
Normal file
47
nanoclaude/db/database.py
Normal file
47
nanoclaude/db/database.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
"""aiosqlite DB — 요청/응답 로깅 및 메트릭."""
|
||||||
|
|
||||||
|
import aiosqlite
|
||||||
|
|
||||||
|
from config import settings
|
||||||
|
|
||||||
|
SCHEMA = """
|
||||||
|
CREATE TABLE IF NOT EXISTS request_logs (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
job_id TEXT NOT NULL,
|
||||||
|
message TEXT NOT NULL,
|
||||||
|
status TEXT NOT NULL DEFAULT 'queued',
|
||||||
|
model TEXT NOT NULL,
|
||||||
|
response_chars INTEGER DEFAULT 0,
|
||||||
|
latency_ms REAL DEFAULT 0,
|
||||||
|
created_at REAL NOT NULL,
|
||||||
|
completed_at REAL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_logs_job ON request_logs(job_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_logs_created ON request_logs(created_at);
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
async def init_db():
|
||||||
|
async with aiosqlite.connect(settings.db_path) as db:
|
||||||
|
await db.execute("PRAGMA journal_mode=WAL")
|
||||||
|
await db.executescript(SCHEMA)
|
||||||
|
await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def log_request(job_id: str, message: str, model: str, created_at: float):
|
||||||
|
async with aiosqlite.connect(settings.db_path) as db:
|
||||||
|
await db.execute(
|
||||||
|
"INSERT INTO request_logs (job_id, message, model, created_at) VALUES (?, ?, ?, ?)",
|
||||||
|
(job_id, message, model, created_at),
|
||||||
|
)
|
||||||
|
await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def log_completion(job_id: str, status: str, response_chars: int, latency_ms: float, completed_at: float):
|
||||||
|
async with aiosqlite.connect(settings.db_path) as db:
|
||||||
|
await db.execute(
|
||||||
|
"UPDATE request_logs SET status=?, response_chars=?, latency_ms=?, completed_at=? WHERE job_id=?",
|
||||||
|
(status, response_chars, latency_ms, completed_at, job_id),
|
||||||
|
)
|
||||||
|
await db.commit()
|
||||||
@@ -3,12 +3,14 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
from fastapi import FastAPI, Request
|
from fastapi import FastAPI, Request
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
from config import settings
|
from config import settings
|
||||||
|
from db.database import init_db
|
||||||
from routers import chat
|
from routers import chat
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
@@ -16,10 +18,17 @@ logging.basicConfig(
|
|||||||
format="%(asctime)s %(levelname)s %(name)s — %(message)s",
|
format="%(asctime)s %(levelname)s %(name)s — %(message)s",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def lifespan(app: FastAPI):
|
||||||
|
await init_db()
|
||||||
|
yield
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI(
|
app = FastAPI(
|
||||||
title="NanoClaude",
|
title="NanoClaude",
|
||||||
version="0.1.0",
|
version="0.1.0",
|
||||||
description="비동기 job 기반 AI Gateway — Phase 1",
|
description="비동기 job 기반 AI Gateway — Phase 1",
|
||||||
|
lifespan=lifespan,
|
||||||
)
|
)
|
||||||
|
|
||||||
app.add_middleware(
|
app.add_middleware(
|
||||||
|
|||||||
@@ -2,3 +2,4 @@ fastapi==0.115.0
|
|||||||
uvicorn[standard]==0.30.0
|
uvicorn[standard]==0.30.0
|
||||||
httpx==0.27.0
|
httpx==0.27.0
|
||||||
pydantic-settings==2.5.0
|
pydantic-settings==2.5.0
|
||||||
|
aiosqlite==0.20.0
|
||||||
|
|||||||
@@ -4,7 +4,10 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
from time import time
|
||||||
|
|
||||||
|
from config import settings
|
||||||
|
from db.database import log_completion, log_request
|
||||||
from models.schemas import JobStatus
|
from models.schemas import JobStatus
|
||||||
from services.exaone_adapter import stream_chat
|
from services.exaone_adapter import stream_chat
|
||||||
from services.job_manager import Job, job_manager
|
from services.job_manager import Job, job_manager
|
||||||
@@ -18,6 +21,14 @@ HEARTBEAT_INTERVAL = 4.0
|
|||||||
|
|
||||||
async def run(job: Job) -> None:
|
async def run(job: Job) -> None:
|
||||||
"""EXAONE 호출 → SSE 이벤트 발행."""
|
"""EXAONE 호출 → SSE 이벤트 발행."""
|
||||||
|
start_time = time()
|
||||||
|
|
||||||
|
# DB 로깅: 요청 기록
|
||||||
|
try:
|
||||||
|
await log_request(job.id, job.message, settings.exaone_model, job.created_at)
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Failed to log request for job %s", job.id, exc_info=True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# --- ACK ---
|
# --- ACK ---
|
||||||
await state_stream.push(job.id, "ack", {"message": "요청을 확인했습니다. 분석을 시작합니다."})
|
await state_stream.push(job.id, "ack", {"message": "요청을 확인했습니다. 분석을 시작합니다."})
|
||||||
@@ -50,9 +61,19 @@ async def run(job: Job) -> None:
|
|||||||
if not collected:
|
if not collected:
|
||||||
job_manager.set_status(job.id, JobStatus.failed)
|
job_manager.set_status(job.id, JobStatus.failed)
|
||||||
await state_stream.push(job.id, "error", {"message": "EXAONE으로부터 응답을 받지 못했습니다."})
|
await state_stream.push(job.id, "error", {"message": "EXAONE으로부터 응답을 받지 못했습니다."})
|
||||||
|
status = "failed"
|
||||||
else:
|
else:
|
||||||
job_manager.set_status(job.id, JobStatus.completed)
|
job_manager.set_status(job.id, JobStatus.completed)
|
||||||
await state_stream.push(job.id, "done", {"message": "완료"})
|
await state_stream.push(job.id, "done", {"message": "완료"})
|
||||||
|
status = "completed"
|
||||||
|
|
||||||
|
# DB 로깅: 완료 기록
|
||||||
|
latency_ms = (time() - start_time) * 1000
|
||||||
|
response_text = "".join(collected)
|
||||||
|
try:
|
||||||
|
await log_completion(job.id, status, len(response_text), latency_ms, time())
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Failed to log completion for job %s", job.id, exc_info=True)
|
||||||
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
job_manager.set_status(job.id, JobStatus.cancelled)
|
job_manager.set_status(job.id, JobStatus.cancelled)
|
||||||
|
|||||||
Reference in New Issue
Block a user