feat: scaffold v2 project structure with Docker, FastAPI, and config
동작하는 최소 코드 수준의 v2 스캐폴딩: - docker-compose.yml: postgres, fastapi, kordoc, frontend, caddy - app/: FastAPI 백엔드 (main, core, models, ai, prompts) - services/kordoc/: Node.js 문서 파싱 마이크로서비스 - gpu-server/: AI Gateway + GPU docker-compose - frontend/: SvelteKit 기본 구조 - migrations/: PostgreSQL 초기 스키마 (documents, tasks, processing_queue) - tests/: pytest conftest 기본 설정 - config.yaml, Caddyfile, credentials.env.example 갱신 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
33
gpu-server/docker-compose.yml
Normal file
33
gpu-server/docker-compose.yml
Normal file
@@ -0,0 +1,33 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
ollama:
|
||||
image: ollama/ollama
|
||||
volumes:
|
||||
- ollama_data:/root/.ollama
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
ports:
|
||||
- "11434:11434"
|
||||
restart: unless-stopped
|
||||
|
||||
ai-gateway:
|
||||
build: ./services/ai-gateway
|
||||
ports:
|
||||
- "8080:8080"
|
||||
environment:
|
||||
- PRIMARY_ENDPOINT=${PRIMARY_ENDPOINT:-http://mac-mini:8800/v1/chat/completions}
|
||||
- FALLBACK_ENDPOINT=http://ollama:11434/v1/chat/completions
|
||||
- CLAUDE_API_KEY=${CLAUDE_API_KEY:-}
|
||||
- DAILY_BUDGET_USD=${DAILY_BUDGET_USD:-5.00}
|
||||
depends_on:
|
||||
- ollama
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
ollama_data:
|
||||
10
gpu-server/services/ai-gateway/Dockerfile
Normal file
10
gpu-server/services/ai-gateway/Dockerfile
Normal file
@@ -0,0 +1,10 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY server.py .
|
||||
|
||||
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8080"]
|
||||
3
gpu-server/services/ai-gateway/requirements.txt
Normal file
3
gpu-server/services/ai-gateway/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
fastapi>=0.110.0
|
||||
uvicorn[standard]>=0.27.0
|
||||
httpx>=0.27.0
|
||||
58
gpu-server/services/ai-gateway/server.py
Normal file
58
gpu-server/services/ai-gateway/server.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""AI Gateway — 모델 라우팅, 폴백, 비용 제어, 요청 로깅"""
|
||||
|
||||
import os
|
||||
from datetime import date
|
||||
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
import httpx
|
||||
|
||||
app = FastAPI(title="AI Gateway", version="1.0.0")

# Upstream endpoints and premium-tier settings. Each value can be overridden
# through the environment; the defaults point at services on localhost.
PRIMARY = os.environ.get("PRIMARY_ENDPOINT", "http://localhost:8800/v1/chat/completions")
FALLBACK = os.environ.get("FALLBACK_ENDPOINT", "http://localhost:11434/v1/chat/completions")
CLAUDE_API_KEY = os.environ.get("CLAUDE_API_KEY", "")
DAILY_BUDGET = float(os.environ.get("DAILY_BUDGET_USD", "5.00"))

# Daily cost tracking (kept in memory only, so it resets on restart).
_daily_cost: dict[str, float] = {}
# Shared async HTTP client; 120 s is the default timeout for upstream calls.
_http = httpx.AsyncClient(timeout=120)
|
||||
|
||||
|
||||
@app.get("/health")
async def health():
    """Report liveness by returning a fixed status payload."""
    payload = dict(status="ok", service="ai-gateway")
    return payload
|
||||
|
||||
|
||||
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Serve an OpenAI-compatible chat request with automatic fallback.

    Requests carrying the header ``x-model-tier: premium`` are handed to
    ``_call_premium``. Every other request is forwarded to ``PRIMARY`` and,
    if that attempt fails, to ``FALLBACK``.

    Args:
        request: Incoming request; its JSON body is forwarded upstream as-is.

    Returns:
        A ``JSONResponse`` wrapping the JSON payload of the first upstream
        that answers successfully.

    Raises:
        HTTPException: 400 when the request body is not valid JSON; 504 when
            the last upstream attempt timed out; 502 when it failed in any
            other way (connection error, error status, non-JSON body).
    """
    try:
        body = await request.json()
    except ValueError as exc:
        # json.JSONDecodeError is a ValueError; report a client error instead
        # of letting it surface as an unhandled 500.
        raise HTTPException(400, "요청 본문이 유효한 JSON이 아닙니다") from exc

    tier = request.headers.get("x-model-tier", "primary")
    if tier == "premium":
        return await _call_premium(body)

    # Primary -> fallback. The primary gets a shorter timeout so a stalled
    # primary does not consume the caller's whole time budget.
    last_error: Exception | None = None
    for endpoint, timeout in ((PRIMARY, 60), (FALLBACK, 120)):
        try:
            resp = await _http.post(endpoint, json=body, timeout=timeout)
            resp.raise_for_status()
            return JSONResponse(content=resp.json())
        except (httpx.HTTPError, ValueError) as exc:
            # httpx.HTTPError covers timeouts, connection/read/protocol errors
            # and error statuses; ValueError covers a non-JSON upstream body.
            last_error = exc

    # Both upstreams failed: answer with a gateway error rather than a 500.
    status = 504 if isinstance(last_error, httpx.TimeoutException) else 502
    raise HTTPException(status, "모든 업스트림 모델 엔드포인트 호출 실패") from last_error
|
||||
|
||||
|
||||
async def _call_premium(body: dict):
    """Call the Claude API under the daily budget cap (not implemented yet).

    Currently this always raises. The checks run in this order:

    Raises:
        HTTPException: 429 when the cost recorded for today has reached
            ``DAILY_BUDGET``; 503 when ``CLAUDE_API_KEY`` is empty; otherwise
            501, because the actual API call is not implemented.
    """
    spent_today = _daily_cost.get(date.today().isoformat(), 0)
    if DAILY_BUDGET <= spent_today:
        raise HTTPException(429, f"일일 예산 초과: ${DAILY_BUDGET}")

    if CLAUDE_API_KEY:
        # TODO: call the Anthropic API and compute the cost (planned for Phase 3).
        raise HTTPException(501, "Premium 모델 호출은 Phase 3에서 구현")

    raise HTTPException(503, "CLAUDE_API_KEY 미설정")
|
||||
Reference in New Issue
Block a user