동작하는 최소 코드 수준의 v2 스캐폴딩: - docker-compose.yml: postgres, fastapi, kordoc, frontend, caddy - app/: FastAPI 백엔드 (main, core, models, ai, prompts) - services/kordoc/: Node.js 문서 파싱 마이크로서비스 - gpu-server/: AI Gateway + GPU docker-compose - frontend/: SvelteKit 기본 구조 - migrations/: PostgreSQL 초기 스키마 (documents, tasks, processing_queue) - tests/: pytest conftest 기본 설정 - config.yaml, Caddyfile, credentials.env.example 갱신 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
59 lines
2.0 KiB
Python
59 lines
2.0 KiB
Python
"""AI Gateway — model routing, fallback, cost control, request logging"""
|
|
|
|
import os
|
|
from datetime import date
|
|
|
|
from fastapi import FastAPI, HTTPException, Request
|
|
from fastapi.responses import JSONResponse
|
|
import httpx
|
|
|
|
# ASGI application object; the route decorators in this module attach to it.
app = FastAPI(title="AI Gateway", version="1.0.0")

# Upstream endpoints and premium-tier settings. Each value is read from the
# environment once at import time and falls back to the literal default.
PRIMARY: str = os.environ.get(
    "PRIMARY_ENDPOINT", "http://localhost:8800/v1/chat/completions"
)
FALLBACK: str = os.environ.get(
    "FALLBACK_ENDPOINT", "http://localhost:11434/v1/chat/completions"
)
CLAUDE_API_KEY: str = os.environ.get("CLAUDE_API_KEY", "")
DAILY_BUDGET: float = float(os.environ.get("DAILY_BUDGET_USD", "5.00"))

# Daily cost tracking (held in memory only, so it resets on restart).
_daily_cost: dict[str, float] = {}

# Shared async HTTP client; 120 s is the default timeout for calls that do
# not pass their own.
_http = httpx.AsyncClient(timeout=120)
|
|
|
|
|
|
@app.get("/health")
async def health():
    """Health-check endpoint returning a fixed status payload."""
    payload = {"status": "ok", "service": "ai-gateway"}
    return payload
|
|
|
|
|
|
async def _post_json(url: str, body, timeout: float):
    """POST *body* as JSON to *url* and return the decoded JSON response.

    Raises:
        httpx.HTTPError: on a transport failure or a non-success HTTP status.
        ValueError: if the response body cannot be decoded as JSON.
    """
    resp = await _http.post(url, json=body, timeout=timeout)
    resp.raise_for_status()
    return resp.json()


@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """OpenAI-compatible chat endpoint with automatic fallback.

    The request body is forwarded unchanged. When the ``x-model-tier`` header
    is ``premium`` the request is delegated to ``_call_premium``; otherwise it
    is sent to ``PRIMARY`` (60 s timeout) and, if that fails, to ``FALLBACK``
    (120 s timeout).

    Raises:
        HTTPException: 400 if the request body is not valid JSON; 502 if both
            the primary and the fallback upstream fail.
    """
    try:
        body = await request.json()
    except ValueError as err:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; report them as a client error instead of a bare 500.
        raise HTTPException(400, "요청 본문이 올바른 JSON이 아닙니다") from err

    tier = request.headers.get("x-model-tier", "primary")
    if tier == "premium":
        return await _call_premium(body)

    # Primary -> Fallback. httpx.HTTPError is the common base of transport
    # errors (timeouts, connect/read failures, protocol errors) and
    # HTTPStatusError, so every upstream failure mode triggers the fallback.
    try:
        payload = await _post_json(PRIMARY, body, timeout=60)
    except (httpx.HTTPError, ValueError):
        try:
            payload = await _post_json(FALLBACK, body, timeout=120)
        except (httpx.HTTPError, ValueError) as err:
            # Both upstreams failed: surface a gateway error rather than
            # letting the raw httpx exception escape as an opaque 500.
            raise HTTPException(
                502, "Primary 및 Fallback 엔드포인트 모두 요청 실패"
            ) from err
    return JSONResponse(content=payload)
|
|
|
|
|
|
async def _call_premium(body: dict):
    """Premium (Claude API) call path, guarded by a daily budget check.

    The actual upstream call is not implemented yet, so once both guards
    pass this function always raises 501.

    Raises:
        HTTPException: 429 when the cost recorded for today has reached
            ``DAILY_BUDGET``; 503 when ``CLAUDE_API_KEY`` is empty; 501
            otherwise.
    """
    spent_today = _daily_cost.get(date.today().isoformat(), 0)
    budget_exhausted = spent_today >= DAILY_BUDGET
    if budget_exhausted:
        raise HTTPException(429, f"일일 예산 초과: ${DAILY_BUDGET}")

    key_configured = bool(CLAUDE_API_KEY)
    if not key_configured:
        raise HTTPException(503, "CLAUDE_API_KEY 미설정")

    # TODO: call the Anthropic API and compute the cost (planned for Phase 3)
    raise HTTPException(501, "Premium 모델 호출은 Phase 3에서 구현")
|