"""AI Gateway — 모델 라우팅, 폴백, 비용 제어, 요청 로깅""" import os from datetime import date from fastapi import FastAPI, HTTPException, Request from fastapi.responses import JSONResponse import httpx app = FastAPI(title="AI Gateway", version="1.0.0") PRIMARY = os.getenv("PRIMARY_ENDPOINT", "http://localhost:8800/v1/chat/completions") FALLBACK = os.getenv("FALLBACK_ENDPOINT", "http://localhost:11434/v1/chat/completions") CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY", "") DAILY_BUDGET = float(os.getenv("DAILY_BUDGET_USD", "5.00")) # 일일 비용 추적 (메모리, 재시작 시 리셋) _daily_cost: dict[str, float] = {} _http = httpx.AsyncClient(timeout=120) @app.get("/health") async def health(): return {"status": "ok", "service": "ai-gateway"} @app.post("/v1/chat/completions") async def chat_completions(request: Request): """OpenAI 호환 채팅 엔드포인트 — 자동 폴백""" body = await request.json() tier = request.headers.get("x-model-tier", "primary") if tier == "premium": return await _call_premium(body) # Primary → Fallback 폴백 try: resp = await _http.post(PRIMARY, json=body, timeout=60) resp.raise_for_status() return JSONResponse(content=resp.json()) except (httpx.TimeoutException, httpx.ConnectError, httpx.HTTPStatusError): # 폴백 resp = await _http.post(FALLBACK, json=body, timeout=120) resp.raise_for_status() return JSONResponse(content=resp.json()) async def _call_premium(body: dict): """Claude API 호출 — 비용 제어""" today = date.today().isoformat() if _daily_cost.get(today, 0) >= DAILY_BUDGET: raise HTTPException(429, f"일일 예산 초과: ${DAILY_BUDGET}") if not CLAUDE_API_KEY: raise HTTPException(503, "CLAUDE_API_KEY 미설정") # TODO: Anthropic API 호출 + 비용 계산 (Phase 3에서 구현) raise HTTPException(501, "Premium 모델 호출은 Phase 3에서 구현")