feat: AI 서비스 MLX 듀얼 백엔드 및 모델 최적화

- MLX(맥미니 27B) 우선 → Ollama(조립컴 9B) fallback 구조
- pydantic-settings 기반 config 전환
- health check에 MLX 상태 추가
- 텍스트 모델 qwen3:8b → qwen3.5:9b-q8_0 변경

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-03-06 23:17:50 +09:00
parent cad662473b
commit 2f7e083db0
14 changed files with 231 additions and 140 deletions

View File

@@ -1,10 +1,28 @@
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import JSONResponse
from routers import health, embeddings, classification, daily_report, rag
from db.vector_store import vector_store
from db.metadata_store import metadata_store
from services.ollama_client import ollama_client
from middlewares.auth import verify_token
PUBLIC_PATHS = {"/", "/api/ai/health", "/api/ai/models"}
class AuthMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: Request, call_next):
if request.method == "OPTIONS" or request.url.path in PUBLIC_PATHS:
return await call_next(request)
try:
request.state.user = await verify_token(request)
except Exception as e:
return JSONResponse(status_code=401, content={"detail": str(e.detail) if hasattr(e, "detail") else "인증 실패"})
return await call_next(request)
@asynccontextmanager
@@ -12,6 +30,7 @@ async def lifespan(app: FastAPI):
vector_store.initialize()
metadata_store.initialize()
yield
await ollama_client.close()
app = FastAPI(
@@ -21,14 +40,25 @@ app = FastAPI(
lifespan=lifespan,
)
ALLOWED_ORIGINS = [
"https://tkfb.technicalkorea.net",
"https://tkreport.technicalkorea.net",
"https://tkqc.technicalkorea.net",
"https://tkuser.technicalkorea.net",
]
if os.getenv("ENV", "production") == "development":
ALLOWED_ORIGINS += ["http://localhost:30080", "http://localhost:30180", "http://localhost:30280"]
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=False,
allow_origins=ALLOWED_ORIGINS,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
app.add_middleware(AuthMiddleware)
app.include_router(health.router, prefix="/api/ai")
app.include_router(embeddings.router, prefix="/api/ai")
app.include_router(classification.router, prefix="/api/ai")