Files
tk-factory-services/ai-service/main.py
Hyungi Ahn 2f7e083db0 feat: AI 서비스 MLX 듀얼 백엔드 및 모델 최적화
- MLX(맥미니 27B) 우선 → Ollama(조립컴 9B) fallback 구조
- pydantic-settings 기반 config 전환
- health check에 MLX 상태 추가
- 텍스트 모델 qwen3:8b → qwen3.5:9b-q8_0 변경

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 23:17:50 +09:00

72 lines
2.2 KiB
Python

import os
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import JSONResponse
from routers import health, embeddings, classification, daily_report, rag
from db.vector_store import vector_store
from db.metadata_store import metadata_store
from services.ollama_client import ollama_client
from middlewares.auth import verify_token
PUBLIC_PATHS = {"/", "/api/ai/health", "/api/ai/models"}
class AuthMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: Request, call_next):
if request.method == "OPTIONS" or request.url.path in PUBLIC_PATHS:
return await call_next(request)
try:
request.state.user = await verify_token(request)
except Exception as e:
return JSONResponse(status_code=401, content={"detail": str(e.detail) if hasattr(e, "detail") else "인증 실패"})
return await call_next(request)
@asynccontextmanager
async def lifespan(app: FastAPI):
vector_store.initialize()
metadata_store.initialize()
yield
await ollama_client.close()
app = FastAPI(
title="TK AI Service",
description="AI 서비스 (유사 검색, 분류, 보고서)",
version="1.0.0",
lifespan=lifespan,
)
ALLOWED_ORIGINS = [
"https://tkfb.technicalkorea.net",
"https://tkreport.technicalkorea.net",
"https://tkqc.technicalkorea.net",
"https://tkuser.technicalkorea.net",
]
if os.getenv("ENV", "production") == "development":
ALLOWED_ORIGINS += ["http://localhost:30080", "http://localhost:30180", "http://localhost:30280"]
app.add_middleware(
CORSMiddleware,
allow_origins=ALLOWED_ORIGINS,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
app.add_middleware(AuthMiddleware)
app.include_router(health.router, prefix="/api/ai")
app.include_router(embeddings.router, prefix="/api/ai")
app.include_router(classification.router, prefix="/api/ai")
app.include_router(daily_report.router, prefix="/api/ai")
app.include_router(rag.router, prefix="/api/ai")
@app.get("/")
async def root():
return {"message": "TK AI Service", "version": "1.0.0"}