feat: ai-service를 ds923에서 맥미니로 이전

- ChromaDB → Qdrant 전환 (맥미니 기존 인스턴스, tk_qc_issues 컬렉션)
- Ollama 임베딩/텍스트 생성 URL 분리 (임베딩: 맥미니, 텍스트: GPU서버)
- MLX fallback 제거, Ollama 단일 경로로 단순화
- ds923 docker-compose에서 ai-service 제거
- gateway/system3-web nginx: ai-service 프록시를 ai.hyungi.net 경유로 변경
- resolver + 변수 기반 proxy_pass로 런타임 DNS 해석 (컨테이너 시작 실패 방지)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-03-11 15:07:58 +09:00
parent 2d25d54589
commit 85f674c9cb
9 changed files with 125 additions and 130 deletions

View File

@@ -4,6 +4,6 @@ RUN apt-get update && apt-get install -y gcc build-essential && rm -rf /var/lib/
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
-RUN mkdir -p /app/data/chroma
+RUN mkdir -p /app/data
 EXPOSE 8000
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -2,16 +2,22 @@ from pydantic_settings import BaseSettings
class Settings(BaseSettings): class Settings(BaseSettings):
OLLAMA_BASE_URL: str = "https://gpu.hyungi.net" # GPU서버 Ollama (텍스트 생성)
OLLAMA_BASE_URL: str = "http://192.168.1.186:11434"
OLLAMA_TEXT_MODEL: str = "qwen3.5:9b-q8_0" OLLAMA_TEXT_MODEL: str = "qwen3.5:9b-q8_0"
OLLAMA_EMBED_MODEL: str = "bge-m3"
OLLAMA_TIMEOUT: int = 120 OLLAMA_TIMEOUT: int = 120
MLX_BASE_URL: str = "https://llm.hyungi.net" # 맥미니 Ollama (임베딩) — OrbStack: host.internal / Docker Desktop: host.docker.internal
MLX_TEXT_MODEL: str = "/Users/hyungi/mlx-models/Qwen3.5-27B-4bit" OLLAMA_EMBED_URL: str = "http://host.internal:11434"
OLLAMA_EMBED_MODEL: str = "bge-m3"
DB_HOST: str = "mariadb" # 맥미니 Qdrant (기존 인스턴스, 회사 전용 컬렉션)
DB_PORT: int = 3306 QDRANT_URL: str = "http://host.internal:6333"
QDRANT_COLLECTION: str = "tk_qc_issues"
# ds923 MariaDB (Tailscale)
DB_HOST: str = "100.71.132.52"
DB_PORT: int = 30306
DB_USER: str = "hyungi_user" DB_USER: str = "hyungi_user"
DB_PASSWORD: str = "" DB_PASSWORD: str = ""
DB_NAME: str = "hyungi" DB_NAME: str = "hyungi"
@@ -19,8 +25,8 @@ class Settings(BaseSettings):
SECRET_KEY: str = "" SECRET_KEY: str = ""
ALGORITHM: str = "HS256" ALGORITHM: str = "HS256"
SYSTEM1_API_URL: str = "http://system1-api:3005" # ds923 System1 API (Tailscale)
CHROMA_PERSIST_DIR: str = "/app/data/chroma" SYSTEM1_API_URL: str = "http://100.71.132.52:30005"
METADATA_DB_PATH: str = "/app/data/metadata.db" METADATA_DB_PATH: str = "/app/data/metadata.db"
class Config: class Config:

View File

@@ -1,18 +1,31 @@
import chromadb import uuid
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
from config import settings from config import settings
class VectorStore: class VectorStore:
def __init__(self): def __init__(self):
self.client = None self.client = None
self.collection = None self.collection = settings.QDRANT_COLLECTION # "tk_qc_issues"
def initialize(self): def initialize(self):
self.client = chromadb.PersistentClient(path=settings.CHROMA_PERSIST_DIR) self.client = QdrantClient(url=settings.QDRANT_URL)
self.collection = self.client.get_or_create_collection( self._ensure_collection()
name="qc_issues",
metadata={"hnsw:space": "cosine"}, def _ensure_collection(self):
) collections = [c.name for c in self.client.get_collections().collections]
if self.collection not in collections:
# bge-m3 기본 출력 = 1024 dims
self.client.create_collection(
collection_name=self.collection,
vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
)
@staticmethod
def _to_uuid(doc_id) -> str:
"""문자열/정수 ID → UUID5 변환 (Qdrant 호환)"""
return str(uuid.uuid5(uuid.NAMESPACE_URL, str(doc_id)))
def upsert( def upsert(
self, self,
@@ -21,11 +34,13 @@ class VectorStore:
embedding: list[float], embedding: list[float],
metadata: dict = None, metadata: dict = None,
): ):
self.collection.upsert( point_id = self._to_uuid(doc_id)
ids=[doc_id], payload = {"document": document, "original_id": str(doc_id)}
documents=[document], if metadata:
embeddings=[embedding], payload.update(metadata)
metadatas=[metadata] if metadata else None, self.client.upsert(
collection_name=self.collection,
points=[PointStruct(id=point_id, vector=embedding, payload=payload)],
) )
def query( def query(
@@ -34,42 +49,53 @@ class VectorStore:
n_results: int = 5, n_results: int = 5,
where: dict = None, where: dict = None,
) -> list[dict]: ) -> list[dict]:
kwargs = { query_filter = self._build_filter(where) if where else None
"query_embeddings": [embedding],
"n_results": n_results,
"include": ["documents", "metadatas", "distances"],
}
if where:
kwargs["where"] = where
try: try:
results = self.collection.query(**kwargs) results = self.client.search(
collection_name=self.collection,
query_vector=embedding,
limit=n_results,
query_filter=query_filter,
)
except Exception: except Exception:
return [] return []
items = [] items = []
if results and results["ids"] and results["ids"][0]: for hit in results:
for i, doc_id in enumerate(results["ids"][0]): payload = hit.payload or {}
item = { item = {
"id": doc_id, "id": payload.get("original_id", str(hit.id)),
"document": results["documents"][0][i] if results["documents"] else "", "document": payload.get("document", ""),
"distance": results["distances"][0][i] if results["distances"] else 0, "distance": round(1 - hit.score, 4), # cosine score → distance
"metadata": results["metadatas"][0][i] if results["metadatas"] else {}, "metadata": {k: v for k, v in payload.items() if k not in ("document", "original_id")},
} "similarity": round(hit.score, 4),
# cosine distance → similarity }
item["similarity"] = round(1 - item["distance"], 4) items.append(item)
items.append(item)
return items return items
@staticmethod
def _build_filter(where: dict) -> Filter:
"""ChromaDB 스타일 where 조건 → Qdrant Filter 변환"""
conditions = []
for key, value in where.items():
conditions.append(FieldCondition(key=key, match=MatchValue(value=value)))
return Filter(must=conditions)
def delete(self, doc_id: str): def delete(self, doc_id: str):
self.collection.delete(ids=[doc_id]) point_id = self._to_uuid(doc_id)
self.client.delete(
collection_name=self.collection,
points_selector=[point_id],
)
def count(self) -> int: def count(self) -> int:
return self.collection.count() info = self.client.get_collection(collection_name=self.collection)
return info.points_count
def stats(self) -> dict: def stats(self) -> dict:
return { return {
"total_documents": self.count(), "total_documents": self.count(),
"collection_name": "qc_issues", "collection_name": self.collection,
} }

View File

@@ -1,7 +1,7 @@
 fastapi==0.104.1
 uvicorn[standard]==0.24.0
 httpx==0.27.0
-chromadb==0.4.22
+qdrant-client>=1.7.0
 numpy==1.26.2
 pydantic==2.5.0
 pydantic-settings==2.1.0

View File

@@ -10,20 +10,18 @@ async def health_check():
backends = await ollama_client.check_health() backends = await ollama_client.check_health()
stats = vector_store.stats() stats = vector_store.stats()
# 메인 텍스트 모델명 결정 (Ollama 메인, MLX fallback) # 메인 텍스트 모델명 결정
model_name = None model_name = None
ollama_models = backends.get("ollama", {}).get("models", []) text_models = backends.get("ollama_text", {}).get("models", [])
if ollama_models: if text_models:
model_name = ollama_models[0] model_name = text_models[0]
if not model_name and backends.get("mlx", {}).get("status") == "connected":
model_name = backends["mlx"].get("model")
return { return {
"status": "ok", "status": "ok",
"service": "tk-ai-service", "service": "tk-ai-service",
"model": model_name, "model": model_name,
"ollama": backends.get("ollama", {}), "ollama_text": backends.get("ollama_text", {}),
"mlx": backends.get("mlx", {}), "ollama_embed": backends.get("ollama_embed", {}),
"embeddings": stats, "embeddings": stats,
} }

View File

@@ -5,7 +5,8 @@ from config import settings
class OllamaClient: class OllamaClient:
def __init__(self): def __init__(self):
self.base_url = settings.OLLAMA_BASE_URL self.text_url = settings.OLLAMA_BASE_URL # GPU서버 (텍스트 생성)
self.embed_url = settings.OLLAMA_EMBED_URL # 맥미니 (임베딩)
self.timeout = httpx.Timeout(float(settings.OLLAMA_TIMEOUT), connect=10.0) self.timeout = httpx.Timeout(float(settings.OLLAMA_TIMEOUT), connect=10.0)
self._client: httpx.AsyncClient | None = None self._client: httpx.AsyncClient | None = None
@@ -22,7 +23,7 @@ class OllamaClient:
async def generate_embedding(self, text: str) -> list[float]: async def generate_embedding(self, text: str) -> list[float]:
client = await self._get_client() client = await self._get_client()
response = await client.post( response = await client.post(
f"{self.base_url}/api/embeddings", f"{self.embed_url}/api/embeddings",
json={"model": settings.OLLAMA_EMBED_MODEL, "prompt": text}, json={"model": settings.OLLAMA_EMBED_MODEL, "prompt": text},
) )
response.raise_for_status() response.raise_for_status()
@@ -43,49 +44,38 @@ class OllamaClient:
messages.append({"role": "system", "content": system}) messages.append({"role": "system", "content": system})
messages.append({"role": "user", "content": prompt}) messages.append({"role": "user", "content": prompt})
client = await self._get_client() client = await self._get_client()
# 조립컴 Ollama 메인, MLX fallback response = await client.post(
try: f"{self.text_url}/api/chat",
response = await client.post( json={
f"{self.base_url}/api/chat", "model": settings.OLLAMA_TEXT_MODEL,
json={ "messages": messages,
"model": settings.OLLAMA_TEXT_MODEL, "stream": False,
"messages": messages, "think": False,
"stream": False, "options": {"temperature": 0.3, "num_predict": 2048},
"think": False, },
"options": {"temperature": 0.3, "num_predict": 2048}, )
}, response.raise_for_status()
) return response.json()["message"]["content"]
response.raise_for_status()
return response.json()["message"]["content"]
except Exception:
response = await client.post(
f"{settings.MLX_BASE_URL}/chat/completions",
json={
"model": settings.MLX_TEXT_MODEL,
"messages": messages,
"max_tokens": 2048,
"temperature": 0.3,
},
)
response.raise_for_status()
return response.json()["choices"][0]["message"]["content"]
async def check_health(self) -> dict: async def check_health(self) -> dict:
result = {} result = {}
short_timeout = httpx.Timeout(5.0, connect=3.0) short_timeout = httpx.Timeout(5.0, connect=3.0)
# GPU서버 Ollama (텍스트 생성)
try: try:
async with httpx.AsyncClient(timeout=short_timeout) as c: async with httpx.AsyncClient(timeout=short_timeout) as c:
response = await c.get(f"{self.base_url}/api/tags") response = await c.get(f"{self.text_url}/api/tags")
models = response.json().get("models", []) models = response.json().get("models", [])
result["ollama"] = {"status": "connected", "models": [m["name"] for m in models]} result["ollama_text"] = {"status": "connected", "url": self.text_url, "models": [m["name"] for m in models]}
except Exception: except Exception:
result["ollama"] = {"status": "disconnected"} result["ollama_text"] = {"status": "disconnected", "url": self.text_url}
# 맥미니 Ollama (임베딩)
try: try:
async with httpx.AsyncClient(timeout=short_timeout) as c: async with httpx.AsyncClient(timeout=short_timeout) as c:
response = await c.get(f"{settings.MLX_BASE_URL}/health") response = await c.get(f"{self.embed_url}/api/tags")
result["mlx"] = {"status": "connected", "model": settings.MLX_TEXT_MODEL} models = response.json().get("models", [])
result["ollama_embed"] = {"status": "connected", "url": self.embed_url, "models": [m["name"] for m in models]}
except Exception: except Exception:
result["mlx"] = {"status": "disconnected"} result["ollama_embed"] = {"status": "disconnected", "url": self.embed_url}
return result return result

View File

@@ -286,41 +286,9 @@ services:
- tk-network - tk-network
# ================================================================= # =================================================================
# AI Service # AI Service — 맥미니로 이전됨 (~/docker/tk-ai-service/)
# ================================================================= # =================================================================
ai-service:
build:
context: ./ai-service
dockerfile: Dockerfile
container_name: tk-ai-service
restart: unless-stopped
ports:
- "30400:8000"
environment:
- OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-https://gpu.hyungi.net}
- OLLAMA_TEXT_MODEL=${OLLAMA_TEXT_MODEL:-qwen3.5:9b-q8_0}
- OLLAMA_EMBED_MODEL=${OLLAMA_EMBED_MODEL:-bge-m3}
- OLLAMA_TIMEOUT=${OLLAMA_TIMEOUT:-120}
- MLX_BASE_URL=${MLX_BASE_URL:-https://llm.hyungi.net}
- MLX_TEXT_MODEL=${MLX_TEXT_MODEL:-/Users/hyungi/mlx-models/Qwen3.5-27B-4bit}
- DB_HOST=mariadb
- DB_PORT=3306
- DB_USER=${MYSQL_USER:-hyungi_user}
- DB_PASSWORD=${MYSQL_PASSWORD}
- DB_NAME=${MYSQL_DATABASE:-hyungi}
- SECRET_KEY=${SSO_JWT_SECRET}
- SYSTEM1_API_URL=http://system1-api:3005
- CHROMA_PERSIST_DIR=/app/data/chroma
- TZ=Asia/Seoul
volumes:
- ai_data:/app/data
depends_on:
mariadb:
condition: service_healthy
networks:
- tk-network
# ================================================================= # =================================================================
# Gateway # Gateway
# ================================================================= # =================================================================
@@ -393,7 +361,6 @@ volumes:
system3_uploads: system3_uploads:
external: true external: true
name: tkqc-package_uploads name: tkqc-package_uploads
ai_data:
networks: networks:
tk-network: tk-network:
driver: bridge driver: bridge

View File

@@ -55,16 +55,20 @@ server {
proxy_set_header X-Forwarded-Proto $scheme; proxy_set_header X-Forwarded-Proto $scheme;
} }
# ===== AI Service API ===== # ===== AI Service API (맥미니 home-service-proxy 경유) =====
location /ai-api/ { location /ai-api/ {
proxy_pass http://ai-service:8000/api/ai/; resolver 8.8.8.8 valid=300s ipv6=off;
set $ai_upstream https://ai.hyungi.net;
rewrite ^/ai-api/(.*) /api/ai/$1 break;
proxy_pass $ai_upstream;
proxy_http_version 1.1; proxy_http_version 1.1;
proxy_set_header Host $host; proxy_set_header Host ai.hyungi.net;
proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme; proxy_set_header X-Forwarded-Proto $scheme;
proxy_read_timeout 120s; proxy_ssl_server_name on;
proxy_send_timeout 120s; proxy_read_timeout 180s;
proxy_send_timeout 180s;
} }
# ===== System 1 Web (나머지 모든 경로) ===== # ===== System 1 Web (나머지 모든 경로) =====

View File

@@ -48,16 +48,20 @@ server {
proxy_buffering off; proxy_buffering off;
} }
# AI API 프록시 # AI API 프록시 (맥미니 home-service-proxy 경유)
location /ai-api/ { location /ai-api/ {
proxy_pass http://ai-service:8000/api/ai/; resolver 8.8.8.8 valid=300s ipv6=off;
set $ai_upstream https://ai.hyungi.net;
rewrite ^/ai-api/(.*) /api/ai/$1 break;
proxy_pass $ai_upstream;
proxy_http_version 1.1; proxy_http_version 1.1;
proxy_set_header Host $host; proxy_set_header Host ai.hyungi.net;
proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme; proxy_set_header X-Forwarded-Proto $scheme;
proxy_read_timeout 120s; proxy_ssl_server_name on;
proxy_send_timeout 120s; proxy_read_timeout 180s;
proxy_send_timeout 180s;
} }
# 모바일 전용 페이지 # 모바일 전용 페이지