feat: ai-service를 ds923에서 맥미니로 이전
- ChromaDB → Qdrant 전환 (맥미니 기존 인스턴스, tk_qc_issues 컬렉션)
- Ollama 임베딩/텍스트 생성 URL 분리 (임베딩: 맥미니, 텍스트: GPU서버)
- MLX fallback 제거, Ollama 단일 경로로 단순화
- ds923 docker-compose에서 ai-service 제거
- gateway/system3-web nginx: ai-service 프록시를 ai.hyungi.net 경유로 변경
- resolver + 변수 기반 proxy_pass로 런타임 DNS 해석 (컨테이너 시작 실패 방지)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,6 @@ RUN apt-get update && apt-get install -y gcc build-essential && rm -rf /var/lib/
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
COPY . .
|
||||
RUN mkdir -p /app/data/chroma
|
||||
RUN mkdir -p /app/data
|
||||
EXPOSE 8000
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
||||
@@ -2,16 +2,22 @@ from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
OLLAMA_BASE_URL: str = "https://gpu.hyungi.net"
|
||||
# GPU서버 Ollama (텍스트 생성)
|
||||
OLLAMA_BASE_URL: str = "http://192.168.1.186:11434"
|
||||
OLLAMA_TEXT_MODEL: str = "qwen3.5:9b-q8_0"
|
||||
OLLAMA_EMBED_MODEL: str = "bge-m3"
|
||||
OLLAMA_TIMEOUT: int = 120
|
||||
|
||||
MLX_BASE_URL: str = "https://llm.hyungi.net"
|
||||
MLX_TEXT_MODEL: str = "/Users/hyungi/mlx-models/Qwen3.5-27B-4bit"
|
||||
# 맥미니 Ollama (임베딩) — OrbStack: host.internal / Docker Desktop: host.docker.internal
|
||||
OLLAMA_EMBED_URL: str = "http://host.internal:11434"
|
||||
OLLAMA_EMBED_MODEL: str = "bge-m3"
|
||||
|
||||
DB_HOST: str = "mariadb"
|
||||
DB_PORT: int = 3306
|
||||
# 맥미니 Qdrant (기존 인스턴스, 회사 전용 컬렉션)
|
||||
QDRANT_URL: str = "http://host.internal:6333"
|
||||
QDRANT_COLLECTION: str = "tk_qc_issues"
|
||||
|
||||
# ds923 MariaDB (Tailscale)
|
||||
DB_HOST: str = "100.71.132.52"
|
||||
DB_PORT: int = 30306
|
||||
DB_USER: str = "hyungi_user"
|
||||
DB_PASSWORD: str = ""
|
||||
DB_NAME: str = "hyungi"
|
||||
@@ -19,8 +25,8 @@ class Settings(BaseSettings):
|
||||
SECRET_KEY: str = ""
|
||||
ALGORITHM: str = "HS256"
|
||||
|
||||
SYSTEM1_API_URL: str = "http://system1-api:3005"
|
||||
CHROMA_PERSIST_DIR: str = "/app/data/chroma"
|
||||
# ds923 System1 API (Tailscale)
|
||||
SYSTEM1_API_URL: str = "http://100.71.132.52:30005"
|
||||
METADATA_DB_PATH: str = "/app/data/metadata.db"
|
||||
|
||||
class Config:
|
||||
|
||||
@@ -1,18 +1,31 @@
|
||||
import chromadb
|
||||
import uuid
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
|
||||
from config import settings
|
||||
|
||||
|
||||
class VectorStore:
|
||||
def __init__(self):
|
||||
self.client = None
|
||||
self.collection = None
|
||||
self.collection = settings.QDRANT_COLLECTION # "tk_qc_issues"
|
||||
|
||||
def initialize(self):
|
||||
self.client = chromadb.PersistentClient(path=settings.CHROMA_PERSIST_DIR)
|
||||
self.collection = self.client.get_or_create_collection(
|
||||
name="qc_issues",
|
||||
metadata={"hnsw:space": "cosine"},
|
||||
)
|
||||
self.client = QdrantClient(url=settings.QDRANT_URL)
|
||||
self._ensure_collection()
|
||||
|
||||
def _ensure_collection(self):
|
||||
collections = [c.name for c in self.client.get_collections().collections]
|
||||
if self.collection not in collections:
|
||||
# bge-m3 기본 출력 = 1024 dims
|
||||
self.client.create_collection(
|
||||
collection_name=self.collection,
|
||||
vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _to_uuid(doc_id) -> str:
|
||||
"""문자열/정수 ID → UUID5 변환 (Qdrant 호환)"""
|
||||
return str(uuid.uuid5(uuid.NAMESPACE_URL, str(doc_id)))
|
||||
|
||||
def upsert(
|
||||
self,
|
||||
@@ -21,11 +34,13 @@ class VectorStore:
|
||||
embedding: list[float],
|
||||
metadata: dict = None,
|
||||
):
|
||||
self.collection.upsert(
|
||||
ids=[doc_id],
|
||||
documents=[document],
|
||||
embeddings=[embedding],
|
||||
metadatas=[metadata] if metadata else None,
|
||||
point_id = self._to_uuid(doc_id)
|
||||
payload = {"document": document, "original_id": str(doc_id)}
|
||||
if metadata:
|
||||
payload.update(metadata)
|
||||
self.client.upsert(
|
||||
collection_name=self.collection,
|
||||
points=[PointStruct(id=point_id, vector=embedding, payload=payload)],
|
||||
)
|
||||
|
||||
def query(
|
||||
@@ -34,42 +49,53 @@ class VectorStore:
|
||||
n_results: int = 5,
|
||||
where: dict = None,
|
||||
) -> list[dict]:
|
||||
kwargs = {
|
||||
"query_embeddings": [embedding],
|
||||
"n_results": n_results,
|
||||
"include": ["documents", "metadatas", "distances"],
|
||||
}
|
||||
if where:
|
||||
kwargs["where"] = where
|
||||
query_filter = self._build_filter(where) if where else None
|
||||
try:
|
||||
results = self.collection.query(**kwargs)
|
||||
results = self.client.search(
|
||||
collection_name=self.collection,
|
||||
query_vector=embedding,
|
||||
limit=n_results,
|
||||
query_filter=query_filter,
|
||||
)
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
items = []
|
||||
if results and results["ids"] and results["ids"][0]:
|
||||
for i, doc_id in enumerate(results["ids"][0]):
|
||||
item = {
|
||||
"id": doc_id,
|
||||
"document": results["documents"][0][i] if results["documents"] else "",
|
||||
"distance": results["distances"][0][i] if results["distances"] else 0,
|
||||
"metadata": results["metadatas"][0][i] if results["metadatas"] else {},
|
||||
}
|
||||
# cosine distance → similarity
|
||||
item["similarity"] = round(1 - item["distance"], 4)
|
||||
items.append(item)
|
||||
for hit in results:
|
||||
payload = hit.payload or {}
|
||||
item = {
|
||||
"id": payload.get("original_id", str(hit.id)),
|
||||
"document": payload.get("document", ""),
|
||||
"distance": round(1 - hit.score, 4), # cosine score → distance
|
||||
"metadata": {k: v for k, v in payload.items() if k not in ("document", "original_id")},
|
||||
"similarity": round(hit.score, 4),
|
||||
}
|
||||
items.append(item)
|
||||
return items
|
||||
|
||||
@staticmethod
|
||||
def _build_filter(where: dict) -> Filter:
|
||||
"""ChromaDB 스타일 where 조건 → Qdrant Filter 변환"""
|
||||
conditions = []
|
||||
for key, value in where.items():
|
||||
conditions.append(FieldCondition(key=key, match=MatchValue(value=value)))
|
||||
return Filter(must=conditions)
|
||||
|
||||
def delete(self, doc_id: str):
|
||||
self.collection.delete(ids=[doc_id])
|
||||
point_id = self._to_uuid(doc_id)
|
||||
self.client.delete(
|
||||
collection_name=self.collection,
|
||||
points_selector=[point_id],
|
||||
)
|
||||
|
||||
def count(self) -> int:
|
||||
return self.collection.count()
|
||||
info = self.client.get_collection(collection_name=self.collection)
|
||||
return info.points_count
|
||||
|
||||
def stats(self) -> dict:
|
||||
return {
|
||||
"total_documents": self.count(),
|
||||
"collection_name": "qc_issues",
|
||||
"collection_name": self.collection,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
fastapi==0.104.1
|
||||
uvicorn[standard]==0.24.0
|
||||
httpx==0.27.0
|
||||
chromadb==0.4.22
|
||||
qdrant-client>=1.7.0
|
||||
numpy==1.26.2
|
||||
pydantic==2.5.0
|
||||
pydantic-settings==2.1.0
|
||||
|
||||
@@ -10,20 +10,18 @@ async def health_check():
|
||||
backends = await ollama_client.check_health()
|
||||
stats = vector_store.stats()
|
||||
|
||||
# 메인 텍스트 모델명 결정 (Ollama 메인, MLX fallback)
|
||||
# 메인 텍스트 모델명 결정
|
||||
model_name = None
|
||||
ollama_models = backends.get("ollama", {}).get("models", [])
|
||||
if ollama_models:
|
||||
model_name = ollama_models[0]
|
||||
if not model_name and backends.get("mlx", {}).get("status") == "connected":
|
||||
model_name = backends["mlx"].get("model")
|
||||
text_models = backends.get("ollama_text", {}).get("models", [])
|
||||
if text_models:
|
||||
model_name = text_models[0]
|
||||
|
||||
return {
|
||||
"status": "ok",
|
||||
"service": "tk-ai-service",
|
||||
"model": model_name,
|
||||
"ollama": backends.get("ollama", {}),
|
||||
"mlx": backends.get("mlx", {}),
|
||||
"ollama_text": backends.get("ollama_text", {}),
|
||||
"ollama_embed": backends.get("ollama_embed", {}),
|
||||
"embeddings": stats,
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,8 @@ from config import settings
|
||||
|
||||
class OllamaClient:
|
||||
def __init__(self):
|
||||
self.base_url = settings.OLLAMA_BASE_URL
|
||||
self.text_url = settings.OLLAMA_BASE_URL # GPU서버 (텍스트 생성)
|
||||
self.embed_url = settings.OLLAMA_EMBED_URL # 맥미니 (임베딩)
|
||||
self.timeout = httpx.Timeout(float(settings.OLLAMA_TIMEOUT), connect=10.0)
|
||||
self._client: httpx.AsyncClient | None = None
|
||||
|
||||
@@ -22,7 +23,7 @@ class OllamaClient:
|
||||
async def generate_embedding(self, text: str) -> list[float]:
|
||||
client = await self._get_client()
|
||||
response = await client.post(
|
||||
f"{self.base_url}/api/embeddings",
|
||||
f"{self.embed_url}/api/embeddings",
|
||||
json={"model": settings.OLLAMA_EMBED_MODEL, "prompt": text},
|
||||
)
|
||||
response.raise_for_status()
|
||||
@@ -43,49 +44,38 @@ class OllamaClient:
|
||||
messages.append({"role": "system", "content": system})
|
||||
messages.append({"role": "user", "content": prompt})
|
||||
client = await self._get_client()
|
||||
# 조립컴 Ollama 메인, MLX fallback
|
||||
try:
|
||||
response = await client.post(
|
||||
f"{self.base_url}/api/chat",
|
||||
json={
|
||||
"model": settings.OLLAMA_TEXT_MODEL,
|
||||
"messages": messages,
|
||||
"stream": False,
|
||||
"think": False,
|
||||
"options": {"temperature": 0.3, "num_predict": 2048},
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()["message"]["content"]
|
||||
except Exception:
|
||||
response = await client.post(
|
||||
f"{settings.MLX_BASE_URL}/chat/completions",
|
||||
json={
|
||||
"model": settings.MLX_TEXT_MODEL,
|
||||
"messages": messages,
|
||||
"max_tokens": 2048,
|
||||
"temperature": 0.3,
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()["choices"][0]["message"]["content"]
|
||||
response = await client.post(
|
||||
f"{self.text_url}/api/chat",
|
||||
json={
|
||||
"model": settings.OLLAMA_TEXT_MODEL,
|
||||
"messages": messages,
|
||||
"stream": False,
|
||||
"think": False,
|
||||
"options": {"temperature": 0.3, "num_predict": 2048},
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()["message"]["content"]
|
||||
|
||||
async def check_health(self) -> dict:
|
||||
result = {}
|
||||
short_timeout = httpx.Timeout(5.0, connect=3.0)
|
||||
# GPU서버 Ollama (텍스트 생성)
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=short_timeout) as c:
|
||||
response = await c.get(f"{self.base_url}/api/tags")
|
||||
response = await c.get(f"{self.text_url}/api/tags")
|
||||
models = response.json().get("models", [])
|
||||
result["ollama"] = {"status": "connected", "models": [m["name"] for m in models]}
|
||||
result["ollama_text"] = {"status": "connected", "url": self.text_url, "models": [m["name"] for m in models]}
|
||||
except Exception:
|
||||
result["ollama"] = {"status": "disconnected"}
|
||||
result["ollama_text"] = {"status": "disconnected", "url": self.text_url}
|
||||
# 맥미니 Ollama (임베딩)
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=short_timeout) as c:
|
||||
response = await c.get(f"{settings.MLX_BASE_URL}/health")
|
||||
result["mlx"] = {"status": "connected", "model": settings.MLX_TEXT_MODEL}
|
||||
response = await c.get(f"{self.embed_url}/api/tags")
|
||||
models = response.json().get("models", [])
|
||||
result["ollama_embed"] = {"status": "connected", "url": self.embed_url, "models": [m["name"] for m in models]}
|
||||
except Exception:
|
||||
result["mlx"] = {"status": "disconnected"}
|
||||
result["ollama_embed"] = {"status": "disconnected", "url": self.embed_url}
|
||||
return result
|
||||
|
||||
|
||||
|
||||
@@ -286,41 +286,9 @@ services:
|
||||
- tk-network
|
||||
|
||||
# =================================================================
|
||||
# AI Service
|
||||
# AI Service — 맥미니로 이전됨 (~/docker/tk-ai-service/)
|
||||
# =================================================================
|
||||
|
||||
ai-service:
|
||||
build:
|
||||
context: ./ai-service
|
||||
dockerfile: Dockerfile
|
||||
container_name: tk-ai-service
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "30400:8000"
|
||||
environment:
|
||||
- OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-https://gpu.hyungi.net}
|
||||
- OLLAMA_TEXT_MODEL=${OLLAMA_TEXT_MODEL:-qwen3.5:9b-q8_0}
|
||||
- OLLAMA_EMBED_MODEL=${OLLAMA_EMBED_MODEL:-bge-m3}
|
||||
- OLLAMA_TIMEOUT=${OLLAMA_TIMEOUT:-120}
|
||||
- MLX_BASE_URL=${MLX_BASE_URL:-https://llm.hyungi.net}
|
||||
- MLX_TEXT_MODEL=${MLX_TEXT_MODEL:-/Users/hyungi/mlx-models/Qwen3.5-27B-4bit}
|
||||
- DB_HOST=mariadb
|
||||
- DB_PORT=3306
|
||||
- DB_USER=${MYSQL_USER:-hyungi_user}
|
||||
- DB_PASSWORD=${MYSQL_PASSWORD}
|
||||
- DB_NAME=${MYSQL_DATABASE:-hyungi}
|
||||
- SECRET_KEY=${SSO_JWT_SECRET}
|
||||
- SYSTEM1_API_URL=http://system1-api:3005
|
||||
- CHROMA_PERSIST_DIR=/app/data/chroma
|
||||
- TZ=Asia/Seoul
|
||||
volumes:
|
||||
- ai_data:/app/data
|
||||
depends_on:
|
||||
mariadb:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- tk-network
|
||||
|
||||
# =================================================================
|
||||
# Gateway
|
||||
# =================================================================
|
||||
@@ -393,7 +361,6 @@ volumes:
|
||||
system3_uploads:
|
||||
external: true
|
||||
name: tkqc-package_uploads
|
||||
ai_data:
|
||||
networks:
|
||||
tk-network:
|
||||
driver: bridge
|
||||
|
||||
@@ -55,16 +55,20 @@ server {
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# ===== AI Service API =====
|
||||
# ===== AI Service API (맥미니 home-service-proxy 경유) =====
|
||||
location /ai-api/ {
|
||||
proxy_pass http://ai-service:8000/api/ai/;
|
||||
resolver 8.8.8.8 valid=300s ipv6=off;
|
||||
set $ai_upstream https://ai.hyungi.net;
|
||||
rewrite ^/ai-api/(.*) /api/ai/$1 break;
|
||||
proxy_pass $ai_upstream;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header Host ai.hyungi.net;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_read_timeout 120s;
|
||||
proxy_send_timeout 120s;
|
||||
proxy_ssl_server_name on;
|
||||
proxy_read_timeout 180s;
|
||||
proxy_send_timeout 180s;
|
||||
}
|
||||
|
||||
# ===== System 1 Web (나머지 모든 경로) =====
|
||||
|
||||
@@ -48,16 +48,20 @@ server {
|
||||
proxy_buffering off;
|
||||
}
|
||||
|
||||
# AI API 프록시
|
||||
# AI API 프록시 (맥미니 home-service-proxy 경유)
|
||||
location /ai-api/ {
|
||||
proxy_pass http://ai-service:8000/api/ai/;
|
||||
resolver 8.8.8.8 valid=300s ipv6=off;
|
||||
set $ai_upstream https://ai.hyungi.net;
|
||||
rewrite ^/ai-api/(.*) /api/ai/$1 break;
|
||||
proxy_pass $ai_upstream;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header Host ai.hyungi.net;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_read_timeout 120s;
|
||||
proxy_send_timeout 120s;
|
||||
proxy_ssl_server_name on;
|
||||
proxy_read_timeout 180s;
|
||||
proxy_send_timeout 180s;
|
||||
}
|
||||
|
||||
# 모바일 전용 페이지
|
||||
|
||||
Reference in New Issue
Block a user