feat: ai-service를 ds923에서 맥미니로 이전

- ChromaDB → Qdrant 전환 (맥미니 기존 인스턴스, tk_qc_issues 컬렉션)
- Ollama 임베딩/텍스트 생성 URL 분리 (임베딩: 맥미니, 텍스트: GPU서버)
- MLX fallback 제거, Ollama 단일 경로로 단순화
- ds923 docker-compose에서 ai-service 제거
- gateway/system3-web nginx: ai-service 프록시를 ai.hyungi.net 경유로 변경
- resolver + 변수 기반 proxy_pass로 런타임 DNS 해석 (컨테이너 시작 실패 방지)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 15:07:58 +09:00
parent 2d25d54589
commit 85f674c9cb
9 changed files with 125 additions and 130 deletions
--- a/ai-service/Dockerfile
+++ b/ai-service/Dockerfile
@@ -4,6 +4,6 @@ RUN apt-get update && apt-get install -y gcc build-essential && rm -rf /var/lib/
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
-RUN mkdir -p /app/data/chroma
+RUN mkdir -p /app/data
 EXPOSE 8000
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
--- a/ai-service/config.py
+++ b/ai-service/config.py
@@ -2,16 +2,22 @@ from pydantic_settings import BaseSettings


 class Settings(BaseSettings):
-    OLLAMA_BASE_URL: str = "https://gpu.hyungi.net"
+    # GPU서버 Ollama (텍스트 생성)
+    OLLAMA_BASE_URL: str = "http://192.168.1.186:11434"
    OLLAMA_TEXT_MODEL: str = "qwen3.5:9b-q8_0"
-    OLLAMA_EMBED_MODEL: str = "bge-m3"
    OLLAMA_TIMEOUT: int = 120

-    MLX_BASE_URL: str = "https://llm.hyungi.net"
-    MLX_TEXT_MODEL: str = "/Users/hyungi/mlx-models/Qwen3.5-27B-4bit"
+    # 맥미니 Ollama (임베딩) — OrbStack: host.internal / Docker Desktop: host.docker.internal
+    OLLAMA_EMBED_URL: str = "http://host.internal:11434"
+    OLLAMA_EMBED_MODEL: str = "bge-m3"

-    DB_HOST: str = "mariadb"
-    DB_PORT: int = 3306
+    # 맥미니 Qdrant (기존 인스턴스, 회사 전용 컬렉션)
+    QDRANT_URL: str = "http://host.internal:6333"
+    QDRANT_COLLECTION: str = "tk_qc_issues"
+
+    # ds923 MariaDB (Tailscale)
+    DB_HOST: str = "100.71.132.52"
+    DB_PORT: int = 30306
    DB_USER: str = "hyungi_user"
    DB_PASSWORD: str = ""
    DB_NAME: str = "hyungi"
@@ -19,8 +25,8 @@ class Settings(BaseSettings):
    SECRET_KEY: str = ""
    ALGORITHM: str = "HS256"

-    SYSTEM1_API_URL: str = "http://system1-api:3005"
-    CHROMA_PERSIST_DIR: str = "/app/data/chroma"
+    # ds923 System1 API (Tailscale)
+    SYSTEM1_API_URL: str = "http://100.71.132.52:30005"
    METADATA_DB_PATH: str = "/app/data/metadata.db"

    class Config:
--- a/ai-service/db/vector_store.py
+++ b/ai-service/db/vector_store.py
@@ -1,18 +1,31 @@
-import chromadb
+import uuid
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
 from config import settings


 class VectorStore:
    def __init__(self):
        self.client = None
-        self.collection = None
+        self.collection = settings.QDRANT_COLLECTION  # "tk_qc_issues"

    def initialize(self):
-        self.client = chromadb.PersistentClient(path=settings.CHROMA_PERSIST_DIR)
-        self.collection = self.client.get_or_create_collection(
-            name="qc_issues",
-            metadata={"hnsw:space": "cosine"},
-        )
+        self.client = QdrantClient(url=settings.QDRANT_URL)
+        self._ensure_collection()
+
+    def _ensure_collection(self):
+        collections = [c.name for c in self.client.get_collections().collections]
+        if self.collection not in collections:
+            # bge-m3 기본 출력 = 1024 dims
+            self.client.create_collection(
+                collection_name=self.collection,
+                vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
+            )
+
+    @staticmethod
+    def _to_uuid(doc_id) -> str:
+        """문자열/정수 ID → UUID5 변환 (Qdrant 호환)"""
+        return str(uuid.uuid5(uuid.NAMESPACE_URL, str(doc_id)))

    def upsert(
        self,
@@ -21,11 +34,13 @@ class VectorStore:
        embedding: list[float],
        metadata: dict = None,
    ):
-        self.collection.upsert(
-            ids=[doc_id],
-            documents=[document],
-            embeddings=[embedding],
-            metadatas=[metadata] if metadata else None,
+        point_id = self._to_uuid(doc_id)
+        payload = {"document": document, "original_id": str(doc_id)}
+        if metadata:
+            payload.update(metadata)
+        self.client.upsert(
+            collection_name=self.collection,
+            points=[PointStruct(id=point_id, vector=embedding, payload=payload)],
        )

    def query(
@@ -34,42 +49,53 @@ class VectorStore:
        n_results: int = 5,
        where: dict = None,
    ) -> list[dict]:
-        kwargs = {
-            "query_embeddings": [embedding],
-            "n_results": n_results,
-            "include": ["documents", "metadatas", "distances"],
-        }
-        if where:
-            kwargs["where"] = where
+        query_filter = self._build_filter(where) if where else None
        try:
-            results = self.collection.query(**kwargs)
+            results = self.client.search(
+                collection_name=self.collection,
+                query_vector=embedding,
+                limit=n_results,
+                query_filter=query_filter,
+            )
        except Exception:
            return []

        items = []
-        if results and results["ids"] and results["ids"][0]:
-            for i, doc_id in enumerate(results["ids"][0]):
-                item = {
-                    "id": doc_id,
-                    "document": results["documents"][0][i] if results["documents"] else "",
-                    "distance": results["distances"][0][i] if results["distances"] else 0,
-                    "metadata": results["metadatas"][0][i] if results["metadatas"] else {},
-                }
-                # cosine distance → similarity
-                item["similarity"] = round(1 - item["distance"], 4)
-                items.append(item)
+        for hit in results:
+            payload = hit.payload or {}
+            item = {
+                "id": payload.get("original_id", str(hit.id)),
+                "document": payload.get("document", ""),
+                "distance": round(1 - hit.score, 4),  # cosine score → distance
+                "metadata": {k: v for k, v in payload.items() if k not in ("document", "original_id")},
+                "similarity": round(hit.score, 4),
+            }
+            items.append(item)
        return items

+    @staticmethod
+    def _build_filter(where: dict) -> Filter:
+        """ChromaDB 스타일 where 조건 → Qdrant Filter 변환"""
+        conditions = []
+        for key, value in where.items():
+            conditions.append(FieldCondition(key=key, match=MatchValue(value=value)))
+        return Filter(must=conditions)
+
    def delete(self, doc_id: str):
-        self.collection.delete(ids=[doc_id])
+        point_id = self._to_uuid(doc_id)
+        self.client.delete(
+            collection_name=self.collection,
+            points_selector=[point_id],
+        )

    def count(self) -> int:
-        return self.collection.count()
+        info = self.client.get_collection(collection_name=self.collection)
+        return info.points_count

    def stats(self) -> dict:
        return {
            "total_documents": self.count(),
-            "collection_name": "qc_issues",
+            "collection_name": self.collection,
        }


--- a/ai-service/requirements.txt
+++ b/ai-service/requirements.txt
@@ -1,7 +1,7 @@
 fastapi==0.104.1
 uvicorn[standard]==0.24.0
 httpx==0.27.0
-chromadb==0.4.22
+qdrant-client>=1.7.0
 numpy==1.26.2
 pydantic==2.5.0
 pydantic-settings==2.1.0
--- a/ai-service/routers/health.py
+++ b/ai-service/routers/health.py
@@ -10,20 +10,18 @@ async def health_check():
    backends = await ollama_client.check_health()
    stats = vector_store.stats()

-    # 메인 텍스트 모델명 결정 (Ollama 메인, MLX fallback)
+    # 메인 텍스트 모델명 결정
    model_name = None
-    ollama_models = backends.get("ollama", {}).get("models", [])
-    if ollama_models:
-        model_name = ollama_models[0]
-    if not model_name and backends.get("mlx", {}).get("status") == "connected":
-        model_name = backends["mlx"].get("model")
+    text_models = backends.get("ollama_text", {}).get("models", [])
+    if text_models:
+        model_name = text_models[0]

    return {
        "status": "ok",
        "service": "tk-ai-service",
        "model": model_name,
-        "ollama": backends.get("ollama", {}),
-        "mlx": backends.get("mlx", {}),
+        "ollama_text": backends.get("ollama_text", {}),
+        "ollama_embed": backends.get("ollama_embed", {}),
        "embeddings": stats,
    }

--- a/ai-service/services/ollama_client.py
+++ b/ai-service/services/ollama_client.py
@@ -5,7 +5,8 @@ from config import settings

 class OllamaClient:
    def __init__(self):
-        self.base_url = settings.OLLAMA_BASE_URL
+        self.text_url = settings.OLLAMA_BASE_URL      # GPU서버 (텍스트 생성)
+        self.embed_url = settings.OLLAMA_EMBED_URL     # 맥미니 (임베딩)
        self.timeout = httpx.Timeout(float(settings.OLLAMA_TIMEOUT), connect=10.0)
        self._client: httpx.AsyncClient | None = None

@@ -22,7 +23,7 @@ class OllamaClient:
    async def generate_embedding(self, text: str) -> list[float]:
        client = await self._get_client()
        response = await client.post(
-            f"{self.base_url}/api/embeddings",
+            f"{self.embed_url}/api/embeddings",
            json={"model": settings.OLLAMA_EMBED_MODEL, "prompt": text},
        )
        response.raise_for_status()
@@ -43,49 +44,38 @@ class OllamaClient:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})
        client = await self._get_client()
-        # 조립컴 Ollama 메인, MLX fallback
-        try:
-            response = await client.post(
-                f"{self.base_url}/api/chat",
-                json={
-                    "model": settings.OLLAMA_TEXT_MODEL,
-                    "messages": messages,
-                    "stream": False,
-                    "think": False,
-                    "options": {"temperature": 0.3, "num_predict": 2048},
-                },
-            )
-            response.raise_for_status()
-            return response.json()["message"]["content"]
-        except Exception:
-            response = await client.post(
-                f"{settings.MLX_BASE_URL}/chat/completions",
-                json={
-                    "model": settings.MLX_TEXT_MODEL,
-                    "messages": messages,
-                    "max_tokens": 2048,
-                    "temperature": 0.3,
-                },
-            )
-            response.raise_for_status()
-            return response.json()["choices"][0]["message"]["content"]
+        response = await client.post(
+            f"{self.text_url}/api/chat",
+            json={
+                "model": settings.OLLAMA_TEXT_MODEL,
+                "messages": messages,
+                "stream": False,
+                "think": False,
+                "options": {"temperature": 0.3, "num_predict": 2048},
+            },
+        )
+        response.raise_for_status()
+        return response.json()["message"]["content"]

    async def check_health(self) -> dict:
        result = {}
        short_timeout = httpx.Timeout(5.0, connect=3.0)
+        # GPU서버 Ollama (텍스트 생성)
        try:
            async with httpx.AsyncClient(timeout=short_timeout) as c:
-                response = await c.get(f"{self.base_url}/api/tags")
+                response = await c.get(f"{self.text_url}/api/tags")
            models = response.json().get("models", [])
-            result["ollama"] = {"status": "connected", "models": [m["name"] for m in models]}
+            result["ollama_text"] = {"status": "connected", "url": self.text_url, "models": [m["name"] for m in models]}
        except Exception:
-            result["ollama"] = {"status": "disconnected"}
+            result["ollama_text"] = {"status": "disconnected", "url": self.text_url}
+        # 맥미니 Ollama (임베딩)
        try:
            async with httpx.AsyncClient(timeout=short_timeout) as c:
-                response = await c.get(f"{settings.MLX_BASE_URL}/health")
-            result["mlx"] = {"status": "connected", "model": settings.MLX_TEXT_MODEL}
+                response = await c.get(f"{self.embed_url}/api/tags")
+            models = response.json().get("models", [])
+            result["ollama_embed"] = {"status": "connected", "url": self.embed_url, "models": [m["name"] for m in models]}
        except Exception:
-            result["mlx"] = {"status": "disconnected"}
+            result["ollama_embed"] = {"status": "disconnected", "url": self.embed_url}
        return result


--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -286,41 +286,9 @@ services:
      - tk-network

  # =================================================================
-  # AI Service
+  # AI Service — 맥미니로 이전됨 (~/docker/tk-ai-service/)
  # =================================================================

-  ai-service:
-    build:
-      context: ./ai-service
-      dockerfile: Dockerfile
-    container_name: tk-ai-service
-    restart: unless-stopped
-    ports:
-      - "30400:8000"
-    environment:
-      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-https://gpu.hyungi.net}
-      - OLLAMA_TEXT_MODEL=${OLLAMA_TEXT_MODEL:-qwen3.5:9b-q8_0}
-      - OLLAMA_EMBED_MODEL=${OLLAMA_EMBED_MODEL:-bge-m3}
-      - OLLAMA_TIMEOUT=${OLLAMA_TIMEOUT:-120}
-      - MLX_BASE_URL=${MLX_BASE_URL:-https://llm.hyungi.net}
-      - MLX_TEXT_MODEL=${MLX_TEXT_MODEL:-/Users/hyungi/mlx-models/Qwen3.5-27B-4bit}
-      - DB_HOST=mariadb
-      - DB_PORT=3306
-      - DB_USER=${MYSQL_USER:-hyungi_user}
-      - DB_PASSWORD=${MYSQL_PASSWORD}
-      - DB_NAME=${MYSQL_DATABASE:-hyungi}
-      - SECRET_KEY=${SSO_JWT_SECRET}
-      - SYSTEM1_API_URL=http://system1-api:3005
-      - CHROMA_PERSIST_DIR=/app/data/chroma
-      - TZ=Asia/Seoul
-    volumes:
-      - ai_data:/app/data
-    depends_on:
-      mariadb:
-        condition: service_healthy
-    networks:
-      - tk-network
-
  # =================================================================
  # Gateway
  # =================================================================
@@ -393,7 +361,6 @@ volumes:
  system3_uploads:
    external: true
    name: tkqc-package_uploads
-  ai_data:
 networks:
  tk-network:
    driver: bridge
--- a/gateway/nginx.conf
+++ b/gateway/nginx.conf
@@ -55,16 +55,20 @@ server {
        proxy_set_header X-Forwarded-Proto $scheme;
    }

-    # ===== AI Service API =====
+    # ===== AI Service API (맥미니 home-service-proxy 경유) =====
    location /ai-api/ {
-        proxy_pass http://ai-service:8000/api/ai/;
+        resolver 8.8.8.8 valid=300s ipv6=off;
+        set $ai_upstream https://ai.hyungi.net;
+        rewrite ^/ai-api/(.*) /api/ai/$1 break;
+        proxy_pass $ai_upstream;
        proxy_http_version 1.1;
-        proxy_set_header Host $host;
+        proxy_set_header Host ai.hyungi.net;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_read_timeout 120s;
-        proxy_send_timeout 120s;
+        proxy_ssl_server_name on;
+        proxy_read_timeout 180s;
+        proxy_send_timeout 180s;
    }

    # ===== System 1 Web (나머지 모든 경로) =====
--- a/system3-nonconformance/web/nginx.conf
+++ b/system3-nonconformance/web/nginx.conf
@@ -48,16 +48,20 @@ server {
        proxy_buffering off;
    }

-    # AI API 프록시
+    # AI API 프록시 (맥미니 home-service-proxy 경유)
    location /ai-api/ {
-        proxy_pass http://ai-service:8000/api/ai/;
+        resolver 8.8.8.8 valid=300s ipv6=off;
+        set $ai_upstream https://ai.hyungi.net;
+        rewrite ^/ai-api/(.*) /api/ai/$1 break;
+        proxy_pass $ai_upstream;
        proxy_http_version 1.1;
-        proxy_set_header Host $host;
+        proxy_set_header Host ai.hyungi.net;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_read_timeout 120s;
-        proxy_send_timeout 120s;
+        proxy_ssl_server_name on;
+        proxy_read_timeout 180s;
+        proxy_send_timeout 180s;
    }

    # 모바일 전용 페이지