From 85f674c9cb6364805ab18d28d8c2272591ac0584 Mon Sep 17 00:00:00 2001
From: Hyungi Ahn <hyungiahn@Hyungiui-MacBookPro.local>
Date: Wed, 11 Mar 2026 15:07:58 +0900
Subject: [PATCH] =?UTF-8?q?feat:=20ai-service=EB=A5=BC=20ds923=EC=97=90?=
 =?UTF-8?q?=EC=84=9C=20=EB=A7=A5=EB=AF=B8=EB=8B=88=EB=A1=9C=20=EC=9D=B4?=
 =?UTF-8?q?=EC=A0=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- ChromaDB → Qdrant 전환 (맥미니 기존 인스턴스, tk_qc_issues 컬렉션)
- Ollama 임베딩/텍스트 생성 URL 분리 (임베딩: 맥미니, 텍스트: GPU서버)
- MLX fallback 제거, Ollama 단일 경로로 단순화
- ds923 docker-compose에서 ai-service 제거
- gateway/system3-web nginx: ai-service 프록시를 ai.hyungi.net 경유로 변경
- resolver + 변수 기반 proxy_pass로 런타임 DNS 해석 (컨테이너 시작 실패 방지)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ai-service/Dockerfile                 |  2 +-
 ai-service/config.py                  | 22 ++++---
 ai-service/db/vector_store.py         | 94 +++++++++++++++++----------
 ai-service/requirements.txt           |  2 +-
 ai-service/routers/health.py          | 14 ++--
 ai-service/services/ollama_client.py  | 58 +++++++----------
 docker-compose.yml                    | 35 +---------
 gateway/nginx.conf                    | 14 ++--
 system3-nonconformance/web/nginx.conf | 14 ++--
 9 files changed, 125 insertions(+), 130 deletions(-)

diff --git a/ai-service/Dockerfile b/ai-service/Dockerfile
index 0e51481..d3a11d6 100644
--- a/ai-service/Dockerfile
+++ b/ai-service/Dockerfile
@@ -4,6 +4,6 @@ RUN apt-get update && apt-get install -y gcc build-essential && rm -rf /var/lib/
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
-RUN mkdir -p /app/data/chroma
+RUN mkdir -p /app/data
 EXPOSE 8000
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/ai-service/config.py b/ai-service/config.py
index 79aef0c..d4c9f3f 100644
--- a/ai-service/config.py
+++ b/ai-service/config.py
@@ -2,16 +2,22 @@ from pydantic_settings import BaseSettings
 
 
 class Settings(BaseSettings):
-    OLLAMA_BASE_URL: str = "https://gpu.hyungi.net"
+    # GPU서버 Ollama (텍스트 생성)
+    OLLAMA_BASE_URL: str = "http://192.168.1.186:11434"
     OLLAMA_TEXT_MODEL: str = "qwen3.5:9b-q8_0"
-    OLLAMA_EMBED_MODEL: str = "bge-m3"
     OLLAMA_TIMEOUT: int = 120
 
-    MLX_BASE_URL: str = "https://llm.hyungi.net"
-    MLX_TEXT_MODEL: str = "/Users/hyungi/mlx-models/Qwen3.5-27B-4bit"
+    # 맥미니 Ollama (임베딩) — OrbStack: host.internal / Docker Desktop: host.docker.internal
+    OLLAMA_EMBED_URL: str = "http://host.internal:11434"
+    OLLAMA_EMBED_MODEL: str = "bge-m3"
 
-    DB_HOST: str = "mariadb"
-    DB_PORT: int = 3306
+    # 맥미니 Qdrant (기존 인스턴스, 회사 전용 컬렉션)
+    QDRANT_URL: str = "http://host.internal:6333"
+    QDRANT_COLLECTION: str = "tk_qc_issues"
+
+    # ds923 MariaDB (Tailscale)
+    DB_HOST: str = "100.71.132.52"
+    DB_PORT: int = 30306
     DB_USER: str = "hyungi_user"
     DB_PASSWORD: str = ""
     DB_NAME: str = "hyungi"
@@ -19,8 +25,8 @@ class Settings(BaseSettings):
     SECRET_KEY: str = ""
     ALGORITHM: str = "HS256"
 
-    SYSTEM1_API_URL: str = "http://system1-api:3005"
-    CHROMA_PERSIST_DIR: str = "/app/data/chroma"
+    # ds923 System1 API (Tailscale)
+    SYSTEM1_API_URL: str = "http://100.71.132.52:30005"
     METADATA_DB_PATH: str = "/app/data/metadata.db"
 
     class Config:
diff --git a/ai-service/db/vector_store.py b/ai-service/db/vector_store.py
index 23868f3..ad8e4f6 100644
--- a/ai-service/db/vector_store.py
+++ b/ai-service/db/vector_store.py
@@ -1,18 +1,31 @@
-import chromadb
+import uuid
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
 from config import settings
 
 
 class VectorStore:
     def __init__(self):
         self.client = None
-        self.collection = None
+        self.collection = settings.QDRANT_COLLECTION  # "tk_qc_issues"
 
     def initialize(self):
-        self.client = chromadb.PersistentClient(path=settings.CHROMA_PERSIST_DIR)
-        self.collection = self.client.get_or_create_collection(
-            name="qc_issues",
-            metadata={"hnsw:space": "cosine"},
-        )
+        self.client = QdrantClient(url=settings.QDRANT_URL)
+        self._ensure_collection()
+
+    def _ensure_collection(self):
+        collections = [c.name for c in self.client.get_collections().collections]
+        if self.collection not in collections:
+            # bge-m3 default output = 1024 dims
+            self.client.create_collection(
+                collection_name=self.collection,
+                vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
+            )
+
+    @staticmethod
+    def _to_uuid(doc_id) -> str:
+        """Convert a string/integer ID into a UUID5 string (Qdrant-compatible point ID)."""
+        return str(uuid.uuid5(uuid.NAMESPACE_URL, str(doc_id)))
 
     def upsert(
         self,
@@ -21,11 +34,13 @@ class VectorStore:
         embedding: list[float],
         metadata: dict = None,
     ):
-        self.collection.upsert(
-            ids=[doc_id],
-            documents=[document],
-            embeddings=[embedding],
-            metadatas=[metadata] if metadata else None,
+        point_id = self._to_uuid(doc_id)
+        payload = {"document": document, "original_id": str(doc_id)}
+        if metadata:
+            payload.update(metadata)
+        self.client.upsert(
+            collection_name=self.collection,
+            points=[PointStruct(id=point_id, vector=embedding, payload=payload)],
         )
 
     def query(
@@ -34,42 +49,53 @@ class VectorStore:
         n_results: int = 5,
         where: dict = None,
     ) -> list[dict]:
-        kwargs = {
-            "query_embeddings": [embedding],
-            "n_results": n_results,
-            "include": ["documents", "metadatas", "distances"],
-        }
-        if where:
-            kwargs["where"] = where
+        query_filter = self._build_filter(where) if where else None
         try:
-            results = self.collection.query(**kwargs)
+            results = self.client.query_points(  # replaces the deprecated search() API
+                collection_name=self.collection,
+                query=embedding,
+                limit=n_results,
+                query_filter=query_filter,
+            )
         except Exception:
             return []
 
         items = []
-        if results and results["ids"] and results["ids"][0]:
-            for i, doc_id in enumerate(results["ids"][0]):
-                item = {
-                    "id": doc_id,
-                    "document": results["documents"][0][i] if results["documents"] else "",
-                    "distance": results["distances"][0][i] if results["distances"] else 0,
-                    "metadata": results["metadatas"][0][i] if results["metadatas"] else {},
-                }
-                # cosine distance → similarity
-                item["similarity"] = round(1 - item["distance"], 4)
-                items.append(item)
+        for hit in results.points:  # query_points wraps the scored hits in .points
+            payload = hit.payload or {}
+            item = {
+                "id": payload.get("original_id", str(hit.id)),
+                "document": payload.get("document", ""),
+                "distance": round(1 - hit.score, 4),  # cosine score → distance
+                "metadata": {k: v for k, v in payload.items() if k not in ("document", "original_id")},
+                "similarity": round(hit.score, 4),
+            }
+            items.append(item)
         return items
 
+    @staticmethod
+    def _build_filter(where: dict) -> Filter:
+        """Convert a flat ``where`` dict into a Qdrant Filter: every key/value pair becomes an exact-match ``must`` condition (ChromaDB operators such as $and/$in are not translated)."""
+        conditions = []
+        for key, value in where.items():
+            conditions.append(FieldCondition(key=key, match=MatchValue(value=value)))
+        return Filter(must=conditions)
+
     def delete(self, doc_id: str):
-        self.collection.delete(ids=[doc_id])
+        point_id = self._to_uuid(doc_id)
+        self.client.delete(
+            collection_name=self.collection,
+            points_selector=[point_id],
+        )
 
     def count(self) -> int:
-        return self.collection.count()
+        info = self.client.get_collection(collection_name=self.collection)
+        return info.points_count or 0  # points_count is Optional; keep the int return contract
 
     def stats(self) -> dict:
         return {
             "total_documents": self.count(),
-            "collection_name": "qc_issues",
+            "collection_name": self.collection,
         }
 
 
diff --git a/ai-service/requirements.txt b/ai-service/requirements.txt
index e54eaab..7c817fc 100644
--- a/ai-service/requirements.txt
+++ b/ai-service/requirements.txt
@@ -1,7 +1,7 @@
 fastapi==0.104.1
 uvicorn[standard]==0.24.0
 httpx==0.27.0
-chromadb==0.4.22
+qdrant-client>=1.7.0
 numpy==1.26.2
 pydantic==2.5.0
 pydantic-settings==2.1.0
diff --git a/ai-service/routers/health.py b/ai-service/routers/health.py
index bbee30e..b643afa 100644
--- a/ai-service/routers/health.py
+++ b/ai-service/routers/health.py
@@ -10,20 +10,18 @@ async def health_check():
     backends = await ollama_client.check_health()
     stats = vector_store.stats()
 
-    # 메인 텍스트 모델명 결정 (Ollama 메인, MLX fallback)
+    # 메인 텍스트 모델명 결정
     model_name = None
-    ollama_models = backends.get("ollama", {}).get("models", [])
-    if ollama_models:
-        model_name = ollama_models[0]
-    if not model_name and backends.get("mlx", {}).get("status") == "connected":
-        model_name = backends["mlx"].get("model")
+    text_models = backends.get("ollama_text", {}).get("models", [])
+    if text_models:
+        model_name = text_models[0]
 
     return {
         "status": "ok",
         "service": "tk-ai-service",
         "model": model_name,
-        "ollama": backends.get("ollama", {}),
-        "mlx": backends.get("mlx", {}),
+        "ollama_text": backends.get("ollama_text", {}),
+        "ollama_embed": backends.get("ollama_embed", {}),
         "embeddings": stats,
     }
 
diff --git a/ai-service/services/ollama_client.py b/ai-service/services/ollama_client.py
index a897e1d..0f6807a 100644
--- a/ai-service/services/ollama_client.py
+++ b/ai-service/services/ollama_client.py
@@ -5,7 +5,8 @@ from config import settings
 
 class OllamaClient:
     def __init__(self):
-        self.base_url = settings.OLLAMA_BASE_URL
+        self.text_url = settings.OLLAMA_BASE_URL      # GPU서버 (텍스트 생성)
+        self.embed_url = settings.OLLAMA_EMBED_URL     # 맥미니 (임베딩)
         self.timeout = httpx.Timeout(float(settings.OLLAMA_TIMEOUT), connect=10.0)
         self._client: httpx.AsyncClient | None = None
 
@@ -22,7 +23,7 @@ class OllamaClient:
     async def generate_embedding(self, text: str) -> list[float]:
         client = await self._get_client()
         response = await client.post(
-            f"{self.base_url}/api/embeddings",
+            f"{self.embed_url}/api/embeddings",
             json={"model": settings.OLLAMA_EMBED_MODEL, "prompt": text},
         )
         response.raise_for_status()
@@ -43,49 +44,38 @@ class OllamaClient:
             messages.append({"role": "system", "content": system})
         messages.append({"role": "user", "content": prompt})
         client = await self._get_client()
-        # 조립컴 Ollama 메인, MLX fallback
-        try:
-            response = await client.post(
-                f"{self.base_url}/api/chat",
-                json={
-                    "model": settings.OLLAMA_TEXT_MODEL,
-                    "messages": messages,
-                    "stream": False,
-                    "think": False,
-                    "options": {"temperature": 0.3, "num_predict": 2048},
-                },
-            )
-            response.raise_for_status()
-            return response.json()["message"]["content"]
-        except Exception:
-            response = await client.post(
-                f"{settings.MLX_BASE_URL}/chat/completions",
-                json={
-                    "model": settings.MLX_TEXT_MODEL,
-                    "messages": messages,
-                    "max_tokens": 2048,
-                    "temperature": 0.3,
-                },
-            )
-            response.raise_for_status()
-            return response.json()["choices"][0]["message"]["content"]
+        response = await client.post(
+            f"{self.text_url}/api/chat",
+            json={
+                "model": settings.OLLAMA_TEXT_MODEL,
+                "messages": messages,
+                "stream": False,
+                "think": False,
+                "options": {"temperature": 0.3, "num_predict": 2048},
+            },
+        )
+        response.raise_for_status()
+        return response.json()["message"]["content"]
 
     async def check_health(self) -> dict:
         result = {}
         short_timeout = httpx.Timeout(5.0, connect=3.0)
+        # GPU server Ollama (text generation); a non-2xx reply counts as disconnected
         try:
             async with httpx.AsyncClient(timeout=short_timeout) as c:
-                response = await c.get(f"{self.base_url}/api/tags")
+                response = (await c.get(f"{self.text_url}/api/tags")).raise_for_status()
             models = response.json().get("models", [])
-            result["ollama"] = {"status": "connected", "models": [m["name"] for m in models]}
+            result["ollama_text"] = {"status": "connected", "url": self.text_url, "models": [m["name"] for m in models]}
         except Exception:
-            result["ollama"] = {"status": "disconnected"}
+            result["ollama_text"] = {"status": "disconnected", "url": self.text_url}
+        # Mac mini Ollama (embeddings); a non-2xx reply counts as disconnected
         try:
             async with httpx.AsyncClient(timeout=short_timeout) as c:
-                response = await c.get(f"{settings.MLX_BASE_URL}/health")
-            result["mlx"] = {"status": "connected", "model": settings.MLX_TEXT_MODEL}
+                response = (await c.get(f"{self.embed_url}/api/tags")).raise_for_status()
+            models = response.json().get("models", [])
+            result["ollama_embed"] = {"status": "connected", "url": self.embed_url, "models": [m["name"] for m in models]}
         except Exception:
-            result["mlx"] = {"status": "disconnected"}
+            result["ollama_embed"] = {"status": "disconnected", "url": self.embed_url}
         return result
 
 
diff --git a/docker-compose.yml b/docker-compose.yml
index bb01b20..c128a1a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -286,41 +286,9 @@ services:
       - tk-network
 
   # =================================================================
-  # AI Service
+  # AI Service — 맥미니로 이전됨 (~/docker/tk-ai-service/)
   # =================================================================
 
-  ai-service:
-    build:
-      context: ./ai-service
-      dockerfile: Dockerfile
-    container_name: tk-ai-service
-    restart: unless-stopped
-    ports:
-      - "30400:8000"
-    environment:
-      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-https://gpu.hyungi.net}
-      - OLLAMA_TEXT_MODEL=${OLLAMA_TEXT_MODEL:-qwen3.5:9b-q8_0}
-      - OLLAMA_EMBED_MODEL=${OLLAMA_EMBED_MODEL:-bge-m3}
-      - OLLAMA_TIMEOUT=${OLLAMA_TIMEOUT:-120}
-      - MLX_BASE_URL=${MLX_BASE_URL:-https://llm.hyungi.net}
-      - MLX_TEXT_MODEL=${MLX_TEXT_MODEL:-/Users/hyungi/mlx-models/Qwen3.5-27B-4bit}
-      - DB_HOST=mariadb
-      - DB_PORT=3306
-      - DB_USER=${MYSQL_USER:-hyungi_user}
-      - DB_PASSWORD=${MYSQL_PASSWORD}
-      - DB_NAME=${MYSQL_DATABASE:-hyungi}
-      - SECRET_KEY=${SSO_JWT_SECRET}
-      - SYSTEM1_API_URL=http://system1-api:3005
-      - CHROMA_PERSIST_DIR=/app/data/chroma
-      - TZ=Asia/Seoul
-    volumes:
-      - ai_data:/app/data
-    depends_on:
-      mariadb:
-        condition: service_healthy
-    networks:
-      - tk-network
-
   # =================================================================
   # Gateway
   # =================================================================
@@ -393,7 +361,6 @@ volumes:
   system3_uploads:
     external: true
     name: tkqc-package_uploads
-  ai_data:
 networks:
   tk-network:
     driver: bridge
diff --git a/gateway/nginx.conf b/gateway/nginx.conf
index 531eb19..5b3c614 100644
--- a/gateway/nginx.conf
+++ b/gateway/nginx.conf
@@ -55,16 +55,20 @@ server {
         proxy_set_header X-Forwarded-Proto $scheme;
     }
 
-    # ===== AI Service API =====
+    # ===== AI Service API (맥미니 home-service-proxy 경유) =====
     location /ai-api/ {
-        proxy_pass http://ai-service:8000/api/ai/;
+        resolver 8.8.8.8 valid=300s ipv6=off;
+        set $ai_upstream https://ai.hyungi.net;
+        rewrite ^/ai-api/(.*) /api/ai/$1 break;
+        proxy_pass $ai_upstream;
         proxy_http_version 1.1;
-        proxy_set_header Host $host;
+        proxy_set_header Host ai.hyungi.net;
         proxy_set_header X-Real-IP $remote_addr;
         proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
         proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_read_timeout 120s;
-        proxy_send_timeout 120s;
+        proxy_ssl_server_name on;
+        proxy_read_timeout 180s;
+        proxy_send_timeout 180s;
     }
 
     # ===== System 1 Web (나머지 모든 경로) =====
diff --git a/system3-nonconformance/web/nginx.conf b/system3-nonconformance/web/nginx.conf
index df2d137..6c84fa9 100644
--- a/system3-nonconformance/web/nginx.conf
+++ b/system3-nonconformance/web/nginx.conf
@@ -48,16 +48,20 @@ server {
         proxy_buffering off;
     }
 
-    # AI API 프록시
+    # AI API 프록시 (맥미니 home-service-proxy 경유)
     location /ai-api/ {
-        proxy_pass http://ai-service:8000/api/ai/;
+        resolver 8.8.8.8 valid=300s ipv6=off;
+        set $ai_upstream https://ai.hyungi.net;
+        rewrite ^/ai-api/(.*) /api/ai/$1 break;
+        proxy_pass $ai_upstream;
         proxy_http_version 1.1;
-        proxy_set_header Host $host;
+        proxy_set_header Host ai.hyungi.net;
         proxy_set_header X-Real-IP $remote_addr;
         proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
         proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_read_timeout 120s;
-        proxy_send_timeout 120s;
+        proxy_ssl_server_name on;
+        proxy_read_timeout 180s;
+        proxy_send_timeout 180s;
     }
 
     # 모바일 전용 페이지