From 85f674c9cb6364805ab18d28d8c2272591ac0584 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Wed, 11 Mar 2026 15:07:58 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20ai-service=EB=A5=BC=20ds923=EC=97=90?= =?UTF-8?q?=EC=84=9C=20=EB=A7=A5=EB=AF=B8=EB=8B=88=EB=A1=9C=20=EC=9D=B4?= =?UTF-8?q?=EC=A0=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ChromaDB → Qdrant 전환 (맥미니 기존 인스턴스, tk_qc_issues 컬렉션) - Ollama 임베딩/텍스트 생성 URL 분리 (임베딩: 맥미니, 텍스트: GPU서버) - MLX fallback 제거, Ollama 단일 경로로 단순화 - ds923 docker-compose에서 ai-service 제거 - gateway/system3-web nginx: ai-service 프록시를 ai.hyungi.net 경유로 변경 - resolver + 변수 기반 proxy_pass로 런타임 DNS 해석 (컨테이너 시작 실패 방지) Co-Authored-By: Claude Opus 4.6 --- ai-service/Dockerfile | 2 +- ai-service/config.py | 22 ++++--- ai-service/db/vector_store.py | 94 +++++++++++++++++---------- ai-service/requirements.txt | 2 +- ai-service/routers/health.py | 14 ++-- ai-service/services/ollama_client.py | 58 +++++++---------- docker-compose.yml | 35 +--------- gateway/nginx.conf | 14 ++-- system3-nonconformance/web/nginx.conf | 14 ++-- 9 files changed, 125 insertions(+), 130 deletions(-) diff --git a/ai-service/Dockerfile b/ai-service/Dockerfile index 0e51481..d3a11d6 100644 --- a/ai-service/Dockerfile +++ b/ai-service/Dockerfile @@ -4,6 +4,6 @@ RUN apt-get update && apt-get install -y gcc build-essential && rm -rf /var/lib/ COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY . . -RUN mkdir -p /app/data/chroma +RUN mkdir -p /app/data EXPOSE 8000 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/ai-service/config.py b/ai-service/config.py index 79aef0c..d4c9f3f 100644 --- a/ai-service/config.py +++ b/ai-service/config.py @@ -2,16 +2,22 @@ from pydantic_settings import BaseSettings class Settings(BaseSettings): - OLLAMA_BASE_URL: str = "https://gpu.hyungi.net" + # GPU서버 Ollama (텍스트 생성) + OLLAMA_BASE_URL: str = "http://192.168.1.186:11434" OLLAMA_TEXT_MODEL: str = "qwen3.5:9b-q8_0" - OLLAMA_EMBED_MODEL: str = "bge-m3" OLLAMA_TIMEOUT: int = 120 - MLX_BASE_URL: str = "https://llm.hyungi.net" - MLX_TEXT_MODEL: str = "/Users/hyungi/mlx-models/Qwen3.5-27B-4bit" + # 맥미니 Ollama (임베딩) — OrbStack: host.internal / Docker Desktop: host.docker.internal + OLLAMA_EMBED_URL: str = "http://host.internal:11434" + OLLAMA_EMBED_MODEL: str = "bge-m3" - DB_HOST: str = "mariadb" - DB_PORT: int = 3306 + # 맥미니 Qdrant (기존 인스턴스, 회사 전용 컬렉션) + QDRANT_URL: str = "http://host.internal:6333" + QDRANT_COLLECTION: str = "tk_qc_issues" + + # ds923 MariaDB (Tailscale) + DB_HOST: str = "100.71.132.52" + DB_PORT: int = 30306 DB_USER: str = "hyungi_user" DB_PASSWORD: str = "" DB_NAME: str = "hyungi" @@ -19,8 +25,8 @@ class Settings(BaseSettings): SECRET_KEY: str = "" ALGORITHM: str = "HS256" - SYSTEM1_API_URL: str = "http://system1-api:3005" - CHROMA_PERSIST_DIR: str = "/app/data/chroma" + # ds923 System1 API (Tailscale) + SYSTEM1_API_URL: str = "http://100.71.132.52:30005" METADATA_DB_PATH: str = "/app/data/metadata.db" class Config: diff --git a/ai-service/db/vector_store.py b/ai-service/db/vector_store.py index 23868f3..ad8e4f6 100644 --- a/ai-service/db/vector_store.py +++ b/ai-service/db/vector_store.py @@ -1,18 +1,31 @@ -import chromadb +import uuid +from qdrant_client import QdrantClient +from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue from config import settings class VectorStore: def __init__(self): self.client = None - self.collection = None + self.collection = settings.QDRANT_COLLECTION # "tk_qc_issues" def initialize(self): - self.client = chromadb.PersistentClient(path=settings.CHROMA_PERSIST_DIR) - self.collection = self.client.get_or_create_collection( - name="qc_issues", - metadata={"hnsw:space": "cosine"}, - ) + self.client = QdrantClient(url=settings.QDRANT_URL) + self._ensure_collection() + + def _ensure_collection(self): + collections = [c.name for c in self.client.get_collections().collections] + if self.collection not in collections: + # bge-m3 기본 출력 = 1024 dims + self.client.create_collection( + collection_name=self.collection, + vectors_config=VectorParams(size=1024, distance=Distance.COSINE), + ) + + @staticmethod + def _to_uuid(doc_id) -> str: + """문자열/정수 ID → UUID5 변환 (Qdrant 호환)""" + return str(uuid.uuid5(uuid.NAMESPACE_URL, str(doc_id))) def upsert( self, @@ -21,11 +34,13 @@ class VectorStore: embedding: list[float], metadata: dict = None, ): - self.collection.upsert( - ids=[doc_id], - documents=[document], - embeddings=[embedding], - metadatas=[metadata] if metadata else None, + point_id = self._to_uuid(doc_id) + payload = {"document": document, "original_id": str(doc_id)} + if metadata: + payload.update(metadata) + self.client.upsert( + collection_name=self.collection, + points=[PointStruct(id=point_id, vector=embedding, payload=payload)], ) def query( @@ -34,42 +49,53 @@ class VectorStore: n_results: int = 5, where: dict = None, ) -> list[dict]: - kwargs = { - "query_embeddings": [embedding], - "n_results": n_results, - "include": ["documents", "metadatas", "distances"], - } - if where: - kwargs["where"] = where + query_filter = self._build_filter(where) if where else None try: - results = self.collection.query(**kwargs) + results = self.client.search( + collection_name=self.collection, + query_vector=embedding, + limit=n_results, + query_filter=query_filter, + ) except Exception: return [] items = [] - if results and results["ids"] and results["ids"][0]: - for i, doc_id in enumerate(results["ids"][0]): - item = { - "id": doc_id, - "document": results["documents"][0][i] if results["documents"] else "", - "distance": results["distances"][0][i] if results["distances"] else 0, - "metadata": results["metadatas"][0][i] if results["metadatas"] else {}, - } - # cosine distance → similarity - item["similarity"] = round(1 - item["distance"], 4) - items.append(item) + for hit in results: + payload = hit.payload or {} + item = { + "id": payload.get("original_id", str(hit.id)), + "document": payload.get("document", ""), + "distance": round(1 - hit.score, 4), # cosine score → distance + "metadata": {k: v for k, v in payload.items() if k not in ("document", "original_id")}, + "similarity": round(hit.score, 4), + } + items.append(item) return items + @staticmethod + def _build_filter(where: dict) -> Filter: + """ChromaDB 스타일 where 조건 → Qdrant Filter 변환""" + conditions = [] + for key, value in where.items(): + conditions.append(FieldCondition(key=key, match=MatchValue(value=value))) + return Filter(must=conditions) + def delete(self, doc_id: str): - self.collection.delete(ids=[doc_id]) + point_id = self._to_uuid(doc_id) + self.client.delete( + collection_name=self.collection, + points_selector=[point_id], + ) def count(self) -> int: - return self.collection.count() + info = self.client.get_collection(collection_name=self.collection) + return info.points_count def stats(self) -> dict: return { "total_documents": self.count(), - "collection_name": "qc_issues", + "collection_name": self.collection, } diff --git a/ai-service/requirements.txt b/ai-service/requirements.txt index e54eaab..7c817fc 100644 --- a/ai-service/requirements.txt +++ b/ai-service/requirements.txt @@ -1,7 +1,7 @@ fastapi==0.104.1 uvicorn[standard]==0.24.0 httpx==0.27.0 -chromadb==0.4.22 +qdrant-client>=1.7.0 numpy==1.26.2 pydantic==2.5.0 pydantic-settings==2.1.0 diff --git a/ai-service/routers/health.py b/ai-service/routers/health.py index bbee30e..b643afa 100644 --- a/ai-service/routers/health.py +++ b/ai-service/routers/health.py @@ -10,20 +10,18 @@ async def health_check(): backends = await ollama_client.check_health() stats = vector_store.stats() - # 메인 텍스트 모델명 결정 (Ollama 메인, MLX fallback) + # 메인 텍스트 모델명 결정 model_name = None - ollama_models = backends.get("ollama", {}).get("models", []) - if ollama_models: - model_name = ollama_models[0] - if not model_name and backends.get("mlx", {}).get("status") == "connected": - model_name = backends["mlx"].get("model") + text_models = backends.get("ollama_text", {}).get("models", []) + if text_models: + model_name = text_models[0] return { "status": "ok", "service": "tk-ai-service", "model": model_name, - "ollama": backends.get("ollama", {}), - "mlx": backends.get("mlx", {}), + "ollama_text": backends.get("ollama_text", {}), + "ollama_embed": backends.get("ollama_embed", {}), "embeddings": stats, } diff --git a/ai-service/services/ollama_client.py b/ai-service/services/ollama_client.py index a897e1d..0f6807a 100644 --- a/ai-service/services/ollama_client.py +++ b/ai-service/services/ollama_client.py @@ -5,7 +5,8 @@ from config import settings class OllamaClient: def __init__(self): - self.base_url = settings.OLLAMA_BASE_URL + self.text_url = settings.OLLAMA_BASE_URL # GPU서버 (텍스트 생성) + self.embed_url = settings.OLLAMA_EMBED_URL # 맥미니 (임베딩) self.timeout = httpx.Timeout(float(settings.OLLAMA_TIMEOUT), connect=10.0) self._client: httpx.AsyncClient | None = None @@ -22,7 +23,7 @@ class OllamaClient: async def generate_embedding(self, text: str) -> list[float]: client = await self._get_client() response = await client.post( - f"{self.base_url}/api/embeddings", + f"{self.embed_url}/api/embeddings", json={"model": settings.OLLAMA_EMBED_MODEL, "prompt": text}, ) response.raise_for_status() @@ -43,49 +44,38 @@ class OllamaClient: messages.append({"role": "system", "content": system}) messages.append({"role": "user", "content": prompt}) client = await self._get_client() - # 조립컴 Ollama 메인, MLX fallback - try: - response = await client.post( - f"{self.base_url}/api/chat", - json={ - "model": settings.OLLAMA_TEXT_MODEL, - "messages": messages, - "stream": False, - "think": False, - "options": {"temperature": 0.3, "num_predict": 2048}, - }, - ) - response.raise_for_status() - return response.json()["message"]["content"] - except Exception: - response = await client.post( - f"{settings.MLX_BASE_URL}/chat/completions", - json={ - "model": settings.MLX_TEXT_MODEL, - "messages": messages, - "max_tokens": 2048, - "temperature": 0.3, - }, - ) - response.raise_for_status() - return response.json()["choices"][0]["message"]["content"] + response = await client.post( + f"{self.text_url}/api/chat", + json={ + "model": settings.OLLAMA_TEXT_MODEL, + "messages": messages, + "stream": False, + "think": False, + "options": {"temperature": 0.3, "num_predict": 2048}, + }, + ) + response.raise_for_status() + return response.json()["message"]["content"] async def check_health(self) -> dict: result = {} short_timeout = httpx.Timeout(5.0, connect=3.0) + # GPU서버 Ollama (텍스트 생성) try: async with httpx.AsyncClient(timeout=short_timeout) as c: - response = await c.get(f"{self.base_url}/api/tags") + response = await c.get(f"{self.text_url}/api/tags") models = response.json().get("models", []) - result["ollama"] = {"status": "connected", "models": [m["name"] for m in models]} + result["ollama_text"] = {"status": "connected", "url": self.text_url, "models": [m["name"] for m in models]} except Exception: - result["ollama"] = {"status": "disconnected"} + result["ollama_text"] = {"status": "disconnected", "url": self.text_url} + # 맥미니 Ollama (임베딩) try: async with httpx.AsyncClient(timeout=short_timeout) as c: - response = await c.get(f"{settings.MLX_BASE_URL}/health") - result["mlx"] = {"status": "connected", "model": settings.MLX_TEXT_MODEL} + response = await c.get(f"{self.embed_url}/api/tags") + models = response.json().get("models", []) + result["ollama_embed"] = {"status": "connected", "url": self.embed_url, "models": [m["name"] for m in models]} except Exception: - result["mlx"] = {"status": "disconnected"} + result["ollama_embed"] = {"status": "disconnected", "url": self.embed_url} return result diff --git a/docker-compose.yml b/docker-compose.yml index bb01b20..c128a1a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -286,41 +286,9 @@ services: - tk-network # ================================================================= - # AI Service + # AI Service — 맥미니로 이전됨 (~/docker/tk-ai-service/) # ================================================================= - ai-service: - build: - context: ./ai-service - dockerfile: Dockerfile - container_name: tk-ai-service - restart: unless-stopped - ports: - - "30400:8000" - environment: - - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-https://gpu.hyungi.net} - - OLLAMA_TEXT_MODEL=${OLLAMA_TEXT_MODEL:-qwen3.5:9b-q8_0} - - OLLAMA_EMBED_MODEL=${OLLAMA_EMBED_MODEL:-bge-m3} - - OLLAMA_TIMEOUT=${OLLAMA_TIMEOUT:-120} - - MLX_BASE_URL=${MLX_BASE_URL:-https://llm.hyungi.net} - - MLX_TEXT_MODEL=${MLX_TEXT_MODEL:-/Users/hyungi/mlx-models/Qwen3.5-27B-4bit} - - DB_HOST=mariadb - - DB_PORT=3306 - - DB_USER=${MYSQL_USER:-hyungi_user} - - DB_PASSWORD=${MYSQL_PASSWORD} - - DB_NAME=${MYSQL_DATABASE:-hyungi} - - SECRET_KEY=${SSO_JWT_SECRET} - - SYSTEM1_API_URL=http://system1-api:3005 - - CHROMA_PERSIST_DIR=/app/data/chroma - - TZ=Asia/Seoul - volumes: - - ai_data:/app/data - depends_on: - mariadb: - condition: service_healthy - networks: - - tk-network - # ================================================================= # Gateway # ================================================================= @@ -393,7 +361,6 @@ volumes: system3_uploads: external: true name: tkqc-package_uploads - ai_data: networks: tk-network: driver: bridge diff --git a/gateway/nginx.conf b/gateway/nginx.conf index 531eb19..5b3c614 100644 --- a/gateway/nginx.conf +++ b/gateway/nginx.conf @@ -55,16 +55,20 @@ server { proxy_set_header X-Forwarded-Proto $scheme; } - # ===== AI Service API ===== + # ===== AI Service API (맥미니 home-service-proxy 경유) ===== location /ai-api/ { - proxy_pass http://ai-service:8000/api/ai/; + resolver 8.8.8.8 valid=300s ipv6=off; + set $ai_upstream https://ai.hyungi.net; + rewrite ^/ai-api/(.*) /api/ai/$1 break; + proxy_pass $ai_upstream; proxy_http_version 1.1; - proxy_set_header Host $host; + proxy_set_header Host ai.hyungi.net; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; - proxy_read_timeout 120s; - proxy_send_timeout 120s; + proxy_ssl_server_name on; + proxy_read_timeout 180s; + proxy_send_timeout 180s; } # ===== System 1 Web (나머지 모든 경로) ===== diff --git a/system3-nonconformance/web/nginx.conf b/system3-nonconformance/web/nginx.conf index df2d137..6c84fa9 100644 --- a/system3-nonconformance/web/nginx.conf +++ b/system3-nonconformance/web/nginx.conf @@ -48,16 +48,20 @@ server { proxy_buffering off; } - # AI API 프록시 + # AI API 프록시 (맥미니 home-service-proxy 경유) location /ai-api/ { - proxy_pass http://ai-service:8000/api/ai/; + resolver 8.8.8.8 valid=300s ipv6=off; + set $ai_upstream https://ai.hyungi.net; + rewrite ^/ai-api/(.*) /api/ai/$1 break; + proxy_pass $ai_upstream; proxy_http_version 1.1; - proxy_set_header Host $host; + proxy_set_header Host ai.hyungi.net; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; - proxy_read_timeout 120s; - proxy_send_timeout 120s; + proxy_ssl_server_name on; + proxy_read_timeout 180s; + proxy_send_timeout 180s; } # 모바일 전용 페이지