diff --git a/docker-compose.yml b/docker-compose.yml
index b55599a..cf4e1f3 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -45,6 +45,28 @@ services:
       - "127.0.0.1:11434:11434"
     restart: unless-stopped
 
+  # Phase 1.3: bge-reranker-v2-m3 served by TEI. Internal only; fastapi calls it at reranker:80.
+  # No depends_on from fastapi: starts standalone, and fastapi runs without it (rerank=false fallback).
+  reranker:
+    image: ghcr.io/huggingface/text-embeddings-inference:1.5
+    container_name: hyungi_document_server-reranker-1
+    expose:
+      - "80"
+    environment:
+      MODEL_ID: BAAI/bge-reranker-v2-m3
+      MAX_BATCH_TOKENS: "8192"
+      MAX_CONCURRENT_REQUESTS: "4"
+    volumes:
+      - reranker_cache:/data
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    restart: unless-stopped
+
   ai-gateway:
     build: ./gpu-server/services/ai-gateway
     ports:
@@ -103,3 +125,4 @@ volumes:
   pgdata:
   caddy_data:
   ollama_data:
+  reranker_cache: