# hyungi_document_server/docker-compose.override.rerank-cand.yml

# Phase 2B — Reranker candidate compose override (Diagnose only)
#
# Profile-isolated: `--profile rerank-cand` 명시 opt-in. default up 시 미기동.
# production fastapi/postgres/reranker(bge-reranker-v2-m3) 에 영향 0.
# 본 PR 종료 후 별 chore (PR-2B-Rerank-Cand-Cleanup-1) 에서 제거.
#
# 후보 상태 (2026-05-23):
#   - gte_ml_base       : Apache 2.0, 305M, smoke 대기
#   - mxbai_large       : Apache 2.0, ~435M, safetensors 부재 — TEI smoke risk
#   - bge_v2_gemma_2b   : Gemma 라이센스, 2.5B FP16 ~5GB, smoke 대기
#
# 사용:
#   docker compose -f docker-compose.yml -f docker-compose.override.rerank-cand.yml \
#     --profile rerank-cand up -d rerank-cand-gte-ml-base

services:
  # Reranker candidate: Alibaba-NLP/gte-multilingual-reranker-base served by
  # text-embeddings-inference. Belongs to the `rerank-cand` profile, so it is
  # only started when that profile is requested explicitly.
  rerank-cand-gte-ml-base:
    container_name: hyungi_document_server-rerank-cand-gte-ml-base-1
    image: ghcr.io/huggingface/text-embeddings-inference:1.7
    profiles:
      - rerank-cand
    # NOTE(review): with `unless-stopped`, a container that was started once
    # is brought back by the Docker daemon after a daemon/host restart, with
    # or without the profile flag — confirm that is wanted for a
    # diagnose-only candidate.
    restart: unless-stopped
    environment:
      MODEL_ID: Alibaba-NLP/gte-multilingual-reranker-base
      MAX_BATCH_TOKENS: "8192"
      MAX_CONCURRENT_REQUESTS: "4"
    # Exposed to other containers only; this file defines no `ports:` mapping.
    expose:
      - "80"
    volumes:
      # Named volume mounted at /data (presumably the model download cache,
      # going by the volume name — verify).
      - rerank_cand_gte_ml_base_cache:/data
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
    # Probes /health on port 80 inside the container using curl.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-fsS"
        - "http://localhost/health"
      start_period: 60s
      interval: 30s
      timeout: 10s
      retries: 5

  # Reranker candidate: mixedbread-ai/mxbai-rerank-large-v1 served by
  # text-embeddings-inference. Belongs to the `rerank-cand` profile, so it is
  # only started when that profile is requested explicitly.
  rerank-cand-mxbai-large:
    container_name: hyungi_document_server-rerank-cand-mxbai-large-1
    image: ghcr.io/huggingface/text-embeddings-inference:1.7
    profiles:
      - rerank-cand
    # NOTE(review): the file header flags this model as a TEI smoke risk
    # (no safetensors). With `unless-stopped`, a failing start is retried by
    # Docker until the container is stopped manually — confirm that is
    # acceptable for a diagnose-only candidate.
    restart: unless-stopped
    environment:
      MODEL_ID: mixedbread-ai/mxbai-rerank-large-v1
      MAX_BATCH_TOKENS: "8192"
      MAX_CONCURRENT_REQUESTS: "4"
    # Exposed to other containers only; this file defines no `ports:` mapping.
    expose:
      - "80"
    volumes:
      # Named volume mounted at /data (presumably the model download cache,
      # going by the volume name — verify).
      - rerank_cand_mxbai_large_cache:/data
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
    # Probes /health on port 80 inside the container using curl.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-fsS"
        - "http://localhost/health"
      start_period: 60s
      interval: 30s
      timeout: 10s
      retries: 5

  # Reranker candidate: BAAI/bge-reranker-v2-gemma served by
  # text-embeddings-inference. Belongs to the `rerank-cand` profile, so it is
  # only started when that profile is requested explicitly.
  rerank-cand-bge-v2-gemma-2b:
    container_name: hyungi_document_server-rerank-cand-bge-v2-gemma-2b-1
    image: ghcr.io/huggingface/text-embeddings-inference:1.7
    profiles:
      - rerank-cand
    # NOTE(review): with `unless-stopped`, a container that was started once
    # is brought back by the Docker daemon after a daemon/host restart, with
    # or without the profile flag — confirm that is wanted for a
    # diagnose-only candidate.
    restart: unless-stopped
    environment:
      MODEL_ID: BAAI/bge-reranker-v2-gemma
      MAX_BATCH_TOKENS: "8192"
      # Lower than the value of 4 used by the other two candidates
      # (presumably because of the larger model size — verify).
      MAX_CONCURRENT_REQUESTS: "2"
    # Exposed to other containers only; this file defines no `ports:` mapping.
    expose:
      - "80"
    volumes:
      # Named volume mounted at /data (presumably the model download cache,
      # going by the volume name — verify).
      - rerank_cand_bge_v2_gemma_2b_cache:/data
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
    # Probes /health on port 80 inside the container using curl. The start
    # period is 120s here, versus 60s for the other two candidates.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-fsS"
        - "http://localhost/health"
      start_period: 120s
      interval: 30s
      timeout: 10s
      retries: 5

# Named volumes, one per reranker candidate; each is mounted at /data by the
# service of the matching name. Empty mappings: no driver or options are set.
volumes:
  rerank_cand_gte_ml_base_cache: {}
  rerank_cand_mxbai_large_cache: {}
  rerank_cand_bge_v2_gemma_2b_cache: {}