---
# Phase 2B — Reranker candidate compose override (Diagnose only)
#
# Profile-isolated: explicit opt-in via `--profile rerank-cand`; these services
# are not started by a default `up`.
# Zero impact on production fastapi/postgres/reranker (bge-reranker-v2-m3).
# To be removed in a separate chore (PR-2B-Rerank-Cand-Cleanup-1) once this PR
# is finished.
#
# Candidate status (2026-05-23):
#   - gte_ml_base     : Apache 2.0, 305M, smoke test pending
#   - mxbai_large     : Apache 2.0, ~435M, no safetensors weights — TEI smoke-test risk
#   - bge_v2_gemma_2b : Gemma license, 2.5B FP16 ~5GB, smoke test pending
#
# Usage:
#   docker compose -f docker-compose.yml -f docker-compose.override.rerank-cand.yml \
#     --profile rerank-cand up -d rerank-cand-gte-ml-base

services:
  # Candidate 1: Alibaba-NLP/gte-multilingual-reranker-base.
  rerank-cand-gte-ml-base:
    image: ghcr.io/huggingface/text-embeddings-inference:1.7
    container_name: hyungi_document_server-rerank-cand-gte-ml-base-1
    restart: unless-stopped
    # Only started when the `rerank-cand` profile is explicitly enabled.
    profiles:
      - "rerank-cand"
    environment:
      MODEL_ID: Alibaba-NLP/gte-multilingual-reranker-base
      MAX_BATCH_TOKENS: "8192"
      MAX_CONCURRENT_REQUESTS: "4"
    # Container port 80 is exposed to the compose network only; no host port
    # is published.
    expose:
      - "80"
    # Named volume mounted at /data (presumably the model download cache, per
    # the volume name — confirm against the image's documentation).
    volumes:
      - rerank_cand_gte_ml_base_cache:/data
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s
    # Reserve one NVIDIA GPU device for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu

  # Candidate 2: mixedbread-ai/mxbai-rerank-large-v1.
  rerank-cand-mxbai-large:
    image: ghcr.io/huggingface/text-embeddings-inference:1.7
    container_name: hyungi_document_server-rerank-cand-mxbai-large-1
    restart: unless-stopped
    # Only started when the `rerank-cand` profile is explicitly enabled.
    profiles:
      - "rerank-cand"
    environment:
      MODEL_ID: mixedbread-ai/mxbai-rerank-large-v1
      MAX_BATCH_TOKENS: "8192"
      MAX_CONCURRENT_REQUESTS: "4"
    # Container port 80 is exposed to the compose network only; no host port
    # is published.
    expose:
      - "80"
    # Named volume mounted at /data (presumably the model download cache, per
    # the volume name — confirm against the image's documentation).
    volumes:
      - rerank_cand_mxbai_large_cache:/data
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s
    # Reserve one NVIDIA GPU device for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu

  # Candidate 3: BAAI/bge-reranker-v2-gemma.
  # Differs from the other two candidates: MAX_CONCURRENT_REQUESTS is 2
  # (not 4) and the healthcheck start_period is 120s (not 60s) — presumably
  # because of the larger model noted in the header; confirm before tuning.
  rerank-cand-bge-v2-gemma-2b:
    image: ghcr.io/huggingface/text-embeddings-inference:1.7
    container_name: hyungi_document_server-rerank-cand-bge-v2-gemma-2b-1
    restart: unless-stopped
    # Only started when the `rerank-cand` profile is explicitly enabled.
    profiles:
      - "rerank-cand"
    environment:
      MODEL_ID: BAAI/bge-reranker-v2-gemma
      MAX_BATCH_TOKENS: "8192"
      MAX_CONCURRENT_REQUESTS: "2"
    # Container port 80 is exposed to the compose network only; no host port
    # is published.
    expose:
      - "80"
    # Named volume mounted at /data (presumably the model download cache, per
    # the volume name — confirm against the image's documentation).
    volumes:
      - rerank_cand_bge_v2_gemma_2b_cache:/data
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s
    # Reserve one NVIDIA GPU device for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu

# One named volume per candidate, each with default driver settings.
volumes:
  rerank_cand_gte_ml_base_cache: {}
  rerank_cand_mxbai_large_cache: {}
  rerank_cand_bge_v2_gemma_2b_cache: {}