# Phase 2A — Embedding candidate compose override (Diagnose only)
#
# Profile-isolated: requires an explicit `--profile embed-cand` opt-in;
# nothing here starts on a default `up`.
# No impact on the production fastapi/postgres/reranker services.
# To be removed in a separate chore (PR-2A-Chunks-Cand-Cleanup-1) once this
# PR is finished.
#
# Candidate status (2026-05-23):
#   - me5_large_inst : ✅ smoke PASS (dim 1024)
#   - bge_mgemma2    : ❌ deferred to a separate Phase 2A-Extended PR
#                      (9B FP16 → VRAM OOM risk + download cost)
#   - me5_ko         : ❌ dropped (401 Unauthorized; gated repo or
#                      inaccurate model name)
#   - snowflake_l_v2 : newly added (Snowflake/snowflake-arctic-embed-l-v2.0,
#                      2024-12, improved multilingual support)
#
# Usage:
#   docker compose -f docker-compose.yml -f docker-compose.override.cand.yml \
#     --profile embed-cand up -d embedding-cand-me5-inst
#
# Endpoints (inside the DS network):
#   http://embedding-cand-me5-inst:80/embed
#   http://embedding-cand-snowflake-l-v2:80/embed

services:
  # ===== Active candidates (profile: embed-cand) =====

  embedding-cand-me5-inst:
    image: ghcr.io/huggingface/text-embeddings-inference:1.7
    restart: unless-stopped
    container_name: hyungi_document_server-embedding-cand-me5-inst-1
    # `expose` only: port 80 is not published to the host.
    expose:
      - "80"
    environment:
      - MODEL_ID=intfloat/multilingual-e5-large-instruct
      - MAX_BATCH_TOKENS=8192
      - MAX_CONCURRENT_REQUESTS=4
    volumes:
      # Per-candidate named volume mounted at /data.
      - embedding_cand_me5_inst_cache:/data
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # NOTE(review): assumes `curl` is available inside the image — confirm.
      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s
    profiles: ["embed-cand"]

  embedding-cand-snowflake-l-v2:
    image: ghcr.io/huggingface/text-embeddings-inference:1.7
    restart: unless-stopped
    container_name: hyungi_document_server-embedding-cand-snowflake-l-v2-1
    # `expose` only: port 80 is not published to the host.
    expose:
      - "80"
    environment:
      - MODEL_ID=Snowflake/snowflake-arctic-embed-l-v2.0
      - MAX_BATCH_TOKENS=8192
      - MAX_CONCURRENT_REQUESTS=4
    volumes:
      # Per-candidate named volume mounted at /data.
      - embedding_cand_snowflake_l_v2_cache:/data
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # NOTE(review): assumes `curl` is available inside the image — confirm.
      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s
    profiles: ["embed-cand"]

  # ===== Inactive candidates (deferred to a separate Phase 2A-Extended PR,
  # or dropped) =====
  # Kept only as a diagnostic record. Out of scope for this PR.
  # Unlike the active candidates, these define no `restart` policy.

  embedding-cand-bge-mgemma2:
    image: ghcr.io/huggingface/text-embeddings-inference:1.7
    container_name: hyungi_document_server-embedding-cand-bge-mgemma2-1
    expose:
      - "80"
    environment:
      - MODEL_ID=BAAI/bge-multilingual-gemma2
      - MAX_BATCH_TOKENS=8192
      - MAX_CONCURRENT_REQUESTS=4
    volumes:
      - embedding_cand_bge_mgemma2_cache:/data
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      # Longer grace period than the other candidates (300s vs 60s);
      # presumably to cover the larger model's load time — confirm.
      start_period: 300s
    profiles: ["embed-cand-extended"]  # Not used in this PR; separate extended profile.

  embedding-cand-me5-ko:
    image: ghcr.io/huggingface/text-embeddings-inference:1.7
    container_name: hyungi_document_server-embedding-cand-me5-ko-1
    expose:
      - "80"
    environment:
      - MODEL_ID=dragonkue/multilingual-e5-large-ko
      - MAX_BATCH_TOKENS=8192
      - MAX_CONCURRENT_REQUESTS=4
    volumes:
      - embedding_cand_me5_ko_cache:/data
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s
    profiles: ["embed-cand-disabled"]  # Fails with 401. Do not use.

# One named volume per candidate, each mounted at /data by its service.
volumes:
  embedding_cand_me5_inst_cache:
  embedding_cand_snowflake_l_v2_cache:
  embedding_cand_bge_mgemma2_cache:
  embedding_cand_me5_ko_cache: