From 624b9d523d7a8047d367995d3079beb99364248c Mon Sep 17 00:00:00 2001 From: hyungi Date: Thu, 18 Jun 2026 00:00:42 +0000 Subject: [PATCH] =?UTF-8?q?refactor(search):=20Phase=202A/2B=20cand=20rera?= =?UTF-8?q?nker=20=EC=9E=94=EC=9E=AC=20teardown?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - RERANKER_BACKEND_MAP 에서 cand_gte_ml_base 슬러그 제거 (컨테이너·DB 테이블 마이그360·override 이미 종료) - docker-compose.override.cand.yml / override.rerank-cand.yml 삭제 - search.py allowlist · run_eval.py help 정합 - dispatcher scaffold(_resolve_reranker)는 보존 (후보 재진입 대비) Co-Authored-By: Claude Opus 4.8 (1M context) --- app/api/search.py | 2 +- app/services/search/rerank_service.py | 9 +- docker-compose.override.cand.yml | 135 ------------------------ docker-compose.override.rerank-cand.yml | 101 ------------------ tests/search_eval/run_eval.py | 2 +- 5 files changed, 6 insertions(+), 243 deletions(-) delete mode 100644 docker-compose.override.cand.yml delete mode 100644 docker-compose.override.rerank-cand.yml diff --git a/app/api/search.py b/app/api/search.py index b781986..9fadcfe 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -282,7 +282,7 @@ async def search( content={ "error_reason": "unknown_reranker_backend", "backend_requested": reranker_backend, - "allowed": ["baseline", "cand_gte_ml_base"], + "allowed": ["baseline"], "detail": msg, }, ) diff --git a/app/services/search/rerank_service.py b/app/services/search/rerank_service.py index 877926a..209f09e 100644 --- a/app/services/search/rerank_service.py +++ b/app/services/search/rerank_service.py @@ -44,11 +44,10 @@ RERANK_TIMEOUT = 5.0 # server-side allowlist map. query parameter 가 raw endpoint URL 받지 않음. RERANKER_BACKEND_MAP: dict[str, dict[str, str] | None] = { "baseline": None, # production reranker (config.yaml endpoint via AIClient.rerank) - "cand_gte_ml_base": { - "endpoint": "http://rerank-cand-gte-ml-base:80/rerank", - }, - # mxbai_large 후보 (deberta-v2 → TEI 1.7 미지원) Phase 2B-Extended 이관 - # bge_v2_gemma_2b 후보 (LLM-based reranker, 1_Pooling/config.json 부재) Phase 2B-Extended 이관 + # Phase 2B 후보 reranker 전부 NO-GO 종결 (2026-06-18 teardown): + # - cand_gte_ml_base : 컨테이너·DB 테이블(마이그 360)·override.rerank-cand.yml 제거됨 + # - mxbai_large (deberta-v2 → TEI 1.7 미지원) / bge_v2_gemma_2b (1_Pooling 부재) 미진입 + # dispatcher scaffold(_resolve_reranker)는 향후 후보 재진입 위해 보존. } diff --git a/docker-compose.override.cand.yml b/docker-compose.override.cand.yml deleted file mode 100644 index 59602c2..0000000 --- a/docker-compose.override.cand.yml +++ /dev/null @@ -1,135 +0,0 @@ -# Phase 2A — Embedding candidate compose override (Diagnose only) -# -# Profile-isolated: `--profile embed-cand` 명시 opt-in. default up 시 미기동. -# production fastapi/postgres/reranker 에 영향 0. -# 본 PR 종료 시 별 chore (PR-2A-Chunks-Cand-Cleanup-1) 에서 제거. -# -# 후보 상태 (2026-05-23): -# - me5_large_inst : ✅ smoke PASS (dim 1024) -# - bge_mgemma2 : ❌ Phase 2A-Extended 별 PR 이관 (9B FP16 → VRAM OOM risk + 다운로드 cost) -# - me5_ko : ❌ 폐기 (401 Unauthorized, gated/모델명 부정확) -# - snowflake_l_v2 : 신규 추가 (Snowflake/snowflake-arctic-embed-l-v2.0, 2024-12, multilingual 강화) -# -# 사용: -# docker compose -f docker-compose.yml -f docker-compose.override.cand.yml \ -# --profile embed-cand up -d embedding-cand-me5-inst -# -# 호출 (DS network 내부): -# http://embedding-cand-me5-inst:80/embed -# http://embedding-cand-snowflake-l-v2:80/embed - -services: - embedding-cand-me5-inst: - image: ghcr.io/huggingface/text-embeddings-inference:1.7 - restart: unless-stopped - container_name: hyungi_document_server-embedding-cand-me5-inst-1 - expose: - - "80" - environment: - - MODEL_ID=intfloat/multilingual-e5-large-instruct - - MAX_BATCH_TOKENS=8192 - - MAX_CONCURRENT_REQUESTS=4 - volumes: - - embedding_cand_me5_inst_cache:/data - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - healthcheck: - test: ["CMD", "curl", "-fsS", "http://localhost/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 60s - profiles: ["embed-cand"] - - embedding-cand-snowflake-l-v2: - image: ghcr.io/huggingface/text-embeddings-inference:1.7 - restart: unless-stopped - container_name: hyungi_document_server-embedding-cand-snowflake-l-v2-1 - expose: - - "80" - environment: - - MODEL_ID=Snowflake/snowflake-arctic-embed-l-v2.0 - - MAX_BATCH_TOKENS=8192 - - MAX_CONCURRENT_REQUESTS=4 - volumes: - - embedding_cand_snowflake_l_v2_cache:/data - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - healthcheck: - test: ["CMD", "curl", "-fsS", "http://localhost/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 60s - profiles: ["embed-cand"] - - # ===== 비활성 후보 (Phase 2A-Extended 별 PR 이관 또는 폐기) ===== - # 진단 박제만 보존. 본 PR scope 외. - - embedding-cand-bge-mgemma2: - image: ghcr.io/huggingface/text-embeddings-inference:1.7 - container_name: hyungi_document_server-embedding-cand-bge-mgemma2-1 - expose: - - "80" - environment: - - MODEL_ID=BAAI/bge-multilingual-gemma2 - - MAX_BATCH_TOKENS=8192 - - MAX_CONCURRENT_REQUESTS=4 - volumes: - - embedding_cand_bge_mgemma2_cache:/data - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - healthcheck: - test: ["CMD", "curl", "-fsS", "http://localhost/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 300s - profiles: ["embed-cand-extended"] # 본 PR 미사용. extended 별 profile. - - embedding-cand-me5-ko: - image: ghcr.io/huggingface/text-embeddings-inference:1.7 - container_name: hyungi_document_server-embedding-cand-me5-ko-1 - expose: - - "80" - environment: - - MODEL_ID=dragonkue/multilingual-e5-large-ko - - MAX_BATCH_TOKENS=8192 - - MAX_CONCURRENT_REQUESTS=4 - volumes: - - embedding_cand_me5_ko_cache:/data - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - healthcheck: - test: ["CMD", "curl", "-fsS", "http://localhost/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 60s - profiles: ["embed-cand-disabled"] # 401 fail. 사용 X. - -volumes: - embedding_cand_me5_inst_cache: - embedding_cand_snowflake_l_v2_cache: - embedding_cand_bge_mgemma2_cache: - embedding_cand_me5_ko_cache: diff --git a/docker-compose.override.rerank-cand.yml b/docker-compose.override.rerank-cand.yml deleted file mode 100644 index 6ffe16e..0000000 --- a/docker-compose.override.rerank-cand.yml +++ /dev/null @@ -1,101 +0,0 @@ -# Phase 2B — Reranker candidate compose override (Diagnose only) -# -# Profile-isolated: `--profile rerank-cand` 명시 opt-in. default up 시 미기동. -# production fastapi/postgres/reranker(bge-reranker-v2-m3) 에 영향 0. -# 본 PR 종료 후 별 chore (PR-2B-Rerank-Cand-Cleanup-1) 에서 제거. -# -# 후보 상태 (2026-05-23): -# - gte_ml_base : Apache 2.0, 305M, smoke 대기 -# - mxbai_large : Apache 2.0, ~435M, safetensors 부재 — TEI smoke risk -# - bge_v2_gemma_2b : Gemma 라이센스, 2.5B FP16 ~5GB, smoke 대기 -# -# 사용: -# docker compose -f docker-compose.yml -f docker-compose.override.rerank-cand.yml \ -# --profile rerank-cand up -d rerank-cand-gte-ml-base - -services: - rerank-cand-gte-ml-base: - image: ghcr.io/huggingface/text-embeddings-inference:1.7 - restart: unless-stopped - container_name: hyungi_document_server-rerank-cand-gte-ml-base-1 - expose: - - "80" - environment: - - MODEL_ID=Alibaba-NLP/gte-multilingual-reranker-base - - MAX_BATCH_TOKENS=8192 - - MAX_CONCURRENT_REQUESTS=4 - volumes: - - rerank_cand_gte_ml_base_cache:/data - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - healthcheck: - test: ["CMD", "curl", "-fsS", "http://localhost/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 60s - profiles: ["rerank-cand"] - - rerank-cand-mxbai-large: - image: ghcr.io/huggingface/text-embeddings-inference:1.7 - restart: unless-stopped - container_name: hyungi_document_server-rerank-cand-mxbai-large-1 - expose: - - "80" - environment: - - MODEL_ID=mixedbread-ai/mxbai-rerank-large-v1 - - MAX_BATCH_TOKENS=8192 - - MAX_CONCURRENT_REQUESTS=4 - volumes: - - rerank_cand_mxbai_large_cache:/data - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - healthcheck: - test: ["CMD", "curl", "-fsS", "http://localhost/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 60s - profiles: ["rerank-cand"] - - rerank-cand-bge-v2-gemma-2b: - image: ghcr.io/huggingface/text-embeddings-inference:1.7 - restart: unless-stopped - container_name: hyungi_document_server-rerank-cand-bge-v2-gemma-2b-1 - expose: - - "80" - environment: - - MODEL_ID=BAAI/bge-reranker-v2-gemma - - MAX_BATCH_TOKENS=8192 - - MAX_CONCURRENT_REQUESTS=2 - volumes: - - rerank_cand_bge_v2_gemma_2b_cache:/data - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - healthcheck: - test: ["CMD", "curl", "-fsS", "http://localhost/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 120s - profiles: ["rerank-cand"] - -volumes: - rerank_cand_gte_ml_base_cache: - rerank_cand_mxbai_large_cache: - rerank_cand_bge_v2_gemma_2b_cache: diff --git a/tests/search_eval/run_eval.py b/tests/search_eval/run_eval.py index 20e37ce..d4729a1 100644 --- a/tests/search_eval/run_eval.py +++ b/tests/search_eval/run_eval.py @@ -1394,7 +1394,7 @@ def main() -> int: "--reranker-backend", type=str, default=None, - help="Phase 2B Diagnose reranker dispatcher slug (baseline | cand_gte_ml_base). 미지정 = production.", + help="Phase 2B Diagnose reranker dispatcher slug (baseline). 후보 cand_gte_ml_base = NO-GO 종결·teardown(2026-06-18). 미지정 = production.", ) parser.add_argument( "--rewrite-backend",