From 943ac5f59c76f7fc4de484669b67b3d19a858204 Mon Sep 17 00:00:00 2001 From: hyungi Date: Sat, 23 May 2026 05:04:21 +0000 Subject: [PATCH] =?UTF-8?q?feat(eval):=20Phase=202A=20Diagnose=20Phase=201?= =?UTF-8?q?=20=E2=80=94=20TEI=20candidate=20compose=20override=20+=20fixtu?= =?UTF-8?q?re=20G0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2A Embedding Diagnose 본 PR 의 Phase 1 산출물. - docker-compose.override.cand.yml: 4 후보 service, profile 'embed-cand' 격리 - active: me5_large_inst (intfloat/multilingual-e5-large-instruct, smoke PASS) - active: snowflake_l_v2 (Snowflake/snowflake-arctic-embed-l-v2.0, smoke PASS) - 비활성 (extended profile): bge_mgemma2 (9B FP16 OOM risk → 별 PR 이관) - 비활성 (disabled profile): me5_ko (HF 401 → 폐기) - tests/fixtures/: G0 fixture 3건 박제 - ollama_bge_m3_embedding_response.json (G0-2: dim 1024, flat dict shape) - tei_embedding_response.json (G0-1: me5_large_inst, dim 1024, nested array) - tei_embedding_snowflake_l_v2_response.json (G0-1: snowflake, dim 1024, nested array) 운영 변경 0 (profile 격리, default up 시 미기동). production 9 컨테이너 영향 없음. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- docker-compose.override.cand.yml | 133 ++++++++++++++++++ .../ollama_bge_m3_embedding_response.json | 17 +++ tests/fixtures/tei_embedding_response.json | 19 +++ ...tei_embedding_snowflake_l_v2_response.json | 19 +++ 4 files changed, 188 insertions(+) create mode 100644 docker-compose.override.cand.yml create mode 100644 tests/fixtures/ollama_bge_m3_embedding_response.json create mode 100644 tests/fixtures/tei_embedding_response.json create mode 100644 tests/fixtures/tei_embedding_snowflake_l_v2_response.json diff --git a/docker-compose.override.cand.yml b/docker-compose.override.cand.yml new file mode 100644 index 0000000..50633db --- /dev/null +++ b/docker-compose.override.cand.yml @@ -0,0 +1,133 @@ +# Phase 2A — Embedding candidate compose override (Diagnose only) +# +# Profile-isolated: `--profile embed-cand` 명시 opt-in. default up 시 미기동. +# production fastapi/postgres/reranker 에 영향 0. +# 본 PR 종료 시 별 chore (PR-2A-Chunks-Cand-Cleanup-1) 에서 제거. 
+#
+# Candidate status (2026-05-23):
+#   - me5_large_inst : ✅ smoke PASS (dim 1024)
+#   - bge_mgemma2    : ❌ moved to a separate Phase 2A-Extended PR (9B FP16 → VRAM OOM risk + download cost)
+#   - me5_ko         : ❌ dropped (401 Unauthorized; model is gated or the model name is wrong)
+#   - snowflake_l_v2 : ✅ newly added, smoke PASS (dim 1024) — Snowflake/snowflake-arctic-embed-l-v2.0, 2024-12, stronger multilingual support
+#
+# Usage:
+#   docker compose -f docker-compose.yml -f docker-compose.override.cand.yml \
+#     --profile embed-cand up -d embedding-cand-me5-inst
+#
+# Endpoints (inside the DS network):
+#   http://embedding-cand-me5-inst:80/embed
+#   http://embedding-cand-snowflake-l-v2:80/embed
+
+services:
+  embedding-cand-me5-inst:  # active candidate: intfloat/multilingual-e5-large-instruct
+    image: ghcr.io/huggingface/text-embeddings-inference:1.7
+    container_name: hyungi_document_server-embedding-cand-me5-inst-1
+    expose:  # container port only; no host port mapping is declared
+      - "80"
+    environment:
+      - MODEL_ID=intfloat/multilingual-e5-large-instruct
+      - MAX_BATCH_TOKENS=8192
+      - MAX_CONCURRENT_REQUESTS=4
+    volumes:
+      - embedding_cand_me5_inst_cache:/data  # named volume declared under top-level `volumes:`
+    deploy:
+      resources:
+        reservations:
+          devices:  # reserves one NVIDIA GPU for this container
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    healthcheck:  # curls the in-container /health endpoint
+      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 60s
+    profiles: ["embed-cand"]  # not started by a default `up`; opt in via the embed-cand profile
+
+  embedding-cand-snowflake-l-v2:  # active candidate: Snowflake/snowflake-arctic-embed-l-v2.0
+    image: ghcr.io/huggingface/text-embeddings-inference:1.7
+    container_name: hyungi_document_server-embedding-cand-snowflake-l-v2-1
+    expose:  # container port only; no host port mapping is declared
+      - "80"
+    environment:
+      - MODEL_ID=Snowflake/snowflake-arctic-embed-l-v2.0
+      - MAX_BATCH_TOKENS=8192
+      - MAX_CONCURRENT_REQUESTS=4
+    volumes:
+      - embedding_cand_snowflake_l_v2_cache:/data  # named volume declared under top-level `volumes:`
+    deploy:
+      resources:
+        reservations:
+          devices:  # reserves one NVIDIA GPU for this container
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    healthcheck:  # curls the in-container /health endpoint
+      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 60s
+    profiles: ["embed-cand"]  # not started by a default `up`; opt in via the embed-cand profile
+
+  # ===== Inactive candidates (moved to a separate Phase 2A-Extended PR, or dropped) =====
+  # Kept only as a diagnostic record. Out of scope for this PR.
+
+  embedding-cand-bge-mgemma2:  # inactive candidate: BAAI/bge-multilingual-gemma2
+    image: ghcr.io/huggingface/text-embeddings-inference:1.7
+    container_name: hyungi_document_server-embedding-cand-bge-mgemma2-1
+    expose:  # container port only; no host port mapping is declared
+      - "80"
+    environment:
+      - MODEL_ID=BAAI/bge-multilingual-gemma2
+      - MAX_BATCH_TOKENS=8192
+      - MAX_CONCURRENT_REQUESTS=4
+    volumes:
+      - embedding_cand_bge_mgemma2_cache:/data  # named volume declared under top-level `volumes:`
+    deploy:
+      resources:
+        reservations:
+          devices:  # reserves one NVIDIA GPU for this container
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    healthcheck:  # curls the in-container /health endpoint
+      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 300s  # longer than the 60s used by the other candidates (presumably for the larger model load — confirm)
+    profiles: ["embed-cand-extended"]  # Not used in this PR; separate "extended" profile.
+
+  embedding-cand-me5-ko:  # inactive candidate: dragonkue/multilingual-e5-large-ko
+    image: ghcr.io/huggingface/text-embeddings-inference:1.7
+    container_name: hyungi_document_server-embedding-cand-me5-ko-1
+    expose:  # container port only; no host port mapping is declared
+      - "80"
+    environment:
+      - MODEL_ID=dragonkue/multilingual-e5-large-ko
+      - MAX_BATCH_TOKENS=8192
+      - MAX_CONCURRENT_REQUESTS=4
+    volumes:
+      - embedding_cand_me5_ko_cache:/data  # named volume declared under top-level `volumes:`
+    deploy:
+      resources:
+        reservations:
+          devices:  # reserves one NVIDIA GPU for this container
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    healthcheck:  # curls the in-container /health endpoint
+      test: ["CMD", "curl", "-fsS", "http://localhost/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 60s
+    profiles: ["embed-cand-disabled"]  # Fails with HTTP 401; do not use.
+
+volumes:  # one named volume per candidate service; each is mounted at that service's /data
+  embedding_cand_me5_inst_cache:
+  embedding_cand_snowflake_l_v2_cache:
+  embedding_cand_bge_mgemma2_cache:
+  embedding_cand_me5_ko_cache:
diff --git a/tests/fixtures/ollama_bge_m3_embedding_response.json b/tests/fixtures/ollama_bge_m3_embedding_response.json
new file mode 100644
index 0000000..8c57a51
--- /dev/null
+++ b/tests/fixtures/ollama_bge_m3_embedding_response.json
@@ -0,0 +1,17 @@
+{
+  "model_called": "bge-m3",
+  "endpoint": "http://ollama-gpu:11434/api/embeddings",
+  "input": "테스트 한국어 문장 + English mixed",
+  "response_shape": {
+    "embedding_dim": 1024,
+    "top_3_values": [
+      -0.010903210379183292,
+      -0.1681741178035736,
+      -0.24618254601955414
+    ]
+  },
+  "raw_keys": [
+    "embedding"
+  ],
+  "captured_at": "2026-05-23"
+}
diff --git a/tests/fixtures/tei_embedding_response.json b/tests/fixtures/tei_embedding_response.json
new file mode 100644
index 0000000..f957032
--- /dev/null
+++ b/tests/fixtures/tei_embedding_response.json
@@ -0,0 +1,19 @@
+{
+  "slug": "me5_large_inst",
+  "model_id": "intfloat/multilingual-e5-large-instruct",
+  "endpoint": "http://embedding-cand-me5-inst:80/embed",
+  "input": "테스트 한국어 문장 + English mixed",
+  "response_shape": {
+    "outer_type": "list",
+    "batch_size": 1,
+    "inner_type": "list",
+    "embedding_dim": 1024,
+    "top_3_values": [
+      0.020381121,
+      -0.0070679397,
+      0.001018147
+    ]
+  },
+  "note": "TEI returns nested array [[float, ...]] — Ollama 와 shape diff (Ollama = {\"embedding\": [float, ...]}). dispatcher 가 shape diff 흡수 필요.",
+  "captured_at": "2026-05-23"
+}
diff --git a/tests/fixtures/tei_embedding_snowflake_l_v2_response.json b/tests/fixtures/tei_embedding_snowflake_l_v2_response.json
new file mode 100644
index 0000000..93bd3c8
--- /dev/null
+++ b/tests/fixtures/tei_embedding_snowflake_l_v2_response.json
@@ -0,0 +1,19 @@
+{
+  "slug": "snowflake_l_v2",
+  "model_id": "Snowflake/snowflake-arctic-embed-l-v2.0",
+  "endpoint": "http://embedding-cand-snowflake-l-v2:80/embed",
+  "input": "테스트 한국어 문장 + English mixed",
+  "response_shape": {
+    "outer_type": "list",
+    "batch_size": 1,
+    "inner_type": "list",
+    "embedding_dim": 1024,
+    "top_3_values": [
+      0.07000499,
+      0.0054518348,
+      0.027516967
+    ]
+  },
+  "note": "TEI nested array shape — me5_large_inst 와 동일.",
+  "captured_at": "2026-05-23"
+}