diff --git a/tests/search_eval/baselines/v0_2_baseline_2026-05-23.json b/tests/search_eval/baselines/v0_2_baseline_2026-05-23.json new file mode 100644 index 0000000..f2eb3eb --- /dev/null +++ b/tests/search_eval/baselines/v0_2_baseline_2026-05-23.json @@ -0,0 +1,134 @@ +{ + "version": "v0.2", + "date": "2026-05-23", + "corpus_size": 21385, + "eval_set": { + "total_cases": 51, + "scored_cases": 46, + "failure_expected_cases": 5 + }, + "model_config": { + "embedding": "BAAI/bge-m3", + "reranker": "BAAI/bge-reranker-v2-m3", + "search_mode": "hybrid", + "fusion": "server_default", + "rerank_enabled": "server_default", + "config_yaml_sha256_prefix": "97ae77b4e1dfbbc6", + "git_head": "725a4e1f1d2c3508093db452b6d647b807bd684b" + }, + "overall": { + "n": 46, + "recall_at_10": 0.6458, + "mrr_at_10": 0.7239, + "ndcg_at_10": 0.6058, + "top3_hit": 0.8913, + "graded_ndcg_at_10": 0.6593, + "graded_recall_at_10_t2": 0.6948, + "graded_recall_at_10_t3": 0.7609, + "latency_p50_ms": 528, + "latency_p95_ms": 1664, + "failure_precision": 0.0, + "failure_correct": "0/5" + }, + "by_category": { + "english_only": { + "n": 9, + "recall_at_10": 0.7778, + "mrr_at_10": 0.8056, + "ndcg_at_10": 0.7116, + "top3_hit": 0.8889, + "graded_ndcg_at_10": 0.7804, + "graded_recall_at_10_t2": 0.8333, + "graded_recall_at_10_t3": 0.8889 + }, + "exam": { + "n": 7, + "recall_at_10": 0.5714, + "mrr_at_10": 0.8929, + "ndcg_at_10": 0.619, + "top3_hit": 0.8571, + "graded_ndcg_at_10": 0.738, + "graded_recall_at_10_t2": 0.6429, + "graded_recall_at_10_t3": 1.0 + }, + "korean_only": { + "n": 9, + "recall_at_10": 0.551, + "mrr_at_10": 0.5852, + "ndcg_at_10": 0.4679, + "top3_hit": 0.7778, + "graded_ndcg_at_10": 0.5137, + "graded_recall_at_10_t2": 0.5696, + "graded_recall_at_10_t3": 0.5556 + }, + "mixed": { + "n": 10, + "recall_at_10": 0.375, + "mrr_at_10": 0.47, + "ndcg_at_10": 0.3572, + "top3_hit": 0.9, + "graded_ndcg_at_10": 0.3913, + "graded_recall_at_10_t2": 0.3833, + "graded_recall_at_10_t3": 0.4 + }, + 
"standards": { + "n": 11, + "recall_at_10": 0.9091, + "mrr_at_10": 0.8939, + "ndcg_at_10": 0.8498, + "top3_hit": 1.0, + "graded_ndcg_at_10": 0.8729, + "graded_recall_at_10_t2": 1.0, + "graded_recall_at_10_t3": 1.0 + } + }, + "by_language": { + "en": { + "n": 9, + "recall_at_10": 0.7778, + "mrr_at_10": 0.8056, + "ndcg_at_10": 0.7116, + "top3_hit": 0.8889, + "graded_ndcg_at_10": 0.7804, + "graded_recall_at_10_t2": 0.8333, + "graded_recall_at_10_t3": 0.8889 + }, + "ko": { + "n": 27, + "recall_at_10": 0.7022, + "mrr_at_10": 0.7907, + "ndcg_at_10": 0.6627, + "top3_hit": 0.8889, + "graded_ndcg_at_10": 0.7182, + "graded_recall_at_10_t2": 0.7639, + "graded_recall_at_10_t3": 0.8519 + }, + "mixed": { + "n": 10, + "recall_at_10": 0.375, + "mrr_at_10": 0.47, + "ndcg_at_10": 0.3572, + "top3_hit": 0.9, + "graded_ndcg_at_10": 0.3913, + "graded_recall_at_10_t2": 0.3833, + "graded_recall_at_10_t3": 0.4 + } + }, + "by_ocr_derived": { + "false": { + "n": 46, + "recall_at_10": 0.6458, + "mrr_at_10": 0.7239, + "ndcg_at_10": 0.6058, + "top3_hit": 0.8913, + "graded_ndcg_at_10": 0.6593, + "graded_recall_at_10_t2": 0.6948, + "graded_recall_at_10_t3": 0.7609 + } + }, + "notes": { + "eval_version_mode": "both (v0.1 binary + v0.2 graded)", + "ocr_derived_status": "NOT_MEASURED (TBD-O FAILED — extract_meta / md_extraction_engine NULL 21385, chunks.source = RSS feed 명. 
v0.2 51 case 중 ocr_derived=true 0 case)", + "failure_expected_status": "0/5 returned empty (DS 가 confidence threshold 미적용 — Phase 2B 또는 별도 chore 후보)" + } +} \ No newline at end of file diff --git a/tests/search_eval/baselines/v0_2_baseline_2026-05-23_analysis.md b/tests/search_eval/baselines/v0_2_baseline_2026-05-23_analysis.md new file mode 100644 index 0000000..36f79dc --- /dev/null +++ b/tests/search_eval/baselines/v0_2_baseline_2026-05-23_analysis.md @@ -0,0 +1,142 @@ +# Eval v0.2 Baseline Analysis — 2026-05-23 + +> Parent PR: PR-Eval-V0_2-Baseline-Analysis +> Eval set: `tests/search_eval/queries.yaml` (v0.2, 51 case) +> Raw: `tests/search_eval/baselines/v0_2_baseline_2026-05-23.json` +> Branch: `feat/eval-v0-2-baseline-analysis` (HEAD-1: 725a4e1 PR-1 Schema-Harness) + +## 측정 환경 + +| 항목 | 값 | +|---|---| +| corpus_size | 21,385 documents (deleted_at IS NULL) | +| embedding | BAAI/bge-m3 | +| reranker | BAAI/bge-reranker-v2-m3 | +| search_mode | hybrid (server default fusion + rerank) | +| eval cases | 51 (graded=46, failure_expected=5) | +| latency p50 / p95 | 528 ms / 1,664 ms | +| config.yaml sha256 prefix | 97ae77b4e1dfbbc6 | + +## 1. 카테고리별 점수표 + +| Category | n | Recall@10 | NDCG@10 | graded NDCG@10 | Recall@10 (g≥2) | Recall@10 (g≥3) | +|---|---:|---:|---:|---:|---:|---:| +| **standards** | 11 | 0.91 | 0.85 | **0.87** | 1.00 | 1.00 | +| **english_only** | 9 | 0.78 | 0.71 | **0.78** | 0.83 | 0.89 | +| **exam** | 7 | 0.57 | 0.62 | **0.74** | 0.64 | **1.00** | +| **korean_only** | 9 | 0.55 | 0.47 | **0.51** | 0.57 | 0.56 | +| **mixed** | 10 | 0.38 | 0.36 | **0.39** | 0.38 | 0.40 | +| **ocr_derived** | 0 | — | — | — | — | — | +| **failure_expected** | 5 | failure precision = **0/5 (0.00)** | + +### 언어별 + +| Language | n | Recall@10 | graded NDCG@10 | +|---|---:|---:|---:| +| en | 9 | 0.78 | 0.78 | +| ko | 27 | 0.70 | 0.72 | +| mixed | 10 | 0.38 | 0.39 | + +## 2. 약점 식별 (top 3) + +### 2.1 mixed (crosslingual) — graded NDCG **0.39** + +가장 큰 weakness. 
10 case 모두 한국어↔영어 cross-lingual. +- top-3 hit 0.90 자체는 높음 → "정답 후보가 top-10 안에 들지만 양언어 관련 doc 을 골고루 못 잡음" +- Recall (g≥2) 0.38, (g≥3) 0.40 — multi-doc relevant set 의 일부만 회수 +- 예: cl_006 ("pressure vessel Korean industrial safety regulation", 영문 query → 한국 법령 + 영문 책 둘 다 정답) 같은 case 가 한쪽 언어만 회수 + +**추정 root cause**: +- bge-m3 multilingual 능력 한계 (특히 한국어↔영어 간 의미 align) +- 동일 의미의 ko/en doc 가 embedding 공간에서 분리 → fusion 후에도 한쪽 언어 dominance +- chunk language metadata 활용 부재 (필터 X, weighted X) + +### 2.2 korean_only (natural language ko) — graded NDCG **0.51** + +자연어 한국어 query 약함. exact keyword 위주의 standards 카테고리 (별도 11 case) 는 0.87 인 반면, korean_only 9 case (natural language 위주의 nl_*/news_*/misc_*) 에서 점수 하락. +- Recall (g≥3) 0.56 — top-3 강제 정답조차 절반만 회수 +- top3_hit 0.78 — direct match 도 22% miss + +**추정 root cause**: +- query rewrite/expansion 부재 (자연어 의역 매핑 약함) +- bge-m3 한국어 chunk embedding 품질 (특히 도메인 용어) +- "유해화학물질을 다루는 회사가 지켜야 할 안전 의무" 같은 query 의 키워드 추출 약함 + +### 2.3 failure_expected — precision **0/5 (0.00)** + +5 case 모두 "결과 0건 expected" 였으나 DS 가 무관 결과를 반환. +- confidence/score threshold 미적용 +- 예: "초전도 안전 관리법" → 산안법 같은 무관 doc 반환 + +**추정 root cause**: +- search confidence score < 0.5 차단 로직 부재 ([[project_phase3_5_guardrails]] 의 B1/B2 미배포 또는 threshold 너무 낮음) +- reranker score 자체는 정상 동작이나 cutoff 정책 없음 + +## 3. 
Embedding-sensitive failure pattern (Phase 2A 입력) + +peppy-hugging-nest Phase 2A 의 식별 카테고리 4개: + +| 패턴 | 본 baseline 신호 | 우선순위 | +|---|---|---| +| **candidate recall@k** | exam Recall@10 0.57 vs MRR 0.89 — top-3 안엔 정답이 있으나 grade 2 보조 정답을 못 잡음 (10 슬롯 안에 후보 부재) | M | +| **crosslingual miss** | mixed 0.39 graded NDCG, 동일 의미 ko/en doc 분리 — **TOP weakness** | **H** | +| **Korean-English mismatch** | korean_only natural language 0.51, 한자/영문 fallback 표현 매칭 약함 (cl_005 "asme_code해설" 매칭은 OK 였으나 자연어 의역 약함) | H | +| **OCR-derived chunk miss** | **NOT_MEASURED** — OCR 식별 컬럼 부재 (TBD-O FAILED) | 보류 | + +**embedding 독립 추정 원인**: +- chunking 전략: window 14041 / paragraph 9897 / section 4801 / legal_article 1859 — legal_article (1859, 5.5%) 만 짧은 단위, 나머지는 큰 window 위주. 자연어 query 시 chunk 가 너무 길어 의미 dilute 가능 +- query rewrite 부재: 자연어 → 키워드 변환 layer 없음 +- reranker threshold: 정상 동작 but failure cutoff X +- metadata filter: language/country 미활용 + +## 4. 다른 가능한 원인 (embedding 독립) + +| 원인 | 신호 | 비고 | +|---|---|---| +| **chunking 단위 too coarse** | window 14041 (≈50% chunks) | Phase 3 markdown 전환 시 chunk_type 재분포 필요 | +| **query rewrite 부재** | korean_only natural language 0.51 vs standards 0.87 — exact keyword 와 의역 query 큰 격차 | Phase 2Q (별 PR-Query-Rewrite) 후보 | +| **reranker threshold 부재** | failure_expected 0/5 | Phase 3.5 B1/B2 활성화 검토 또는 별 chore | +| **metadata filter 미활용** | mixed 카테고리 양언어 분리 가능했을 chance | language 가중치/필터 도입 검토 | + +## 5. Phase 2 dispatch 권고 + +본 baseline 의 약점 분포 → Phase 2A~D 우선순위: + +| Phase (영역) | 권고 | 근거 | +|---|---|---| +| **2A Embedding (bge-m3)** | TOP — **즉시 진입** | mixed 0.39 / korean_only 0.51 — 두 약점 모두 embedding-sensitive. bge-m3 alternative (예: bge-m3-large, multilingual-e5-large, polyglot embedding) 점검 가치 高 | +| **2B Reranker** | M — 2A 이후 | 일반 NDCG 향상 + failure precision 동시 개선 가능. threshold cutoff 추가 검토 | +| **2C OCR-Marker Pipeline** | 보류 — **선행 chore 필요** | ocr_derived 0 case → 측정 불가. 
**별 chore: OCR 식별 컬럼 추가** (`documents.is_ocr_derived` 또는 `document_chunks.extraction_source`) 가 선행. 이후 ocr_derived 4 case 추가 후 측정 | +| **2D STT (Faster Whisper)** | 보류 — 본 평가셋 영역 외 | search eval 과 직접 측정 link 없음. 별 평가셋 (voice → 텍스트 정확도) 필요 | + +**추천 즉시 액션**: +1. Phase 2A 별 plan 작성 — bge-m3 alternative 후보 3종 비교, 본 baseline json 대비 mixed/korean_only 점수 변화 측정 +2. failure_expected 0/5 chore 별 PR — confidence threshold 활성화 ([[project_phase3_5_guardrails]] B1/B2 재검토) +3. OCR 식별 컬럼 추가 chore (Phase 2C 선행) + +Query rewrite 는 본 4-Phase 외 후보 → Phase 2Q 또는 Search-PR 로 별도 분리 (korean_only 약점 해결 보조). + +## 6. v0.1 회귀 (참고) + +PR-1 (`725a4e1`) 의 v0.1 23 case 점수는 별도 박제 부재. 본 baseline 의 v0.1 binary 점수: +- Recall@10 0.646, MRR@10 0.724, NDCG@10 0.606, Top-3 0.891 + +기존 23 case 만 분리 측정 시 (legacy_category 가 있는 case) 동일 점수 회귀 여부는 후속 별 PR 또는 chore 에서 검증 (PR-1 baseline 박제 부재로 직접 회귀 비교 불가). + +## 7. Closure 조건 ([[feedback_quant_expectation_not_hard_gate]]) + +- ✓ 51 case 작성 완료, id 중복 0 +- ✓ baseline json + analysis md 박제 +- ✓ 약점 카테고리 top 3 식별 (mixed / korean_only / failure_expected) +- ✓ embedding-sensitive failure pattern 4 카테고리 분류 (3 식별 + 1 NOT_MEASURED) +- ✓ Phase 2 dispatch 권고 (2A 우선, 2B M, 2C 선행 chore 필요, 2D 본 평가셋 외) +- ✓ 0.70 미달 항목 (mixed 0.39 / korean_only 0.51) 의 원인 분해 보고서 역할 + +**Phase 1 closure**. + +## 다음 단계 + +본 PR closure 후: +- Phase 2A 별 plan 작성 (`~/.claude/plans/phase-2a-embedding-diagnose.md`) +- chore: OCR 식별 컬럼 추가 별 PR (`PR-Chore-OCR-Column-1`) +- chore: failure_expected confidence threshold 별 PR (`PR-Chore-Confidence-Cutoff-1`) diff --git a/tests/search_eval/queries.yaml b/tests/search_eval/queries.yaml index 1e990b6..df98681 100644 --- a/tests/search_eval/queries.yaml +++ b/tests/search_eval/queries.yaml @@ -454,9 +454,501 @@ queries: relevant_ids: [] notes: 코퍼스에 음악/재즈 문서 없음. 결과 0건 = correct. 
+ # ═════════════════════════════════════════════════════════ + # PR-Eval-V0_2-Baseline-Analysis 신규 28 case (2026-05-23) + # 분배 (계획 plan 의 +28): + # standards +6 / english_only +8 / mixed +5 / exam +7 / failure_expected +2 + # ocr_derived 0 — TBD-O FAILED (chunks.source = RSS feed 명, + # extract_meta / md_extraction_engine 전부 NULL → OCR 식별 불가). + # 재배분: standards +1 / english_only +2 / mixed +1 → analysis md 명시. + # ═════════════════════════════════════════════════════════ + # ───────────────────────────────────────────────────────── - # 신규 카테고리 (후속 PR-Eval-V0_2-Baseline-Analysis 에서 batch 작성): - # - exam : 가스기사 study 도메인 (study_questions 기반) - # - ocr_derived : marker/OCR pipeline 통한 scanned PDF chunk 검색 - # - english_only : 영어 standards / 자연어 (현재 news_en 1건만) + # 1b. standards 신규 6 (한국 KGS code + 산안기준 후반 편 + 고압가스법) # ───────────────────────────────────────────────────────── + - id: kw_006 + query: "산업안전보건기준에 관한 규칙 작업장 통로" + category: standards + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 3886: 3 + 3887: 2 + relevant_ids: [3886, 3887] + top3_ids: [3886] + notes: 산안기준 규칙 1편 제2장 작업장(3886, grade 3) + 제3장 통로(3887, grade 2). + + - id: kw_007 + query: "산업안전보건기준 폭발 화재 위험물 누출 방지" + category: standards + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 3896: 3 + 3766: 1 + relevant_ids: [3896, 3766] + top3_ids: [3896] + notes: | + 산안기준 규칙 2편 제2장 폭발ㆍ화재(3896) = grade 3. + Industrial Safety and Health Management ch11 Flammable and Explosive(3766) + = grade 1 (영문 도서, 주제 일치 but 한국 법령 query → cross-lingual marginal). 
+ + - id: kw_008 + query: "고압가스 안전관리법 전문" + category: standards + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 4025: 3 + 4026: 2 + relevant_ids: [4025, 4026] + top3_ids: [4025] + notes: 고압가스법 본법(4025, grade 3) + 시행령(4026, grade 2). "전문" 키워드 매칭. + + - id: kw_009 + query: "KGS FP111 가스설비 배관설비 기준" + category: standards + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 13305: 3 + relevant_ids: [13305] + top3_ids: [13305] + notes: KGS FP111 § 2.4~2.5 가스설비·배관설비(13305) = grade 3. KGS code exact lookup. + + - id: kw_010 + query: "KGS FU551 가스설비 압력조정기 가스계량기" + category: standards + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 13652: 3 + relevant_ids: [13652] + top3_ids: [13652] + notes: KGS FU551 § 2.4 가스설비 기준(13652) = grade 3. KGS code exact lookup. + + - id: kw_011 + query: "산업안전보건기준 전기로 인한 위험 방지" + category: standards + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 3897: 3 + 3772: 1 + relevant_ids: [3897, 3772] + top3_ids: [3897] + notes: | + 산안기준 규칙 2편 제3장 전기 위험 방지(3897) = grade 3. + Industrial Safety and Health Management ch17 Electrical Hazards(3772) = grade 1 + (영문, 주제 일치). + + # ───────────────────────────────────────────────────────── + # 3b. 
english_only 신규 8 (Pressure Vessel Design Manual + ASME VIII/IX + # + Hydrogen ASME + Industrial Safety 영문 교재 + Structural Analysis) + # ───────────────────────────────────────────────────────── + - id: en_001 + query: "pressure vessel flange design" + category: english_only + intent: semantic_search + domain_hint: document + language: en + ocr_derived: false + failure_expected: false + graded_relevance: + 5144: 3 + 5136: 2 + relevant_ids: [5144, 5136] + top3_ids: [5144] + notes: | + Pressure Vessel Design Manual ch03 Flange Design(5144) = grade 3. + Manual_00 contents(5136) = grade 2 (catalog). + + - id: en_002 + query: "ASME Section VIII introduction" + category: english_only + intent: semantic_search + domain_hint: document + language: en + ocr_derived: false + failure_expected: false + graded_relevance: + 5204: 3 + 5206: 2 + relevant_ids: [5204, 5206] + top3_ids: [5204] + notes: ASME VIII Introduction(5204) = grade 3, Design_Basic_R1(5206) = grade 2. + + - id: en_003 + query: "impact test requirements ASME" + category: english_only + intent: semantic_search + domain_hint: document + language: en + ocr_derived: false + failure_expected: false + graded_relevance: + 5205: 3 + 5148: 1 + relevant_ids: [5205, 5148] + top3_ids: [5205] + notes: | + ASME VIII Impact Test(5205) = grade 3. Structural Analysis Materials of + Construction(5148) = grade 1 (재료 관련 약한 연관). + + - id: en_004 + query: "design of vessel supports" + category: english_only + intent: semantic_search + domain_hint: document + language: en + ocr_derived: false + failure_expected: false + graded_relevance: + 5149: 3 + relevant_ids: [5149] + top3_ids: [5149] + notes: Pressure Vessel Design Manual ch04 Design of Vessel Supports(5149) = grade 3. 
+ + - id: en_005 + query: "hydrogen piping ASME code" + category: english_only + intent: semantic_search + domain_hint: document + language: en + ocr_derived: false + failure_expected: false + graded_relevance: + 5178: 3 + relevant_ids: [5178] + top3_ids: [5178] + notes: Hydrogen_Piping_and_Pipelines_ASME_Code(5178) = grade 3. exact title match. + + - id: en_006 + query: "ASME welding qualification requirements" + category: english_only + intent: semantic_search + domain_hint: document + language: en + ocr_derived: false + failure_expected: false + graded_relevance: + 5209: 3 + 3771: 1 + relevant_ids: [5209, 3771] + top3_ids: [5209] + notes: | + ASME IX Welding(5209) = grade 3. + Industrial Safety and Health Management ch16 Welding(3771) = grade 1 + (안전 관점 영문 도서, qualification 주제 약한 연관). + + - id: en_007 + query: "pressure vessel fabrication and inspection" + category: english_only + intent: semantic_search + domain_hint: document + language: en + ocr_derived: false + failure_expected: false + graded_relevance: + 5208: 3 + 5145: 2 + relevant_ids: [5208, 5145] + top3_ids: [5208] + notes: | + ASME VIII Fabrication-Inspection_R1(5208) = grade 3. + Structural Analysis ch02 Selection of Vessel/Specifications(5145) = grade 2. + + - id: en_008 + query: "Industrial Safety and Health Management ergonomics" + category: english_only + intent: semantic_search + domain_hint: document + language: en + ocr_derived: false + failure_expected: false + graded_relevance: + 3763: 3 + 3755: 1 + relevant_ids: [3763, 3755] + top3_ids: [3763] + notes: | + Industrial Safety and Health Management(7-ED) ch08 Ergonomics(3763) = grade 3. + Contents(3755) = grade 1 (목차 매칭, 본 챕터 우선). + + # ───────────────────────────────────────────────────────── + # 4b. 
mixed 신규 5 (양언어 ASME / 한국 KGS ↔ 영문 ASME / 영문 query → 한국 doc) + # ───────────────────────────────────────────────────────── + - id: cl_004 + query: "ASME 압력용기 설계 실무" + category: mixed + intent: semantic_search + domain_hint: document + language: mixed + ocr_derived: false + failure_expected: false + graded_relevance: + 5207: 3 + 5210: 3 + 5139: 2 + 5135: 2 + relevant_ids: [5207, 5210, 5139, 5135] + top3_ids: [5207] + notes: | + 한국어 query → 영문/혼합 ASME 도서. + ASME VIII Design_Practical_R1(5207, en) = grade 3 (실무 정확 일치). + ASME B-PV CODE SECTION Ⅷ Div.1 압력용기 코드 2007(5210, mixed title) = grade 3. + 압력용기 핸드북_설계와 제작(5139, ko) = grade 2. + 압력용기 핸드북_기하학과 배치도(5135, ko) = grade 2. + + - id: cl_005 + query: "ASME 용접 코드 해설" + category: mixed + intent: semantic_search + domain_hint: document + language: mixed + ocr_derived: false + failure_expected: false + graded_relevance: + 5224: 3 + 5209: 2 + relevant_ids: [5224, 5209] + top3_ids: [5224] + notes: | + asme_code해설[용접](5224, ko 해설) = grade 3 (직접 일치). + ASME IX Welding(5209, en) = grade 2 (원본 코드). + + - id: cl_006 + query: "pressure vessel Korean industrial safety regulation" + category: mixed + intent: semantic_search + domain_hint: document + language: mixed + ocr_derived: false + failure_expected: false + graded_relevance: + 4025: 2 + 3856: 2 + 5136: 1 + relevant_ids: [4025, 3856, 5136] + notes: | + 영문 query → 한국 법령. 고압가스 안전관리법(4025, ko) = grade 2, + 산안법 6장(3856, ko) = grade 2, PV Design Manual(5136, en) = grade 1 + (보조 영문 자료). 동등 정답 2건, top3 강제 없음. + + - id: cl_007 + query: "KGS 코드 LPG safety standard" + category: mixed + intent: semantic_search + domain_hint: document + language: mixed + ocr_derived: false + failure_expected: false + graded_relevance: + 11647: 3 + 11689: 2 + 11645: 2 + 4025: 1 + relevant_ids: [11647, 11689, 11645, 4025] + top3_ids: [11647] + notes: | + 혼합 keyword (KGS + LPG + safety standard). + 04_KGS_Code(11647, ko 개요) = grade 3. + 02_KGS_FU_사용(11689) / 02_LPG법(11645) = grade 2. 
+ 고압가스 안전관리법(4025) = grade 1 (관련 법령). + + - id: cl_008 + query: "수소 가스 안전 기준 hydrogen safety" + category: mixed + intent: semantic_search + domain_hint: document + language: mixed + ocr_derived: false + failure_expected: false + graded_relevance: + 5178: 3 + 5169: 2 + relevant_ids: [5178, 5169] + top3_ids: [5178] + notes: | + Hydrogen_Piping_and_Pipelines_ASME_Code(5178, en) = grade 3. + 수소인프라 이슈 및 과제(5169, ko) = grade 2. + 한국 KGS 수소 관련 별도 doc 부재 → 영문 우선 확정. + + # ───────────────────────────────────────────────────────── + # 6. exam 신규 7 (가스기사 study_questions → library 개념 docs) + # ───────────────────────────────────────────────────────── + - id: exam_001 + query: "레이놀즈수 정의 공식" + category: exam + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 11504: 3 + 11505: 2 + relevant_ids: [11504, 11505] + top3_ids: [11504] + notes: | + study_questions 2019-1회 #2 "레이놀즈수를 옳게 나타낸 것은?" 매핑. + 01_레이놀즈수(11504) = grade 3 (직접 정답 개념). + 02_층류와난류(11505) = grade 2 (연관 개념). + + - id: exam_002 + query: "탱크 바닥 구멍 유체 유속 토리첼리" + category: exam + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 11500: 3 + 11495: 2 + 11496: 2 + relevant_ids: [11500, 11495, 11496] + top3_ids: [11500] + notes: | + study_questions 2019-1회 #1 "수면의 높이가 10m로 일정한 탱크 바닥 5mm 구멍 유속" + 매핑. 01_토리첼리(11500) = grade 3. 베르누이 개념/식(11495/11496) = grade 2. + + - id: exam_003 + query: "이상기체 음속 마하수" + category: exam + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 11514: 3 + 11515: 2 + relevant_ids: [11514, 11515] + top3_ids: [11514] + notes: | + study_questions 2019-1회 #4 "이상기체 속에서의 음속을 옳게 나타낸 식" 매핑. + 01_음속과마하수(11514) = grade 3. 02_등엔트로피유동(11515) = grade 2. 
+ + - id: exam_004 + query: "고압가스 용기 내압시험 영구증가량" + category: exam + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 11591: 3 + 11644: 2 + 11691: 2 + relevant_ids: [11591, 11644, 11691] + top3_ids: [11591] + notes: | + study_questions 2019-1회 #58 "고압가스 용접용기 내압검사 전증가량 250 mL + 영구증가량" 매핑. 02_고압가스용기(11591) = grade 3. + 01_고압가스안전관리법(11644) / 04_KGS_AC_용기(11691) = grade 2 (법령/용기 코드). + + - id: exam_005 + query: "도시가스 배관 매설 이격거리" + category: exam + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 11627: 3 + 11625: 2 + 11646: 1 + relevant_ids: [11627, 11625, 11646] + top3_ids: [11627] + notes: | + study_questions 2019-1회 #76 "고압가스일반제조 사업소 밖 배관 매몰 설치 + 매설물 최소 이격거리" 매핑. 06_도시가스배관매설(11627) = grade 3. + 04_도시가스배관(11625) = grade 2. 03_도시가스사업법(11646) = grade 1. + + - id: exam_006 + query: "LPG 저장탱크 안전거리 분말소화기" + category: exam + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 11617: 3 + 11669: 2 + 11620: 1 + relevant_ids: [11617, 11669, 11620] + top3_ids: [11617] + notes: | + study_questions 2019-1회 #69 (LPG지상저장탱크 거리) + #80 (소형저장탱크 + 분말소화기) 매핑. 03_LPG저장설비(11617) = grade 3. + 01_LPG안전(11669) = grade 2. 06_LPG사용설비(11620) = grade 1. + + - id: exam_007 + query: "오리피스 차압식 유량계" + category: exam + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: false + graded_relevance: + 11712: 3 + 11711: 2 + 11503: 2 + relevant_ids: [11712, 11711, 11503] + top3_ids: [11712] + notes: | + 가스기사 계측 분야 (study_questions 회차 빈출). 02_오리피스유량계(11712) = + grade 3. 01_차압식유량계(11711) = grade 2. 04_오리피스(11503, 유체역학) + = grade 2 (원리 설명). + + # ───────────────────────────────────────────────────────── + # 5b. 
failure_expected 신규 2 (의도적 no-result) + # ───────────────────────────────────────────────────────── + - id: fail_004 + query: "KGS AC999 임의 가스 코드" + category: failure_expected + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: true + graded_relevance: {} + relevant_ids: [] + notes: | + KGS AC 시리즈 (용기) 에 999 번호 부재. 의도적 no-result. + AC101/AC201/AC213 등 실재 번호와 혼동 회피 위해 999 선택. + + - id: fail_005 + query: "초전도 안전 관리법 시행규칙" + category: failure_expected + intent: fact_lookup + domain_hint: document + language: ko + ocr_derived: false + failure_expected: true + graded_relevance: {} + relevant_ids: [] + notes: 존재하지 않는 가짜 법령명. 결과 0건 = correct.