From 446ba82c9197ba47a85ed014a556ec6a19744ef1 Mon Sep 17 00:00:00 2001 From: hyungi Date: Sat, 23 May 2026 22:09:29 +0000 Subject: [PATCH] =?UTF-8?q?feat(eval):=20Phase=202Q=20Diagnose=20Phase=201?= =?UTF-8?q?A=20=E2=80=94=20fixture=20(4=20=EC=B9=B4=ED=85=8C=EA=B3=A0?= =?UTF-8?q?=EB=A6=AC=20=C3=97=202=20LLM)=20+=20prompt=20v1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit phase-2q-query-rewrite-diagnose.md v6 plan 의 Phase 1 fixture 박제 (G0-1 + G0-2). 산출물: - app/prompts/query_rewrite.txt — multi-query rewrite prompt v1 (3 variants: 원본 + 한국어 rephrase + 영어 번역) - tests/fixtures/macmini_gemma4_query_rewrite_response.json — 4 카테고리 (korean_only/mixed/english_only/exam) - tests/fixtures/macbook_qwen_query_rewrite_response.json — 4 카테고리 동일 inspect 9 결과 (2026-05-24): - Mac mini gemma-4-26B-A4B :8801 = response_format json_object 지원 - MacBook qwen3.6-27B-8bit :8810 = response_format json_object 미지원 (120s hang) — prompt rule only - prompt rule \"no markdown, no code fence\" 강제 시 둘 다 strict JSON (gemma 도 fence wrap 없음) - parser fallback (markdown fence regex) 유지 — 첫 호출 prompt 없을 때 wrap 관찰 사례 8 호출 측정: - gemma 1.16~1.36s / qwen 1.93~2.24s (warm) - variants 의미 일관 + 도메인 용어 (ASME/Section VIII/압력용기/가스기사) verbatim preserve - 한국어→영어 cross-lingual translation 자연 Co-Authored-By: Claude Opus 4.7 (1M context) --- app/prompts/query_rewrite.txt | 12 + .../macbook_qwen_query_rewrite_response.json | 238 ++++++++++++++++++ ...macmini_gemma4_query_rewrite_response.json | 208 +++++++++++++++ 3 files changed, 458 insertions(+) create mode 100644 app/prompts/query_rewrite.txt create mode 100644 tests/fixtures/macbook_qwen_query_rewrite_response.json create mode 100644 tests/fixtures/macmini_gemma4_query_rewrite_response.json diff --git a/app/prompts/query_rewrite.txt b/app/prompts/query_rewrite.txt new file mode 100644 index 0000000..f11c92d --- /dev/null +++ b/app/prompts/query_rewrite.txt @@ -0,0 +1,12 @@ +You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary). + +Task: given the user's search query, produce 3 search-friendly variants: +- variant 0 = original query (verbatim, no change) +- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형) +- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term) + +Rules: +- Each variant ≤ 80 chars. +- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration. +- Do not invent new entities. +- Output STRICT JSON only (no prose, no markdown, no code fence): {"variants": ["...", "...", "..."]} diff --git a/tests/fixtures/macbook_qwen_query_rewrite_response.json b/tests/fixtures/macbook_qwen_query_rewrite_response.json new file mode 100644 index 0000000..892f88f --- /dev/null +++ b/tests/fixtures/macbook_qwen_query_rewrite_response.json @@ -0,0 +1,238 @@ +{ + "_meta": { + "description": "Phase 2Q G0-2 MacBook qwen3.6-27B query rewrite response fixture (4 카테고리)", + "captured_at": "2026-05-24", + "endpoint": "http://100.118.112.84:8810/v1/chat/completions", + "model": "mlx-community/Qwen3.6-27B-8bit", + "sampling": { + "temperature": 0.3, + "max_tokens": 256, + "response_format_omitted": true + }, + "prompt_version": "v1", + "prompt_path": "app/prompts/query_rewrite.txt", + "note": "response_format=json_object 미지원 (120s hang at inspect 9, 2026-05-24). prompt rule 'no markdown' 만으로 strict JSON 강제. 3.2s cold first call → ~2.1s warm. OpenAI 호환 + extra reasoning/tool_calls/peak_memory fields ([[reference_mlx_vlm_tool_calling]])." + }, + "cases": { + "korean_only": { + "query": "압력용기 설계 기준", + "request_body": { + "model": "mlx-community/Qwen3.6-27B-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "압력용기 설계 기준" + } + ], + "temperature": 0.3, + "max_tokens": 256 + }, + "response": { + "id": "chatcmpl-0a123e20-1f15-4e76-bfa2-34c4a1f06a98", + "object": "chat.completion", + "created": 1779574036, + "model": "mlx-community/Qwen3.6-27B-8bit", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "{\"variants\": [\"압력용기 설계 기준\", \"압력용기 설계 규정\", \"pressure vessel design standards\"]}", + "reasoning": null, + "tool_calls": null, + "tool_call_id": null, + "name": null + }, + "logprobs": null + } + ], + "usage": { + "prompt_tokens": 210, + "completion_tokens": 26, + "total_tokens": 236, + "prompt_tokens_details": { + "cached_tokens": 0 + }, + "prompt_tps": 0.0, + "generation_tps": 0.0, + "peak_memory": 30.210776441 + } + }, + "parsed_variants": [ + "압력용기 설계 기준", + "압력용기 설계 규정", + "pressure vessel design standards" + ] + }, + "mixed": { + "query": "ASME Section VIII 적용 기준", + "request_body": { + "model": "mlx-community/Qwen3.6-27B-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "ASME Section VIII 적용 기준" + } + ], + "temperature": 0.3, + "max_tokens": 256 + }, + "response": { + "id": "chatcmpl-51c5fd7c-b4a8-4fe1-ac77-ebcf232513e7", + "object": "chat.completion", + "created": 1779574038, + "model": "mlx-community/Qwen3.6-27B-8bit", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "{\"variants\": [\"ASME Section VIII 적용 기준\", \"ASME Section VIII 적용 범위 및 기준\", \"ASME Section VIII application criteria\"]}", + "reasoning": null, + "tool_calls": null, + "tool_call_id": null, + "name": null + }, + "logprobs": null + } + ], + "usage": { + "prompt_tokens": 210, + "completion_tokens": 30, + "total_tokens": 240, + "prompt_tokens_details": { + "cached_tokens": 0 + }, + "prompt_tps": 0.0, + "generation_tps": 0.0, + "peak_memory": 30.210776441 + } + }, + "parsed_variants": [ + "ASME Section VIII 적용 기준", + "ASME Section VIII 적용 범위 및 기준", + "ASME Section VIII application criteria" + ] + }, + "english_only": { + "query": "pressure vessel design code", + "request_body": { + "model": "mlx-community/Qwen3.6-27B-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "pressure vessel design code" + } + ], + "temperature": 0.3, + "max_tokens": 256 + }, + "response": { + "id": "chatcmpl-580c1a94-9710-4cf0-931f-2a9661560ca5", + "object": "chat.completion", + "created": 1779574032, + "model": "mlx-community/Qwen3.6-27B-8bit", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "{\"variants\": [\"pressure vessel design code\", \"압력용기 설계 규정\", \"ASME pressure vessel code\"]}", + "reasoning": null, + "tool_calls": null, + "tool_call_id": null, + "name": null + }, + "logprobs": null + } + ], + "usage": { + "prompt_tokens": 208, + "completion_tokens": 25, + "total_tokens": 233, + "prompt_tokens_details": { + "cached_tokens": 0 + }, + "prompt_tps": 0.0, + "generation_tps": 0.0, + "peak_memory": 30.200421726 + } + }, + "parsed_variants": [ + "pressure vessel design code", + "압력용기 설계 규정", + "ASME pressure vessel code" + ] + }, + "exam": { + "query": "가스기사 필기 출제기준", + "request_body": { + "model": "mlx-community/Qwen3.6-27B-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "가스기사 필기 출제기준" + } + ], + "temperature": 0.3, + "max_tokens": 256 + }, + "response": { + "id": "chatcmpl-f793ee8b-73aa-4c31-b950-03f00603145e", + "object": "chat.completion", + "created": 1779574034, + "model": "mlx-community/Qwen3.6-27B-8bit", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "{\"variants\": [\"가스기사 필기 출제기준\", \"가스기사 필기시험 출제 범위\", \"Gas Engineer written exam syllabus\"]}", + "reasoning": null, + "tool_calls": null, + "tool_call_id": null, + "name": null + }, + "logprobs": null + } + ], + "usage": { + "prompt_tokens": 211, + "completion_tokens": 31, + "total_tokens": 242, + "prompt_tokens_details": { + "cached_tokens": 0 + }, + "prompt_tps": 0.0, + "generation_tps": 0.0, + "peak_memory": 30.210776441 + } + }, + "parsed_variants": [ + "가스기사 필기 출제기준", + "가스기사 필기시험 출제 범위", + "Gas Engineer written exam syllabus" + ] + } + } +} diff --git a/tests/fixtures/macmini_gemma4_query_rewrite_response.json b/tests/fixtures/macmini_gemma4_query_rewrite_response.json new file mode 100644 index 0000000..14e3835 --- /dev/null +++ b/tests/fixtures/macmini_gemma4_query_rewrite_response.json @@ -0,0 +1,208 @@ +{ + "_meta": { + "description": "Phase 2Q G0-1 Mac mini gemma-4 query rewrite response fixture (4 카테고리)", + "captured_at": "2026-05-24", + "endpoint": "http://100.76.254.116:8801/v1/chat/completions", + "model": "gemma-4-26b-a4b-it-8bit", + "sampling": { + "temperature": 0.3, + "max_tokens": 256, + "response_format": { + "type": "json_object" + } + }, + "prompt_version": "v1", + "prompt_path": "app/prompts/query_rewrite.txt", + "note": "response_format=json_object 지원. prompt rule 'no markdown' 강제 시 strict JSON. parser fallback (markdown fence regex) 유지 — 첫 시도 prompt 없이 호출 시 ```json...``` wrap 관찰 (2026-05-24 inspect 9)." + }, + "cases": { + "korean_only": { + "query": "압력용기 설계 기준", + "request_body": { + "model": "gemma-4-26b-a4b-it-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "압력용기 설계 기준" + } + ], + "temperature": 0.3, + "max_tokens": 256, + "response_format": { + "type": "json_object" + } + }, + "response": { + "id": "chatcmpl-1779574028", + "object": "chat.completion", + "created": 1779574028, + "model": "mlx-community/gemma-4-26b-a4b-it-8bit", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\"variants\": [\"압력용기 설계 기준\", \"압력용기 설계 규격 및 기준\", \"Pressure vessel design standards\"]}" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 214, + "completion_tokens": 29, + "total_tokens": 243 + } + }, + "parsed_variants": [ + "압력용기 설계 기준", + "압력용기 설계 규격 및 기준", + "Pressure vessel design standards" + ] + }, + "mixed": { + "query": "ASME Section VIII 적용 기준", + "request_body": { + "model": "gemma-4-26b-a4b-it-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "ASME Section VIII 적용 기준" + } + ], + "temperature": 0.3, + "max_tokens": 256, + "response_format": { + "type": "json_object" + } + }, + "response": { + "id": "chatcmpl-1779574029", + "object": "chat.completion", + "created": 1779574029, + "model": "mlx-community/gemma-4-26b-a4b-it-8bit", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\"variants\": [\"ASME Section VIII 적용 기준\", \"ASME Section VIII 적용 규정 및 기준\", \"Application standards for ASME Section VIII\"]}" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 214, + "completion_tokens": 31, + "total_tokens": 245 + } + }, + "parsed_variants": [ + "ASME Section VIII 적용 기준", + "ASME Section VIII 적용 규정 및 기준", + "Application standards for ASME Section VIII" + ] + }, + "english_only": { + "query": "pressure vessel design code", + "request_body": { + "model": "gemma-4-26b-a4b-it-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "pressure vessel design code" + } + ], + "temperature": 0.3, + "max_tokens": 256, + "response_format": { + "type": "json_object" + } + }, + "response": { + "id": "chatcmpl-1779574026", + "object": "chat.completion", + "created": 1779574026, + "model": "mlx-community/gemma-4-26b-a4b-it-8bit", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\"variants\": [\"pressure vessel design code\", \"압력용기 설계 코드\", \"pressure vessel design standard\"]}" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 212, + "completion_tokens": 24, + "total_tokens": 236 + } + }, + "parsed_variants": [ + "pressure vessel design code", + "압력용기 설계 코드", + "pressure vessel design standard" + ] + }, + "exam": { + "query": "가스기사 필기 출제기준", + "request_body": { + "model": "gemma-4-26b-a4b-it-8bit", + "messages": [ + { + "role": "system", + "content": "You are a search query rewriter for a multilingual document search system (Korean primary, English/mixed secondary).\n\nTask: given the user's search query, produce 3 search-friendly variants:\n- variant 0 = original query (verbatim, no change)\n- variant 1 = Korean rephrase with different phrasing (synonyms / 명사구 변형 / 조사 변형)\n- variant 2 = English translation OR cross-lingual rephrase (if Korean → English term; if English → Korean term)\n\nRules:\n- Each variant ≤ 80 chars.\n- Preserve domain-specific terms (ASME, KGS, 가스기사, 압력용기) verbatim — no abbreviation/transliteration.\n- Do not invent new entities.\n- Output STRICT JSON only (no prose, no markdown, no code fence): {\"variants\": [\"...\", \"...\", \"...\"]}" + }, + { + "role": "user", + "content": "가스기사 필기 출제기준" + } + ], + "temperature": 0.3, + "max_tokens": 256, + "response_format": { + "type": "json_object" + } + }, + "response": { + "id": "chatcmpl-1779574027", + "object": "chat.completion", + "created": 1779574027, + "model": "mlx-community/gemma-4-26b-a4b-it-8bit", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "{\"variants\": [\"가스기사 필기 출제기준\", \"가스기사 필기 시험 출제 범위 및 기준\", \"Gas Engineer written exam syllabus\"]}" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 217, + "completion_tokens": 36, + "total_tokens": 253 + } + }, + "parsed_variants": [ + "가스기사 필기 출제기준", + "가스기사 필기 시험 출제 범위 및 기준", + "Gas Engineer written exam syllabus" + ] + } + } +}