From a0e1717206af4a5b77c68b37a408a061d9820815 Mon Sep 17 00:00:00 2001
From: Hyungi Ahn
Date: Fri, 10 Apr 2026 08:59:29 +0900
Subject: [PATCH] =?UTF-8?q?fix(grounding):=20citation=20marker=20[n]=20?=
 =?UTF-8?q?=EC=9D=84=20fabricated=5Fnumber=20=EC=97=90=EC=84=9C=20?=
 =?UTF-8?q?=EC=A0=9C=EC=99=B8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[1][2][4] 같은 citation 마커의 숫자가 evidence 에 없다고 판정되어
모든 정상 답변이 refuse(2+strong) 되는 critical bug.
answer 에서 \[\d+\] 제거 후 숫자 추출.

또한 evidence 숫자 매칭을 substring 포함 검사에서 equality 검사로 변경
(예: answer 의 '5' 가 evidence 의 '2015' 에 부분 일치하여 통과하던 문제 방지).

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 app/services/search/grounding_check.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/app/services/search/grounding_check.py b/app/services/search/grounding_check.py
index a03417d..97a2bbf 100644
--- a/app/services/search/grounding_check.py
+++ b/app/services/search/grounding_check.py
@@ -76,14 +76,16 @@ def check(
 
     evidence_text = " ".join(e.span_text for e in evidence)
 
-    # ── Strong 1: fabricated number ──
-    answer_nums = _extract_number_literals(answer)
+    # ── Strong 1: fabricated number (equality, not substring) ──
+    # ⚠ citation marker [n] 제거 후 숫자 추출 (안 그러면 [1][2][3] 이 fabricated 로 오탐)
+    answer_clean = re.sub(r'\[\d+\]', '', answer)
+    answer_nums = _extract_number_literals(answer_clean)
     evidence_nums = _extract_number_literals(evidence_text)
+    evidence_digits = {re.sub(r'[^\d]', '', en) for en in evidence_nums}
+    evidence_digits.discard('')
     for num in answer_nums:
         digits_only = re.sub(r'[^\d]', '', num)
-        if digits_only and not any(
-            digits_only in re.sub(r'[^\d]', '', en) for en in evidence_nums
-        ):
+        if digits_only and digits_only not in evidence_digits:
             strong.append(f"fabricated_number:{num}")
 
     # ── Strong/Weak 2: query-answer intent alignment ──