Files
hyungi cd33ded7a8 docs(search): passage-RAG go/no-go = NO-GO (hier evidence 동등, diagnose c4+c5)
PR-DocSrv-Hier-PassageRAG-Diagnose-1 c4+c5. 조건부 N=12(retrieval 통제) blind pairwise
(hypothesis-blind subagent, 익명 3-file split). 결과 4-way 수렴 = 동등:
pairwise prehier4/hier3/tie5(no edge) + axis ±0.08 + objective 동일(halluc36/36) +
variance~0(byte-identical 재생성). verbosity artifact 없음(prehier 더 길었으나 승+1).
=> NO-GO: hier-leaf evidence 무이득. hier leaf = section-outline UI 전용 완전 확정
(UI yes / doc-search NO-GO / passage-RAG NO-GO 3영역 종결). 2026-06-21 freeze input only.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 07:02:46 +00:00

73 lines
3.5 KiB
Python

#!/usr/bin/env python3
"""c4: conditional subset + objective signals + anonymized 3-file split."""
import json, random, os
# Every report path below is relative, so switch into the project checkout first.
os.chdir(os.path.expanduser("~/Documents/code/hyungi_Document_Server"))

# Load the capture log, one JSON record per line.  The file is opened in a
# ``with`` block so the handle is closed deterministically, and decoded as
# UTF-8 explicitly instead of relying on the platform default encoding.
# Blank lines are skipped so stray empty lines do not abort the run.
with open("reports/passage_rag_capture_2026-05-25.jsonl", encoding="utf-8") as capture_file:
    recs = [json.loads(line) for line in capture_file if line.strip()]

# Index the records as by[q_id][variant] -> record.  If the same
# (q_id, variant) pair occurs more than once, the last record wins.
by = {}
for r in recs:
    by.setdefault(r["q_id"], {})[r["variant"]] = r
def nonempty(r):
    """Return True when the record has a positive answer length and a completed synthesis.

    A missing or ``None`` ``answer_len_chars`` is treated as 0.
    """
    has_answer = (r.get("answer_len_chars") or 0) > 0
    if not has_answer:
        return False
    return r.get("synthesis_status") == "completed"
# Conditional subset: a question is kept only when both variants are present,
# both report target_doc_present, and both pass nonempty().  Every other
# question is recorded in `excluded` together with the first check it failed.
cond, excluded = [], []
for qid, variants in by.items():
    pre, hier = variants.get("prehier"), variants.get("hier_sim_clean")
    if not (pre and hier):
        reason = "missing variant"
    elif not pre["target_doc_present"] or not hier["target_doc_present"]:
        reason = f"tgt_present p={pre['target_doc_present']} h={hier['target_doc_present']}"
    elif not nonempty(pre) or not nonempty(hier):
        reason = (
            f"empty/skip p={pre.get('answer_len_chars')}/{pre.get('synthesis_status')} "
            f"h={hier.get('answer_len_chars')}/{hier.get('synthesis_status')}"
        )
    else:
        cond.append(qid)
        continue
    excluded.append((qid, reason))

# Report what was kept and what was dropped (and why), in sorted order.
print(f"=== CONDITIONAL SUBSET (둘 다 tgt_present + non-empty) N={len(cond)} ===")
print(" ", sorted(cond))
print(f"=== EXCLUDED {len(excluded)} ===")
for excluded_qid, why in sorted(excluded):
    print(f" {excluded_qid}: {why}")
def halluc(r):
    """Count the entries in ``debug.hallucination_flags`` of a record.

    A missing or falsy ``debug`` or ``hallucination_flags`` counts as zero flags.
    """
    debug_info = r.get("debug") or {}
    flags = debug_info.get("hallucination_flags") or []
    return len(flags)
def _grounding_bucket_size(r, bucket):
    """Return the number of entries in ``debug.defense_layers.grounding[bucket]``.

    Any missing or falsy level along that path is treated as empty, so the
    result is 0 rather than an error.
    """
    debug_info = r.get("debug") or {}
    layers = debug_info.get("defense_layers") or {}
    grounding = layers.get("grounding") or {}
    return len(grounding.get(bucket) or [])


def grounding_weak(r):
    """Count the entries under the record's grounding ``weak`` key."""
    return _grounding_bucket_size(r, "weak")


def grounding_strong(r):
    """Count the entries under the record's grounding ``strong`` key."""
    return _grounding_bucket_size(r, "strong")
# Print one line of aggregate objective signals per variant, computed over
# the conditional subset only.
print(f"\n=== OBJECTIVE SIGNALS on conditional subset (N={len(cond)}) ===")
for v in ["prehier", "hier_sim_clean"]:
    rs = [by[q][v] for q in cond]
    # Guard the mean: an empty conditional subset would otherwise raise
    # ZeroDivisionError; report 0 in that case.
    avg_answer_len = sum(r["answer_len_chars"] for r in rs) // len(rs) if rs else 0
    # Collect the completeness labels once instead of once per label.
    completeness = [r.get("completeness") for r in rs]
    print(
        f" {v}: halluc_flags={sum(halluc(r) for r in rs)} "
        f"grounding_weak={sum(grounding_weak(r) for r in rs)} "
        f"grounding_strong={sum(grounding_strong(r) for r in rs)} "
        f"avg_answer_len={avg_answer_len} "
        f"completeness={completeness.count('full')}full/"
        f"{completeness.count('partial')}part/"
        f"{completeness.count('insufficient')}insuf "
        f"refused={sum(1 for r in rs if r.get('refused'))}"
    )
# Anonymized split (conditional subset only): build the judge-facing pairs
# and, separately, the key that maps each A/B slot back to its variant.
def spans(r):
    """Return the non-empty ``span_text`` values from the record's ``evidence`` list.

    A missing or falsy ``evidence`` yields an empty list.
    """
    return [e.get("span_text") for e in (r.get("evidence") or []) if e.get("span_text")]


# Fixed seed so the A/B assignment is reproducible from run to run.
rng = random.Random(42)
pairs, key = [], {}
for i, qid in enumerate(sorted(cond)):
    p, h = by[qid]["prehier"], by[qid]["hier_sim_clean"]
    # One draw per pair decides which variant is shown in slot A.
    swap = rng.random() < 0.5
    a, b = (p, h) if not swap else (h, p)
    pid = f"pair_{i+1:02d}"
    # The pair carries no variant names; the question text is read from the
    # prehier record.
    pairs.append({
        "pair_id": pid,
        "question": p["query"],
        "answer_A": a["ai_answer"], "evidence_A": spans(a),
        "answer_B": b["ai_answer"], "evidence_B": spans(b),
    })
    # The slot -> variant mapping is kept only in the key.
    key[pid] = {"q_id": qid, "A": a["variant"], "B": b["variant"]}
# Write the pairs as JSON Lines and the de-anonymization key as a separate
# JSON file.  ensure_ascii=False emits non-ASCII text verbatim, so the files
# are opened as UTF-8 explicitly; the platform default encoding could reject
# those characters or produce a non-UTF-8 file.
with open("reports/passage_rag_judge_pairs_2026-05-25.jsonl", "w", encoding="utf-8") as f:
    f.writelines(json.dumps(pr, ensure_ascii=False) + "\n" for pr in pairs)
with open("reports/passage_rag_judge_key_2026-05-25.json", "w", encoding="utf-8") as f:
    json.dump(key, f, ensure_ascii=False, indent=2)
print(f"\nwrote {len(pairs)} anonymized pairs → passage_rag_judge_pairs_2026-05-25.jsonl")
print("wrote key → passage_rag_judge_key_2026-05-25.json (judge 미제공)")