cd33ded7a8
PR-DocSrv-Hier-PassageRAG-Diagnose-1 c4+c5. 조건부 N=12(retrieval 통제) blind pairwise (hypothesis-blind subagent, 익명 3-file split). 결과 4-way 수렴 = 동등: pairwise prehier4/hier3/tie5(no edge) + axis ±0.08 + objective 동일(halluc36/36) + variance~0(byte-identical 재생성). verbosity artifact 없음(prehier 더 길었으나 승+1). => NO-GO: hier-leaf evidence 무이득. hier leaf = section-outline UI 전용 완전 확정 (UI yes / doc-search NO-GO / passage-RAG NO-GO 3영역 종결). 2026-06-21 freeze input only. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
73 lines
3.5 KiB
Python
73 lines
3.5 KiB
Python
#!/usr/bin/env python3
|
|
"""c4: conditional subset + objective signals + anonymized 3-file split."""
|
|
import json, random, os
|
|
# Work from the project checkout so the relative "reports/..." paths resolve.
os.chdir(os.path.expanduser("~/Documents/code/hyungi_Document_Server"))

# Load the capture log, one JSON record per line. The context manager closes
# the handle deterministically, UTF-8 is requested explicitly instead of
# relying on the locale default, and blank lines are skipped rather than
# crashing json.loads.
with open("reports/passage_rag_capture_2026-05-25.jsonl", encoding="utf-8") as capture:
    recs = [json.loads(line) for line in capture if line.strip()]

# Index records as by[q_id][variant] -> record. A later record with the same
# (q_id, variant) pair overwrites an earlier one.
by = {}
for r in recs:
    by.setdefault(r["q_id"], {})[r["variant"]] = r
|
|
|
|
def nonempty(r):
    """Return True when record *r* has a positive answer length and a completed synthesis.

    A missing or falsy "answer_len_chars" is treated as 0.
    """
    answer_len = r.get("answer_len_chars") or 0
    if not answer_len > 0:
        return False
    return r.get("synthesis_status") == "completed"
|
|
|
|
# Conditional subset: keep a question only when both variants exist, both
# report target_doc_present, and both produced a completed, non-empty answer.
# Every rejected question is recorded together with the first check it failed.
cond, excluded = [], []
for qid, variants in by.items():
    pre = variants.get("prehier")
    hier = variants.get("hier_sim_clean")
    if not (pre and hier):
        reason = "missing variant"
    elif not pre["target_doc_present"] or not hier["target_doc_present"]:
        reason = f"tgt_present p={pre['target_doc_present']} h={hier['target_doc_present']}"
    elif not nonempty(pre) or not nonempty(hier):
        reason = (
            f"empty/skip p={pre.get('answer_len_chars')}/{pre.get('synthesis_status')} "
            f"h={hier.get('answer_len_chars')}/{hier.get('synthesis_status')}"
        )
    else:
        cond.append(qid)
        continue
    excluded.append((qid, reason))

# Report the retained question ids, then each exclusion with its reason.
print(f"=== CONDITIONAL SUBSET (둘 다 tgt_present + non-empty) N={len(cond)} ===")
print(" ", sorted(cond))
print(f"=== EXCLUDED {len(excluded)} ===")
for qid, why in sorted(excluded):
    print(f" {qid}: {why}")
|
|
|
|
def halluc(r):
    """Return the number of entries in r["debug"]["hallucination_flags"].

    A missing or falsy "debug" mapping or flag list counts as zero.
    """
    debug = r.get("debug") or {}
    flags = debug.get("hallucination_flags") or []
    return len(flags)
|
|
def _grounding_count(r, bucket):
    """Return len(r["debug"]["defense_layers"]["grounding"][bucket]).

    Any missing or falsy level along that path is treated as empty, so the
    result is 0 instead of an exception.
    """
    node = r
    for level in ("debug", "defense_layers", "grounding"):
        node = node.get(level) or {}
    return len(node.get(bucket) or [])


def grounding_weak(r):
    """Return the number of entries in the "weak" grounding list of record *r*."""
    return _grounding_count(r, "weak")


def grounding_strong(r):
    """Return the number of entries in the "strong" grounding list of record *r*."""
    return _grounding_count(r, "strong")
|
|
|
|
# Per-variant objective signals, aggregated over the conditional subset.
print(f"\n=== OBJECTIVE SIGNALS on conditional subset (N={len(cond)}) ===")
for v in ["prehier", "hier_sim_clean"]:
    rs = [by[q][v] for q in cond]
    # An empty conditional subset would otherwise raise ZeroDivisionError in
    # the integer mean below; report 0 in that case.
    avg_answer_len = sum(r["answer_len_chars"] for r in rs) // len(rs) if rs else 0
    # Collect the completeness labels once instead of once per counted label.
    completeness = [r.get("completeness") for r in rs]
    print(f" {v}: halluc_flags={sum(halluc(r) for r in rs)} "
          f"grounding_weak={sum(grounding_weak(r) for r in rs)} "
          f"grounding_strong={sum(grounding_strong(r) for r in rs)} "
          f"avg_answer_len={avg_answer_len} "
          f"completeness={completeness.count('full')}full/"
          f"{completeness.count('partial')}part/"
          f"{completeness.count('insufficient')}insuf "
          f"refused={sum(1 for r in rs if r.get('refused'))}")
|
|
|
|
# Anonymized A/B pairs for the conditional subset only. `pairs` carries the
# question, answers and evidence spans; `key` maps each pair id back to the
# q_id and to the variant shown in each slot.
rng = random.Random(42)  # fixed seed keeps the A/B assignment reproducible
pairs, key = [], {}


def spans(r):
    """Return the truthy "span_text" values from r["evidence"], in order."""
    texts = []
    for item in r.get("evidence") or []:
        text = item.get("span_text")
        if text:
            texts.append(text)
    return texts


for idx, qid in enumerate(sorted(cond), start=1):
    pre = by[qid]["prehier"]
    hier = by[qid]["hier_sim_clean"]
    # One seeded draw per pair decides which variant is shown as A.
    if rng.random() < 0.5:
        first, second = hier, pre
    else:
        first, second = pre, hier
    pid = f"pair_{idx:02d}"
    pairs.append({
        "pair_id": pid,
        "question": pre["query"],
        "answer_A": first["ai_answer"], "evidence_A": spans(first),
        "answer_B": second["ai_answer"], "evidence_B": spans(second),
    })
    key[pid] = {"q_id": qid, "A": first["variant"], "B": second["variant"]}
|
|
|
|
# Persist the anonymized pairs (one JSON object per line) and the de-anonymizing
# key. Both are serialized with ensure_ascii=False, so non-ASCII text is written
# verbatim; UTF-8 is requested explicitly so the bytes on disk do not depend on
# the platform's locale default.
with open("reports/passage_rag_judge_pairs_2026-05-25.jsonl", "w", encoding="utf-8") as f:
    f.writelines(json.dumps(pr, ensure_ascii=False) + "\n" for pr in pairs)
with open("reports/passage_rag_judge_key_2026-05-25.json", "w", encoding="utf-8") as f:
    json.dump(key, f, ensure_ascii=False, indent=2)
print(f"\nwrote {len(pairs)} anonymized pairs → passage_rag_judge_pairs_2026-05-25.jsonl")
print("wrote key → passage_rag_judge_key_2026-05-25.json (judge 미제공)")
|