feat(search): passage-RAG capture runner + raw JSONL (diagnose c3)

PR-DocSrv-Hier-PassageRAG-Diagnose-1 c3. 22Q x {prehier,hier_sim_clean} /ask?debug=true exact_knn capture (44 rec). ai_answer/evidence/target_doc_present/target_span_used/ objective signals(hallucination/grounding/completeness/refused) 박제. 관찰: hier 일부 타깃 retrieval 실패(exam_005/006,cl_007=doc-search NO-GO 일관) + 일부 gain (cl_001/002). empty-answer 케이스(cl_005/cl_007 prehier, cl_006/exam_004 skipped) 존재. JWT 15min 만료로 1차 부분실패 → cache-warm 재실행 완주. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 06:53:11 +00:00
parent 698510bc0e
commit 9c039139ef
2 changed files with 121 additions and 0 deletions
@@ -0,0 +1,77 @@
 #!/usr/bin/env python3
 """PR-DocSrv-Hier-PassageRAG-Diagnose-1 c3 — /ask capture for prehier vs hier_sim_clean.
 Call /ask once per query × variant and persist each result as a JSONL record
 (raw input for the judge). EVAL-ONLY."""
 import json, sys, time, os
 import httpx, yaml
 # Base URL of the service; requests go to f"{BASE}/api/search/ask".
 BASE = "http://100.110.63.63:8000"
 # Values sent as the `corpus_variant` query parameter; every query is asked once per variant.
 VARIANTS = ["prehier", "hier_sim_clean"]
 # YAML file loaded at startup; its top-level "queries" list drives the run.
 SUBSET = "tests/search_eval/queries_passage_rag.yaml"
 def trim_citations(cits):
    """Project each citation onto the fixed set of fields kept in the capture.

    Args:
        cits: Iterable of citation mappings, or a falsy value (``None``, ``[]``).

    Returns:
        A new list with one dict per citation containing exactly the keys
        ``n``, ``doc_id``, ``chunk_id``, ``section_title``, ``span_text`` and
        ``rerank_score``; a key absent from the citation maps to ``None``.
        A falsy ``cits`` yields an empty list.
    """
    kept_fields = ("n", "doc_id", "chunk_id", "section_title", "span_text", "rerank_score")
    if not cits:
        return []
    return [{field: citation.get(field) for field in kept_fields} for citation in cits]
 def _build_record(q, variant, data, latency_ms):
    """Assemble the capture record for one successful /ask response.

    Args:
        q: Query entry from the YAML subset. ``id``, ``category`` and ``query``
            are read with ``[]`` (required); ``targets_g2``, ``targets_g3`` and
            ``notes`` are optional.
        variant: Corpus variant the request was issued with.
        data: Decoded JSON body of the response.
        latency_ms: Measured request latency in milliseconds.

    Returns:
        The dict that is written as one JSONL line.
    """
    g2 = set(q.get("targets_g2") or [])
    cits = trim_citations(data.get("citations"))
    # Prefer the response's own `used_citations`; when it is missing or empty,
    # fall back to every truthy citation number in the response.
    used = data.get("used_citations") or [
        c.get("n") for c in (data.get("citations") or []) if c.get("n")
    ]
    used_set = set(used or [])
    cited_docs = {c["doc_id"] for c in cits if c.get("doc_id")}
    # True only when a citation from a target doc is among the used citation numbers.
    target_span_used = any(
        (c.get("doc_id") in g2) and (c.get("n") in used_set) for c in cits
    )
    ans = data.get("ai_answer") or ""
    return {
        "q_id": q["id"], "category": q["category"], "query": q["query"],
        "variant": variant, "targets_g2": sorted(g2), "targets_g3": q.get("targets_g3"),
        "answer_hint": q.get("notes"),
        "ai_answer": ans, "answer_len_chars": len(ans),
        "evidence": cits, "used_citations": sorted(used_set),
        "cited_docs": sorted(cited_docs),
        "target_doc_present": bool(cited_docs & g2),
        "target_span_used_in_citation": target_span_used,
        "synthesis_status": data.get("synthesis_status"),
        "completeness": data.get("completeness"),
        "covered_aspects": data.get("covered_aspects"),
        "missing_aspects": data.get("missing_aspects"),
        "confidence": data.get("confidence"),
        "refused": data.get("refused"),
        "backend_used": data.get("backend_used"),
        "synthesis_ms": data.get("synthesis_ms"),
        "latency_ms": round(latency_ms, 1),
        "debug": data.get("debug"),
    }


 def main():
    """Ask every query against every corpus variant and write the results as JSONL.

    The bearer token is read from the ``DOCSRV_TOKEN`` environment variable and
    the output path from the first command-line argument (default
    ``reports/passage_rag_capture.jsonl``). A request that fails is recorded as
    an error record and the run continues with the next query/variant pair.

    Records are written and flushed one at a time, so an interrupted run keeps
    everything captured up to that point. The output file is truncated when the
    run starts.

    Raises:
        KeyError: if ``DOCSRV_TOKEN`` is not set.
        OSError: if the query file cannot be read or the output path cannot be
            created or opened.
    """
    token = os.environ["DOCSRV_TOKEN"]
    out_path = sys.argv[1] if len(sys.argv) > 1 else "reports/passage_rag_capture.jsonl"
    # Close the query file deterministically and decode it as UTF-8 regardless of locale.
    with open(SUBSET, encoding="utf-8") as fh:
        queries = yaml.safe_load(fh)["queries"]
    # Prepare the output location before any request is sent, so an unwritable
    # path fails immediately instead of after the whole run.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    headers = {"Authorization": f"Bearer {token}"}
    n_written = 0
    # UTF-8 is required because records are dumped with ensure_ascii=False.
    with open(out_path, "w", encoding="utf-8") as sink, httpx.Client(timeout=90.0) as cli:
        for q in queries:
            for variant in VARIANTS:
                params = {"q": q["query"], "debug": "true",
                          "corpus_variant": variant, "exact_knn": "true"}
                t0 = time.perf_counter()
                try:
                    r = cli.get(f"{BASE}/api/search/ask", headers=headers, params=params)
                    lat = (time.perf_counter() - t0) * 1000
                    r.raise_for_status()
                    data = r.json()
                except Exception as exc:
                    # Best-effort capture: log the failure, record it, keep going.
                    print(f"  ✗ {q['id']}/{variant}: {type(exc).__name__}: {repr(exc)[:120]}", flush=True)
                    # `error_detail` keeps the (truncated) message so different
                    # failure causes can be told apart in the JSONL afterwards.
                    rec = {"q_id": q["id"], "variant": variant,
                           "error": f"{type(exc).__name__}",
                           "error_detail": repr(exc)[:300]}
                else:
                    rec = _build_record(q, variant, data, lat)
                    print(f"  ✓ {q['id']}/{variant}: ans_len={rec['answer_len_chars']} cited={rec['cited_docs']} "
                          f"tgt_present={rec['target_doc_present']} tgt_used={rec['target_span_used_in_citation']} "
                          f"status={rec['synthesis_status']}", flush=True)
                sink.write(json.dumps(rec, ensure_ascii=False) + "\n")
                # Flush per record so a crash or Ctrl-C does not lose finished captures.
                sink.flush()
                n_written += 1
    print(f"\nwrote {n_written} records → {out_path}")
 # Run the capture only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()