# hyungi_document_server/tests/search_eval/scripts/capture_passage_rag.py

#!/usr/bin/env python3
"""PR-DocSrv-Hier-PassageRAG-Diagnose-1 c3 — /ask capture for prehier vs hier_sim_clean.
Calls /ask once per query × variant and saves the results as JSONL (raw input for the judge). EVAL-ONLY."""
import json, sys, time, os
import httpx, yaml

# Base URL of the server; main() appends "/api/search/ask" to it.
BASE = "http://100.110.63.63:8000"
# Values sent as the `corpus_variant` query parameter; each query is run once per entry.
VARIANTS = ["prehier", "hier_sim_clean"]
# YAML file providing the "queries" list. The path is relative, so it is
# resolved against the current working directory.
SUBSET = "tests/search_eval/queries_passage_rag.yaml"

def trim_citations(cits):
    """Reduce each citation to the fixed set of fields kept in the capture.

    Args:
        cits: Iterable of citation mappings (objects exposing ``.get``), or a
            falsy value such as ``None`` / ``[]``.

    Returns:
        A list with one dict per citation, each holding exactly the keys
        ``n``, ``doc_id``, ``chunk_id``, ``section_title``, ``span_text`` and
        ``rerank_score``. Keys absent from a citation map to ``None``.
    """
    kept_fields = ("n", "doc_id", "chunk_id", "section_title", "span_text", "rerank_score")
    if not cits:
        return []
    return [{field: cit.get(field) for field in kept_fields} for cit in cits]

def _build_record(q, variant, data, latency_ms):
    """Assemble the JSONL record for one successful /ask response.

    Args:
        q: Query entry from the YAML subset; ``id``, ``category`` and
            ``query`` are required, ``targets_g2``, ``targets_g3`` and
            ``notes`` are optional.
        variant: The ``corpus_variant`` value the request was made with.
        data: Decoded JSON body of the response.
        latency_ms: Wall-clock duration of the HTTP call in milliseconds.

    Returns:
        A dict holding the query metadata, the answer, trimmed citations,
        the derived target-hit flags and the server-reported fields.

    Raises:
        KeyError: If ``q`` lacks ``id``, ``category`` or ``query``.
    """
    g2 = set(q.get("targets_g2") or [])
    raw_cits = data.get("citations") or []
    cits = trim_citations(raw_cits)
    # Fall back to every numbered citation when the response carries no
    # used_citations.
    # NOTE(review): because of `or`, an explicitly empty used_citations list
    # also triggers this fallback, which can overstate
    # target_span_used_in_citation — confirm against the server contract.
    used = data.get("used_citations") or [c.get("n") for c in raw_cits if c.get("n")]
    used_set = set(used)
    cited_docs = {c["doc_id"] for c in cits if c.get("doc_id")}
    # True when a citation from a target doc is among the used citations.
    target_span_used = any(
        c.get("doc_id") in g2 and c.get("n") in used_set for c in cits
    )
    ans = data.get("ai_answer") or ""
    return {
        "q_id": q["id"], "category": q["category"], "query": q["query"],
        "variant": variant, "targets_g2": sorted(g2), "targets_g3": q.get("targets_g3"),
        "answer_hint": q.get("notes"),
        "ai_answer": ans, "answer_len_chars": len(ans),
        "evidence": cits, "used_citations": sorted(used_set),
        "cited_docs": sorted(cited_docs),
        "target_doc_present": bool(cited_docs & g2),
        "target_span_used_in_citation": target_span_used,
        "synthesis_status": data.get("synthesis_status"),
        "completeness": data.get("completeness"),
        "covered_aspects": data.get("covered_aspects"),
        "missing_aspects": data.get("missing_aspects"),
        "confidence": data.get("confidence"),
        "refused": data.get("refused"),
        "backend_used": data.get("backend_used"),
        "synthesis_ms": data.get("synthesis_ms"),
        "latency_ms": round(latency_ms, 1),
        "debug": data.get("debug"),
    }


def main():
    """Call /api/search/ask for every query × variant and write the results as JSONL.

    The bearer token is read from the ``DOCSRV_TOKEN`` environment variable.
    The output path is ``sys.argv[1]`` when given, otherwise
    ``reports/passage_rag_capture.jsonl``. A failed request is printed and
    recorded as a short error entry; it does not stop the run.

    Raises:
        KeyError: If ``DOCSRV_TOKEN`` is unset or the YAML has no ``queries`` key.
        OSError: If the query file cannot be read or the output cannot be written.
    """
    token = os.environ["DOCSRV_TOKEN"]
    out_path = sys.argv[1] if len(sys.argv) > 1 else "reports/passage_rag_capture.jsonl"
    # Explicit UTF-8 and a context manager: the file handle is closed and the
    # read does not depend on the locale's default encoding.
    with open(SUBSET, encoding="utf-8") as f:
        queries = yaml.safe_load(f)["queries"]
    # Create the output directory before the slow network loop so a missing
    # directory cannot discard the captured data at the very end.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    headers = {"Authorization": f"Bearer {token}"}
    recs = []
    with httpx.Client(timeout=90.0) as cli:
        for q in queries:
            for variant in VARIANTS:
                params = {"q": q["query"], "debug": "true",
                          "corpus_variant": variant, "exact_knn": "true"}
                t0 = time.perf_counter()
                try:
                    r = cli.get(f"{BASE}/api/search/ask", headers=headers, params=params)
                    # Latency is taken before status/JSON handling, so it
                    # covers only the HTTP round trip.
                    lat = (time.perf_counter() - t0) * 1000
                    r.raise_for_status()
                    data = r.json()
                except Exception as exc:
                    # Deliberately broad: one failed call must not abort the
                    # whole capture; the failure is logged and recorded.
                    print(f"  ✗ {q['id']}/{variant}: {type(exc).__name__}: {repr(exc)[:120]}", flush=True)
                    recs.append({"q_id": q["id"], "variant": variant, "error": f"{type(exc).__name__}"})
                    continue
                rec = _build_record(q, variant, data, lat)
                recs.append(rec)
                print(f"  ✓ {q['id']}/{variant}: ans_len={rec['answer_len_chars']} cited={rec['cited_docs']} "
                      f"tgt_present={rec['target_doc_present']} tgt_used={rec['target_span_used_in_citation']} "
                      f"status={rec['synthesis_status']}", flush=True)
    # Records are dumped with ensure_ascii=False, so the file must be UTF-8
    # regardless of the locale default.
    with open(out_path, "w", encoding="utf-8") as f:
        for rec in recs:
            f.write(json.dumps(rec, ensure_ascii=False) + "\n")
    print(f"\nwrote {len(recs)} records → {out_path}")

# Run the capture only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()