"""doc 단위 atomic 코퍼스 교체 (PR-DocSrv-Hierarchical-Decomposition-1 c5/c6). legacy 윈도우 청크 → hier_section leaf 청크로 검색 코퍼스 교체(in_corpus 토글). - 물리 삭제 없음(in_corpus 플래그만). 부분 ivfflat 이 자동 반영. - G5 precondition(doc-local): hier leaf>0 + 모든 leaf embedding 보유(doc-local 100%) + parent 무결성(dangling 0). - 단일 트랜잭션 atomic. 실패/precond 미충족 → 변경 0(legacy 유지). - rollback: in_corpus 역토글(아래 rollback_doc_corpus). """ from __future__ import annotations from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession CHUNKER_VERSION = "hier-rule-v1" async def precheck(session: AsyncSession, doc_id: int) -> dict: row = (await session.execute(text(""" SELECT count(*) FILTER (WHERE source_type='hier_section' AND is_leaf) AS hier_leaves, count(*) FILTER (WHERE source_type='hier_section' AND is_leaf AND embedding IS NOT NULL) AS hier_leaves_emb, count(*) FILTER (WHERE source_type='legacy' AND in_corpus) AS legacy_active, count(*) FILTER (WHERE source_type='hier_section' AND parent_id IS NOT NULL AND parent_id NOT IN (SELECT id FROM document_chunks WHERE doc_id=:d AND source_type='hier_section')) AS dangling FROM document_chunks WHERE doc_id=:d"""), {"d": doc_id})).one() leaves, leaves_emb = row.hier_leaves, row.hier_leaves_emb doc_local_100 = leaves > 0 and leaves_emb == leaves ok = doc_local_100 and row.dangling == 0 return { "doc_id": doc_id, "hier_leaves": leaves, "hier_leaves_embedded": leaves_emb, "doc_local_embed_100": doc_local_100, "legacy_active": row.legacy_active, "dangling_parent": row.dangling, "precond_ok": ok, "reason": None if ok else ( "no_hier_leaves" if leaves == 0 else "embed_incomplete" if not doc_local_100 else "dangling_parent"), } async def replace_doc_corpus(session: AsyncSession, doc_id: int, *, dry_run: bool = True) -> dict: pc = await precheck(session, doc_id) pc["dry_run"] = dry_run if not pc["precond_ok"]: pc["action"] = "aborted" return pc if dry_run: pc["action"] = "dry_run" pc["would_deactivate_legacy"] = pc["legacy_active"] pc["would_activate_hier_leaves"] = pc["hier_leaves"] return pc # atomic 교체 (단일 트랜잭션) deact = (await session.execute(text( "UPDATE document_chunks SET in_corpus=false WHERE doc_id=:d AND source_type='legacy' AND in_corpus=true"), {"d": doc_id})).rowcount act = (await session.execute(text( "UPDATE document_chunks SET in_corpus=true WHERE doc_id=:d AND source_type='hier_section'" " AND chunker_version=:cv AND is_leaf=true AND embedding IS NOT NULL AND in_corpus=false"), {"d": doc_id, "cv": CHUNKER_VERSION})).rowcount await session.commit() pc.update({"action": "replaced", "legacy_deactivated": deact, "hier_activated": act}) return pc async def rollback_doc_corpus(session: AsyncSession, doc_id: int) -> dict: """교체 역토글 (legacy 복귀, hier 비활성).""" act = (await session.execute(text( "UPDATE document_chunks SET in_corpus=true WHERE doc_id=:d AND source_type='legacy' AND in_corpus=false"), {"d": doc_id})).rowcount deact = (await session.execute(text( "UPDATE document_chunks SET in_corpus=false WHERE doc_id=:d AND source_type='hier_section' AND in_corpus=true"), {"d": doc_id})).rowcount await session.commit() return {"doc_id": doc_id, "action": "rolled_back", "legacy_reactivated": act, "hier_deactivated": deact}