diff --git a/scripts/phase1d_pilot.py b/scripts/phase1d_pilot.py index 71d7612..2c43e18 100644 --- a/scripts/phase1d_pilot.py +++ b/scripts/phase1d_pilot.py @@ -176,20 +176,56 @@ async def cmd_select(out_path: Path) -> None: # ─── enqueue ─── async def cmd_enqueue(in_path: Path, yes: bool) -> None: - payload = json.loads(in_path.read_text()) - ids = payload["ids"] + from datetime import datetime, timezone + from sqlalchemy import func + + raw_payload = in_path.read_text() + payload = json.loads(raw_payload) + ids: list[int] = payload["ids"] + + ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + + # (1) /tmp/phase1d_pilot.json 원본 보존 — 재실행/덮어쓰기 대비 timestamped 사본 + backup_path = in_path.with_name(f"{in_path.stem}_pre_enqueue_{ts}.json") + backup_path.write_text(raw_payload) + print(f"[backup] {backup_path}") + + # (2) enqueue 대상 document_id 목록 (한 줄) + print(f"[targets] {len(ids)}건: {ids}") + + engine = _build_engine() + Session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) + + from models.document import Document # type: ignore + from models.queue import enqueue_stage # type: ignore + + # (3) 실행 전 md_status 분포 스냅샷 + async with Session() as session: + snap_rows = ( + await session.execute( + select(Document.md_status, func.count()) + .where(Document.deleted_at.is_(None)) + .group_by(Document.md_status) + ) + ).all() + snapshot = { + "timestamp_utc": ts, + "scope": "documents WHERE deleted_at IS NULL", + "md_status_distribution": {str(s): int(c) for s, c in snap_rows}, + "sample_ids": ids, + } + snap_path = in_path.with_name(f"phase1d_md_status_pre_{ts}.json") + snap_path.write_text(json.dumps(snapshot, ensure_ascii=False, indent=2)) + print(f"[snapshot] {snap_path}") + print(f" {snapshot['md_status_distribution']}") if not yes: confirm = input(f"\n{len(ids)}건 markdown 큐에 enqueue 합니다. 진행? [y/N] ") if confirm.strip().lower() not in ("y", "yes"): print("취소됨.") + await engine.dispose() return - engine = _build_engine() - Session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) - - from models.queue import enqueue_stage # type: ignore - enqueued, skipped = [], [] async with Session() as session: for doc_id in ids: @@ -197,7 +233,7 @@ async def cmd_enqueue(in_path: Path, yes: bool) -> None: (enqueued if ok else skipped).append(doc_id) await session.commit() - print(f"enqueued: {len(enqueued)}, skipped (이미 active): {len(skipped)}") + print(f"\nenqueued: {len(enqueued)}, skipped (이미 active): {len(skipped)}") if skipped: print(f" skipped ids: {skipped[:20]}{' …' if len(skipped) > 20 else ''}") await engine.dispose()