ops(canonical): Phase 1D enqueue 전 backup + targets + md_status 스냅샷

enqueue 시작 직전 3가지 흔적 남김:
  (1) /tmp/phase1d_pilot.json 의 timestamped 사본 (재실행 대비)
  (2) 대상 30건 document_id 한 줄 출력
  (3) documents.md_status 분포 스냅샷 JSON 저장

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-05-01 10:00:23 +09:00
parent 7e5716e594
commit 7cab78e490
+44 -8
View File
@@ -176,20 +176,56 @@ async def cmd_select(out_path: Path) -> None:
# ─── enqueue ───
async def cmd_enqueue(in_path: Path, yes: bool) -> None:
payload = json.loads(in_path.read_text())
ids = payload["ids"]
from datetime import datetime, timezone
from sqlalchemy import func
raw_payload = in_path.read_text()
payload = json.loads(raw_payload)
ids: list[int] = payload["ids"]
ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
# (1) /tmp/phase1d_pilot.json 원본 보존 — 재실행/덮어쓰기 대비 timestamped 사본
backup_path = in_path.with_name(f"{in_path.stem}_pre_enqueue_{ts}.json")
backup_path.write_text(raw_payload)
print(f"[backup] {backup_path}")
# (2) enqueue 대상 document_id 목록 (한 줄)
print(f"[targets] {len(ids)}건: {ids}")
engine = _build_engine()
Session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
from models.document import Document # type: ignore
from models.queue import enqueue_stage # type: ignore
# (3) 실행 전 md_status 분포 스냅샷
async with Session() as session:
snap_rows = (
await session.execute(
select(Document.md_status, func.count())
.where(Document.deleted_at.is_(None))
.group_by(Document.md_status)
)
).all()
snapshot = {
"timestamp_utc": ts,
"scope": "documents WHERE deleted_at IS NULL",
"md_status_distribution": {str(s): int(c) for s, c in snap_rows},
"sample_ids": ids,
}
snap_path = in_path.with_name(f"phase1d_md_status_pre_{ts}.json")
snap_path.write_text(json.dumps(snapshot, ensure_ascii=False, indent=2))
print(f"[snapshot] {snap_path}")
print(f" {snapshot['md_status_distribution']}")
if not yes:
confirm = input(f"\n{len(ids)}건 markdown 큐에 enqueue 합니다. 진행? [y/N] ")
if confirm.strip().lower() not in ("y", "yes"):
print("취소됨.")
await engine.dispose()
return
engine = _build_engine()
Session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
from models.queue import enqueue_stage # type: ignore
enqueued, skipped = [], []
async with Session() as session:
for doc_id in ids:
@@ -197,7 +233,7 @@ async def cmd_enqueue(in_path: Path, yes: bool) -> None:
(enqueued if ok else skipped).append(doc_id)
await session.commit()
print(f"enqueued: {len(enqueued)}, skipped (이미 active): {len(skipped)}")
print(f"\nenqueued: {len(enqueued)}, skipped (이미 active): {len(skipped)}")
if skipped:
print(f" skipped ids: {skipped[:20]}{'' if len(skipped) > 20 else ''}")
await engine.dispose()