ops(canonical): Phase 1D enqueue 전 backup + targets + md_status 스냅샷
enqueue 시작 직전에 다음 3가지 흔적을 남긴다:
(1) /tmp/phase1d_pilot.json 의 timestamped 사본 (재실행 대비)
(2) 대상 30건 document_id 를 한 줄로 출력
(3) documents.md_status 분포 스냅샷을 JSON 으로 저장

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -176,20 +176,56 @@ async def cmd_select(out_path: Path) -> None:
|
||||
# ─── enqueue ───
|
||||
|
||||
async def cmd_enqueue(in_path: Path, yes: bool) -> None:
|
||||
payload = json.loads(in_path.read_text())
|
||||
ids = payload["ids"]
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import func
|
||||
|
||||
raw_payload = in_path.read_text()
|
||||
payload = json.loads(raw_payload)
|
||||
ids: list[int] = payload["ids"]
|
||||
|
||||
ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
|
||||
|
||||
# (1) /tmp/phase1d_pilot.json 원본 보존 — 재실행/덮어쓰기 대비 timestamped 사본
|
||||
backup_path = in_path.with_name(f"{in_path.stem}_pre_enqueue_{ts}.json")
|
||||
backup_path.write_text(raw_payload)
|
||||
print(f"[backup] {backup_path}")
|
||||
|
||||
# (2) enqueue 대상 document_id 목록 (한 줄)
|
||||
print(f"[targets] {len(ids)}건: {ids}")
|
||||
|
||||
engine = _build_engine()
|
||||
Session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
|
||||
|
||||
from models.document import Document # type: ignore
|
||||
from models.queue import enqueue_stage # type: ignore
|
||||
|
||||
# (3) 실행 전 md_status 분포 스냅샷
|
||||
async with Session() as session:
|
||||
snap_rows = (
|
||||
await session.execute(
|
||||
select(Document.md_status, func.count())
|
||||
.where(Document.deleted_at.is_(None))
|
||||
.group_by(Document.md_status)
|
||||
)
|
||||
).all()
|
||||
snapshot = {
|
||||
"timestamp_utc": ts,
|
||||
"scope": "documents WHERE deleted_at IS NULL",
|
||||
"md_status_distribution": {str(s): int(c) for s, c in snap_rows},
|
||||
"sample_ids": ids,
|
||||
}
|
||||
snap_path = in_path.with_name(f"phase1d_md_status_pre_{ts}.json")
|
||||
snap_path.write_text(json.dumps(snapshot, ensure_ascii=False, indent=2))
|
||||
print(f"[snapshot] {snap_path}")
|
||||
print(f" {snapshot['md_status_distribution']}")
|
||||
|
||||
if not yes:
|
||||
confirm = input(f"\n{len(ids)}건 markdown 큐에 enqueue 합니다. 진행? [y/N] ")
|
||||
if confirm.strip().lower() not in ("y", "yes"):
|
||||
print("취소됨.")
|
||||
await engine.dispose()
|
||||
return
|
||||
|
||||
engine = _build_engine()
|
||||
Session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
|
||||
|
||||
from models.queue import enqueue_stage # type: ignore
|
||||
|
||||
enqueued, skipped = [], []
|
||||
async with Session() as session:
|
||||
for doc_id in ids:
|
||||
@@ -197,7 +233,7 @@ async def cmd_enqueue(in_path: Path, yes: bool) -> None:
|
||||
(enqueued if ok else skipped).append(doc_id)
|
||||
await session.commit()
|
||||
|
||||
print(f"enqueued: {len(enqueued)}, skipped (이미 active): {len(skipped)}")
|
||||
print(f"\nenqueued: {len(enqueued)}, skipped (이미 active): {len(skipped)}")
|
||||
if skipped:
|
||||
print(f" skipped ids: {skipped[:20]}{' …' if len(skipped) > 20 else ''}")
|
||||
await engine.dispose()
|
||||
|
||||
Reference in New Issue
Block a user