From 0362f521303633c59112eca171ef63c48b61866e Mon Sep 17 00:00:00 2001
From: Hyungi Ahn <hyungiahn@Hyungiui-MacBookPro.local>
Date: Sat, 2 May 2026 16:27:31 +0900
Subject: [PATCH] =?UTF-8?q?fix(scripts):=20Phase=201D=20enqueue=20?=
 =?UTF-8?q?=EA=B0=80=20existing=5Fsuccess=20=EC=9E=AC=EC=B2=98=EB=A6=AC?=
 =?UTF-8?q?=ED=95=98=EC=A7=80=20=EC=95=8A=EB=8F=84=EB=A1=9D=20=ED=95=84?=
 =?UTF-8?q?=ED=84=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round 2 sample 에 existing_success 5건 (anchor doc 4809 + calibration 4건)
이 포함되어 있었지만, cmd_enqueue 가 sample_source 를 무시하고 30건 전부
enqueue 하던 버그. 이 버그로 인한 결과:
  - existing 5건 marker 재처리 (~25분 marker 시간 낭비)
  - 동일 quality output 으로 md_content overwrite → baseline 유실
  - anchor (doc 4809) 의 "before" 상태가 사라져 후속 라운드 비교 anchor 손상

Fix:
  - default: sample_source == "controlled_backfill" 인 항목만 enqueue (25건)
  - --include-existing flag 추가 (후속 Marker 튜닝 라운드에서 anchor 재처리
    필요 시 사용)
  - print 로 mode 명시 + 제외된 ids 표시
  - enqueue 대상 ids 를 payload["ids"] 대신 payload["items"] 에서 도출하고,
    대상이 없으면 "[abort]" 출력 후 종료

야간 단발 sweep (23:00 KST) 예약 실행 전 fix.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 scripts/phase1d_pilot.py | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/scripts/phase1d_pilot.py b/scripts/phase1d_pilot.py
index 75d5d45..51ecfda 100644
--- a/scripts/phase1d_pilot.py
+++ b/scripts/phase1d_pilot.py
@@ -484,13 +484,34 @@ async def cmd_select(out_path: Path, csv_path: Path | None) -> None:
 
 # ─── enqueue ───
 
-async def cmd_enqueue(in_path: Path, yes: bool) -> None:
+async def cmd_enqueue(in_path: Path, yes: bool, include_existing: bool = False) -> None:
     from datetime import datetime, timezone
     from sqlalchemy import func
 
     raw_payload = in_path.read_text()
     payload = json.loads(raw_payload)
-    ids: list[int] = payload["ids"]
+    items: list[dict] = payload.get("items", [])
+
+    # Round 2 정책: existing_success (5건, anchor + calibration) 는 enqueue 제외.
+    # 그들은 기존 md_content 그대로 두고 평가 anchor 로 사용. 재처리 시 marker 시간
+    # 낭비 + 같은 quality output overwrite 로 baseline 유실. controlled_backfill (25건)
+    # 만 새로 변환. 후속 라운드 (Marker 튜닝 후) anchor 재처리 필요 시 --include-existing.
+    if include_existing:
+        target_items = items
+        ids: list[int] = [it["id"] for it in target_items]
+        print(f"[mode]      include_existing=True — sample 30건 전부 enqueue")
+    else:
+        target_items = [it for it in items if it.get("sample_source") == "controlled_backfill"]
+        ids = [it["id"] for it in target_items]
+        excluded = [it["id"] for it in items if it.get("sample_source") != "controlled_backfill"]
+        print(
+            f"[mode]      controlled_backfill 만 enqueue ({len(ids)}건). "
+            f"existing_success 제외 ({len(excluded)}건): {excluded}"
+        )
+
+    if not ids:
+        print("[abort] enqueue 대상 없음.")
+        return
 
     ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
 
@@ -740,6 +761,11 @@ def main() -> None:
     p_enq = sub.add_parser("enqueue", help="markdown 큐 enqueue")
     p_enq.add_argument("--in", dest="in_path", type=Path, default=DEFAULT_OUT)
     p_enq.add_argument("--yes", action="store_true")
+    p_enq.add_argument(
+        "--include-existing",
+        action="store_true",
+        help="existing_success (anchor + calibration) 도 재처리. default 는 controlled_backfill 만.",
+    )
 
     p_rep = sub.add_parser("report", help="결과 집계")
     p_rep.add_argument("--in", dest="in_path", type=Path, default=DEFAULT_OUT)
@@ -752,7 +778,7 @@ def main() -> None:
     if args.cmd == "select":
         asyncio.run(cmd_select(args.out, args.csv))
     elif args.cmd == "enqueue":
-        asyncio.run(cmd_enqueue(args.in_path, args.yes))
+        asyncio.run(cmd_enqueue(args.in_path, args.yes, args.include_existing))
     elif args.cmd == "report":
         asyncio.run(cmd_report(args.in_path))
     elif args.cmd == "eval_template":