From 381fcfc6757911b6d56f219cb4c0860e5d9b0a96 Mon Sep 17 00:00:00 2001
From: hyungi
Date: Tue, 16 Jun 2026 15:49:28 +0900
Subject: [PATCH] =?UTF-8?q?ops(ci):=20=EC=A0=84=EC=B2=B4=20app=20=EB=B6=80?=
 =?UTF-8?q?=ED=8C=85=20=EC=8A=A4=EB=AA=A8=ED=81=AC=20(boot=5Fsmoke.py)=20?=
 =?UTF-8?q?=E2=80=94=20GPU=20=EA=B2=A9=EB=A6=AC=20deploy-blocker=20?=
 =?UTF-8?q?=EA=B2=8C=EC=9D=B4=ED=8A=B8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

lifespan 실 경로(init_db + 전 worker import + 전 add_job)를 prod 이미지 컨테이너 +
ephemeral PG 로 실행해 router/worker import 오류·잡 등록 오류를 검출. NAS/scheduler.start/
prewarm 3개 부작용만 중립화(prod/AI 무접촉). GPU 실측 PASS: routes=173·jobs=34·schema 361·health ok.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 scripts/ci/boot_smoke.py | 220 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 220 insertions(+)
 create mode 100644 scripts/ci/boot_smoke.py

diff --git a/scripts/ci/boot_smoke.py b/scripts/ci/boot_smoke.py
new file mode 100644
index 0000000..b924a1a
--- /dev/null
+++ b/scripts/ci/boot_smoke.py
@@ -0,0 +1,220 @@
+"""Full app boot runtime smoke test (GPU-isolated) — deploy-blocker gate.
+
+init_db itself is verified by initdb_runtime_test.py (R1) and migration_smoke.sh.
+On top of that, this smoke test executes the **real container boot path**
+(main:app + lifespan startup) to catch the deploy-blocker classes that
+py_compile cannot:
+
+  1. `import main` = import every router + build the FastAPI app
+     (detects missing router symbols and import cycles)
+  2. lifespan startup = every worker import inside lifespan (~35) + init_db +
+     every add_job call (detects worker import-time errors and job
+     registration errors; **zero drift** = the real path)
+  3. /health (health_check called directly) = DB connected
+
+To stay clear of prod/AI/NAS, only three side effects (NAS, scheduler, prewarm)
+are surgically neutralized; the logic under test is left untouched:
+  - NAS mount check      -> passes against a temp directory (+PKM/), no real NAS
+  - scheduler.start()    -> no-op (jobs are registered but never run, so no
+                            worker polling and no external API calls)
+  - scheduler.shutdown() -> no-op (never started, so the shutdown issued on
+                            lifespan exit must not raise)
+  - prewarm_analyzer()   -> no-op (AI router :8890 is never called, which keeps
+                            the search-experiment soft-lock safe)
+
+Run (inside a prod fastapi image container with the worktree root mounted;
+NETWORK, WORKTREE and IMAGE are placeholders):
+    docker run --rm --network NETWORK -v WORKTREE:/work -w /work \
+      -e PYTHONPATH=/work/app -e BOOT_SMOKE=1 \
+      -e DATABASE_URL="postgresql+asyncpg://postgres@ds-bootsmoke-pg:5432/pkm" \
+      IMAGE python scripts/ci/boot_smoke.py
+
+Expected: IMPORTS OK -> LIFESPAN startup OK (jobs=N, purge_sweep included)
+-> schema OK -> HEALTH ok -> PASS
+"""
+import asyncio
+import os
+import tempfile
+from pathlib import Path
+
+from sqlalchemy import text
+from sqlalchemy.engine import make_url
+
+# Sanity thresholds for a healthy boot.
+ROUTE_COUNT_FLOOR = 50  # app.routes must be strictly greater than this
+MIN_JOB_COUNT = 30
+REQUIRED_JOB_IDS = ("purge_sweep", "auto_review", "queue_consumer", "statute_collector")
+# Highest schema_migrations.version expected after the lifespan's init_db.
+EXPECTED_MAX_MIGRATION = 361
+
+
+def _require(condition: object, message: str) -> None:
+    """Raise ``AssertionError(message)`` unless *condition* is truthy.
+
+    Replaces the ``assert`` statement, which is compiled out under
+    ``python -O`` / ``PYTHONOPTIMIZE`` and would silently disable every check,
+    including the prod-DB safety guard.
+    """
+    if not condition:
+        raise AssertionError(message)
+
+
+def _check_safety(url: str) -> None:
+    """Abort unless BOOT_SMOKE=1 is set and *url* looks like an ephemeral DB.
+
+    The URL is printed and embedded in error messages with its password
+    masked, so a misconfigured run does not leak credentials into CI logs.
+    """
+    shown = make_url(url).render_as_string(hide_password=True)
+    print("DATABASE_URL:", shown)
+    _require(os.getenv("BOOT_SMOKE") == "1", "SAFETY ABORT: BOOT_SMOKE=1 미설정")
+    # prod = '...@postgres:5432/pkm' (user pkm).
+    # ephemeral = bootsmoke host / localhost / postgres user.
+    _require(
+        "@postgres:" not in url and "@postgres/" not in url,
+        f"SAFETY ABORT: prod DB 로 보임: {shown}",
+    )
+    _require(
+        any(marker in url for marker in ("bootsmoke", "localhost", "127.0.0.1")),
+        f"SAFETY ABORT: ephemeral 마커(bootsmoke/localhost) 없음: {shown}",
+    )
+
+
+def _neutralize_side_effects() -> dict:
+    """Stub out prewarm and scheduler start/shutdown before the app is imported.
+
+    Returns a dict that receives the key ``"sched"`` (the scheduler instance)
+    whenever an ``AsyncIOScheduler`` is constructed, so the registered jobs
+    can be inspected afterwards. Job registration itself is not patched.
+    """
+    # prewarm: never call the AI router.
+    import services.search.query_analyzer as qa
+
+    async def _noop_prewarm(*args, **kwargs):
+        return None
+
+    qa.prewarm_analyzer = _noop_prewarm
+
+    # scheduler.start/shutdown become no-ops; __init__ is wrapped so the
+    # instance can be captured for job counting.
+    from apscheduler.schedulers.asyncio import AsyncIOScheduler
+
+    captured: dict = {}
+    original_init = AsyncIOScheduler.__init__
+
+    def _capturing_init(self, *args, **kwargs):
+        original_init(self, *args, **kwargs)
+        captured["sched"] = self
+
+    AsyncIOScheduler.__init__ = _capturing_init
+    AsyncIOScheduler.start = lambda self, *args, **kwargs: None
+    AsyncIOScheduler.shutdown = lambda self, *args, **kwargs: None
+    return captured
+
+
+def _verify_jobs(sched) -> None:
+    """Check that the captured scheduler holds the expected job registrations."""
+    _require(sched is not None, "FAIL: no AsyncIOScheduler was constructed during boot")
+    jobs = sched.get_jobs()
+    job_ids = sorted(job.id for job in jobs)
+    print(f"LIFESPAN startup OK — 등록 잡 {len(jobs)}건")
+    print(" job_ids:", ", ".join(job_ids))
+    _require(len(jobs) >= MIN_JOB_COUNT, f"잡 등록 수 비정상({len(jobs)})")
+    # Report every missing required job at once instead of only the first.
+    missing = [job_id for job_id in REQUIRED_JOB_IDS if job_id not in job_ids]
+    _require(not missing, f"필수 잡 누락: {', '.join(missing)}")
+
+
+async def _scalar(session, sql: str):
+    """Run *sql* on *session* and return the result's ``.scalar()`` value."""
+    return (await session.execute(text(sql))).scalar()
+
+
+async def _verify_schema(async_session) -> None:
+    """Query the DB and check the schema state left by the lifespan's init_db."""
+    async with async_session() as session:
+        docs = await _scalar(session, "SELECT to_regclass('public.documents') IS NOT NULL")
+        purge = await _scalar(
+            session,
+            "SELECT count(*) FROM information_schema.columns "
+            "WHERE table_name='documents' AND column_name='purge_requested_at'",
+        )
+        cand = await _scalar(
+            session,
+            "SELECT count(*) FROM information_schema.tables "
+            "WHERE table_name LIKE 'documents_cand_qwen%'",
+        )
+        uq = await _scalar(
+            session,
+            "SELECT count(*) FROM pg_indexes WHERE indexname='uq_attempt_session_question'",
+        )
+        mx = await _scalar(session, "SELECT max(version) FROM schema_migrations")
+    print(
+        f"SCHEMA OK — max_migration={mx} documents={docs} purge_col={purge} "
+        f"cand_qwen={cand} attempt_uq={uq}"
+    )
+    _require(
+        docs and purge == 1 and cand == 0 and uq == 1 and mx == EXPECTED_MAX_MIGRATION,
+        "FAIL: 기대 스키마 상태 불일치",
+    )
+
+
+async def main() -> None:
+    """Boot the real app against an ephemeral DB; verify routes, jobs, schema, health.
+
+    Raises:
+        AssertionError: if a safety guard or any boot check fails.
+    """
+    # 0) Safety guard: refuse to run against anything that looks like prod.
+    from core.config import settings
+
+    _check_safety(settings.database_url)
+
+    # 1) Neutralize the side effects (the logic under test is preserved).
+    captured = _neutralize_side_effects()
+
+    # Temporary mount that satisfies the NAS check; removed again on exit.
+    with tempfile.TemporaryDirectory(prefix="bootsmoke-nas-") as nas_dir:
+        (Path(nas_dir) / "PKM").mkdir(parents=True, exist_ok=True)
+        settings.nas_mount_path = nas_dir
+        print("nas_mount_path(override):", nas_dir)
+
+        # 2) import main = import every router + build the app.
+        import main as app_main
+
+        route_count = len(app_main.app.routes)
+        print(f"IMPORTS OK — main 빌드, app.routes={route_count}")
+        _require(
+            route_count > ROUTE_COUNT_FLOOR,
+            f"라우트 수 비정상({route_count}) — 라우터 누락 의심",
+        )
+
+        engine = None
+        try:
+            # 3) Run lifespan startup (init_db + worker imports + add_job calls).
+            #    `async with` runs the lifespan exit even when a check fails.
+            async with app_main.lifespan(app_main.app):
+                from core.database import async_session, engine
+
+                _verify_jobs(captured.get("sched"))
+
+                # 4) Schema state produced by the lifespan's real init_db.
+                await _verify_schema(async_session)
+
+                # 5) Call the /health handler directly.
+                health = await app_main.health_check()
+                print("HEALTH:", health)
+                _require(
+                    health["status"] == "ok" and health["database"] == "connected",
+                    "FAIL: health degraded",
+                )
+        finally:
+            # 6) Cleanup: release pooled DB connections on every exit path.
+            if engine is not None:
+                await engine.dispose()
+
+    print("RESULT: PASS — 전체 app 부팅(import·init_db·잡등록·health) 검증")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())