diff --git a/app/core/database.py b/app/core/database.py index 6d5ec1c..cbf4052 100644 --- a/app/core/database.py +++ b/app/core/database.py @@ -100,7 +100,12 @@ async def _load_baseline_if_fresh(conn, migrations_dir: Path) -> None: baseline_path = baseline_files[-1] logger.info(f"[migration] fresh DB 감지 — baseline 적재: {baseline_path.name}") - await conn.exec_driver_sql(baseline_path.read_text(encoding="utf-8")) + # baseline 은 multi-statement 덤프 — exec_driver_sql(asyncpg prepared)은 multi-statement + # 불허("cannot insert multiple commands into a prepared statement"). raw asyncpg 의 simple + # 프로토콜 execute() 로 적재한다(같은 connection = 현재 트랜잭션 내). psql 스모크는 이 제약을 + # 못 잡으므로 init_db 런타임 검증으로 확인됨. + raw = await conn.get_raw_connection() + await raw.driver_connection.execute(baseline_path.read_text(encoding="utf-8")) # baseline = cutoff 까지의 스키마 → 실제 파일 버전 기준으로 schema_migrations 스탬프. versions = [v for v, _, _ in _parse_migration_files(migrations_dir) if v <= _BASELINE_CUTOFF] for v in versions: diff --git a/migrations/360_drop_phase2a_cand_tables.sql b/migrations/360_drop_phase2a_cand_tables.sql index 8345f55..03924e0 100644 --- a/migrations/360_drop_phase2a_cand_tables.sql +++ b/migrations/360_drop_phase2a_cand_tables.sql @@ -1,14 +1,11 @@ -- 360: Phase 2A 임베딩 후보 cand 섀도 테이블 제거 (R13). --- Phase 2A no-go 종결(2026-06-12, 후보 전부 -0.03~-0.04) + phase2a_cand_backfill 워커 --- dormant. retrieval_service.CANDIDATE_BACKEND_MAP / api.search allowed 슬러그 선제거 후 DROP. +-- Phase 2A no-go 종결(2026-06-12, 후보 전부 -0.03~-0.04) + phase2a_cand_backfill 워커 dormant. +-- retrieval_service.CANDIDATE_BACKEND_MAP / api.search allowed 슬러그 선제거 후 DROP. +-- ★single statement(콤마 구분) — init_db 의 exec_driver_sql(asyncpg)은 multi-statement 불허. -- IF EXISTS — me5/snowflake 는 ad-hoc 생성분이라 환경별 존재 여부 다를 수 있음(멱등). 
-DROP TABLE IF EXISTS document_chunks_cand_me5_large_inst; -DROP TABLE IF EXISTS documents_cand_me5_large_inst; -DROP TABLE IF EXISTS document_chunks_cand_snowflake_l_v2; -DROP TABLE IF EXISTS documents_cand_snowflake_l_v2; -DROP TABLE IF EXISTS document_chunks_cand_qwen06; -DROP TABLE IF EXISTS documents_cand_qwen06; -DROP TABLE IF EXISTS document_chunks_cand_qwen4; -DROP TABLE IF EXISTS documents_cand_qwen4; -DROP TABLE IF EXISTS document_chunks_cand_qwen4m; -DROP TABLE IF EXISTS documents_cand_qwen4m; +DROP TABLE IF EXISTS + document_chunks_cand_me5_large_inst, documents_cand_me5_large_inst, + document_chunks_cand_snowflake_l_v2, documents_cand_snowflake_l_v2, + document_chunks_cand_qwen06, documents_cand_qwen06, + document_chunks_cand_qwen4, documents_cand_qwen4, + document_chunks_cand_qwen4m, documents_cand_qwen4m; diff --git a/migrations/361_attempt_session_question_unique.sql b/migrations/361_attempt_session_question_unique.sql index 5670886..ee80a36 100644 --- a/migrations/361_attempt_session_question_unique.sql +++ b/migrations/361_attempt_session_question_unique.sql @@ -1,14 +1,9 @@ -- 361: quiz 세션 내 같은 문제 이중 attempt 방지 partial UNIQUE (R9). --- submit_attempt 의 FOR UPDATE 행잠금이 1차 방어이고, 이 제약은 DB 레벨 belt-and-suspenders --- (모바일 더블탭/재시도가 어떤 경로로든 이중 INSERT 에 도달해도 차단). prod 실측 중복 0 건 --- (SELECT ... GROUP BY HAVING count>1 = 0) — dedup DELETE 는 멱등 precaution, UNIQUE 는 안전. --- quiz_session_id IS NULL(세션 외 직접 입력)은 대상 아님 → partial index. -DELETE FROM study_question_attempts a USING study_question_attempts b -WHERE a.quiz_session_id IS NOT NULL - AND a.quiz_session_id = b.quiz_session_id - AND a.study_question_id = b.study_question_id - AND a.id > b.id; - +-- submit_attempt 의 FOR UPDATE 행잠금이 1차 방어, 이 제약은 DB 레벨 belt-and-suspenders. +-- prod 실측 중복 0 (GROUP BY (quiz_session_id, study_question_id) HAVING count>1 = 0) + fresh DB +-- 빈 테이블이라 dedup DELETE 불요 → ★single statement(init_db exec_driver_sql 은 multi-statement +-- 불허). 
혹시 중복이 생긴 환경이면 이 마이그가 실패하므로(IntegrityError) 수동 dedup 후 재적용. +-- quiz_session_id IS NULL(세션 외 직접 입력)은 비대상 → partial index. CREATE UNIQUE INDEX IF NOT EXISTS uq_attempt_session_question ON study_question_attempts (quiz_session_id, study_question_id) WHERE quiz_session_id IS NOT NULL; diff --git a/migrations/_baseline/0358_schema_baseline.sql b/migrations/_baseline/0358_schema_baseline.sql index 51dd809..05baf49 100644 --- a/migrations/_baseline/0358_schema_baseline.sql +++ b/migrations/_baseline/0358_schema_baseline.sql @@ -1765,17 +1765,6 @@ CREATE SEQUENCE public.processing_queue_id_seq ALTER SEQUENCE public.processing_queue_id_seq OWNED BY public.processing_queue.id; --- --- Name: schema_migrations; Type: TABLE; Schema: public; Owner: - --- - -CREATE TABLE public.schema_migrations ( - version integer NOT NULL, - name text NOT NULL, - applied_at timestamp with time zone DEFAULT now() -); - - -- -- Name: search_failure_logs; Type: TABLE; Schema: public; Owner: - -- @@ -3447,14 +3436,6 @@ ALTER TABLE ONLY public.processing_queue ADD CONSTRAINT processing_queue_pkey PRIMARY KEY (id); --- --- Name: schema_migrations schema_migrations_pkey; Type: CONSTRAINT; Schema: public; Owner: - --- - -ALTER TABLE ONLY public.schema_migrations - ADD CONSTRAINT schema_migrations_pkey PRIMARY KEY (version); - - -- -- Name: search_failure_logs search_failure_logs_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- diff --git a/scripts/ci/initdb_runtime_test.py b/scripts/ci/initdb_runtime_test.py new file mode 100644 index 0000000..e3f75de --- /dev/null +++ b/scripts/ci/initdb_runtime_test.py @@ -0,0 +1,51 @@ +"""init_db() baseline 부팅 런타임 검증 (R1) — psql migration_smoke 가 못 잡는 asyncpg 경로 확인. + +migration_smoke.sh(psql)는 SQL 유효성만 검증한다. init_db 는 asyncpg exec_driver_sql(prepared) +경로라 ① multi-statement 불허 ② baseline 의 raw asyncpg 적재 ③ skip/stamp/멱등 — 이걸 실측한다. 
+
+Run (from the worktree root):
+  python3.11 -m venv /tmp/v && /tmp/v/bin/pip install -q "sqlalchemy[asyncio]>=2" asyncpg pydantic pyyaml
+  docker run -d --name idb -p 55432:5432 -e POSTGRES_HOST_AUTH_METHOD=trust pgvector/pgvector:pg16
+  docker exec idb psql -U postgres -c "CREATE DATABASE pkm"
+  ln -sfn ../migrations app/migrations   # mimic Docker's /app/migrations layout (remove after the test)
+  PYTHONPATH=app DATABASE_URL="postgresql+asyncpg://postgres@localhost:55432/pkm" /tmp/v/bin/python scripts/ci/initdb_runtime_test.py
+  rm -f app/migrations; docker rm -f idb
+
+Expected: 1st run OK (documents=True·purge_col=1·cand_qwen=0·attempt_unique=1); 2nd run prints 멱등동일=True.
+"""
+import asyncio
+from urllib.parse import urlsplit
+
+from sqlalchemy import text
+
+# Database hosts this script is allowed to run init_db() against.
+_LOCAL_HOSTS = frozenset({"localhost", "127.0.0.1", "::1"})
+
+
+async def _scalar(session, sql):
+    """Execute *sql* on *session* and return the first column of the first row."""
+    return (await session.execute(text(sql))).scalar()
+
+
+async def main():
+    """Run init_db() twice against a local database and check the outcome.
+
+    After the first run the expected schema objects are asserted; after the
+    second run the schema_migrations row count must be unchanged.
+
+    Raises:
+        SystemExit: if the configured database host is not a local one.
+        AssertionError: if a schema or idempotency check fails.
+    """
+    from core.config import settings
+
+    url = str(settings.database_url)
+    print("effective DATABASE_URL:", url)
+    # Compare the parsed host, not a substring: "localhost" may also occur in
+    # the database name, user or password of a remote URL. A real exception is
+    # used instead of assert so the guard is not stripped by `python -O`.
+    if urlsplit(url).hostname not in _LOCAL_HOSTS:
+        raise SystemExit(f"SAFETY ABORT non-local: {url}")
+
+    # Imported only after the safety guard above has passed.
+    from core.database import init_db, async_session, engine
+
+    try:
+        print("=== 1st init_db (fresh DB) ===")
+        await init_db()
+        async with async_session() as s:
+            cnt = await _scalar(s, "SELECT count(*) FROM schema_migrations")
+            mx = await _scalar(s, "SELECT max(version) FROM schema_migrations")
+            bl = await _scalar(s, "SELECT count(*) FROM schema_migrations WHERE name LIKE 'baseline:%'")
+            docs = await _scalar(s, "SELECT to_regclass('public.documents') IS NOT NULL")
+            purge = await _scalar(s, "SELECT count(*) FROM information_schema.columns WHERE table_name='documents' AND column_name='purge_requested_at'")
+            cand = await _scalar(s, "SELECT count(*) FROM information_schema.tables WHERE table_name LIKE 'documents_cand_qwen%'")
+            uq = await _scalar(s, "SELECT count(*) FROM pg_indexes WHERE indexname='uq_attempt_session_question'")
+        print(f" schema_migrations count={cnt} max={mx} baseline_stamped={bl}")
+        print(f" documents={docs} purge_col={purge} cand_qwen_tables={cand} attempt_unique={uq}")
+        assert docs and purge == 1 and cand == 0 and uq == 1, "FAIL: 기대 스키마 상태 불일치"
+
+        print("=== 2nd init_db (rerun = baseline skip + 멱등) ===")
+        await init_db()
+        async with async_session() as s:
+            cnt2 = await _scalar(s, "SELECT count(*) FROM schema_migrations")
+        assert cnt == cnt2, "FAIL: 멱등 아님 (재실행이 schema_migrations 변경)"
+        print(f" count={cnt2} 멱등동일={cnt == cnt2}")
+        print("RESULT: PASS — init_db baseline 부팅/멱등 검증")
+    finally:
+        # Close the connection pool even when one of the checks above fails.
+        await engine.dispose()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())