"""Bulk DB cleanup: unwrap an outer fenced code block from stored text fields.

Targets:
  - study_questions: question_text, choice_1..choice_4, explanation,
    ai_explanation
  - study_topic_subject_notes: content

Two cases are handled:
  (1) terminated: the whole value is an opening fence line, a body, and a
      closing fence. It is unwrapped only when the body contains no further
      triple-backtick sequence.
  (2) unterminated: only the opening fence exists and the closing fence is
      missing. It is unwrapped only when the value contains exactly one
      triple-backtick group.

Values that carry a regular code block in the middle of the text (for example
a python-tagged block between two paragraphs) are left untouched.

By default the script is a dry run that prints the number of affected rows per
field; pass --apply to execute the UPDATE statements.
"""
from __future__ import annotations

import asyncio
import os
import re
import sys
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only needed for annotations; the runtime import lives in main() so that
    # strip_outer_fence() stays importable without the DB driver installed.
    import asyncpg

TARGETS: list[tuple[str, str, str]] = [
    # (table, field, where_extra)
    ("study_questions", "question_text", "deleted_at IS NULL"),
    ("study_questions", "choice_1", "deleted_at IS NULL"),
    ("study_questions", "choice_2", "deleted_at IS NULL"),
    ("study_questions", "choice_3", "deleted_at IS NULL"),
    ("study_questions", "choice_4", "deleted_at IS NULL"),
    ("study_questions", "explanation", "deleted_at IS NULL AND explanation IS NOT NULL"),
    ("study_questions", "ai_explanation", "deleted_at IS NULL AND ai_explanation IS NOT NULL"),
    ("study_topic_subject_notes", "content", "content IS NOT NULL"),
]

# Opening fence (optional info string, optional trailing blanks), body, closing
# fence at the very end.  `\r?` lets CRLF-terminated fence lines match as well.
TERM_RE = re.compile(r"^```[A-Za-z0-9_-]*[ \t]*\r?\n([\s\S]*?)\r?\n```$")
# Opening fence only; everything after the first line break is the body.
UNTERM_RE = re.compile(r"^```[A-Za-z0-9_-]*[ \t]*\r?\n([\s\S]*)$")


def strip_outer_fence(text: str) -> str | None:
    """Return *text* without its outer code fence, or None if nothing changes.

    The input is whitespace-stripped before matching.  A fully wrapped value
    is unwrapped only when its body contains no other triple-backtick; a value
    with just an opening fence is unwrapped only when that fence is the single
    triple-backtick group in the text.  Both LF and CRLF line endings around
    the fence lines are accepted.
    """
    if not text:
        return None
    trimmed = text.strip()

    # (1) terminated: opening fence ... closing fence spanning the whole value.
    m = TERM_RE.match(trimmed)
    if m:
        inner = m.group(1)
        # A nested fence means this is not a plain outer wrapper; keep as is.
        if "```" not in inner:
            return inner
        return None

    # (2) unterminated: exactly one fence, and it must open the value.
    if trimmed.count("```") == 1:
        m2 = UNTERM_RE.match(trimmed)
        if m2:
            return m2.group(1)
    return None


async def scan_and_apply(conn: asyncpg.Connection, apply: bool) -> None:
    """Report, and optionally persist, the unwrap result for every target.

    For each (table, field, where) entry in TARGETS the matching rows are
    fetched and passed through strip_outer_fence().  The per-field count is
    always printed; in dry-run mode one sample row is shown as well.

    Args:
        conn: Open asyncpg connection used for the SELECT/UPDATE statements.
        apply: When true, write the unwrapped values back to the database.
    """
    total_to_change = 0
    for table, field, where in TARGETS:
        # Identifiers come from the constant TARGETS table above, never from
        # user input, so interpolating them into the SQL text is safe here.
        rows = await conn.fetch(
            f"SELECT id, {field} AS val FROM {table} WHERE {where}"
        )
        candidates: list[tuple[int, str, str]] = []
        for r in rows:
            val = r["val"]
            if val is None:
                continue
            new = strip_outer_fence(val)
            if new is not None and new != val:
                candidates.append((r["id"], val, new))

        n = len(candidates)
        total_to_change += n
        print(f" {table}.{field}: {n}건")
        if n and not apply:
            sample_id, sample_val, _ = candidates[0]
            head = sample_val[:80].replace("\n", "\\n")
            print(f" 샘플 id={sample_id} head={head!r}")
        if apply and n:
            # One transaction per field: if any UPDATE fails, the whole batch
            # for this field rolls back instead of being left half-applied.
            async with conn.transaction():
                await conn.executemany(
                    f"UPDATE {table} SET {field} = $1 WHERE id = $2",
                    [(new, row_id) for row_id, _old, new in candidates],
                )
            print(f" → UPDATE {n}건 적용 완료")
    print(f"\n총 변경 대상: {total_to_change}건")


async def main() -> None:
    """Connect to the database and run the scan (dry run unless --apply)."""
    # Deferred import: keeps the module importable without the DB driver.
    import asyncpg

    apply = "--apply" in sys.argv
    mode = "APPLY (UPDATE)" if apply else "DRY-RUN"
    print(f"[{mode}] outer-fence unwrap 검사 시작\n")
    conn = await asyncpg.connect(
        host="postgres",
        port=5432,
        user="pkm",
        # NOTE(security): a credential committed to source control should be
        # rotated.  PGPASSWORD takes precedence; the literal remains only as a
        # backward-compatible fallback and should be removed after rotation.
        password=os.environ.get("PGPASSWORD", "uW38friypljVS0X2ULoMnw"),
        database="pkm",
    )
    try:
        await scan_and_apply(conn, apply=apply)
    finally:
        await conn.close()


if __name__ == "__main__":
    asyncio.run(main())