373dd059b7
AI 응답이 마크다운 자체를 ``` 으로 감싸서 오는 패턴 (시작만 있고 닫음 누락 포함) 때문에 explanation/AI 해설 영역이 raw 코드블록으로 보이는 회귀.

- frontend/lib/utils/mathMarkdown.ts: stripOuterFence helper.
  - terminated wrap 처리 (inner 에 ``` 추가 있으면 보존)
  - unterminated 처리 (백틱 그룹 == 1 인 경우만 안전하게 unwrap)
  - 본문 중간 정상 코드블록은 보존
- scripts/strip_outer_fences.py: dry-run + --apply 양 모드.
  - 5개 필드 (question_text, choice_1~4, explanation, ai_explanation, content) 검사.
  - 운영 결과 explanation 34건 unwrap 적용 완료, recount 0 검증.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
113 lines
3.6 KiB
Python
113 lines
3.6 KiB
Python
"""Bulk DB maintenance: unwrap an outer fenced code block in five text fields.

Targets:
  - study_questions.question_text / choice_1~4 / explanation / ai_explanation
  - study_topic_subject_notes.content

Two cases:
  (1) terminated: the whole value is wrapped as an opening ``` line, the
      body, and a closing ``` line. Unwrapped only when the inner body
      contains no further ```.
  (2) unterminated: only the opening ``` is present and the closing fence
      is missing. Unwrapped only when the body contains no further ```
      (number of backtick groups == 1).

A normal code block in the middle of the body (e.g. ```python ... ```) is
preserved.

Run as a dry run first (prints the number of matches per field), then run
again with the --apply option to issue the UPDATEs.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import re
|
|
import sys
|
|
|
|
import asyncpg
|
|
|
|
|
|
# Columns to scan, one entry per (table, field, where_extra).
# ``where_extra`` is spliced verbatim into the WHERE clause of the SELECT that
# loads the rows for that field, so it must be a trusted SQL fragment.
TARGETS: list[tuple[str, str, str]] = [
    # (table, field, where_extra)
    ("study_questions", "question_text", "deleted_at IS NULL"),
    ("study_questions", "choice_1", "deleted_at IS NULL"),
    ("study_questions", "choice_2", "deleted_at IS NULL"),
    ("study_questions", "choice_3", "deleted_at IS NULL"),
    ("study_questions", "choice_4", "deleted_at IS NULL"),
    ("study_questions", "explanation", "deleted_at IS NULL AND explanation IS NOT NULL"),
    ("study_questions", "ai_explanation", "deleted_at IS NULL AND ai_explanation IS NOT NULL"),
    ("study_topic_subject_notes", "content", "content IS NOT NULL"),
]
|
|
|
|
|
|
# Whole text is: opening fence (optional info string such as "markdown"),
# a body, and a closing fence that ends the text.  ``\r?\n`` accepts both LF
# and CRLF line endings so Windows-style content is handled too.
TERM_RE = re.compile(r"^```[A-Za-z0-9_-]*[ \t]*\r?\n([\s\S]*?)\r?\n```$")
# Opening fence followed by a body; no closing fence is required.
UNTERM_RE = re.compile(r"^```[A-Za-z0-9_-]*[ \t]*\r?\n([\s\S]*)$")


def strip_outer_fence(text: str) -> str | None:
    """Return *text* with its outer code fence removed, or ``None`` if unchanged.

    The text is stripped of surrounding whitespace before matching.  Two
    shapes are unwrapped:

    1. Terminated: the text starts with an opening fence line and ends with
       a closing fence.  The inner body is returned only when it contains no
       further triple-backtick group; otherwise the text is left alone.
    2. Unterminated: the text starts with an opening fence line and contains
       exactly one triple-backtick group in total.  Everything after the
       opening line is returned.

    Args:
        text: Field value to inspect; may be empty.

    Returns:
        The unwrapped body, or ``None`` when nothing should be changed
        (empty input, no outer fence, or additional fences present).
    """
    if not text:
        return None
    trimmed = text.strip()

    # (1) terminated
    terminated = TERM_RE.match(trimmed)
    if terminated:
        inner = terminated.group(1)
        # A fence inside the body means the outer pair cannot be removed
        # without risking a real code block, so report "no change".
        return inner if "```" not in inner else None

    # (2) unterminated: only safe when the opening fence is the sole
    # triple-backtick group in the whole text.
    if trimmed.count("```") == 1:
        unterminated = UNTERM_RE.match(trimmed)
        if unterminated:
            return unterminated.group(1)
    return None
|
|
|
|
|
|
async def scan_and_apply(conn: asyncpg.Connection, apply: bool) -> None:
    """Scan every field in ``TARGETS`` and optionally rewrite fenced values.

    For each (table, field, where) entry the rows are loaded, each non-NULL
    value is passed through ``strip_outer_fence``, and values that would
    change are collected.  The per-field count is always printed.  In dry-run
    mode one sample (first 80 characters of the old value) is printed per
    field; in apply mode the collected rows are updated.

    Args:
        conn: Open asyncpg connection used for the SELECTs and UPDATEs.
        apply: ``True`` to issue UPDATEs, ``False`` for a dry run.
    """
    total_to_change = 0
    for table, field, where in TARGETS:
        # Identifiers come from the module-level TARGETS constant, not from
        # user input, so interpolating them into the SQL text is acceptable.
        rows = await conn.fetch(
            f"SELECT id, {field} AS val FROM {table} WHERE {where}"
        )

        candidates: list[tuple[int, str, str]] = []
        for row in rows:
            val = row["val"]
            if val is None:
                continue
            new = strip_outer_fence(val)
            if new is not None and new != val:
                candidates.append((row["id"], val, new))

        n = len(candidates)
        total_to_change += n
        print(f" {table}.{field}: {n}건")
        if not n:
            continue

        if not apply:
            sample_id, sample_val, _ = candidates[0]
            head = sample_val[:80].replace("\n", "\\n")
            print(f" 샘플 id={sample_id} head={head!r}")
            continue

        # Apply all updates for this field atomically: a failure part-way
        # through rolls the field back instead of leaving it half-rewritten.
        async with conn.transaction():
            await conn.executemany(
                f"UPDATE {table} SET {field} = $1 WHERE id = $2",
                [(new, row_id) for row_id, _old, new in candidates],
            )
        print(f" → UPDATE {n}건 적용 완료")

    print(f"\n총 변경 대상: {total_to_change}건")
|
|
|
|
|
|
async def main() -> None:
    """Connect to PostgreSQL and run the outer-fence scan.

    The run is a dry run unless ``--apply`` is present on the command line.

    Connection settings are read from the environment variables ``PGHOST``,
    ``PGPORT``, ``PGUSER``, ``PGDATABASE`` (falling back to the script's
    previous defaults) and ``PGPASSWORD`` (required, no fallback).

    Raises:
        SystemExit: If ``PGPASSWORD`` is not set.
    """
    # Function-scope import keeps this block independent of the module's
    # top-level import list.
    import os

    apply = "--apply" in sys.argv
    mode = "APPLY (UPDATE)" if apply else "DRY-RUN"
    print(f"[{mode}] outer-fence unwrap 검사 시작\n")

    # SECURITY: the password used to be hard-coded here.  Credentials must
    # not live in source control; the previously committed value should be
    # treated as compromised and rotated.
    password = os.environ.get("PGPASSWORD")
    if not password:
        raise SystemExit(
            "PGPASSWORD is not set; refusing to connect without database credentials."
        )

    conn = await asyncpg.connect(
        host=os.environ.get("PGHOST", "postgres"),
        port=int(os.environ.get("PGPORT", "5432")),
        user=os.environ.get("PGUSER", "pkm"),
        password=password,
        database=os.environ.get("PGDATABASE", "pkm"),
    )
    try:
        await scan_and_apply(conn, apply=apply)
    finally:
        # Always release the connection, even if the scan raised.
        await conn.close()


if __name__ == "__main__":
    asyncio.run(main())
|