feat(canonical): Phase 1C MarkdownDoc renderer + heading anchor + KaTeX

문서 상세 페이지에서 canonical markdown(md_content) 을 우선 렌더하고
없으면 extracted_text fallback. md_frontmatter 가 있으면 본문 위에 메타
박스. h1~h6 에 GFM heading id + hover 시 # 링크 표시. 이미지 alt 가
있으면 figure + figcaption. KaTeX 수식 ($...$ / $$...$$) 지원.

Backend:
- DocumentDetailResponse 신규 (DocumentResponse + extracted_text + md_*)
- GET /documents/{doc_id} 응답 모델 전환
- 리스트 응답은 DocumentResponse 그대로 (페이로드 비대화 회피)

Frontend:
- lib/utils/docMarkdown.ts — 별도 Marked 인스턴스 (study mathMarkdown.ts
  영향 0). marked-katex-extension + marked-gfm-heading-id + custom image
  renderer (figure/figcaption + data-md-img marker).
- lib/components/MarkdownDoc.svelte — md_content/extracted_text 우선순위,
  frontmatter 박스, mdStatus=failed 안내 배지, heading anchor DOM 후처리.
- /documents/[id] markdown / hwp-markdown / article viewer 3 곳 wiring.
- app.css — .markdown-doc heading-anchor / md-figure / katex 가로 스크롤.

이미지 ImgAuth 후처리(blob URL 교체) wiring 은 Phase 1B.5 에서. 현재는
data-md-img="1" 마킹만 두고 marker 출력 src 그대로.

Plan: ~/.claude/plans/plan-idempotent-sundae.md (Phase 1C)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-05-01 09:37:33 +09:00
parent 242288aaf3
commit d038f11444
7 changed files with 300 additions and 10 deletions
+19 -3
View File
@@ -124,6 +124,22 @@ class DocumentListResponse(BaseModel):
page_size: int
class DocumentDetailResponse(DocumentResponse):
    """Response model used only by the single-document endpoint.

    Extends ``DocumentResponse`` with the document body (``extracted_text``)
    and the canonical-markdown (``md_*``) fields. List responses keep using
    plain ``DocumentResponse`` so that their payload does not grow.
    Every extra field is optional and defaults to ``None``.
    """

    # Document body text.
    extracted_text: str | None = None

    # Canonical markdown body and its frontmatter metadata.
    md_content: str | None = None
    md_frontmatter: dict | None = None

    # Markdown extraction bookkeeping: status, quality report, error text,
    # producing engine and its version, and generation timestamp.
    md_status: str | None = None
    md_extraction_quality: dict | None = None
    md_extraction_error: str | None = None
    md_extraction_engine: str | None = None
    md_extraction_engine_version: str | None = None
    md_generated_at: datetime | None = None
class AcceptSuggestionRequest(BaseModel):
"""§1 accept-suggestion 요청 body — stale payload / doc 수정 검출."""
expected_source_updated_at: datetime
@@ -507,17 +523,17 @@ async def list_documents(
)
@router.get("/{doc_id}", response_model=DocumentDetailResponse)
async def get_document(
    doc_id: int,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
):
    """Fetch a single document, including its body and canonical markdown.

    Args:
        doc_id: Primary key of the document to load.
        user: Resolved through ``get_current_user``; not otherwise read here.
        session: Database session used for the lookup.

    Returns:
        The document validated into ``DocumentDetailResponse``.

    Raises:
        HTTPException: 404 when no row exists for ``doc_id`` or the row has
            ``deleted_at`` set.
    """
    doc = await session.get(Document, doc_id)
    # Rows with deleted_at set are reported exactly like missing rows.
    if not doc or doc.deleted_at is not None:
        raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")
    # Validate into the detail model so extracted_text / md_* fields are carried.
    return DocumentDetailResponse.model_validate(doc)
# ─── 자료실 인접 자료 (이전/다음) ───
+19
View File
@@ -12,6 +12,7 @@
"katex": "^0.16.45",
"lucide-svelte": "^0.400.0",
"marked": "^15.0.0",
"marked-gfm-heading-id": "^4.1.4",
"marked-katex-extension": "^5.1.8",
"perfect-freehand": "^1.2.3"
},
@@ -1444,6 +1445,12 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/github-slugger": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/github-slugger/-/github-slugger-2.0.0.tgz",
"integrity": "sha512-IaOQ9puYtjrkq7Y0Ygl9KDZnrf/aiUJYUpVf89y8kyaxbRG7Y1SrX/jaumrv81vc61+kiMempujsM3Yw7w5qcw==",
"license": "ISC"
},
"node_modules/graceful-fs": {
"version": "4.2.11",
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
@@ -1830,6 +1837,18 @@
"node": ">= 18"
}
},
"node_modules/marked-gfm-heading-id": {
"version": "4.1.4",
"resolved": "https://registry.npmjs.org/marked-gfm-heading-id/-/marked-gfm-heading-id-4.1.4.tgz",
"integrity": "sha512-CspnvVfHSkb/znqdPS4jUR8HtCjq3M/DnrsJCrfLBLvdrgbemmoINKpeWKQYkBiXAoBGejw0cV7xzqrPdup3WA==",
"license": "MIT",
"dependencies": {
"github-slugger": "^2.0.0"
},
"peerDependencies": {
"marked": ">=13 <19"
}
},
"node_modules/marked-katex-extension": {
"version": "5.1.8",
"resolved": "https://registry.npmjs.org/marked-katex-extension/-/marked-katex-extension-5.1.8.tgz",
+1
View File
@@ -22,6 +22,7 @@
"katex": "^0.16.45",
"lucide-svelte": "^0.400.0",
"marked": "^15.0.0",
"marked-gfm-heading-id": "^4.1.4",
"marked-katex-extension": "^5.1.8",
"perfect-freehand": "^1.2.3"
}
+43
View File
@@ -139,3 +139,46 @@ body {
.math-area p { margin: 0.5em 0; }
.math-area p:first-child { margin-top: 0; }
.math-area p:last-child { margin-bottom: 0; }
/* Phase 1C: MarkdownDoc — heading anchors, figure captions, horizontally scrollable KaTeX. */
/* Let long unbroken strings wrap at any point instead of overflowing the container. */
.markdown-doc { overflow-wrap: anywhere; }
/* Keep an 80px gap above a heading when it is scrolled to through its #id link
   (presumably to clear a fixed page header — confirm). */
.markdown-doc h1, .markdown-doc h2, .markdown-doc h3,
.markdown-doc h4, .markdown-doc h5, .markdown-doc h6 {
scroll-margin-top: 80px;
position: relative;
}
/* The "#" link inserted at the start of each heading; fully transparent until revealed below. */
.markdown-doc .heading-anchor {
display: inline-block;
margin-right: 0.35em;
color: var(--text-dim);
font-weight: 400;
text-decoration: none;
opacity: 0;
transition: opacity 0.15s;
font-family: 'SF Mono', Menlo, monospace;
}
/* Reveal the anchor while its heading is hovered, or when the anchor itself has focus. */
.markdown-doc h1:hover .heading-anchor,
.markdown-doc h2:hover .heading-anchor,
.markdown-doc h3:hover .heading-anchor,
.markdown-doc h4:hover .heading-anchor,
.markdown-doc h5:hover .heading-anchor,
.markdown-doc h6:hover .heading-anchor,
.markdown-doc .heading-anchor:focus { opacity: 1; }
/* Accent colour while the pointer is on the anchor itself. */
.markdown-doc .heading-anchor:hover { color: var(--accent); }
/* Figure wrapper used for images that carry alt text: centred, with vertical spacing. */
.markdown-doc .md-figure {
margin: 1em 0;
text-align: center;
}
/* Never let a figure image exceed the content width. */
.markdown-doc .md-figure img { display: inline-block; max-width: 100%; }
/* Caption under the image: smaller, dimmed, italic. */
.markdown-doc .md-figure figcaption {
margin-top: 0.4em;
font-size: 0.85em;
color: var(--text-dim);
font-style: italic;
}
/* Display math scrolls horizontally instead of wrapping or overflowing. */
.markdown-doc .katex-display { overflow-x: auto; overflow-y: hidden; padding: 0.25em 0; }
.markdown-doc .katex-display > .katex { white-space: nowrap; }
/* Phase 1C: frontmatter box — metadata shown above the body. */
.md-frontmatter dt { font-weight: 500; }
@@ -0,0 +1,124 @@
<script lang="ts">
/**
* 문서 본문 canonical markdown 렌더러 (Phase 1C).
*
* 우선순위:
* 1. md_content (Phase 1B 의 marker_worker 가 채운 canonical markdown)
* 2. extracted_text fallback (기존 본문)
* 3. placeholder
*
* 기능:
* - md_frontmatter (JSONB) 가 비어있지 않으면 본문 위에 메타 박스
* - heading anchor (h1~h6 id 자동 부여 + # 링크)
* - figure caption (이미지 alt 있을 때)
* - KaTeX ($...$ inline / $$...$$ block)
* - DOMPurify sanitize
*
* 이미지 ImgAuth wiring 은 Phase 1B.5 후 추가 — 현재는 marker 출력 그대로 (data-md-img="1" 마킹만).
*/
import { renderDocMarkdown } from '$lib/utils/docMarkdown';
/** Props accepted by the MarkdownDoc component; every field is optional. */
interface Props {
  /** Canonical markdown; rendered in preference to `extractedText` when it is not blank. */
  mdContent?: string | null;
  /** Frontmatter key/value pairs listed in the meta box above the body. */
  mdFrontmatter?: Record<string, unknown> | null;
  /** Fallback body used when `mdContent` is missing or blank. */
  extractedText?: string | null;
  /** Markdown conversion status; the value 'failed' triggers the failure badge. */
  mdStatus?: string | null;
  /** Conversion error text, exposed as the tooltip of the failure badge. */
  mdExtractionError?: string | null;
  /** Markdown rendered when neither body source has content. */
  placeholder?: string;
  /** Extra wrapper classes, for callers that need to apply tailwind prose-* / spacing utilities. */
  class?: string;
}
// Component props. `class` is a reserved word, so it is bound to the local name `klass`.
let {
  mdContent = null,
  mdFrontmatter = null,
  extractedText = null,
  mdStatus = null,
  mdExtractionError = null,
  placeholder = '*텍스트 추출 대기 중*',
  class: klass = '',
}: Props = $props();

// True only when mdContent contains something other than whitespace.
let usingMarkdown = $derived(Boolean(mdContent?.trim()));

// Source text to render: canonical markdown first, then non-blank extracted text, then the placeholder.
let body = $derived.by(() => {
  if (usingMarkdown) return mdContent as string;
  if (extractedText?.trim()) return extractedText;
  return placeholder;
});

// Sanitized HTML for the chosen source text.
let renderedHtml = $derived(renderDocMarkdown(body));

// Frontmatter rows for the meta box. Only produced while canonical markdown is being shown;
// entries whose value is null, undefined or the empty string are dropped.
let frontmatterEntries = $derived.by((): [string, unknown][] => {
  if (!usingMarkdown || !mdFrontmatter) return [];
  const hasValue = (value: unknown): boolean => value != null && value !== '';
  return Object.entries(mdFrontmatter).filter(([, value]) => hasValue(value));
});

// The failure badge appears when conversion failed and canonical markdown is therefore not in use.
let showFailureBadge = $derived(mdStatus === 'failed' && !usingMarkdown);
/**
 * Turns a frontmatter value into display text.
 *
 * @param v - Any frontmatter value.
 * @returns '' for null/undefined, `String(v)` for primitives, and the JSON
 *   serialization for objects and arrays. When JSON serialization throws
 *   (for example on a circular structure) the result falls back to `String(v)`.
 */
function formatValue(v: unknown): string {
  if (v == null) return '';
  if (typeof v !== 'object') return String(v);
  try {
    return JSON.stringify(v);
  } catch {
    return String(v);
  }
}
// Wrapper element that holds the rendered HTML (bound via bind:this in the template).
let containerRef: HTMLDivElement | undefined = $state();

// Heading-anchor post-processing: prepend a "#" link to every heading that carries an id.
// This is done on the DOM rather than with a string-level regex so that ids containing
// quotes or HTML metacharacters stay safe.
$effect(() => {
  // Read only to register the dependency, so the effect re-runs whenever renderedHtml changes.
  void renderedHtml;
  const root = containerRef;
  if (!root) return;

  root
    .querySelectorAll<HTMLHeadingElement>('h1[id], h2[id], h3[id], h4[id], h5[id], h6[id]')
    .forEach((heading) => {
      const headingId = heading.getAttribute('id');
      // Skip headings without a usable id and headings that already contain an anchor.
      if (!headingId || heading.querySelector('a.heading-anchor')) return;

      const link = document.createElement('a');
      link.className = 'heading-anchor';
      link.setAttribute('href', `#${headingId}`);
      link.setAttribute('aria-label', '이 항목으로 링크');
      link.textContent = '#';
      heading.insertBefore(link, heading.firstChild);
    });
});
</script>
<!--
  Frontmatter meta box: one key/value row per non-empty frontmatter entry.
  The column template uses "_" between tracks: Tailwind turns underscores in arbitrary
  values into spaces, whereas a comma is emitted verbatim and yields an invalid
  grid-template-columns declaration.
-->
{#if frontmatterEntries.length}
  <dl
    class="md-frontmatter mb-4 grid grid-cols-[max-content_1fr] gap-x-3 gap-y-1 rounded border border-border/60 bg-bg/40 px-4 py-3 text-xs"
  >
    <div class="col-span-2 mb-1 text-[10px] uppercase tracking-wide text-dim">메타</div>
    {#each frontmatterEntries as [k, v] (k)}
      <dt class="text-dim">{k}</dt>
      <dd class="break-words text-text">{formatValue(v)}</dd>
    {/each}
  </dl>
{/if}
<!-- Failure badge: shown when md_status is 'failed' and canonical markdown is not being rendered.
     The extraction error, when present, is exposed as the tooltip. -->
{#if showFailureBadge}
<div
class="mb-3 inline-flex items-center gap-2 rounded border border-warn/40 bg-warn/10 px-2 py-1 text-[10px] text-warn"
title={mdExtractionError ?? undefined}
>
Markdown 변환 실패 — 원본 추출 텍스트 표시
</div>
{/if}
<!-- Render target. renderedHtml is the output of renderDocMarkdown; the element is bound to
     containerRef so the heading-anchor effect can post-process the inserted headings. -->
<div
bind:this={containerRef}
class="markdown-body markdown-doc leading-relaxed {klass}"
>
{@html renderedHtml}
</div>
+76
View File
@@ -0,0 +1,76 @@
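/**
 * Markdown renderer for document bodies (used by the Phase 1C MarkdownDoc component).
 *
 * Uses its own Marked instance, separate from mathMarkdown.ts (study), so study rendering is unaffected.
 *
 * Additions:
 * - GFM heading ids (anchor ids, prefix=doc-)
 * - image token → figure + figcaption (when alt text is present)
 * - every <img> is marked with data-md-img="1", the selector for the Phase 1B.5 ImgAuth post-processing
 *
 * KaTeX / DOMPurify handling follows mathMarkdown.ts (presumably the same settings — confirm).
 */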
import DOMPurify from 'dompurify';
import { Marked } from 'marked';
// @ts-ignore — 타입 정의 누락 시 무시
import markedKatex from 'marked-katex-extension';
// @ts-ignore — 타입 정의 누락 시 무시
import { gfmHeadingId } from 'marked-gfm-heading-id';
import 'katex/dist/katex.min.css';
/** Entity replacements for characters that must not appear raw inside a double-quoted attribute. */
const ATTR_ENTITIES: Record<string, string> = {
  '&': '&amp;',
  '"': '&quot;',
  '<': '&lt;',
  '>': '&gt;',
};

/**
 * Escapes a string for use inside a double-quoted HTML attribute value.
 *
 * @param s - Raw attribute value.
 * @returns `s` with `&`, `"`, `<` and `>` replaced by their entities; single quotes are left as-is.
 */
function escAttr(s: string): string {
  return s.replace(/[&"<>]/g, (ch) => ATTR_ENTITIES[ch] ?? ch);
}
/** Entity replacements for characters that must not appear raw in HTML text content. */
const TEXT_ENTITIES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
};

/**
 * Escapes a string for use as HTML text content.
 *
 * @param s - Raw text.
 * @returns `s` with `&`, `<` and `>` replaced by their entities; quotes are left as-is.
 */
function escText(s: string): string {
  return s.replace(/[&<>]/g, (ch) => TEXT_ENTITIES[ch] ?? ch);
}
// Dedicated Marked instance for document bodies; extensions registered here do not touch
// any other Marked instance in the app.
// The former `use({ mangle: false } as any)` call was dropped: `mangle` is not an option of
// the marked major version this project depends on (^15), so it configured nothing.
const docMarked = new Marked();

// Give every heading an id with the `doc-` prefix (used as the anchor target).
docMarked.use(gfmHeadingId({ prefix: 'doc-' }));

// KaTeX math. throwOnError: false keeps a malformed formula from throwing during render;
// output: 'html' asks KaTeX for HTML-only output.
docMarked.use(
  markedKatex({
    throwOnError: false,
    nonStandard: false,
    output: 'html',
  } as any),
);
// Image renderer: every <img> gets loading="lazy" and the data-md-img="1" marker.
// When the image has alt text, the <img> is wrapped in <figure class="md-figure"> and the
// alt text is repeated as the <figcaption>.
docMarked.use({
  renderer: {
    image(token: any): string {
      const alt: string = token?.text ?? '';

      // Attributes are assembled in output order; title is only emitted when non-empty.
      const attrs = [`src="${escAttr(token?.href ?? '')}"`, `alt="${escAttr(alt)}"`];
      if (token?.title) attrs.push(`title="${escAttr(token.title)}"`);
      attrs.push('loading="lazy"', 'data-md-img="1"');
      const imgTag = `<img ${attrs.join(' ')} />`;

      if (!alt) return imgTag;
      return `<figure class="md-figure">${imgTag}<figcaption>${escText(alt)}</figcaption></figure>`;
    },
  },
});
/**
 * DOMPurify configuration applied to the HTML produced by the markdown renderer.
 *
 * Deliberately not declared `as const`: that would type the arrays as readonly tuples,
 * which are not assignable to the mutable `string[]` fields of DOMPurify's config type.
 */
const SANITIZE_OPTS = {
  USE_PROFILES: { html: true },
  // KaTeX (style + aria-hidden), heading anchors (id), image marker (data-md-img),
  // lazy-loaded images (loading).
  ADD_ATTR: ['style', 'aria-hidden', 'id', 'data-md-img', 'loading'],
  // Figure captions emitted by the image renderer.
  ADD_TAGS: ['figure', 'figcaption'],
  FORBID_TAGS: ['script', 'iframe', 'object', 'embed', 'link', 'meta'],
  FORBID_ATTR: ['onerror', 'onclick', 'onload', 'onmouseover', 'onfocus'],
  ALLOW_UNKNOWN_PROTOCOLS: false,
};
/**
 * Renders document markdown to sanitized HTML.
 *
 * @param text - Markdown source; null, undefined and '' all produce ''.
 * @returns Sanitized HTML. If parsing or sanitizing throws, the failure is logged and the
 *   raw input is returned with every tag and attribute removed by DOMPurify.
 */
export function renderDocMarkdown(text: string | null | undefined): string {
  if (!text) return '';
  try {
    // `async: false` selects the synchronous overload, so the result is typed as string
    // without a type assertion.
    const html = docMarked.parse(text, { async: false });
    return DOMPurify.sanitize(html, SANITIZE_OPTS);
  } catch (err: unknown) {
    // Last-resort safety net: keep the page rendering, but do not hide the failure.
    console.warn('renderDocMarkdown: render failed, falling back to tag-stripped text', err);
    return DOMPurify.sanitize(text, { ALLOWED_TAGS: [], ALLOWED_ATTR: [] });
  }
}
@@ -15,6 +15,7 @@
import EmptyState from '$lib/components/ui/EmptyState.svelte';
import Skeleton from '$lib/components/ui/Skeleton.svelte';
import HandwriteCanvas from '$lib/components/HandwriteCanvas.svelte';
import MarkdownDoc from '$lib/components/MarkdownDoc.svelte';
import NoteEditor from '$lib/components/editors/NoteEditor.svelte';
import EditUrlEditor from '$lib/components/editors/EditUrlEditor.svelte';
import TagsEditor from '$lib/components/editors/TagsEditor.svelte';
@@ -224,9 +225,14 @@
<!-- 뷰어 — 모바일 가독성: 본문 폰트 키우고 line-height 늘림 -->
<Card class="min-h-[500px]">
{#if viewerType === 'markdown' || viewerType === 'hwp-markdown'}
<div class="prose prose-invert prose-base lg:prose-sm max-w-none markdown-body leading-relaxed">
{@html renderMd(doc.extracted_text || rawMarkdown || '*텍스트 추출 대기 중*')}
</div>
<MarkdownDoc
mdContent={doc.md_content}
mdFrontmatter={doc.md_frontmatter}
mdStatus={doc.md_status}
mdExtractionError={doc.md_extraction_error}
extractedText={doc.extracted_text || rawMarkdown}
class="prose prose-invert prose-base lg:prose-sm max-w-none"
/>
{:else if viewerType === 'pdf'}
<iframe
src="/api/documents/{doc.id}/file?token={getAccessToken()}"
@@ -268,10 +274,15 @@
})}
</span>
</div>
{#if doc.extracted_text}
<div class="markdown-body mb-6">
{@html renderMd(doc.extracted_text)}
</div>
{#if doc.md_content || doc.extracted_text}
<MarkdownDoc
mdContent={doc.md_content}
mdFrontmatter={doc.md_frontmatter}
mdStatus={doc.md_status}
mdExtractionError={doc.md_extraction_error}
extractedText={doc.extracted_text}
class="mb-6"
/>
{/if}
{#if doc.edit_url}
<Button