diff --git a/app/api/documents.py b/app/api/documents.py index 2e39473..920bb7d 100644 --- a/app/api/documents.py +++ b/app/api/documents.py @@ -124,6 +124,22 @@ class DocumentListResponse(BaseModel): page_size: int +class DocumentDetailResponse(DocumentResponse): + """단건 조회 전용 — 본문(extracted_text)·canonical markdown 동봉. + 리스트 응답은 페이로드 비대화 회피로 DocumentResponse 만 사용. + """ + + extracted_text: str | None = None + md_content: str | None = None + md_frontmatter: dict | None = None + md_status: str | None = None + md_extraction_quality: dict | None = None + md_extraction_error: str | None = None + md_extraction_engine: str | None = None + md_extraction_engine_version: str | None = None + md_generated_at: datetime | None = None + + class AcceptSuggestionRequest(BaseModel): """§1 accept-suggestion 요청 body — stale payload / doc 수정 검출.""" expected_source_updated_at: datetime @@ -507,17 +523,17 @@ async def list_documents( ) -@router.get("/{doc_id}", response_model=DocumentResponse) +@router.get("/{doc_id}", response_model=DocumentDetailResponse) async def get_document( doc_id: int, user: Annotated[User, Depends(get_current_user)], session: Annotated[AsyncSession, Depends(get_session)], ): - """문서 단건 조회""" + """문서 단건 조회. 본문(extracted_text)·canonical markdown 동봉.""" doc = await session.get(Document, doc_id) if not doc or doc.deleted_at is not None: raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다") - return DocumentResponse.model_validate(doc) + return DocumentDetailResponse.model_validate(doc) # ─── 자료실 인접 자료 (이전/다음) ─── diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 688f728..f9e1b12 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -12,6 +12,7 @@ "katex": "^0.16.45", "lucide-svelte": "^0.400.0", "marked": "^15.0.0", + "marked-gfm-heading-id": "^4.1.4", "marked-katex-extension": "^5.1.8", "perfect-freehand": "^1.2.3" }, @@ -1444,6 +1445,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/github-slugger": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/github-slugger/-/github-slugger-2.0.0.tgz", + "integrity": "sha512-IaOQ9puYtjrkq7Y0Ygl9KDZnrf/aiUJYUpVf89y8kyaxbRG7Y1SrX/jaumrv81vc61+kiMempujsM3Yw7w5qcw==", + "license": "ISC" + }, "node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -1830,6 +1837,18 @@ "node": ">= 18" } }, + "node_modules/marked-gfm-heading-id": { + "version": "4.1.4", + "resolved": "https://registry.npmjs.org/marked-gfm-heading-id/-/marked-gfm-heading-id-4.1.4.tgz", + "integrity": "sha512-CspnvVfHSkb/znqdPS4jUR8HtCjq3M/DnrsJCrfLBLvdrgbemmoINKpeWKQYkBiXAoBGejw0cV7xzqrPdup3WA==", + "license": "MIT", + "dependencies": { + "github-slugger": "^2.0.0" + }, + "peerDependencies": { + "marked": ">=13 <19" + } + }, "node_modules/marked-katex-extension": { "version": "5.1.8", "resolved": "https://registry.npmjs.org/marked-katex-extension/-/marked-katex-extension-5.1.8.tgz", diff --git a/frontend/package.json b/frontend/package.json index 36ff279..6d3f040 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -22,6 +22,7 @@ "katex": "^0.16.45", "lucide-svelte": "^0.400.0", "marked": "^15.0.0", + "marked-gfm-heading-id": "^4.1.4", "marked-katex-extension": "^5.1.8", "perfect-freehand": "^1.2.3" } diff --git a/frontend/src/app.css b/frontend/src/app.css index c40f163..267f62d 100644 --- a/frontend/src/app.css +++ b/frontend/src/app.css @@ -139,3 +139,46 @@ body { .math-area p { margin: 0.5em 0; } .math-area p:first-child { margin-top: 0; } .math-area p:last-child { margin-bottom: 0; } + +/* Phase 1C: MarkdownDoc — heading anchor / figure caption / KaTeX 가로 스크롤 */ +.markdown-doc { overflow-wrap: anywhere; } +.markdown-doc h1, .markdown-doc h2, .markdown-doc h3, +.markdown-doc h4, .markdown-doc h5, .markdown-doc h6 { + scroll-margin-top: 80px; + position: relative; +} +.markdown-doc .heading-anchor { + display: inline-block; + margin-right: 0.35em; + color: var(--text-dim); + font-weight: 400; + text-decoration: none; + opacity: 0; + transition: opacity 0.15s; + font-family: 'SF Mono', Menlo, monospace; +} +.markdown-doc h1:hover .heading-anchor, +.markdown-doc h2:hover .heading-anchor, +.markdown-doc h3:hover .heading-anchor, +.markdown-doc h4:hover .heading-anchor, +.markdown-doc h5:hover .heading-anchor, +.markdown-doc h6:hover .heading-anchor, +.markdown-doc .heading-anchor:focus { opacity: 1; } +.markdown-doc .heading-anchor:hover { color: var(--accent); } + +.markdown-doc .md-figure { + margin: 1em 0; + text-align: center; +} +.markdown-doc .md-figure img { display: inline-block; max-width: 100%; } +.markdown-doc .md-figure figcaption { + margin-top: 0.4em; + font-size: 0.85em; + color: var(--text-dim); + font-style: italic; +} +.markdown-doc .katex-display { overflow-x: auto; overflow-y: hidden; padding: 0.25em 0; } +.markdown-doc .katex-display > .katex { white-space: nowrap; } + +/* Phase 1C: frontmatter 박스 — 본문 위 메타 표시 */ +.md-frontmatter dt { font-weight: 500; } diff --git a/frontend/src/lib/components/MarkdownDoc.svelte b/frontend/src/lib/components/MarkdownDoc.svelte new file mode 100644 index 0000000..2740c74 --- /dev/null +++ b/frontend/src/lib/components/MarkdownDoc.svelte @@ -0,0 +1,124 @@ + + +{#if frontmatterEntries.length} +
+
메타
+ {#each frontmatterEntries as [k, v] (k)} +
{k}
+
{formatValue(v)}
+ {/each} +
+{/if} + +{#if showFailureBadge} +
+ Markdown 변환 실패 — 원본 추출 텍스트 표시 +
+{/if} + +
+ {@html renderedHtml} +
diff --git a/frontend/src/lib/utils/docMarkdown.ts b/frontend/src/lib/utils/docMarkdown.ts new file mode 100644 index 0000000..b4e4839 --- /dev/null +++ b/frontend/src/lib/utils/docMarkdown.ts @@ -0,0 +1,76 @@ +/** + * 문서 본문 markdown 렌더 (Phase 1C — MarkdownDoc 컴포넌트 전용). + * + * mathMarkdown.ts (study 의 문제·해설용) 와 별도 인스턴스를 둬서 study 측 동작에 영향 없음. + * + * 차이점: + * - GFM heading id (anchor 용 id 자동 부여, prefix=doc-) + * - 이미지 token 을 figure + figcaption 으로 감싸기 (alt 있을 때) + * - 모든 에 data-md-img="1" 마킹 — Phase 1B.5 에서 ImgAuth 후처리 selector 로 사용 + * + * KaTeX / DOMPurify 정책은 mathMarkdown.ts 의 정책과 동일. + */ + +import DOMPurify from 'dompurify'; +import { Marked } from 'marked'; +// @ts-ignore — 타입 정의 누락 시 무시 +import markedKatex from 'marked-katex-extension'; +// @ts-ignore — 타입 정의 누락 시 무시 +import { gfmHeadingId } from 'marked-gfm-heading-id'; +import 'katex/dist/katex.min.css'; + +function escAttr(s: string): string { + return s.replace(/&/g, '&').replace(/"/g, '"').replace(//g, '>'); +} +function escText(s: string): string { + return s.replace(/&/g, '&').replace(//g, '>'); +} + +const docMarked = new Marked(); +docMarked.use({ mangle: false } as any); +docMarked.use(gfmHeadingId({ prefix: 'doc-' })); +docMarked.use( + markedKatex({ + throwOnError: false, + nonStandard: false, + output: 'html', + } as any), +); + +// 이미지 → figure + figcaption (alt 가 있으면). 모든 img 에 data-md-img="1" 마킹. +docMarked.use({ + renderer: { + image(token: any): string { + const href = (token?.href ?? '') as string; + const text = (token?.text ?? '') as string; + const title = (token?.title ?? '') as string; + const titleAttr = title ? ` title="${escAttr(title)}"` : ''; + const img = `${escAttr(text)}`; + if (text) { + return `
${img}
${escText(text)}
`; + } + return img; + }, + }, +}); + +const SANITIZE_OPTS = { + USE_PROFILES: { html: true }, + // KaTeX (style + aria-hidden), heading anchor (id), 이미지 마킹 (data-md-img), figure caption (figure/figcaption) + ADD_ATTR: ['style', 'aria-hidden', 'id', 'data-md-img', 'loading'], + ADD_TAGS: ['figure', 'figcaption'], + FORBID_TAGS: ['script', 'iframe', 'object', 'embed', 'link', 'meta'], + FORBID_ATTR: ['onerror', 'onclick', 'onload', 'onmouseover', 'onfocus'], + ALLOW_UNKNOWN_PROTOCOLS: false, +} as const; + +export function renderDocMarkdown(text: string | null | undefined): string { + if (!text) return ''; + try { + const html = docMarked.parse(text) as string; + return DOMPurify.sanitize(html, SANITIZE_OPTS); + } catch { + // 마지막 안전망: 모든 태그 제거 후 escape + return DOMPurify.sanitize(text, { ALLOWED_TAGS: [], ALLOWED_ATTR: [] }); + } +} diff --git a/frontend/src/routes/documents/[id]/+page.svelte b/frontend/src/routes/documents/[id]/+page.svelte index bcb8a69..985d852 100644 --- a/frontend/src/routes/documents/[id]/+page.svelte +++ b/frontend/src/routes/documents/[id]/+page.svelte @@ -15,6 +15,7 @@ import EmptyState from '$lib/components/ui/EmptyState.svelte'; import Skeleton from '$lib/components/ui/Skeleton.svelte'; import HandwriteCanvas from '$lib/components/HandwriteCanvas.svelte'; + import MarkdownDoc from '$lib/components/MarkdownDoc.svelte'; import NoteEditor from '$lib/components/editors/NoteEditor.svelte'; import EditUrlEditor from '$lib/components/editors/EditUrlEditor.svelte'; import TagsEditor from '$lib/components/editors/TagsEditor.svelte'; @@ -224,9 +225,14 @@ {#if viewerType === 'markdown' || viewerType === 'hwp-markdown'} -
- {@html renderMd(doc.extracted_text || rawMarkdown || '*텍스트 추출 대기 중*')} -
+ {:else if viewerType === 'pdf'}