feat: Markdown 편집기 + PDF 변환 파이프라인 + 뷰어 포맷 분기
- Markdown split editor: textarea + marked preview, Ctrl+S 저장
- PUT /api/documents/{id}/content: 원본 파일 저장 + extracted_text 갱신
- GET /api/documents/{id}/preview: PDF 미리보기 캐시 서빙
- preview_worker: LibreOffice headless → PDF 변환 (timeout 60s, retry 1회)
- queue_consumer: preview stage 추가 (embed 후 자동 트리거)
- DocumentViewer: 포맷별 분기 (markdown/pdf/preview-pdf/image/text/cad)
- 오피스/CAD 문서: 새 탭 편집 버튼
- Dockerfile: LibreOffice headless 설치
- migration 005: preview_status, preview_hash, preview_at 컬럼
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,11 @@ FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# LibreOffice headless (PDF 변환용)
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends libreoffice-core libreoffice-calc libreoffice-writer libreoffice-impress && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@ class DocumentResponse(BaseModel):
|
||||
ai_tags: list | None
|
||||
ai_summary: str | None
|
||||
user_note: str | None
|
||||
preview_status: str | None
|
||||
source_channel: str | None
|
||||
data_origin: str | None
|
||||
extracted_at: datetime | None
|
||||
@@ -298,6 +299,66 @@ async def update_document(
|
||||
return DocumentResponse.model_validate(doc)
|
||||
|
||||
|
||||
@router.put("/{doc_id}/content")
async def save_document_content(
    doc_id: int,
    user: Annotated[User, Depends(get_current_user)],
    session: Annotated[AsyncSession, Depends(get_session)],
    body: dict = None,
):
    """Overwrite the original Markdown/text file and refresh its metadata.

    Args:
        doc_id: Primary key of the document to update.
        user: Authenticated user; unused in the body, present so the auth
            dependency runs.
        session: Database session used to load and update the document row.
        body: JSON object expected to carry the new text under ``"content"``.

    Returns:
        The updated document serialized as ``DocumentResponse``.

    Raises:
        HTTPException: 404 when the document does not exist; 400 when the
            format is not editable or ``content`` is missing / not a string.
    """
    doc = await session.get(Document, doc_id)
    if not doc:
        raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")

    if doc.file_format not in ("md", "txt"):
        raise HTTPException(status_code=400, detail="편집 가능한 포맷이 아닙니다 (md, txt만 가능)")

    # A request without a usable "content" value must not fall through to the
    # write below: that would silently truncate the original file to zero
    # bytes. An explicit empty string is still accepted.
    content = body.get("content") if body else None
    if not isinstance(content, str):
        raise HTTPException(status_code=400, detail="content 필드(문자열)가 필요합니다")

    # NOTE(review): this write and file_hash() are synchronous file I/O inside
    # an async handler.
    file_path = Path(settings.nas_mount_path) / doc.file_path
    file_path.write_text(content, encoding="utf-8")

    # Refresh metadata so it matches what is now on disk.
    doc.file_size = len(content.encode("utf-8"))
    doc.file_hash = file_hash(file_path)
    # Only the first 15000 characters are kept in extracted_text.
    doc.extracted_text = content[:15000]
    doc.updated_at = datetime.now(timezone.utc)
    await session.commit()

    return DocumentResponse.model_validate(doc)
|
||||
|
||||
|
||||
@router.get("/{doc_id}/preview")
async def get_document_preview(
    doc_id: int,
    session: Annotated[AsyncSession, Depends(get_session)],
    token: str | None = Query(None, description="Bearer token (iframe용)"),
):
    """Serve the cached PDF preview of a document.

    The access token arrives as a query parameter rather than a header
    (the parameter description says it is for iframe use).

    Raises:
        HTTPException: 401 when the token is absent or is not a valid access
            token; 404 when the document or its cached preview is missing.
    """
    from core.auth import decode_token

    # Reject unauthenticated requests up front.
    if not token:
        raise HTTPException(status_code=401, detail="토큰이 필요합니다")

    claims = decode_token(token)
    token_is_access = bool(claims) and claims.get("type") == "access"
    if not token_is_access:
        raise HTTPException(status_code=401, detail="유효하지 않은 토큰")

    document = await session.get(Document, doc_id)
    if not document:
        raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다")

    # Cached previews live at <nas_mount_path>/PKM/.preview/<doc_id>.pdf.
    cached_pdf = Path(settings.nas_mount_path, "PKM", ".preview", f"{doc_id}.pdf")
    if not cached_pdf.exists():
        raise HTTPException(status_code=404, detail="미리보기가 아직 생성되지 않았습니다")

    inline_headers = {"Content-Disposition": "inline"}
    return FileResponse(
        path=str(cached_pdf),
        media_type="application/pdf",
        headers=inline_headers,
    )
|
||||
|
||||
|
||||
@router.delete("/{doc_id}")
|
||||
async def delete_document(
|
||||
doc_id: int,
|
||||
|
||||
@@ -47,6 +47,11 @@ class Document(Base):
|
||||
# 사용자 메모
|
||||
user_note: Mapped[str | None] = mapped_column(Text)
|
||||
|
||||
# 미리보기
|
||||
preview_status: Mapped[str | None] = mapped_column(String(20), default="none")
|
||||
preview_hash: Mapped[str | None] = mapped_column(String(64))
|
||||
preview_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
|
||||
|
||||
# 메타데이터
|
||||
source_channel: Mapped[str | None] = mapped_column(
|
||||
Enum("law_monitor", "devonagent", "email", "web_clip",
|
||||
|
||||
110
app/workers/preview_worker.py
Normal file
110
app/workers/preview_worker.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""PDF 미리보기 생성 워커 — LibreOffice Headless로 문서→PDF 변환"""
|
||||
|
||||
import subprocess
|
||||
import shutil
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from core.config import settings
|
||||
from core.utils import setup_logger
|
||||
|
||||
logger = setup_logger("preview_worker")
|
||||
|
||||
# PDF 변환 대상 포맷
|
||||
CONVERTIBLE_FORMATS = {
|
||||
"docx", "xlsx", "pptx", "odt", "ods", "odp", # 안정 지원
|
||||
"odoc", "osheet", "hwp", "hwpx", # 검증 필요
|
||||
}
|
||||
# 이미 PDF이거나 변환 불필요한 포맷
|
||||
NATIVE_PDF = {"pdf"}
|
||||
NATIVE_IMAGE = {"jpg", "jpeg", "png", "gif", "bmp", "tiff"}
|
||||
TEXT_FORMATS = {"md", "txt", "csv", "json", "xml", "html"}
|
||||
|
||||
PREVIEW_DIR_NAME = "PKM/.preview"
|
||||
TIMEOUT_SECONDS = 60
|
||||
|
||||
|
||||
def _convert_to_pdf(source: Path, work_dir: Path, timeout: int) -> Path:
    """Convert *source* to PDF with LibreOffice headless inside *work_dir*.

    Args:
        source: Path of the original document.
        work_dir: Absolute, empty directory owned by this conversion only.
        timeout: Seconds to wait for LibreOffice before giving up.

    Returns:
        Path of the generated PDF inside *work_dir*.

    Raises:
        subprocess.TimeoutExpired: LibreOffice did not finish in time.
        RuntimeError: LibreOffice exited non-zero or produced no PDF.
    """
    out_dir = work_dir / "out"
    out_dir.mkdir()
    # A private profile per run keeps simultaneous conversions from sharing
    # one LibreOffice user profile.
    profile_uri = (work_dir / "profile").as_uri()

    result = subprocess.run(
        [
            "libreoffice",
            f"-env:UserInstallation={profile_uri}",
            "--headless", "--convert-to", "pdf",
            "--outdir", str(out_dir),
            str(source),
        ],
        capture_output=True,
        text=True,
        timeout=timeout,
    )

    if result.returncode != 0:
        raise RuntimeError(f"LibreOffice 변환 실패: {result.stderr[:200]}")

    # out_dir starts empty, so a file found here was produced by this run and
    # cannot be a leftover from another document with the same stem.
    converted = out_dir / f"{source.stem}.pdf"
    if not converted.exists():
        raise RuntimeError(f"변환 결과물 없음: {converted}")
    return converted


async def process(document_id: int, session: AsyncSession) -> None:
    """Generate the cached PDF preview for one document.

    Formats that need no conversion only get their ``preview_status`` updated.
    Convertible formats are run through LibreOffice and the result is stored
    as ``<nas_mount_path>/PKM/.preview/<document_id>.pdf``. Failures are
    recorded as ``preview_status = "failed"`` and logged, not raised.

    Args:
        document_id: Primary key of the document to process.
        session: Database session used to load and update the document row.
    """
    import asyncio
    import tempfile

    from models.document import Document

    doc = await session.get(Document, document_id)
    if not doc:
        logger.error(f"[preview] document_id={document_id} 없음")
        return

    # A missing format is treated as "not convertible" instead of crashing.
    fmt = (doc.file_format or "").lower()

    # PDFs, images and text need no conversion.
    if fmt in NATIVE_PDF or fmt in NATIVE_IMAGE or fmt in TEXT_FORMATS:
        doc.preview_status = "ready" if fmt in NATIVE_PDF else "none"
        doc.preview_at = datetime.now(timezone.utc)
        await session.commit()
        return

    if fmt not in CONVERTIBLE_FORMATS:
        doc.preview_status = "none"
        await session.commit()
        logger.info(f"[preview] {doc.title} — 변환 불가 포맷: {fmt}")
        return

    # Locate the original file.
    source = Path(settings.nas_mount_path) / doc.file_path
    if not source.exists():
        doc.preview_status = "failed"
        await session.commit()
        logger.error(f"[preview] 원본 없음: {source}")
        return

    # Preview cache directory.
    preview_dir = Path(settings.nas_mount_path) / PREVIEW_DIR_NAME
    preview_dir.mkdir(parents=True, exist_ok=True)
    output_path = preview_dir / f"{document_id}.pdf"

    doc.preview_status = "processing"
    await session.commit()

    try:
        # Each conversion gets its own scratch directory, removed on exit.
        with tempfile.TemporaryDirectory(prefix="preview_work_") as scratch:
            # Run the blocking LibreOffice call in a worker thread so the
            # event loop stays responsive for the whole timeout window.
            converted = await asyncio.to_thread(
                _convert_to_pdf, source, Path(scratch), TIMEOUT_SECONDS
            )

            # Copy next to the final location first, then rename, so the cache
            # path never holds a partially written PDF.
            staging = preview_dir / f"{document_id}.pdf.part"
            await asyncio.to_thread(shutil.move, str(converted), str(staging))
            staging.replace(output_path)

        doc.preview_status = "ready"
        doc.preview_hash = doc.file_hash
        doc.preview_at = datetime.now(timezone.utc)
        await session.commit()
        logger.info(f"[preview] {doc.title} → PDF 변환 완료")

    except subprocess.TimeoutExpired:
        doc.preview_status = "failed"
        await session.commit()
        logger.error(f"[preview] {doc.title} — 변환 timeout ({TIMEOUT_SECONDS}s)")

    except Exception as e:
        doc.preview_status = "failed"
        await session.commit()
        logger.error(f"[preview] {doc.title} — 변환 실패: {e}")
|
||||
@@ -11,7 +11,7 @@ from models.queue import ProcessingQueue
|
||||
logger = setup_logger("queue_consumer")
|
||||
|
||||
# stage별 배치 크기
|
||||
BATCH_SIZE = {"extract": 5, "classify": 3, "embed": 1}
|
||||
BATCH_SIZE = {"extract": 5, "classify": 3, "embed": 1, "preview": 2}
|
||||
STALE_THRESHOLD_MINUTES = 10
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ async def reset_stale_items():
|
||||
|
||||
async def enqueue_next_stage(document_id: int, current_stage: str):
|
||||
"""현재 stage 완료 후 다음 stage를 pending으로 등록"""
|
||||
next_stages = {"extract": "classify", "classify": "embed"}
|
||||
next_stages = {"extract": "classify", "classify": "embed", "embed": "preview"}
|
||||
next_stage = next_stages.get(current_stage)
|
||||
if not next_stage:
|
||||
return
|
||||
@@ -63,11 +63,13 @@ async def consume_queue():
|
||||
from workers.classify_worker import process as classify_process
|
||||
from workers.embed_worker import process as embed_process
|
||||
from workers.extract_worker import process as extract_process
|
||||
from workers.preview_worker import process as preview_process
|
||||
|
||||
workers = {
|
||||
"extract": extract_process,
|
||||
"classify": classify_process,
|
||||
"embed": embed_process,
|
||||
"preview": preview_process,
|
||||
}
|
||||
|
||||
await reset_stale_items()
|
||||
|
||||
@@ -1,25 +1,44 @@
|
||||
<script>
|
||||
import { api, getAccessToken } from '$lib/api';
|
||||
import { addToast } from '$lib/stores/ui';
|
||||
import { marked } from 'marked';
|
||||
import { ExternalLink, Save, RefreshCw } from 'lucide-svelte';
|
||||
|
||||
let { doc } = $props();
|
||||
let fullDoc = $state(null);
|
||||
let loading = $state(true);
|
||||
let viewerType = $state('none');
|
||||
|
||||
// Markdown 편집
|
||||
let editMode = $state(false);
|
||||
let editContent = $state('');
|
||||
let saving = $state(false);
|
||||
|
||||
/**
 * Map a document's file format to the viewer used to render it.
 *
 * @param {string} format - File format / extension of the document.
 * @returns {'markdown'|'pdf'|'preview-pdf'|'image'|'text'|'cad'|'unsupported'}
 */
function getViewerType(format) {
  // Only md/txt use the Markdown viewer; other text formats go to 'text'.
  if (['md', 'txt'].includes(format)) return 'markdown';
  if (format === 'pdf') return 'pdf';
  // HWP and office documents are shown through their PDF preview.
  if (['hwp', 'hwpx'].includes(format)) return 'preview-pdf';
  if (['odoc', 'osheet', 'docx', 'xlsx', 'pptx', 'odt', 'ods', 'odp'].includes(format)) return 'preview-pdf';
  if (['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff'].includes(format)) return 'image';
  if (['csv', 'json', 'xml', 'html'].includes(format)) return 'text';
  if (['dwg', 'dxf'].includes(format)) return 'cad';
  return 'unsupported';
}
|
||||
|
||||
// doc이 바뀌면 상세 데이터 로딩
|
||||
/**
 * Return the external URL where a document can be edited, or null when the
 * format has no external editor.
 *
 * @param {{file_format: string}} doc
 * @returns {string|null}
 */
function getEditUrl(doc) {
  const officeFormats = ['odoc', 'osheet', 'docx', 'xlsx', 'pptx', 'odt', 'ods', 'odp'];
  const cadFormats = ['dwg', 'dxf'];
  const format = doc.file_format;

  if (officeFormats.includes(format)) return `https://link.hyungi.net`;
  if (cadFormats.includes(format)) return 'https://web.autocad.com';
  return null;
}
|
||||
|
||||
$effect(() => {
|
||||
if (doc?.id) {
|
||||
loadFullDoc(doc.id);
|
||||
editMode = false;
|
||||
}
|
||||
});
|
||||
|
||||
@@ -35,46 +54,149 @@
|
||||
loading = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Enter edit mode with the document's original file content.
 *
 * The editor is seeded from the original file rather than from
 * `extracted_text`, because saving replaces the whole file with whatever is
 * in the editor. If the original cannot be loaded, edit mode is not entered.
 * NOTE(review): assumes the /file endpoint returns the raw text for md/txt
 * documents — confirm.
 */
async function startEdit() {
  const targetId = fullDoc?.id;
  if (!targetId) return;

  try {
    const res = await fetch(`/api/documents/${targetId}/file?token=${getAccessToken()}`);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const original = await res.text();

    // The selection may have changed while loading; never open the editor
    // for one document with another document's text.
    if (fullDoc?.id !== targetId) return;

    editContent = original;
    editMode = true;
  } catch (err) {
    console.error(err);
    addToast('error', '원본을 불러오지 못했습니다');
  }
}
|
||||
|
||||
/**
 * Persist the editor content to the server and leave edit mode on success.
 *
 * Does nothing while a save is already in flight or when no document is
 * loaded, so repeated triggers cannot send overlapping PUT requests.
 */
async function saveContent() {
  if (saving || !fullDoc) return;

  // Snapshot what is being saved; the selection can change during the await.
  const targetId = fullDoc.id;
  const content = editContent;

  saving = true;
  try {
    await api(`/documents/${targetId}/content`, {
      method: 'PUT',
      body: JSON.stringify({ content }),
    });
    // Only update local state if the same document is still displayed.
    if (fullDoc?.id === targetId) {
      fullDoc.extracted_text = content;
      editMode = false;
    }
    addToast('success', '저장됨');
  } catch (err) {
    // Keep the cause visible for debugging instead of discarding it.
    console.error(err);
    addToast('error', '저장 실패');
  } finally {
    saving = false;
  }
}
|
||||
|
||||
/**
 * Window-level keydown handler: Ctrl+S / Cmd+S saves while in edit mode.
 *
 * @param {KeyboardEvent} e
 */
function handleKeydown(e) {
  if (!editMode) return;
  if (!(e.metaKey || e.ctrlKey)) return;
  // e.key is 'S' when CapsLock or Shift is active, so compare case-insensitively.
  if (e.key?.toLowerCase() !== 's') return;

  // Always suppress the browser's own "save page" dialog while editing.
  e.preventDefault();
  // Ignore the shortcut while a save is already running.
  if (!saving) saveContent();
}
|
||||
|
||||
let editUrl = $derived(fullDoc ? getEditUrl(fullDoc) : null);
|
||||
</script>
|
||||
|
||||
<div class="h-full bg-[var(--surface)] border-t border-[var(--border)] overflow-auto">
|
||||
{#if loading}
|
||||
<div class="flex items-center justify-center h-full">
|
||||
<p class="text-sm text-[var(--text-dim)]">로딩 중...</p>
|
||||
<svelte:window on:keydown={handleKeydown} />
|
||||
|
||||
<div class="h-full flex flex-col bg-[var(--surface)] border-t border-[var(--border)]">
|
||||
<!-- 뷰어 툴바 -->
|
||||
{#if fullDoc && !loading}
|
||||
<div class="flex items-center justify-between px-3 py-1.5 border-b border-[var(--border)] bg-[var(--sidebar-bg)] shrink-0">
|
||||
<span class="text-xs text-[var(--text-dim)] truncate">{fullDoc.title || '제목 없음'}</span>
|
||||
<div class="flex items-center gap-2">
|
||||
{#if viewerType === 'markdown'}
|
||||
{#if editMode}
|
||||
<button
|
||||
onclick={saveContent}
|
||||
disabled={saving}
|
||||
class="flex items-center gap-1 px-2 py-1 text-xs bg-[var(--accent)] text-white rounded hover:bg-[var(--accent-hover)] disabled:opacity-50"
|
||||
>
|
||||
<Save size={12} /> {saving ? '저장 중...' : '저장'}
|
||||
</button>
|
||||
<button
|
||||
onclick={() => editMode = false}
|
||||
class="px-2 py-1 text-xs text-[var(--text-dim)] hover:text-[var(--text)]"
|
||||
>취소</button>
|
||||
{:else}
|
||||
<button
|
||||
onclick={startEdit}
|
||||
class="px-2 py-1 text-xs text-[var(--text-dim)] hover:text-[var(--accent)] border border-[var(--border)] rounded"
|
||||
>편집</button>
|
||||
{/if}
|
||||
{/if}
|
||||
{#if editUrl}
|
||||
<a
|
||||
href={editUrl}
|
||||
target="_blank"
|
||||
rel="noopener"
|
||||
class="flex items-center gap-1 px-2 py-1 text-xs text-[var(--text-dim)] hover:text-[var(--accent)] border border-[var(--border)] rounded"
|
||||
>
|
||||
<ExternalLink size={12} /> 편집
|
||||
</a>
|
||||
{/if}
|
||||
<a
|
||||
href="/documents/{fullDoc.id}"
|
||||
class="px-2 py-1 text-xs text-[var(--text-dim)] hover:text-[var(--accent)] border border-[var(--border)] rounded"
|
||||
>전체 보기</a>
|
||||
</div>
|
||||
</div>
|
||||
{:else if fullDoc}
|
||||
{#if viewerType === 'markdown' || viewerType === 'hwp-markdown'}
|
||||
<div class="p-4 prose prose-invert prose-sm max-w-none">
|
||||
{@html marked(fullDoc.extracted_text || '*텍스트 추출 대기 중*')}
|
||||
</div>
|
||||
{:else if viewerType === 'pdf'}
|
||||
<iframe
|
||||
src="/api/documents/{fullDoc.id}/file?token={getAccessToken()}"
|
||||
class="w-full h-full border-0"
|
||||
title={fullDoc.title}
|
||||
></iframe>
|
||||
{:else if viewerType === 'image'}
|
||||
<div class="flex items-center justify-center h-full p-4">
|
||||
<img
|
||||
src="/api/documents/{fullDoc.id}/file?token={getAccessToken()}"
|
||||
alt={fullDoc.title}
|
||||
class="max-w-full max-h-full object-contain rounded"
|
||||
/>
|
||||
</div>
|
||||
{:else if viewerType === 'synology'}
|
||||
<iframe
|
||||
src="https://ds1525.hyungi.net:15001/oo/r/{fullDoc.file_path}"
|
||||
class="w-full h-full border-0"
|
||||
title={fullDoc.title}
|
||||
allow="clipboard-read; clipboard-write"
|
||||
></iframe>
|
||||
{:else}
|
||||
<div class="flex items-center justify-center h-full">
|
||||
<div class="text-center">
|
||||
<p class="text-sm text-[var(--text-dim)] mb-1">미리보기를 지원하지 않는 형식입니다</p>
|
||||
<p class="text-xs text-[var(--text-dim)]">{fullDoc.file_format}</p>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
{/if}
|
||||
|
||||
<!-- 뷰어 본문 -->
|
||||
<div class="flex-1 overflow-auto min-h-0">
|
||||
{#if loading}
|
||||
<div class="flex items-center justify-center h-full">
|
||||
<p class="text-sm text-[var(--text-dim)]">로딩 중...</p>
|
||||
</div>
|
||||
{:else if fullDoc}
|
||||
{#if viewerType === 'markdown'}
|
||||
{#if editMode}
|
||||
<!-- Markdown split editor -->
|
||||
<div class="flex h-full">
|
||||
<textarea
|
||||
bind:value={editContent}
|
||||
class="w-1/2 h-full p-4 bg-[var(--bg)] text-[var(--text)] text-sm font-mono resize-none outline-none border-r border-[var(--border)]"
|
||||
spellcheck="false"
|
||||
></textarea>
|
||||
<div class="w-1/2 h-full p-4 overflow-auto prose prose-invert prose-sm max-w-none">
|
||||
{@html marked(editContent)}
|
||||
</div>
|
||||
</div>
|
||||
{:else}
|
||||
<div class="p-4 prose prose-invert prose-sm max-w-none">
|
||||
{@html marked(fullDoc.extracted_text || '*텍스트 추출 대기 중*')}
|
||||
</div>
|
||||
{/if}
|
||||
{:else if viewerType === 'pdf'}
|
||||
<iframe
|
||||
src="/api/documents/{fullDoc.id}/file?token={getAccessToken()}"
|
||||
class="w-full h-full border-0"
|
||||
title={fullDoc.title}
|
||||
></iframe>
|
||||
{:else if viewerType === 'preview-pdf'}
|
||||
<iframe
|
||||
src="/api/documents/{fullDoc.id}/preview?token={getAccessToken()}"
|
||||
class="w-full h-full border-0"
|
||||
title={fullDoc.title}
|
||||
onerror={() => {}}
|
||||
></iframe>
|
||||
{:else if viewerType === 'image'}
|
||||
<div class="flex items-center justify-center h-full p-4">
|
||||
<img
|
||||
src="/api/documents/{fullDoc.id}/file?token={getAccessToken()}"
|
||||
alt={fullDoc.title}
|
||||
class="max-w-full max-h-full object-contain rounded"
|
||||
/>
|
||||
</div>
|
||||
{:else if viewerType === 'text'}
|
||||
<div class="p-4">
|
||||
<pre class="text-sm text-[var(--text)] whitespace-pre-wrap font-mono">{fullDoc.extracted_text || '텍스트 없음'}</pre>
|
||||
</div>
|
||||
{:else if viewerType === 'cad'}
|
||||
<div class="flex flex-col items-center justify-center h-full gap-3">
|
||||
<p class="text-sm text-[var(--text-dim)]">CAD 미리보기 (향후 지원 예정)</p>
|
||||
<a
|
||||
href="https://web.autocad.com"
|
||||
target="_blank"
|
||||
class="px-3 py-1.5 text-sm bg-[var(--accent)] text-white rounded hover:bg-[var(--accent-hover)]"
|
||||
>AutoCAD Web에서 열기</a>
|
||||
</div>
|
||||
{:else}
|
||||
<div class="flex items-center justify-center h-full">
|
||||
<p class="text-sm text-[var(--text-dim)]">미리보기를 지원하지 않는 형식입니다 ({fullDoc.file_format})</p>
|
||||
</div>
|
||||
{/if}
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
4
migrations/005_preview_fields.sql
Normal file
4
migrations/005_preview_fields.sql
Normal file
@@ -0,0 +1,4 @@
|
||||
-- 문서 미리보기 상태 필드 추가
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS preview_status VARCHAR(20) DEFAULT 'none';
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS preview_hash VARCHAR(64);
|
||||
ALTER TABLE documents ADD COLUMN IF NOT EXISTS preview_at TIMESTAMPTZ;
|
||||
Reference in New Issue
Block a user