feat: export pipeline outputs (HTML copy + upload archiving) via EXPORT_* envs
This commit is contained in:
@@ -287,6 +287,9 @@ curl -s -X POST http://localhost:26000/paperless/sync \
|
||||
- `PAPERLESS_BASE_URL`, `PAPERLESS_TOKEN`(선택): Paperless API 연동 시 사용
|
||||
- `PAPERLESS_VERIFY_SSL`(기본 `true`): Paperless HTTPS 인증서 검증 여부. 검증을 비활성화하려면 `false`로 설정
|
||||
- `PAPERLESS_CA_BUNDLE`(선택): 신뢰할 CA 번들 경로 지정 시 해당 번들로 검증
|
||||
- `OUTPUT_DIR`(기본 `outputs`): 파이프라인 산출물(HTML) 저장 루트
|
||||
- `EXPORT_HTML_DIR`(선택): HTML 산출물 사본을 내보낼 디렉터리(예: 시놀로지 공유 폴더)
|
||||
- `EXPORT_UPLOAD_DIR`(선택): 업로드 원본 파일 보관 디렉터리(`/pipeline/ingest_file` 업로드 시 `{doc_id}__{원본 파일명}` 형식으로 저장)
|
||||
- `API_KEY`(선택): 설정 시 모든 민감 엔드포인트 호출에 `X-API-Key` 헤더 필요
|
||||
- `CORS_ORIGINS`(선택): CORS 허용 오리진(쉼표 구분), 미설정 시 `*`
|
||||
|
||||
|
||||
@@ -13,6 +13,11 @@ class Settings:
|
||||
english_ratio_threshold: float = float(os.getenv("ENGLISH_RATIO_THRESHOLD", "0.65"))
|
||||
embedding_model: str = os.getenv("EMBEDDING_MODEL", "nomic-embed-text")
|
||||
index_path: str = os.getenv("INDEX_PATH", "data/index.jsonl")
|
||||
output_dir: str = os.getenv("OUTPUT_DIR", "outputs")
|
||||
|
||||
# Optional export targets (e.g., Synology NAS shares)
|
||||
export_html_dir: str = os.getenv("EXPORT_HTML_DIR", "")
|
||||
export_upload_dir: str = os.getenv("EXPORT_UPLOAD_DIR", "")
|
||||
|
||||
# Paperless (user will provide API details)
|
||||
paperless_base_url: str = os.getenv("PAPERLESS_BASE_URL", "")
|
||||
|
||||
@@ -4,6 +4,8 @@ from fastapi import FastAPI, HTTPException, Depends, UploadFile, File, Form
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Dict, Any
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from .config import settings
|
||||
from .ollama_client import OllamaClient
|
||||
@@ -29,7 +31,7 @@ app.add_middleware(
|
||||
)
|
||||
ollama = OllamaClient(settings.ollama_host)
|
||||
index = JsonlIndex(settings.index_path)
|
||||
pipeline = DocumentPipeline(ollama, settings.embedding_model, settings.boost_model)
|
||||
pipeline = DocumentPipeline(ollama, settings.embedding_model, settings.boost_model, output_dir=settings.output_dir)
|
||||
|
||||
|
||||
class ChatRequest(BaseModel):
|
||||
@@ -178,7 +180,13 @@ def pipeline_ingest(req: PipelineIngestRequest, _: None = Depends(require_api_ke
|
||||
summary_sentences=req.summary_sentences,
|
||||
summary_language=req.summary_language,
|
||||
)
|
||||
return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path}
|
||||
exported_html: str | None = None
|
||||
if result.html_path and settings.export_html_dir:
|
||||
Path(settings.export_html_dir).mkdir(parents=True, exist_ok=True)
|
||||
dst = str(Path(settings.export_html_dir) / Path(result.html_path).name)
|
||||
shutil.copyfile(result.html_path, dst)
|
||||
exported_html = dst
|
||||
return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path, "exported_html": exported_html}
|
||||
|
||||
|
||||
@app.post("/pipeline/ingest_file")
|
||||
@@ -226,7 +234,18 @@ async def pipeline_ingest_file(
|
||||
translate=translate,
|
||||
target_language=target_language,
|
||||
)
|
||||
return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path}
|
||||
exported_html: str | None = None
|
||||
if result.html_path and settings.export_html_dir:
|
||||
Path(settings.export_html_dir).mkdir(parents=True, exist_ok=True)
|
||||
dst = str(Path(settings.export_html_dir) / Path(result.html_path).name)
|
||||
shutil.copyfile(result.html_path, dst)
|
||||
exported_html = dst
|
||||
if settings.export_upload_dir:
|
||||
Path(settings.export_upload_dir).mkdir(parents=True, exist_ok=True)
|
||||
orig_name = f"{doc_id}__{file.filename}"
|
||||
with open(str(Path(settings.export_upload_dir) / orig_name), "wb") as f:
|
||||
f.write(raw)
|
||||
return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path, "exported_html": exported_html}
|
||||
|
||||
|
||||
# Paperless webhook placeholder (to be wired with user-provided details)
|
||||
|
||||
Reference in New Issue
Block a user