feat: export pipeline outputs (HTML copy + upload archiving) via EXPORT_* envs
This commit is contained in:
@@ -4,6 +4,8 @@ from fastapi import FastAPI, HTTPException, Depends, UploadFile, File, Form
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Dict, Any
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from .config import settings
|
||||
from .ollama_client import OllamaClient
|
||||
@@ -29,7 +31,7 @@ app.add_middleware(
|
||||
)
|
||||
ollama = OllamaClient(settings.ollama_host)
|
||||
index = JsonlIndex(settings.index_path)
|
||||
pipeline = DocumentPipeline(ollama, settings.embedding_model, settings.boost_model)
|
||||
pipeline = DocumentPipeline(ollama, settings.embedding_model, settings.boost_model, output_dir=settings.output_dir)
|
||||
|
||||
|
||||
class ChatRequest(BaseModel):
|
||||
@@ -178,7 +180,13 @@ def pipeline_ingest(req: PipelineIngestRequest, _: None = Depends(require_api_ke
|
||||
summary_sentences=req.summary_sentences,
|
||||
summary_language=req.summary_language,
|
||||
)
|
||||
return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path}
|
||||
exported_html: str | None = None
|
||||
if result.html_path and settings.export_html_dir:
|
||||
Path(settings.export_html_dir).mkdir(parents=True, exist_ok=True)
|
||||
dst = str(Path(settings.export_html_dir) / Path(result.html_path).name)
|
||||
shutil.copyfile(result.html_path, dst)
|
||||
exported_html = dst
|
||||
return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path, "exported_html": exported_html}
|
||||
|
||||
|
||||
@app.post("/pipeline/ingest_file")
|
||||
@@ -226,7 +234,18 @@ async def pipeline_ingest_file(
|
||||
translate=translate,
|
||||
target_language=target_language,
|
||||
)
|
||||
return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path}
|
||||
exported_html: str | None = None
|
||||
if result.html_path and settings.export_html_dir:
|
||||
Path(settings.export_html_dir).mkdir(parents=True, exist_ok=True)
|
||||
dst = str(Path(settings.export_html_dir) / Path(result.html_path).name)
|
||||
shutil.copyfile(result.html_path, dst)
|
||||
exported_html = dst
|
||||
if settings.export_upload_dir:
|
||||
Path(settings.export_upload_dir).mkdir(parents=True, exist_ok=True)
|
||||
# Build the archive filename as "<doc_id>__<uploaded filename>".
# NOTE(review): `doc_id` is a bare name here while the response below uses
# `result.doc_id`; it must be bound earlier in this function (not visible in
# this hunk) — confirm, otherwise this raises NameError.
# NOTE(review): `file.filename` is interpolated verbatim into the path; if it
# is client-supplied, consider reducing it to a basename before joining it
# under export_upload_dir — confirm.
orig_name = f"{doc_id}__{file.filename}"
|
||||
with open(str(Path(settings.export_upload_dir) / orig_name), "wb") as f:
|
||||
f.write(raw)
|
||||
return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path, "exported_html": exported_html}
|
||||
|
||||
|
||||
# Paperless webhook placeholder (to be wired with user-provided details)
|
||||
|
||||
Reference in New Issue
Block a user