diff --git a/README.md b/README.md index 8dd5774..580d292 100644 --- a/README.md +++ b/README.md @@ -287,6 +287,9 @@ curl -s -X POST http://localhost:26000/paperless/sync \ - `PAPERLESS_BASE_URL`, `PAPERLESS_TOKEN`(선택): Paperless API 연동 시 사용 - `PAPERLESS_VERIFY_SSL`(기본 `true`): Paperless HTTPS 검증 비활성화는 `false` - `PAPERLESS_CA_BUNDLE`(선택): 신뢰할 CA 번들 경로 지정 시 해당 번들로 검증 +- `OUTPUT_DIR`(기본 `outputs`): 파이프라인 산출물(HTML) 저장 루트 +- `EXPORT_HTML_DIR`(선택): HTML 산출물 사본을 내보낼 디렉터리(예: 시놀로지 공유 폴더) +- `EXPORT_UPLOAD_DIR`(선택): 업로드 원본 파일 보관 디렉터리 - `API_KEY`(선택): 설정 시 모든 민감 엔드포인트 호출에 `X-API-Key` 헤더 필요 - `CORS_ORIGINS`(선택): CORS 허용 오리진(쉼표 구분), 미설정 시 `*` diff --git a/server/config.py b/server/config.py index 7939b88..8c94b3a 100644 --- a/server/config.py +++ b/server/config.py @@ -13,6 +13,11 @@ class Settings: english_ratio_threshold: float = float(os.getenv("ENGLISH_RATIO_THRESHOLD", "0.65")) embedding_model: str = os.getenv("EMBEDDING_MODEL", "nomic-embed-text") index_path: str = os.getenv("INDEX_PATH", "data/index.jsonl") + output_dir: str = os.getenv("OUTPUT_DIR", "outputs") + + # Optional export targets (e.g., Synology NAS shares) + export_html_dir: str = os.getenv("EXPORT_HTML_DIR", "") + export_upload_dir: str = os.getenv("EXPORT_UPLOAD_DIR", "") # Paperless (user will provide API details) paperless_base_url: str = os.getenv("PAPERLESS_BASE_URL", "") diff --git a/server/main.py b/server/main.py index e7ef2a5..7ac1464 100644 --- a/server/main.py +++ b/server/main.py @@ -4,6 +4,8 @@ from fastapi import FastAPI, HTTPException, Depends, UploadFile, File, Form from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel from typing import List, Dict, Any +import shutil +from pathlib import Path from .config import settings from .ollama_client import OllamaClient @@ -29,7 +31,7 @@ app.add_middleware( ) ollama = OllamaClient(settings.ollama_host) index = JsonlIndex(settings.index_path) -pipeline = DocumentPipeline(ollama, settings.embedding_model, settings.boost_model) +pipeline = DocumentPipeline(ollama, settings.embedding_model, settings.boost_model, output_dir=settings.output_dir) class ChatRequest(BaseModel): @@ -178,7 +180,13 @@ def pipeline_ingest(req: PipelineIngestRequest, _: None = Depends(require_api_ke summary_sentences=req.summary_sentences, summary_language=req.summary_language, ) - return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path} + exported_html: str | None = None + if result.html_path and settings.export_html_dir: + Path(settings.export_html_dir).mkdir(parents=True, exist_ok=True) + dst = str(Path(settings.export_html_dir) / Path(result.html_path).name) + shutil.copyfile(result.html_path, dst) + exported_html = dst + return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path, "exported_html": exported_html} @app.post("/pipeline/ingest_file") @@ -226,7 +234,18 @@ async def pipeline_ingest_file( translate=translate, target_language=target_language, ) - return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path} + exported_html: str | None = None + if result.html_path and settings.export_html_dir: + Path(settings.export_html_dir).mkdir(parents=True, exist_ok=True) + dst = str(Path(settings.export_html_dir) / Path(result.html_path).name) + shutil.copyfile(result.html_path, dst) + exported_html = dst + if settings.export_upload_dir: + Path(settings.export_upload_dir).mkdir(parents=True, exist_ok=True) + orig_name = f"{doc_id}__{file.filename}" + with open(str(Path(settings.export_upload_dir) / orig_name), "wb") as f: + f.write(raw) + return {"status": "ok", "doc_id": result.doc_id, "added": result.added_chunks, "chunks": result.chunks, "html_path": result.html_path, "exported_html": exported_html} # Paperless webhook placeholder (to be wired with user-provided details)