feat: add document pipeline (embedding -> Korean translation -> HTML) and /pipeline/ingest endpoint

This commit is contained in:
hyungi
2025-08-13 08:45:01 +09:00
parent b430a27215
commit a280304adc
3 changed files with 106 additions and 0 deletions

View File

@@ -11,6 +11,7 @@ from .index_store import JsonlIndex
from .security import require_api_key
from .paperless_client import PaperlessClient
from .utils import chunk_text
from .pipeline import DocumentPipeline
# Application object; the route decorators in this module (@app.get / @app.post)
# register their handlers on it.
app = FastAPI(title="Local AI Server", version="0.2.1")
@@ -28,6 +29,7 @@ app.add_middleware(
)
# Module-level singletons shared by the request handlers in this module.
# Client built from the configured Ollama host.
ollama = OllamaClient(settings.ollama_host)
# Index opened at the configured index path.
index = JsonlIndex(settings.index_path)
# Document pipeline; it reuses the Ollama client above and is configured with
# settings.embedding_model and settings.boost_model.
pipeline = DocumentPipeline(ollama, settings.embedding_model, settings.boost_model)
class ChatRequest(BaseModel):
@@ -55,6 +57,12 @@ class UpsertRequest(BaseModel):
batch: int = 16
# Request body accepted by POST /pipeline/ingest.
class PipelineIngestRequest(BaseModel):
    # Identifier of the document; forwarded to the pipeline as `doc_id`.
    doc_id: str
    # Document text; forwarded to the pipeline as `text`.
    text: str
    # Forwarded to the pipeline as `generate_html`; True when the caller omits it.
    generate_html: bool = True
@app.get("/health")
def health() -> Dict[str, Any]:
return {
@@ -152,6 +160,12 @@ def index_reload() -> Dict[str, Any]:
return {"total": total}
# Ingest endpoint: passes one document to the shared pipeline together with the
# shared index, then reports the fields of the returned result.
# The `_` parameter exists only to attach the require_api_key dependency.
@app.post("/pipeline/ingest")
def pipeline_ingest(
    req: PipelineIngestRequest,
    _: None = Depends(require_api_key),
) -> Dict[str, Any]:
    outcome = pipeline.process(
        doc_id=req.doc_id,
        text=req.text,
        index=index,
        generate_html=req.generate_html,
    )

    # Build the payload key by key, in the order the response is emitted.
    payload: Dict[str, Any] = {"status": "ok"}
    payload["doc_id"] = outcome.doc_id
    payload["added"] = outcome.added_chunks
    payload["chunks"] = outcome.chunks
    payload["html_path"] = outcome.html_path
    return payload
# Paperless webhook placeholder (to be wired with user-provided details)
class PaperlessHook(BaseModel):
document_id: int