feat: Paperless integration (content endpoint, list+sync). Add /paperless/sync and docs
This commit is contained in:
@@ -90,7 +90,7 @@ def chat(req: ChatRequest) -> Dict[str, Any]:
|
||||
non_ascii_letters = sum((not ch.isascii()) and ch.isalpha() for ch in user_text)
|
||||
english_ratio = ascii_letters / max(ascii_letters + non_ascii_letters, 1)
|
||||
total_chars = len(user_text)
|
||||
if english_ratio > 0.8:
|
||||
if english_ratio > settings.english_ratio_threshold:
|
||||
model = settings.english_model
|
||||
else:
|
||||
model = settings.boost_model if (req.force_boost or total_chars > 2000) else settings.base_model
|
||||
@@ -175,6 +175,55 @@ def paperless_hook(hook: PaperlessHook, _: None = Depends(require_api_key)) -> D
|
||||
return {"status": "indexed", "document_id": hook.document_id, "chunks": added}
|
||||
|
||||
|
||||
class PaperlessSyncRequest(BaseModel):
    """Request body for POST /paperless/sync.

    Controls how documents are listed from the Paperless API and how many
    are pulled into the index in a single sync run.
    """

    page_size: int = 50  # documents per Paperless API page
    ordering: str = "-created"  # Paperless ordering expression (newest first)
    tags: List[int] | None = None  # restrict sync to documents with these tag ids
    query: str | None = None  # optional Paperless full-text query filter
    limit: int = 200  # stop paginating once this many documents have been fetched
|
||||
|
||||
|
||||
@app.post("/paperless/sync")
|
||||
def paperless_sync(req: PaperlessSyncRequest, _: None = Depends(require_api_key)) -> Dict[str, Any]:
|
||||
client = PaperlessClient(settings.paperless_base_url, settings.paperless_token)
|
||||
from .index_store import IndexRow
|
||||
added_total = 0
|
||||
next_url: str | None = None
|
||||
fetched = 0
|
||||
|
||||
while True:
|
||||
if next_url:
|
||||
import requests as _rq
|
||||
resp = _rq.get(next_url, headers=client._headers(), timeout=60)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
else:
|
||||
data = client.list_documents(page_size=req.page_size, ordering=req.ordering, tags=req.tags, query=req.query)
|
||||
results = data.get("results", [])
|
||||
to_append: List[IndexRow] = []
|
||||
for doc in results:
|
||||
doc_id = doc.get("id")
|
||||
if not doc_id:
|
||||
continue
|
||||
text = client.get_document_text(int(doc_id))
|
||||
if not text:
|
||||
continue
|
||||
parts = chunk_text(text)
|
||||
for i, t in enumerate(parts):
|
||||
vec = ollama.embeddings(settings.embedding_model, t)
|
||||
to_append.append(IndexRow(id=f"paperless:{doc_id}:{i}", text=t, vector=vec, source="paperless"))
|
||||
if to_append:
|
||||
added_total += index.append(to_append)
|
||||
fetched += len(results)
|
||||
if fetched >= req.limit:
|
||||
break
|
||||
next_url = data.get("next")
|
||||
if not next_url:
|
||||
break
|
||||
|
||||
return {"status": "synced", "added": added_total}
|
||||
|
||||
|
||||
# OpenAI-compatible chat completions (minimal)
|
||||
class ChatCompletionsRequest(BaseModel):
|
||||
model: str | None = None
|
||||
|
||||
Reference in New Issue
Block a user