feat: Paperless integration (content endpoint, list+sync). Add /paperless/sync and docs
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, List, Optional
|
||||
import requests
|
||||
|
||||
|
||||
@@ -16,15 +16,47 @@ class PaperlessClient:
|
||||
headers["Authorization"] = f"Token {self.token}"
|
||||
return headers
|
||||
|
||||
def get_document_text(self, doc_id: int) -> str:
|
||||
def get_document(self, doc_id: int) -> Dict[str, Any]:
    """Fetch the JSON record of a single Paperless document.

    Args:
        doc_id: Identifier interpolated into ``/api/documents/{doc_id}/``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If ``self.base_url`` is empty or unset.
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    if not self.base_url:
        raise RuntimeError("PAPERLESS_BASE_URL not configured")
    url = f"{self.base_url}/api/documents/{doc_id}/"
    resp = requests.get(url, headers=self._headers(), timeout=60)
    resp.raise_for_status()
    # Return the whole record; text extraction is the job of
    # get_document_text, which uses this method as its last fallback.
    return resp.json()
|
||||
|
||||
def get_document_text(self, doc_id: int) -> str:
    """Return the best available plain text for a Paperless document.

    Sources are tried in order and the first non-empty result wins:

    1. ``/api/documents/{doc_id}/content/``
    2. ``/api/documents/{doc_id}/download/?format=txt``
    3. The ``content``, ``notes`` and ``title`` fields of the document's
       JSON record, fetched through ``get_document``.

    The first two sources are best effort: a transport failure or a
    response that is not a non-empty HTTP 200 simply moves on to the next
    source. Only failures of the final metadata request propagate.

    Args:
        doc_id: Identifier of the document to read.

    Returns:
        The first non-empty string found, or ``""`` if no source has one.

    Raises:
        RuntimeError: If ``self.base_url`` is empty or unset.
        requests.HTTPError: If the metadata fallback request fails.
    """
    if not self.base_url:
        raise RuntimeError("PAPERLESS_BASE_URL not configured")

    doc_url = f"{self.base_url}/api/documents/{doc_id}"
    candidate_urls = (
        f"{doc_url}/content/",
        f"{doc_url}/download/?format=txt",
    )
    for url in candidate_urls:
        try:
            r = requests.get(url, headers=self._headers(), timeout=60)
        except requests.RequestException:
            # Network-level failure on an optional endpoint: try the next
            # source. Anything else is a programming error and should surface.
            continue
        if r.status_code == 200 and r.text:
            return r.text

    # Fallback to metadata fields. Only non-empty strings are accepted so
    # the declared ``str`` return type holds.
    # NOTE(review): Paperless-ngx appears to serialise ``notes`` as a list
    # of objects rather than a string - confirm against the API in use.
    data = self.get_document(doc_id)
    for field in ("content", "notes", "title"):
        value = data.get(field)
        if isinstance(value, str) and value:
            return value
    return ""
|
||||
|
||||
def list_documents(
    self,
    page_size: int = 50,
    ordering: str = "-created",
    tags: Optional[List[int]] = None,
    query: Optional[str] = None,
    page: Optional[int] = None,
) -> Dict[str, Any]:
    """Fetch one page of the Paperless document listing.

    Args:
        page_size: Sent as the ``page_size`` query parameter.
        ordering: Sent as the ``ordering`` query parameter.
        tags: Optional tag ids, sent comma-joined as ``tags__id__in``.
            ``None`` or an empty list applies no tag filter.
        query: Optional search string, sent as ``query``. ``None`` or an
            empty string applies no search.
        page: Optional page number, sent as ``page``. When ``None`` the
            parameter is omitted and the server picks its default page,
            which is the behaviour callers had before this parameter existed.

    Returns:
        The decoded JSON body of the listing response.

    Raises:
        RuntimeError: If ``self.base_url`` is empty or unset.
        requests.HTTPError: If the server answers with an error status.
    """
    if not self.base_url:
        raise RuntimeError("PAPERLESS_BASE_URL not configured")

    params: Dict[str, Any] = {"page_size": page_size, "ordering": ordering}
    if tags:
        params["tags__id__in"] = ",".join(str(t) for t in tags)
    if query:
        params["query"] = query
    if page is not None:
        params["page"] = page

    url = f"{self.base_url}/api/documents/"
    resp = requests.get(url, headers=self._headers(), params=params, timeout=60)
    resp.raise_for_status()
    return resp.json()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user