71 lines
2.9 KiB
Python
71 lines
2.9 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
from typing import Any, Dict, List, Optional
|
|
import requests
|
|
|
|
|
|
class PaperlessClient:
|
|
def __init__(self, base_url: str | None = None, token: str | None = None) -> None:
|
|
self.base_url = (base_url or os.getenv("PAPERLESS_BASE_URL", "")).rstrip("/")
|
|
self.token = token or os.getenv("PAPERLESS_TOKEN", "")
|
|
verify_env = os.getenv("PAPERLESS_VERIFY_SSL", "true").lower().strip()
|
|
ca_bundle = os.getenv("PAPERLESS_CA_BUNDLE", "").strip()
|
|
if ca_bundle:
|
|
self.verify: Any = ca_bundle
|
|
elif verify_env in ("0", "false", "no"):
|
|
self.verify = False
|
|
else:
|
|
self.verify = True
|
|
|
|
def _headers(self) -> Dict[str, str]:
|
|
headers: Dict[str, str] = {"Accept": "application/json"}
|
|
if self.token:
|
|
headers["Authorization"] = f"Token {self.token}"
|
|
return headers
|
|
|
|
def get_document(self, doc_id: int) -> Dict[str, Any]:
|
|
if not self.base_url:
|
|
raise RuntimeError("PAPERLESS_BASE_URL not configured")
|
|
url = f"{self.base_url}/api/documents/{doc_id}/"
|
|
resp = requests.get(url, headers=self._headers(), timeout=60, verify=self.verify)
|
|
resp.raise_for_status()
|
|
return resp.json()
|
|
|
|
def get_document_text(self, doc_id: int) -> str:
|
|
if not self.base_url:
|
|
raise RuntimeError("PAPERLESS_BASE_URL not configured")
|
|
# Try content endpoint
|
|
url_content = f"{self.base_url}/api/documents/{doc_id}/content/"
|
|
try:
|
|
r = requests.get(url_content, headers=self._headers(), timeout=60, verify=self.verify)
|
|
if r.status_code == 200 and r.text:
|
|
return r.text
|
|
except Exception:
|
|
pass
|
|
# Try txt download
|
|
url_txt = f"{self.base_url}/api/documents/{doc_id}/download/?format=txt"
|
|
try:
|
|
r = requests.get(url_txt, headers=self._headers(), timeout=60, verify=self.verify)
|
|
if r.status_code == 200 and r.text:
|
|
return r.text
|
|
except Exception:
|
|
pass
|
|
# Fallback to metadata fields
|
|
data = self.get_document(doc_id)
|
|
return data.get("content", "") or data.get("notes", "") or data.get("title", "")
|
|
|
|
def list_documents(self, page_size: int = 50, ordering: str = "-created", tags: Optional[List[int]] = None, query: Optional[str] = None) -> Dict[str, Any]:
|
|
if not self.base_url:
|
|
raise RuntimeError("PAPERLESS_BASE_URL not configured")
|
|
params: Dict[str, Any] = {"page_size": page_size, "ordering": ordering}
|
|
if tags:
|
|
params["tags__id__in"] = ",".join(str(t) for t in tags)
|
|
if query:
|
|
params["query"] = query
|
|
url = f"{self.base_url}/api/documents/"
|
|
resp = requests.get(url, headers=self._headers(), params=params, timeout=60, verify=self.verify)
|
|
resp.raise_for_status()
|
|
return resp.json()
|
|
|