from __future__ import annotations

import logging
import os
from typing import Any, Dict, List, Optional

import requests

_logger = logging.getLogger(__name__)


class PaperlessClient:
    """Small HTTP client for the ``/api/documents/`` endpoints of a Paperless server.

    Configuration is taken from constructor arguments first and from
    environment variables second:

    * ``PAPERLESS_BASE_URL`` -- server root; trailing slashes are stripped.
    * ``PAPERLESS_TOKEN`` -- sent as ``Authorization: Token <token>`` when set.
    * ``PAPERLESS_CA_BUNDLE`` -- if non-empty, passed to requests as ``verify``.
    * ``PAPERLESS_VERIFY_SSL`` -- ``0``/``false``/``no`` (case-insensitive)
      turns certificate verification off; consulted only when no CA bundle
      is configured.
    """

    # Per-request timeout in seconds. Kept as a class-level default so that
    # instances created without running __init__ still have a value.
    timeout: float = 60

    def __init__(
        self,
        base_url: str | None = None,
        token: str | None = None,
        *,
        timeout: float = 60,
    ) -> None:
        """Create a client.

        Args:
            base_url: Server root URL. Falls back to ``PAPERLESS_BASE_URL``.
            token: API token. Falls back to ``PAPERLESS_TOKEN``.
            timeout: Timeout in seconds handed to every ``requests.get`` call.
        """
        self.base_url = (base_url or os.getenv("PAPERLESS_BASE_URL", "")).rstrip("/")
        self.token = token or os.getenv("PAPERLESS_TOKEN", "")
        self.timeout = timeout
        self.verify: bool | str = self._verify_from_env()

    @staticmethod
    def _verify_from_env() -> bool | str:
        """Return the ``verify`` value for requests, derived from the environment."""
        ca_bundle = os.getenv("PAPERLESS_CA_BUNDLE", "").strip()
        if ca_bundle:
            # An explicit CA bundle takes precedence over PAPERLESS_VERIFY_SSL.
            return ca_bundle
        verify_env = os.getenv("PAPERLESS_VERIFY_SSL", "true").lower().strip()
        return verify_env not in ("0", "false", "no")

    def _headers(self) -> Dict[str, str]:
        """Return the request headers, including the token header when a token is set."""
        headers: Dict[str, str] = {"Accept": "application/json"}
        if self.token:
            headers["Authorization"] = f"Token {self.token}"
        return headers

    def _url(self, path: str) -> str:
        """Join *path* onto the base URL.

        Raises:
            RuntimeError: If no base URL is configured.
        """
        if not self.base_url:
            raise RuntimeError("PAPERLESS_BASE_URL not configured")
        return f"{self.base_url}{path}"

    def _get(self, path: str, params: Optional[Dict[str, Any]] = None) -> requests.Response:
        """Issue a GET for *path* with the client's headers, timeout and TLS settings."""
        url = self._url(path)
        kwargs: Dict[str, Any] = {
            "headers": self._headers(),
            "timeout": self.timeout,
            "verify": self.verify,
        }
        # Only forward ``params`` when the caller supplied them, so the
        # requests.get call shape matches what each public method always used.
        if params is not None:
            kwargs["params"] = params
        return requests.get(url, **kwargs)

    def _try_get_text(self, path: str) -> str:
        """Best-effort fetch of *path*; return the body on HTTP 200, else ``""``.

        Request-level failures (connection errors, timeouts, ...) are logged
        at debug level and reported as an empty string so the caller can try
        the next source. Any other exception propagates.
        """
        try:
            resp = self._get(path)
        except requests.RequestException as exc:
            _logger.debug("Paperless text lookup via %s failed: %s", path, exc)
            return ""
        if resp.status_code == 200:
            return resp.text
        return ""

    def get_document(self, doc_id: int) -> Dict[str, Any]:
        """Fetch the JSON record for one document.

        Raises:
            RuntimeError: If no base URL is configured.
            requests.HTTPError: If the server answers with an error status.
        """
        resp = self._get(f"/api/documents/{doc_id}/")
        resp.raise_for_status()
        return resp.json()

    def get_document_text(self, doc_id: int) -> str:
        """Return text for a document, trying several sources in order.

        Order: the ``content/`` endpoint, then the ``download/?format=txt``
        endpoint, then the ``content``, ``notes`` and ``title`` fields of the
        document record. The first non-empty result wins.

        Raises:
            RuntimeError: If no base URL is configured.
            requests.HTTPError: If the final metadata request fails.
        """
        # Fail fast on missing configuration before any probing starts.
        self._url("")

        probe_paths = (
            f"/api/documents/{doc_id}/content/",
            f"/api/documents/{doc_id}/download/?format=txt",
        )
        for path in probe_paths:
            text = self._try_get_text(path)
            if text:
                return text

        # Fallback to metadata fields. Only non-empty strings are accepted so
        # the declared ``str`` return type holds even when a field carries a
        # non-string value (for example a list or None).
        data = self.get_document(doc_id)
        for key in ("content", "notes", "title"):
            value = data.get(key)
            if isinstance(value, str) and value:
                return value
        return ""

    def list_documents(
        self,
        page_size: int = 50,
        ordering: str = "-created",
        tags: Optional[List[int]] = None,
        query: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Fetch one page of the document listing as decoded JSON.

        Args:
            page_size: Sent as the ``page_size`` query parameter.
            ordering: Sent as the ``ordering`` query parameter.
            tags: Tag ids, sent comma-joined as ``tags__id__in`` when non-empty.
            query: Sent as the ``query`` parameter when non-empty.

        Raises:
            RuntimeError: If no base URL is configured.
            requests.HTTPError: If the server answers with an error status.
        """
        params: Dict[str, Any] = {"page_size": page_size, "ordering": ordering}
        if tags:
            params["tags__id__in"] = ",".join(str(t) for t in tags)
        if query:
            params["query"] = query
        resp = self._get("/api/documents/", params=params)
        resp.raise_for_status()
        return resp.json()