from __future__ import annotations

import os
from typing import Any, Dict

import requests


class PaperlessClient:
    """Small HTTP client that fetches the text of a Paperless document.

    Configuration falls back to the ``PAPERLESS_BASE_URL`` and
    ``PAPERLESS_TOKEN`` environment variables when the corresponding
    constructor argument is missing or empty.
    """

    def __init__(
        self,
        base_url: str | None = None,
        token: str | None = None,
        timeout: float = 60,
    ) -> None:
        """Store connection settings.

        Args:
            base_url: Root URL of the server; trailing slashes are stripped.
                Falls back to ``PAPERLESS_BASE_URL`` when falsy.
            token: API token; falls back to ``PAPERLESS_TOKEN`` when falsy.
            timeout: Timeout passed to ``requests.get`` for each call.
        """
        self.base_url = (base_url or os.getenv("PAPERLESS_BASE_URL", "")).rstrip("/")
        self.token = token or os.getenv("PAPERLESS_TOKEN", "")
        self.timeout = timeout

    def _headers(self) -> Dict[str, str]:
        """Return the request headers, adding ``Authorization`` if a token is set."""
        headers: Dict[str, str] = {"Accept": "application/json"}
        if self.token:
            headers["Authorization"] = f"Token {self.token}"
        return headers

    @staticmethod
    def _as_text(value: Any) -> str:
        """Reduce a JSON field value to a plain string ("" when unusable).

        Strings pass through unchanged. Lists are flattened to their string
        entries (or the ``"note"`` value of dict entries) joined by newlines.
        Anything else, including JSON ``null``, yields an empty string.
        """
        if isinstance(value, str):
            return value
        if isinstance(value, list):
            # NOTE(review): assumes notes arrive as a list of objects carrying
            # the text under a "note" key -- confirm against the deployed API.
            candidates = [
                item.get("note") if isinstance(item, dict) else item
                for item in value
            ]
            return "\n".join(c for c in candidates if isinstance(c, str) and c)
        return ""

    def get_document_text(self, doc_id: int) -> str:
        """Fetch one document and return its best available text.

        The first non-empty value among the ``content``, ``notes`` and
        ``title`` fields of the JSON response is returned, in that order.

        Args:
            doc_id: Identifier interpolated into the document URL.

        Returns:
            The selected text, or ``""`` when none of the fields holds text.

        Raises:
            RuntimeError: If no base URL is configured.
            requests.HTTPError: If the server answers with an error status.
        """
        if not self.base_url:
            raise RuntimeError("PAPERLESS_BASE_URL not configured")
        # Example endpoint; adjust to real Paperless API
        url = f"{self.base_url}/api/documents/{doc_id}/"
        resp = requests.get(url, headers=self._headers(), timeout=self.timeout)
        resp.raise_for_status()
        data = resp.json()
        # Prefer content, then notes, then title. Each candidate is coerced to
        # a string so the declared ``str`` return type always holds, even when
        # a field is null or not a string.
        for field in ("content", "notes", "title"):
            text = self._as_text(data.get(field))
            if text:
                return text
        return ""