feat: Paperless integration (content endpoint, document listing and sync). Add the /paperless/sync endpoint and docs

2025-08-13 08:02:59 +09:00
parent bcb1e543e6
commit 4c81686657
4 changed files with 107 additions and 8 deletions
--- a/server/paperless_client.py
+++ b/server/paperless_client.py
@@ -1,7 +1,7 @@
 from __future__ import annotations

 import os
-from typing import Any, Dict
+from typing import Any, Dict, List, Optional
 import requests


@@ -16,15 +16,47 @@ class PaperlessClient:
            headers["Authorization"] = f"Token {self.token}"
        return headers

-    def get_document_text(self, doc_id: int) -> str:
+    def get_document(self, doc_id: int) -> Dict[str, Any]:
        if not self.base_url:
            raise RuntimeError("PAPERLESS_BASE_URL not configured")
-        # Example endpoint; adjust to real Paperless API
        url = f"{self.base_url}/api/documents/{doc_id}/"
        resp = requests.get(url, headers=self._headers(), timeout=60)
        resp.raise_for_status()
-        data = resp.json()
-        # Prefer content field if available; else title
-        text = data.get("content", "") or data.get("notes", "") or data.get("title", "")
-        return text
+        return resp.json()
+
+    def get_document_text(self, doc_id: int) -> str:
+        if not self.base_url:
+            raise RuntimeError("PAPERLESS_BASE_URL not configured")
+        # Try content endpoint
+        url_content = f"{self.base_url}/api/documents/{doc_id}/content/"
+        try:
+            r = requests.get(url_content, headers=self._headers(), timeout=60)
+            if r.status_code == 200 and r.text:
+                return r.text
+        except Exception:
+            pass
+        # Try txt download
+        url_txt = f"{self.base_url}/api/documents/{doc_id}/download/?format=txt"
+        try:
+            r = requests.get(url_txt, headers=self._headers(), timeout=60)
+            if r.status_code == 200 and r.text:
+                return r.text
+        except Exception:
+            pass
+        # Fallback to metadata fields
+        data = self.get_document(doc_id)
+        return data.get("content", "") or data.get("notes", "") or data.get("title", "")
+
+    def list_documents(self, page_size: int = 50, ordering: str = "-created", tags: Optional[List[int]] = None, query: Optional[str] = None) -> Dict[str, Any]:
+        if not self.base_url:
+            raise RuntimeError("PAPERLESS_BASE_URL not configured")
+        params: Dict[str, Any] = {"page_size": page_size, "ordering": ordering}
+        if tags:
+            params["tags__id__in"] = ",".join(str(t) for t in tags)
+        if query:
+            params["query"] = query
+        url = f"{self.base_url}/api/documents/"
+        resp = requests.get(url, headers=self._headers(), params=params, timeout=60)
+        resp.raise_for_status()
+        return resp.json()