chore: save WIP before importing Document-AI subtree
This commit is contained in:
@@ -188,6 +188,7 @@ def paperless_sync(req: PaperlessSyncRequest, _: None = Depends(require_api_key)
|
||||
client = PaperlessClient(settings.paperless_base_url, settings.paperless_token)
|
||||
from .index_store import IndexRow
|
||||
added_total = 0
|
||||
skipped = 0
|
||||
next_url: str | None = None
|
||||
fetched = 0
|
||||
|
||||
@@ -205,13 +206,18 @@ def paperless_sync(req: PaperlessSyncRequest, _: None = Depends(require_api_key)
|
||||
doc_id = doc.get("id")
|
||||
if not doc_id:
|
||||
continue
|
||||
text = client.get_document_text(int(doc_id))
|
||||
if not text:
|
||||
try:
|
||||
text = client.get_document_text(int(doc_id))
|
||||
if not text:
|
||||
skipped += 1
|
||||
continue
|
||||
parts = chunk_text(text)
|
||||
for i, t in enumerate(parts):
|
||||
vec = ollama.embeddings(settings.embedding_model, t)
|
||||
to_append.append(IndexRow(id=f"paperless:{doc_id}:{i}", text=t, vector=vec, source="paperless"))
|
||||
except Exception:
|
||||
skipped += 1
|
||||
continue
|
||||
parts = chunk_text(text)
|
||||
for i, t in enumerate(parts):
|
||||
vec = ollama.embeddings(settings.embedding_model, t)
|
||||
to_append.append(IndexRow(id=f"paperless:{doc_id}:{i}", text=t, vector=vec, source="paperless"))
|
||||
if to_append:
|
||||
added_total += index.append(to_append)
|
||||
fetched += len(results)
|
||||
@@ -221,7 +227,7 @@ def paperless_sync(req: PaperlessSyncRequest, _: None = Depends(require_api_key)
|
||||
if not next_url:
|
||||
break
|
||||
|
||||
return {"status": "synced", "added": added_total}
|
||||
return {"status": "synced", "added": added_total, "skipped": skipped}
|
||||
|
||||
|
||||
# OpenAI-compatible chat completions (minimal)
|
||||
|
||||
@@ -9,6 +9,14 @@ class PaperlessClient:
|
||||
def __init__(self, base_url: str | None = None, token: str | None = None) -> None:
|
||||
self.base_url = (base_url or os.getenv("PAPERLESS_BASE_URL", "")).rstrip("/")
|
||||
self.token = token or os.getenv("PAPERLESS_TOKEN", "")
|
||||
verify_env = os.getenv("PAPERLESS_VERIFY_SSL", "true").lower().strip()
|
||||
ca_bundle = os.getenv("PAPERLESS_CA_BUNDLE", "").strip()
|
||||
if ca_bundle:
|
||||
self.verify: Any = ca_bundle
|
||||
elif verify_env in ("0", "false", "no"):
|
||||
self.verify = False
|
||||
else:
|
||||
self.verify = True
|
||||
|
||||
def _headers(self) -> Dict[str, str]:
|
||||
headers: Dict[str, str] = {"Accept": "application/json"}
|
||||
@@ -20,7 +28,7 @@ class PaperlessClient:
|
||||
if not self.base_url:
|
||||
raise RuntimeError("PAPERLESS_BASE_URL not configured")
|
||||
url = f"{self.base_url}/api/documents/{doc_id}/"
|
||||
resp = requests.get(url, headers=self._headers(), timeout=60)
|
||||
resp = requests.get(url, headers=self._headers(), timeout=60, verify=self.verify)
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
@@ -30,7 +38,7 @@ class PaperlessClient:
|
||||
# Try content endpoint
|
||||
url_content = f"{self.base_url}/api/documents/{doc_id}/content/"
|
||||
try:
|
||||
r = requests.get(url_content, headers=self._headers(), timeout=60)
|
||||
r = requests.get(url_content, headers=self._headers(), timeout=60, verify=self.verify)
|
||||
if r.status_code == 200 and r.text:
|
||||
return r.text
|
||||
except Exception:
|
||||
@@ -38,7 +46,7 @@ class PaperlessClient:
|
||||
# Try txt download
|
||||
url_txt = f"{self.base_url}/api/documents/{doc_id}/download/?format=txt"
|
||||
try:
|
||||
r = requests.get(url_txt, headers=self._headers(), timeout=60)
|
||||
r = requests.get(url_txt, headers=self._headers(), timeout=60, verify=self.verify)
|
||||
if r.status_code == 200 and r.text:
|
||||
return r.text
|
||||
except Exception:
|
||||
@@ -56,7 +64,7 @@ class PaperlessClient:
|
||||
if query:
|
||||
params["query"] = query
|
||||
url = f"{self.base_url}/api/documents/"
|
||||
resp = requests.get(url, headers=self._headers(), params=params, timeout=60)
|
||||
resp = requests.get(url, headers=self._headers(), params=params, timeout=60, verify=self.verify)
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user