From 9c70d3e8a154cb4b1d1e19c42b7598a44b670f0f Mon Sep 17 00:00:00 2001 From: hyungi Date: Wed, 13 Aug 2025 08:38:30 +0900 Subject: [PATCH] chore: save WIP before importing Document-AI subtree --- .gitignore | 3 +++ HYUNGI-HOME-CA.crt | 12 +++++++++ README.md | 2 ++ ca/ca-bundle.pem | 13 +++++++++ ca/intermediate_ca.crt | 13 +++++++++ ca/standard-cert.crt | 32 ++++++++++++++++++++++ scripts/install_launchd.sh | 54 ++++++++++++++++++++++++++++++++++++++ server/main.py | 20 +++++++++----- server/paperless_client.py | 16 ++++++++--- 9 files changed, 154 insertions(+), 11 deletions(-) create mode 100644 HYUNGI-HOME-CA.crt create mode 100644 ca/ca-bundle.pem create mode 100644 ca/intermediate_ca.crt create mode 100644 ca/standard-cert.crt create mode 100755 scripts/install_launchd.sh diff --git a/.gitignore b/.gitignore index f068030..e27c67c 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,6 @@ build/ data/ *.pdf +# Local env +.env + diff --git a/HYUNGI-HOME-CA.crt b/HYUNGI-HOME-CA.crt new file mode 100644 index 0000000..9ede6b5 --- /dev/null +++ b/HYUNGI-HOME-CA.crt @@ -0,0 +1,12 @@ +-----BEGIN CERTIFICATE----- +MIIBtzCCAV6gAwIBAgIRAJlMAYJ+9FWuLuhaeqLKuzEwCgYIKoZIzj0EAwIwOjEX +MBUGA1UEChMOSFlVTkdJLUhPTUUtQ0ExHzAdBgNVBAMTFkhZVU5HSS1IT01FLUNB +IFJvb3QgQ0EwHhcNMjUwODEwMjI1NjA0WhcNMzUwODA4MjI1NjA0WjA6MRcwFQYD +VQQKEw5IWVVOR0ktSE9NRS1DQTEfMB0GA1UEAxMWSFlVTkdJLUhPTUUtQ0EgUm9v +dCBDQTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABBrpCKBTfIvPdTDXW/qXUnqO +sOMOmSR4cBsDIh5hpNqTzDmAGWv8y7iSJ3s0KBtPfOE80IsgAEMGkO8iWIQQDESj +RTBDMA4GA1UdDwEB/wQEAwIBBjASBgNVHRMBAf8ECDAGAQH/AgEBMB0GA1UdDgQW +BBRPNRdB/SiyYcBFf5TimQ7YI01ZcjAKBggqhkjOPQQDAgNHADBEAiBZ1VLgInhw +Ad/fdgAg7mKPeZGhAq7XZ0RIlrzbGw0JTAIgT415n4A3kLKhsHhrkrfWuJvOavgN +D4csz04qpbswPgM= +-----END CERTIFICATE----- diff --git a/README.md b/README.md index 86be96b..5c5ef24 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,8 @@ curl -s -X POST http://localhost:26000/paperless/sync \ - `EMBEDDING_MODEL`(기본 `nomic-embed-text`) - `INDEX_PATH`(기본 `data/index.jsonl`) - `PAPERLESS_BASE_URL`, `PAPERLESS_TOKEN`(선택): Paperless API 연동 시 사용 +- `PAPERLESS_VERIFY_SSL`(기본 `true`): Paperless HTTPS 검증 비활성화는 `false` +- `PAPERLESS_CA_BUNDLE`(선택): 신뢰할 CA 번들 경로 지정 시 해당 번들로 검증 - `API_KEY`(선택): 설정 시 모든 민감 엔드포인트 호출에 `X-API-Key` 헤더 필요 - `CORS_ORIGINS`(선택): CORS 허용 오리진(쉼표 구분), 미설정 시 `*` diff --git a/ca/ca-bundle.pem b/ca/ca-bundle.pem new file mode 100644 index 0000000..f497325 --- /dev/null +++ b/ca/ca-bundle.pem @@ -0,0 +1,13 @@ +-----BEGIN CERTIFICATE----- +MIIB4DCCAYagAwIBAgIQNYeMnRkkRCMSymCTYWVHLzAKBggqhkjOPQQDAjA6MRcw +FQYDVQQKEw5IWVVOR0ktSE9NRS1DQTEfMB0GA1UEAxMWSFlVTkdJLUhPTUUtQ0Eg +Um9vdCBDQTAeFw0yNTA4MTAyMjU2MDVaFw0zNTA4MDgyMjU2MDVaMEIxFzAVBgNV +BAoTDkhZVU5HSS1IT01FLUNBMScwJQYDVQQDEx5IWVVOR0ktSE9NRS1DQSBJbnRl +cm1lZGlhdGUgQ0EwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAARuqjmRgxRCr7aW +VDEhP2cquiFwdL6QYEHQOsC1L0MFQRcF42oohIST3D+cA4r42KLvUyBmpd+MId1m +R7mwvt2Go2YwZDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADAd +BgNVHQ4EFgQUKaSBWtPK3Fq3F4mS3i+INcb5LTQwHwYDVR0jBBgwFoAUTzUXQf0o +smHARX+U4pkO2CNNWXIwCgYIKoZIzj0EAwIDSAAwRQIgBXlUO6QZNqJMZLs5q+DB +mJX5mQOKLAX9xve1zDK5XFYCIQDHT1myj9bWHDF5ZKMdzqtQCGNsTxK9x99gxmhn +fFW+3g== +-----END CERTIFICATE----- diff --git a/ca/intermediate_ca.crt b/ca/intermediate_ca.crt new file mode 100644 index 0000000..f497325 --- /dev/null +++ b/ca/intermediate_ca.crt @@ -0,0 +1,13 @@ +-----BEGIN CERTIFICATE----- +MIIB4DCCAYagAwIBAgIQNYeMnRkkRCMSymCTYWVHLzAKBggqhkjOPQQDAjA6MRcw +FQYDVQQKEw5IWVVOR0ktSE9NRS1DQTEfMB0GA1UEAxMWSFlVTkdJLUhPTUUtQ0Eg +Um9vdCBDQTAeFw0yNTA4MTAyMjU2MDVaFw0zNTA4MDgyMjU2MDVaMEIxFzAVBgNV +BAoTDkhZVU5HSS1IT01FLUNBMScwJQYDVQQDEx5IWVVOR0ktSE9NRS1DQSBJbnRl +cm1lZGlhdGUgQ0EwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAARuqjmRgxRCr7aW +VDEhP2cquiFwdL6QYEHQOsC1L0MFQRcF42oohIST3D+cA4r42KLvUyBmpd+MId1m +R7mwvt2Go2YwZDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADAd +BgNVHQ4EFgQUKaSBWtPK3Fq3F4mS3i+INcb5LTQwHwYDVR0jBBgwFoAUTzUXQf0o +smHARX+U4pkO2CNNWXIwCgYIKoZIzj0EAwIDSAAwRQIgBXlUO6QZNqJMZLs5q+DB +mJX5mQOKLAX9xve1zDK5XFYCIQDHT1myj9bWHDF5ZKMdzqtQCGNsTxK9x99gxmhn +fFW+3g== +-----END CERTIFICATE----- diff --git a/ca/standard-cert.crt b/ca/standard-cert.crt new file mode 100644 index 0000000..26f5cfe --- /dev/null +++ b/ca/standard-cert.crt @@ -0,0 +1,32 @@ +-----BEGIN CERTIFICATE----- +MIIDAjCCAqmgAwIBAgIQX0j/5HufTq45+4leMkBrDDAKBggqhkjOPQQDAjBCMRcw +FQYDVQQKEw5IWVVOR0ktSE9NRS1DQTEnMCUGA1UEAxMeSFlVTkdJLUhPTUUtQ0Eg +SW50ZXJtZWRpYXRlIENBMB4XDTI1MDgxMTAwMjkxOFoXDTI3MDgxMTAwMzAxOFow +FTETMBEGA1UEAxMKaHl1bmdpLm5ldDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCC +AQoCggEBAKdg4RayoCrBAyQw4Ql4ojQr6cGKO8qmLPwkk026UI1xjoPqXcYya2CF +P0yvSrlsuEGlltBFAwSyYcCiRKQzQ1E7o5PN6wFwYo1eo1BpXbBUQlrwRz3Vd1ZJ +6zWoFka3EbK6Ht4iB6Fp8/PDB7bqDiLXjuBwkQb6YeWn5Ff0kXxaiXsk0VbOjtrr +lPkq/M0COJTp33DVAKsW4CzjsTdSKns1k6xPuh19bIsXA56BpoyVks9YbFN2rx8b +J3jPSXwsipV6QxIeqvbXSwqSxrvUzhansyAQNaHOuJu3ZBpv4EOhqslXi157rVb9 +jYFuqBexVd69rPutuzjmbw5X+/JX+H8CAwEAAaOB4jCB3zAOBgNVHQ8BAf8EBAMC +BaAwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMB0GA1UdDgQWBBSvyMdI +BvLKmIul2mYiR4YLqLSA7jAfBgNVHSMEGDAWgBQppIFa08rcWrcXiZLeL4g1xvkt +NDAjBgNVHREEHDAaggpoeXVuZ2kubmV0ggwqLmh5dW5naS5uZXQwSQYMKwYBBAGC +pGTGKEABBDkwNwIBAQQFYWRtaW4EKzlOUG5ZdVRYTXBGMHAzemtSdEZRbjl5OEht +T3pRUnVUWm9mRFNJcGV4M28wCgYIKoZIzj0EAwIDRwAwRAIgH3rAfdCvSsjhRuQ/ +WVQre2/8bnE5Pdwj/GiQmrrgwhoCIFDntMaqd/2c820gJ+juoeRQwVZkKRPwGQOE +86Fsjnb4 +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIB4DCCAYagAwIBAgIQNYeMnRkkRCMSymCTYWVHLzAKBggqhkjOPQQDAjA6MRcw +FQYDVQQKEw5IWVVOR0ktSE9NRS1DQTEfMB0GA1UEAxMWSFlVTkdJLUhPTUUtQ0Eg +Um9vdCBDQTAeFw0yNTA4MTAyMjU2MDVaFw0zNTA4MDgyMjU2MDVaMEIxFzAVBgNV +BAoTDkhZVU5HSS1IT01FLUNBMScwJQYDVQQDEx5IWVVOR0ktSE9NRS1DQSBJbnRl +cm1lZGlhdGUgQ0EwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAARuqjmRgxRCr7aW +VDEhP2cquiFwdL6QYEHQOsC1L0MFQRcF42oohIST3D+cA4r42KLvUyBmpd+MId1m +R7mwvt2Go2YwZDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADAd +BgNVHQ4EFgQUKaSBWtPK3Fq3F4mS3i+INcb5LTQwHwYDVR0jBBgwFoAUTzUXQf0o +smHARX+U4pkO2CNNWXIwCgYIKoZIzj0EAwIDSAAwRQIgBXlUO6QZNqJMZLs5q+DB +mJX5mQOKLAX9xve1zDK5XFYCIQDHT1myj9bWHDF5ZKMdzqtQCGNsTxK9x99gxmhn +fFW+3g== +-----END CERTIFICATE----- diff --git a/scripts/install_launchd.sh b/scripts/install_launchd.sh new file mode 100755 index 0000000..ea9fa9d --- /dev/null +++ b/scripts/install_launchd.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +set -euo pipefail + +LABEL="net.hyungi.ai-server" +PLIST="$HOME/Library/LaunchAgents/${LABEL}.plist" +WORKDIR="$(pwd)" + +# load .env if present +if [ -f "$WORKDIR/.env" ]; then + set -a + # shellcheck disable=SC1091 + . "$WORKDIR/.env" + set +a +fi + +cat > "$PLIST" < + + + + Label${LABEL} + ProgramArguments + + ${WORKDIR}/.venv/bin/uvicorn + server.main:app + --host0.0.0.0 + --port${AI_SERVER_PORT:-26000} + + EnvironmentVariables + + OLLAMA_HOST${OLLAMA_HOST:-http://localhost:11434} + BASE_MODEL${BASE_MODEL:-qwen2.5:7b-instruct} + BOOST_MODEL${BOOST_MODEL:-qwen2.5:14b-instruct} + ENGLISH_MODEL${ENGLISH_MODEL:-llama3:8b-instruct} + ENGLISH_RATIO_THRESHOLD${ENGLISH_RATIO_THRESHOLD:-0.65} + EMBEDDING_MODEL${EMBEDDING_MODEL:-bge-m3} + INDEX_PATH${INDEX_PATH:-data/index.jsonl} + API_KEY${API_KEY:-} + CORS_ORIGINS${CORS_ORIGINS:-} + PAPERLESS_BASE_URL${PAPERLESS_BASE_URL:-} + PAPERLESS_TOKEN${PAPERLESS_TOKEN:-} + + WorkingDirectory${WORKDIR} + StandardOutPath${WORKDIR}/ai-server.out.log + StandardErrorPath${WORKDIR}/ai-server.err.log + RunAtLoad + KeepAlive + + +PLIST + +launchctl unload "$PLIST" 2>/dev/null || true +launchctl load -w "$PLIST" +echo "[ok] launchd agent installed: $PLIST" diff --git a/server/main.py b/server/main.py index 0ca0be4..0e6e649 100644 --- a/server/main.py +++ b/server/main.py @@ -188,6 +188,7 @@ def paperless_sync(req: PaperlessSyncRequest, _: None = Depends(require_api_key) client = PaperlessClient(settings.paperless_base_url, settings.paperless_token) from .index_store import IndexRow added_total = 0 + skipped = 0 next_url: str | None = None fetched = 0 @@ -205,13 +206,18 @@ def paperless_sync(req: PaperlessSyncRequest, _: None = Depends(require_api_key) doc_id = doc.get("id") if not doc_id: continue - text = client.get_document_text(int(doc_id)) - if not text: + try: + text = client.get_document_text(int(doc_id)) + if not text: + skipped += 1 + continue + parts = chunk_text(text) + for i, t in enumerate(parts): + vec = ollama.embeddings(settings.embedding_model, t) + to_append.append(IndexRow(id=f"paperless:{doc_id}:{i}", text=t, vector=vec, source="paperless")) + except Exception: + skipped += 1 continue - parts = chunk_text(text) - for i, t in enumerate(parts): - vec = ollama.embeddings(settings.embedding_model, t) - to_append.append(IndexRow(id=f"paperless:{doc_id}:{i}", text=t, vector=vec, source="paperless")) if to_append: added_total += index.append(to_append) fetched += len(results) @@ -221,7 +227,7 @@ def paperless_sync(req: PaperlessSyncRequest, _: None = Depends(require_api_key) if not next_url: break - return {"status": "synced", "added": added_total} + return {"status": "synced", "added": added_total, "skipped": skipped} # OpenAI-compatible chat completions (minimal) diff --git a/server/paperless_client.py b/server/paperless_client.py index f3e3185..0fad888 100644 --- a/server/paperless_client.py +++ b/server/paperless_client.py @@ -9,6 +9,14 @@ class PaperlessClient: def __init__(self, base_url: str | None = None, token: str | None = None) -> None: self.base_url = (base_url or os.getenv("PAPERLESS_BASE_URL", "")).rstrip("/") self.token = token or os.getenv("PAPERLESS_TOKEN", "") + verify_env = os.getenv("PAPERLESS_VERIFY_SSL", "true").lower().strip() + ca_bundle = os.getenv("PAPERLESS_CA_BUNDLE", "").strip() + if ca_bundle: + self.verify: Any = ca_bundle + elif verify_env in ("0", "false", "no"): + self.verify = False + else: + self.verify = True def _headers(self) -> Dict[str, str]: headers: Dict[str, str] = {"Accept": "application/json"} @@ -20,7 +28,7 @@ class PaperlessClient: if not self.base_url: raise RuntimeError("PAPERLESS_BASE_URL not configured") url = f"{self.base_url}/api/documents/{doc_id}/" - resp = requests.get(url, headers=self._headers(), timeout=60) + resp = requests.get(url, headers=self._headers(), timeout=60, verify=self.verify) resp.raise_for_status() return resp.json() @@ -30,7 +38,7 @@ class PaperlessClient: # Try content endpoint url_content = f"{self.base_url}/api/documents/{doc_id}/content/" try: - r = requests.get(url_content, headers=self._headers(), timeout=60) + r = requests.get(url_content, headers=self._headers(), timeout=60, verify=self.verify) if r.status_code == 200 and r.text: return r.text except Exception: @@ -38,7 +46,7 @@ class PaperlessClient: # Try txt download url_txt = f"{self.base_url}/api/documents/{doc_id}/download/?format=txt" try: - r = requests.get(url_txt, headers=self._headers(), timeout=60) + r = requests.get(url_txt, headers=self._headers(), timeout=60, verify=self.verify) if r.status_code == 200 and r.text: return r.text except Exception: @@ -56,7 +64,7 @@ class PaperlessClient: if query: params["query"] = query url = f"{self.base_url}/api/documents/" - resp = requests.get(url, headers=self._headers(), params=params, timeout=60) + resp = requests.get(url, headers=self._headers(), params=params, timeout=60, verify=self.verify) resp.raise_for_status() return resp.json()