chore: save WIP before importing Document-AI subtree
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -29,3 +29,6 @@ build/
|
|||||||
data/
|
data/
|
||||||
*.pdf
|
*.pdf
|
||||||
|
|
||||||
|
# Local env
|
||||||
|
.env
|
||||||
|
|
||||||
|
|||||||
12
HYUNGI-HOME-CA.crt
Normal file
12
HYUNGI-HOME-CA.crt
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIBtzCCAV6gAwIBAgIRAJlMAYJ+9FWuLuhaeqLKuzEwCgYIKoZIzj0EAwIwOjEX
|
||||||
|
MBUGA1UEChMOSFlVTkdJLUhPTUUtQ0ExHzAdBgNVBAMTFkhZVU5HSS1IT01FLUNB
|
||||||
|
IFJvb3QgQ0EwHhcNMjUwODEwMjI1NjA0WhcNMzUwODA4MjI1NjA0WjA6MRcwFQYD
|
||||||
|
VQQKEw5IWVVOR0ktSE9NRS1DQTEfMB0GA1UEAxMWSFlVTkdJLUhPTUUtQ0EgUm9v
|
||||||
|
dCBDQTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABBrpCKBTfIvPdTDXW/qXUnqO
|
||||||
|
sOMOmSR4cBsDIh5hpNqTzDmAGWv8y7iSJ3s0KBtPfOE80IsgAEMGkO8iWIQQDESj
|
||||||
|
RTBDMA4GA1UdDwEB/wQEAwIBBjASBgNVHRMBAf8ECDAGAQH/AgEBMB0GA1UdDgQW
|
||||||
|
BBRPNRdB/SiyYcBFf5TimQ7YI01ZcjAKBggqhkjOPQQDAgNHADBEAiBZ1VLgInhw
|
||||||
|
Ad/fdgAg7mKPeZGhAq7XZ0RIlrzbGw0JTAIgT415n4A3kLKhsHhrkrfWuJvOavgN
|
||||||
|
D4csz04qpbswPgM=
|
||||||
|
-----END CERTIFICATE-----
|
||||||
@@ -249,6 +249,8 @@ curl -s -X POST http://localhost:26000/paperless/sync \
|
|||||||
- `EMBEDDING_MODEL`(기본 `nomic-embed-text`)
|
- `EMBEDDING_MODEL`(기본 `nomic-embed-text`)
|
||||||
- `INDEX_PATH`(기본 `data/index.jsonl`)
|
- `INDEX_PATH`(기본 `data/index.jsonl`)
|
||||||
- `PAPERLESS_BASE_URL`, `PAPERLESS_TOKEN`(선택): Paperless API 연동 시 사용
|
- `PAPERLESS_BASE_URL`, `PAPERLESS_TOKEN`(선택): Paperless API 연동 시 사용
|
||||||
|
- `PAPERLESS_VERIFY_SSL`(기본 `true`): Paperless HTTPS 검증 비활성화는 `false`
|
||||||
|
- `PAPERLESS_CA_BUNDLE`(선택): 신뢰할 CA 번들 경로 지정 시 해당 번들로 검증
|
||||||
- `API_KEY`(선택): 설정 시 모든 민감 엔드포인트 호출에 `X-API-Key` 헤더 필요
|
- `API_KEY`(선택): 설정 시 모든 민감 엔드포인트 호출에 `X-API-Key` 헤더 필요
|
||||||
- `CORS_ORIGINS`(선택): CORS 허용 오리진(쉼표 구분), 미설정 시 `*`
|
- `CORS_ORIGINS`(선택): CORS 허용 오리진(쉼표 구분), 미설정 시 `*`
|
||||||
|
|
||||||
|
|||||||
13
ca/ca-bundle.pem
Normal file
13
ca/ca-bundle.pem
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIB4DCCAYagAwIBAgIQNYeMnRkkRCMSymCTYWVHLzAKBggqhkjOPQQDAjA6MRcw
|
||||||
|
FQYDVQQKEw5IWVVOR0ktSE9NRS1DQTEfMB0GA1UEAxMWSFlVTkdJLUhPTUUtQ0Eg
|
||||||
|
Um9vdCBDQTAeFw0yNTA4MTAyMjU2MDVaFw0zNTA4MDgyMjU2MDVaMEIxFzAVBgNV
|
||||||
|
BAoTDkhZVU5HSS1IT01FLUNBMScwJQYDVQQDEx5IWVVOR0ktSE9NRS1DQSBJbnRl
|
||||||
|
cm1lZGlhdGUgQ0EwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAARuqjmRgxRCr7aW
|
||||||
|
VDEhP2cquiFwdL6QYEHQOsC1L0MFQRcF42oohIST3D+cA4r42KLvUyBmpd+MId1m
|
||||||
|
R7mwvt2Go2YwZDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADAd
|
||||||
|
BgNVHQ4EFgQUKaSBWtPK3Fq3F4mS3i+INcb5LTQwHwYDVR0jBBgwFoAUTzUXQf0o
|
||||||
|
smHARX+U4pkO2CNNWXIwCgYIKoZIzj0EAwIDSAAwRQIgBXlUO6QZNqJMZLs5q+DB
|
||||||
|
mJX5mQOKLAX9xve1zDK5XFYCIQDHT1myj9bWHDF5ZKMdzqtQCGNsTxK9x99gxmhn
|
||||||
|
fFW+3g==
|
||||||
|
-----END CERTIFICATE-----
|
||||||
13
ca/intermediate_ca.crt
Normal file
13
ca/intermediate_ca.crt
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIB4DCCAYagAwIBAgIQNYeMnRkkRCMSymCTYWVHLzAKBggqhkjOPQQDAjA6MRcw
|
||||||
|
FQYDVQQKEw5IWVVOR0ktSE9NRS1DQTEfMB0GA1UEAxMWSFlVTkdJLUhPTUUtQ0Eg
|
||||||
|
Um9vdCBDQTAeFw0yNTA4MTAyMjU2MDVaFw0zNTA4MDgyMjU2MDVaMEIxFzAVBgNV
|
||||||
|
BAoTDkhZVU5HSS1IT01FLUNBMScwJQYDVQQDEx5IWVVOR0ktSE9NRS1DQSBJbnRl
|
||||||
|
cm1lZGlhdGUgQ0EwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAARuqjmRgxRCr7aW
|
||||||
|
VDEhP2cquiFwdL6QYEHQOsC1L0MFQRcF42oohIST3D+cA4r42KLvUyBmpd+MId1m
|
||||||
|
R7mwvt2Go2YwZDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADAd
|
||||||
|
BgNVHQ4EFgQUKaSBWtPK3Fq3F4mS3i+INcb5LTQwHwYDVR0jBBgwFoAUTzUXQf0o
|
||||||
|
smHARX+U4pkO2CNNWXIwCgYIKoZIzj0EAwIDSAAwRQIgBXlUO6QZNqJMZLs5q+DB
|
||||||
|
mJX5mQOKLAX9xve1zDK5XFYCIQDHT1myj9bWHDF5ZKMdzqtQCGNsTxK9x99gxmhn
|
||||||
|
fFW+3g==
|
||||||
|
-----END CERTIFICATE-----
|
||||||
32
ca/standard-cert.crt
Normal file
32
ca/standard-cert.crt
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIDAjCCAqmgAwIBAgIQX0j/5HufTq45+4leMkBrDDAKBggqhkjOPQQDAjBCMRcw
|
||||||
|
FQYDVQQKEw5IWVVOR0ktSE9NRS1DQTEnMCUGA1UEAxMeSFlVTkdJLUhPTUUtQ0Eg
|
||||||
|
SW50ZXJtZWRpYXRlIENBMB4XDTI1MDgxMTAwMjkxOFoXDTI3MDgxMTAwMzAxOFow
|
||||||
|
FTETMBEGA1UEAxMKaHl1bmdpLm5ldDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCC
|
||||||
|
AQoCggEBAKdg4RayoCrBAyQw4Ql4ojQr6cGKO8qmLPwkk026UI1xjoPqXcYya2CF
|
||||||
|
P0yvSrlsuEGlltBFAwSyYcCiRKQzQ1E7o5PN6wFwYo1eo1BpXbBUQlrwRz3Vd1ZJ
|
||||||
|
6zWoFka3EbK6Ht4iB6Fp8/PDB7bqDiLXjuBwkQb6YeWn5Ff0kXxaiXsk0VbOjtrr
|
||||||
|
lPkq/M0COJTp33DVAKsW4CzjsTdSKns1k6xPuh19bIsXA56BpoyVks9YbFN2rx8b
|
||||||
|
J3jPSXwsipV6QxIeqvbXSwqSxrvUzhansyAQNaHOuJu3ZBpv4EOhqslXi157rVb9
|
||||||
|
jYFuqBexVd69rPutuzjmbw5X+/JX+H8CAwEAAaOB4jCB3zAOBgNVHQ8BAf8EBAMC
|
||||||
|
BaAwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMB0GA1UdDgQWBBSvyMdI
|
||||||
|
BvLKmIul2mYiR4YLqLSA7jAfBgNVHSMEGDAWgBQppIFa08rcWrcXiZLeL4g1xvkt
|
||||||
|
NDAjBgNVHREEHDAaggpoeXVuZ2kubmV0ggwqLmh5dW5naS5uZXQwSQYMKwYBBAGC
|
||||||
|
pGTGKEABBDkwNwIBAQQFYWRtaW4EKzlOUG5ZdVRYTXBGMHAzemtSdEZRbjl5OEht
|
||||||
|
T3pRUnVUWm9mRFNJcGV4M28wCgYIKoZIzj0EAwIDRwAwRAIgH3rAfdCvSsjhRuQ/
|
||||||
|
WVQre2/8bnE5Pdwj/GiQmrrgwhoCIFDntMaqd/2c820gJ+juoeRQwVZkKRPwGQOE
|
||||||
|
86Fsjnb4
|
||||||
|
-----END CERTIFICATE-----
|
||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIB4DCCAYagAwIBAgIQNYeMnRkkRCMSymCTYWVHLzAKBggqhkjOPQQDAjA6MRcw
|
||||||
|
FQYDVQQKEw5IWVVOR0ktSE9NRS1DQTEfMB0GA1UEAxMWSFlVTkdJLUhPTUUtQ0Eg
|
||||||
|
Um9vdCBDQTAeFw0yNTA4MTAyMjU2MDVaFw0zNTA4MDgyMjU2MDVaMEIxFzAVBgNV
|
||||||
|
BAoTDkhZVU5HSS1IT01FLUNBMScwJQYDVQQDEx5IWVVOR0ktSE9NRS1DQSBJbnRl
|
||||||
|
cm1lZGlhdGUgQ0EwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAARuqjmRgxRCr7aW
|
||||||
|
VDEhP2cquiFwdL6QYEHQOsC1L0MFQRcF42oohIST3D+cA4r42KLvUyBmpd+MId1m
|
||||||
|
R7mwvt2Go2YwZDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADAd
|
||||||
|
BgNVHQ4EFgQUKaSBWtPK3Fq3F4mS3i+INcb5LTQwHwYDVR0jBBgwFoAUTzUXQf0o
|
||||||
|
smHARX+U4pkO2CNNWXIwCgYIKoZIzj0EAwIDSAAwRQIgBXlUO6QZNqJMZLs5q+DB
|
||||||
|
mJX5mQOKLAX9xve1zDK5XFYCIQDHT1myj9bWHDF5ZKMdzqtQCGNsTxK9x99gxmhn
|
||||||
|
fFW+3g==
|
||||||
|
-----END CERTIFICATE-----
|
||||||
54
scripts/install_launchd.sh
Executable file
54
scripts/install_launchd.sh
Executable file
@@ -0,0 +1,54 @@
|
|||||||
|
#!/usr/bin/env bash
# Install (or reinstall) the per-user launchd agent that runs the AI server
# with uvicorn.
#
# Usage: install_launchd.sh [WORKDIR]
#   WORKDIR  project root containing .venv/ and an optional .env file.
#            Defaults to the current directory.
#
# Settings are taken from the calling environment and from WORKDIR/.env;
# every value is XML-escaped before it is written into the plist.
set -euo pipefail

# The generated plist embeds API_KEY and PAPERLESS_TOKEN, so nothing this
# script creates should be readable by other local users.
umask 077

LABEL="net.hyungi.ai-server"
AGENT_DIR="$HOME/Library/LaunchAgents"
PLIST="$AGENT_DIR/${LABEL}.plist"
# Resolve to an absolute path: launchd needs absolute paths for the program,
# the working directory and the log files.
WORKDIR="$(cd "${1:-$(pwd)}" && pwd)"

# load .env if present
# NOTE: the file is sourced, i.e. executed as shell code -- it must be trusted.
if [ -f "$WORKDIR/.env" ]; then
  set -a
  # shellcheck disable=SC1091
  . "$WORKDIR/.env"
  set +a
fi

# Escape the characters that are special in XML text nodes. sed is used
# instead of ${var//pat/rep} because bash >= 5.2 gives '&' a special meaning
# in the replacement string.
xml_escape() {
  printf '%s' "$1" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# Print one <key>/<string> pair for the EnvironmentVariables dict.
#   $1  name of the environment variable
#   $2  fallback used when the variable is unset or empty (optional)
env_entry() {
  local name=$1
  local fallback=${2-}
  local value=${!name:-$fallback}
  printf '    <key>%s</key><string>%s</string>' "$name" "$(xml_escape "$value")"
}

WORKDIR_XML="$(xml_escape "$WORKDIR")"
PORT_XML="$(xml_escape "${AI_SERVER_PORT:-26000}")"

# KeepAlive makes launchd respawn the job forever, so point out a missing
# virtualenv now instead of leaving a silent crash loop behind.
if [ ! -x "$WORKDIR/.venv/bin/uvicorn" ]; then
  echo "[warn] $WORKDIR/.venv/bin/uvicorn not found or not executable" >&2
fi

mkdir -p "$AGENT_DIR"

# NOTE: the server listens on all interfaces (0.0.0.0); set API_KEY when the
# host is reachable from untrusted networks.
cat > "$PLIST" <<PLIST
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>Label</key><string>${LABEL}</string>
  <key>ProgramArguments</key>
  <array>
    <string>${WORKDIR_XML}/.venv/bin/uvicorn</string>
    <string>server.main:app</string>
    <string>--host</string><string>0.0.0.0</string>
    <string>--port</string><string>${PORT_XML}</string>
  </array>
  <key>EnvironmentVariables</key>
  <dict>
$(env_entry OLLAMA_HOST "http://localhost:11434")
$(env_entry BASE_MODEL "qwen2.5:7b-instruct")
$(env_entry BOOST_MODEL "qwen2.5:14b-instruct")
$(env_entry ENGLISH_MODEL "llama3:8b-instruct")
$(env_entry ENGLISH_RATIO_THRESHOLD "0.65")
$(env_entry EMBEDDING_MODEL "bge-m3")
$(env_entry INDEX_PATH "data/index.jsonl")
$(env_entry API_KEY)
$(env_entry CORS_ORIGINS)
$(env_entry PAPERLESS_BASE_URL)
$(env_entry PAPERLESS_TOKEN)
$(env_entry PAPERLESS_VERIFY_SSL "true")
$(env_entry PAPERLESS_CA_BUNDLE)
  </dict>
  <key>WorkingDirectory</key><string>${WORKDIR_XML}</string>
  <key>StandardOutPath</key><string>${WORKDIR_XML}/ai-server.out.log</string>
  <key>StandardErrorPath</key><string>${WORKDIR_XML}/ai-server.err.log</string>
  <key>RunAtLoad</key><true/>
  <key>KeepAlive</key><true/>
</dict>
</plist>
PLIST

# A plist left over from an earlier run keeps its old mode when it is
# overwritten, so tighten it explicitly.
chmod 600 "$PLIST"

# Unloading fails harmlessly when the agent was never loaded.
launchctl unload "$PLIST" 2>/dev/null || true
launchctl load -w "$PLIST"
echo "[ok] launchd agent installed: $PLIST"
|
||||||
@@ -188,6 +188,7 @@ def paperless_sync(req: PaperlessSyncRequest, _: None = Depends(require_api_key)
|
|||||||
client = PaperlessClient(settings.paperless_base_url, settings.paperless_token)
|
client = PaperlessClient(settings.paperless_base_url, settings.paperless_token)
|
||||||
from .index_store import IndexRow
|
from .index_store import IndexRow
|
||||||
added_total = 0
|
added_total = 0
|
||||||
|
skipped = 0
|
||||||
next_url: str | None = None
|
next_url: str | None = None
|
||||||
fetched = 0
|
fetched = 0
|
||||||
|
|
||||||
@@ -205,13 +206,18 @@ def paperless_sync(req: PaperlessSyncRequest, _: None = Depends(require_api_key)
|
|||||||
doc_id = doc.get("id")
|
doc_id = doc.get("id")
|
||||||
if not doc_id:
|
if not doc_id:
|
||||||
continue
|
continue
|
||||||
text = client.get_document_text(int(doc_id))
|
try:
|
||||||
if not text:
|
text = client.get_document_text(int(doc_id))
|
||||||
|
if not text:
|
||||||
|
skipped += 1
|
||||||
|
continue
|
||||||
|
parts = chunk_text(text)
|
||||||
|
for i, t in enumerate(parts):
|
||||||
|
vec = ollama.embeddings(settings.embedding_model, t)
|
||||||
|
to_append.append(IndexRow(id=f"paperless:{doc_id}:{i}", text=t, vector=vec, source="paperless"))
|
||||||
|
except Exception:
|
||||||
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
parts = chunk_text(text)
|
|
||||||
for i, t in enumerate(parts):
|
|
||||||
vec = ollama.embeddings(settings.embedding_model, t)
|
|
||||||
to_append.append(IndexRow(id=f"paperless:{doc_id}:{i}", text=t, vector=vec, source="paperless"))
|
|
||||||
if to_append:
|
if to_append:
|
||||||
added_total += index.append(to_append)
|
added_total += index.append(to_append)
|
||||||
fetched += len(results)
|
fetched += len(results)
|
||||||
@@ -221,7 +227,7 @@ def paperless_sync(req: PaperlessSyncRequest, _: None = Depends(require_api_key)
|
|||||||
if not next_url:
|
if not next_url:
|
||||||
break
|
break
|
||||||
|
|
||||||
return {"status": "synced", "added": added_total}
|
return {"status": "synced", "added": added_total, "skipped": skipped}
|
||||||
|
|
||||||
|
|
||||||
# OpenAI-compatible chat completions (minimal)
|
# OpenAI-compatible chat completions (minimal)
|
||||||
|
|||||||
@@ -9,6 +9,14 @@ class PaperlessClient:
|
|||||||
def __init__(self, base_url: str | None = None, token: str | None = None) -> None:
|
def __init__(self, base_url: str | None = None, token: str | None = None) -> None:
|
||||||
self.base_url = (base_url or os.getenv("PAPERLESS_BASE_URL", "")).rstrip("/")
|
self.base_url = (base_url or os.getenv("PAPERLESS_BASE_URL", "")).rstrip("/")
|
||||||
self.token = token or os.getenv("PAPERLESS_TOKEN", "")
|
self.token = token or os.getenv("PAPERLESS_TOKEN", "")
|
||||||
|
verify_env = os.getenv("PAPERLESS_VERIFY_SSL", "true").lower().strip()
|
||||||
|
ca_bundle = os.getenv("PAPERLESS_CA_BUNDLE", "").strip()
|
||||||
|
if ca_bundle:
|
||||||
|
self.verify: Any = ca_bundle
|
||||||
|
elif verify_env in ("0", "false", "no"):
|
||||||
|
self.verify = False
|
||||||
|
else:
|
||||||
|
self.verify = True
|
||||||
|
|
||||||
def _headers(self) -> Dict[str, str]:
|
def _headers(self) -> Dict[str, str]:
|
||||||
headers: Dict[str, str] = {"Accept": "application/json"}
|
headers: Dict[str, str] = {"Accept": "application/json"}
|
||||||
@@ -20,7 +28,7 @@ class PaperlessClient:
|
|||||||
if not self.base_url:
|
if not self.base_url:
|
||||||
raise RuntimeError("PAPERLESS_BASE_URL not configured")
|
raise RuntimeError("PAPERLESS_BASE_URL not configured")
|
||||||
url = f"{self.base_url}/api/documents/{doc_id}/"
|
url = f"{self.base_url}/api/documents/{doc_id}/"
|
||||||
resp = requests.get(url, headers=self._headers(), timeout=60)
|
resp = requests.get(url, headers=self._headers(), timeout=60, verify=self.verify)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return resp.json()
|
return resp.json()
|
||||||
|
|
||||||
@@ -30,7 +38,7 @@ class PaperlessClient:
|
|||||||
# Try content endpoint
|
# Try content endpoint
|
||||||
url_content = f"{self.base_url}/api/documents/{doc_id}/content/"
|
url_content = f"{self.base_url}/api/documents/{doc_id}/content/"
|
||||||
try:
|
try:
|
||||||
r = requests.get(url_content, headers=self._headers(), timeout=60)
|
r = requests.get(url_content, headers=self._headers(), timeout=60, verify=self.verify)
|
||||||
if r.status_code == 200 and r.text:
|
if r.status_code == 200 and r.text:
|
||||||
return r.text
|
return r.text
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -38,7 +46,7 @@ class PaperlessClient:
|
|||||||
# Try txt download
|
# Try txt download
|
||||||
url_txt = f"{self.base_url}/api/documents/{doc_id}/download/?format=txt"
|
url_txt = f"{self.base_url}/api/documents/{doc_id}/download/?format=txt"
|
||||||
try:
|
try:
|
||||||
r = requests.get(url_txt, headers=self._headers(), timeout=60)
|
r = requests.get(url_txt, headers=self._headers(), timeout=60, verify=self.verify)
|
||||||
if r.status_code == 200 and r.text:
|
if r.status_code == 200 and r.text:
|
||||||
return r.text
|
return r.text
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -56,7 +64,7 @@ class PaperlessClient:
|
|||||||
if query:
|
if query:
|
||||||
params["query"] = query
|
params["query"] = query
|
||||||
url = f"{self.base_url}/api/documents/"
|
url = f"{self.base_url}/api/documents/"
|
||||||
resp = requests.get(url, headers=self._headers(), params=params, timeout=60)
|
resp = requests.get(url, headers=self._headers(), params=params, timeout=60, verify=self.verify)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return resp.json()
|
return resp.json()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user