feat: local AI server scaffolding (FastAPI, RAG, embeddings). Port policy (>=26000), README/API docs, scripts.

2025-08-13 07:24:06 +09:00
commit 72d889f5ef
15 changed files with 2486 additions and 0 deletions
--- a/server/ollama_client.py
+++ b/server/ollama_client.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+import requests
+from typing import List, Dict, Any
+
+
+class OllamaClient:
+    def __init__(self, host: str) -> None:
+        host = host.strip()
+        if not host.startswith("http://") and not host.startswith("https://"):
+            host = "http://" + host
+        self.host = host.rstrip("/")
+
+    def embeddings(self, model: str, text: str) -> List[float]:
+        url = f"{self.host}/api/embeddings"
+        resp = requests.post(url, json={"model": model, "prompt": text}, timeout=120)
+        resp.raise_for_status()
+        data = resp.json()
+        return data["embedding"]
+
+    def chat(self, model: str, messages: List[Dict[str, str]], stream: bool = False, options: Dict[str, Any] | None = None) -> Dict[str, Any]:
+        url = f"{self.host}/api/chat"
+        payload: Dict[str, Any] = {"model": model, "messages": messages, "stream": stream}
+        if options:
+            payload["options"] = options
+        resp = requests.post(url, json=payload, timeout=600)
+        resp.raise_for_status()
+        return resp.json()
+