# ai-server/server/ollama_client.py

from __future__ import annotations

import json
from typing import Any, Dict, Iterable, List

import requests


class OllamaClient:
    """Minimal HTTP client for an Ollama server's REST API.

    Wraps the ``/api/embeddings`` and ``/api/chat`` endpoints using
    ``requests``. HTTP error statuses are surfaced through
    ``Response.raise_for_status()``.
    """

    def __init__(self, host: str) -> None:
        """Store the normalized base URL of the server.

        Args:
            host: Server address, with or without a scheme. Surrounding
                whitespace and trailing slashes are removed, and ``http://``
                is prepended when no ``http://``/``https://`` scheme is given.
        """
        host = host.strip()
        # Compare case-insensitively so "HTTP://host" is not double-prefixed.
        if not host.lower().startswith(("http://", "https://")):
            host = "http://" + host
        self.host = host.rstrip("/")

    def embeddings(self, model: str, text: str) -> List[float]:
        """Return the embedding vector for *text* computed by *model*.

        Args:
            model: Name of the model, sent as the ``model`` field.
            text: Input text, sent as the ``prompt`` field.

        Returns:
            The value of the ``embedding`` key of the JSON response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            KeyError: If the response has no ``embedding`` key.
        """
        url = f"{self.host}/api/embeddings"
        resp = requests.post(url, json={"model": model, "prompt": text}, timeout=120)
        resp.raise_for_status()
        data = resp.json()
        return data["embedding"]

    def chat(self, model: str, messages: List[Dict[str, str]], stream: bool = False, options: Dict[str, Any] | None = None) -> Dict[str, Any]:
        """Send a chat request and return the response as a single dict.

        Args:
            model: Name of the model, sent as the ``model`` field.
            messages: Conversation so far, sent as the ``messages`` field.
            stream: Forwarded to the server as the ``stream`` field. When
                true, the chunked reply is consumed here and merged, so the
                caller still receives one dict.
            options: Optional model options; included in the payload only
                when non-empty.

        Returns:
            The decoded JSON response. For a streamed reply, the merged
            result of all chunks (see ``_merge_stream_chunks``).

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        url = f"{self.host}/api/chat"
        payload: Dict[str, Any] = {"model": model, "messages": messages, "stream": stream}
        if options:
            payload["options"] = options

        if not stream:
            resp = requests.post(url, json=payload, timeout=600)
            resp.raise_for_status()
            return resp.json()

        # A streamed reply is newline-delimited JSON (one object per line), so
        # resp.json() cannot parse it; read it line by line instead.
        with requests.post(url, json=payload, timeout=600, stream=True) as resp:
            resp.raise_for_status()
            return self._merge_stream_chunks(resp.iter_lines())

    @staticmethod
    def _merge_stream_chunks(lines: Iterable[bytes | str]) -> Dict[str, Any]:
        """Merge newline-delimited JSON chat chunks into one response dict.

        Top-level keys take the value from the last chunk that carries them.
        The ``message`` objects are merged the same way, except that their
        ``content`` fragments are concatenated in arrival order. Blank lines
        are skipped. A ``message`` key is only present in the result if at
        least one chunk had one.
        """
        merged: Dict[str, Any] = {}
        message: Dict[str, Any] = {}
        content_parts: List[str] = []
        saw_message = False

        for line in lines:
            if not line:
                continue  # keep-alive / blank separator lines
            chunk = json.loads(line)
            chunk_message = chunk.pop("message", None)
            merged.update(chunk)
            if isinstance(chunk_message, dict):
                saw_message = True
                fragment = chunk_message.pop("content", None)
                if fragment:
                    content_parts.append(fragment)
                message.update(chunk_message)

        if saw_message:
            message["content"] = "".join(content_parts)
            merged["message"] = message
        return merged