from __future__ import annotations

import httpx

from config import settings


class OllamaClient:
    """Thin async client for a local Ollama server: embeddings, chat, health."""

    def __init__(self):
        self.base_url = settings.OLLAMA_BASE_URL
        # Generous read timeout for model inference; connection attempts fail fast.
        self.timeout = httpx.Timeout(float(settings.OLLAMA_TIMEOUT), connect=10.0)

    async def _embed(self, client: httpx.AsyncClient, text: str) -> list[float]:
        """POST one embedding request on an already-open client and return the vector."""
        response = await client.post(
            f"{self.base_url}/api/embeddings",
            json={"model": settings.OLLAMA_EMBED_MODEL, "prompt": text},
        )
        response.raise_for_status()
        return response.json()["embedding"]

    async def generate_embedding(self, text: str) -> list[float]:
        """Return the embedding vector for *text*.

        Raises:
            httpx.HTTPStatusError: on a non-2xx response from the server.
        """
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            return await self._embed(client, text)

    async def batch_embeddings(self, texts: list[str]) -> list[list[float]]:
        """Embed each text sequentially, preserving input order.

        Fix: the original opened a new AsyncClient (and connection pool)
        per text; one shared client amortizes connection setup across the
        whole batch. Requests stay sequential, so server load is unchanged.
        """
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            return [await self._embed(client, text) for text in texts]

    async def generate_text(self, prompt: str, system: str | None = None) -> str:
        """Run a non-streaming chat completion and return the assistant reply.

        Args:
            prompt: user-role message content.
            system: optional system-role message prepended to the conversation.

        Raises:
            httpx.HTTPStatusError: on a non-2xx response from the server.
        """
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(
                f"{self.base_url}/api/chat",
                json={
                    "model": settings.OLLAMA_TEXT_MODEL,
                    "messages": messages,
                    "stream": False,
                    # Low temperature for near-deterministic output; cap reply length.
                    "options": {"temperature": 0.3, "num_predict": 2048},
                },
            )
            response.raise_for_status()
            return response.json()["message"]["content"]

    async def check_health(self) -> dict:
        """Probe the Ollama server; never raises.

        Returns {"status": "connected", "models": [...]} when reachable,
        otherwise {"status": "disconnected"}.
        """
        try:
            # Short fixed timeout: a health probe should fail fast regardless
            # of the (long) inference timeout configured on self.timeout.
            async with httpx.AsyncClient(timeout=httpx.Timeout(5.0)) as client:
                response = await client.get(f"{self.base_url}/api/tags")
                # Fix: the original skipped raise_for_status, so a non-2xx
                # error page that happened to parse as JSON was reported
                # as "connected".
                response.raise_for_status()
                models = response.json().get("models", [])
                return {
                    "status": "connected",
                    "models": [m["name"] for m in models],
                }
        except Exception:
            # Deliberate best-effort probe: any failure maps to "disconnected".
            return {"status": "disconnected"}


ollama_client = OllamaClient()