"""ModelAdapter — 범용 OpenAI-compat 모델 어댑터.""" from __future__ import annotations import json import logging from collections.abc import AsyncGenerator import httpx logger = logging.getLogger(__name__) class ModelAdapter: """OpenAI-compatible /v1/chat/completions 백엔드 범용 어댑터. Ollama, MLX 등 모두 동일 인터페이스로 호출.""" def __init__( self, name: str, base_url: str, model: str, system_prompt: str, temperature: float = 0.7, timeout: float = 120.0, max_tokens: int = 4096, ): self.name = name self.base_url = base_url self.model = model self.system_prompt = system_prompt self.temperature = temperature self.timeout = timeout self.max_tokens = max_tokens async def stream_chat(self, message: str, *, messages: list[dict] | None = None) -> AsyncGenerator[str, None]: """스트리밍 호출. content chunk를 yield. messages 직접 전달 가능.""" if messages is None: messages = [ {"role": "system", "content": self.system_prompt}, {"role": "user", "content": message}, ] payload = { "model": self.model, "messages": messages, "stream": True, "temperature": self.temperature, "max_tokens": self.max_tokens, } async with httpx.AsyncClient(timeout=self.timeout) as client: try: async with client.stream( "POST", f"{self.base_url}/v1/chat/completions", json=payload, ) as resp: if resp.status_code != 200: body = await resp.aread() logger.error("%s error %d: %s", self.name, resp.status_code, body.decode()) raise RuntimeError(f"{self.name} 응답 실패 ({resp.status_code})") async for line in resp.aiter_lines(): line = line.strip() if not line or not line.startswith("data: "): continue payload_str = line[len("data: "):] if payload_str == "[DONE]": return try: chunk = json.loads(payload_str) delta = chunk.get("choices", [{}])[0].get("delta", {}) content = delta.get("content", "") if content: yield content except (json.JSONDecodeError, IndexError, KeyError): continue except httpx.ConnectError: logger.error("%s connection failed: %s", self.name, self.base_url) raise except httpx.ReadTimeout: logger.error("%s read timeout", self.name) raise async def complete_chat(self, message: str, *, messages: list[dict] | None = None) -> str: """비스트리밍 호출. 전체 응답 텍스트 반환. messages 직접 전달 가능.""" if messages is None: messages = [ {"role": "system", "content": self.system_prompt}, {"role": "user", "content": message}, ] payload = { "model": self.model, "messages": messages, "stream": False, "temperature": self.temperature, "max_tokens": self.max_tokens, } async with httpx.AsyncClient(timeout=self.timeout) as client: resp = await client.post( f"{self.base_url}/v1/chat/completions", json=payload, ) resp.raise_for_status() data = resp.json() return data["choices"][0]["message"]["content"] async def health_check(self) -> bool: """GET /v1/models — 5초 timeout.""" try: async with httpx.AsyncClient(timeout=5.0) as client: resp = await client.get(f"{self.base_url}/v1/models") return resp.status_code < 500 except Exception: return False