# hyungi_document_server/app/ai/client.py

"""AI 추상화 레이어 — 통합 클라이언트. 기본값은 항상 Qwen3.5."""

from pathlib import Path

import httpx

from core.config import settings

# Prompt loading: templates live in the "prompts" directory that sits beside
# the directory containing this module.
PROMPTS_DIR = Path(__file__).parent.parent.joinpath("prompts")


def _load_prompt(name: str) -> str:
    """Return the UTF-8 text of the prompt file *name* located in PROMPTS_DIR.

    Raises whatever ``Path.read_text`` raises (e.g. ``FileNotFoundError``)
    when the file cannot be read.
    """
    prompt_path = PROMPTS_DIR / name
    return prompt_path.read_text(encoding="utf-8")


# Classification prompt template, read once at import time. When the template
# file is absent the constant is an empty string instead of raising.
if (PROMPTS_DIR / "classify.txt").exists():
    CLASSIFY_PROMPT = _load_prompt("classify.txt")
else:
    CLASSIFY_PROMPT = ""


class AIClient:
    """Unified client for model calls made through the AI gateway.

    Model selection is driven by ``settings.ai``: ``primary`` is the default
    chat model (Qwen3.5 according to the original notes), ``fallback`` is used
    when the primary is unreachable, ``premium`` is used for long or
    explicitly requested summaries, and ``embedding`` describes the embedding
    endpoint.

    The instance owns one ``httpx.AsyncClient``. Release it either by calling
    :meth:`close` or by using the instance as an async context manager::

        async with AIClient() as client:
            summary = await client.summarize(text)
    """

    # Texts longer than this many characters are summarized by the premium model.
    PREMIUM_SUMMARY_THRESHOLD = 15000

    def __init__(self):
        """Bind the AI settings and open the shared async HTTP client."""
        self.ai = settings.ai
        # 120 s is the client-wide default; chat requests override it with the
        # per-model timeout in _request().
        self._http = httpx.AsyncClient(timeout=120)

    async def __aenter__(self) -> "AIClient":
        """Enter the async context; returns this client unchanged."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the HTTP client on context exit; exceptions are not suppressed."""
        await self.close()

    async def classify(self, text: str) -> str:
        """Classify a document using the primary model.

        ``{document_text}`` in ``CLASSIFY_PROMPT`` is replaced with *text* and
        the result is sent through :meth:`_call_chat`, so the fallback model
        may answer if the primary is unreachable.

        Returns:
            The raw model reply as a string. It is NOT parsed here; callers
            that need structured data must decode it themselves.

        Note:
            If the prompt template file was missing at import time,
            ``CLASSIFY_PROMPT`` is empty and an empty prompt is sent.
        """
        prompt = CLASSIFY_PROMPT.replace("{document_text}", text)
        return await self._call_chat(self.ai.primary, prompt)

    async def summarize(self, text: str, force_premium: bool = False) -> str:
        """Summarize a document.

        Args:
            text: Document text to summarize.
            force_premium: Use the premium model regardless of text length.

        Returns:
            The model's summary. The primary model is used unless
            *force_premium* is set or *text* is longer than
            ``PREMIUM_SUMMARY_THRESHOLD`` characters.
        """
        model = self.ai.primary
        if force_premium or len(text) > self.PREMIUM_SUMMARY_THRESHOLD:
            model = self.ai.premium
        return await self._call_chat(model, f"다음 문서를 500자 이내로 요약해주세요:\n\n{text}")

    async def embed(self, text: str) -> list[float]:
        """Return the embedding vector for *text*.

        Posts ``{"model", "prompt"}`` to the configured embedding endpoint
        (served by the GPU server per the original notes) and returns the
        ``"embedding"`` field of the JSON reply.

        Raises:
            httpx.HTTPStatusError: If the endpoint answers with an error status.
        """
        response = await self._http.post(
            self.ai.embedding.endpoint,
            json={"model": self.ai.embedding.model, "prompt": text},
        )
        response.raise_for_status()
        return response.json()["embedding"]

    async def ocr(self, image_bytes: bytes) -> str:
        """Run OCR on an image. Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        # TODO: call the Qwen2.5-VL-7B vision model.
        raise NotImplementedError("OCR는 Phase 1에서 구현")

    async def _call_chat(self, model_config, prompt: str) -> str:
        """Send *prompt* to *model_config*, with automatic fallback.

        Only timeouts and connection failures trigger the fallback, and only
        when the failing model is the primary one. Any other model, and any
        other error (including HTTP error statuses), propagates to the caller.
        """
        try:
            return await self._request(model_config, prompt)
        except (httpx.TimeoutException, httpx.ConnectError):
            if model_config == self.ai.primary:
                return await self._request(self.ai.fallback, prompt)
            raise

    async def _request(self, model_config, prompt: str) -> str:
        """Perform one chat request against a single model.

        Sends an OpenAI-compatible payload with a single user message, using
        the model's own ``endpoint``, ``model``, ``max_tokens`` and
        ``timeout``.

        Returns:
            The ``content`` of the first choice's message.

        Raises:
            httpx.HTTPStatusError: If the endpoint answers with an error status.
        """
        response = await self._http.post(
            model_config.endpoint,
            json={
                "model": model_config.model,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": model_config.max_tokens,
            },
            timeout=model_config.timeout,
        )
        response.raise_for_status()
        data = response.json()
        return data["choices"][0]["message"]["content"]

    async def close(self):
        """Close the underlying HTTP client and release its connections."""
        await self._http.aclose()