Files
gpu-services/hub-api/routers/embeddings.py
Hyungi Ahn 3794afff95 feat: AI Gateway Phase 1 - FastAPI core implementation
Initial implementation of the central AI routing service on the GPU server:
- OpenAI-compatible API (/v1/chat/completions, /v1/models, /v1/embeddings)
- Model registry + backend health checks (30-second loop)
- Ollama SSE proxy (NDJSON → OpenAI SSE conversion)
- Dual-path JWT authentication (httpOnly cookie + Bearer token)
- owner/guest role separation, login rate limiting
- Per-backend rate limiting (in preparation for NanoClaude)
- Predefined SQLite schema (aiosqlite + WAL)
- Docker Compose + Caddy reverse proxy
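The NDJSON → OpenAI SSE conversion mentioned above could be sketched roughly as follows. This is an illustrative sketch, not the gateway's actual code: it assumes Ollama's `/api/chat` streaming chunk shape (`{"message": {"content": ...}, "done": ...}`) and OpenAI's `chat.completion.chunk` schema; the function name and parameters are made up for the example.

```python
import json
import time


def ollama_chunk_to_openai_sse(line: str, request_id: str, model: str) -> str:
    """Convert one Ollama NDJSON chat chunk into an OpenAI-style SSE event.

    Illustrative only: assumes Ollama's /api/chat chunk format and emits an
    OpenAI chat.completion.chunk payload framed as a single SSE data event.
    """
    chunk = json.loads(line)
    done = chunk.get("done", False)
    payload = {
        "id": request_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {
                "index": 0,
                # Final chunk carries no content, only the finish reason.
                "delta": {} if done else {"content": chunk["message"]["content"]},
                "finish_reason": "stop" if done else None,
            }
        ],
    }
    # SSE events are "data: <payload>" terminated by a blank line.
    return f"data: {json.dumps(payload)}\n\n"
```

In the real proxy this would run per line of the Ollama response stream, followed by a terminating `data: [DONE]` event for OpenAI-client compatibility.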

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 13:41:46 +09:00

68 lines
1.9 KiB
Python

from typing import List, Union

from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel

from services import proxy_ollama
from services.registry import registry

router = APIRouter(prefix="/v1", tags=["embeddings"])


class EmbeddingRequest(BaseModel):
    """OpenAI-compatible embedding request body."""

    model: str
    input: Union[str, List[str]]


@router.post("/embeddings")
async def create_embedding(body: EmbeddingRequest, request: Request):
    # Role is attached to request.state by the JWT auth middleware.
    role = getattr(request.state, "role", "anonymous")
    if role == "anonymous":
        raise HTTPException(
            status_code=401,
            detail={"error": {"message": "Authentication required", "type": "auth_error", "code": "unauthorized"}},
        )

    # Resolve the model name against the registry, honoring role visibility.
    result = registry.resolve_model(body.model, role)
    if not result:
        raise HTTPException(
            status_code=404,
            detail={
                "error": {
                    "message": f"Model '{body.model}' not found or not available",
                    "type": "invalid_request_error",
                    "code": "model_not_found",
                }
            },
        )

    backend, model_info = result
    if "embed" not in model_info.capabilities:
        raise HTTPException(
            status_code=400,
            detail={
                "error": {
                    "message": f"Model '{body.model}' does not support embeddings",
                    "type": "invalid_request_error",
                    "code": "capability_mismatch",
                }
            },
        )

    # Only Ollama backends implement embeddings for now.
    if backend.type == "ollama":
        return await proxy_ollama.generate_embedding(
            backend.url, body.model, body.input
        )

    raise HTTPException(
        status_code=501,
        detail={
            "error": {
                "message": f"Embedding not supported for backend type '{backend.type}'",
                "type": "api_error",
                "code": "not_implemented",
            }
        },
    )