feat: initial home-gateway setup, full migration from Mac mini to the GPU server

Consolidates the Mac mini's Docker services onto the GPU server after the OrbStack license expired.
Switches nginx to Caddy with automatic HTTPS for 12 subdomains, and hooks fail2ban into Caddy's JSON logs.

Key changes:
- home-caddy: Caddy reverse proxy (automatic HTTPS via Let's Encrypt)
- home-fail2ban: security monitoring driven by Caddy's JSON access logs
- home-ddns: Cloudflare DDNS (API key split out into .env)
- gpu-hub-api/web: AI backend router + web UI (migrated from gpu-services)
- AI runtime (Ollama) is internal-network only; external traffic goes through the gpu-hub auth gateway

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
commit 79c09cede4
Author: Hyungi Ahn
Date: 2026-04-05 04:55:28 +00:00
52 changed files with 6847 additions and 0 deletions

hub-api/routers/auth.py (new file)

@@ -0,0 +1,79 @@
from fastapi import APIRouter, Request, Response
from fastapi.responses import JSONResponse
from pydantic import BaseModel

from config import settings
from middleware.auth import (
    check_login_rate_limit,
    create_token,
    record_login_attempt,
)

router = APIRouter(prefix="/auth", tags=["auth"])


class LoginRequest(BaseModel):
    password: str


class LoginResponse(BaseModel):
    role: str
    token: str


@router.post("/login")
async def login(body: LoginRequest, request: Request, response: Response):
    ip = request.client.host if request.client else "unknown"
    if not check_login_rate_limit(ip):
        return _error_response(429, "Too many login attempts. Try again in 1 minute.")
    record_login_attempt(ip)
    if body.password == settings.owner_password:
        role = "owner"
    elif body.password == settings.guest_password:
        role = "guest"
    else:
        return _error_response(401, "Invalid password")
    token = create_token(role)
    # Set httpOnly cookie for web UI
    response.set_cookie(
        key="token",
        value=token,
        httponly=True,
        samesite="lax",
        max_age=settings.jwt_expire_hours * 3600,
    )
    return LoginResponse(role=role, token=token)


@router.get("/me")
async def me(request: Request):
    role = getattr(request.state, "role", "anonymous")
    if role == "anonymous":
        return _error_response(401, "Not authenticated")
    return {"role": role}


@router.post("/logout")
async def logout(response: Response):
    response.delete_cookie("token")
    return {"ok": True}


def _error_response(status_code: int, message: str):
    return JSONResponse(
        status_code=status_code,
        content={
            "error": {
                "message": message,
                "type": "auth_error",
                "code": f"auth_{status_code}",
            }
        },
    )
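For reference, a minimal login flow against this router might look like the sketch below. The host is an assumption (the real subdomain lives in the Caddy config, which is not part of this file), and httpx.Client is used because it replays the httpOnly cookie that /auth/login sets.

import httpx

BASE = "https://hub.example.com"  # assumed host; the actual subdomain is configured in Caddy

with httpx.Client(base_url=BASE) as client:
    # POST /auth/login returns {"role": ..., "token": ...} and sets an
    # httpOnly "token" cookie, which the client replays on later requests.
    r = client.post("/auth/login", json={"password": "..."})
    r.raise_for_status()
    print(r.json()["role"])  # "owner" or "guest"

    # With the cookie in place, /auth/me reports the resolved role
    # (request.state.role is populated by the auth middleware, not shown here).
    print(client.get("/auth/me").json())  # {"role": "owner"}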

hub-api/routers/chat.py (new file)

@@ -0,0 +1,112 @@
from typing import List, Optional

from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel

from middleware.rate_limit import check_backend_rate_limit
from services import proxy_ollama, proxy_openai
from services.registry import registry

router = APIRouter(prefix="/v1", tags=["chat"])


class ChatMessage(BaseModel):
    role: str
    content: str


class ChatRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    stream: bool = False
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None


@router.post("/chat/completions")
async def chat_completions(body: ChatRequest, request: Request):
    role = getattr(request.state, "role", "anonymous")
    if role == "anonymous":
        raise HTTPException(
            status_code=401,
            detail={"error": {"message": "Authentication required", "type": "auth_error", "code": "unauthorized"}},
        )
    # Resolve model to backend
    result = registry.resolve_model(body.model, role)
    if not result:
        raise HTTPException(
            status_code=404,
            detail={
                "error": {
                    "message": f"Model '{body.model}' not found or not available",
                    "type": "invalid_request_error",
                    "code": "model_not_found",
                }
            },
        )
    backend, model_info = result
    # Check rate limit
    check_backend_rate_limit(backend.id)
    # Record request for rate limiting
    registry.record_request(backend.id)
    messages = [{"role": m.role, "content": m.content} for m in body.messages]
    kwargs = {}
    if body.temperature is not None:
        kwargs["temperature"] = body.temperature
    if body.max_tokens is not None:
        kwargs["max_tokens"] = body.max_tokens
    # Use backend-specific model ID if configured, otherwise use the user-facing ID
    actual_model = model_info.backend_model_id or body.model
    # Route to appropriate proxy
    if backend.type == "ollama":
        if body.stream:
            return StreamingResponse(
                proxy_ollama.stream_chat(
                    backend.url, actual_model, messages, **kwargs
                ),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "X-Accel-Buffering": "no",
                },
            )
        result = await proxy_ollama.complete_chat(
            backend.url, actual_model, messages, **kwargs
        )
        return JSONResponse(content=result)
    if backend.type == "openai-compat":
        if body.stream:
            return StreamingResponse(
                proxy_openai.stream_chat(
                    backend.url, actual_model, messages, **kwargs
                ),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "X-Accel-Buffering": "no",
                },
            )
        result = await proxy_openai.complete_chat(
            backend.url, actual_model, messages, **kwargs
        )
        return JSONResponse(content=result)
    raise HTTPException(
        status_code=501,
        detail={
            "error": {
                "message": f"Backend type '{backend.type}' not yet implemented",
                "type": "api_error",
                "code": "not_implemented",
            }
        },
    )
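Because the route follows the OpenAI chat-completions shape, a stock OpenAI client can talk to it. A minimal sketch, assuming the hub's public URL, a token from /auth/login, and that the auth middleware accepts Bearer tokens (none of which appear in this diff); the model ID is illustrative.

from openai import OpenAI

client = OpenAI(
    base_url="https://hub.example.com/v1",  # assumed public URL for hub-api
    api_key="<token from /auth/login>",     # assumes Bearer auth is handled by the middleware
)

stream = client.chat.completions.create(
    model="llama3",  # illustrative; use an ID from GET /v1/models
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,     # served by the StreamingResponse branch above
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)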

hub-api/routers/embeddings.py (new file)

@@ -0,0 +1,67 @@
from typing import List, Union

from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel

from services import proxy_ollama
from services.registry import registry

router = APIRouter(prefix="/v1", tags=["embeddings"])


class EmbeddingRequest(BaseModel):
    model: str
    input: Union[str, List[str]]


@router.post("/embeddings")
async def create_embedding(body: EmbeddingRequest, request: Request):
    role = getattr(request.state, "role", "anonymous")
    if role == "anonymous":
        raise HTTPException(
            status_code=401,
            detail={"error": {"message": "Authentication required", "type": "auth_error", "code": "unauthorized"}},
        )
    result = registry.resolve_model(body.model, role)
    if not result:
        raise HTTPException(
            status_code=404,
            detail={
                "error": {
                    "message": f"Model '{body.model}' not found or not available",
                    "type": "invalid_request_error",
                    "code": "model_not_found",
                }
            },
        )
    backend, model_info = result
    if "embed" not in model_info.capabilities:
        raise HTTPException(
            status_code=400,
            detail={
                "error": {
                    "message": f"Model '{body.model}' does not support embeddings",
                    "type": "invalid_request_error",
                    "code": "capability_mismatch",
                }
            },
        )
    if backend.type == "ollama":
        # Use the backend-specific model ID if configured (mirrors chat.py)
        actual_model = model_info.backend_model_id or body.model
        return await proxy_ollama.generate_embedding(
            backend.url, actual_model, body.input
        )
    raise HTTPException(
        status_code=501,
        detail={
            "error": {
                "message": f"Embedding not supported for backend type '{backend.type}'",
                "type": "api_error",
                "code": "not_implemented",
            }
        },
    )
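A matching client call, under the same host/token assumptions as above; per EmbeddingRequest, input may be a single string or a list of strings, and the model ID below is illustrative.

import httpx

r = httpx.post(
    "https://hub.example.com/v1/embeddings",      # assumed host
    headers={"Authorization": "Bearer <token>"},  # assumes Bearer auth in the middleware
    json={"model": "nomic-embed-text", "input": ["first text", "second text"]},
)
r.raise_for_status()
# Response shape is whatever proxy_ollama.generate_embedding returns (not shown in this diff).
print(r.json())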

hub-api/routers/gpu.py (new file)

@@ -0,0 +1,13 @@
from fastapi import APIRouter

from services.gpu_monitor import get_gpu_info

router = APIRouter(tags=["gpu"])


@router.get("/gpu")
async def gpu_status():
    info = await get_gpu_info()
    if not info:
        return {"error": {"message": "GPU info unavailable", "type": "api_error", "code": "gpu_unavailable"}}
    return info

hub-api/routers/health.py (new file)

@@ -0,0 +1,31 @@
from fastapi import APIRouter

from services.gpu_monitor import get_gpu_info
from services.registry import registry

router = APIRouter(tags=["health"])


@router.get("/health")
async def health():
    gpu = await get_gpu_info()
    return {
        "status": "ok",
        "backends": registry.get_health_summary(),
        "gpu": gpu,
    }


@router.get("/health/{backend_id}")
async def backend_health(backend_id: str):
    backend = registry.backends.get(backend_id)
    if not backend:
        return {"error": {"message": f"Backend '{backend_id}' not found"}}
    return {
        "id": backend.id,
        "type": backend.type,
        "status": "healthy" if backend.healthy else "down",
        "models": [m.id for m in backend.models],
        "latency_ms": backend.latency_ms,
    }
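These two endpoints are enough for simple external monitoring. A sketch under the usual host assumption, and additionally assuming /health is exempt from the auth middleware; the backend ID in the second call is illustrative.

import httpx

health = httpx.get("https://hub.example.com/health").json()  # assumed host
print(health["status"])    # "ok"
print(health["backends"])  # shape comes from registry.get_health_summary()
print(health["gpu"])       # GPU info, or None when unavailable

# Per-backend detail; "ollama-local" is an illustrative backend ID.
print(httpx.get("https://hub.example.com/health/ollama-local").json())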

hub-api/routers/models.py (new file)

@@ -0,0 +1,12 @@
from fastapi import APIRouter, Request

from services.registry import registry

router = APIRouter(prefix="/v1", tags=["models"])


@router.get("/models")
async def list_models(request: Request):
    role = getattr(request.state, "role", "anonymous")
    models = registry.list_models(role)
    return {"object": "list", "data": models}
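And the corresponding OpenAI-style model listing, filtered by the caller's role; same host/token assumptions as the earlier sketches.

import httpx

r = httpx.get(
    "https://hub.example.com/v1/models",          # assumed host
    headers={"Authorization": "Bearer <token>"},  # assumes Bearer auth in the middleware
)
for model in r.json()["data"]:
    print(model)  # entries come from registry.list_models(role)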