Files
Hyungi Ahn b1f9e87d6a feat(infra): MCP 인프라 서버 통합 — 7개 도구 + core/ 분리
mcp-infra-server를 gpu-services/infra/로 통합.
core/ 순수 로직은 Agent/NanoClaude에서도 직접 import 가능.
도구: docker_status, docker_logs, service_health, disk_usage,
tailscale_status, ollama_models, mlx_models.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 13:11:54 +09:00

202 lines
6.6 KiB
Python

"""Service health checks with per-service validators."""
from __future__ import annotations

import json
import time
from collections.abc import Awaitable, Callable
from datetime import datetime, timezone

from ..config import HOSTS
from ..schemas import HealthResult
from .ssh import run_command, SSHError
def _now() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
async def _validate_document_server() -> HealthResult:
    """Check the Document Server through its /health endpoint on the "gpu" host.

    Runs ``curl -sf http://localhost:8000/health`` via ``run_command`` and
    parses the reply as JSON. The service counts as healthy only when the
    payload has ``status == "ok"`` and ``database == "connected"``; any other
    JSON object is reported as degraded.

    Returns:
        A HealthResult whose status is "healthy" or "degraded" when a JSON
        object was received, and "down" when ``run_command`` raised SSHError
        or the reply could not be interpreted (error_type "parse_error").
    """
    cfg = HOSTS["gpu"]
    # Bound up front so the parse-error handler can always refer to it.
    stdout = None
    try:
        t0 = time.monotonic()
        stdout, _ = await run_command(cfg, "curl -sf http://localhost:8000/health")
        # Covers the whole run_command call, not only the HTTP request.
        latency_ms = int((time.monotonic() - t0) * 1000)
        data = json.loads(stdout)
        if not isinstance(data, dict):
            # Valid JSON that is not an object (list, string, null, ...):
            # the .get() lookups below would raise AttributeError.
            return HealthResult(
                ok=False, checked_at=_now(), service="document-server",
                status="down", error_type="parse_error",
                error=f"응답 파싱 실패: expected a JSON object, got {type(data).__name__}",
                raw=stdout.strip(),
            )
        db_ok = data.get("database") == "connected"
        status_ok = data.get("status") == "ok"
        healthy = status_ok and db_ok
        warnings = []
        if not db_ok:
            warnings.append("database disconnected")
        return HealthResult(
            ok=healthy,
            checked_at=_now(),
            service="document-server",
            status="healthy" if healthy else "degraded",
            details={
                "status": data.get("status"),
                "database": data.get("database"),
                "version": data.get("version"),
                "latency_ms": latency_ms,
            },
            warnings=warnings,
            raw=stdout.strip(),
        )
    except SSHError as e:
        return HealthResult(
            ok=False, checked_at=_now(), service="document-server",
            status="down", error_type=e.error_type, error=str(e),
        )
    except (json.JSONDecodeError, KeyError) as e:
        return HealthResult(
            ok=False, checked_at=_now(), service="document-server",
            status="down", error_type="parse_error", error=f"응답 파싱 실패: {e}",
            raw=stdout.strip() if stdout is not None else None,
        )
async def _validate_mlx() -> HealthResult:
    """Check the MLX server through /v1/models on the "macmini" host.

    Runs ``curl -sf http://localhost:8800/v1/models`` via ``run_command``.
    The service counts as healthy when the payload's ``data`` list holds at
    least one model and the call took no longer than 5000 ms; otherwise it is
    reported as degraded.

    Returns:
        A HealthResult whose status is "healthy" or "degraded" for a usable
        payload, and "down" when ``run_command`` raised SSHError or the reply
        was not JSON of the expected shape (error_type "parse_error").
    """
    cfg = HOSTS["macmini"]
    # Bound up front so the parse-error handler can always refer to it.
    stdout = None
    try:
        t0 = time.monotonic()
        stdout, _ = await run_command(cfg, "curl -sf http://localhost:8800/v1/models")
        # Covers the whole run_command call, not only the HTTP request.
        latency_ms = int((time.monotonic() - t0) * 1000)
        data = json.loads(stdout)
        models = data.get("data", []) if isinstance(data, dict) else None
        if not isinstance(models, list):
            # Valid JSON of the wrong shape would otherwise surface as an
            # uncaught AttributeError/TypeError further down.
            return HealthResult(
                ok=False, checked_at=_now(), service="mlx",
                status="down", error_type="parse_error",
                error="응답 파싱 실패: expected a JSON object with a 'data' list",
                raw=stdout.strip(),
            )
        # Entries that are not objects are still counted but get a placeholder id.
        model_ids = [
            m.get("id", "unknown") if isinstance(m, dict) else "unknown"
            for m in models
        ]
        within_limit = latency_ms <= 5000
        healthy = bool(models) and within_limit
        warnings = []
        if not within_limit:
            warnings.append(f"응답 지연 {latency_ms}ms (임계값 5000ms)")
        return HealthResult(
            ok=healthy,
            checked_at=_now(),
            service="mlx",
            status="healthy" if healthy else "degraded",
            details={
                "model_count": len(models),
                "models": model_ids,
                "latency_ms": latency_ms,
            },
            warnings=warnings,
            raw=stdout.strip(),
        )
    except SSHError as e:
        return HealthResult(
            ok=False, checked_at=_now(), service="mlx",
            status="down", error_type=e.error_type, error=str(e),
        )
    except json.JSONDecodeError as e:
        # Same reporting as the document-server validator for unparseable replies.
        return HealthResult(
            ok=False, checked_at=_now(), service="mlx",
            status="down", error_type="parse_error", error=f"응답 파싱 실패: {e}",
            raw=stdout.strip() if stdout is not None else None,
        )
async def _validate_mlx_proxy() -> HealthResult:
    """Check the MLX proxy through /v1/models on port 8801 of the "macmini" host.

    Runs ``curl -sf http://localhost:8801/v1/models`` via ``run_command``.
    The proxy counts as healthy when the payload's ``data`` list holds at
    least one model; an empty list is reported as down.

    Returns:
        A HealthResult whose status is "healthy" or "down". Failures carry the
        SSHError's error_type when ``run_command`` raised, or "parse_error"
        when the reply was not JSON of the expected shape.
    """
    cfg = HOSTS["macmini"]
    # Bound up front so the parse-error handler can always refer to it.
    stdout = None
    try:
        t0 = time.monotonic()
        stdout, _ = await run_command(cfg, "curl -sf http://localhost:8801/v1/models")
        # Covers the whole run_command call, not only the HTTP request.
        latency_ms = int((time.monotonic() - t0) * 1000)
        data = json.loads(stdout)
        models = data.get("data", []) if isinstance(data, dict) else None
        if not isinstance(models, list):
            # Valid JSON of the wrong shape would otherwise surface as an
            # uncaught AttributeError/TypeError further down.
            return HealthResult(
                ok=False, checked_at=_now(), service="mlx-proxy",
                status="down", error_type="parse_error",
                error="응답 파싱 실패: expected a JSON object with a 'data' list",
                raw=stdout.strip(),
            )
        return HealthResult(
            ok=len(models) > 0,
            checked_at=_now(),
            service="mlx-proxy",
            status="healthy" if models else "down",
            details={"model_count": len(models), "latency_ms": latency_ms},
            raw=stdout.strip(),
        )
    except SSHError as e:
        return HealthResult(
            ok=False, checked_at=_now(), service="mlx-proxy",
            status="down", error_type=e.error_type, error=str(e),
        )
    except json.JSONDecodeError as e:
        # Same reporting as the document-server validator for unparseable replies.
        return HealthResult(
            ok=False, checked_at=_now(), service="mlx-proxy",
            status="down", error_type="parse_error", error=f"응답 파싱 실패: {e}",
            raw=stdout.strip() if stdout is not None else None,
        )
async def _validate_nanoclaude() -> HealthResult:
    """Check NanoClaude through /health on port 8100 of the "gpu" host.

    The reply body is not inspected: the service is reported healthy whenever
    ``run_command`` completes without raising SSHError, and down otherwise.
    """
    host_cfg = HOSTS["gpu"]
    started = time.monotonic()
    try:
        output, _ = await run_command(host_cfg, "curl -sf http://localhost:8100/health")
    except SSHError as exc:
        return HealthResult(
            ok=False, checked_at=_now(), service="nanoclaude",
            status="down", error_type=exc.error_type, error=str(exc),
        )
    # Covers the whole run_command call, not only the HTTP request.
    elapsed_ms = int((time.monotonic() - started) * 1000)
    body = output.strip()
    return HealthResult(
        ok=True,
        checked_at=_now(),
        service="nanoclaude",
        status="healthy",
        # Only the first 200 characters of the reply go into the details.
        details={"latency_ms": elapsed_ms, "response": body[:200]},
        raw=body,
    )
async def _validate_ollama(host: str) -> HealthResult:
    """Check Ollama on *host* by running ``ollama list``.

    Args:
        host: Key into ``HOSTS`` selecting the machine to check.

    Returns:
        A HealthResult for service ``ollama-<host>``: "healthy" when at least
        one model row is listed, "degraded" when the list is empty, and "down"
        when ``run_command`` raised SSHError.
    """
    label = f"ollama-{host}"
    host_cfg = HOSTS[host]
    started = time.monotonic()
    try:
        output, _ = await run_command(host_cfg, "ollama list")
    except SSHError as exc:
        return HealthResult(
            ok=False, checked_at=_now(), service=label,
            status="down", error_type=exc.error_type, error=str(exc),
        )
    # Covers the whole run_command call.
    elapsed_ms = int((time.monotonic() - started) * 1000)
    listing = output.strip()
    # The first line is the header row; blank lines are not counted as models.
    rows = [row for row in listing.splitlines()[1:] if row.strip()]
    count = len(rows)
    has_models = count > 0
    return HealthResult(
        ok=has_models,
        checked_at=_now(),
        service=label,
        status="healthy" if has_models else "degraded",
        details={"model_count": count, "latency_ms": elapsed_ms},
        warnings=[] if has_models else ["모델 없음"],
        raw=listing,
    )
# Validator registry: maps each service name to a zero-argument callable whose
# result is awaited to obtain a HealthResult. Insertion order matters because
# it fixes the order of VALID_SERVICES (and of the "unknown service" message).
VALIDATORS: dict[str, Callable[[], Awaitable[HealthResult]]] = {
    "document-server": _validate_document_server,
    "mlx": _validate_mlx,
    "mlx-proxy": _validate_mlx_proxy,
    "nanoclaude": _validate_nanoclaude,
    # The Ollama validator takes a host, so bind it per registered service.
    "ollama-gpu": lambda: _validate_ollama("gpu"),
    "ollama-macmini": lambda: _validate_ollama("macmini"),
}
# Service names accepted by service_health(), in registry order.
VALID_SERVICES: list[str] = list(VALIDATORS)
async def service_health(service: str) -> HealthResult:
    """Run the registered health check for *service*.

    Args:
        service: A key of ``VALIDATORS``.

    Returns:
        The validator's HealthResult, or a result with status "unknown" and
        error_type "parse_error" listing the allowed names when *service* is
        not registered.
    """
    validator = VALIDATORS.get(service)
    if validator:
        return await validator()
    allowed = ", ".join(VALID_SERVICES)
    return HealthResult(
        ok=False,
        checked_at=_now(),
        service=service,
        status="unknown",
        error_type="parse_error",
        error=f"알 수 없는 서비스: '{service}'. 허용: {allowed}",
    )