From b1f9e87d6a2dd6ee0d29f2ecdbc978e2a8b77aab Mon Sep 17 00:00:00 2001
From: Hyungi Ahn <hyungiahn@Hyungiui-MacBookPro.local>
Date: Mon, 13 Apr 2026 13:11:54 +0900
Subject: [PATCH] =?UTF-8?q?feat(infra):=20MCP=20=EC=9D=B8=ED=94=84?=
 =?UTF-8?q?=EB=9D=BC=20=EC=84=9C=EB=B2=84=20=ED=86=B5=ED=95=A9=20=E2=80=94?=
 =?UTF-8?q?=207=EA=B0=9C=20=EB=8F=84=EA=B5=AC=20+=20core/=20=EB=B6=84?=
 =?UTF-8?q?=EB=A6=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

mcp-infra-server를 gpu-services/infra/로 통합.
core/ 순수 로직은 Agent/NanoClaude에서도 직접 import 가능.
도구: docker_status, docker_logs, service_health, disk_usage,
tailscale_status, ollama_models, mlx_models.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 infra/.env.example     |   2 +
 infra/__init__.py      |   0
 infra/config.py        |  73 +++++++++++++++
 infra/core/__init__.py |   0
 infra/core/docker.py   | 113 +++++++++++++++++++++++
 infra/core/health.py   | 201 +++++++++++++++++++++++++++++++++++++++++
 infra/core/models.py   |  97 ++++++++++++++++++++
 infra/core/network.py  |  83 +++++++++++++++++
 infra/core/ssh.py      | 123 +++++++++++++++++++++++++
 infra/core/system.py   |  79 ++++++++++++++++
 infra/mcp_server.py    | 107 ++++++++++++++++++++++
 infra/pyproject.toml   |  14 +++
 infra/run.sh           |   3 +
 infra/schemas.py       | 101 +++++++++++++++++++++
 14 files changed, 996 insertions(+)
 create mode 100644 infra/.env.example
 create mode 100644 infra/__init__.py
 create mode 100644 infra/config.py
 create mode 100644 infra/core/__init__.py
 create mode 100644 infra/core/docker.py
 create mode 100644 infra/core/health.py
 create mode 100644 infra/core/models.py
 create mode 100644 infra/core/network.py
 create mode 100644 infra/core/ssh.py
 create mode 100644 infra/core/system.py
 create mode 100644 infra/mcp_server.py
 create mode 100644 infra/pyproject.toml
 create mode 100755 infra/run.sh
 create mode 100644 infra/schemas.py

diff --git a/infra/.env.example b/infra/.env.example
new file mode 100644
index 0000000..d3faeda
--- /dev/null
+++ b/infra/.env.example
@@ -0,0 +1,2 @@
+# Copy to .env and fill in values
+NAS_COMPANY_PASSWORD=
diff --git a/infra/__init__.py b/infra/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/infra/config.py b/infra/config.py
new file mode 100644
index 0000000..798f8d3
--- /dev/null
+++ b/infra/config.py
@@ -0,0 +1,73 @@
+"""Host configuration and tool-host validation.
+
+All host IPs are Tailscale IPs (except nas-company which also works via Tailscale).
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, field
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+@dataclass(frozen=True)
+class HostConfig:  # immutable per-host connection settings
+    ip: str                            # address the SSH layer connects to
+    user: str                          # SSH login name
+    auth: str                          # "key" | "password"
+    docker_path: str = "docker"        # docker executable used when building commands for this host
+    needs_sudo: bool = False           # callers pass this as use_sudo when running commands
+    password: str | None = None        # only for auth="password"; also fed to sudo when needs_sudo
+
+
+HOSTS: dict[str, HostConfig] = {  # registry keyed by the host alias that callers pass in
+    "gpu": HostConfig(
+        ip="100.111.160.84",
+        user="hyungi",
+        auth="key",
+    ),
+    "macmini": HostConfig(
+        ip="100.76.254.116",
+        user="hyungi",
+        auth="key",
+    ),
+    "nas-company": HostConfig(
+        ip="100.71.132.52",
+        user="hyungi",
+        auth="password",
+        docker_path="/usr/local/bin/docker",
+        needs_sudo=True,
+        password=os.getenv("NAS_COMPANY_PASSWORD"),  # None when the variable is unset
+    ),
+}
+
+# Per-tool allowed hosts, consulted by validate_host(); a tool with no entry may use any host in HOSTS
+TOOL_HOST_MAP: dict[str, list[str]] = {
+    "docker_status":  ["gpu", "nas-company"],
+    "docker_logs":    ["gpu", "nas-company"],
+    "disk_usage":     ["gpu", "macmini", "nas-company"],
+    "ollama_models":  ["gpu", "macmini"],
+    "mlx_models":     ["macmini"],
+}
+
+# SSH timeouts and retry policy (imported by the SSH layer)
+SSH_TIMEOUT = 5       # connection timeout (seconds)
+CMD_TIMEOUT = 10      # command execution timeout (seconds)
+MAX_RETRIES = 1       # retry once on failure
+
+
+def validate_host(tool: str, host: str) -> HostConfig:
+    """Return the HostConfig for ``host`` after checking it is permitted for ``tool``.
+
+    Tools missing from TOOL_HOST_MAP are not restricted. Raises ValueError otherwise.
+    """
+    permitted = TOOL_HOST_MAP.get(tool)
+    if permitted and host not in permitted:
+        raise ValueError(f"'{host}'는 {tool}에서 지원하지 않습니다. 허용: {', '.join(permitted)}")
+    host_cfg = HOSTS.get(host)
+    if host_cfg:
+        return host_cfg
+    known = ", ".join(HOSTS.keys())
+    raise ValueError(f"알 수 없는 호스트: '{host}'. 허용: {known}")
diff --git a/infra/core/__init__.py b/infra/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/infra/core/docker.py b/infra/core/docker.py
new file mode 100644
index 0000000..e803b86
--- /dev/null
+++ b/infra/core/docker.py
@@ -0,0 +1,113 @@
+"""Docker status and logs tools."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+
+from ..config import validate_host, HOSTS
+from ..schemas import DockerStatusResult, DockerLogsResult, ContainerInfo
+from .ssh import run_command, SSHError
+
+
+def _now() -> str:  # current UTC time as an ISO-8601 string
+    return datetime.now(tz=timezone.utc).isoformat()
+
+
+async def docker_status(host: str) -> DockerStatusResult:
+    """Report every container (running or not) on ``host`` as structured data.
+
+    Host validation failures and SSH failures are returned as ``ok=False``
+    results rather than raised. ``ok`` is True only when all listed
+    containers are classified as running.
+    """
+    try:
+        cfg = validate_host("docker_status", host)
+    except ValueError as exc:
+        return DockerStatusResult(
+            ok=False, checked_at=_now(), host=host,
+            error_type="parse_error", error=str(exc),
+        )
+
+    # One container per line, fields joined by "|" (the image is the final field).
+    ps_format = '{{.Names}}|{{.Status}}|{{.Ports}}|{{.Image}}'
+    ps_cmd = f"{cfg.docker_path} ps -a --format '{ps_format}'"
+
+    try:
+        stdout, _ = await run_command(cfg, ps_cmd, use_sudo=cfg.needs_sudo)
+    except SSHError as exc:
+        return DockerStatusResult(
+            ok=False, checked_at=_now(), host=host,
+            error_type=exc.error_type, error=str(exc),
+        )
+
+    raw_output = stdout.strip()
+    containers: list[ContainerInfo] = []
+    for row in raw_output.splitlines():
+        fields = row.split("|", 3)
+        if len(fields) < 4:
+            continue  # not a complete record
+        name, status_text, ports, image = fields
+        # Classify from the human-readable status text.
+        if "Restarting" in status_text:
+            state = "restarting"
+        elif status_text.startswith("Up"):
+            state = "running"
+        else:
+            state = "exited"
+        containers.append(ContainerInfo(
+            name=name, status=state, uptime=status_text, ports=ports, image=image,
+        ))
+
+    running = len([c for c in containers if c.status == "running"])
+    total = len(containers)
+    summary = f"{running}/{total} running"
+    not_running = [c for c in containers if c.status != "running"]
+    if running < total:
+        summary += f", down: {', '.join(c.name for c in not_running)}"
+
+    # Every container that is not running is restarting or exited, so each gets a warning.
+    warnings = [f"{c.name} is {c.status}" for c in not_running]
+
+    return DockerStatusResult(
+        ok=running == total, checked_at=_now(), host=host, containers=containers,
+        summary=summary, warnings=warnings, raw=raw_output,
+    )
+
+
+async def docker_logs(host: str, container: str, lines: int = 50) -> DockerLogsResult:
+    """Return the most recent ``lines`` log lines of ``container`` on ``host``.
+
+    stderr is merged into stdout by the remote command (``2>&1``). Validation and
+    SSH failures come back as ``ok=False`` results instead of exceptions.
+    """
+    import shlex  # local import: only needed to quote the container name
+
+    try:
+        cfg = validate_host("docker_logs", host)
+    except ValueError as e:
+        return DockerLogsResult(
+            ok=False, checked_at=_now(), host=host, container=container,
+            lines=lines, error_type="parse_error", error=str(e),
+        )
+
+    wanted = max(lines, 0)  # a negative count would turn into a bogus --tail value
+    # Quote the caller-supplied name against shell injection; ask for one extra line to detect truncation.
+    cmd = f"{cfg.docker_path} logs --tail {wanted + 1} {shlex.quote(container)} 2>&1"
+
+    try:
+        stdout, stderr = await run_command(cfg, cmd, use_sudo=cfg.needs_sudo, timeout=15)
+    except SSHError as e:
+        return DockerLogsResult(
+            ok=False, checked_at=_now(), host=host, container=container,
+            lines=lines, error_type=e.error_type, error=str(e),
+        )
+
+    all_lines = stdout.strip().splitlines()
+    truncated = len(all_lines) > wanted
+    # --tail output is oldest-first, so the surplus line to drop is the first one, not the last.
+    kept = all_lines[len(all_lines) - wanted:] if truncated else all_lines
+    return DockerLogsResult(
+        ok=True, checked_at=_now(), host=host, container=container, lines=lines,
+        truncated=truncated, content="\n".join(kept),
+        stderr=stderr.strip() if stderr else "", raw=stdout.strip(),
+    )
diff --git a/infra/core/health.py b/infra/core/health.py
new file mode 100644
index 0000000..e1785d7
--- /dev/null
+++ b/infra/core/health.py
@@ -0,0 +1,201 @@
+"""Service health checks with per-service validators."""
+
+from __future__ import annotations
+
+import json
+import time
+from datetime import datetime, timezone
+
+from ..config import HOSTS
+from ..schemas import HealthResult
+from .ssh import run_command, SSHError
+
+
+def _now() -> str:  # current UTC time as an ISO-8601 string
+    return datetime.now(tz=timezone.utc).isoformat()
+
+
+async def _validate_document_server() -> HealthResult:
+    """Document Server: /health must report status "ok" and database "connected".
+
+    SSH failures and unparseable responses yield "down"; failed checks yield "degraded".
+    """
+    cfg = HOSTS["gpu"]
+    stdout = ""  # stays empty unless the command succeeded
+    try:
+        t0 = time.monotonic()
+        stdout, _ = await run_command(cfg, "curl -sf http://localhost:8000/health")
+        latency_ms = int((time.monotonic() - t0) * 1000)
+
+        data = json.loads(stdout)
+        if not isinstance(data, dict):
+            # Valid JSON of the wrong shape would otherwise crash on .get().
+            raise ValueError(f"expected a JSON object, got {type(data).__name__}")
+        db_ok = data.get("database") == "connected"
+        healthy = data.get("status") == "ok" and db_ok
+
+        return HealthResult(
+            ok=healthy, checked_at=_now(),
+            service="document-server",
+            status="healthy" if healthy else "degraded",
+            details={
+                "status": data.get("status"), "database": data.get("database"),
+                "version": data.get("version"), "latency_ms": latency_ms,
+            },
+            warnings=[] if db_ok else ["database disconnected"],
+            raw=stdout.strip(),
+        )
+    except SSHError as e:
+        return HealthResult(
+            ok=False, checked_at=_now(), service="document-server",
+            status="down", error_type=e.error_type, error=str(e),
+        )
+    except (json.JSONDecodeError, KeyError, ValueError) as e:  # ValueError: non-object payload
+        return HealthResult(
+            ok=False, checked_at=_now(), service="document-server",
+            status="down", error_type="parse_error", error=f"응답 파싱 실패: {e}",
+            raw=stdout.strip(),
+        )
+
+
+async def _validate_mlx() -> HealthResult:
+    """MLX Server: /v1/models must return at least 1 model within 5s.
+
+    SSH failures and malformed responses are reported as status "down".
+    """
+    cfg = HOSTS["macmini"]
+    stdout = ""  # stays empty unless the command succeeded
+    try:
+        t0 = time.monotonic()
+        stdout, _ = await run_command(cfg, "curl -sf http://localhost:8800/v1/models")
+        latency_ms = int((time.monotonic() - t0) * 1000)
+
+        # Raises on invalid JSON or an unexpected shape; handled by the last except clause.
+        models = json.loads(stdout).get("data", [])
+        model_ids = [m.get("id", "unknown") for m in models]
+        slow = latency_ms > 5000
+        healthy = len(models) > 0 and not slow
+        return HealthResult(
+            ok=healthy, checked_at=_now(), service="mlx",
+            status="healthy" if healthy else "degraded",
+            details={"model_count": len(models), "models": model_ids, "latency_ms": latency_ms},
+            warnings=[f"응답 지연 {latency_ms}ms (임계값 5000ms)"] if slow else [],
+            raw=stdout.strip(),
+        )
+    except SSHError as e:
+        return HealthResult(
+            ok=False, checked_at=_now(), service="mlx",
+            status="down", error_type=e.error_type, error=str(e),
+        )
+    except (ValueError, AttributeError, TypeError) as e:
+        return HealthResult(
+            ok=False, checked_at=_now(), service="mlx", status="down",
+            error_type="parse_error", error=f"응답 파싱 실패: {e}", raw=stdout.strip(),
+        )
+
+
+async def _validate_mlx_proxy() -> HealthResult:
+    """MLX Proxy (:8801): must return models via proxy; bad responses count as down."""
+    cfg = HOSTS["macmini"]
+    stdout = ""  # stays empty unless the command succeeded
+    try:
+        t0 = time.monotonic()
+        stdout, _ = await run_command(cfg, "curl -sf http://localhost:8801/v1/models")
+        latency_ms = int((time.monotonic() - t0) * 1000)
+        model_count = len(json.loads(stdout).get("data", []))
+        return HealthResult(
+            ok=model_count > 0, checked_at=_now(), service="mlx-proxy",
+            status="healthy" if model_count else "down", raw=stdout.strip(),
+            details={"model_count": model_count, "latency_ms": latency_ms},
+        )
+    except SSHError as e:
+        return HealthResult(
+            ok=False, checked_at=_now(), service="mlx-proxy",
+            status="down", error_type=e.error_type, error=str(e),
+        )
+    except (ValueError, AttributeError, TypeError) as e:  # invalid JSON or unexpected shape
+        return HealthResult(
+            ok=False, checked_at=_now(), service="mlx-proxy", status="down",
+            error_type="parse_error", error=f"응답 파싱 실패: {e}", raw=stdout.strip(),
+        )
+
+
+async def _validate_nanoclaude() -> HealthResult:
+    """NanoClaude: healthy whenever curl against :8100/health succeeds on the gpu host."""
+    service = "nanoclaude"
+    started = time.monotonic()
+    try:
+        stdout, _ = await run_command(HOSTS["gpu"], "curl -sf http://localhost:8100/health")
+    except SSHError as exc:
+        return HealthResult(
+            ok=False, checked_at=_now(), service=service,
+            status="down", error_type=exc.error_type, error=str(exc),
+        )
+    elapsed_ms = int((time.monotonic() - started) * 1000)
+    body = stdout.strip()
+
+    # The response body is not inspected; only its first 200 characters are echoed in details.
+    return HealthResult(
+        ok=True, checked_at=_now(),
+        service=service, status="healthy",
+        details={"latency_ms": elapsed_ms, "response": body[:200]},
+        raw=body,
+    )
+
+
+async def _validate_ollama(host: str) -> HealthResult:
+    """Ollama: `ollama list` must succeed and list at least one model on ``host``.
+
+    ``host`` is a key of HOSTS; the reported service name is ``ollama-<host>``.
+    """
+    service_name = f"ollama-{host}"
+    cfg = HOSTS[host]
+    started = time.monotonic()
+    try:
+        stdout, _ = await run_command(cfg, "ollama list")
+    except SSHError as exc:
+        return HealthResult(
+            ok=False, checked_at=_now(), service=service_name,
+            status="down", error_type=exc.error_type, error=str(exc),
+        )
+    elapsed_ms = int((time.monotonic() - started) * 1000)
+
+    listing = stdout.strip()
+    # Row 0 is the column header; count the non-blank rows after it.
+    model_count = sum(1 for row in listing.splitlines()[1:] if row.strip())
+    has_models = model_count > 0
+    return HealthResult(
+        ok=has_models,
+        checked_at=_now(),
+        service=service_name,
+        status="healthy" if has_models else "degraded",
+        details={"model_count": model_count, "latency_ms": elapsed_ms},
+        warnings=[] if has_models else ["모델 없음"],
+        raw=listing,
+    )
+
+
+# Validator registry: service name -> zero-argument callable that service_health() calls and awaits.
+VALIDATORS: dict[str, object] = {
+    "document-server": _validate_document_server,
+    "mlx": _validate_mlx,
+    "mlx-proxy": _validate_mlx_proxy,
+    "nanoclaude": _validate_nanoclaude,
+    "ollama-gpu": lambda: _validate_ollama("gpu"),          # lambda binds the host argument
+    "ollama-macmini": lambda: _validate_ollama("macmini"),
+}
+
+VALID_SERVICES = list(VALIDATORS.keys())  # accepted names, listed in the unknown-service error
+
+
+async def service_health(service: str) -> HealthResult:
+    """Run the validator registered for ``service``; unknown names yield an ok=False result."""
+    validator = VALIDATORS.get(service)
+    if validator:
+        return await validator()
+    # Unknown service: report the accepted names instead of raising.
+    allowed = ", ".join(VALID_SERVICES)
+    return HealthResult(
+        ok=False, checked_at=_now(), service=service, status="unknown",
+        error_type="parse_error", error=f"알 수 없는 서비스: '{service}'. 허용: {allowed}",
+    )
diff --git a/infra/core/models.py b/infra/core/models.py
new file mode 100644
index 0000000..0e842bc
--- /dev/null
+++ b/infra/core/models.py
@@ -0,0 +1,97 @@
+"""Model inventory tools — Ollama and MLX model listing."""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime, timezone
+
+from ..config import validate_host, HOSTS
+from ..schemas import ModelsResult, ModelInfo
+from .ssh import run_command, SSHError
+
+
+def _now() -> str:  # current UTC time as an ISO-8601 string
+    return datetime.now(tz=timezone.utc).isoformat()
+
+
+def _parse_ollama_list(output: str) -> list[ModelInfo]:
+    """Turn `ollama list` text into ModelInfo rows.
+
+    The first line is treated as the column header and skipped.
+    """
+    def to_model(cols: list[str]) -> ModelInfo:
+        # Columns 2 and 3 are joined as the size; everything after them is the modified text.
+        size = f"{cols[2]} {cols[3]}" if len(cols) > 3 else ""
+        return ModelInfo(id=cols[0], size=size, modified=" ".join(cols[4:]))
+
+    rows = (line.split() for line in output.strip().splitlines()[1:])
+    # Rows with fewer than two columns (e.g. blank lines) are ignored.
+    return [to_model(cols) for cols in rows if len(cols) >= 2]
+
+
+async def ollama_models(host: str) -> ModelsResult:
+    """Run `ollama list` on ``host`` and return the parsed model inventory.
+
+    Host validation and SSH failures are returned as ``ok=False`` results.
+    """
+    def failure(error_type: str, exc: Exception) -> ModelsResult:
+        return ModelsResult(
+            ok=False, checked_at=_now(), host=host, source="ollama",
+            error_type=error_type, error=str(exc),
+        )
+
+    try:
+        cfg = validate_host("ollama_models", host)
+    except ValueError as exc:
+        return failure("parse_error", exc)
+
+    try:
+        stdout, _ = await run_command(cfg, "ollama list")
+    except SSHError as exc:
+        return failure(exc.error_type, exc)
+
+    return ModelsResult(
+        ok=True, checked_at=_now(),
+        host=host, source="ollama",
+        models=_parse_ollama_list(stdout),
+        raw=stdout.strip(),
+    )
+
+
+async def mlx_models() -> ModelsResult:
+    """List MLX models loaded on Mac mini.
+
+    Queries the server's /v1/models endpoint with curl over SSH. SSH failures
+    and malformed responses are returned as ``ok=False`` results.
+    """
+    cfg = HOSTS["macmini"]
+    try:
+        stdout, _ = await run_command(cfg, "curl -sf http://localhost:8800/v1/models")
+    except SSHError as e:
+        return ModelsResult(
+            ok=False, checked_at=_now(), host="macmini", source="mlx",
+            error_type=e.error_type, error=str(e),
+        )
+
+    try:
+        data = json.loads(stdout)
+        models = [
+            ModelInfo(
+                id=m.get("id", "unknown"),
+                size=str(m.get("size", "")),
+                modified=str(m.get("created", "")),
+            )
+            for m in data.get("data", [])
+        ]
+    except (ValueError, AttributeError, TypeError) as e:
+        # ValueError: invalid JSON; the others: valid JSON that is not {"data": [{...}, ...]}.
+        return ModelsResult(
+            ok=False, checked_at=_now(), host="macmini", source="mlx",
+            error_type="parse_error", error=f"JSON 파싱 실패: {e}",
+            raw=stdout.strip(),
+        )
+
+    return ModelsResult(
+        ok=True, checked_at=_now(), host="macmini", source="mlx",
+        models=models, raw=stdout.strip(),
+    )
diff --git a/infra/core/network.py b/infra/core/network.py
new file mode 100644
index 0000000..4851f36
--- /dev/null
+++ b/infra/core/network.py
@@ -0,0 +1,83 @@
+"""Network tools — Tailscale status."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+
+from ..schemas import TailscaleResult, TailscalePeer
+from .ssh import run_local, SSHError
+
+TAILSCALE_BIN = "/Applications/Tailscale.app/Contents/MacOS/Tailscale"  # CLI path that tailscale_status() runs locally
+
+
+def _now() -> str:  # current UTC time as an ISO-8601 string
+    return datetime.now(tz=timezone.utc).isoformat()
+
+
+def _parse_tailscale(output: str) -> list[TailscalePeer]:
+    """Convert `tailscale status` text into a list of peers.
+
+    Expected columns: IP  HOSTNAME  USER@  OS  STATUS_INFO
+    Status examples: "-" (connected/active), "idle, tx ... rx ...", "offline, last seen ..."
+    Lines with fewer than four columns and header-like lines (first column
+    starting with "#" or equal to "IP") are ignored.
+    """
+    def classify(status_text: str) -> str:
+        # "offline" takes precedence over "idle"; any other text,
+        # including "-" or no status at all, counts as active.
+        if "offline" in status_text:
+            return "offline"
+        if "idle" in status_text:
+            return "idle"
+        return "active"
+
+    peers: list[TailscalePeer] = []
+    for row in output.strip().splitlines():
+        cols = row.split()
+        # Too short to be a peer row.
+        if len(cols) < 4:
+            continue
+        # Header-like line.
+        if cols[0].startswith("#") or cols[0] == "IP":
+            continue
+
+        # cols[2] is the user@ column and is not recorded.
+        ip, hostname, _user, os_name = cols[:4]
+        status_text = " ".join(cols[4:])  # empty when there is no status column
+        peer = TailscalePeer(
+            hostname=hostname,
+            ip=ip,
+            status=classify(status_text),
+            os=os_name,
+        )
+        peers.append(peer)
+
+    return peers
+
+
+async def tailscale_status() -> TailscaleResult:
+    """Get Tailscale network status (runs locally).
+
+    ``ok`` reflects only whether the CLI ran; expected hosts absent from the
+    output are reported through ``warnings``.
+    """
+    try:
+        stdout, _ = await run_local(f"{TAILSCALE_BIN} status")
+    except SSHError as e:
+        return TailscaleResult(
+            ok=False, checked_at=_now(),
+            error_type=e.error_type, error=str(e),
+        )
+
+    peers = _parse_tailscale(stdout)
+
+    expected_hosts = {"sub-server", "hyungi-macmini", "hyungi-macbookpro"}
+    missing = expected_hosts - {p.hostname for p in peers}
+    # Sorted so the warning order is stable; plain set iteration order is not.
+    warnings = [f"{h} not found in tailnet" for h in sorted(missing)]
+
+    return TailscaleResult(
+        ok=True, checked_at=_now(),
+        peers=peers, warnings=warnings,
+        raw=stdout.strip(),
+    )
diff --git a/infra/core/ssh.py b/infra/core/ssh.py
new file mode 100644
index 0000000..7537153
--- /dev/null
+++ b/infra/core/ssh.py
@@ -0,0 +1,123 @@
+"""SSH connection layer — asyncssh based.
+
+Provides run_command() which handles:
+- Key-based auth (GPU, Mac mini)
+- Password auth + sudo (company NAS)
+- Timeout / retry
+- Structured error classification
+"""
+
+from __future__ import annotations
+
+import asyncio
+from datetime import datetime, timezone
+
+import asyncssh
+
+from ..config import HostConfig, SSH_TIMEOUT, CMD_TIMEOUT, MAX_RETRIES
+
+
+class SSHError(Exception):
+    """Failure raised by the SSH/local command layer, tagged with an ``error_type`` label."""
+
+    def __init__(self, error_type: str, message: str):
+        super().__init__(message)
+        self.error_type = error_type
+
+
+def _now_iso() -> str:  # current UTC time as an ISO-8601 string
+    return datetime.now(tz=timezone.utc).isoformat()
+
+
+async def _connect(host: HostConfig) -> asyncssh.SSHClientConnection:
+    """Open an SSH connection to ``host`` using password or default key auth."""
+    use_password = host.auth == "password" and bool(host.password)
+    # With no password options, asyncssh falls back to its default key handling (~/.ssh/).
+    auth_options: dict = {"password": host.password, "client_keys": []} if use_password else {}
+    # NOTE(review): known_hosts=None accepts any host key; the original
+    # rationale is that all hosts sit on the internal Tailscale network.
+    return await asyncssh.connect(
+        host=host.ip,
+        username=host.user,
+        connect_timeout=SSH_TIMEOUT,
+        known_hosts=None,
+        **auth_options,
+    )
+
+
+async def run_command(
+    host: HostConfig,
+    command: str,
+    timeout: int = CMD_TIMEOUT,
+    use_sudo: bool = False,
+) -> tuple[str, str]:
+    """Run a command on remote host. Returns (stdout, stderr).
+
+    For NAS with sudo: wraps command with sudo, feeding the password on stdin.
+    Only connection-level failures are retried (up to MAX_RETRIES times).
+
+    Raises:
+        SSHError: error_type "command_failed" (non-zero exit), "auth", or
+            "timeout" (command timeout, or connection failure after retries).
+    """
+    import shlex  # local import: only needed for the sudo wrapper
+
+    if use_sudo and host.needs_sudo and host.password:
+        # shlex.quote keeps quotes/metacharacters in the password from breaking the command.
+        command = f"printf '%s\\n' {shlex.quote(host.password)} | sudo -S {command}"
+
+    for attempt in range(1 + MAX_RETRIES):
+        try:
+            conn = await _connect(host)
+            async with conn:
+                result = await asyncio.wait_for(conn.run(command, check=False), timeout=timeout)
+                stdout = result.stdout or ""
+                stderr = result.stderr or ""
+
+                if result.exit_status != 0:
+                    # Command ran but failed; drop sudo's password prompt from stderr.
+                    stderr_clean = "\n".join(
+                        line for line in stderr.splitlines()
+                        if "[sudo]" not in line and "Password:" not in line
+                    )
+                    raise SSHError(
+                        "command_failed",
+                        f"exit {result.exit_status}: {stderr_clean.strip() or stdout.strip()}"
+                    )
+                return stdout, stderr
+
+        except SSHError:
+            raise
+        except asyncio.TimeoutError:
+            raise SSHError("timeout", f"명령 실행 시간 초과 ({timeout}초)")
+        except asyncssh.PermissionDenied:
+            raise SSHError("auth", f"SSH 인증 실패: {host.user}@{host.ip}")
+        except (OSError, asyncssh.Error) as e:
+            if attempt < MAX_RETRIES:
+                await asyncio.sleep(1)
+                continue
+            raise SSHError("timeout", f"SSH 연결 실패: {host.ip} — {e}") from e
+
+    raise SSHError("timeout", f"SSH 최대 재시도 초과: {host.ip}")
+
+
+async def run_local(command: str, timeout: int = CMD_TIMEOUT) -> tuple[str, str]:
+    """Run a shell command on this machine. Returns (stdout, stderr).
+
+    Raises SSHError("command_failed") on a non-zero exit and SSHError("timeout") on
+    timeout. Undecodable output bytes are replaced rather than raising.
+    """
+    proc = await asyncio.create_subprocess_shell(
+        command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
+    )
+    try:
+        out, err = await asyncio.wait_for(proc.communicate(), timeout=timeout)
+    except asyncio.TimeoutError:
+        if proc.returncode is None:
+            proc.kill()  # wait_for only cancels the wait; the child must be stopped explicitly
+        await proc.wait()  # reap it so a hung command does not linger
+        raise SSHError("timeout", f"로컬 명령 시간 초과 ({timeout}초)")
+    stdout, stderr = out.decode(errors="replace"), err.decode(errors="replace")
+    if proc.returncode != 0:
+        raise SSHError("command_failed", f"exit {proc.returncode}: {stderr.strip() or stdout.strip()}")
+    return stdout, stderr
diff --git a/infra/core/system.py b/infra/core/system.py
new file mode 100644
index 0000000..26ecc45
--- /dev/null
+++ b/infra/core/system.py
@@ -0,0 +1,79 @@
+"""System tools — disk usage."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+
+from ..config import validate_host
+from ..schemas import DiskResult, FileSystemInfo
+from .ssh import run_command, SSHError
+
+
+def _now() -> str:  # current UTC time as an ISO-8601 string
+    return datetime.now(tz=timezone.utc).isoformat()
+
+
+def _parse_df(output: str) -> list[FileSystemInfo]:
+    """Extract one FileSystemInfo per real filesystem from `df -h` text.
+
+    The header row, rows with fewer than six columns, pseudo-filesystems and
+    rows whose fifth column is not an integer percentage are dropped.
+    """
+    skipped_mount_prefixes = ("/dev", "/sys", "/proc", "/run", "/snap")
+    skipped_devices = {"tmpfs", "devtmpfs", "overlay", "shm", "none"}
+
+    result: list[FileSystemInfo] = []
+    for row in output.strip().splitlines()[1:]:
+        cols = row.split()
+        if len(cols) < 6:
+            continue
+        # Column layout: Filesystem Size Used Avail Use% ... Mounted
+        device, total, used, avail, pct_text = cols[:5]
+        mount = cols[-1]
+        if mount.startswith(skipped_mount_prefixes) or device in skipped_devices:
+            continue
+        try:
+            used_pct = int(pct_text.rstrip("%"))
+        except ValueError:
+            continue
+        result.append(FileSystemInfo(
+            mount=mount, total=total, used=used, avail=avail, used_pct=used_pct,
+        ))
+
+    return result
+
+
+async def disk_usage(host: str) -> DiskResult:
+    """Run `df -h` on ``host`` and return parsed filesystems plus high-usage warnings.
+
+    A warning is added for each filesystem at or above 85% used. Host
+    validation and SSH failures are returned as ``ok=False`` results.
+    """
+    WARN_THRESHOLD = 85
+    def failure(error_type: str, exc: Exception) -> DiskResult:
+        return DiskResult(
+            ok=False, checked_at=_now(), host=host,
+            error_type=error_type, error=str(exc),
+        )
+
+    try:
+        cfg = validate_host("disk_usage", host)
+    except ValueError as exc:
+        return failure("parse_error", exc)
+
+    try:
+        stdout, _ = await run_command(cfg, "df -h", use_sudo=cfg.needs_sudo)
+    except SSHError as exc:
+        return failure(exc.error_type, exc)
+
+    filesystems = _parse_df(stdout)
+    warnings = [
+        f"{fs.mount} 사용률 {fs.used_pct}% — 임계값 {WARN_THRESHOLD}% 초과"
+        for fs in filesystems
+        if fs.used_pct >= WARN_THRESHOLD
+    ]
+
+    return DiskResult(
+        ok=True, checked_at=_now(), host=host,
+        filesystems=filesystems, warnings=warnings, raw=stdout.strip(),
+    )
diff --git a/infra/mcp_server.py b/infra/mcp_server.py
new file mode 100644
index 0000000..5939e40
--- /dev/null
+++ b/infra/mcp_server.py
@@ -0,0 +1,107 @@
+"""MCP Infra Server — thin wrapper over core/ functions.
+
+This file ONLY does:
+1. MCP tool registration (decorators)
+2. Parameter validation
+3. Call core/ functions
+4. Return results as JSON text
+
+All actual logic lives in core/ (the infra/core/ package).
+"""
+
+from __future__ import annotations
+
+from mcp.server.fastmcp import FastMCP
+
+from .core.docker import docker_status, docker_logs
+from .core.health import service_health, VALID_SERVICES
+from .core.system import disk_usage
+from .core.network import tailscale_status
+from .core.models import ollama_models, mlx_models
+
+mcp = FastMCP(  # server instance that the @mcp.tool() functions below register on
+    "infra",
+    instructions=(
+        "인프라 모니터링 도구. GPU 서버, Mac mini, 회사 NAS의 "
+        "Docker 상태, 서비스 헬스체크, 디스크 사용량, 네트워크, 모델 목록을 확인합니다."
+    ),
+)
+
+
+@mcp.tool()
+async def check_docker_status(host: str) -> str:
+    """Docker 컨테이너 상태 확인.
+
+    Args:
+        host: 대상 호스트 (gpu | nas-company)
+    """
+    # Delegate to core and hand the structured result back as indented JSON text.
+    return (await docker_status(host)).model_dump_json(indent=2)
+
+
+@mcp.tool()
+async def check_docker_logs(host: str, container: str, lines: int = 50) -> str:
+    """Docker 컨테이너 최근 로그 조회.
+
+    Args:
+        host: 대상 호스트 (gpu | nas-company)
+        container: 컨테이너 이름
+        lines: 조회할 줄 수 (기본 50)
+    """
+    # Delegate to core and hand the structured result back as indented JSON text.
+    return (await docker_logs(host, container, lines)).model_dump_json(indent=2)
+
+
+@mcp.tool()
+async def check_service_health(service: str) -> str:
+    """서비스 헬스체크. 서비스별 정상 판정 기준이 다름.
+
+    Args:
+        service: 서비스 이름 (document-server | mlx | mlx-proxy | nanoclaude | ollama-gpu | ollama-macmini)
+    """
+    # Delegate to core and hand the structured result back as indented JSON text.
+    return (await service_health(service)).model_dump_json(indent=2)
+
+
+@mcp.tool()
+async def check_disk_usage(host: str) -> str:
+    """디스크 사용량 확인. 85% 초과 시 경고.
+
+    Args:
+        host: 대상 호스트 (gpu | macmini | nas-company)
+    """
+    # Delegate to core and hand the structured result back as indented JSON text.
+    return (await disk_usage(host)).model_dump_json(indent=2)
+
+
+@mcp.tool()
+async def check_tailscale() -> str:
+    """Tailscale 네트워크 상태 확인. 모든 피어 연결 상태를 반환."""
+    # Delegate to core and hand the structured result back as indented JSON text.
+    return (await tailscale_status()).model_dump_json(indent=2)
+
+
+@mcp.tool()
+async def check_ollama_models(host: str) -> str:
+    """Ollama 설치 모델 목록 조회.
+
+    Args:
+        host: 대상 호스트 (gpu | macmini)
+    """
+    # Delegate to core and hand the structured result back as indented JSON text.
+    return (await ollama_models(host)).model_dump_json(indent=2)
+
+
+@mcp.tool()
+async def check_mlx_models() -> str:
+    """Mac mini MLX 서버에 로드된 모델 목록 조회."""
+    # Delegate to core and hand the structured result back as indented JSON text.
+    return (await mlx_models()).model_dump_json(indent=2)
+
+
+def main() -> None:
+    mcp.run(transport="stdio")  # serve the registered tools over the stdio transport
+
+
+if __name__ == "__main__":
+    main()
diff --git a/infra/pyproject.toml b/infra/pyproject.toml
new file mode 100644
index 0000000..dcd1b3f
--- /dev/null
+++ b/infra/pyproject.toml
@@ -0,0 +1,14 @@
+[project]
+name = "mcp-infra-server"
+version = "0.1.0"
+description = "MCP server for infrastructure monitoring — GPU server, Mac mini, NAS"
+requires-python = ">=3.11"
+dependencies = [
+    "mcp>=1.27.0",
+    "asyncssh>=2.22.0",
+    "pydantic>=2.12.0",
+    "python-dotenv>=1.0.0",
+]
+
+[project.optional-dependencies]
+dev = ["pytest", "pytest-asyncio"]
diff --git a/infra/run.sh b/infra/run.sh
new file mode 100755
index 0000000..37236ba
--- /dev/null
+++ b/infra/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+cd /Users/hyungiahn/Documents/code/gpu-services || exit 1  # abort instead of launching from the wrong directory
+exec /opt/homebrew/bin/python3.11 -m infra.mcp_server
diff --git a/infra/schemas.py b/infra/schemas.py
new file mode 100644
index 0000000..4461ceb
--- /dev/null
+++ b/infra/schemas.py
@@ -0,0 +1,101 @@
+"""Pydantic models for all tool results.
+
+Every tool returns a subclass of BaseResult.
+- ok=true + warnings: 성공이지만 주의 필요
+- ok=false + error_type + error: 실패
+- raw: 디버깅 전용 보조 필드 (상위 레이어에서 기본 숨김)
+- checked_at: 모든 결과에 포함 (수집 시점 ISO timestamp)
+"""
+
+from __future__ import annotations
+
+from pydantic import BaseModel, Field
+
+
+class BaseResult(BaseModel):  # fields shared by every tool result
+    ok: bool                       # overall success flag
+    checked_at: str                # ISO timestamp taken when the result was built
+    warnings: list[str] = Field(default_factory=list)
+    error_type: str | None = None  # "timeout" | "auth" | "command_failed" | "parse_error"
+    error: str | None = None       # human-readable failure message
+
+
+# -- Docker ------------------------------------------------------------------
+
+class ContainerInfo(BaseModel):
+    name: str
+    status: str       # "running" | "exited" | "restarting" (derived by docker_status)
+    uptime: str       # full docker status text, e.g. "Up 3 days"
+    ports: str        # published ports summary
+    image: str
+
+
+class DockerStatusResult(BaseResult):
+    host: str
+    containers: list[ContainerInfo] = Field(default_factory=list)
+    summary: str = ""   # "5/5 running" | "4/5 running, down: <names>"
+    raw: str = ""       # stripped `docker ps` output
+
+
+class DockerLogsResult(BaseResult):
+    host: str
+    container: str
+    lines: int             # requested line count
+    truncated: bool = False  # True when more than `lines` lines were available
+    content: str = ""      # log text (the remote command merges stderr into it via 2>&1)
+    stderr: str = ""       # stderr returned for the remote command itself
+    raw: str = ""          # stripped, untrimmed command output
+
+
+# -- Health -------------------------------------------------------------------
+
+class HealthResult(BaseResult):
+    service: str
+    status: str = "unknown"   # "healthy" | "degraded" | "down" | "unknown" (unrecognized service)
+    details: dict = Field(default_factory=dict)  # per-service facts such as latency_ms
+    raw: str | None = None    # stripped command output when one was captured
+
+
+# -- System -------------------------------------------------------------------
+
+class FileSystemInfo(BaseModel):  # one row parsed from `df -h`
+    mount: str      # mount point (last column)
+    used_pct: int   # usage percentage with the "%" stripped
+    used: str       # human-readable size strings, kept as printed by df
+    avail: str
+    total: str
+
+
+class DiskResult(BaseResult):
+    host: str
+    filesystems: list[FileSystemInfo] = Field(default_factory=list)
+    raw: str = ""   # stripped `df -h` output
+
+
+# -- Network ------------------------------------------------------------------
+
+class TailscalePeer(BaseModel):  # one row parsed from `tailscale status`
+    hostname: str
+    ip: str
+    status: str        # "active" | "idle" | "offline"
+    os: str
+
+
+class TailscaleResult(BaseResult):
+    peers: list[TailscalePeer] = Field(default_factory=list)
+    raw: str = ""   # stripped `tailscale status` output
+
+
+# -- Models -------------------------------------------------------------------
+
+class ModelInfo(BaseModel):
+    id: str
+    size: str = ""       # as reported by the source; empty when not available
+    modified: str = ""   # modified/created text as reported by the source
+
+
+class ModelsResult(BaseResult):
+    host: str
+    source: str           # "ollama" | "mlx"
+    models: list[ModelInfo] = Field(default_factory=list)
+    raw: str = ""         # stripped command output
+    raw: str = ""