feat(infra): docker_restart 쓰기 도구 추가

보호 컨테이너(home-caddy, home-fail2ban, nanoclaude) 재시작 차단.
MCP 11개 도구 + NanoClaude wrapper.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-13 15:06:40 +09:00
parent d47c04317c
commit 03e3df058f
4 changed files with 74 additions and 5 deletions
+44 -2
View File
@@ -5,8 +5,8 @@ from __future__ import annotations
from datetime import datetime, timezone from datetime import datetime, timezone
from ..config import validate_host, HOSTS from ..config import validate_host, HOSTS
from ..schemas import DockerStatusResult, DockerLogsResult, ContainerInfo from ..schemas import DockerStatusResult, DockerLogsResult, ContainerInfo, BaseResult
from .ssh import run_command, SSHError from .ssh import run_command, run_local, SSHError, _is_local_host
def _now() -> str: def _now() -> str:
@@ -111,3 +111,45 @@ async def docker_logs(host: str, container: str, lines: int = 50) -> DockerLogsR
stderr=stderr.strip() if stderr else "", stderr=stderr.strip() if stderr else "",
raw=stdout.strip(), raw=stdout.strip(),
) )
# Deny-list checked by docker_restart: containers named here are never
# restarted through this tool and have to be restarted by hand instead.
PROTECTED_CONTAINERS = {
    "nanoclaude",     # this service itself
    "home-fail2ban",  # security
    "home-caddy",     # ingress; a restart briefly interrupts every service
}
async def docker_restart(host: str, container: str) -> BaseResult:
    """Restart a single Docker container on *host*.

    The request is refused (``ok=False``) without running anything when:

    * ``validate_host`` rejects *host* (``error_type="parse_error"``);
    * *container* is not a plain container name (``error_type="parse_error"``);
    * *container* is listed in ``PROTECTED_CONTAINERS``
      (``error_type="command_failed"``).

    Args:
        host: Target host key, validated against the ``docker_status`` tool's
            host rules.
        container: Name of the container to restart. Must start with an ASCII
            letter or digit and contain only ``[A-Za-z0-9_.-]``.

    Returns:
        A ``BaseResult``. On success ``ok`` is True and ``warnings`` holds a
        single confirmation message. If the command raises ``SSHError`` its
        ``error_type`` and message are copied into the result. Any other
        exception propagates to the caller.
    """
    # Function-scope import keeps this block self-contained; `re` caches the
    # compiled pattern, so the per-call cost is negligible.
    import re

    try:
        # Reuse the docker_status tool's host validation rather than defining
        # a separate set of hosts for restart.
        cfg = validate_host("docker_status", host)
    except ValueError as e:
        return BaseResult(ok=False, checked_at=_now(), error_type="parse_error", error=str(e))

    # The name is interpolated into a command string below, so it must not
    # carry whitespace or shell metacharacters. Without this check a value
    # such as "x; rm -rf /" would inject a command, and "x home-caddy",
    # "/home-caddy" or "home-caddy " would slip past the protected-container
    # test while still restarting a protected container.
    if not isinstance(container, str) or not re.fullmatch(
        r"[A-Za-z0-9][A-Za-z0-9_.-]*", container
    ):
        return BaseResult(
            ok=False, checked_at=_now(),
            error_type="parse_error",
            error=f"유효하지 않은 컨테이너 이름입니다: {container!r}",
        )

    # NOTE(review): this matches by name only; a protected container addressed
    # by its ID would not be caught here. Confirm whether IDs must be blocked.
    if container in PROTECTED_CONTAINERS:
        return BaseResult(
            ok=False, checked_at=_now(),
            error_type="command_failed",
            error=f"보호된 컨테이너입니다: {container}. 직접 재시작하세요.",
        )

    docker = cfg.docker_path
    cmd = f"{docker} restart {container}"

    try:
        # Command output is not needed; only success or failure matters.
        if _is_local_host(cfg):
            await run_local(cmd, timeout=30)
        else:
            await run_command(cfg, cmd, use_sudo=cfg.needs_sudo, timeout=30)
    except SSHError as e:
        return BaseResult(
            ok=False, checked_at=_now(),
            error_type=e.error_type, error=str(e),
        )

    # The confirmation message travels in `warnings`; callers read it from there.
    return BaseResult(
        ok=True, checked_at=_now(),
        warnings=[f"{container} 재시작 완료 (host: {host})"],
    )
+13 -1
View File
@@ -14,7 +14,7 @@ from __future__ import annotations
import json import json
from mcp.server.fastmcp import FastMCP from mcp.server.fastmcp import FastMCP
from .core.docker import docker_status, docker_logs from .core.docker import docker_status, docker_logs, docker_restart
from .core.health import service_health, VALID_SERVICES from .core.health import service_health, VALID_SERVICES
from .core.system import disk_usage from .core.system import disk_usage
from .core.network import tailscale_status from .core.network import tailscale_status
@@ -55,6 +55,18 @@ async def check_docker_logs(host: str, container: str, lines: int = 50) -> str:
return result.model_dump_json(indent=2) return result.model_dump_json(indent=2)
# MCP entry point for the restart tool. The docstring below is intentionally
# left verbatim (Korean): presumably FastMCP publishes it to clients as the
# tool description, which would make it runtime text -- TODO confirm.
@mcp.tool()
async def restart_docker_container(host: str, container: str) -> str:
    """Docker 컨테이너 재시작. 보호된 컨테이너(home-caddy, home-fail2ban, nanoclaude)는 거부.

    Args:
        host: 대상 호스트 (gpu | nas-company)
        container: 재시작할 컨테이너 이름
    """
    # Delegate to the core implementation and hand its result model back as
    # indented JSON text.
    return (await docker_restart(host, container)).model_dump_json(indent=2)
@mcp.tool() @mcp.tool()
async def check_service_health(service: str) -> str: async def check_service_health(service: str) -> str:
"""서비스 헬스체크. 서비스별 정상 판정 기준이 다름. """서비스 헬스체크. 서비스별 정상 판정 기준이 다름.
+14 -1
View File
@@ -9,7 +9,7 @@ from __future__ import annotations
import asyncio import asyncio
import logging import logging
from infra.core.docker import docker_status from infra.core.docker import docker_status, docker_restart as _docker_restart
from infra.core.health import service_health, VALID_SERVICES from infra.core.health import service_health, VALID_SERVICES
from infra.core.system import disk_usage from infra.core.system import disk_usage
from infra.core.network import tailscale_status from infra.core.network import tailscale_status
@@ -128,3 +128,16 @@ async def queue() -> dict:
async def verify(check_name: str = "gpu-snapshot") -> dict: async def verify(check_name: str = "gpu-snapshot") -> dict:
"""Run predefined verify command.""" """Run predefined verify command."""
return await _run_verify(check_name) return await _run_verify(check_name)
async def restart(host: str = "gpu", container: str = "") -> dict:
    """Restart a Docker container and wrap the outcome in the infra envelope.

    Args:
        host: Host key forwarded unchanged to the core restart function.
        container: Container name; a falsy value is rejected before any
            restart is attempted.

    Returns:
        A dict with the keys ``ok``, ``tool``, ``operation``, ``data``,
        ``summary`` and ``error``. On success ``data`` is the core result's
        ``warnings`` list and ``summary`` is its first entry (or ``""`` when
        the list is empty). On failure ``data`` is empty and ``error`` is the
        core error message, or a generic fallback when none was provided.
    """
    # Guard clause: nothing to restart without a container name.
    if not container:
        return {"ok": False, "tool": "infra", "operation": "restart",
                "data": [], "summary": "", "error": "컨테이너 이름을 지정해주세요."}

    outcome = await _docker_restart(host, container)
    succeeded = outcome.ok

    if succeeded:
        payload = outcome.warnings
        headline = outcome.warnings[0] if outcome.warnings else ""
        fallback_error = ""
    else:
        payload = []
        headline = ""
        fallback_error = "재시작 실패"

    return {
        "ok": succeeded,
        "tool": "infra",
        "operation": "restart",
        "data": payload,
        "summary": headline,
        # An explicit error from the core result wins over the fallback text.
        "error": outcome.error or fallback_error,
    }
+3 -1
View File
@@ -21,7 +21,7 @@ ALLOWED_OPS = {
"calendar": {"today", "search", "create_draft", "create_confirmed"}, "calendar": {"today", "search", "create_draft", "create_confirmed"},
"email": {"search", "read"}, "email": {"search", "read"},
"document": {"search", "read"}, "document": {"search", "read"},
"infra": {"status", "health", "disk", "network", "models", "scheduler", "queue", "verify"}, "infra": {"status", "health", "disk", "network", "models", "scheduler", "queue", "verify", "restart"},
} }
# payload hard limit # payload hard limit
@@ -119,6 +119,8 @@ async def _exec_infra(operation: str, params: dict) -> dict:
return await infra_tool.queue() return await infra_tool.queue()
elif operation == "verify": elif operation == "verify":
return await infra_tool.verify(params.get("check_name", "gpu-snapshot")) return await infra_tool.verify(params.get("check_name", "gpu-snapshot"))
elif operation == "restart":
return await infra_tool.restart(params.get("host", "gpu"), params.get("container", ""))
return _error("infra", operation, "미구현") return _error("infra", operation, "미구현")