From 03e3df058fbce550bfc8efc40ae605a0453fdece Mon Sep 17 00:00:00 2001
From: Hyungi Ahn
Date: Mon, 13 Apr 2026 15:06:40 +0900
Subject: [PATCH] =?UTF-8?q?feat(infra):=20docker=5Frestart=20=EC=93=B0?=
 =?UTF-8?q?=EA=B8=B0=20=EB=8F=84=EA=B5=AC=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

보호 컨테이너(home-caddy, home-fail2ban, nanoclaude) 재시작 차단.
MCP 11개 도구 + NanoClaude wrapper.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 infra/core/docker.py           | 84 ++++++++++++++++++++++++++++++++--
 infra/mcp_server.py            | 14 ++++++++++-
 nanoclaude/tools/infra_tool.py | 15 ++++++++++-
 nanoclaude/tools/registry.py   |  4 ++-
 4 files changed, 112 insertions(+), 5 deletions(-)

diff --git a/infra/core/docker.py b/infra/core/docker.py
index e803b86..6fb67b2 100644
--- a/infra/core/docker.py
+++ b/infra/core/docker.py
@@ -5,8 +5,8 @@ from __future__ import annotations
 from datetime import datetime, timezone
 
 from ..config import validate_host, HOSTS
-from ..schemas import DockerStatusResult, DockerLogsResult, ContainerInfo
-from .ssh import run_command, SSHError
+from ..schemas import DockerStatusResult, DockerLogsResult, ContainerInfo, BaseResult
+from .ssh import run_command, run_local, SSHError, _is_local_host
 
 
 def _now() -> str:
@@ -111,3 +111,83 @@ async def docker_logs(host: str, container: str, lines: int = 50) -> DockerLogsR
         stderr=stderr.strip() if stderr else "",
         raw=stdout.strip(),
     )
+
+
+# Containers that must NEVER be restarted via this tool
+PROTECTED_CONTAINERS = {
+    "home-caddy",  # ingress — restarting it briefly takes every service offline
+    "home-fail2ban",  # security
+    "nanoclaude",  # this service itself
+}
+
+# Allow-list of characters for container names (letters, digits, "_", ".",
+# "-"). Anything else is rejected so whitespace, shell metacharacters and
+# extra arguments can never reach the command line built in docker_restart.
+_CONTAINER_NAME_CHARS = frozenset(
+    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.-"
+)
+
+
+def _is_valid_container_name(name: str) -> bool:
+    """Return True if *name* is non-empty, starts alphanumeric and uses only allowed characters."""
+    return (
+        bool(name)
+        and name[0] not in "_.-"
+        and all(ch in _CONTAINER_NAME_CHARS for ch in name)
+    )
+
+
+async def docker_restart(host: str, container: str) -> BaseResult:
+    """Restart a Docker container on *host*.
+
+    Args:
+        host: Target host key; validated with the docker_status policy.
+        container: Name of the container to restart.
+
+    Returns:
+        BaseResult with ok=True and a confirmation entry in ``warnings`` on
+        success; ok=False with ``error_type``/``error`` set when the host or
+        container name is invalid, the container is protected, or the
+        restart command fails.
+    """
+    try:
+        cfg = validate_host("docker_status", host)  # same host validation as docker_status
+    except ValueError as e:
+        return BaseResult(ok=False, checked_at=_now(), error_type="parse_error", error=str(e))
+
+    # Normalize before any check so input such as "home-caddy " cannot slip
+    # past the protected-container lookup and still reach docker.
+    name = container.strip() if isinstance(container, str) else ""
+    if not _is_valid_container_name(name):
+        return BaseResult(
+            ok=False, checked_at=_now(),
+            error_type="parse_error",
+            error=f"유효하지 않은 컨테이너 이름입니다: {container!r}",
+        )
+
+    # NOTE(review): matching is by name only; a container ID would not be
+    # recognized as protected — confirm whether IDs need to be resolved first.
+    if name in PROTECTED_CONTAINERS:
+        return BaseResult(
+            ok=False, checked_at=_now(),
+            error_type="command_failed",
+            error=f"보호된 컨테이너입니다: {name}. 직접 재시작하세요.",
+        )
+
+    cmd = f"{cfg.docker_path} restart {name}"
+
+    try:
+        if _is_local_host(cfg):
+            await run_local(cmd, timeout=30)
+        else:
+            await run_command(cfg, cmd, use_sudo=cfg.needs_sudo, timeout=30)
+    except SSHError as e:
+        return BaseResult(
+            ok=False, checked_at=_now(),
+            error_type=e.error_type, error=str(e),
+        )
+
+    return BaseResult(
+        ok=True, checked_at=_now(),
+        warnings=[f"{name} 재시작 완료 (host: {host})"],
+    )
diff --git a/infra/mcp_server.py b/infra/mcp_server.py
index c76b890..4a819fd 100644
--- a/infra/mcp_server.py
+++ b/infra/mcp_server.py
@@ -14,7 +14,7 @@ from __future__ import annotations
 import json
 from mcp.server.fastmcp import FastMCP
 
-from .core.docker import docker_status, docker_logs
+from .core.docker import docker_status, docker_logs, docker_restart
 from .core.health import service_health, VALID_SERVICES
 from .core.system import disk_usage
 from .core.network import tailscale_status
@@ -55,6 +55,18 @@ async def check_docker_logs(host: str, container: str, lines: int = 50) -> str:
     return result.model_dump_json(indent=2)
 
 
+@mcp.tool()
+async def restart_docker_container(host: str, container: str) -> str:
+    """Docker 컨테이너 재시작. 보호된 컨테이너(home-caddy, home-fail2ban, nanoclaude)는 거부.
+
+    Args:
+        host: 대상 호스트 (gpu | nas-company)
+        container: 재시작할 컨테이너 이름
+    """
+    result = await docker_restart(host, container)
+    return result.model_dump_json(indent=2)
+
+
 @mcp.tool()
 async def check_service_health(service: str) -> str:
     """서비스 헬스체크. 서비스별 정상 판정 기준이 다름.
diff --git a/nanoclaude/tools/infra_tool.py b/nanoclaude/tools/infra_tool.py
index 93b0006..f535c5a 100644
--- a/nanoclaude/tools/infra_tool.py
+++ b/nanoclaude/tools/infra_tool.py
@@ -9,7 +9,7 @@ from __future__ import annotations
 import asyncio
 import logging
 
-from infra.core.docker import docker_status
+from infra.core.docker import docker_status, docker_restart as _docker_restart
 from infra.core.health import service_health, VALID_SERVICES
 from infra.core.system import disk_usage
 from infra.core.network import tailscale_status
@@ -128,3 +128,16 @@ async def queue() -> dict:
 async def verify(check_name: str = "gpu-snapshot") -> dict:
     """Run predefined verify command."""
     return await _run_verify(check_name)
+
+
+async def restart(host: str = "gpu", container: str = "") -> dict:
+    """Restart a Docker container."""
+    if not container:
+        return {"ok": False, "tool": "infra", "operation": "restart",
+                "data": [], "summary": "", "error": "컨테이너 이름을 지정해주세요."}
+    result = await _docker_restart(host, container)
+    ok = result.ok
+    return {"ok": ok, "tool": "infra", "operation": "restart",
+            "data": result.warnings if ok else [],
+            "summary": result.warnings[0] if ok and result.warnings else "",
+            "error": result.error or ("재시작 실패" if not ok else "")}
diff --git a/nanoclaude/tools/registry.py b/nanoclaude/tools/registry.py
index fafb7a1..1aaee77 100644
--- a/nanoclaude/tools/registry.py
+++ b/nanoclaude/tools/registry.py
@@ -21,7 +21,7 @@ ALLOWED_OPS = {
     "calendar": {"today", "search", "create_draft", "create_confirmed"},
     "email": {"search", "read"},
     "document": {"search", "read"},
-    "infra": {"status", "health", "disk", "network", "models", "scheduler", "queue", "verify"},
+    "infra": {"status", "health", "disk", "network", "models", "scheduler", "queue", "verify", "restart"},
 }
 
 # payload hard limit
@@ -119,6 +119,8 @@ async def _exec_infra(operation: str, params: dict) -> dict:
         return await infra_tool.queue()
     elif operation == "verify":
         return await infra_tool.verify(params.get("check_name", "gpu-snapshot"))
+    elif operation == "restart":
+        return await infra_tool.restart(params.get("host", "gpu"), params.get("container", ""))
 
     return _error("infra", operation, "미구현")
 