b1f9e87d6a
mcp-infra-server를 gpu-services/infra/로 통합. core/ 순수 로직은 Agent/NanoClaude에서도 직접 import 가능. 도구: docker_status, docker_logs, service_health, disk_usage, tailscale_status, ollama_models, mlx_models. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
114 lines
3.6 KiB
Python
114 lines
3.6 KiB
Python
"""Docker status and logs tools."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
from ..config import validate_host, HOSTS
|
|
from ..schemas import DockerStatusResult, DockerLogsResult, ContainerInfo
|
|
from .ssh import run_command, SSHError
|
|
|
|
|
|
def _now() -> str:
    """Return the current UTC time as an ISO-8601 formatted string."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()
|
|
|
|
|
|
async def docker_status(host: str) -> DockerStatusResult:
    """Report the state of every Docker container on ``host``.

    Runs ``docker ps -a`` on the host with a pipe-delimited format string and
    parses each output row into a ``ContainerInfo``.

    Args:
        host: Host identifier, checked with ``validate_host``.

    Returns:
        A ``DockerStatusResult``. When host validation fails, ``ok`` is False
        and ``error_type`` is ``"parse_error"``; when the remote command raises
        ``SSHError``, ``ok`` is False and ``error_type`` is taken from the
        exception. Otherwise ``ok`` is True only if every parsed container is
        classified as running, and ``summary``/``warnings`` name the ones that
        are not.
    """
    try:
        host_cfg = validate_host("docker_status", host)
    except ValueError as exc:
        return DockerStatusResult(
            ok=False,
            checked_at=_now(),
            host=host,
            error_type="parse_error",
            error=str(exc),
        )

    # Go-template placeholders; the "|" separator is what the parser splits on.
    ps_format = "|".join(("{{.Names}}", "{{.Status}}", "{{.Ports}}", "{{.Image}}"))
    ps_command = f"{host_cfg.docker_path} ps -a --format '{ps_format}'"

    try:
        stdout, _ = await run_command(
            host_cfg, ps_command, use_sudo=host_cfg.needs_sudo
        )
    except SSHError as exc:
        return DockerStatusResult(
            ok=False,
            checked_at=_now(),
            host=host,
            error_type=exc.error_type,
            error=str(exc),
        )

    raw_output = stdout.strip()

    containers: list[ContainerInfo] = []
    for row in raw_output.splitlines():
        fields = row.split("|", 3)
        if len(fields) < 4:
            # Not a complete name|status|ports|image row; ignore it.
            continue
        name, status_text, ports, image = fields

        # "Restarting" wins over the "Up" prefix; anything else counts as exited.
        if "Restarting" in status_text:
            state = "restarting"
        elif status_text.startswith("Up"):
            state = "running"
        else:
            state = "exited"

        containers.append(
            ContainerInfo(
                name=name,
                status=state,
                uptime=status_text,
                ports=ports,
                image=image,
            )
        )

    down_names = [c.name for c in containers if c.status != "running"]
    total = len(containers)
    running = total - len(down_names)

    summary = f"{running}/{total} running"
    if down_names:
        summary += f", down: {', '.join(down_names)}"

    warnings: list[str] = [
        f"{c.name} is restarting"
        if c.status == "restarting"
        else f"{c.name} is exited"
        for c in containers
        if c.status == "restarting" or c.status == "exited"
    ]

    return DockerStatusResult(
        ok=running == total,
        checked_at=_now(),
        host=host,
        containers=containers,
        summary=summary,
        warnings=warnings,
        raw=raw_output,
    )
|
|
|
|
|
|
async def docker_logs(host: str, container: str, lines: int = 50) -> DockerLogsResult:
    """Get the most recent log lines of a container on ``host``.

    Args:
        host: Host identifier, checked with ``validate_host``.
        container: Container name or ID handed to ``docker logs``.
        lines: Maximum number of log lines to return in ``content``.

    Returns:
        A ``DockerLogsResult``. When host validation fails, ``ok`` is False
        and ``error_type`` is ``"parse_error"``; when the remote command raises
        ``SSHError``, ``ok`` is False and ``error_type`` is taken from the
        exception. Otherwise ``ok`` is True, ``content`` holds at most
        ``lines`` of the newest log lines, ``truncated`` tells whether more
        output existed beyond that window, and ``raw`` is the stripped command
        output (including the one probe line used to detect truncation).
    """
    # Local import keeps this block self-contained; shlex is standard library.
    import shlex

    try:
        cfg = validate_host("docker_logs", host)
    except ValueError as e:
        return DockerLogsResult(
            ok=False, checked_at=_now(), host=host, container=container,
            lines=lines, error_type="parse_error", error=str(e),
        )

    docker = cfg.docker_path
    # Request one extra line to detect truncation.
    # The command string contains a shell redirect, so the container name is
    # quoted to stop shell metacharacters in it from being interpreted.
    # Valid Docker names contain no such characters and pass through unchanged.
    cmd = f"{docker} logs --tail {lines + 1} {shlex.quote(container)} 2>&1"

    try:
        stdout, stderr = await run_command(
            cfg, cmd, use_sudo=cfg.needs_sudo, timeout=15,
        )
    except SSHError as e:
        return DockerLogsResult(
            ok=False, checked_at=_now(), host=host, container=container,
            lines=lines, error_type=e.error_type, error=str(e),
        )

    all_lines = stdout.strip().splitlines()
    truncated = len(all_lines) > lines
    if truncated:
        # ``--tail`` emits the newest lines in chronological order, so the
        # surplus probe line is the OLDEST one (index 0). Keep the tail of the
        # list so the most recent line is never dropped. A non-positive
        # ``lines`` request yields no content.
        kept = all_lines[-lines:] if lines > 0 else []
    else:
        kept = all_lines
    content = "\n".join(kept)

    return DockerLogsResult(
        ok=True,
        checked_at=_now(),
        host=host,
        container=container,
        lines=lines,
        truncated=truncated,
        content=content,
        # NOTE(review): with ``2>&1`` in the command, the container's stderr is
        # presumably merged into stdout remotely, so this is likely empty —
        # confirm against run_command.
        stderr=stderr.strip() if stderr else "",
        raw=stdout.strip(),
    )
|