1abec083e7
- Dockerfile: infra/ 복사, openssh-client, healthcheck 추가 - requirements.txt: asyncssh, python-dotenv 추가 - core/ssh.py: INFRA_LOCAL_HOST 환경변수로 self-SSH 대신 로컬 실행 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
145 lines
4.9 KiB
Python
145 lines
4.9 KiB
Python
"""SSH connection layer — asyncssh based.
|
|
|
|
Provides run_command() which handles:
|
|
- Key-based auth (GPU, Mac mini)
|
|
- Password auth + sudo (company NAS)
|
|
- Timeout / retry
|
|
- Structured error classification
|
|
- Local execution for self-host (INFRA_LOCAL_HOST env)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import os
|
|
from datetime import datetime, timezone
|
|
|
|
import asyncssh
|
|
|
|
from ..config import HostConfig, SSH_TIMEOUT, CMD_TIMEOUT, MAX_RETRIES
|
|
|
|
# Key of the HOSTS entry that should be executed locally instead of over SSH.
# When set, commands targeting that host go through run_local(); this avoids
# self-SSH issues in Docker containers (Tailscale routing, overhead).
# An empty string (the default) disables local execution entirely.
# Example: INFRA_LOCAL_HOST=gpu
_LOCAL_HOST = os.environ.get("INFRA_LOCAL_HOST", "")
|
|
|
|
|
|
class SSHError(Exception):
    """Failure raised by the SSH / local command helpers.

    Attributes:
        error_type: Short machine-readable category of the failure. This
            module raises it with "command_failed", "timeout" and "auth".

    The human-readable message is stored the usual way and is available
    through ``str(exc)``.
    """

    def __init__(self, error_type: str, message: str):
        super().__init__(message)
        self.error_type = error_type
|
|
|
|
|
|
def _now_iso() -> str:
    """Return the current time as a timezone-aware UTC ISO 8601 string."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()
|
|
|
|
|
|
async def _connect(host: HostConfig) -> asyncssh.SSHClientConnection:
    """Open an SSH connection to *host* using the auth method it is configured for.

    Password auth is used only when ``host.auth == "password"`` and a password
    is present; otherwise asyncssh's default key-based auth (keys from
    ~/.ssh/) applies.

    Returns:
        The connected ``asyncssh.SSHClientConnection``.
    """
    options: dict = dict(
        host=host.ip,
        username=host.user,
        connect_timeout=SSH_TIMEOUT,
        # NOTE(review): host key verification is disabled on purpose — every
        # host key is accepted (Tailscale internal network).
        known_hosts=None,
    )

    wants_password = host.auth == "password" and host.password
    if wants_password:
        # An empty client_keys list stops asyncssh from attempting key auth.
        options.update(password=host.password, client_keys=[])

    return await asyncssh.connect(**options)
|
|
|
|
|
|
def _is_local_host(host: HostConfig) -> bool:
    """Tell whether commands for *host* should run locally rather than via SSH.

    True only when INFRA_LOCAL_HOST names an entry in ``HOSTS`` and that
    entry's ``ip`` equals ``host.ip``.
    """
    if _LOCAL_HOST:
        # Imported inside the function, as in the original code.
        from ..config import HOSTS

        configured = HOSTS.get(_LOCAL_HOST)
        if configured is not None:
            return host.ip == configured.ip
    return False
|
|
|
|
|
|
async def run_command(
    host: HostConfig,
    command: str,
    timeout: int = CMD_TIMEOUT,
    use_sudo: bool = False,
) -> tuple[str, str]:
    """Run a command on a remote host and return ``(stdout, stderr)``.

    If *host* matches INFRA_LOCAL_HOST the command is executed locally via
    run_local() instead of SSH; in that case *use_sudo* is not applied.

    Args:
        host: Target host configuration.
        command: Shell command line to execute.
        timeout: Seconds allowed for the command itself (the connection phase
            is bounded separately by SSH_TIMEOUT inside _connect()).
        use_sudo: When true and the host has ``needs_sudo`` and a password,
            the command is prefixed with ``sudo -S`` and the password is fed
            to sudo through a pipe.

    Returns:
        ``(stdout, stderr)`` of the command; stderr is returned unfiltered.

    Raises:
        SSHError: with ``error_type``
            - "command_failed": the command exited non-zero,
            - "timeout": the command timed out, or the connection failed
              after all retries,
            - "auth": SSH authentication was rejected.
    """
    # Local import keeps this block self-contained (shlex is stdlib).
    import shlex

    # Local execution for self-host
    if _is_local_host(host):
        return await run_local(command, timeout=timeout)

    if use_sudo and host.needs_sudo and host.password:
        # Pipe the password to sudo via stdin. The password is shell-quoted so
        # that quotes or metacharacters in it cannot break or inject into the
        # command line, and printf is used because some shells' echo
        # interprets backslash sequences.
        quoted_password = shlex.quote(host.password)
        command = f"printf '%s\\n' {quoted_password} | sudo -S {command}"

    last_error: Exception | None = None
    for attempt in range(1 + MAX_RETRIES):
        try:
            conn = await _connect(host)
            async with conn:
                result = await asyncio.wait_for(
                    conn.run(command, check=False),
                    timeout=timeout,
                )
        # TimeoutError must be handled before OSError: on Python 3.11+
        # asyncio.TimeoutError is the builtin TimeoutError, an OSError subclass.
        except asyncio.TimeoutError as exc:
            raise SSHError("timeout", f"명령 실행 시간 초과 ({timeout}초)") from exc
        except asyncssh.PermissionDenied as exc:
            # Bad credentials will not fix themselves — do not retry.
            raise SSHError("auth", f"SSH 인증 실패: {host.user}@{host.ip}") from exc
        except (OSError, asyncssh.Error) as exc:
            # Connection-level failure: pause briefly and retry while
            # attempts remain.
            last_error = exc
            if attempt < MAX_RETRIES:
                await asyncio.sleep(1)
                continue
            raise SSHError("timeout", f"SSH 연결 실패: {host.ip} — {exc}") from exc

        stdout = result.stdout or ""
        stderr = result.stderr or ""

        if result.exit_status != 0:
            # Command ran but returned non-zero.
            # Filter out the sudo password prompt from stderr so the error
            # message carries only the command's own diagnostics.
            stderr_clean = "\n".join(
                line
                for line in stderr.splitlines()
                if "[sudo]" not in line and "Password:" not in line
            )
            raise SSHError(
                "command_failed",
                f"exit {result.exit_status}: {stderr_clean.strip() or stdout.strip()}",
            )
        return stdout, stderr

    # Only reachable when the loop body never ran (e.g. MAX_RETRIES < 0).
    raise SSHError("timeout", f"SSH 최대 재시도 초과: {host.ip}") from last_error
|
|
|
|
|
|
async def run_local(command: str, timeout: int = CMD_TIMEOUT) -> tuple[str, str]:
    """Run a shell command on the local machine and return ``(stdout, stderr)``.

    Args:
        command: Shell command line, executed through the system shell.
        timeout: Seconds to wait for the command to finish.

    Returns:
        ``(stdout, stderr)`` decoded as text; bytes that are not valid in the
        default encoding are replaced rather than raising.

    Raises:
        SSHError: "command_failed" if the command exits non-zero, "timeout"
            if it does not finish within *timeout* seconds.
    """
    proc = await asyncio.create_subprocess_shell(
        command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout_bytes, stderr_bytes = await asyncio.wait_for(
            proc.communicate(), timeout=timeout
        )
    except asyncio.TimeoutError as exc:
        # wait_for() only cancels communicate(); without an explicit kill the
        # shell would keep running after we report the timeout. Note this
        # terminates the shell process itself, not necessarily its children.
        if proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                # Process exited between the check and the kill.
                pass
            await proc.wait()  # reap the process so it does not linger
        raise SSHError("timeout", f"로컬 명령 시간 초과 ({timeout}초)") from exc

    # errors="replace": command output is not guaranteed to be valid text, and
    # a decode failure should not mask the command's actual result.
    stdout = stdout_bytes.decode(errors="replace") if stdout_bytes else ""
    stderr = stderr_bytes.decode(errors="replace") if stderr_bytes else ""

    if proc.returncode != 0:
        raise SSHError(
            "command_failed",
            f"exit {proc.returncode}: {stderr.strip() or stdout.strip()}",
        )

    return stdout, stderr
|