d47c04317c
scheduler_status, queue_status, run_verify 추가. MCP 10개 도구 + NanoClaude wrapper + pre-route 키워드. worker.py trace print 제거. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
131 lines
4.6 KiB
Python
131 lines
4.6 KiB
Python
"""Infra tool — NanoClaude wrapper over infra.core/ functions.
|
|
|
|
Converts infra.core results to NanoClaude tool return format:
|
|
{"ok": bool, "tool": "infra", "operation": str, "data": list, "summary": str, "error": str}
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
|
|
from infra.core.docker import docker_status
|
|
from infra.core.health import service_health, VALID_SERVICES
|
|
from infra.core.system import disk_usage
|
|
from infra.core.network import tailscale_status
|
|
from infra.core.models import ollama_models, mlx_models
|
|
from infra.core.docserver import scheduler_status as _scheduler_status, queue_status as _queue_status
|
|
from infra.core.verify import run_verify as _run_verify, VERIFY_COMMANDS
|
|
|
|
# Module-level logger named after this module; none of the wrappers in this file call it.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def status(host: str = "gpu") -> dict:
    """Report Docker container state for ``host`` as a tool-result dict.

    Awaits ``docker_status(host)``. If the result has a truthy ``error_type``
    the call is reported as failed with an empty ``data`` list; otherwise each
    container is listed with its name, status and uptime.

    Args:
        host: Host identifier forwarded to ``docker_status`` (default "gpu").

    Returns:
        A dict with the keys ``ok``, ``tool``, ``operation``, ``data``,
        ``summary`` and ``error``.
    """
    report = await docker_status(host)

    # Only an SSH/connection failure (a set error_type) is reported as an
    # error. Containers that have exited are still passed through as data.
    if not report.error_type:
        containers = [
            {"name": item.name, "status": item.status, "uptime": item.uptime}
            for item in report.containers
        ]
        return {
            "ok": True,
            "tool": "infra",
            "operation": "status",
            "data": containers,
            "summary": report.summary,
            "error": "",
        }

    return {
        "ok": False,
        "tool": "infra",
        "operation": "status",
        "data": [],
        "summary": "",
        "error": report.error or "확인 실패",
    }
|
|
|
|
|
|
async def health(service: str = "") -> dict:
    """Run health checks and fold them into a single tool-result dict.

    Args:
        service: Name of one service to check. When empty, the three critical
            services "document-server", "mlx" and "ollama-gpu" are checked.

    Returns:
        A dict whose ``data`` holds one entry per checked service
        (``service``, ``status``, ``ok``, ``details``), whose ``ok`` is True
        only if every entry is ok, and whose ``summary`` labels each service.
        ``error`` is always the empty string.
    """
    targets = [service] if service else ["document-server", "mlx", "ollama-gpu"]

    entries = []
    # Checks are awaited one after another, in list order.
    for name in targets:
        probe = await service_health(name)
        entries.append(
            {
                "service": probe.service,
                "status": probe.status,
                "ok": probe.ok,
                "details": probe.details,
            }
        )

    everything_ok = all(entry["ok"] for entry in entries)
    summary = ", ".join(
        "{}: {}".format(entry["service"], "정상" if entry["ok"] else "이상")
        for entry in entries
    )

    return {
        "ok": everything_ok,
        "tool": "infra",
        "operation": "health",
        "data": entries,
        "summary": summary,
        "error": "",
    }
|
|
|
|
|
|
async def disk(host: str = "") -> dict:
    """Collect filesystem usage for one host, or for "gpu" and "macmini".

    Each host is queried with ``disk_usage``. A host whose query fails adds a
    "<host>: <error>" entry to the problem list and contributes no data; a
    host that succeeds contributes one row per filesystem plus any warnings
    the query itself reported.

    Args:
        host: Host to inspect. When empty, both "gpu" and "macmini" are checked.

    Returns:
        A dict with the keys ``ok``, ``tool``, ``operation``, ``data``,
        ``summary`` and ``error``. ``ok`` is True only when no failure and no
        warning was collected. ``summary`` covers at most the first five
        filesystem rows. ``error`` is the problems joined with "; ".
    """
    targets = [host] if host else ["gpu", "macmini"]
    rows = []
    problems = []

    for name in targets:
        usage = await disk_usage(name)
        if not usage.ok:
            # Use the same generic fallback text as the other wrappers so an
            # empty error never renders as "<host>: None" or "<host>: ".
            reason = usage.error or "확인 실패"
            problems.append(f"{name}: {reason}")
            continue

        rows.extend(
            {
                "host": name,
                "mount": fs.mount,
                "used_pct": fs.used_pct,
                "used": fs.used,
                "total": fs.total,
            }
            for fs in usage.filesystems
        )
        problems.extend(usage.warnings)

    # Keep the one-line summary short: only the first five rows are shown.
    summary = ", ".join(
        f"{row['host']}:{row['mount']} {row['used_pct']}%" for row in rows[:5]
    )

    return {
        "ok": not problems,
        "tool": "infra",
        "operation": "disk",
        "data": rows,
        "summary": summary,
        # Joining an empty list already yields "", so no extra guard is needed.
        "error": "; ".join(problems),
    }
|
|
|
|
|
|
async def network() -> dict:
    """Report Tailscale peers and how many of them are not offline.

    Returns:
        On failure (``tailscale_status()`` result not ok): a dict with
        ``ok`` False, empty ``data`` and the reported error or a generic
        fallback message. On success: one ``data`` entry per peer
        (``hostname``, ``ip``, ``status``, ``os``) and a summary of the form
        "<online>/<total> 온라인".
    """
    tailnet = await tailscale_status()

    if tailnet.ok:
        peers = [
            {
                "hostname": peer.hostname,
                "ip": peer.ip,
                "status": peer.status,
                "os": peer.os,
            }
            for peer in tailnet.peers
        ]
        # Any status other than the literal "offline" counts as online.
        online_count = len([peer for peer in tailnet.peers if peer.status != "offline"])
        total_count = len(tailnet.peers)
        return {
            "ok": True,
            "tool": "infra",
            "operation": "network",
            "data": peers,
            "summary": f"{online_count}/{total_count} 온라인",
            "error": "",
        }

    return {
        "ok": False,
        "tool": "infra",
        "operation": "network",
        "data": [],
        "summary": "",
        "error": tailnet.error or "확인 실패",
    }
|
|
|
|
|
|
async def models(host: str = "gpu") -> dict:
    """List the models available on ``host`` as a tool-result dict.

    Args:
        host: "mlx" or "macmini" selects ``mlx_models()``; any other value is
            forwarded to ``ollama_models(host)``. Defaults to "gpu".

    Returns:
        On failure: a dict with ``ok`` False, empty ``data`` and the reported
        error or a generic fallback message. On success: one ``data`` entry
        per model (``id``, ``size``) and a summary naming the source, the
        host and the model count.
    """
    if host in ("mlx", "macmini"):
        inventory = await mlx_models()
    else:
        inventory = await ollama_models(host)

    if inventory.ok:
        listing = [{"id": model.id, "size": model.size} for model in inventory.models]
        count = len(inventory.models)
        return {
            "ok": True,
            "tool": "infra",
            "operation": "models",
            "data": listing,
            "summary": f"{inventory.source} on {inventory.host}: {count}개 모델",
            "error": "",
        }

    return {
        "ok": False,
        "tool": "infra",
        "operation": "models",
        "data": [],
        "summary": "",
        "error": inventory.error or "확인 실패",
    }
|
|
|
|
|
|
async def scheduler() -> dict:
    """Return the Document Server scheduler status.

    Awaits ``infra.core.docserver.scheduler_status`` and hands its result back
    unchanged; no reshaping is done in this wrapper.
    NOTE(review): presumably the core function already returns the tool-result
    dict format -- confirm.
    """
    scheduler_info = await _scheduler_status()
    return scheduler_info
|
|
|
|
|
|
async def queue() -> dict:
    """Return the Document Server queue status.

    Awaits ``infra.core.docserver.queue_status`` and hands its result back
    unchanged; no reshaping is done in this wrapper.
    NOTE(review): presumably the core function already returns the tool-result
    dict format -- confirm.
    """
    queue_info = await _queue_status()
    return queue_info
|
|
|
|
|
|
async def verify(check_name: str = "gpu-snapshot") -> dict:
    """Run one predefined verify command and return its result.

    Args:
        check_name: Name of the check, forwarded as-is to
            ``infra.core.verify.run_verify``. Defaults to "gpu-snapshot".
            No validation is done in this wrapper.

    Returns:
        Whatever ``run_verify`` returns, unchanged.
    """
    outcome = await _run_verify(check_name)
    return outcome
|