feat(infra): Phase 1.5 진단 도구 3개 + trace 정리
scheduler_status, queue_status, run_verify 추가. MCP 10개 도구 + NanoClaude wrapper + pre-route 키워드. worker.py trace print 제거. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,63 @@
|
||||
"""Document Server diagnostic tools — scheduler and queue status."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from ..config import HOSTS
|
||||
from ..schemas import BaseResult
|
||||
from .ssh import run_command, run_local, SSHError, _is_local_host
|
||||
|
||||
|
||||
def _now() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
async def scheduler_status() -> dict:
    """Collect recent scheduler-related lines from the Document Server log.

    Runs a fixed ``docker logs | grep | tail`` pipeline against the ``gpu``
    host entry: the last 100 container log lines are filtered for
    scheduler keywords and at most the final 20 matches are kept. The
    command runs locally when ``_is_local_host`` says so, otherwise through
    ``run_command``.

    Returns:
        On success, a dict with ``ok=True``, ``checked_at``, ``data`` (the
        non-empty, stripped matching lines), ``summary`` and an empty
        ``error``. If ``SSHError`` is raised, a dict with ``ok=False``,
        ``checked_at``, ``error_type``, ``error``, empty ``data`` and empty
        ``summary``.
    """
    host_cfg = HOSTS["gpu"]
    log_cmd = "docker logs hyungi_document_server-fastapi-1 --tail 100 2>&1 | grep -iE 'scheduler|apscheduler|job|trigger|cron|interval' | tail -20"

    try:
        # Pick the runner first, then await it once; both return (stdout, stderr).
        pending = (
            run_local(log_cmd)
            if _is_local_host(host_cfg)
            else run_command(host_cfg, log_cmd)
        )
        output, _ = await pending
    except SSHError as exc:
        failure = {"ok": False, "checked_at": _now(), "error_type": exc.error_type}
        failure["error"] = str(exc)
        failure["data"] = []
        failure["summary"] = ""
        return failure

    # Drop blank lines and surrounding whitespace from what grep returned.
    stripped = (raw.strip() for raw in output.strip().splitlines())
    matched = [entry for entry in stripped if entry]

    report = {"ok": True, "checked_at": _now()}
    report["data"] = matched
    report["summary"] = f"최근 스케줄러 로그 {len(matched)}줄"
    report["error"] = ""
    return report
|
||||
|
||||
|
||||
async def queue_status() -> dict:
    """Collect recent queue-related lines from the Document Server log.

    Filters the last 200 lines of the container log on the ``gpu`` host
    entry for queue keywords and keeps at most the final 20 matches. The
    pipeline is executed with ``run_local`` or ``run_command`` depending on
    ``_is_local_host``.

    Returns:
        On success, a dict with ``ok=True``, ``checked_at``, ``data`` (the
        non-empty, stripped matching lines), ``summary`` and an empty
        ``error``. If ``SSHError`` is raised, a dict with ``ok=False``,
        ``checked_at``, ``error_type``, ``error``, empty ``data`` and empty
        ``summary``.
    """
    gpu_cfg = HOSTS["gpu"]
    pipeline = "docker logs hyungi_document_server-fastapi-1 --tail 200 2>&1 | grep -iE 'queue_consumer|pending|processing|completed|stale|batch' | tail -20"

    try:
        if _is_local_host(gpu_cfg):
            captured, _ = await run_local(pipeline)
        else:
            captured, _ = await run_command(gpu_cfg, pipeline)
    except SSHError as err:
        return {
            "ok": False,
            "checked_at": _now(),
            "error_type": err.error_type,
            "error": str(err),
            "data": [],
            "summary": "",
        }

    # Keep only lines that still have content after trimming whitespace.
    entries = []
    for raw_line in captured.strip().splitlines():
        trimmed = raw_line.strip()
        if trimmed:
            entries.append(trimmed)

    return {
        "ok": True,
        "checked_at": _now(),
        "data": entries,
        "summary": f"최근 큐 로그 {len(entries)}줄",
        "error": "",
    }
|
||||
@@ -0,0 +1,73 @@
|
||||
"""Run predefined verify commands from infra_inventory.md."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from ..config import HOSTS
|
||||
from .ssh import run_command, run_local, SSHError, _is_local_host
|
||||
|
||||
|
||||
def _now() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
# Predefined verify commands (from infra_inventory.md).
#
# Maps a check name to a spec with three string fields:
#   host - key into HOSTS selecting where the command runs
#   cmd  - shell command line executed on that host
#   desc - human-readable description, reused as the result summary
VERIFY_COMMANDS = {
    "gpu-snapshot": {
        "host": "gpu",
        # The pgrep pattern is written as '[P]lex Media' on purpose: this
        # whole string is run through a shell, so the shell's own command
        # line contains the pattern text and `pgrep -f 'Plex Media'` could
        # match that shell and report a hit even when Plex is not running.
        # The bracket form still matches "Plex Media" but not its own text.
        # stderr is silenced on pgrep itself (it was attached to `head`).
        "cmd": "docker compose ls 2>/dev/null; echo '---'; ollama list 2>/dev/null; echo '---'; pgrep -af '[P]lex Media' 2>/dev/null | head -1",
        "desc": "GPU 서버 전체 스냅샷 (docker, ollama, plex)",
    },
    "macmini-snapshot": {
        "host": "macmini",
        "cmd": "bash -lc 'launchctl list | grep com.user; curl -s localhost:8800/v1/models 2>/dev/null | python3 -m json.tool 2>/dev/null; ollama list 2>/dev/null'",
        "desc": "Mac mini 전체 스냅샷 (launchd, MLX, ollama)",
    },
    "docserver-health": {
        "host": "gpu",
        "cmd": "curl -sf http://localhost:8000/health",
        "desc": "Document Server 헬스체크",
    },
    "config-model-match": {
        "host": "gpu",
        "cmd": "grep -E 'model:' ~/Documents/code/hyungi_Document_Server/config.yaml 2>/dev/null; echo '---'; ollama list 2>/dev/null",
        "desc": "config.yaml 모델과 실제 설치 모델 비교",
    },
}
|
||||
|
||||
|
||||
async def run_verify(check_name: str) -> dict:
    """Execute one of the predefined checks in ``VERIFY_COMMANDS``.

    Args:
        check_name: Key of the check to run.

    Returns:
        A dict with ``ok``, ``checked_at``, ``data``, ``summary``, ``error``
        and ``raw``. Failure results additionally carry ``error_type``:
        ``"parse_error"`` for an unknown check name, or the ``SSHError``'s
        own ``error_type`` when running the command fails. On success
        ``raw`` is the stripped stdout and ``data`` is that text split into
        lines.
    """
    spec = VERIFY_COMMANDS.get(check_name)
    if spec is None:
        # Unknown name: report the valid choices instead of raising.
        available = ", ".join(VERIFY_COMMANDS)
        return {
            "ok": False,
            "checked_at": _now(),
            "error_type": "parse_error",
            "error": f"알 수 없는 체크: '{check_name}'. 사용 가능: {available}",
            "data": [],
            "summary": "",
            "raw": "",
        }

    host_cfg = HOSTS[spec["host"]]
    description = spec["desc"]

    try:
        # Both runners get the same 15 second timeout.
        if _is_local_host(host_cfg):
            output, _ = await run_local(spec["cmd"], timeout=15)
        else:
            output, _ = await run_command(host_cfg, spec["cmd"], timeout=15)
    except SSHError as exc:
        return {
            "ok": False,
            "checked_at": _now(),
            "error_type": exc.error_type,
            "error": str(exc),
            "data": [],
            "summary": description,
            "raw": "",
        }

    report = {"ok": True, "checked_at": _now()}
    text = output.strip()
    report["data"] = text.splitlines()
    report["summary"] = description
    report["error"] = ""
    report["raw"] = text
    return report
|
||||
@@ -11,6 +11,7 @@ All actual logic lives in src/core/.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
from .core.docker import docker_status, docker_logs
|
||||
@@ -18,6 +19,8 @@ from .core.health import service_health, VALID_SERVICES
|
||||
from .core.system import disk_usage
|
||||
from .core.network import tailscale_status
|
||||
from .core.models import ollama_models, mlx_models
|
||||
from .core.docserver import scheduler_status as _scheduler_status, queue_status as _queue_status
|
||||
from .core.verify import run_verify as _run_verify, VERIFY_COMMANDS
|
||||
|
||||
mcp = FastMCP(
|
||||
"infra",
|
||||
@@ -99,6 +102,31 @@ async def check_mlx_models() -> str:
|
||||
return result.model_dump_json(indent=2)
|
||||
|
||||
|
||||
@mcp.tool()
async def check_scheduler_status() -> str:
    """Document Server APScheduler 잡 상태. 최근 스케줄러 로그에서 추출."""
    # NOTE: the docstring above is kept verbatim; FastMCP is expected to
    # publish it as the tool description, so it is runtime-visible text.
    # Delegate to the core implementation and serialise its dict as
    # pretty-printed JSON without escaping non-ASCII characters.
    payload = await _scheduler_status()
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    return serialized
|
||||
|
||||
|
||||
@mcp.tool()
async def check_queue_status() -> str:
    """Document Server 문서 처리 큐 현황. 최근 큐 로그에서 추출."""
    # NOTE: the docstring above is kept verbatim; FastMCP is expected to
    # publish it as the tool description, so it is runtime-visible text.
    # The core helper returns a plain dict; emit it as indented JSON with
    # non-ASCII characters left unescaped.
    status = await _queue_status()
    return json.dumps(
        status,
        ensure_ascii=False,
        indent=2,
    )
|
||||
|
||||
|
||||
@mcp.tool()
async def check_verify(check_name: str) -> str:
    """인프라 검증 명령 실행 (infra_inventory.md 기반).

    Args:
        check_name: 체크 이름 (gpu-snapshot | macmini-snapshot | docserver-health | config-model-match)
    """
    # NOTE: the docstring above is kept verbatim; FastMCP is expected to
    # publish it as the tool description, so it is runtime-visible text.
    # Run the named check via the core helper and return its dict as
    # indented JSON with non-ASCII characters left unescaped.
    outcome = await _run_verify(check_name)
    rendered = json.dumps(outcome, ensure_ascii=False, indent=2)
    return rendered
|
||||
|
||||
|
||||
def main():
    """Start the MCP server using the stdio transport."""
    transport = "stdio"
    mcp.run(transport=transport)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user