512 lines
21 KiB
Python
512 lines
21 KiB
Python
import json
|
|
import os
|
|
import shlex
|
|
import shutil
|
|
import csv
|
|
from datetime import datetime, timezone
|
|
|
|
import psutil
|
|
from fastapi import HTTPException
|
|
|
|
from app.core.settings import EDGE_BOTS_WORKSPACE_ROOT, EDGE_NODE_ID, EDGE_NODE_NAME
|
|
from app.runtime.base import EdgeRuntimeBackend
|
|
from app.runtime.factory import build_edge_runtime_backends, preferred_edge_runtime_kind
|
|
from app.schemas.edge import (
|
|
EdgeCommandRequest,
|
|
EdgeLogsResponse,
|
|
EdgeMonitorEnsureResponse,
|
|
EdgeMonitorPacket,
|
|
EdgeMonitorPacketsResponse,
|
|
EdgeNodeHeartbeatResponse,
|
|
EdgeNodeResourcesResponse,
|
|
EdgeNodeSelfResponse,
|
|
EdgeStatusResponse,
|
|
NODE_PROTOCOL_VERSION,
|
|
)
|
|
from app.schemas.runtime import EdgeStartBotRequest
|
|
|
|
|
|
class EdgeRuntimeService:
    """Route per-bot operations to the runtime backend selected for each bot.

    The service holds the backends returned by ``build_edge_runtime_backends``
    keyed by lower-cased runtime kind, keeps a bounded in-memory buffer of
    monitor packets per bot, and persists the chosen runtime kind of a bot in
    a ``runtime-target.json`` file inside the bot's ``.nanobot`` directory.
    """

    # Maximum number of monitor packets retained in memory per bot.
    _MAX_RECENT_PACKETS = 200
    # Number of trailing log lines scanned when backfilling monitor packets.
    _BACKFILL_LOG_TAIL = 500

    def __init__(self) -> None:
        """Build the backend registry and the empty per-bot packet state."""
        self._runtime_backends: dict[str, EdgeRuntimeBackend] = {
            str(kind).strip().lower(): backend
            for kind, backend in build_edge_runtime_backends().items()
            if isinstance(kind, str)
        }
        self._recent_packets: dict[str, list[dict]] = {}
        self._packet_counters: dict[str, int] = {}
        self._backfilled_bots: set[str] = set()

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as ISO-8601 with a ``Z`` suffix."""
        return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    @staticmethod
    def _clean_tokens(items) -> list[str]:
        """Stringify and strip every item, dropping the ones that end up empty."""
        stripped = (str(item or "").strip() for item in items)
        return [token for token in stripped if token]

    def _runtime_kind(self) -> str:
        """Return the node-wide preferred runtime kind for the known backends."""
        return preferred_edge_runtime_kind(self._runtime_backends)

    def capabilities(self) -> dict:
        """Merge the capability reports of all backends into one dict.

        Boolean sections (``runtime``, ``workspace``, ``monitor``) are
        OR-merged: a flag is true when any backend reports it as exactly
        ``True``. ``process`` entries are copied as-is, later backends
        overwriting earlier ones. Empty sections are omitted.
        """
        caps: dict = {"protocol": {"version": NODE_PROTOCOL_VERSION}}
        flag_sections: dict[str, dict[str, bool]] = {"runtime": {}, "workspace": {}, "monitor": {}}
        process_caps: dict[str, object] = {}

        for backend in self._runtime_backends.values():
            current = dict((backend.capabilities() or {}) if hasattr(backend, "capabilities") else {})
            for section, merged in flag_sections.items():
                for key, value in dict(current.get(section) or {}).items():
                    name = str(key or "").strip()
                    if section == "runtime":
                        # Only runtime flag names are treated case-insensitively.
                        name = name.lower()
                    if name:
                        merged[name] = bool(merged.get(name) or value is True)
            for key, value in dict(current.get("process") or {}).items():
                name = str(key or "").strip()
                if name:
                    process_caps[name] = value

        for section, merged in flag_sections.items():
            if merged:
                caps[section] = merged
        if process_caps:
            caps["process"] = process_caps
        return caps

    async def start_bot(self, *, bot_id: str, payload: EdgeStartBotRequest) -> EdgeStatusResponse:
        """Start a bot on the resolved backend.

        The runtime target is persisted before the backend is asked to start
        the bot.

        Raises:
            HTTPException: 501 when no backend exists for the resolved kind,
                500 when the backend reports that the start failed.
        """
        runtime_kind = self._resolve_runtime_kind(bot_id, preferred=payload.runtime_kind)
        backend = self._backend_for_bot(bot_id, preferred=runtime_kind)
        workspace_root = str(payload.workspace_root or "").strip() or None
        self._write_runtime_target(
            bot_id=bot_id,
            runtime_kind=runtime_kind,
            workspace_root=workspace_root,
        )
        success = backend.start_bot(
            bot_id=bot_id,
            image_tag=str(payload.image_tag or "").strip(),
            env_vars=dict(payload.env_vars or {}),
            workspace_root=workspace_root,
            native_command=str(payload.native_command or "").strip() or None,
            native_workdir=str(payload.native_workdir or "").strip() or None,
            cpu_cores=float(payload.cpu_cores),
            memory_mb=int(payload.memory_mb),
            storage_gb=int(payload.storage_gb),
            on_state_change=self._record_monitor_packet,
        )
        if not success:
            detail = backend.get_last_delivery_error(bot_id) or f"Failed to start bot {bot_id} on dashboard-edge"
            raise HTTPException(status_code=500, detail=detail)
        return EdgeStatusResponse(status="started")

    def stop_bot(self, *, bot_id: str) -> EdgeStatusResponse:
        """Ask every backend to stop the bot, starting with the resolved one.

        Stopping is best-effort: a backend that raises is skipped and the
        response is always ``stopped``.
        """
        resolved_kind = self._resolve_runtime_kind(bot_id)
        ordered_kinds: list[str] = [resolved_kind] if resolved_kind else []
        for kind in self._runtime_backends:
            if kind not in ordered_kinds:
                ordered_kinds.append(kind)

        for kind in ordered_kinds:
            backend = self._runtime_backends.get(kind)
            if backend is None:
                continue
            try:
                backend.stop_bot(bot_id)
            except Exception:
                # Deliberately ignored so the remaining backends still get a stop request.
                continue
        return EdgeStatusResponse(status="stopped")

    def send_command(self, *, bot_id: str, payload: EdgeCommandRequest) -> EdgeStatusResponse:
        """Deliver a command (and optional media) to the bot.

        Raises:
            HTTPException: 502 when the backend reports a delivery failure.
        """
        backend = self._backend_for_bot(bot_id)
        delivered = backend.send_command(bot_id, payload.command, media=list(payload.media or []))
        if not delivered:
            detail = backend.get_last_delivery_error(bot_id) or "command delivery failed"
            raise HTTPException(status_code=502, detail=detail)
        return EdgeStatusResponse(status="ok")

    def ensure_monitor(self, *, bot_id: str) -> EdgeMonitorEnsureResponse:
        """Ask the backend to monitor the bot, recording packets in this service."""
        backend = self._backend_for_bot(bot_id)
        ensured = backend.ensure_monitor(bot_id, self._record_monitor_packet)
        return EdgeMonitorEnsureResponse(ensured=bool(ensured))

    def get_recent_logs(self, *, bot_id: str, tail: int) -> EdgeLogsResponse:
        """Return the last ``tail`` log lines reported by the bot's backend."""
        backend = self._backend_for_bot(bot_id)
        return EdgeLogsResponse(bot_id=bot_id, logs=backend.get_recent_logs(bot_id, tail=tail))

    def get_monitor_packets(self, *, bot_id: str, after_seq: int = 0, limit: int = 200) -> EdgeMonitorPacketsResponse:
        """Return buffered monitor packets with ``seq`` greater than ``after_seq``.

        Packets are returned in ascending ``seq`` order; a positive ``limit``
        keeps only the first ``limit`` of them. ``latest_seq`` is the last
        sequence number assigned for the bot, regardless of filtering.
        """
        self._backfill_monitor_packets(bot_id=bot_id)
        threshold = max(0, int(after_seq or 0))
        rows = [
            dict(row)
            for row in self._recent_packets.get(bot_id, [])
            if int(row.get("seq") or 0) > threshold
        ]
        rows.sort(key=lambda row: int(row.get("seq") or 0))
        if limit > 0:
            rows = rows[: int(limit)]
        latest_seq = int(self._packet_counters.get(bot_id, 0) or 0)
        return EdgeMonitorPacketsResponse(
            protocol_version=NODE_PROTOCOL_VERSION,
            node_id=EDGE_NODE_ID,
            bot_id=bot_id,
            latest_seq=latest_seq,
            packets=[
                EdgeMonitorPacket.model_validate(
                    {
                        "protocol_version": NODE_PROTOCOL_VERSION,
                        "node_id": EDGE_NODE_ID,
                        "bot_id": bot_id,
                        # Values stored with the packet take precedence over the defaults above.
                        **row,
                    }
                )
                for row in rows
            ],
        )

    def get_runtime_status(self, *, bot_id: str) -> EdgeStatusResponse:
        """Return the bot status as reported by its backend."""
        backend = self._backend_for_bot(bot_id)
        return EdgeStatusResponse(status=backend.get_bot_status(bot_id))

    def get_resource_snapshot(self, *, bot_id: str) -> dict:
        """Return the backend's resource snapshot, adding ``runtime_kind`` if absent."""
        backend = self._backend_for_bot(bot_id)
        snapshot = dict(backend.get_bot_resource_snapshot(bot_id) or {})
        snapshot.setdefault("runtime_kind", self._resolve_runtime_kind(bot_id))
        return snapshot

    def get_node_identity(self) -> EdgeNodeSelfResponse:
        """Describe this node; timestamp and protocol come from the resource summary."""
        resources = self.get_node_resource_summary()
        return EdgeNodeSelfResponse(
            protocol_version=resources.protocol_version,
            node_id=EDGE_NODE_ID,
            display_name=EDGE_NODE_NAME,
            service="dashboard-edge",
            transport_kind="edge",
            runtime_kind=self._runtime_kind(),
            core_adapter="nanobot",
            capabilities=self.capabilities(),
            resources=dict(resources.resources or {}),
            reported_at=resources.reported_at,
        )

    def get_node_resource_summary(self) -> EdgeNodeResourcesResponse:
        """Collect CPU, memory and workspace-disk figures for this node via psutil.

        Every probe is best-effort: a failing probe reports zeros. When the
        disk probe fails, the used workspace size is computed by walking the
        workspace root instead.
        """
        try:
            cpu_percent = float(psutil.cpu_percent(interval=None) or 0.0)
        except Exception:
            cpu_percent = 0.0

        try:
            memory = psutil.virtual_memory()
            memory_total = int(getattr(memory, "total", 0) or 0)
            memory_used = int(getattr(memory, "used", 0) or 0)
        except Exception:
            memory_total = 0
            memory_used = 0

        try:
            disk = psutil.disk_usage(EDGE_BOTS_WORKSPACE_ROOT)
            workspace_limit = int(getattr(disk, "total", 0) or 0)
            workspace_used = int(getattr(disk, "used", 0) or 0)
        except Exception:
            workspace_limit = 0
            workspace_used = self._calc_workspace_used_bytes()

        try:
            cpu_cores = float(psutil.cpu_count(logical=True) or 0)
        except Exception:
            cpu_cores = 0.0

        return EdgeNodeResourcesResponse(
            protocol_version=NODE_PROTOCOL_VERSION,
            node_id=EDGE_NODE_ID,
            display_name=EDGE_NODE_NAME,
            transport_kind="edge",
            runtime_kind=self._runtime_kind(),
            core_adapter="nanobot",
            resources={
                "configured_cpu_cores": round(cpu_cores, 2),
                "configured_memory_bytes": memory_total,
                "configured_storage_bytes": workspace_limit,
                "live_cpu_percent": round(cpu_percent, 2),
                "live_memory_used_bytes": memory_used,
                "live_memory_limit_bytes": memory_total,
                "workspace_used_bytes": workspace_used,
                "workspace_limit_bytes": workspace_limit,
            },
            reported_at=self._utc_timestamp(),
        )

    def heartbeat(self) -> EdgeNodeHeartbeatResponse:
        """Return a heartbeat carrying identity, capabilities and fresh resources."""
        node_resources = self.get_node_resource_summary()
        return EdgeNodeHeartbeatResponse(
            protocol_version=NODE_PROTOCOL_VERSION,
            node_id=EDGE_NODE_ID,
            display_name=EDGE_NODE_NAME,
            service="dashboard-edge",
            transport_kind="edge",
            runtime_kind=self._runtime_kind(),
            core_adapter="nanobot",
            capabilities=self.capabilities(),
            resources=dict(node_resources.resources or {}),
            reported_at=self._utc_timestamp(),
        )

    def native_preflight(self, *, native_command: str | None = None, native_workdir: str | None = None) -> dict:
        """Check whether a native launcher command and workdir are usable.

        When ``native_command`` is empty, the command advertised in the
        ``process`` capabilities of the ``native`` backend (if any) is checked
        instead. An empty ``native_workdir`` counts as existing.

        Returns:
            A dict with ``ok``, ``command``, ``workdir``, ``command_available``,
            ``workdir_exists`` and a human-readable ``detail``.
        """
        raw_command = str(native_command or "").strip()
        command_parts: list[str] = []
        parse_error = ""
        if raw_command:
            command_parts, parse_error = self._parse_native_command(raw_command)
        else:
            backend = self._runtime_backends.get("native")
            process_caps: dict = {}
            if backend is not None and hasattr(backend, "capabilities"):
                process_caps = dict((backend.capabilities() or {}).get("process") or {})
            command_parts = self._clean_tokens(process_caps.get("command") or [])

        command_available = bool(command_parts and shutil.which(command_parts[0]))
        configured_workdir = str(native_workdir or "").strip()
        if configured_workdir:
            workdir = os.path.abspath(configured_workdir)
            workdir_exists = os.path.isdir(workdir)
        else:
            workdir = ""
            workdir_exists = True

        detail_parts: list[str] = []
        if not command_available:
            detail_parts.append("native command not available")
        if not workdir_exists:
            detail_parts.append("native workdir does not exist")
        if parse_error:
            detail_parts.append(parse_error)
        if not detail_parts:
            detail_parts.append("native launcher ready")

        return {
            "ok": bool(command_available and workdir_exists),
            "command": command_parts,
            "workdir": workdir,
            "command_available": command_available,
            "workdir_exists": workdir_exists,
            "detail": "; ".join(detail_parts),
        }

    @staticmethod
    def _parse_native_command(raw_command: str) -> tuple[list[str], str]:
        """Split a native command string into argv tokens.

        Three notations are tried in order: a JSON list (``["a", "b"]``), a
        comma-separated list of quoted tokens (``"a", "b"`` or ``'a', 'b'``),
        and finally a shell-style command line parsed with ``shlex``.

        Returns:
            ``(tokens, error)`` where ``error`` is empty on success and
            ``tokens`` is empty whenever ``error`` is set.
        """
        text = str(raw_command or "").strip()
        if not text:
            return [], ""

        if text.startswith("[") and text.endswith("]"):
            try:
                payload = json.loads(text)
            except (ValueError, RecursionError):
                return [], "native command JSON is invalid"
            if isinstance(payload, list):
                rows = EdgeRuntimeService._clean_tokens(payload)
                if rows:
                    return rows, ""
                return [], "native command JSON list is empty"

        if "," in text and ('"' in text or "'" in text):
            # csv understands a single quote character per parse; use the one
            # the list opens with so single-quoted lists are unquoted as well.
            quote = "'" if text.startswith("'") else '"'
            try:
                fields = next(csv.reader([text], skipinitialspace=True, quotechar=quote))
            except (csv.Error, StopIteration):
                fields = []
            rows = EdgeRuntimeService._clean_tokens(fields)
            # An odd number of quote characters left inside a field means csv
            # cut a quoted shell argument at an inner comma (for example
            # `python -c "print(1,2)"`); leave such input to shlex below.
            if rows and not any(row.count(quote) % 2 for row in rows):
                return rows, ""

        try:
            rows = EdgeRuntimeService._clean_tokens(shlex.split(text))
        except ValueError:
            return [], "native command format is invalid"
        if rows:
            return rows, ""
        return [], "native command is empty"

    def _record_monitor_packet(self, bot_id: str, packet: dict) -> None:
        """Append a packet to the bot's buffer under the next sequence number.

        Only the newest ``_MAX_RECENT_PACKETS`` packets are kept per bot.
        """
        rows = self._recent_packets.setdefault(bot_id, [])
        next_seq = int(self._packet_counters.get(bot_id, 0) or 0) + 1
        self._packet_counters[bot_id] = next_seq
        rows.append(
            {
                "protocol_version": NODE_PROTOCOL_VERSION,
                "node_id": EDGE_NODE_ID,
                "bot_id": bot_id,
                "seq": next_seq,
                "captured_at": self._utc_timestamp(),
                "packet": dict(packet or {}),
            }
        )
        if len(rows) > self._MAX_RECENT_PACKETS:
            del rows[: -self._MAX_RECENT_PACKETS]

    def _backfill_monitor_packets(self, bot_id: str) -> None:
        """Seed the packet buffer from recent backend logs, once per bot.

        The bot is marked as backfilled before the logs are read, so the
        backfill is not attempted again even if reading them fails.
        """
        if bot_id in self._backfilled_bots:
            return
        self._backfilled_bots.add(bot_id)
        backend = self._backend_for_bot(bot_id)
        for line in backend.get_recent_logs(bot_id, tail=self._BACKFILL_LOG_TAIL):
            packet = backend.parse_monitor_packet(line)
            if packet:
                self._record_monitor_packet(bot_id, packet)

    def _backend_for_bot(self, bot_id: str, preferred: str | None = None) -> EdgeRuntimeBackend:
        """Return the backend for the bot's resolved runtime kind.

        Raises:
            HTTPException: 501 when no backend is registered for that kind.
        """
        runtime_kind = self._resolve_runtime_kind(bot_id, preferred=preferred)
        backend = self._runtime_backends.get(runtime_kind)
        if backend is None:
            raise HTTPException(status_code=501, detail=f"dashboard-edge runtime is not available: {runtime_kind}")
        return backend

    def _resolve_runtime_kind(self, bot_id: str, preferred: str | None = None) -> str:
        """Pick the runtime kind for a bot.

        Order of precedence: the ``preferred`` kind, the persisted runtime
        target, the first backend reporting the bot as ``RUNNING``, and
        finally the node-wide preferred kind. The first two only count when a
        backend for them is registered.
        """
        normalized_preferred = self._normalize_runtime_kind(preferred, allow_empty=True)
        if normalized_preferred and normalized_preferred in self._runtime_backends:
            return normalized_preferred

        persisted = self._normalize_runtime_kind(self._read_runtime_target(bot_id), allow_empty=True)
        if persisted and persisted in self._runtime_backends:
            return persisted

        for runtime_kind, backend in self._runtime_backends.items():
            try:
                if str(backend.get_bot_status(bot_id) or "").strip().upper() == "RUNNING":
                    return runtime_kind
            except Exception:
                # A backend that cannot report status is simply not a candidate.
                continue
        return self._runtime_kind()

    @staticmethod
    def _normalize_runtime_kind(value: str | None, *, allow_empty: bool = False) -> str:
        """Map a value to ``docker`` or ``native``; unknown values become ``docker``.

        With ``allow_empty`` an empty or missing value is returned as ``""``.
        """
        text = str(value or "").strip().lower()
        if allow_empty and not text:
            return ""
        return text if text in {"docker", "native"} else "docker"

    @staticmethod
    def _runtime_target_path(bot_id: str) -> str:
        """Return the runtime-target file path under the default workspace root."""
        return os.path.join(EDGE_BOTS_WORKSPACE_ROOT, str(bot_id or "").strip(), ".nanobot", "runtime-target.json")

    @staticmethod
    def _config_path(bot_id: str) -> str:
        """Return the bot's config file path under the default workspace root."""
        return os.path.join(EDGE_BOTS_WORKSPACE_ROOT, str(bot_id or "").strip(), ".nanobot", "config.json")

    def _read_runtime_target(self, bot_id: str) -> str:
        """Return the persisted runtime kind (lower-cased), or ``""`` if none."""
        payload = self._read_runtime_target_payload(bot_id)
        if isinstance(payload, dict):
            return str(payload.get("runtime_kind") or "").strip().lower()
        return ""

    def _read_runtime_target_payload(self, bot_id: str) -> dict:
        """Return the first readable runtime-target JSON object, else ``{}``."""
        for path in self._runtime_target_paths_for_read(bot_id):
            if not os.path.isfile(path):
                continue
            try:
                with open(path, "r", encoding="utf-8") as fh:
                    payload = json.load(fh)
            except Exception:
                # Unreadable or malformed candidates are skipped; try the next path.
                continue
            if isinstance(payload, dict):
                return payload
        return {}

    def _write_runtime_target(self, *, bot_id: str, runtime_kind: str, workspace_root: str | None = None) -> None:
        """Persist the runtime kind (and optionally workspace root) for a bot.

        Existing payload keys are kept. ``workspace_root=None`` leaves a stored
        root untouched, while an empty string removes it. When the target is
        written to an external workspace, a stale file under the default
        workspace root is removed.
        """
        payload = dict(self._read_runtime_target_payload(bot_id))
        payload["runtime_kind"] = self._normalize_runtime_kind(runtime_kind)
        if workspace_root is not None:
            normalized_root = str(workspace_root or "").strip()
            if normalized_root:
                payload["workspace_root"] = os.path.abspath(os.path.expanduser(normalized_root))
            else:
                payload.pop("workspace_root", None)

        paths = self._runtime_target_paths(bot_id=bot_id, payload=payload)
        for path in paths:
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, "w", encoding="utf-8") as fh:
                json.dump(payload, fh, ensure_ascii=False, indent=2)

        primary = self._runtime_target_path(bot_id)
        if primary not in paths and os.path.isfile(primary):
            try:
                os.remove(primary)
            except OSError:
                # Cleanup is best-effort; a leftover primary file is tolerated.
                pass

    def _runtime_target_paths(self, *, bot_id: str, payload: dict) -> list[str]:
        """Return where the runtime target is written for the given payload.

        A ``workspace_root`` in the payload that resolves to a different
        location than the default yields the external path only.
        """
        primary = self._runtime_target_path(bot_id)
        workspace_root = str(payload.get("workspace_root") or "").strip()
        if workspace_root:
            external = os.path.join(
                os.path.abspath(os.path.expanduser(workspace_root)),
                str(bot_id or "").strip(),
                ".nanobot",
                "runtime-target.json",
            )
            if os.path.abspath(external) != os.path.abspath(primary):
                return [external]
        return [primary]

    def _runtime_target_paths_for_read(self, bot_id: str) -> list[str]:
        """Return candidate runtime-target paths, external workspace first."""
        primary = self._runtime_target_path(bot_id)
        rows: list[str] = [primary]
        workspace_root = self._workspace_root_from_config(bot_id)
        if workspace_root:
            external = os.path.join(
                workspace_root,
                str(bot_id or "").strip(),
                ".nanobot",
                "runtime-target.json",
            )
            if os.path.abspath(external) != os.path.abspath(primary):
                rows.insert(0, external)
        return rows

    def _workspace_root_from_config(self, bot_id: str) -> str:
        """Derive the workspace root from ``agents.defaults.workspace`` in config.json.

        The configured workspace must contain ``/<bot_id>/.nanobot/workspace``;
        everything before the last occurrence of that marker is the root.
        Returns ``""`` when the config is missing, malformed or does not match.
        """
        path = self._config_path(bot_id)
        if not os.path.isfile(path):
            return ""
        try:
            with open(path, "r", encoding="utf-8") as fh:
                payload = json.load(fh)
            agents = payload.get("agents") if isinstance(payload, dict) else None
            defaults = agents.get("defaults") if isinstance(agents, dict) else None
            if not isinstance(defaults, dict):
                return ""
            workspace = str(defaults.get("workspace") or "").strip()
            if not workspace:
                return ""
            normalized_workspace = os.path.abspath(os.path.expanduser(workspace))
            marker = f"{os.sep}{str(bot_id or '').strip()}{os.sep}.nanobot{os.sep}workspace"
            if marker in normalized_workspace:
                return normalized_workspace.rsplit(marker, 1)[0]
        except Exception:
            # Any problem reading or interpreting the config means "no external root".
            return ""
        return ""

    @staticmethod
    def _calc_workspace_used_bytes() -> int:
        """Sum the sizes of all files below the default workspace root."""
        total = 0
        for root, _, files in os.walk(EDGE_BOTS_WORKSPACE_ROOT):
            for filename in files:
                try:
                    total += int(os.path.getsize(os.path.join(root, filename)))
                except OSError:
                    # Files that vanish or cannot be stat'ed are not counted.
                    continue
        return total
|
|
|
|
|
|
# Module-level EdgeRuntimeService instance, created when this module is imported.
edge_runtime_service = EdgeRuntimeService()
|