import csv
import json
import os
import shlex
import shutil
from datetime import datetime, timezone

import psutil
from fastapi import HTTPException

from app.core.settings import EDGE_BOTS_WORKSPACE_ROOT, EDGE_NODE_ID, EDGE_NODE_NAME
from app.runtime.base import EdgeRuntimeBackend
from app.runtime.factory import build_edge_runtime_backends, preferred_edge_runtime_kind
from app.schemas.edge import (
    EdgeCommandRequest,
    EdgeLogsResponse,
    EdgeMonitorEnsureResponse,
    EdgeMonitorPacket,
    EdgeMonitorPacketsResponse,
    EdgeNodeHeartbeatResponse,
    EdgeNodeResourcesResponse,
    EdgeNodeSelfResponse,
    EdgeStatusResponse,
    NODE_PROTOCOL_VERSION,
)
from app.schemas.runtime import EdgeStartBotRequest


class EdgeRuntimeService:
    """Facade over the edge runtime backends available on this node.

    The service picks a backend per bot (explicit preference, then the
    persisted ``runtime-target.json``, then whichever backend reports the bot
    as ``RUNNING``, then the node default), forwards lifecycle and command
    calls to it, and keeps a small in-memory buffer of monitor packets per bot.
    """

    # Maximum number of monitor packets retained in memory per bot.
    _MAX_RECENT_PACKETS = 200
    # Number of trailing log lines scanned when backfilling monitor packets.
    _BACKFILL_LOG_TAIL = 500

    def __init__(self) -> None:
        """Build the backend registry and the empty per-bot packet buffers."""
        # Keys are normalised to lower case; non-string keys are ignored.
        self._runtime_backends: dict[str, EdgeRuntimeBackend] = {
            str(kind).strip().lower(): backend
            for kind, backend in build_edge_runtime_backends().items()
            if isinstance(kind, str)
        }
        # bot_id -> most recent packets (oldest first, capped).
        self._recent_packets: dict[str, list[dict]] = {}
        # bot_id -> last sequence number handed out.
        self._packet_counters: dict[str, int] = {}
        # Bots whose log history has already been scanned for packets.
        self._backfilled_bots: set[str] = set()

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as ISO-8601 with a ``Z`` suffix."""
        return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    @staticmethod
    def _backend_capabilities(backend: object) -> dict:
        """Return ``backend.capabilities()`` as a dict, or ``{}`` if unavailable.

        Tolerates backends without a ``capabilities`` method and backends
        whose method returns ``None``.
        """
        getter = getattr(backend, "capabilities", None)
        if not callable(getter):
            return {}
        return dict(getter() or {})

    @staticmethod
    def _merge_flag_caps(target: dict[str, bool], section: object, *, lowercase: bool = False) -> None:
        """OR the boolean flags of one capability section into ``target``.

        A flag counts as enabled only when its value is literally ``True``;
        once enabled by any backend it stays enabled. Blank keys are skipped.
        """
        for key, value in dict(section or {}).items():
            name = str(key or "").strip()
            if lowercase:
                name = name.lower()
            if not name:
                continue
            target[name] = bool(target.get(name) or value is True)

    def _runtime_kind(self) -> str:
        """Return the node's preferred runtime kind among the known backends."""
        return preferred_edge_runtime_kind(self._runtime_backends)

    # ------------------------------------------------------------------
    # Capabilities
    # ------------------------------------------------------------------

    def capabilities(self) -> dict:
        """Merge the capabilities reported by every backend into one dict.

        ``runtime`` / ``workspace`` / ``monitor`` flags are OR-ed across
        backends (``runtime`` keys are lower-cased); ``process`` entries are
        copied as-is, later backends overriding earlier ones. Empty sections
        are omitted. ``protocol.version`` is always present.
        """
        caps: dict = {"protocol": {"version": NODE_PROTOCOL_VERSION}}
        runtime_caps: dict[str, bool] = {}
        workspace_caps: dict[str, bool] = {}
        monitor_caps: dict[str, bool] = {}
        process_caps: dict[str, object] = {}

        for backend in self._runtime_backends.values():
            current = self._backend_capabilities(backend)
            self._merge_flag_caps(runtime_caps, current.get("runtime"), lowercase=True)
            self._merge_flag_caps(workspace_caps, current.get("workspace"))
            self._merge_flag_caps(monitor_caps, current.get("monitor"))
            for key, value in dict(current.get("process") or {}).items():
                name = str(key or "").strip()
                if name:
                    process_caps[name] = value

        sections = (
            ("runtime", runtime_caps),
            ("workspace", workspace_caps),
            ("monitor", monitor_caps),
            ("process", process_caps),
        )
        for section_name, values in sections:
            if values:
                caps[section_name] = values
        return caps

    # ------------------------------------------------------------------
    # Bot lifecycle and commands
    # ------------------------------------------------------------------

    async def start_bot(self, *, bot_id: str, payload: EdgeStartBotRequest) -> EdgeStatusResponse:
        """Persist the runtime target for ``bot_id`` and start it on its backend.

        Raises:
            HTTPException: 501 when no backend exists for the resolved runtime
                kind; 500 when the backend reports that the start failed.
        """
        # NOTE(review): this coroutine contains no ``await``; the backend call
        # below is made synchronously - confirm backend.start_bot is not a
        # coroutine function.
        runtime_kind = self._resolve_runtime_kind(bot_id, preferred=payload.runtime_kind)
        backend = self._backend_for_bot(bot_id, preferred=runtime_kind)
        workspace_root = str(payload.workspace_root or "").strip() or None

        self._write_runtime_target(
            bot_id=bot_id,
            runtime_kind=runtime_kind,
            workspace_root=workspace_root,
        )
        success = backend.start_bot(
            bot_id=bot_id,
            image_tag=str(payload.image_tag or "").strip(),
            env_vars=dict(payload.env_vars or {}),
            workspace_root=workspace_root,
            native_command=str(payload.native_command or "").strip() or None,
            native_workdir=str(payload.native_workdir or "").strip() or None,
            cpu_cores=float(payload.cpu_cores),
            memory_mb=int(payload.memory_mb),
            storage_gb=int(payload.storage_gb),
            on_state_change=self._record_monitor_packet,
        )
        if not success:
            detail = backend.get_last_delivery_error(bot_id) or f"Failed to start bot {bot_id} on dashboard-edge"
            raise HTTPException(status_code=500, detail=detail)
        return EdgeStatusResponse(status="started")

    def stop_bot(self, *, bot_id: str) -> EdgeStatusResponse:
        """Ask every backend to stop ``bot_id``, resolved backend first.

        Stopping is best-effort: a failure in one backend must not prevent
        the others from being tried, so backend errors are ignored and the
        method always reports ``stopped``.
        """
        resolved_kind = self._resolve_runtime_kind(bot_id)
        ordered_kinds: list[str] = [resolved_kind] if resolved_kind else []
        ordered_kinds.extend(kind for kind in self._runtime_backends if kind not in ordered_kinds)

        for kind in ordered_kinds:
            backend = self._runtime_backends.get(kind)
            if backend is None:
                continue
            try:
                backend.stop_bot(bot_id)
            except Exception:
                # Deliberate best-effort: keep going with the next backend.
                continue
        return EdgeStatusResponse(status="stopped")

    def send_command(self, *, bot_id: str, payload: EdgeCommandRequest) -> EdgeStatusResponse:
        """Deliver a command (and optional media) to the bot's backend.

        Raises:
            HTTPException: 501 when no backend is available; 502 when the
                backend reports that delivery failed.
        """
        backend = self._backend_for_bot(bot_id)
        delivered = backend.send_command(bot_id, payload.command, media=list(payload.media or []))
        if not delivered:
            detail = backend.get_last_delivery_error(bot_id) or "command delivery failed"
            raise HTTPException(status_code=502, detail=detail)
        return EdgeStatusResponse(status="ok")

    def ensure_monitor(self, *, bot_id: str) -> EdgeMonitorEnsureResponse:
        """Ask the backend to ensure a monitor that feeds this service's packet buffer."""
        backend = self._backend_for_bot(bot_id)
        ensured = backend.ensure_monitor(bot_id, self._record_monitor_packet)
        return EdgeMonitorEnsureResponse(ensured=bool(ensured))

    def get_recent_logs(self, *, bot_id: str, tail: int) -> EdgeLogsResponse:
        """Return the backend's recent log lines for ``bot_id``."""
        backend = self._backend_for_bot(bot_id)
        return EdgeLogsResponse(bot_id=bot_id, logs=backend.get_recent_logs(bot_id, tail=tail))

    def get_monitor_packets(self, *, bot_id: str, after_seq: int = 0, limit: int = 200) -> EdgeMonitorPacketsResponse:
        """Return buffered monitor packets with ``seq`` greater than ``after_seq``.

        Packets are returned in ascending ``seq`` order; when ``limit`` is
        positive only the first ``limit`` packets are returned.
        ``latest_seq`` is the last sequence number issued for the bot,
        regardless of filtering.
        """
        self._backfill_monitor_packets(bot_id=bot_id)

        # Compute the threshold once instead of once per buffered row.
        min_seq = max(0, int(after_seq or 0))
        rows = [
            dict(row)
            for row in self._recent_packets.get(bot_id, [])
            if int(row.get("seq") or 0) > min_seq
        ]
        rows.sort(key=lambda row: int(row.get("seq") or 0))

        max_rows = int(limit or 0)
        if max_rows > 0:
            rows = rows[:max_rows]

        latest_seq = int(self._packet_counters.get(bot_id, 0) or 0)
        return EdgeMonitorPacketsResponse(
            protocol_version=NODE_PROTOCOL_VERSION,
            node_id=EDGE_NODE_ID,
            bot_id=bot_id,
            latest_seq=latest_seq,
            packets=[
                EdgeMonitorPacket.model_validate(
                    {
                        "protocol_version": NODE_PROTOCOL_VERSION,
                        "node_id": EDGE_NODE_ID,
                        "bot_id": bot_id,
                        **row,
                    }
                )
                for row in rows
            ],
        )

    def get_runtime_status(self, *, bot_id: str) -> EdgeStatusResponse:
        """Return the status string reported by the bot's backend."""
        backend = self._backend_for_bot(bot_id)
        return EdgeStatusResponse(status=backend.get_bot_status(bot_id))

    def get_resource_snapshot(self, *, bot_id: str) -> dict:
        """Return the backend's resource snapshot, adding ``runtime_kind`` if absent."""
        backend = self._backend_for_bot(bot_id)
        snapshot = dict(backend.get_bot_resource_snapshot(bot_id) or {})
        snapshot.setdefault("runtime_kind", self._resolve_runtime_kind(bot_id))
        return snapshot

    # ------------------------------------------------------------------
    # Node identity / resources
    # ------------------------------------------------------------------

    def get_node_identity(self) -> EdgeNodeSelfResponse:
        """Describe this node: identity, capabilities and a resource summary."""
        resources = self.get_node_resource_summary()
        return EdgeNodeSelfResponse(
            protocol_version=resources.protocol_version,
            node_id=EDGE_NODE_ID,
            display_name=EDGE_NODE_NAME,
            service="dashboard-edge",
            transport_kind="edge",
            runtime_kind=self._runtime_kind(),
            core_adapter="nanobot",
            capabilities=self.capabilities(),
            resources=dict(resources.resources or {}),
            reported_at=resources.reported_at,
        )

    def get_node_resource_summary(self) -> EdgeNodeResourcesResponse:
        """Collect CPU, memory and workspace-disk figures for this node.

        Each ``psutil`` probe is independent and best-effort: a failing probe
        reports zeros instead of failing the request. If disk usage of the
        workspace root cannot be read, the used size falls back to summing
        file sizes under the workspace root.
        """
        try:
            cpu_percent = float(psutil.cpu_percent(interval=None) or 0.0)
        except Exception:
            cpu_percent = 0.0

        try:
            memory = psutil.virtual_memory()
            memory_total = int(getattr(memory, "total", 0) or 0)
            memory_used = int(getattr(memory, "used", 0) or 0)
        except Exception:
            memory_total = 0
            memory_used = 0

        try:
            disk = psutil.disk_usage(EDGE_BOTS_WORKSPACE_ROOT)
            workspace_limit = int(getattr(disk, "total", 0) or 0)
            workspace_used = int(getattr(disk, "used", 0) or 0)
        except Exception:
            workspace_limit = 0
            workspace_used = self._calc_workspace_used_bytes()

        try:
            cpu_cores = float(psutil.cpu_count(logical=True) or 0)
        except Exception:
            cpu_cores = 0.0

        return EdgeNodeResourcesResponse(
            protocol_version=NODE_PROTOCOL_VERSION,
            node_id=EDGE_NODE_ID,
            display_name=EDGE_NODE_NAME,
            transport_kind="edge",
            runtime_kind=self._runtime_kind(),
            core_adapter="nanobot",
            resources={
                "configured_cpu_cores": round(cpu_cores, 2),
                "configured_memory_bytes": memory_total,
                "configured_storage_bytes": workspace_limit,
                "live_cpu_percent": round(cpu_percent, 2),
                "live_memory_used_bytes": memory_used,
                "live_memory_limit_bytes": memory_total,
                "workspace_used_bytes": workspace_used,
                "workspace_limit_bytes": workspace_limit,
            },
            reported_at=self._utc_timestamp(),
        )

    def heartbeat(self) -> EdgeNodeHeartbeatResponse:
        """Return a heartbeat with capabilities, resources and a fresh timestamp."""
        node_resources = self.get_node_resource_summary()
        return EdgeNodeHeartbeatResponse(
            protocol_version=NODE_PROTOCOL_VERSION,
            node_id=EDGE_NODE_ID,
            display_name=EDGE_NODE_NAME,
            service="dashboard-edge",
            transport_kind="edge",
            runtime_kind=self._runtime_kind(),
            core_adapter="nanobot",
            capabilities=self.capabilities(),
            resources=dict(node_resources.resources or {}),
            reported_at=self._utc_timestamp(),
        )

    # ------------------------------------------------------------------
    # Native launcher preflight
    # ------------------------------------------------------------------

    def native_preflight(self, *, native_command: str | None = None, native_workdir: str | None = None) -> dict:
        """Check whether a native launcher command and workdir are usable.

        When ``native_command`` is blank, the command advertised under
        ``process.command`` by the ``native`` backend (if any) is checked
        instead. A blank ``native_workdir`` is treated as "no workdir
        required".

        Returns:
            A dict with ``ok``, ``command`` (parsed argv), ``workdir``
            (absolute path or ``""``), ``command_available``,
            ``workdir_exists`` and a human-readable ``detail``.
        """
        raw_command = str(native_command or "").strip()
        command_parts: list[str] = []
        parse_error = ""

        if raw_command:
            command_parts, parse_error = self._parse_native_command(raw_command)
        else:
            # Fall back to the command the native backend advertises.
            backend = self._runtime_backends.get("native")
            process_caps: dict = {}
            if backend is not None:
                process_caps = dict(self._backend_capabilities(backend).get("process") or {})
            command_parts = [
                str(item or "").strip()
                for item in list(process_caps.get("command") or [])
                if str(item or "").strip()
            ]

        command_available = bool(command_parts and shutil.which(command_parts[0]))

        configured_workdir = str(native_workdir or "").strip()
        if configured_workdir:
            workdir = os.path.abspath(configured_workdir)
            workdir_exists = os.path.isdir(workdir)
        else:
            workdir = ""
            workdir_exists = True

        detail_parts: list[str] = []
        if not command_available:
            detail_parts.append("native command not available")
        if not workdir_exists:
            detail_parts.append("native workdir does not exist")
        if parse_error:
            detail_parts.append(parse_error)
        if not detail_parts:
            detail_parts.append("native launcher ready")

        return {
            "ok": bool(command_available and workdir_exists),
            "command": command_parts,
            "workdir": workdir,
            "command_available": command_available,
            "workdir_exists": workdir_exists,
            "detail": "; ".join(detail_parts),
        }

    @staticmethod
    def _split_quoted_list(text: str) -> list[str]:
        """Split ``text`` as a comma-separated list of optionally quoted tokens.

        The quote character is whichever of ``"`` / ``'`` appears first, so
        single-quoted lists are unquoted correctly. Returns ``[]`` when the
        text does not look like such a list - in particular when a quote
        survives inside a token, which means the quote was opened mid-field
        (e.g. ``python -c "print(1,2)"``) and the text is really a shell
        command line.
        """
        found = [(text.find(mark), mark) for mark in ('"', "'") if mark in text]
        if not found:
            return []
        quote = min(found)[1]
        try:
            fields = next(csv.reader([text], skipinitialspace=True, quotechar=quote))
        except (csv.Error, StopIteration):
            return []
        tokens = [field.strip() for field in fields if field.strip()]
        # A doubled quote is the CSV escape for a literal quote, so a quote
        # inside a token is only suspicious when no such escape is present.
        if quote * 2 not in text and any(quote in token for token in tokens):
            return []
        return tokens

    @staticmethod
    def _parse_native_command(raw_command: str) -> tuple[list[str], str]:
        """Parse a native command string into an argv list.

        Accepted forms, tried in order:

        1. a JSON list (``["python", "-m", "bot"]``);
        2. a comma-separated list of quoted tokens (``"python", "-m", "bot"``
           or the single-quoted equivalent);
        3. a shell-style command line (``python -m bot``).

        Returns:
            ``(argv, error)`` - ``error`` is ``""`` on success and ``argv`` is
            ``[]`` on failure. Blank input yields ``([], "")``.
        """
        text = str(raw_command or "").strip()
        if not text:
            return [], ""

        if text.startswith("[") and text.endswith("]"):
            try:
                payload = json.loads(text)
            except (ValueError, RecursionError):
                return [], "native command JSON is invalid"
            if isinstance(payload, list):
                rows = [str(item or "").strip() for item in payload if str(item or "").strip()]
                if rows:
                    return rows, ""
                return [], "native command JSON list is empty"

        if "," in text and any(mark in text for mark in ('"', "'")):
            rows = EdgeRuntimeService._split_quoted_list(text)
            if rows:
                return rows, ""

        try:
            rows = [part.strip() for part in shlex.split(text) if part.strip()]
        except ValueError:
            # shlex raises ValueError on unbalanced quotes / dangling escapes.
            return [], "native command format is invalid"
        if rows:
            return rows, ""
        return [], "native command is empty"

    # ------------------------------------------------------------------
    # Monitor packet buffer
    # ------------------------------------------------------------------

    def _record_monitor_packet(self, bot_id: str, packet: dict) -> None:
        """Append ``packet`` to the bot's buffer with the next sequence number.

        Only the newest ``_MAX_RECENT_PACKETS`` entries are kept per bot.
        """
        rows = self._recent_packets.setdefault(bot_id, [])
        next_seq = int(self._packet_counters.get(bot_id, 0) or 0) + 1
        self._packet_counters[bot_id] = next_seq
        rows.append(
            {
                "protocol_version": NODE_PROTOCOL_VERSION,
                "node_id": EDGE_NODE_ID,
                "bot_id": bot_id,
                "seq": next_seq,
                "captured_at": self._utc_timestamp(),
                "packet": dict(packet or {}),
            }
        )
        overflow = len(rows) - self._MAX_RECENT_PACKETS
        if overflow > 0:
            del rows[:overflow]

    def _backfill_monitor_packets(self, bot_id: str) -> None:
        """Seed the bot's packet buffer once from its recent log lines.

        Raises:
            HTTPException: 501 when no backend is available for the bot.
        """
        if bot_id in self._backfilled_bots:
            return
        # Resolve the backend before marking the bot as done, so that a
        # missing backend does not permanently disable backfill for it.
        backend = self._backend_for_bot(bot_id)
        self._backfilled_bots.add(bot_id)
        for line in backend.get_recent_logs(bot_id, tail=self._BACKFILL_LOG_TAIL):
            packet = backend.parse_monitor_packet(line)
            if packet:
                self._record_monitor_packet(bot_id, packet)

    # ------------------------------------------------------------------
    # Backend resolution
    # ------------------------------------------------------------------

    def _backend_for_bot(self, bot_id: str, preferred: str | None = None) -> EdgeRuntimeBackend:
        """Return the backend that should handle ``bot_id``.

        Raises:
            HTTPException: 501 when the resolved runtime kind has no backend.
        """
        runtime_kind = self._resolve_runtime_kind(bot_id, preferred=preferred)
        backend = self._runtime_backends.get(runtime_kind)
        if backend is None:
            raise HTTPException(status_code=501, detail=f"dashboard-edge runtime is not available: {runtime_kind}")
        return backend

    def _resolve_runtime_kind(self, bot_id: str, preferred: str | None = None) -> str:
        """Pick the runtime kind for ``bot_id``.

        Order: explicit ``preferred`` kind, persisted runtime target, the
        first backend reporting the bot as ``RUNNING``, then the node default.
        The first two are used only when a backend of that kind exists.
        """
        normalized_preferred = self._normalize_runtime_kind(preferred, allow_empty=True)
        if normalized_preferred and normalized_preferred in self._runtime_backends:
            return normalized_preferred

        persisted = self._normalize_runtime_kind(self._read_runtime_target(bot_id), allow_empty=True)
        if persisted and persisted in self._runtime_backends:
            return persisted

        for runtime_kind, backend in self._runtime_backends.items():
            try:
                status = str(backend.get_bot_status(bot_id) or "").strip().upper()
            except Exception:
                # A backend that cannot report status is simply not a match.
                continue
            if status == "RUNNING":
                return runtime_kind
        return self._runtime_kind()

    @staticmethod
    def _normalize_runtime_kind(value: str | None, *, allow_empty: bool = False) -> str:
        """Normalise ``value`` to ``"docker"`` or ``"native"``.

        Unknown values map to ``"docker"``. Blank values map to ``""`` when
        ``allow_empty`` is set, otherwise to ``"docker"``.
        """
        text = str(value or "").strip().lower()
        if allow_empty and not text:
            return ""
        return text if text in {"docker", "native"} else "docker"

    # ------------------------------------------------------------------
    # Persisted runtime target
    # ------------------------------------------------------------------

    @staticmethod
    def _runtime_target_path(bot_id: str) -> str:
        """Return the primary ``runtime-target.json`` path for ``bot_id``."""
        # NOTE(review): bot_id is joined into the path without validation;
        # confirm callers reject path separators and "..".
        return os.path.join(EDGE_BOTS_WORKSPACE_ROOT, str(bot_id or "").strip(), ".nanobot", "runtime-target.json")

    @staticmethod
    def _config_path(bot_id: str) -> str:
        """Return the ``config.json`` path for ``bot_id`` under the workspace root."""
        return os.path.join(EDGE_BOTS_WORKSPACE_ROOT, str(bot_id or "").strip(), ".nanobot", "config.json")

    def _read_runtime_target(self, bot_id: str) -> str:
        """Return the persisted ``runtime_kind`` (lower-cased) or ``""``."""
        payload = self._read_runtime_target_payload(bot_id)
        if isinstance(payload, dict):
            return str(payload.get("runtime_kind") or "").strip().lower()
        return ""

    def _read_runtime_target_payload(self, bot_id: str) -> dict:
        """Return the first readable runtime-target JSON object, or ``{}``.

        Unreadable or malformed candidate files are skipped.
        """
        for path in self._runtime_target_paths_for_read(bot_id):
            if not os.path.isfile(path):
                continue
            try:
                with open(path, "r", encoding="utf-8") as fh:
                    payload = json.load(fh)
            except Exception:
                # Best-effort read: try the next candidate location.
                continue
            if isinstance(payload, dict):
                return payload
        return {}

    def _write_runtime_target(self, *, bot_id: str, runtime_kind: str, workspace_root: str | None = None) -> None:
        """Persist the runtime target for ``bot_id``.

        ``workspace_root=None`` leaves any stored root untouched; a blank
        string clears it; any other value is stored as an absolute path.
        When the target is written to an external workspace, a stale copy at
        the primary location is removed.
        """
        payload = dict(self._read_runtime_target_payload(bot_id))
        payload["runtime_kind"] = self._normalize_runtime_kind(runtime_kind)
        if workspace_root is not None:
            normalized_root = str(workspace_root or "").strip()
            if normalized_root:
                payload["workspace_root"] = os.path.abspath(os.path.expanduser(normalized_root))
            else:
                payload.pop("workspace_root", None)

        paths = self._runtime_target_paths(bot_id=bot_id, payload=payload)
        for path in paths:
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, "w", encoding="utf-8") as fh:
                json.dump(payload, fh, ensure_ascii=False, indent=2)

        primary = self._runtime_target_path(bot_id)
        if primary not in paths and os.path.isfile(primary):
            try:
                os.remove(primary)
            except OSError:
                # Cleanup is best-effort; the external copy is authoritative.
                pass

    def _runtime_target_paths(self, *, bot_id: str, payload: dict) -> list[str]:
        """Return where the runtime target should be written.

        That is the external workspace location when ``payload`` names a
        ``workspace_root`` that differs from the primary one, otherwise the
        primary location.
        """
        primary = self._runtime_target_path(bot_id)
        workspace_root = str(payload.get("workspace_root") or "").strip()
        if workspace_root:
            external = os.path.join(
                os.path.abspath(os.path.expanduser(workspace_root)),
                str(bot_id or "").strip(),
                ".nanobot",
                "runtime-target.json",
            )
            if os.path.abspath(external) != os.path.abspath(primary):
                return [external]
        return [primary]

    def _runtime_target_paths_for_read(self, bot_id: str) -> list[str]:
        """Return candidate runtime-target paths, external workspace first."""
        primary = self._runtime_target_path(bot_id)
        candidates: list[str] = [primary]
        workspace_root = self._workspace_root_from_config(bot_id)
        if workspace_root:
            external = os.path.join(
                workspace_root,
                str(bot_id or "").strip(),
                ".nanobot",
                "runtime-target.json",
            )
            if os.path.abspath(external) != os.path.abspath(primary):
                candidates.insert(0, external)
        return candidates

    def _workspace_root_from_config(self, bot_id: str) -> str:
        """Derive the workspace root from ``agents.defaults.workspace`` in config.json.

        The configured workspace must contain
        ``/<bot_id>/.nanobot/workspace``; everything before the last
        occurrence of that marker is the root. Returns ``""`` when the config
        is missing, malformed, or does not match.
        """
        path = self._config_path(bot_id)
        if not os.path.isfile(path):
            return ""
        try:
            with open(path, "r", encoding="utf-8") as fh:
                payload = json.load(fh)
            if not isinstance(payload, dict):
                return ""
            agents = payload.get("agents")
            if not isinstance(agents, dict):
                return ""
            defaults = agents.get("defaults")
            if not isinstance(defaults, dict):
                return ""
            workspace = str(defaults.get("workspace") or "").strip()
            if not workspace:
                return ""
            normalized_workspace = os.path.abspath(os.path.expanduser(workspace))
            marker = f"{os.sep}{str(bot_id or '').strip()}{os.sep}.nanobot{os.sep}workspace"
            if marker in normalized_workspace:
                return normalized_workspace.rsplit(marker, 1)[0]
        except Exception:
            # Best-effort: any problem reading the config means "unknown".
            return ""
        return ""

    @staticmethod
    def _calc_workspace_used_bytes() -> int:
        """Sum the sizes of all files under the workspace root, skipping unreadable ones."""
        total = 0
        for root, _, files in os.walk(EDGE_BOTS_WORKSPACE_ROOT):
            for filename in files:
                try:
                    total += int(os.path.getsize(os.path.join(root, filename)))
                except OSError:
                    continue
        return total


edge_runtime_service = EdgeRuntimeService()