# dashboard-nanobot/dashboard-edge/app/services/workspace_service.py
#
# 442 lines
# 18 KiB
# Python

import json
import mimetypes
import os
import re
import shutil
from datetime import datetime, timezone
from typing import Any, Dict, Generator, List, Optional

from fastapi import HTTPException, Request, UploadFile
from fastapi.responses import FileResponse, Response, StreamingResponse

from app.core.settings import EDGE_ALLOWED_ATTACHMENT_EXTENSIONS, EDGE_UPLOAD_MAX_MB
class EdgeWorkspaceService:
    """File-system access to per-bot workspaces.

    A bot's workspace is the directory ``<bot root>/.nanobot/workspace``. The
    bot root is resolved, in order, from an explicit ``workspace_root``
    argument, the ``workspace_root`` key of the bot's ``runtime-target.json``,
    the ``agents.defaults.workspace`` entry of the bot's ``config.json``, and
    finally ``<host_data_root>/<bot_id>``.

    Relative paths supplied by callers are confined to the workspace directory
    by ``_resolve_workspace_path``.
    """

    # Entry names that are never reported in directory listings.
    _HIDDEN_NAMES = frozenset({".DS_Store"})
    # Single byte-range header; matched as a prefix (deliberately not anchored
    # at the end, so trailing content after the first range is ignored).
    _RANGE_PATTERN = re.compile(r"bytes=(\d*)-(\d*)")

    def __init__(self, *, host_data_root: str) -> None:
        """Store the default directory under which per-bot state lives."""
        self._host_data_root = host_data_root

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def list_tree(
        self,
        *,
        bot_id: str,
        path: Optional[str] = None,
        recursive: bool = False,
        workspace_root: Optional[str] = None,
    ) -> Dict[str, Any]:
        """List the entries of a workspace directory.

        Args:
            bot_id: Bot whose workspace is listed.
            path: Directory relative to the workspace root; ``None`` or empty
                means the workspace root itself.
            recursive: When true, walk the whole subtree instead of one level.
            workspace_root: Optional override of the base directory that
                contains the bot directories.

        Returns:
            A dict with ``bot_id``, absolute ``root``, ``cwd`` and ``parent``
            (both relative, ``/``-separated; ``parent`` is ``None`` at the
            workspace root) and ``entries``. A missing workspace directory
            yields an empty listing rather than an error.

        Raises:
            HTTPException: 400 when ``path`` escapes the workspace or is not a
                directory.
        """
        root = self._workspace_root(bot_id, workspace_root=workspace_root)
        if not os.path.isdir(root):
            return {"bot_id": bot_id, "root": root, "cwd": "", "parent": None, "entries": []}

        _, target = self._resolve_workspace_path(bot_id, path, workspace_root=workspace_root)
        if not os.path.isdir(target):
            raise HTTPException(status_code=400, detail="workspace path is not a directory")

        cwd = self._relative_posix(target, root)
        if cwd == ".":
            cwd = ""
        # dirname of a top-level entry is "", which is the workspace root.
        parent = os.path.dirname(cwd) if cwd else None

        lister = self._list_workspace_dir_recursive if recursive else self._list_workspace_dir
        return {
            "bot_id": bot_id,
            "root": root,
            "cwd": cwd,
            "parent": parent,
            "entries": lister(target, root),
        }

    def read_file(
        self,
        *,
        bot_id: str,
        path: str,
        max_bytes: int = 200000,
        workspace_root: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Return a text preview of a workspace file.

        ``max_bytes`` is clamped to the range 4096..1000000. Content is decoded
        as UTF-8 with undecodable bytes replaced.

        Raises:
            HTTPException: 400 for a path outside the workspace or when the
                bytes read contain a NUL byte (treated as binary); 404 when
                the file does not exist.
        """
        root, target = self._resolve_workspace_path(bot_id, path, workspace_root=workspace_root)
        if not os.path.isfile(target):
            raise HTTPException(status_code=404, detail="workspace file not found")

        limit = max(4096, min(int(max_bytes), 1000000))
        with open(target, "rb") as fh:
            # Read one byte past the limit so truncation can be detected.
            raw = fh.read(limit + 1)
        if b"\x00" in raw:
            raise HTTPException(status_code=400, detail="binary file is not previewable")

        truncated = len(raw) > limit
        body = raw[:limit] if truncated else raw
        ext = os.path.splitext(target)[1].lower()
        return {
            "bot_id": bot_id,
            "path": self._relative_posix(target, root),
            "size": os.path.getsize(target),
            "is_markdown": ext in {".md", ".markdown"},
            "truncated": truncated,
            "content": body.decode("utf-8", errors="replace"),
        }

    def write_markdown(
        self,
        *,
        bot_id: str,
        path: str,
        content: str,
        workspace_root: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Overwrite an existing markdown file in the workspace.

        Only files that already exist and end in ``.md`` / ``.markdown`` can be
        written. ``None`` content is treated as an empty string.

        Raises:
            HTTPException: 400 for a path outside the workspace, a
                non-markdown extension or content containing NUL; 404 when the
                file does not exist; 413 when the UTF-8 encoding exceeds
                2,000,000 bytes.
        """
        root, target = self._resolve_workspace_path(bot_id, path, workspace_root=workspace_root)
        if not os.path.isfile(target):
            raise HTTPException(status_code=404, detail="workspace file not found")

        ext = os.path.splitext(target)[1].lower()
        if ext not in {".md", ".markdown"}:
            raise HTTPException(status_code=400, detail=f"editing is only supported for markdown files: {ext or '(none)'}")

        text = str(content or "")
        if len(text.encode("utf-8")) > 2_000_000:
            raise HTTPException(status_code=413, detail="markdown file too large to save")
        if "\x00" in text:
            raise HTTPException(status_code=400, detail="markdown content contains invalid null bytes")

        self._write_text_atomic(target, text)
        return {
            "bot_id": bot_id,
            "path": self._relative_posix(target, root),
            "size": os.path.getsize(target),
            "is_markdown": True,
            "truncated": False,
            "content": text,
        }

    async def upload_files(
        self,
        *,
        bot_id: str,
        files: List[UploadFile],
        path: Optional[str] = None,
        workspace_root: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Store uploaded files in a workspace directory.

        Files go to ``path`` (default ``uploads``) relative to the workspace
        root; the directory is created when missing. File names are sanitised,
        and a name that already exists gets a timestamp suffix instead of
        overwriting the existing file.

        Every file name is validated against the allowed extensions before
        anything is written, so a rejected extension does not leave earlier
        files of the same request on disk. A file that exceeds the size limit
        or fails to write is removed; files stored earlier in the same request
        are kept.

        Raises:
            HTTPException: 400 for an empty upload list, an invalid target
                path or a disallowed extension; 413 when a file exceeds
                ``EDGE_UPLOAD_MAX_MB``; 500 when writing fails.
        """
        if not files:
            raise HTTPException(status_code=400, detail="no files uploaded")

        max_bytes = EDGE_UPLOAD_MAX_MB * 1024 * 1024
        allowed_extensions = set(EDGE_ALLOWED_ATTACHMENT_EXTENSIONS)

        root, upload_dir = self._resolve_workspace_path(bot_id, path or "uploads", workspace_root=workspace_root)
        try:
            os.makedirs(upload_dir, exist_ok=True)
        except (FileExistsError, NotADirectoryError) as exc:
            # The target (or one of its parents) exists but is a regular file.
            raise HTTPException(status_code=400, detail="invalid upload target path") from exc
        safe_dir = os.path.abspath(upload_dir)
        if os.path.commonpath([root, safe_dir]) != root:
            raise HTTPException(status_code=400, detail="invalid upload target path")

        # Pass 1: validate every name up front so nothing is written when the
        # request contains a disallowed extension.
        planned = []
        for upload in files:
            name = self._sanitize_upload_name(upload.filename)
            ext = str(os.path.splitext(name)[1] or "").strip().lower()
            if allowed_extensions and ext not in allowed_extensions:
                raise HTTPException(
                    status_code=400,
                    detail=f"File '{name}' extension is not allowed. Allowed: {', '.join(sorted(allowed_extensions))}",
                )
            planned.append((upload, name))

        # Pass 2: stream each upload to disk in 1 MiB chunks.
        rows: List[Dict[str, Any]] = []
        for upload, wanted_name in planned:
            # Resolved at write time: an earlier file of this request may have
            # just taken the same name.
            name, abs_path = self._unique_upload_path(safe_dir, wanted_name)
            total_size = 0
            try:
                with open(abs_path, "wb") as fh:
                    while chunk := await upload.read(1024 * 1024):
                        total_size += len(chunk)
                        if total_size > max_bytes:
                            raise HTTPException(
                                status_code=413,
                                detail=f"File '{name}' too large (max {EDGE_UPLOAD_MAX_MB}MB)",
                            )
                        fh.write(chunk)
            except HTTPException:
                self._remove_quietly(abs_path)
                raise
            except OSError as exc:
                self._remove_quietly(abs_path)
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to write file '{name}': {exc.strerror or str(exc)}",
                ) from exc
            except Exception as exc:
                self._remove_quietly(abs_path)
                raise HTTPException(status_code=500, detail=f"Failed to upload file '{name}'") from exc
            finally:
                await upload.close()
            rows.append({"name": name, "path": self._relative_posix(abs_path, root), "size": total_size})

        return {"bot_id": bot_id, "files": rows}

    def serve_file(
        self,
        *,
        bot_id: str,
        path: str,
        download: bool,
        request: Request,
        workspace_root: Optional[str] = None,
    ) -> Response:
        """Serve a workspace file inline or as a download.

        The media type is guessed from the file name and falls back to
        ``application/octet-stream``. A ``Range`` request header is honoured
        only for inline (non-download) responses.

        Raises:
            HTTPException: 400 for a path outside the workspace; 404 when the
                file does not exist; 416 for an unsatisfiable range.
        """
        _root, target = self._resolve_workspace_path(bot_id, path, workspace_root=workspace_root)
        if not os.path.isfile(target):
            raise HTTPException(status_code=404, detail="File not found")

        guessed, _ = mimetypes.guess_type(target)
        media_type = guessed or "application/octet-stream"

        range_header = request.headers.get("range", "")
        if range_header and not download:
            return self._build_ranged_workspace_response(target, media_type, range_header)

        common_headers = {"Accept-Ranges": "bytes"}
        if download:
            return FileResponse(
                target,
                media_type=media_type,
                filename=os.path.basename(target),
                headers=common_headers,
            )
        return FileResponse(target, media_type=media_type, headers=common_headers)

    def purge_bot_workspace(self, *, bot_id: str, workspace_root: Optional[str] = None) -> Dict[str, Any]:
        """Delete a bot's state directory and, if different, its workspace root.

        Returns:
            ``{"bot_id": <stripped id>, "deleted": <bool>}`` where ``deleted``
            is true when at least one target directory existed and removal was
            attempted (removal errors are ignored, as best effort).

        Raises:
            HTTPException: 400 when ``bot_id`` is empty, is ``.``/``..`` or
                contains a path separator, or when a resolved target is a
                filesystem root.
        """
        bot_key = str(bot_id or "").strip()
        # An empty or dot id would resolve to the shared data root (or its
        # parent) and the recursive delete below would wipe every bot.
        if not self._is_safe_bot_id(bot_key):
            raise HTTPException(status_code=400, detail="invalid bot workspace root")

        state_root = self._state_bot_root(bot_id, workspace_root=workspace_root)
        workspace_bot_root = self._workspace_bot_root(bot_id, workspace_root=workspace_root)
        targets = [state_root]
        if os.path.abspath(workspace_bot_root) != os.path.abspath(state_root):
            targets.append(workspace_bot_root)

        # Validate every target before deleting anything.
        for target in targets:
            if not target or target in {"/", "."} or os.path.dirname(target) == target:
                raise HTTPException(status_code=400, detail="invalid bot workspace root")

        deleted = False
        for target in targets:
            if os.path.isdir(target):
                shutil.rmtree(target, ignore_errors=True)
                deleted = True
        return {"bot_id": bot_key, "deleted": deleted}

    # ------------------------------------------------------------------
    # Root resolution
    # ------------------------------------------------------------------

    def _workspace_root(self, bot_id: str, workspace_root: Optional[str] = None) -> str:
        """Return the absolute ``.nanobot/workspace`` directory of a bot."""
        bot_root = self._workspace_bot_root(bot_id, workspace_root=workspace_root)
        return os.path.abspath(os.path.join(bot_root, ".nanobot", "workspace"))

    def _state_bot_root(self, bot_id: str, workspace_root: Optional[str] = None) -> str:
        """Return the bot's state directory.

        Uses ``<workspace_root>/<bot_id>`` when ``workspace_root`` is given,
        otherwise ``<host_data_root>/<bot_id>``.
        """
        configured = str(workspace_root or "").strip()
        if configured:
            return self._bot_dir_under(configured, bot_id)
        return os.path.abspath(os.path.join(self._host_data_root, str(bot_id or "").strip()))

    def _runtime_target_path(self, bot_id: str) -> str:
        """Return the path of the bot's ``runtime-target.json``."""
        return os.path.join(self._state_bot_root(bot_id), ".nanobot", "runtime-target.json")

    def _runtime_target_payload(self, bot_id: str) -> Dict[str, Any]:
        """Load ``runtime-target.json``; return ``{}`` when missing or invalid."""
        return self._load_json_dict(self._runtime_target_path(bot_id))

    def _workspace_bot_root(self, bot_id: str, workspace_root: Optional[str] = None) -> str:
        """Return the directory that contains the bot's ``.nanobot`` folder.

        Resolution order: explicit ``workspace_root`` argument, the
        ``workspace_root`` key of ``runtime-target.json``, the workspace
        declared in ``config.json``, then the state directory.
        """
        configured = str(workspace_root or "").strip()
        if configured:
            return self._bot_dir_under(configured, bot_id)

        payload = self._runtime_target_payload(bot_id)
        from_runtime = str(payload.get("workspace_root") or "").strip()
        if from_runtime:
            return self._bot_dir_under(from_runtime, bot_id)

        from_config = self._workspace_bot_root_from_config(bot_id)
        if from_config:
            return from_config
        return self._state_bot_root(bot_id)

    def _workspace_bot_root_from_config(self, bot_id: str) -> str:
        """Derive the bot root from ``agents.defaults.workspace`` in ``config.json``.

        Returns the part of the configured workspace path that precedes
        ``/.nanobot/workspace``, or ``""`` when the file is missing, malformed
        or does not contain such a path.
        """
        config_path = os.path.join(self._state_bot_root(bot_id), ".nanobot", "config.json")
        # Deliberately best effort: any problem with this file means "no
        # answer from config", never a failed request.
        try:
            payload = self._load_json_dict(config_path)
            agents = payload.get("agents")
            if not isinstance(agents, dict):
                return ""
            defaults = agents.get("defaults")
            if not isinstance(defaults, dict):
                return ""
            workspace = str(defaults.get("workspace") or "").strip()
            if not workspace:
                return ""
            normalized = os.path.abspath(os.path.expanduser(workspace))
            if normalized.endswith("/.nanobot/workspace"):
                return os.path.abspath(os.path.dirname(os.path.dirname(normalized)))
            marker = f"{os.sep}.nanobot{os.sep}workspace"
            if marker in normalized:
                return os.path.abspath(normalized.split(marker, 1)[0])
        except Exception:
            return ""
        return ""

    def _resolve_workspace_path(
        self,
        bot_id: str,
        rel_path: Optional[str] = None,
        workspace_root: Optional[str] = None,
    ) -> tuple[str, str]:
        """Resolve ``rel_path`` inside the bot's workspace.

        Returns:
            ``(root, target)`` as absolute paths; ``target`` equals ``root``
            for an empty ``rel_path``.

        Raises:
            HTTPException: 400 when the normalised target is not inside the
                workspace root.
        """
        root = self._workspace_root(bot_id, workspace_root=workspace_root)
        rel = (rel_path or "").strip().replace("\\", "/")
        target = os.path.abspath(os.path.join(root, rel))
        # NOTE(review): abspath normalises ".." but does not resolve symlinks,
        # so a symlink inside the workspace can still point outside it.
        # Confirm whether that is acceptable for this deployment.
        try:
            inside = os.path.commonpath([root, target]) == root
        except ValueError:
            # commonpath raises when the paths cannot be compared
            # (e.g. different drives on Windows).
            inside = False
        if not inside:
            raise HTTPException(status_code=400, detail="invalid workspace path")
        return root, target

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _bot_dir_under(base: str, bot_id: str) -> str:
        """Return ``<base>/<bot_id>`` as an absolute path, expanding ``~`` in ``base``."""
        normalized_base = os.path.abspath(os.path.expanduser(base))
        return os.path.abspath(os.path.join(normalized_base, str(bot_id or "").strip()))

    @staticmethod
    def _is_safe_bot_id(bot_key: str) -> bool:
        """Return whether ``bot_key`` names exactly one directory level."""
        if not bot_key or bot_key in {".", ".."} or "\x00" in bot_key:
            return False
        separators = {"/", "\\", os.sep}
        if os.altsep:
            separators.add(os.altsep)
        return not any(sep in bot_key for sep in separators)

    @staticmethod
    def _load_json_dict(path: str) -> Dict[str, Any]:
        """Load a JSON object from ``path``; return ``{}`` on any problem."""
        if not os.path.isfile(path):
            return {}
        # Best effort by design: unreadable or malformed files count as empty.
        try:
            with open(path, "r", encoding="utf-8") as fh:
                payload = json.load(fh)
        except Exception:
            return {}
        return payload if isinstance(payload, dict) else {}

    @staticmethod
    def _relative_posix(path: str, root: str) -> str:
        """Return ``path`` relative to ``root`` using ``/`` separators."""
        return os.path.relpath(path, root).replace("\\", "/")

    @staticmethod
    def _remove_quietly(path: str) -> None:
        """Remove ``path`` if present, ignoring file-system errors."""
        try:
            os.remove(path)
        except OSError:
            pass

    @staticmethod
    def _sanitize_upload_name(filename: Optional[str]) -> str:
        """Reduce a client-supplied file name to a safe base name.

        Directory components are dropped and every run of characters other
        than word characters, ``.``, ``-``, ``(``, ``)``, ``+``, ``@`` and
        space becomes ``_``. Falls back to ``upload.bin``.
        """
        original = (filename or "upload.bin").strip() or "upload.bin"
        name = os.path.basename(original).replace("\\", "_").replace("/", "_")
        name = re.sub(r"[^\w.\-()+@ ]+", "_", name)
        return name or "upload.bin"

    @staticmethod
    def _unique_upload_path(directory: str, name: str) -> tuple[str, str]:
        """Pick a file name in ``directory`` that does not exist yet.

        Returns ``(name, absolute path)``. When ``name`` is taken, a Unix
        timestamp is appended to the stem, followed by a counter if that name
        is taken as well.
        """
        candidate = os.path.join(directory, name)
        if not os.path.exists(candidate):
            return name, candidate
        stem, ext = os.path.splitext(name)
        stamp = int(datetime.now(timezone.utc).timestamp())
        unique = f"{stem}-{stamp}{ext}"
        counter = 1
        while os.path.exists(os.path.join(directory, unique)):
            unique = f"{stem}-{stamp}-{counter}{ext}"
            counter += 1
        return unique, os.path.join(directory, unique)

    @staticmethod
    def _iso_utc(timestamp: float) -> str:
        """Format a POSIX timestamp as a UTC ISO-8601 string with a ``Z`` suffix."""
        moment = datetime.fromtimestamp(timestamp, tz=timezone.utc)
        # Drop tzinfo so isoformat() does not append "+00:00" before the "Z".
        return moment.replace(tzinfo=None).isoformat() + "Z"

    @staticmethod
    def _ctime_iso(stat: os.stat_result) -> str:
        """Return the creation time of ``stat`` as a UTC ISO string.

        Prefers ``st_birthtime`` when the platform provides it, then
        ``st_ctime``; falls back to ``st_mtime`` if the value cannot be
        converted.
        """
        ts = getattr(stat, "st_birthtime", None)
        if ts is None:
            ts = getattr(stat, "st_ctime", None)
        try:
            return EdgeWorkspaceService._iso_utc(float(ts))
        except Exception:
            return EdgeWorkspaceService._iso_utc(stat.st_mtime)

    @staticmethod
    def _stat_entry(abs_path: str) -> Optional[os.stat_result]:
        """Stat a directory entry without failing the whole listing.

        Falls back to ``lstat`` for entries whose target cannot be stat'ed
        (e.g. a dangling symlink); returns ``None`` when the entry is gone.
        """
        try:
            return os.stat(abs_path)
        except OSError:
            try:
                return os.lstat(abs_path)
            except OSError:
                return None

    @staticmethod
    def _write_text_atomic(target: str, content: str) -> None:
        """Write ``content`` to ``target`` via a sibling temp file and ``os.replace``.

        The temp file is removed again when writing or replacing fails, so no
        stray ``.tmp`` file is left next to ``target``.
        """
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)
        tmp = f"{target}.tmp"
        try:
            with open(tmp, "w", encoding="utf-8") as fh:
                fh.write(content)
            os.replace(tmp, target)
        except BaseException:
            try:
                os.remove(tmp)
            except OSError:
                pass
            raise

    @staticmethod
    def _stream_file_range(target: str, start: int, end: int, chunk_size: int = 1024 * 1024) -> Generator[bytes, None, None]:
        """Yield the bytes ``start``..``end`` (inclusive) of ``target`` in chunks."""
        with open(target, "rb") as fh:
            fh.seek(start)
            remaining = end - start + 1
            while remaining > 0:
                chunk = fh.read(min(chunk_size, remaining))
                if not chunk:
                    # File is shorter than expected; stop early.
                    break
                remaining -= len(chunk)
                yield chunk

    def _build_ranged_workspace_response(self, target: str, media_type: str, range_header: str) -> Response:
        """Build a 206 response for a single ``bytes=`` range.

        Supports ``start-``, ``start-end`` and suffix (``-length``) forms. An
        end beyond the file is clamped to the last byte.

        Raises:
            HTTPException: 416 when the header is malformed or the range
                cannot be satisfied.
        """
        file_size = os.path.getsize(target)
        matched = self._RANGE_PATTERN.match(range_header.strip())
        if not matched:
            raise HTTPException(status_code=416, detail="Invalid range")
        start_raw, end_raw = matched.groups()
        if start_raw == "" and end_raw == "":
            raise HTTPException(status_code=416, detail="Invalid range")

        if start_raw == "":
            # Suffix range: the last ``length`` bytes of the file.
            length = int(end_raw)
            if length <= 0:
                raise HTTPException(status_code=416, detail="Invalid range")
            start = max(file_size - length, 0)
            end = file_size - 1
        else:
            start = int(start_raw)
            end = int(end_raw) if end_raw else file_size - 1

        if start >= file_size or start < 0:
            raise HTTPException(status_code=416, detail="Requested range not satisfiable")
        end = min(end, file_size - 1)
        if end < start:
            raise HTTPException(status_code=416, detail="Requested range not satisfiable")

        headers = {
            "Accept-Ranges": "bytes",
            "Content-Range": f"bytes {start}-{end}/{file_size}",
            "Content-Length": str(end - start + 1),
        }
        return StreamingResponse(
            self._stream_file_range(target, start, end),
            status_code=206,
            media_type=media_type or "application/octet-stream",
            headers=headers,
        )

    # ------------------------------------------------------------------
    # Directory listings
    # ------------------------------------------------------------------

    def _entry_row(
        self,
        *,
        name: str,
        abs_path: str,
        root: str,
        stat: os.stat_result,
        kind: str,
        size: Optional[int],
        ext: str,
    ) -> Dict[str, Any]:
        """Build the listing dict for one directory entry."""
        return {
            "name": name,
            "path": self._relative_posix(abs_path, root),
            "type": kind,
            "size": size,
            "ext": ext,
            "ctime": self._ctime_iso(stat),
            "mtime": self._iso_utc(stat.st_mtime),
        }

    def _list_workspace_dir(self, path: str, root: str) -> List[Dict[str, Any]]:
        """List one directory level: directories first, then case-insensitive by name.

        Entries that cannot be stat'ed at all are skipped instead of failing
        the listing. ``size`` and ``ext`` are filled only for regular files.
        """
        keyed_rows = []
        for name in os.listdir(path):
            if name in self._HIDDEN_NAMES:
                continue
            abs_path = os.path.join(path, name)
            stat = self._stat_entry(abs_path)
            if stat is None:
                continue
            is_dir = os.path.isdir(abs_path)
            is_file = os.path.isfile(abs_path)
            row = self._entry_row(
                name=name,
                abs_path=abs_path,
                root=root,
                stat=stat,
                kind="dir" if is_dir else "file",
                size=stat.st_size if is_file else None,
                ext=os.path.splitext(name)[1].lower() if is_file else "",
            )
            keyed_rows.append((not is_dir, name.lower(), row))
        # Stable sort on (is-not-dir, lowercase name) only.
        keyed_rows.sort(key=lambda item: item[:2])
        return [row for _, _, row in keyed_rows]

    def _list_workspace_dir_recursive(self, path: str, root: str) -> List[Dict[str, Any]]:
        """List the whole subtree under ``path``.

        For each directory visited by ``os.walk`` its sub-directories are
        emitted first, then its files, both sorted case-insensitively.
        Entries that cannot be stat'ed at all are skipped.
        """
        rows: List[Dict[str, Any]] = []
        for walk_root, dirnames, filenames in os.walk(path):
            # In-place sort also fixes the order in which os.walk descends.
            dirnames.sort(key=str.lower)
            filenames.sort(key=str.lower)
            for kind, names in (("dir", dirnames), ("file", filenames)):
                for name in names:
                    if name in self._HIDDEN_NAMES:
                        continue
                    abs_path = os.path.join(walk_root, name)
                    stat = self._stat_entry(abs_path)
                    if stat is None:
                        continue
                    is_file_row = kind == "file"
                    rows.append(
                        self._entry_row(
                            name=name,
                            abs_path=abs_path,
                            root=root,
                            stat=stat,
                            kind=kind,
                            size=stat.st_size if is_file_row else None,
                            ext=os.path.splitext(name)[1].lower() if is_file_row else "",
                        )
                    )
        return rows
# Module-level service handle; starts as None.
# NOTE(review): presumably assigned an EdgeWorkspaceService during application
# startup by code outside this file — confirm against the app bootstrap.
edge_workspace_service: EdgeWorkspaceService | None = None