dashboard-nanobot/backend/services/topic_service.py

718 lines
24 KiB
Python

import json
import logging
import os
import re
import secrets
from datetime import datetime
from typing import Any, Dict, List, Optional
from sqlmodel import Session, select
from core.settings import BOTS_WORKSPACE_ROOT, TOPIC_MCP_INTERNAL_URL
from models.bot import BotInstance
from models.topic import TopicItem, TopicTopic
logger = logging.getLogger("dashboard.topic_mcp")
BOT_ID_PATTERN = re.compile(r"^[A-Za-z0-9_]+$")
TOPIC_MCP_SERVER_NAME = "topic_mcp"
TOPIC_MCP_TOKEN_HEADER = "x-topic-mcp-token"
TOPIC_MCP_DEFAULT_URL = TOPIC_MCP_INTERNAL_URL
TOPIC_MCP_DEFAULT_TIMEOUT = 30
TOPIC_MCP_PROTOCOL_VERSION = "2025-03-26"
TOPIC_DEDUPE_WINDOW_SECONDS = 10 * 60
TOPIC_LEVEL_SET = {"info", "warn", "error", "success"}
_TOPIC_KEY_RE = re.compile(r"^[a-z0-9][a-z0-9_.-]{0,63}$")
def _bot_data_root(bot_id: str) -> str:
return os.path.join(BOTS_WORKSPACE_ROOT, bot_id, ".nanobot")
def _config_json_path(bot_id: str) -> str:
return os.path.join(_bot_data_root(bot_id), "config.json")
def _read_bot_config(bot_id: str) -> Dict[str, Any]:
path = _config_json_path(bot_id)
if not os.path.isfile(path):
return {}
try:
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
return data if isinstance(data, dict) else {}
except Exception:
return {}
def _write_bot_config(bot_id: str, config_data: Dict[str, Any]) -> None:
path = _config_json_path(bot_id)
os.makedirs(os.path.dirname(path), exist_ok=True)
tmp = f"{path}.tmp"
with open(tmp, "w", encoding="utf-8") as f:
json.dump(config_data, f, ensure_ascii=False, indent=2)
os.replace(tmp, path)
def _dict_get_ci(raw: Any, key: str) -> Any:
if not isinstance(raw, dict):
return None
wanted = str(key or "").strip().lower()
for k, v in raw.items():
if str(k or "").strip().lower() == wanted:
return v
return None
def _as_bool(value: Any) -> bool:
if isinstance(value, bool):
return value
text = str(value or "").strip().lower()
return text in {"1", "true", "yes", "on", "y"}
def _extract_topic_mcp_token(server_cfg: Any) -> str:
headers = server_cfg.get("headers") if isinstance(server_cfg, dict) else None
return str(_dict_get_ci(headers, TOPIC_MCP_TOKEN_HEADER) or "").strip()
def _generate_topic_mcp_token(bot_id: str) -> str:
return f"{bot_id}.{secrets.token_urlsafe(24)}"
def _build_locked_topic_mcp_server(bot_id: str, token: str) -> Dict[str, Any]:
fixed_token = str(token or "").strip() or _generate_topic_mcp_token(bot_id)
return {
"type": "streamableHttp",
"url": TOPIC_MCP_DEFAULT_URL,
"headers": {TOPIC_MCP_TOKEN_HEADER: fixed_token},
"toolTimeout": TOPIC_MCP_DEFAULT_TIMEOUT,
}
def _annotate_locked_mcp_servers(raw_servers: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
rows: Dict[str, Dict[str, Any]] = {}
for name, cfg in raw_servers.items():
if not isinstance(cfg, dict):
continue
row = dict(cfg)
row["locked"] = name == TOPIC_MCP_SERVER_NAME
rows[name] = row
return rows
def _ensure_topic_mcp_server(bot_id: str, config_data: Optional[Dict[str, Any]] = None, persist: bool = True) -> Dict[str, Any]:
working = dict(config_data) if isinstance(config_data, dict) else _read_bot_config(bot_id)
tools_cfg = working.get("tools")
if not isinstance(tools_cfg, dict):
tools_cfg = {}
mcp_servers = tools_cfg.get("mcpServers")
if not isinstance(mcp_servers, dict):
mcp_servers = {}
existing_server = mcp_servers.get(TOPIC_MCP_SERVER_NAME)
existing_token = _extract_topic_mcp_token(existing_server)
locked_server = _build_locked_topic_mcp_server(bot_id, existing_token)
changed = mcp_servers.get(TOPIC_MCP_SERVER_NAME) != locked_server
mcp_servers[TOPIC_MCP_SERVER_NAME] = locked_server
tools_cfg["mcpServers"] = mcp_servers
working["tools"] = tools_cfg
if persist and changed:
_write_bot_config(bot_id, working)
return locked_server
def _resolve_topic_mcp_bot_id_by_token(session: Session, token: str) -> Optional[str]:
incoming = str(token or "").strip()
if not incoming:
return None
candidates: List[str] = []
hinted_bot_id = incoming.split(".", 1)[0].strip()
if hinted_bot_id and BOT_ID_PATTERN.fullmatch(hinted_bot_id):
candidates.append(hinted_bot_id)
for bot in session.exec(select(BotInstance)).all():
if bot.id not in candidates:
candidates.append(bot.id)
for bot_id in candidates:
config_data = _read_bot_config(bot_id)
tools_cfg = config_data.get("tools")
if not isinstance(tools_cfg, dict):
continue
mcp_servers = tools_cfg.get("mcpServers")
if not isinstance(mcp_servers, dict):
continue
expected = _extract_topic_mcp_token(mcp_servers.get(TOPIC_MCP_SERVER_NAME))
if expected and secrets.compare_digest(expected, incoming):
return bot_id
return None
def _normalize_topic_key(raw: Any) -> str:
value = str(raw or "").strip().lower()
if not value:
return ""
return value
def _ensure_topic_defaults(session: Session, bot_id: str) -> None:
# Deprecated: topic feed global switch/fallback removed.
# Keep as no-op for call-site compatibility.
_ = session
_ = bot_id
return None
def _parse_json_dict(raw: str) -> Dict[str, Any]:
text = str(raw or "").strip()
if not text:
return {}
try:
data = json.loads(text)
return data if isinstance(data, dict) else {}
except Exception:
return {}
def _parse_json_list(raw: str) -> List[Any]:
text = str(raw or "").strip()
if not text:
return []
try:
data = json.loads(text)
except Exception:
return []
return data if isinstance(data, list) else []
def _topic_to_dict(row: TopicTopic) -> Dict[str, Any]:
return {
"id": row.id,
"bot_id": row.bot_id,
"topic_key": str(row.topic_key or "").strip().lower(),
"name": row.name or "",
"description": row.description or "",
"is_active": bool(row.is_active),
"routing": _parse_json_dict(row.routing_json or "{}"),
"view_schema": _parse_json_dict(row.view_schema_json or "{}"),
"created_at": row.created_at.isoformat() if row.created_at else None,
"updated_at": row.updated_at.isoformat() if row.updated_at else None,
}
def _list_topics(session: Session, bot_id: str) -> List[Dict[str, Any]]:
rows = session.exec(
select(TopicTopic)
.where(TopicTopic.bot_id == bot_id)
.order_by(TopicTopic.is_active.desc(), TopicTopic.topic_key.asc())
).all()
return [_topic_to_dict(row) for row in rows]
def _topic_item_to_dict(row: TopicItem) -> Dict[str, Any]:
return {
"id": row.id,
"bot_id": row.bot_id,
"topic_key": str(row.topic_key or "").strip().lower(),
"title": row.title or "",
"content": row.content or "",
"level": str(row.level or "info").strip().lower(),
"tags": _parse_json_list(row.tags_json or "[]"),
"view": _parse_json_dict(row.view_json or "{}"),
"source": row.source or "mcp",
"dedupe_key": row.dedupe_key or "",
"is_read": bool(row.is_read),
"created_at": row.created_at.isoformat() if row.created_at else None,
}
def _topic_get_row(session: Session, bot_id: str, topic_key: str) -> Optional[TopicTopic]:
normalized = _normalize_topic_key(topic_key)
if not normalized:
return None
return session.exec(
select(TopicTopic)
.where(TopicTopic.bot_id == bot_id)
.where(TopicTopic.topic_key == normalized)
.limit(1)
).first()
def _normalize_topic_keywords(raw: Any) -> List[str]:
rows: List[str] = []
if isinstance(raw, list):
for item in raw:
text = str(item or "").strip().lower()
if text and text not in rows:
rows.append(text)
elif isinstance(raw, str):
text = raw.strip().lower()
if text:
rows.append(text)
return rows
def _topic_filter_reason(payload: Dict[str, Any]) -> str:
if _as_bool(payload.get("is_progress")):
return "progress message is filtered"
if _as_bool(payload.get("is_tool_hint")):
return "tool hint message is filtered"
source = str(payload.get("source") or payload.get("type") or "").strip().lower()
if source in {"progress", "tool_hint", "sendprogress", "sendtoolhints"}:
return f"{source} message is filtered"
return ""
def _topic_route_pick(
session: Session,
bot_id: str,
payload: Dict[str, Any],
requested_topic_key: str = "",
) -> Dict[str, Any]:
active_topics = session.exec(
select(TopicTopic)
.where(TopicTopic.bot_id == bot_id)
.where(TopicTopic.is_active == True)
.order_by(TopicTopic.topic_key.asc())
).all()
if not active_topics:
return {
"matched": False,
"topic_key": None,
"confidence": 1.0,
"reason": "no active topic configured",
}
req_key = _normalize_topic_key(requested_topic_key or payload.get("topic_key") or payload.get("topic"))
if req_key:
row = _topic_get_row(session, bot_id, req_key)
if row and bool(row.is_active):
return {
"matched": True,
"topic_key": req_key,
"confidence": 0.99,
"reason": "explicit topic key accepted",
}
return {
"matched": False,
"topic_key": None,
"confidence": 0.72,
"reason": f"requested topic {req_key} unavailable or inactive",
}
text = " ".join(
[
str(payload.get("title") or "").strip(),
str(payload.get("content") or payload.get("text") or "").strip(),
" ".join([str(v or "").strip() for v in (payload.get("tags") or [])]),
]
).strip().lower()
if not text:
return {
"matched": False,
"topic_key": None,
"confidence": 1.0,
"reason": "no routing evidence",
}
best_key = ""
best_score = -10.0
best_reason = "no topic matched"
matched_include = False
for topic in active_topics:
key = _normalize_topic_key(topic.topic_key)
if not key:
continue
routing = _parse_json_dict(topic.routing_json or "{}")
include_when = _normalize_topic_keywords(routing.get("include_when"))
exclude_when = _normalize_topic_keywords(routing.get("exclude_when"))
priority_raw = routing.get("priority", 0)
try:
priority = max(0, min(int(priority_raw), 100))
except Exception:
priority = 0
include_hits = [kw for kw in include_when if kw in text]
exclude_hits = [kw for kw in exclude_when if kw in text]
if not include_hits:
continue
matched_include = True
score = float(len(include_hits) * 2 - len(exclude_hits) * 3) + (priority / 1000.0)
if score > best_score:
best_score = score
best_key = key
if include_hits:
best_reason = f"matched include_when: {', '.join(include_hits[:3])}"
elif exclude_hits:
best_reason = f"matched exclude_when: {', '.join(exclude_hits[:3])}"
else:
best_reason = "no include/exclude match, used highest priority active topic"
if not matched_include:
return {
"matched": False,
"topic_key": None,
"confidence": 0.68,
"reason": "no include_when matched",
}
if best_score <= 0:
return {
"matched": False,
"topic_key": None,
"confidence": 0.68,
"reason": "no positive routing score",
}
confidence = min(0.95, max(0.61, 0.61 + best_score / 12.0))
return {
"matched": True,
"topic_key": best_key,
"confidence": round(confidence, 3),
"reason": best_reason,
}
def _topic_publish_internal(session: Session, bot_id: str, payload: Dict[str, Any]) -> Dict[str, Any]:
filter_reason = _topic_filter_reason(payload)
if filter_reason:
return {
"published": False,
"skipped": True,
"reason": filter_reason,
}
title = str(payload.get("title") or "").strip()
content = str(payload.get("content") or payload.get("text") or "").strip()
if not title and not content:
return {
"published": False,
"skipped": True,
"reason": "empty title/content",
}
level = str(payload.get("level") or "info").strip().lower()
if level not in TOPIC_LEVEL_SET:
level = "info"
tags = payload.get("tags")
tags_rows: List[str] = []
if isinstance(tags, list):
for tag in tags:
text = str(tag or "").strip()
if text and text not in tags_rows:
tags_rows.append(text[:64])
route_result = _topic_route_pick(session, bot_id, payload, requested_topic_key=str(payload.get("topic_key") or ""))
if not bool(route_result.get("matched")):
return {
"published": False,
"skipped": True,
"reason": str(route_result.get("reason") or "no topic matched"),
"route": route_result,
}
topic_key = _normalize_topic_key(route_result.get("topic_key"))
if not topic_key:
return {
"published": False,
"skipped": True,
"reason": "invalid topic route result",
"route": route_result,
}
row = _topic_get_row(session, bot_id, topic_key)
if not row or not bool(row.is_active):
return {
"published": False,
"skipped": True,
"reason": f"topic {topic_key} unavailable or inactive",
"route": route_result,
}
dedupe_key = str(payload.get("dedupe_key") or "").strip()
if dedupe_key:
existing = session.exec(
select(TopicItem)
.where(TopicItem.bot_id == bot_id)
.where(TopicItem.dedupe_key == dedupe_key)
.order_by(TopicItem.id.desc())
.limit(1)
).first()
if existing and existing.created_at:
age_s = (datetime.utcnow() - existing.created_at).total_seconds()
if age_s <= TOPIC_DEDUPE_WINDOW_SECONDS:
return {
"published": False,
"deduped": True,
"dedupe_window_seconds": TOPIC_DEDUPE_WINDOW_SECONDS,
"topic_key": _normalize_topic_key(existing.topic_key),
"reason": "dedupe_key hit within dedupe window",
"item": _topic_item_to_dict(existing),
}
view = payload.get("view")
view_json = json.dumps(view, ensure_ascii=False) if isinstance(view, dict) else None
source = str(payload.get("source") or "mcp").strip().lower() or "mcp"
now = datetime.utcnow()
item = TopicItem(
bot_id=bot_id,
topic_key=topic_key,
title=title[:2000],
content=content[:20000],
level=level,
tags_json=json.dumps(tags_rows, ensure_ascii=False) if tags_rows else None,
view_json=view_json,
source=source[:64],
dedupe_key=dedupe_key[:200] if dedupe_key else None,
is_read=False,
created_at=now,
)
session.add(item)
session.commit()
session.refresh(item)
return {
"published": True,
"topic_key": topic_key,
"item": _topic_item_to_dict(item),
"route": route_result,
}
def _jsonrpc_success(rpc_id: Any, result: Any) -> Dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": rpc_id,
"result": result,
}
def _jsonrpc_error(rpc_id: Any, code: int, message: str, data: Any = None) -> Dict[str, Any]:
payload: Dict[str, Any] = {
"jsonrpc": "2.0",
"id": rpc_id,
"error": {
"code": int(code),
"message": str(message or "unknown error"),
},
}
if data is not None:
payload["error"]["data"] = data
return payload
def _mcp_tool_result(structured: Dict[str, Any], is_error: bool = False) -> Dict[str, Any]:
return {
"content": [
{
"type": "text",
"text": json.dumps(structured, ensure_ascii=False),
}
],
"structuredContent": structured,
"isError": bool(is_error),
}
def _topic_mcp_tools() -> List[Dict[str, Any]]:
return [
{
"name": "topic_list_topics",
"description": "List available topics for the current bot.",
"inputSchema": {
"type": "object",
"properties": {
"include_inactive": {"type": "boolean"},
},
"additionalProperties": False,
},
},
{
"name": "topic_get_schema",
"description": "Get allowed view schema and optional topic-specific schema.",
"inputSchema": {
"type": "object",
"properties": {
"topic_key": {"type": "string"},
},
"additionalProperties": False,
},
},
{
"name": "topic_route",
"description": "Route candidate content to a topic and decide if publish is needed.",
"inputSchema": {
"type": "object",
"properties": {
"topic_key": {"type": "string"},
"title": {"type": "string"},
"content": {"type": "string"},
"tags": {"type": "array", "items": {"type": "string"}},
"is_progress": {"type": "boolean"},
"is_tool_hint": {"type": "boolean"},
"source": {"type": "string"},
},
"additionalProperties": True,
},
},
{
"name": "topic_publish",
"description": "Publish one item into topic feed with dedupe support.",
"inputSchema": {
"type": "object",
"properties": {
"topic_key": {"type": "string"},
"title": {"type": "string"},
"content": {"type": "string"},
"level": {"type": "string"},
"tags": {"type": "array", "items": {"type": "string"}},
"view": {"type": "object"},
"dedupe_key": {"type": "string"},
"source": {"type": "string"},
"is_progress": {"type": "boolean"},
"is_tool_hint": {"type": "boolean"},
},
"additionalProperties": True,
},
},
]
def _topic_mcp_list_topics(session: Session, bot_id: str, args: Dict[str, Any]) -> Dict[str, Any]:
_ensure_topic_defaults(session, bot_id)
include_inactive = _as_bool(args.get("include_inactive")) or ("include_inactive" not in args)
topics = _list_topics(session, bot_id)
if not include_inactive:
topics = [row for row in topics if bool(row.get("is_active"))]
return {
"bot_id": bot_id,
"topics": topics,
}
def _topic_mcp_get_schema(session: Session, bot_id: str, args: Dict[str, Any]) -> Dict[str, Any]:
_ensure_topic_defaults(session, bot_id)
topic_key = _normalize_topic_key(args.get("topic_key"))
topic_payload: Optional[Dict[str, Any]] = None
if topic_key:
row = _topic_get_row(session, bot_id, topic_key)
if row:
topic_payload = _topic_to_dict(row)
return {
"version": "v1",
"view_types": ["markdown", "card", "table", "checklist", "metric", "timeline"],
"topic": topic_payload,
"view_schema": {
"type": "object",
"description": "Declarative view payload only. Scripts and unsafe HTML are not allowed.",
},
"publish_constraints": {
"level": sorted(list(TOPIC_LEVEL_SET)),
"dedupe_window_seconds": TOPIC_DEDUPE_WINDOW_SECONDS,
},
}
def _topic_mcp_route(session: Session, bot_id: str, args: Dict[str, Any]) -> Dict[str, Any]:
_ensure_topic_defaults(session, bot_id)
filter_reason = _topic_filter_reason(args)
if filter_reason:
return {
"should_publish": False,
"topic_key": None,
"confidence": 1.0,
"reason": filter_reason,
}
title = str(args.get("title") or "").strip()
content = str(args.get("content") or args.get("text") or "").strip()
if not title and not content:
return {
"should_publish": False,
"topic_key": None,
"confidence": 1.0,
"reason": "empty title/content",
}
route = _topic_route_pick(session, bot_id, args, requested_topic_key=str(args.get("topic_key") or ""))
return {
"should_publish": bool(route.get("matched")),
"topic_key": route.get("topic_key"),
"confidence": route.get("confidence"),
"reason": route.get("reason"),
}
def _topic_mcp_publish(session: Session, bot_id: str, args: Dict[str, Any]) -> Dict[str, Any]:
return _topic_publish_internal(session, bot_id, args)
def _dispatch_topic_mcp_method(session: Session, bot_id: str, method: str, params: Dict[str, Any]) -> Any:
if method == "initialize":
return {
"protocolVersion": TOPIC_MCP_PROTOCOL_VERSION,
"capabilities": {
"tools": {},
},
"serverInfo": {
"name": TOPIC_MCP_SERVER_NAME,
"version": "0.1.0",
},
}
if method in {"notifications/initialized", "initialized"}:
return None
if method == "ping":
return {}
if method == "tools/list":
return {
"tools": _topic_mcp_tools(),
}
if method != "tools/call":
raise KeyError(f"Unknown method: {method}")
tool_name = str(params.get("name") or "").strip()
arguments = params.get("arguments")
if not isinstance(arguments, dict):
arguments = {}
if tool_name == "topic_list_topics":
return _mcp_tool_result(_topic_mcp_list_topics(session, bot_id, arguments))
if tool_name == "topic_get_schema":
return _mcp_tool_result(_topic_mcp_get_schema(session, bot_id, arguments))
if tool_name == "topic_route":
return _mcp_tool_result(_topic_mcp_route(session, bot_id, arguments))
if tool_name == "topic_publish":
return _mcp_tool_result(_topic_mcp_publish(session, bot_id, arguments))
return _mcp_tool_result(
{
"error": f"unknown tool: {tool_name}",
"available_tools": [tool["name"] for tool in _topic_mcp_tools()],
},
is_error=True,
)
def _handle_topic_mcp_rpc_item(session: Session, bot_id: str, item: Any) -> Optional[Dict[str, Any]]:
if not isinstance(item, dict):
return _jsonrpc_error(None, -32600, "Invalid Request")
rpc_id = item.get("id")
method = str(item.get("method") or "").strip()
if not method:
return _jsonrpc_error(rpc_id, -32600, "Invalid Request: method is required")
params = item.get("params")
if params is None:
params = {}
if not isinstance(params, dict):
return _jsonrpc_error(rpc_id, -32602, "Invalid params")
try:
result = _dispatch_topic_mcp_method(session, bot_id, method, params)
except KeyError as exc:
return _jsonrpc_error(rpc_id, -32601, str(exc))
except ValueError as exc:
return _jsonrpc_error(rpc_id, -32602, str(exc))
except Exception as exc:
logger.exception("topic_mcp method failed: %s", method)
return _jsonrpc_error(rpc_id, -32000, f"topic_mcp execution failed: {type(exc).__name__}: {exc}")
if rpc_id is None:
return None
return _jsonrpc_success(rpc_id, result)