# dashboard-nanobot/backend/services/topic_service.py
#
# 341 lines
# 11 KiB
# Python

import json
import re
from datetime import datetime
from typing import Any, Dict, List, Optional
from sqlmodel import Session, select
from models.topic import TopicItem, TopicTopic
# Age limit (in seconds) under which an existing item carrying the same
# dedupe_key causes a new publish to be reported as a duplicate.
TOPIC_DEDUPE_WINDOW_SECONDS = 10 * 60
# Accepted item levels; publishing falls back to "info" for anything else.
TOPIC_LEVEL_SET = {"info", "warn", "error", "success"}
# Topic-key shape: 1-64 characters, starting with a lowercase letter or digit,
# followed by lowercase letters, digits, "_", "." or "-".
# NOTE(review): not referenced in the visible part of this file; presumably
# used for validation elsewhere - confirm.
_TOPIC_KEY_RE = re.compile(r"^[a-z0-9][a-z0-9_.-]{0,63}$")
def _as_bool(value: Any) -> bool:
if isinstance(value, bool):
return value
text = str(value or "").strip().lower()
return text in {"1", "true", "yes", "on", "y"}
def _normalize_topic_key(raw: Any) -> str:
value = str(raw or "").strip().lower()
if not value:
return ""
return value
def _parse_json_dict(raw: str) -> Dict[str, Any]:
text = str(raw or "").strip()
if not text:
return {}
try:
data = json.loads(text)
return data if isinstance(data, dict) else {}
except Exception:
return {}
def _parse_json_list(raw: str) -> List[Any]:
text = str(raw or "").strip()
if not text:
return []
try:
data = json.loads(text)
except Exception:
return []
return data if isinstance(data, list) else []
def _topic_to_dict(row: TopicTopic) -> Dict[str, Any]:
    """Serialize a ``TopicTopic`` row into a plain dict.

    The topic key is trimmed and lower-cased, missing text fields become
    empty strings, the JSON columns are decoded into dicts, and the
    timestamps are rendered as ISO-8601 strings (``None`` when unset).
    """

    def _iso(moment: Any) -> Optional[str]:
        # Unset timestamps are reported as None rather than raising.
        return moment.isoformat() if moment else None

    return dict(
        id=row.id,
        bot_id=row.bot_id,
        topic_key=str(row.topic_key or "").strip().lower(),
        name=row.name or "",
        description=row.description or "",
        is_active=bool(row.is_active),
        routing=_parse_json_dict(row.routing_json or "{}"),
        view_schema=_parse_json_dict(row.view_schema_json or "{}"),
        created_at=_iso(row.created_at),
        updated_at=_iso(row.updated_at),
    )
def _list_topics(session: Session, bot_id: str) -> List[Dict[str, Any]]:
    """Return every topic of ``bot_id`` as serialized dicts.

    Rows are ordered with active topics first, then by topic key ascending.
    """
    statement = (
        select(TopicTopic)
        .where(TopicTopic.bot_id == bot_id)
        .order_by(TopicTopic.is_active.desc(), TopicTopic.topic_key.asc())
    )
    topics = session.exec(statement).all()
    return list(map(_topic_to_dict, topics))
def _topic_item_to_dict(row: TopicItem) -> Dict[str, Any]:
    """Serialize a ``TopicItem`` row into a plain dict.

    Text fields default to empty strings, ``level`` defaults to ``"info"``,
    ``source`` defaults to ``"dashboard"``, the JSON columns are decoded,
    and ``created_at`` is rendered as an ISO-8601 string (``None`` if unset).
    """
    created = row.created_at
    item: Dict[str, Any] = {"id": row.id, "bot_id": row.bot_id}
    item["topic_key"] = str(row.topic_key or "").strip().lower()
    item["title"] = row.title or ""
    item["content"] = row.content or ""
    item["level"] = str(row.level or "info").strip().lower()
    item["tags"] = _parse_json_list(row.tags_json or "[]")
    item["view"] = _parse_json_dict(row.view_json or "{}")
    item["source"] = row.source or "dashboard"
    item["dedupe_key"] = row.dedupe_key or ""
    item["is_read"] = bool(row.is_read)
    item["created_at"] = created.isoformat() if created else None
    return item
def _topic_get_row(session: Session, bot_id: str, topic_key: str) -> Optional[TopicTopic]:
    """Fetch the topic row of ``bot_id`` whose key equals the normalized ``topic_key``.

    Returns ``None`` when the key normalizes to an empty string or when no
    matching row exists. No database query is issued for an empty key.
    """
    key = _normalize_topic_key(topic_key)
    if key:
        query = (
            select(TopicTopic)
            .where(TopicTopic.bot_id == bot_id)
            .where(TopicTopic.topic_key == key)
            .limit(1)
        )
        return session.exec(query).first()
    return None
def _normalize_topic_keywords(raw: Any) -> List[str]:
rows: List[str] = []
if isinstance(raw, list):
for item in raw:
text = str(item or "").strip().lower()
if text and text not in rows:
rows.append(text)
elif isinstance(raw, str):
text = raw.strip().lower()
if text:
rows.append(text)
return rows
def _topic_filter_reason(payload: Dict[str, Any]) -> str:
    """Explain why ``payload`` must not be published, or return "" if it may.

    A payload is filtered when its ``is_progress`` or ``is_tool_hint`` flag is
    truthy, or when its ``source`` (falling back to ``type``) names one of the
    progress / tool-hint origins.
    """
    flag_reasons = (
        ("is_progress", "progress message is filtered"),
        ("is_tool_hint", "tool hint message is filtered"),
    )
    for flag, reason in flag_reasons:
        if _as_bool(payload.get(flag)):
            return reason

    origin = str(payload.get("source") or payload.get("type") or "").strip().lower()
    if origin not in {"progress", "tool_hint", "sendprogress", "sendtoolhints"}:
        return ""
    return f"{origin} message is filtered"
def _topic_route_pick(
    session: Session,
    bot_id: str,
    payload: Dict[str, Any],
    requested_topic_key: str = "",
) -> Dict[str, Any]:
    """Choose the active topic that ``payload`` should be routed to.

    Resolution order:

    1. If the bot has no active topic, nothing matches.
    2. If a topic key is requested (``requested_topic_key``, else
       ``payload["topic_key"]``, else ``payload["topic"]``), it is accepted
       only when that topic exists and is active; no keyword routing is
       attempted as a fallback.
    3. Otherwise the title, content/text and tags are joined into one
       lower-cased text, and every active topic is scored from its
       ``routing_json``: +2 per ``include_when`` keyword found as a substring,
       -3 per ``exclude_when`` keyword found, plus ``priority`` (clamped to
       0..100) divided by 1000 as a tie-breaker. Topics with no include hit
       are ignored. The best strictly positive score wins; on equal scores
       the topic that sorts first by key is kept.

    Args:
        session: Open database session used for the topic queries.
        bot_id: Owner of the topics to consider.
        payload: Message payload; reads ``topic_key``, ``topic``, ``title``,
            ``content``, ``text`` and ``tags``.
        requested_topic_key: Explicit topic key that takes precedence over
            the payload's own key fields.

    Returns:
        A dict with ``matched`` (bool), ``topic_key`` (str or ``None``),
        ``confidence`` (float) and ``reason`` (str).
    """

    def _miss(confidence: float, reason: str) -> Dict[str, Any]:
        # Every "not routed" outcome shares the same shape.
        return {
            "matched": False,
            "topic_key": None,
            "confidence": confidence,
            "reason": reason,
        }

    active_topics = session.exec(
        select(TopicTopic)
        .where(TopicTopic.bot_id == bot_id)
        .where(TopicTopic.is_active == True)  # noqa: E712 - builds a SQL expression
        .order_by(TopicTopic.topic_key.asc())
    ).all()
    if not active_topics:
        return _miss(1.0, "no active topic configured")

    req_key = _normalize_topic_key(requested_topic_key or payload.get("topic_key") or payload.get("topic"))
    if req_key:
        row = _topic_get_row(session, bot_id, req_key)
        if row and bool(row.is_active):
            return {
                "matched": True,
                "topic_key": req_key,
                "confidence": 0.99,
                "reason": "explicit topic key accepted",
            }
        return _miss(0.72, f"requested topic {req_key} unavailable or inactive")

    # A bare string must count as one tag: iterating it would split it into
    # single characters and defeat keyword matching. Values that are not a
    # collection of tags are ignored instead of raising on iteration.
    raw_tags = payload.get("tags")
    if isinstance(raw_tags, str):
        raw_tags = [raw_tags]
    elif not isinstance(raw_tags, (list, tuple, set, frozenset)):
        raw_tags = []

    text = " ".join(
        [
            str(payload.get("title") or "").strip(),
            str(payload.get("content") or payload.get("text") or "").strip(),
            " ".join(str(tag or "").strip() for tag in raw_tags),
        ]
    ).strip().lower()
    if not text:
        return _miss(1.0, "no routing evidence")

    best_key = ""
    best_score = -10.0
    best_reason = "no topic matched"
    matched_include = False
    for topic in active_topics:
        key = _normalize_topic_key(topic.topic_key)
        if not key:
            continue
        routing = _parse_json_dict(topic.routing_json or "{}")

        include_hits = [kw for kw in _normalize_topic_keywords(routing.get("include_when")) if kw in text]
        if not include_hits:
            # A topic can only be selected through an include keyword.
            continue
        matched_include = True
        exclude_hits = [kw for kw in _normalize_topic_keywords(routing.get("exclude_when")) if kw in text]

        try:
            priority = max(0, min(int(routing.get("priority", 0)), 100))
        except (TypeError, ValueError, OverflowError):
            # Non-numeric or non-finite priority in the stored routing config.
            priority = 0

        score = float(len(include_hits) * 2 - len(exclude_hits) * 3) + (priority / 1000.0)
        if score > best_score:
            best_score = score
            best_key = key
            best_reason = f"matched include_when: {', '.join(include_hits[:3])}"

    if not matched_include:
        return _miss(0.68, "no include_when matched")
    if best_score <= 0:
        return _miss(0.68, "no positive routing score")

    confidence = min(0.95, max(0.61, 0.61 + best_score / 12.0))
    return {
        "matched": True,
        "topic_key": best_key,
        "confidence": round(confidence, 3),
        "reason": best_reason,
    }
def _topic_publish_internal(session: Session, bot_id: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Validate, route, de-duplicate and persist one topic item.

    Steps, in order: drop progress / tool-hint payloads, require a title or
    content, normalize level and tags, pick a topic via ``_topic_route_pick``,
    re-check that the topic is active, reject a repeat of the same
    ``dedupe_key`` inside ``TOPIC_DEDUPE_WINDOW_SECONDS``, then insert and
    commit a new ``TopicItem``.

    Args:
        session: Open database session; committed when an item is stored.
        bot_id: Owner of the topics and of the new item.
        payload: Message payload; reads ``title``, ``content``/``text``,
            ``level``, ``tags``, ``topic_key``, ``dedupe_key``, ``view`` and
            ``source`` (plus whatever the filter and router read).

    Returns:
        A dict whose ``published`` flag tells whether an item was stored.
        Unpublished results carry ``skipped`` or ``deduped`` with a
        ``reason``; published results carry ``topic_key``, ``item`` and
        ``route``.
    """
    filter_reason = _topic_filter_reason(payload)
    if filter_reason:
        return {
            "published": False,
            "skipped": True,
            "reason": filter_reason,
        }

    title = str(payload.get("title") or "").strip()
    content = str(payload.get("content") or payload.get("text") or "").strip()
    if not title and not content:
        return {
            "published": False,
            "skipped": True,
            "reason": "empty title/content",
        }

    level = str(payload.get("level") or "info").strip().lower()
    if level not in TOPIC_LEVEL_SET:
        level = "info"

    tags = payload.get("tags")
    tags_rows: List[str] = []
    if isinstance(tags, list):
        for tag in tags:
            # Truncate before the duplicate check so the comparison is made
            # against the value that is actually stored; otherwise long tags
            # sharing their first 64 characters would be stored twice.
            text = str(tag or "").strip()[:64]
            if text and text not in tags_rows:
                tags_rows.append(text)

    route_result = _topic_route_pick(session, bot_id, payload, requested_topic_key=str(payload.get("topic_key") or ""))
    if not bool(route_result.get("matched")):
        return {
            "published": False,
            "skipped": True,
            "reason": str(route_result.get("reason") or "no topic matched"),
            "route": route_result,
        }

    topic_key = _normalize_topic_key(route_result.get("topic_key"))
    if not topic_key:
        return {
            "published": False,
            "skipped": True,
            "reason": "invalid topic route result",
            "route": route_result,
        }

    row = _topic_get_row(session, bot_id, topic_key)
    if not row or not bool(row.is_active):
        return {
            "published": False,
            "skipped": True,
            "reason": f"topic {topic_key} unavailable or inactive",
            "route": route_result,
        }

    # Clamp to the stored length up front: the lookup below must use the same
    # value that gets persisted, or keys longer than 200 characters would
    # never match an earlier item and de-duplication would silently fail.
    dedupe_key = str(payload.get("dedupe_key") or "").strip()[:200]
    if dedupe_key:
        existing = session.exec(
            select(TopicItem)
            .where(TopicItem.bot_id == bot_id)
            .where(TopicItem.dedupe_key == dedupe_key)
            .order_by(TopicItem.id.desc())
            .limit(1)
        ).first()
        if existing and existing.created_at:
            # Naive UTC on both sides, matching how created_at is written below.
            age_s = (datetime.utcnow() - existing.created_at).total_seconds()
            if age_s <= TOPIC_DEDUPE_WINDOW_SECONDS:
                return {
                    "published": False,
                    "deduped": True,
                    "dedupe_window_seconds": TOPIC_DEDUPE_WINDOW_SECONDS,
                    "topic_key": _normalize_topic_key(existing.topic_key),
                    "reason": "dedupe_key hit within dedupe window",
                    "item": _topic_item_to_dict(existing),
                }

    view = payload.get("view")
    view_json = json.dumps(view, ensure_ascii=False) if isinstance(view, dict) else None
    source = str(payload.get("source") or "dashboard").strip().lower() or "dashboard"
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; moving
    # to timezone-aware values also requires the age comparison above (and
    # presumably the stored column) to be aware - confirm before changing.
    now = datetime.utcnow()
    item = TopicItem(
        bot_id=bot_id,
        topic_key=topic_key,
        title=title[:2000],
        content=content[:20000],
        level=level,
        tags_json=json.dumps(tags_rows, ensure_ascii=False) if tags_rows else None,
        view_json=view_json,
        source=source[:64],
        dedupe_key=dedupe_key or None,
        is_read=False,
        created_at=now,
    )
    session.add(item)
    session.commit()
    # Reload so generated fields (e.g. the primary key) are present in the result.
    session.refresh(item)
    return {
        "published": True,
        "topic_key": topic_key,
        "item": _topic_item_to_dict(item),
        "route": route_result,
    }