测试版本第一版

main
Bifang 2026-06-09 10:38:24 +08:00
commit d5c9368c1b
15 changed files with 1579 additions and 0 deletions

16
.env.example 100644
View File

@ -0,0 +1,16 @@
# LLM API
LLM_API_KEY=sk-your-api-key
LLM_BASE_URL=https://api.deepseek.com/v1
LLM_MODEL=deepseek-chat
# Embedding API
EMBEDDING_API_KEY=sk-your-embedding-key
EMBEDDING_BASE_URL=https://api.openai.com/v1
EMBEDDING_MODEL=text-embedding-3-small
# Neo4j
NEO4J_ENABLED=false
NEO4J_URI=bolt://localhost:7687
NEO4J_USER=neo4j
NEO4J_PASSWORD=
NEO4J_DATABASE=neo4j

4
.gitignore vendored 100644
View File

@ -0,0 +1,4 @@
__pycache__/
vector_store_data/
.env
.vscode

18
MIGRATION_TASKS.md 100644
View File

@ -0,0 +1,18 @@
# Migration Tasks
## In Progress
- [ ] Rework deduplication and identity resolution for entities, action items, and metrics
## Todo
## Done
- [x] Remove Obsidian from the project documentation and dependency surface
- [x] Remove Obsidian from the runtime processing pipeline
- [x] Move raw meeting archival to `data/raw`
- [x] Move meeting state storage to `data/meeting_state.json`
- [x] Introduce Neo4j configuration and a minimal graph storage layer
- [x] Write extracted meeting entities and relations into Neo4j
- [x] Add graph statistics to the CLI status output
- [x] Redesign retrieval to combine vector recall with graph facts

114
README.md 100644
View File

@ -0,0 +1,114 @@
# 会议纪要长期记忆系统
基于 `LLM + LlamaIndex + 本地状态存储` 的会议纪要长期记忆原型,当前聚焦三件事:
- 会议内容结构化抽取
- 跨会议行动项与指标状态追踪
- 语义检索与重复内容过滤
## 当前处理链路
```text
meeting.md
-> 内容哈希去重
-> 语义相似去重
-> LLM 结构化抽取
-> 状态合并
-> 原文归档到 data/raw
-> 写入向量索引
```
当前版本已经移除了 `Obsidian` 作为主流程依赖,后续会继续引入图数据库来承载关系层。
## 快速开始
```bash
cd meeting_memory
python -m venv .venv
.venv\Scripts\pip install -r requirements.txt
copy .env.example .env
# 然后补充 API 配置
.venv\Scripts\python main.py process meeting_example.md
```
## 用法
```bash
python main.py
python main.py process meeting_example.md
python main.py process meeting_example.md -f
python main.py query "弱光指标目标值是多少"
python main.py stats
python main.py text "今天会议讨论了..."
python main.py batch "meetings/*.md" -f
```
## 目录
```text
meeting_memory/
├── config.py 配置
├── extractor.py LLM 结构化抽取
├── meeting_processor.py 主处理流程
├── meeting_state.py 跨会议状态追踪
├── raw_store.py 原文归档
├── vector_store.py 向量索引与语义检索
├── main.py CLI 入口
├── data/
│ ├── raw/ 原始会议文本归档
│ └── meeting_state.json 状态持久化
└── vector_store_data/ 向量索引持久化目录
```
## 核心能力
### 1. 结构化抽取
从会议文本中提取:
- 标题、日期、参会人
- 实体
- 关系
- 行动项
- 指标
- 决策
- 摘要
### 2. 长期状态追踪
- 行动项按 `task + assignee` 做稳定 ID
- 指标按 `metric_name + owner` 做稳定 ID
- 保留历史状态演化
- 支持跨会议合并
### 3. 双重去重
- 内容哈希精确去重
- 语义相似度去重
### 4. 语义检索
- 会议内容写入向量库
- 支持自然语言查询
- 索引持久化,重启自动加载
## 配置
编辑 `.env`
```ini
LLM_API_KEY=sk-xxx
LLM_BASE_URL=https://api.deepseek.com/v1
LLM_MODEL=deepseek-chat
EMBEDDING_API_KEY=sk-xxx
EMBEDDING_BASE_URL=https://api.openai.com/v1
EMBEDDING_MODEL=text-embedding-3-small
```
## 迁移计划
当前迁移进度见 [MIGRATION_TASKS.md](/d:/github_project/my_code/meeting_memory/MIGRATION_TASKS.md:1)。

51
config.py 100644
View File

@ -0,0 +1,51 @@
import os
from dotenv import load_dotenv
from pydantic import BaseModel, Field
load_dotenv()
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
class LLMConfig(BaseModel):
api_key: str = Field(default=os.getenv("LLM_API_KEY", ""))
base_url: str = Field(default=os.getenv("LLM_BASE_URL", "https://api.deepseek.com/v1"))
model: str = Field(default=os.getenv("LLM_MODEL", "deepseek-chat"))
max_tokens: int = Field(default=64000)
temperature: float = Field(default=0.95)
class EmbeddingConfig(BaseModel):
api_key: str = Field(default=os.getenv("EMBEDDING_API_KEY", ""))
api_base: str = Field(default=os.getenv("EMBEDDING_BASE_URL", "https://api.openai.com/v1"))
model: str = Field(default=os.getenv("EMBEDDING_MODEL", "text-embedding-3-small"))
class StorageConfig(BaseModel):
data_dir: str = Field(default=os.path.join(PROJECT_ROOT, "data"))
raw_dir: str = Field(default=os.path.join(PROJECT_ROOT, "data", "raw"))
class VectorStoreConfig(BaseModel):
persist_dir: str = Field(default=os.path.join(PROJECT_ROOT, "vector_store_data"))
class Neo4jConfig(BaseModel):
enabled: bool = Field(default=os.getenv("NEO4J_ENABLED", "false").lower() == "true")
uri: str = Field(default=os.getenv("NEO4J_URI", "bolt://localhost:7687"))
user: str = Field(default=os.getenv("NEO4J_USER", "neo4j"))
password: str = Field(default=os.getenv("NEO4J_PASSWORD", ""))
database: str = Field(default=os.getenv("NEO4J_DATABASE", "neo4j"))
class ProjectConfig(BaseModel):
llm: LLMConfig = Field(default_factory=LLMConfig)
embedding: EmbeddingConfig = Field(default_factory=EmbeddingConfig)
storage: StorageConfig = Field(default_factory=StorageConfig)
vector_store: VectorStoreConfig = Field(default_factory=VectorStoreConfig)
neo4j: Neo4jConfig = Field(default_factory=Neo4jConfig)
state_path: str = Field(default=os.path.join(PROJECT_ROOT, "data", "meeting_state.json"))
config = ProjectConfig()

157
extractor.py 100644
View File

@ -0,0 +1,157 @@
import json
import logging
import re
from typing import List, Optional
from pydantic import BaseModel
from openai import OpenAI
from config import config
logger = logging.getLogger(__name__)
client = OpenAI(
api_key=config.llm.api_key or None,
base_url=config.llm.base_url if config.llm.base_url else None,
)
class Entity(BaseModel):
name: str
entity_type: str
description: str = ""
class Relation(BaseModel):
subject: str
subject_type: str
predicate: str
object: str
object_type: str
description: str = ""
class ActionItem(BaseModel):
task: str
assignee: str = ""
deadline: str = ""
status: str = "待办"
priority: str = ""
class Decision(BaseModel):
content: str
proposer: str = ""
status: str = "已决"
class MeetingMetric(BaseModel):
metric_name: str
value: str
target: str = ""
owner: str = ""
trend: str = ""
class MeetingExtraction(BaseModel):
title: str
date: str = ""
participants: List[str] = []
agenda: List[str] = []
entities: List[Entity] = []
relations: List[Relation] = []
action_items: List[ActionItem] = []
decisions: List[Decision] = []
metrics: List[MeetingMetric] = []
summary: str = ""
EXTRACTION_SYSTEM_PROMPT = """
你是一个专业的会议纪要信息抽取专家你的任务是从中文会议记录中抽取结构化信息并严格按照要求的JSON格式返回
## 抽取内容
### 1. 实体
- 人物参会人员提及的人员
- 组织/部门公司部门团队
- 项目/任务正在进行的项目任务
- 指标/KPI关键绩效指标如转化率退单率等
- 概念/制度管理概念制度要求
- 地点会议地点项目地点
### 2. 关系 (主体-关系谓词-客体)
抽取事实性关系例如
- {"subject": "建维部", "subject_type": "组织", "predicate": "负责", "object": "网络运维", "object_type": "任务", "description": ""}
- {"subject": "弱光指标", "subject_type": "指标", "predicate": "目标值", "object": "0.5以下", "object_type": "数值", "description": ""}
### 3. 行动项
谁负责什么任务截止时间优先级
### 4. 决策
做出的决定和结论
### 5. 指标数据
具体的数字指标当前值目标值负责人趋势(向好/持平/恶化)
## 规则
- 只提取事实性信息
- 过滤比喻假设主观评价
- 数字指标要精确提取
- entitiesrelationsaction_itemsdecisionsmetrics 如果没有则返回空数组
"""
def _call_llm(system: str, user: str) -> str:
response = client.chat.completions.create(
model=config.llm.model,
messages=[
{"role": "system", "content": system},
{"role": "user", "content": user},
],
max_tokens=config.llm.max_tokens,
temperature=config.llm.temperature,
)
content = response.choices[0].message.content
if content is None:
raise ValueError("LLM returned empty response")
return content
def extract_meeting_info(text: str) -> MeetingExtraction:
user_prompt = f"""
从以下会议记录中抽取结构化信息
JSON字段说明
- title: 会议标题
- date: 会议日期
- participants: 参会人列表
- agenda: 议程列表
- entities: 实体列表每个实体包含 name(名称), entity_type(类型), description(描述)
- relations: 关系列表每个关系包含 subject(主体), subject_type(主体类型), predicate(关系谓词), object(客体), object_type(客体类型), description(描述)
- action_items: 行动项列表每条包含 task(任务), assignee(负责人), deadline(截止时间), status(状态), priority(优先级)
- decisions: 决策列表每条包含 content(决策内容), proposer(提出人), status(状态)
- metrics: 指标列表每条包含 metric_name(指标名), value(当前值), target(目标值), owner(负责人), trend(趋势)
- summary: 会议摘要
请直接返回JSON对象不要包含任何额外说明文字
会议记录
{text}
"""
content = _call_llm(EXTRACTION_SYSTEM_PROMPT, user_prompt)
data = _try_parse_json(content)
return MeetingExtraction(**data)
def _try_parse_json(content: str) -> dict:
try:
return json.loads(content)
except json.JSONDecodeError:
logger.warning("JSON解析失败尝试修复...")
match = re.search(r'\{.*\}', content, re.DOTALL)
if match:
try:
return json.loads(match.group())
except json.JSONDecodeError as e:
logger.error(f"修复后的JSON仍无法解析: {e}")
raise

250
graph_store.py 100644
View File

@ -0,0 +1,250 @@
import logging
from typing import Any, Dict, List
from config import config
logger = logging.getLogger(__name__)
class Neo4jGraphStore:
def __init__(self):
self._driver = None
self._enabled = False
self._connect()
def _connect(self):
if not config.neo4j.enabled:
logger.info("Neo4j graph store disabled")
return
try:
from neo4j import GraphDatabase
except ImportError:
logger.warning("neo4j package is not installed")
return
if not config.neo4j.password:
logger.warning("Neo4j is enabled but NEO4J_PASSWORD is empty")
return
self._driver = GraphDatabase.driver(
config.neo4j.uri,
auth=(config.neo4j.user, config.neo4j.password),
)
self._enabled = True
@property
def enabled(self) -> bool:
return self._enabled and self._driver is not None
def close(self):
if self._driver is not None:
self._driver.close()
def run_query(self, query: str, **params) -> List[Dict[str, Any]]:
if not self.enabled:
return []
with self._driver.session(database=config.neo4j.database) as session:
result = session.run(query, **params)
return [record.data() for record in result]
def initialize_schema(self):
if not self.enabled:
return
statements = [
"CREATE CONSTRAINT meeting_id IF NOT EXISTS FOR (m:Meeting) REQUIRE m.meeting_id IS UNIQUE",
"CREATE CONSTRAINT entity_name IF NOT EXISTS FOR (e:Entity) REQUIRE e.name IS UNIQUE",
"CREATE INDEX meeting_title IF NOT EXISTS FOR (m:Meeting) ON (m.title)",
"CREATE INDEX entity_type IF NOT EXISTS FOR (e:Entity) ON (e.entity_type)",
]
for statement in statements:
self.run_query(statement)
def get_stats(self) -> Dict[str, Any]:
if not self.enabled:
return {"enabled": False}
rows = self.run_query(
"""
CALL {
MATCH (m:Meeting)
RETURN count(m) AS meetings
}
CALL {
MATCH (e:Entity)
RETURN count(e) AS entities
}
RETURN meetings, entities
"""
)
if not rows:
return {"enabled": True, "meetings": 0, "entities": 0}
return {"enabled": True, **rows[0]}
def upsert_meeting_subgraph(self, meeting_data: dict) -> None:
if not self.enabled:
return
meeting_id = meeting_data.get("_graph_meeting_id")
if not meeting_id:
return
self.initialize_schema()
self.run_query(
"""
MERGE (m:Meeting {meeting_id: $meeting_id})
SET m.title = $title,
m.date = $date,
m.summary = $summary,
m.content_hash = $content_hash,
m.updated_at = datetime()
""",
meeting_id=meeting_id,
title=meeting_data.get("title", ""),
date=meeting_data.get("date", ""),
summary=meeting_data.get("summary", ""),
content_hash=meeting_data.get("_content_hash", ""),
)
for entity in meeting_data.get("entities", []):
self._upsert_entity(meeting_id, entity)
for participant in meeting_data.get("participants", []):
self._upsert_entity(
meeting_id,
{"name": participant, "entity_type": "participant", "description": ""},
)
for relation in meeting_data.get("relations", []):
self._upsert_relation(meeting_id, relation, meeting_data.get("date", ""))
def _upsert_entity(self, meeting_id: str, entity: dict) -> None:
name = entity.get("name", "").strip()
if not name:
return
self.run_query(
"""
MATCH (m:Meeting {meeting_id: $meeting_id})
MERGE (e:Entity {name: $name})
SET e.entity_type = CASE
WHEN $entity_type <> '' THEN $entity_type
ELSE coalesce(e.entity_type, '')
END,
e.description = CASE
WHEN $description <> '' THEN $description
ELSE coalesce(e.description, '')
END,
e.updated_at = datetime()
MERGE (m)-[:MENTIONS]->(e)
""",
meeting_id=meeting_id,
name=name,
entity_type=entity.get("entity_type", ""),
description=entity.get("description", ""),
)
def _upsert_relation(self, meeting_id: str, relation: dict, meeting_date: str) -> None:
subject = relation.get("subject", "").strip()
predicate = relation.get("predicate", "").strip()
obj = relation.get("object", "").strip()
if not subject or not predicate or not obj:
return
self._upsert_entity(
meeting_id,
{
"name": subject,
"entity_type": relation.get("subject_type", ""),
"description": "",
},
)
self._upsert_entity(
meeting_id,
{
"name": obj,
"entity_type": relation.get("object_type", ""),
"description": "",
},
)
self.run_query(
"""
MATCH (s:Entity {name: $subject})
MATCH (o:Entity {name: $object})
MERGE (s)-[r:RELATES_TO {
meeting_id: $meeting_id,
predicate: $predicate,
object_name: $object
}]->(o)
SET r.description = $description,
r.meeting_date = $meeting_date,
r.updated_at = datetime()
""",
meeting_id=meeting_id,
subject=subject,
predicate=predicate,
object=obj,
description=relation.get("description", ""),
meeting_date=meeting_date,
)
def remove_meeting_subgraph(self, meeting_id: str) -> None:
if not self.enabled:
return
self.run_query(
"""
MATCH (m:Meeting {meeting_id: $meeting_id})
OPTIONAL MATCH (m)-[mentions:MENTIONS]->(:Entity)
DELETE mentions
WITH m
OPTIONAL MATCH ()-[r:RELATES_TO {meeting_id: $meeting_id}]->()
DELETE r
WITH m
DELETE m
""",
meeting_id=meeting_id,
)
def search_facts(self, question: str, limit: int = 5) -> List[Dict[str, Any]]:
if not self.enabled or not question.strip():
return []
rows = self.run_query(
"""
CALL {
MATCH (m:Meeting)
WHERE toLower(m.title) CONTAINS toLower($question)
OR toLower(coalesce(m.summary, '')) CONTAINS toLower($question)
RETURN 'meeting' AS kind,
m.title AS title,
coalesce(m.summary, '') AS text,
m.date AS date
UNION
MATCH (e:Entity)
WHERE toLower(e.name) CONTAINS toLower($question)
OR toLower(coalesce(e.description, '')) CONTAINS toLower($question)
OPTIONAL MATCH (e)-[r:RELATES_TO]-(other:Entity)
RETURN 'entity' AS kind,
e.name AS title,
coalesce(
head(collect(
e.name + ' -[' + coalesce(r.predicate, '') + ']-> ' + coalesce(other.name, '')
)),
coalesce(e.description, '')
) AS text,
'' AS date
}
RETURN kind, title, text, date
LIMIT $limit
""",
question=question,
limit=limit,
)
return rows
graph_store = Neo4jGraphStore()

185
main.py 100644
View File

@ -0,0 +1,185 @@
import argparse
import glob as glob_module
import logging
import os
import sys
if sys.stdout.encoding and sys.stdout.encoding.lower() == "gbk":
sys.stdout.reconfigure(encoding="utf-8")
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
datefmt="%H:%M:%S",
)
logger = logging.getLogger(__name__)
def cmd_process(args):
from meeting_processor import meeting_processor
filepath = args.file
if not os.path.exists(filepath):
print(f"错误:文件不存在 {filepath}")
sys.exit(1)
print(f"正在处理会议文件:{filepath}")
archive_path = meeting_processor.process_meeting_file(filepath, force=getattr(args, "force", False))
if archive_path:
print("\n处理完成")
print(f"原文归档:{archive_path}")
else:
print("\n处理失败或已跳过")
sys.exit(1)
def cmd_text(args):
from meeting_processor import meeting_processor
print("正在处理会议文本...")
archive_path = meeting_processor.process_meeting_text(args.text, force=getattr(args, "force", False))
if archive_path:
print("\n处理完成")
print(f"原文归档:{archive_path}")
else:
print("\n处理失败或已跳过")
def cmd_query(args):
from meeting_processor import meeting_processor
print(f"查询:{args.question}")
print("-" * 40)
result = meeting_processor.query(args.question, top_k=args.top_k)
print(result if result else "未找到相关信息")
def cmd_stats(args):
from meeting_processor import meeting_processor
stats = meeting_processor.stats()
print("会议记忆系统统计")
print("-" * 40)
print(f"向量节点数:{stats.get('vector_index', {}).get('node_count', 0)}")
print(f"Neo4j 启用:{stats.get('graph', {}).get('enabled', False)}")
print(f"图谱会议数:{stats.get('graph', {}).get('meetings', 0)}")
print(f"图谱实体数:{stats.get('graph', {}).get('entities', 0)}")
print(f"行动项数:{stats.get('state', {}).get('action_items_tracked', 0)}")
print(f"指标数:{stats.get('state', {}).get('metrics_tracked', 0)}")
print(f"会议系列数:{stats.get('state', {}).get('meeting_series', 0)}")
print(f"原文归档目录:{stats.get('raw_dir', '')}")
print(f"状态文件:{stats.get('state_path', '')}")
def cmd_batch(args):
from meeting_processor import meeting_processor
files = glob_module.glob(args.pattern, recursive=True)
if not files:
print(f"未匹配到任何文件:{args.pattern}")
sys.exit(1)
print(f"找到 {len(files)} 个文件,开始批量处理...")
success = 0
for path in files:
try:
print(f"\n处理:{path}")
result = meeting_processor.process_meeting_file(path, force=getattr(args, "force", False))
if result:
success += 1
except Exception as exc:
logger.error("处理失败: %s - %s", path, exc)
print(f"\n批量处理完成:{success}/{len(files)} 成功")
def cmd_interactive():
from meeting_processor import meeting_processor
print("会议纪要长期记忆系统")
print("=" * 50)
print("可用命令:")
print(" query <问题> 语义查询会议记忆")
print(" process <路径> 处理会议文件")
print(" stats 查看统计")
print(" help 显示帮助")
print(" exit/quit 退出")
print("=" * 50)
while True:
try:
line = input("\n> ").strip()
except (EOFError, KeyboardInterrupt):
print()
break
if not line:
continue
if line in ("exit", "quit", "q"):
break
if line == "help":
print(" query <问题>")
print(" process <路径>")
print(" stats")
print(" exit/quit")
continue
if line == "stats":
cmd_stats(None)
continue
if line.startswith("process "):
filepath = line[8:].strip()
if not os.path.exists(filepath):
print(f"文件不存在:{filepath}")
continue
result = meeting_processor.process_meeting_file(filepath)
print(f"完成:{result}" if result else "处理失败或已跳过")
continue
question = line[6:].strip() if line.startswith("query ") else line
result = meeting_processor.query(question, top_k=3)
print(result if result else "未找到相关信息")
print("bye!")
def main():
parser = argparse.ArgumentParser(description="会议纪要长期记忆系统")
subparsers = parser.add_subparsers(dest="command")
p_process = subparsers.add_parser("process", help="处理会议 markdown 文件")
p_process.add_argument("file", help="会议文件路径")
p_process.add_argument("-f", "--force", action="store_true", help="发现重复时自动覆盖")
p_text = subparsers.add_parser("text", help="直接处理一段会议文本")
p_text.add_argument("text", help="会议文本内容")
p_text.add_argument("-f", "--force", action="store_true", help="发现重复时自动覆盖")
p_query = subparsers.add_parser("query", help="语义查询会议记忆")
p_query.add_argument("question", help="查询问题")
p_query.add_argument("--top-k", type=int, default=3, help="返回结果数量")
subparsers.add_parser("stats", help="查看统计")
p_batch = subparsers.add_parser("batch", help="批量处理会议文件")
p_batch.add_argument("pattern", help="glob 模式,如 meetings/*.md")
p_batch.add_argument("-f", "--force", action="store_true", help="发现重复时自动覆盖")
args = parser.parse_args()
if args.command == "process":
cmd_process(args)
elif args.command == "text":
cmd_text(args)
elif args.command == "query":
cmd_query(args)
elif args.command == "stats":
cmd_stats(args)
elif args.command == "batch":
cmd_batch(args)
else:
cmd_interactive()
if __name__ == "__main__":
main()

27
meeting1.md 100644
View File

@ -0,0 +1,27 @@
会议概述
会议主要围绕宽带运维指标、综合行政与工会管理、市场政企业务推进、满意度提升及年度考核准备等议题展开,旨在总结阶段性工作进度,协调跨部门资源,明确后续重点任务与考核要求。
主要讨论点
宽带运维与网络质量上周上门量及安装进度受天气影响弱光指标趋近目标FPTR达标但主动过境偏后。PCDN专线在学校端持续恶化已报市公司分析IP并拟限速。超频基站故障已恢复专线巡检进度符合预期。客服培训后内部机房问题已梳理清单。
综合行政与工会事务2025年剩余两项工作按原计划推进。工会经费压减食堂改造拟于5月结合更替修补进行拟引入自饮机降本。第四届体育文化节筹备中因主力选手受伤需各部门抽调人员补充方阵。主题教育简报已发基层党组织学习已完成。
区表彰、主题教育与招待费管理区级担当作为表彰正在对接建议争取参与以拉开竞争差距。招待费实行每年公开1次制度综合部超预算需调整26年预算其他部门严控成本。政企对外接待需统筹严禁客户经理个人垫资。
拆迁商客、满意度考核与KPI准备拆迁以二次升套为主社区与单位集中营销并行。满意度测评发现开卷考试形式导致拉分相关经理思想松懈。KPI考核已明确工信部有责及投诉率两项核心指标强调日清日结与执行力。
决策事项
二级基站拆除及下电服务费调整需在4月15日前全量完成。
招待费实行每年公开1次制度综合部超预算需调整26年预算其他部门严控成本。
满意度测评取消开卷考试形式,后续采用“后机评”模式,市场部需按四公司规定制定满意客户样板及不满客户处理流程。
第四届体育文化节方阵缺编9人需各部门协调抽调确定名单后统一采购服装并安排下班后排练。
年度考核指标已初步定调,各部门需提前与市公司沟通争取有利政策,避免起跑线落后。
待办事项
宽带/客服部跟进学校IP限速机制建立处理客服内部机房问题清单并与客户沟通。
综合部:完成食堂改造审计及自饮机引入批复跟进;落实招待费公示及纪检报备。
市场部:本周内汇报招聘进度、农村渠道进度及营销方案;每日微信发送满意度日报。
各部门:今日确定体育文化节方阵补充人员名单;针对专线助账客指标拿出具体保障方案并回复。
关键信息
会议时间2026-05-06 13:37
核心考核导向KPI考核聚焦工信部有责与投诉率强调执行力与日清日结习惯。
业务风险点PCDN专线恶化、满意度测评因形式问题导致拉分、招待费预算超支。
AI建议
针对满意度考核风险,建议市场部立即复盘测评机制,避免形式主义拉低指标,并提前演练“后机评”应对策略。
针对招待费超支及客户经理个人垫资问题,建议综合部建立统一审批与结算台账,明确费用归属与报销时效,规避财务与合规风险。
针对KPI考核准备建议建立市公司指标动态跟踪表提前模拟考核场景强化跨部门协同与数据预埋确保年底考核不被动。

75
meeting_example.md 100644
View File

@ -0,0 +1,75 @@
# 会议记录
议 题合川分公司周例会2026第X期
时 间2026年5月6日 13:37—14:23
地 点:分公司会议室
主持人AlanPaine
参加人:分公司领导、各部门经理及相关人员
议程:
一、各部门汇报
二、分公司领导指示部署
---
## 会议内容
### 一、各部门汇报
建维部、综合部、商客市场负责人按议程现场按顺序做汇报。建维部汇报宽带安装受天气影响进度偏后弱光指标0.51持续向好三代终端年度目标5.5需持续压降九零工程月度转化率87.35%接近90%目标退单率6.53%PCDN专线学校出口问题正协调限速机制二级基站拆除预计4月中旬完成综合部通报建委相关工作清单及投资计划已汇报打印设备已协调保障招投标需求工会经费压减后严考严用食堂改造及自饮机引入方案正在推进第四届体育文化节方阵人员招募与排练已部署商客市场2月收入88.5万元实现增长三期项目二期拆迁完成1145户社区与单位清洗服务5场落实签约量待提升。
---
### 二、部署强调
#### 建维部负责人强调:
1. **网络运维与指标管控:**
- 弱光指标0.51持续向好三代终端年度目标5.5需持续压降FPTR已达标但主动过境0.3靠后。
- 九零工程月度转化率87.35%接近90%目标退单率6.53%主要受用户原因及改约影响已建议施工优化BtoC审核撤单流程。
- PCDN专线因学校出口带宽问题持续恶化正协调限速机制超频基站故障已及时处理专线巡检按计划推进。
2. **工作反馈与执行要求:**
- 强调养成“日清日结”习惯,工作回复必须量化、有结果、有措施,杜绝工作拖延数月未动。
- 针对关键业务上量指标缺乏保障措施问题,要求本周内出具具体可行方案并明确责任人。
---
#### 综合部负责人强调:
1. **工会经费与后勤保障:**
- 工会经费全面压减,需严考严用、以更少资金办更好实事。软性工程(更衣室、食堂改造)已确定上报市公司,拟引入自饮机解决饮水问题并节约成本。
2. **奖项申报策略:**
- 针对河川区“担当作为先进集体和先进个人”申报,评选条件多为定性要求,建议提前与区领导或分管领导沟通确认意向后再行申报,避免盲目提交浪费资源。
---
#### 市场部负责人强调:
1. **季度收官与二季度谋划:**
- 市场部需提前谋划季度收官及二季度业务活动打破淡季思维全力推动商客、H业务及AI军团活动升温。
- 本周内完成招聘情况、农村渠道进度及营销方案汇报;本周六上午视频汇报运动会筹备情况。
2. **满意度与考核管控:**
- 深刻反思满意度测评前期工作未做到位问题要求主管亲自抓。明确满意度及投诉考核标准不满客户需严格按5:30及5:35节点操作报警。
- 要求商客经理每日微信发送日报,跟进考核细节,确保指标可控。
---
#### 分公司主要领导强调:
1. **强化执行力与作风:**
- 各部门及一线人员必须摒弃“知道怎么做却不去做”的作风,做到事不做好不收兵。分管领导需加强政企部等部门督导力度,必要时亲自沟通。
2. **年度考核提前摸底:**
- 针对四公司年度考核及集团相关指标提升,要求各部门提前深入了解考核细则及可能产生重大影响的不利因素并及时上报,切忌定稿后被动。
- 市公司会统筹考虑分公司整体情况,务必提前布局、赢在起跑线。

View File

@ -0,0 +1,174 @@
import hashlib
import logging
import os
from typing import Optional
from config import config
from extractor import MeetingExtraction, extract_meeting_info
from graph_store import graph_store
from meeting_state import MeetingStateStore
from raw_store import raw_meeting_store
from vector_store import meeting_vector_store
logger = logging.getLogger(__name__)
state_store = MeetingStateStore(config.state_path)
class MeetingProcessor:
def process_meeting_file(self, filepath: str, force: bool = False) -> Optional[str]:
with open(filepath, "r", encoding="utf-8") as f:
text = f.read()
return self.process_meeting_text(text, force=force)
def process_meeting_text(self, text: str, force: bool = False) -> Optional[str]:
content_hash = self._compute_content_hash(text)
if not force and state_store.has_content_hash(content_hash):
print("\n检测到重复内容,已跳过。")
logger.info("Duplicate content hash skipped: %s", content_hash[:12])
return None
if not force:
similar = meeting_vector_store.find_similar_text(text, threshold=0.92)
if similar:
meta = similar["metadata"]
print(
f"\n发现相似会议:{meta.get('title', '')} ({meta.get('date', '')}) "
f"相似度 {similar['score']:.2%}"
)
while True:
choice = input("选择 [s]跳过 / [o]覆盖(默认 s").strip().lower() or "s"
if choice == "s":
logger.info("Skipped similar meeting: %s", meta.get("title", ""))
return None
if choice == "o":
force = True
break
print("请输入 s 或 o。")
meeting_data = self._extract(text)
if not meeting_data:
logger.error("Failed to extract meeting information")
return None
data_dict = meeting_data.model_dump()
data_dict["_content_hash"] = content_hash
data_dict["_graph_meeting_id"] = meeting_vector_store._meeting_id(data_dict)
should_skip = self._handle_duplicate(data_dict, force)
if should_skip:
return None
meeting_title = data_dict.get("title", "")
meeting_date = data_dict.get("date", "")
raw_path = raw_meeting_store.save(text, title=meeting_title, date=meeting_date)
data_dict["_original_text"] = text
data_dict["_original_text_path"] = raw_path
meeting_filename = f"{meeting_vector_store._meeting_id(data_dict)}.md"
data_dict["action_items"] = state_store.merge_action_items(
data_dict.get("action_items", []),
meeting_title,
meeting_date,
meeting_filename,
)
data_dict["metrics"] = state_store.merge_metrics(
data_dict.get("metrics", []),
meeting_title,
meeting_date,
meeting_filename,
)
state_store.add_content_hash(content_hash, meeting_title, meeting_date, meeting_filename)
state_store.save()
meeting_vector_store.add_meeting(data_dict)
graph_store.upsert_meeting_subgraph(data_dict)
logger.info("Meeting processed: %s", meeting_title)
return raw_path
def _handle_duplicate(self, data_dict: dict, force: bool) -> bool:
title = data_dict.get("title", "")
date = data_dict.get("date", "")
existing = meeting_vector_store.find_meeting(title, date)
if not existing:
return False
if force:
logger.info("Duplicate meeting found; overwriting in force mode: %s", title)
self._remove_old(data_dict, existing)
return False
print(f"\n发现重复会议:{title} ({date})")
while True:
choice = input("选择 [s]跳过 / [o]覆盖(默认 s").strip().lower() or "s"
if choice == "s":
logger.info("Skipped duplicate meeting: %s", title)
return True
if choice == "o":
self._remove_old(data_dict, existing)
return False
print("请输入 s 或 o。")
def _remove_old(self, data_dict: dict, existing: Optional[dict] = None):
meeting_id = meeting_vector_store._meeting_id(data_dict)
meeting_vector_store.remove_meeting(meeting_id)
graph_store.remove_meeting_subgraph(meeting_id)
new_hash = data_dict.get("_content_hash", "")
if new_hash:
state_store.remove_content_hash(new_hash)
if existing:
old_hash = existing.get("content_hash", "")
if old_hash and old_hash != new_hash:
state_store.remove_content_hash(old_hash)
logger.info("Removed old meeting artifacts: %s", data_dict.get("title", ""))
def _compute_content_hash(self, text: str) -> str:
normalized = text.strip().replace("\r\n", "\n")
return hashlib.sha256(normalized.encode("utf-8")).hexdigest()
def _extract(self, text: str) -> Optional[MeetingExtraction]:
try:
return extract_meeting_info(text)
except Exception as exc:
logger.error("LLM extraction failed: %s", exc)
return None
def query(self, question: str, top_k: int = 3) -> str:
vector_context = meeting_vector_store.query_as_context(question, top_k=top_k)
graph_results = graph_store.search_facts(question, limit=top_k)
parts = []
if vector_context:
parts.append("=== Vector Context ===\n" + vector_context)
if graph_results:
graph_lines = []
for idx, row in enumerate(graph_results, start=1):
title = row.get("title", row.get("kind", "graph"))
text = row.get("text", "")
date = row.get("date", "")
suffix = f" ({date})" if date else ""
graph_lines.append(f"[{idx}] {title}{suffix}\n{text}")
parts.append("=== Graph Facts ===\n" + "\n\n".join(graph_lines))
return "\n\n".join(parts)
def stats(self) -> dict:
return {
"vector_index": meeting_vector_store.get_stats(),
"graph": graph_store.get_stats(),
"state": state_store.get_stats(),
"raw_dir": config.storage.raw_dir,
"state_path": config.state_path,
}
meeting_processor = MeetingProcessor()

189
meeting_state.py 100644
View File

@ -0,0 +1,189 @@
import hashlib
import json
import logging
import os
from datetime import datetime
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)
def _item_id(task: str, assignee: str) -> str:
raw = f"{task}|{assignee}"
return hashlib.md5(raw.encode("utf-8")).hexdigest()[:8]
def _metric_id(metric_name: str, owner: str) -> str:
raw = f"{metric_name}|{owner}"
return hashlib.md5(raw.encode("utf-8")).hexdigest()[:8]
class MeetingStateStore:
def __init__(self, state_path: str):
self.state_path = state_path
self._state = self._load()
def _load(self) -> dict:
if os.path.exists(self.state_path):
try:
with open(self.state_path, "r", encoding="utf-8") as f:
return json.load(f)
except Exception as e:
logger.warning(f"加载状态文件失败,将创建新状态: {e}")
return {
"action_items": {},
"metrics": {},
"meeting_series": {},
"content_hashes": {},
}
def save(self):
os.makedirs(os.path.dirname(self.state_path), exist_ok=True)
with open(self.state_path, "w", encoding="utf-8") as f:
json.dump(self._state, f, ensure_ascii=False, indent=2)
def _ensure_series(self, meeting_title: str, meeting_date: str) -> str:
series_name = self._detect_series(meeting_title)
series = self._state["meeting_series"].get(series_name)
if not series:
series = {"latest_date": meeting_date, "processed_titles": []}
self._state["meeting_series"][series_name] = series
if meeting_date > series.get("latest_date", ""):
series["latest_date"] = meeting_date
if meeting_title not in series["processed_titles"]:
series["processed_titles"].append(meeting_title)
return series_name
def _detect_series(self, title: str) -> str:
import re
cleaned = re.sub(r"\d{4}\w+期)", "", title)
cleaned = re.sub(r"\(\d{4}\w+期\)", "", cleaned)
cleaned = re.sub(r"\d{4}\w+期", "", cleaned)
cleaned = re.sub(r"\d{4}年第\w+次", "", cleaned)
cleaned = cleaned.strip("-_ ")
return cleaned or title
def merge_action_items(
self,
new_items: List[dict],
meeting_title: str,
meeting_date: str,
meeting_filename: str,
) -> List[dict]:
series_name = self._ensure_series(meeting_title, meeting_date)
merged = []
for item in new_items:
task = item.get("task", "")
assignee = item.get("assignee", "")
iid = _item_id(task, assignee)
history_entry = {
"date": meeting_date,
"meeting": meeting_filename,
"status": item.get("status", "待办"),
"priority": item.get("priority", ""),
"deadline": item.get("deadline", ""),
}
existing = self._state["action_items"].get(iid)
if existing:
existing["history"].append(history_entry)
existing["latest"] = history_entry
latest = existing["history"][-1]
item["_item_id"] = iid
item["_history"] = list(existing["history"])
item["status"] = latest["status"]
item["priority"] = latest["priority"]
item["deadline"] = latest["deadline"]
else:
self._state["action_items"][iid] = {
"item_id": iid,
"task": task,
"assignee": assignee,
"series": series_name,
"created_meeting": meeting_filename,
"history": [history_entry],
"latest": history_entry,
}
item["_item_id"] = iid
item["_history"] = [history_entry]
merged.append(item)
return merged
def merge_metrics(
self,
new_metrics: List[dict],
meeting_title: str,
meeting_date: str,
meeting_filename: str,
) -> List[dict]:
merged = []
for m in new_metrics:
metric_name = m.get("metric_name", "")
owner = m.get("owner", "")
mid = _metric_id(metric_name, owner)
history_entry = {
"date": meeting_date,
"meeting": meeting_filename,
"value": m.get("value", ""),
"target": m.get("target", ""),
"trend": m.get("trend", ""),
}
existing = self._state["metrics"].get(mid)
if existing:
existing["history"].append(history_entry)
existing["latest"] = history_entry
item = m
item["_metric_id"] = mid
item["_history"] = list(existing["history"])
else:
self._state["metrics"][mid] = {
"metric_id": mid,
"metric_name": metric_name,
"owner": owner,
"history": [history_entry],
"latest": history_entry,
}
m["_metric_id"] = mid
m["_history"] = [history_entry]
merged.append(m)
return merged
def get_action_item_history(self, item_id: str) -> Optional[dict]:
return self._state["action_items"].get(item_id)
def get_metric_history(self, metric_id: str) -> Optional[dict]:
return self._state["metrics"].get(metric_id)
def get_series_info(self, title: str) -> Optional[dict]:
series_name = self._detect_series(title)
return self._state["meeting_series"].get(series_name)
def has_content_hash(self, content_hash: str) -> bool:
return content_hash in self._state["content_hashes"]
def add_content_hash(self, content_hash: str, title: str, date: str, filename: str):
self._state["content_hashes"][content_hash] = {
"title": title,
"date": date,
"filename": filename,
}
def remove_content_hash(self, content_hash: str):
self._state["content_hashes"].pop(content_hash, None)
def get_stats(self) -> dict:
return {
"action_items_tracked": len(self._state["action_items"]),
"metrics_tracked": len(self._state["metrics"]),
"meeting_series": len(self._state["meeting_series"]),
"content_hashes": len(self._state["content_hashes"]),
}

52
raw_store.py 100644
View File

@ -0,0 +1,52 @@
import logging
import os
from datetime import datetime
from config import config
logger = logging.getLogger(__name__)
def _sanitize_filename(name: str) -> str:
if not name:
return "untitled"
invalid = '<>:"/\\|?*'
for char in invalid:
name = name.replace(char, "")
name = name.replace(" ", "_").strip("._")
return name or "untitled"
class RawMeetingStore:
def __init__(self):
self.raw_dir = config.storage.raw_dir
os.makedirs(self.raw_dir, exist_ok=True)
def save(self, text: str, title: str = "", date: str = "") -> str:
date_str = date or datetime.now().strftime("%Y-%m-%d")
safe_title = _sanitize_filename(title)[:60]
filename = f"{date_str}_{safe_title}.md"
filepath = os.path.join(self.raw_dir, filename)
content = "\n".join(
[
"---",
f'title: "{title}"',
f'date: "{date_str}"',
"status: archived",
"---",
"",
f"# {title or 'Untitled Meeting'}",
"",
text,
"",
]
)
with open(filepath, "w", encoding="utf-8") as f:
f.write(content)
logger.info("Saved raw meeting text: %s", filepath)
return filepath
raw_meeting_store = RawMeetingStore()

8
requirements.txt 100644
View File

@ -0,0 +1,8 @@
openai>=1.0.0
pydantic>=2.0.0
llama-index>=0.10.0
llama-index-embeddings-openai>=0.1.0
llama-index-vector-stores-chroma>=0.1.0
chromadb>=0.5.0
python-dotenv>=1.0.0
neo4j>=5.26.0

259
vector_store.py 100644
View File

@ -0,0 +1,259 @@
import hashlib
import json
import logging
import os
import re
from typing import List, Optional
from openai import OpenAI as OpenAI_Client
from llama_index.core import (
Document,
VectorStoreIndex,
StorageContext,
load_index_from_storage,
)
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.settings import Settings
from config import config
logger = logging.getLogger(__name__)
class CustomOpenAIEmbedding(BaseEmbedding):
def __init__(
self,
model: str = "text-embedding-ada-002",
api_key: Optional[str] = None,
api_base: Optional[str] = None,
**kwargs,
):
super().__init__(model_name=model, **kwargs)
self._client = OpenAI_Client(
api_key=api_key or "not-needed",
base_url=api_base,
)
self._model = model
async def _aget_query_embedding(self, query: str) -> List[float]:
return self._get_embedding(query)
async def _aget_text_embedding(self, text: str) -> List[float]:
return self._get_embedding(text)
def _get_query_embedding(self, query: str) -> List[float]:
return self._get_embedding(query)
def _get_text_embedding(self, text: str) -> List[float]:
return self._get_embedding(text)
def _get_embedding(self, text: str) -> List[float]:
resp = self._client.embeddings.create(
model=self._model,
input=text,
)
return resp.data[0].embedding
class MeetingVectorStore:
def __init__(self):
embed_model = CustomOpenAIEmbedding(
model=config.embedding.model,
api_key=config.embedding.api_key or None,
api_base=config.embedding.api_base if config.embedding.api_base else None,
)
Settings.embed_model = embed_model
self.persist_dir = config.vector_store.persist_dir
self._index: Optional[VectorStoreIndex] = None
self._load_or_create_index()
def _load_or_create_index(self):
if os.path.exists(os.path.join(self.persist_dir, "docstore.json")):
try:
storage_context = StorageContext.from_defaults(persist_dir=self.persist_dir)
self._index = load_index_from_storage(storage_context)
logger.info(f"从磁盘加载向量索引: {self.persist_dir}")
return
except Exception as e:
logger.warning(f"加载向量索引失败,将创建新索引: {e}")
self._index = VectorStoreIndex.from_documents([])
logger.info("创建新的向量索引")
def _save(self):
if self._index:
os.makedirs(self.persist_dir, exist_ok=True)
self._index.storage_context.persist(persist_dir=self.persist_dir)
def _meeting_id(self, meeting_data: dict) -> str:
title = meeting_data.get("title", "")
date = meeting_data.get("date", "")
raw = f"{date}_{title}"
return f"meeting_{hashlib.md5(raw.encode('utf-8')).hexdigest()[:12]}"
def find_meeting(self, title: str, date: str = "") -> Optional[dict]:
if not self._index:
return None
query_text = f"会议标题: {title}"
if date:
query_text += f" 日期: {date}"
try:
results = self.query(query_text, top_k=3)
for r in results:
meta = r.get("metadata", {})
meta_title = meta.get("title", "")
if meta_title == title or (date and meta.get("date") == date):
return meta
return None
except Exception as e:
logger.warning(f"会议查重查询失败: {e}")
return None
def find_similar_text(self, text: str, threshold: float = 0.92) -> Optional[dict]:
if not self._index:
return None
try:
retriever = self._index.as_retriever(similarity_top_k=3)
nodes = retriever.retrieve(text)
for node in nodes:
if node.score is not None and node.score > threshold:
return {
"metadata": node.metadata,
"score": node.score,
}
return None
except Exception as e:
logger.warning(f"文本相似度查重失败: {e}")
return None
def remove_meeting(self, meeting_id: str) -> bool:
if not self._index:
return False
try:
for field in self._FIELD_TYPES:
self._index.delete_ref_doc(f"{meeting_id}_{field}")
self._save()
logger.info(f"已从向量索引移除会议: {meeting_id}")
return True
except Exception as e:
logger.warning(f"移除向量索引失败: {e}")
return False
_FIELD_TYPES = ["header", "summary", "action_items", "metrics", "decisions", "relations", "entities"]
def add_meeting(self, meeting_data: dict) -> bool:
try:
meeting_id = self._meeting_id(meeting_data)
original_text_path = meeting_data.get("_original_text_path", "")
original_text = meeting_data.get("_original_text", "")
base_metadata = {
"title": meeting_data.get("title", ""),
"date": meeting_data.get("date", ""),
"participants": ", ".join(meeting_data.get("participants", [])),
"type": "meeting",
"content_hash": meeting_data.get("_content_hash", ""),
"original_text_path": original_text_path,
"original_text_excerpt": original_text[:500] if original_text else "",
"meeting_id": meeting_id,
}
docs = self._build_field_docs(meeting_data, base_metadata, meeting_id)
if self._index:
for doc in docs:
self._index.insert(doc)
self._save()
logger.info(f"会议 '{meeting_data.get('title')}' 已添加到向量索引 (id={meeting_id}, 字段数={len(docs)})")
return True
except Exception as e:
logger.error(f"添加会议到向量索引失败: {e}")
return False
def _build_field_docs(self, data: dict, base: dict, meeting_id: str) -> List[Document]:
docs = []
header = f"# {data.get('title', '')}"
if data.get("date"):
header += f"\n日期: {data['date']}"
if data.get("participants"):
header += f"\n参会人: {', '.join(data['participants'])}"
docs.append(Document(text=header, metadata={**base, "field": "header"}, doc_id=f"{meeting_id}_header"))
if data.get("summary"):
docs.append(Document(text=data["summary"], metadata={**base, "field": "summary"}, doc_id=f"{meeting_id}_summary"))
if data.get("action_items"):
lines = []
for item in data["action_items"]:
status = item.get('status', '待办')
lines.append(f"- [{status}] {item.get('task', '')} (负责人: {item.get('assignee', '')}, 截止: {item.get('deadline', '')}, 优先级: {item.get('priority', '')})")
history = item.get("_history", [])
if len(history) > 1:
lines.append(" 演变: " + "".join(f"{h.get('date','')}({h.get('status','')})" for h in history))
docs.append(Document(text="\n".join(lines), metadata={**base, "field": "action_items"}, doc_id=f"{meeting_id}_action_items"))
if data.get("metrics"):
lines = []
for m in data["metrics"]:
lines.append(f"- {m.get('metric_name', '')}: {m.get('value', '')} (目标: {m.get('target', '')}, 趋势: {m.get('trend', '')})")
docs.append(Document(text="\n".join(lines), metadata={**base, "field": "metrics"}, doc_id=f"{meeting_id}_metrics"))
if data.get("decisions"):
lines = [f"- {d.get('content', '')}" for d in data["decisions"]]
docs.append(Document(text="\n".join(lines), metadata={**base, "field": "decisions"}, doc_id=f"{meeting_id}_decisions"))
if data.get("relations"):
lines = [f"- {r.get('subject', '')} --{r.get('predicate', '')}--> {r.get('object', '')}" for r in data["relations"]]
docs.append(Document(text="\n".join(lines), metadata={**base, "field": "relations"}, doc_id=f"{meeting_id}_relations"))
if data.get("entities"):
lines = [f"- [{e.get('entity_type', '')}] {e.get('name', '')}: {e.get('description', '')}" for e in data["entities"]]
docs.append(Document(text="\n".join(lines), metadata={**base, "field": "entities"}, doc_id=f"{meeting_id}_entities"))
return docs
def query(self, question: str, top_k: int = 5) -> List[dict]:
if not self._index:
return []
try:
retriever = self._index.as_retriever(similarity_top_k=top_k)
nodes = retriever.retrieve(question)
results = []
for node in nodes:
results.append({
"text": node.text,
"score": node.score,
"metadata": node.metadata,
})
return results
except Exception as e:
logger.error(f"查询向量索引失败: {e}")
return []
def query_as_context(self, question: str, top_k: int = 3) -> str:
results = self.query(question, top_k=top_k)
if not results:
return ""
parts = []
for i, r in enumerate(results):
metadata = r.get("metadata", {})
parts.append(f"[{i+1}] {metadata.get('title', '未知会议')} ({metadata.get('date', '')})\n{r['text']}\n")
return "\n".join(parts)
def get_stats(self) -> dict:
if not self._index:
return {"doc_count": 0, "node_count": 0}
try:
docstore = self._index.docstore
docs = list(docstore.docs.values()) if hasattr(docstore, 'docs') else []
return {
"doc_count": len(docstore.docs) if hasattr(docstore, 'docs') else 0,
"node_count": len(docs),
}
except Exception:
return {"doc_count": 0, "node_count": 0}
meeting_vector_store = MeetingVectorStore()