重构Neo4j保存数据结构

main
Bifang 2026-06-15 13:13:20 +08:00
parent f857a90977
commit b5649cc218
10 changed files with 615 additions and 236 deletions

View File

@ -2,6 +2,7 @@ import json
import logging import logging
import re import re
import sys import sys
from enum import Enum
from typing import Any, List, Optional from typing import Any, List, Optional
from openai import OpenAI from openai import OpenAI
@ -22,25 +23,62 @@ client = OpenAI(
) )
class EntityType(str, Enum):
DEPARTMENT = 'Department'
PROJECT = 'Project'
METRIC = 'Metric'
PERSON = 'Person'
SYSTEM = 'System'
DOCUMENT = 'Document'
PARTICIPANT = 'participant'
UNKNOWN = 'Unknown'
# Normalization map: legacy LLM output → canonical type
_ENTITY_TYPE_ALIASES = {
'组织': 'Department',
'organization': 'Department',
'部门': 'Department',
'指标': 'Metric',
'kpi': 'Metric',
'项目': 'Project',
}
def _canonical_entity_type(raw: str) -> str:
normalized = raw.strip()
if normalized in _ENTITY_TYPE_ALIASES:
return _ENTITY_TYPE_ALIASES[normalized]
for member in EntityType:
if member.value.lower() == normalized.lower():
return member.value
return EntityType.UNKNOWN.value
def _neo4j_labels(entity_type: str) -> list[str]:
canonical = _canonical_entity_type(entity_type)
labels = ['Entity']
if canonical != EntityType.UNKNOWN.value:
labels.append(canonical)
return labels
class Entity(BaseModel):
    """One entity entry of the extraction result (name, type, description)."""

    name: str
    # Falls back to the 'Unknown' sentinel when the extractor gives no type.
    entity_type: str = EntityType.UNKNOWN.value
    description: str = ''
class Relation(BaseModel):
    """One directed relation between two named entities."""

    # Names of the two endpoint entities.
    source_entity_name: str
    target_entity_name: str
    # Relation type as requested in the extraction prompt (e.g. HAS_PROJECT, HAS_METRIC).
    relation_type: str
    # One-sentence natural-language statement of the fact.
    fact: str = ''
    # Optional validity window; empty string when not supplied.
    valid_at: str = ''
    invalid_at: str = ''
    # Supporting quotation from the source text.
    evidence: str = ''
    # Qualifying conditions attached to the fact.
    qualifiers: List[str] = Field(default_factory=list)
    # The extraction prompt asks for a value in 0~1.
    confidence: float = 0.0
class ActionItem(BaseModel): class ActionItem(BaseModel):
@ -63,6 +101,13 @@ class MeetingMetric(BaseModel):
target: str = '' target: str = ''
owner: str = '' owner: str = ''
trend: str = '' trend: str = ''
unit: str = ''
class DepartmentInfo(BaseModel):
    """A department together with the names of the projects listed under it."""

    name: str
    description: str = ''
    # Project names only; each list gets its own fresh instance via default_factory.
    projects: List[str] = Field(default_factory=list)
class MeetingExtraction(BaseModel): class MeetingExtraction(BaseModel):
@ -75,6 +120,7 @@ class MeetingExtraction(BaseModel):
action_items: List[ActionItem] = Field(default_factory=list) action_items: List[ActionItem] = Field(default_factory=list)
decisions: List[Decision] = Field(default_factory=list) decisions: List[Decision] = Field(default_factory=list)
metrics: List[MeetingMetric] = Field(default_factory=list) metrics: List[MeetingMetric] = Field(default_factory=list)
departments: List[DepartmentInfo] = Field(default_factory=list)
summary: str = '' summary: str = ''
@ -372,7 +418,7 @@ EXTRACTION_SYSTEM_PROMPT = """
def extract_meeting_info(text: str, stream: bool = False) -> MeetingExtraction: def extract_meeting_info(text: str, stream: bool = False) -> MeetingExtraction:
user_prompt = f""" user_prompt = f"""
请从下面会议记录中提取结构化信息并重点做"深层关系抽取" 请从下面会议记录中提取结构化信息并重点做"深层关系抽取""层次结构识别"
输出 JSON 字段 输出 JSON 字段
- title - title
@ -380,24 +426,28 @@ def extract_meeting_info(text: str, stream: bool = False) -> MeetingExtraction:
- participants - participants
- agenda - agenda
- entities: name, entity_type, description - entities: name, entity_type, description
- entity_type 请使用: Department部门Project项目Metric指标Person人物System系统Document文档
- relations: - relations:
- subject - source_entity_name: 源实体名称
- subject_type - target_entity_name: 目标实体名称
- predicate - relation_type: 关系类型 HAS_PROJECTHAS_METRIC负责汇报目标值推进依赖
- object - fact: 一句自然语言事实描述
- object_type - valid_at可选
- description - invalid_at可选
- fact - evidence: 原文证据
- qualifiers - qualifiers: 限定条件列表
- evidence - confidence: 0~1
- confidence
- valid_at
- invalid_at
- action_items: task, assignee, deadline, status, priority - action_items: task, assignee, deadline, status, priority
- decisions: content, proposer, status - decisions: content, proposer, status
- metrics: metric_name, value, target, owner, trend - metrics: metric_name, value, target, owner, trend, unit
- departments: [{{"name": "部门名称", "description": "", "projects": ["项目名1", "项目名2"]}}]
- summary - summary
层次关系规则
1. Department 管辖 Project relation_type HAS_PROJECT
2. Project 拥有 Metric relation_type HAS_METRIC
3. 其他事实关系负责汇报目标值等直接用 relation_type 表达
关系抽取规则 关系抽取规则
1. 不要只抽"汇报了工作"这种会议动作要尽量继续下钻出具体事实 1. 不要只抽"汇报了工作"这种会议动作要尽量继续下钻出具体事实
2. 如果一句话里同时包含"主体 + 指标 + 当前值 + 目标值 + 负责人 + 趋势"应拆成多条关系或在 qualifiers 中保留这些细节 2. 如果一句话里同时包含"主体 + 指标 + 当前值 + 目标值 + 负责人 + 趋势"应拆成多条关系或在 qualifiers 中保留这些细节
@ -432,6 +482,7 @@ def _normalize_meeting_data(data: dict) -> dict:
'action_items': _normalize_action_items(data.get('action_items')), 'action_items': _normalize_action_items(data.get('action_items')),
'decisions': _normalize_decisions(data.get('decisions')), 'decisions': _normalize_decisions(data.get('decisions')),
'metrics': _normalize_metrics(data.get('metrics')), 'metrics': _normalize_metrics(data.get('metrics')),
'departments': _normalize_departments(data.get('departments')),
'summary': _as_str(data.get('summary')), 'summary': _as_str(data.get('summary')),
} }
@ -494,20 +545,16 @@ def _normalize_relations(value) -> List[dict]:
for relation in value: for relation in value:
if not isinstance(relation, dict): if not isinstance(relation, dict):
continue continue
subject = _as_str(relation.get('subject')) source = _as_str(relation.get('source_entity_name') or relation.get('subject', ''))
predicate = _as_str(relation.get('predicate')) target = _as_str(relation.get('target_entity_name') or relation.get('object', ''))
obj = _as_str(relation.get('object')) rtype = _as_str(relation.get('relation_type') or relation.get('predicate', ''))
description = _as_str(relation.get('description'))
fact = _as_str(relation.get('fact')) fact = _as_str(relation.get('fact'))
if not fact and subject and predicate and obj: if not fact and source and rtype and target:
fact = f'{subject} {predicate} {obj}' fact = f'{source} {rtype} {target}'
items.append({ items.append({
'subject': subject, 'source_entity_name': source,
'subject_type': _as_str(relation.get('subject_type')), 'target_entity_name': target,
'predicate': predicate, 'relation_type': rtype,
'object': obj,
'object_type': _as_str(relation.get('object_type')),
'description': description,
'fact': fact, 'fact': fact,
'qualifiers': _as_str_list(relation.get('qualifiers')), 'qualifiers': _as_str_list(relation.get('qualifiers')),
'evidence': _as_str(relation.get('evidence')), 'evidence': _as_str(relation.get('evidence')),
@ -563,5 +610,24 @@ def _normalize_metrics(value) -> List[dict]:
'target': _as_str(metric.get('target')), 'target': _as_str(metric.get('target')),
'owner': _as_str(metric.get('owner')), 'owner': _as_str(metric.get('owner')),
'trend': _as_str(metric.get('trend')), 'trend': _as_str(metric.get('trend')),
'unit': _as_str(metric.get('unit')),
})
return items
def _normalize_departments(value) -> List[dict]:
if not isinstance(value, list):
return []
items = []
for dept in value:
if not isinstance(dept, dict):
continue
name = _as_str(dept.get('name'))
if not name:
continue
items.append({
'name': name,
'description': _as_str(dept.get('description')),
'projects': _as_str_list(dept.get('projects')),
}) })
return items return items

View File

@ -4,6 +4,7 @@ import logging
import re import re
import time import time
from datetime import datetime, timezone from datetime import datetime, timezone
from enum import Enum
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from meeting_memory.config import config from meeting_memory.config import config
@ -32,6 +33,45 @@ def _keyword_score(text: str, question: str) -> float:
return hits / len(terms) return hits / len(terms)
class _EntityType(str, Enum):
DEPARTMENT = 'Department'
PROJECT = 'Project'
METRIC = 'Metric'
PERSON = 'Person'
SYSTEM = 'System'
DOCUMENT = 'Document'
PARTICIPANT = 'participant'
UNKNOWN = 'Unknown'
_ENTITY_TYPE_ALIASES = {
'组织': 'Department',
'organization': 'Department',
'部门': 'Department',
'指标': 'Metric',
'kpi': 'Metric',
'项目': 'Project',
}
def _canonical_entity_type(raw: str) -> str:
normalized = raw.strip()
if normalized in _ENTITY_TYPE_ALIASES:
return _ENTITY_TYPE_ALIASES[normalized]
for member in _EntityType:
if member.value.lower() == normalized.lower():
return member.value
return _EntityType.UNKNOWN.value
def _neo4j_labels(entity_type: str) -> list[str]:
canonical = _canonical_entity_type(entity_type)
labels = ['Entity']
if canonical != _EntityType.UNKNOWN.value:
labels.append(canonical)
return labels
def _keyword_terms(text: str) -> List[str]: def _keyword_terms(text: str) -> List[str]:
normalized = (text or '').lower() normalized = (text or '').lower()
raw_terms = re.findall(r'[a-z0-9]+|[\u4e00-\u9fff]{2,}', normalized) raw_terms = re.findall(r'[a-z0-9]+|[\u4e00-\u9fff]{2,}', normalized)
@ -151,11 +191,11 @@ class Neo4jGraphStore:
'CREATE CONSTRAINT meeting_id IF NOT EXISTS FOR (m:Meeting) REQUIRE m.meeting_id IS UNIQUE', 'CREATE CONSTRAINT meeting_id IF NOT EXISTS FOR (m:Meeting) REQUIRE m.meeting_id IS UNIQUE',
'CREATE CONSTRAINT episode_id IF NOT EXISTS FOR (e:Episode) REQUIRE e.episode_id IS UNIQUE', 'CREATE CONSTRAINT episode_id IF NOT EXISTS FOR (e:Episode) REQUIRE e.episode_id IS UNIQUE',
'CREATE CONSTRAINT entity_name IF NOT EXISTS FOR (e:Entity) REQUIRE e.name IS UNIQUE', 'CREATE CONSTRAINT entity_name IF NOT EXISTS FOR (e:Entity) REQUIRE e.name IS UNIQUE',
'CREATE CONSTRAINT fact_id IF NOT EXISTS FOR (f:Fact) REQUIRE f.fact_id IS UNIQUE',
'CREATE INDEX meeting_title IF NOT EXISTS FOR (m:Meeting) ON (m.title)', 'CREATE INDEX meeting_title IF NOT EXISTS FOR (m:Meeting) ON (m.title)',
'CREATE INDEX episode_title IF NOT EXISTS FOR (e:Episode) ON (e.title)', 'CREATE INDEX episode_title IF NOT EXISTS FOR (e:Episode) ON (e.title)',
'CREATE INDEX entity_type IF NOT EXISTS FOR (e:Entity) ON (e.entity_type)', 'CREATE INDEX entity_type IF NOT EXISTS FOR (e:Entity) ON (e.entity_type)',
'CREATE INDEX fact_predicate IF NOT EXISTS FOR (f:Fact) ON (f.predicate)', 'CREATE INDEX relates_to_name IF NOT EXISTS FOR ()-[r:RELATES_TO]-() ON (r.name)',
'CREATE INDEX relates_to_fact IF NOT EXISTS FOR ()-[r:RELATES_TO]-() ON (r.fact)',
] ]
for statement in statements: for statement in statements:
self.run_query(statement) self.run_query(statement)
@ -167,11 +207,14 @@ class Neo4jGraphStore:
CALL () { MATCH (m:Meeting) RETURN count(m) AS meetings } CALL () { MATCH (m:Meeting) RETURN count(m) AS meetings }
CALL () { MATCH (ep:Episode) RETURN count(ep) AS episodes } CALL () { MATCH (ep:Episode) RETURN count(ep) AS episodes }
CALL () { MATCH (e:Entity) RETURN count(e) AS entities } CALL () { MATCH (e:Entity) RETURN count(e) AS entities }
CALL () { MATCH (f:Fact) RETURN count(f) AS facts } CALL () { MATCH ()-[r:RELATES_TO]->() RETURN count(r) AS relations }
RETURN meetings, episodes, entities, facts CALL () { MATCH (d:Department) RETURN count(d) AS departments }
CALL () { MATCH (p:Project) RETURN count(p) AS projects }
CALL () { MATCH (m:Metric) RETURN count(m) AS metrics }
RETURN meetings, episodes, entities, relations, departments, projects, metrics
''') ''')
if not rows: if not rows:
return {'enabled': False, 'meetings': 0, 'episodes': 0, 'entities': 0, 'facts': 0} return {'enabled': False}
return {'enabled': True, **rows[0]} return {'enabled': True, **rows[0]}
# ==================== Entity Dedup (from Graphiti) ==================== # ==================== Entity Dedup (from Graphiti) ====================
@ -218,19 +261,16 @@ class Neo4jGraphStore:
def get_facts_between(self, source_name: str, target_name: str) -> List[Dict[str, Any]]:
    """Fetch the RELATES_TO edges that point from one named entity to another.

    Rows are whatever ``run_query`` returns for the statement below, ordered
    by ``valid_at`` descending (missing values sort as the empty string).
    """
    cypher = '''
        MATCH (s:Entity {name: $source_name})-[r:RELATES_TO]->(t:Entity {name: $target_name})
        RETURN r.name AS relation_type,
               r.fact AS fact,
               r.qualifiers AS qualifiers,
               r.confidence AS confidence,
               r.valid_at AS valid_at,
               r.invalid_at AS invalid_at,
               r.expired_at AS expired_at,
               r.meeting_id AS meeting_id
        ORDER BY coalesce(r.valid_at, '') DESC
    '''
    return self.run_query(cypher, source_name=source_name, target_name=target_name)
def search_related_facts( def search_related_facts(
@ -240,30 +280,23 @@ class Neo4jGraphStore:
return [] return []
query_embedding = embedding_service.embed_text(fact_text) query_embedding = embedding_service.embed_text(fact_text)
rows = self.run_query(''' rows = self.run_query('''
MATCH (ep:Episode)-[:HAS_FACT]->(f:Fact) MATCH (s:Entity)-[r:RELATES_TO]->(t:Entity)
OPTIONAL MATCH (s:Entity)-[:FACT_SOURCE]->(f) RETURN r.fact AS fact,
OPTIONAL MATCH (f)-[:FACT_TARGET]->(o:Entity) r.name AS relation_type,
RETURN f.fact_id AS fact_id, r.fact_embedding AS fact_embedding,
f.fact AS fact, r.valid_at AS valid_at,
f.predicate AS predicate, r.invalid_at AS invalid_at,
f.description AS description, r.expired_at AS expired_at,
f.fact_embedding AS fact_embedding, s.name AS source_name,
f.valid_at AS valid_at, t.name AS target_name
f.invalid_at AS invalid_at,
f.expired_at AS expired_at,
coalesce(s.name, '') AS source_name,
coalesce(o.name, '') AS target_name
''') ''')
scored = [] scored = []
for row in rows: for row in rows:
score = _cosine_similarity(query_embedding, row.get('fact_embedding', [])) score = _cosine_similarity(query_embedding, row.get('fact_embedding', []))
if score > 0.3: if score > 0.3:
scored.append({ scored.append({
'idx': len(scored),
'fact_id': row.get('fact_id', ''),
'fact': row.get('fact', ''), 'fact': row.get('fact', ''),
'predicate': row.get('predicate', ''), 'relation_type': row.get('relation_type', ''),
'description': row.get('description', ''),
'source_name': row.get('source_name', ''), 'source_name': row.get('source_name', ''),
'target_name': row.get('target_name', ''), 'target_name': row.get('target_name', ''),
'valid_at': row.get('valid_at', ''), 'valid_at': row.get('valid_at', ''),
@ -274,15 +307,15 @@ class Neo4jGraphStore:
scored.sort(key=lambda r: r['score'], reverse=True) scored.sort(key=lambda r: r['score'], reverse=True)
return scored[:limit] return scored[:limit]
def mark_fact_expired(self, fact_id: str, expired_at: str | None = None): def mark_relation_expired(self, source_name: str, target_name: str, relation_type: str, expired_at: str | None = None):
if not expired_at: if not expired_at:
expired_at = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') expired_at = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
self.run_query(''' self.run_query('''
MATCH (f:Fact {fact_id: $fact_id}) MATCH (s:Entity {name: $source_name})-[r:RELATES_TO {name: $relation_type}]->(t:Entity {name: $target_name})
SET f.expired_at = $expired_at, SET r.expired_at = $expired_at,
f.invalid_at = $expired_at, r.invalid_at = $expired_at,
f.updated_at = datetime() r.updated_at = datetime()
''', fact_id=fact_id, expired_at=expired_at) ''', source_name=source_name, target_name=target_name, relation_type=relation_type, expired_at=expired_at)
# ==================== Core Write Operations ==================== # ==================== Core Write Operations ====================
@ -335,19 +368,28 @@ class Neo4jGraphStore:
) )
for relation in meeting_data.get('relations', []): for relation in meeting_data.get('relations', []):
self._upsert_relation(meeting_id, relation, meeting_data.get('date', '')) self._upsert_direct_edge(meeting_id, relation, meeting_data.get('date', ''))
self._upsert_hierarchy(meeting_id, meeting_data)
for metric in meeting_data.get('metrics', []):
self._upsert_metric_node(meeting_id, metric, meeting_data.get('date', ''))
def _upsert_entity(self, meeting_id: str, entity: dict) -> None: def _upsert_entity(self, meeting_id: str, entity: dict) -> None:
name = entity.get('name', '').strip() name = entity.get('name', '').strip()
if not name: if not name:
return return
raw_type = entity.get('entity_type', '').strip()
labels = _neo4j_labels(raw_type)
summary = self._entity_summary(entity) summary = self._entity_summary(entity)
name_embedding = embedding_service.embed_text(summary or name) name_embedding = embedding_service.embed_text(summary or name)
self.run_query(''' set_labels = ' SET ' + ', '.join(f'e:{label}' for label in labels[1:]) if len(labels) > 1 else ''
MATCH (:Meeting {meeting_id: $meeting_id})-[:HAS_EPISODE]->(ep:Episode {episode_id: $meeting_id}) self.run_query(f'''
MERGE (e:Entity {name: $name}) MATCH (:Meeting {{meeting_id: $meeting_id}})-[:HAS_EPISODE]->(ep:Episode {{episode_id: $meeting_id}})
MERGE (e:Entity {{name: $name}})
{set_labels}
SET e.entity_type = CASE SET e.entity_type = CASE
WHEN $entity_type <> '' THEN $entity_type WHEN $type <> '' THEN $type
ELSE coalesce(e.entity_type, '') ELSE coalesce(e.entity_type, '')
END, END,
e.description = CASE e.description = CASE
@ -367,92 +409,152 @@ class Neo4jGraphStore:
''', ''',
meeting_id=meeting_id, meeting_id=meeting_id,
name=name, name=name,
entity_type=entity.get('entity_type', ''), type=raw_type,
description=entity.get('description', ''), description=entity.get('description', ''),
summary=summary, summary=summary,
name_embedding=name_embedding, name_embedding=name_embedding,
) )
def _upsert_relation(self, meeting_id: str, relation: dict, meeting_date: str) -> None: def _upsert_direct_edge(self, meeting_id: str, relation: dict, meeting_date: str) -> None:
subject = relation.get('subject', '').strip() source = relation.get('source_entity_name', '').strip()
predicate = relation.get('predicate', '').strip() target = relation.get('target_entity_name', '').strip()
obj = relation.get('object', '').strip() rtype = relation.get('relation_type', '').strip()
if not subject or not predicate or not obj: if not source or not target or not rtype:
return return
self._upsert_entity( self._upsert_entity(
meeting_id, meeting_id,
{'name': subject, 'entity_type': relation.get('subject_type', ''), 'description': ''}, {'name': source, 'entity_type': '', 'description': ''},
) )
self._upsert_entity( self._upsert_entity(
meeting_id, meeting_id,
{'name': obj, 'entity_type': relation.get('object_type', ''), 'description': ''}, {'name': target, 'entity_type': '', 'description': ''},
) )
fact_text = self._fact_text(relation) fact_text = self._relation_text(relation)
fact_id = hashlib.md5(
f'{meeting_id}|{subject}|{predicate}|{obj}'.encode('utf-8')
).hexdigest()
fact_embedding = embedding_service.embed_text(fact_text) fact_embedding = embedding_service.embed_text(fact_text)
self.run_query(''' self.run_query('''
MATCH (:Meeting {meeting_id: $meeting_id})-[:HAS_EPISODE]->(ep:Episode {episode_id: $meeting_id}) MATCH (s:Entity {name: $source})
MATCH (s:Entity {name: $subject}) MATCH (t:Entity {name: $target})
MATCH (o:Entity {name: $object}) MERGE (s)-[r:RELATES_TO {name: $rtype}]->(t)
MERGE (f:Fact {fact_id: $fact_id}) SET r.fact = $fact,
SET f.fact = $fact, r.fact_embedding = $fact_embedding,
f.predicate = $predicate, r.evidence = $evidence,
f.description = $description, r.qualifiers = $qualifiers,
f.qualifiers = $qualifiers, r.confidence = $confidence,
f.evidence = $evidence, r.valid_at = $valid_at,
f.confidence = $confidence, r.invalid_at = $invalid_at,
f.valid_at = $valid_at, r.meeting_id = $meeting_id,
f.invalid_at = $invalid_at, r.meeting_date = $meeting_date,
f.meeting_id = $meeting_id, r.updated_at = datetime()
f.meeting_date = $meeting_date,
f.fact_embedding = $fact_embedding,
f.updated_at = datetime()
MERGE (ep)-[:HAS_FACT]->(f)
MERGE (s)-[:FACT_SOURCE]->(f)
MERGE (f)-[:FACT_TARGET]->(o)
''', ''',
meeting_id=meeting_id, meeting_id=meeting_id,
subject=subject, source=source,
predicate=predicate, target=target,
object=obj, rtype=rtype,
fact_id=fact_id,
fact=fact_text, fact=fact_text,
description=relation.get('description', ''), fact_embedding=fact_embedding,
qualifiers=relation.get('qualifiers', []),
evidence=relation.get('evidence', ''), evidence=relation.get('evidence', ''),
qualifiers=relation.get('qualifiers', []),
confidence=relation.get('confidence', 0.0), confidence=relation.get('confidence', 0.0),
valid_at=relation.get('valid_at', ''), valid_at=relation.get('valid_at', ''),
invalid_at=relation.get('invalid_at', ''), invalid_at=relation.get('invalid_at', ''),
meeting_date=meeting_date, meeting_date=meeting_date,
fact_embedding=fact_embedding, )
def _upsert_hierarchy(self, meeting_id: str, meeting_data: dict) -> None:
entities_map = {e['name']: e for e in meeting_data.get('entities', []) if e.get('name')}
for rel in meeting_data.get('relations', []):
rtype = rel.get('relation_type', '')
if rtype not in ('HAS_PROJECT', 'HAS_METRIC', 'PART_OF'):
continue
source = rel.get('source_entity_name', '')
target = rel.get('target_entity_name', '')
if not source or not target:
continue
if rtype == 'HAS_PROJECT' or rtype == 'PART_OF':
self.run_query('''
MATCH (s:Entity {name: $source})
MATCH (t:Entity {name: $target})
MERGE (s)-[r:HAS_PROJECT]->(t)
SET r.updated_at = datetime(),
r.meeting_id = $meeting_id
''', source=source, target=target, meeting_id=meeting_id)
elif rtype == 'HAS_METRIC':
self.run_query('''
MATCH (s:Entity {name: $source})
MATCH (t:Entity {name: $target})
MERGE (s)-[r:HAS_METRIC]->(t)
SET r.updated_at = datetime(),
r.meeting_id = $meeting_id
''', source=source, target=target, meeting_id=meeting_id)
departments = meeting_data.get('departments', [])
for dept in departments:
dept_name = dept.get('name', '').strip()
if not dept_name or dept_name not in entities_map:
continue
for proj_name in dept.get('projects', []):
if proj_name in entities_map:
self.run_query('''
MATCH (s:Entity {name: $source})
MATCH (t:Entity {name: $target})
MERGE (s)-[r:HAS_PROJECT]->(t)
SET r.updated_at = datetime(),
r.meeting_id = $meeting_id
''', source=dept_name, target=proj_name, meeting_id=meeting_id)
def _upsert_metric_node(self, meeting_id: str, metric: dict, meeting_date: str) -> None:
name = metric.get('metric_name', '').strip()
if not name:
return
entity = {
'name': name,
'entity_type': 'Metric',
'description': f"{metric.get('value', '')} ({metric.get('unit', '')})" if metric.get('unit') else metric.get('value', ''),
}
self._upsert_entity(meeting_id, entity)
self.run_query('''
MATCH (e:Entity {name: $name})
SET e.current_value = $value,
e.target = $target,
e.trend = $trend,
e.unit = $unit,
e.owner = $owner,
e.updated_at = datetime()
''',
name=name,
value=metric.get('value', ''),
target=metric.get('target', ''),
trend=metric.get('trend', ''),
unit=metric.get('unit', ''),
owner=metric.get('owner', ''),
) )
def remove_meeting_subgraph(self, meeting_id: str) -> None:
    """Remove one meeting and the graph data that belongs only to it.

    Does nothing when the store is disabled. Otherwise runs four statements:

    1. Delete the MENTIONS edges going out of this meeting's episode(s).
    2. Delete RELATES_TO / HAS_PROJECT / HAS_METRIC edges whose
       ``meeting_id`` property equals this meeting.
    3. Delete entities that no episode mentions any more.
    4. Delete the episode(s) and the meeting node.

    Steps 1 and 2 are deliberately scoped to this meeting. Deleting every
    relationship of every mentioned entity would also drop MENTIONS edges
    and relations written by other meetings, and step 3 would then remove
    those shared entities as orphans.
    """
    if not self.enabled:
        return
    # Phase 1: drop only this meeting's own MENTIONS edges.
    self.run_query('''
        MATCH (:Meeting {meeting_id: $meeting_id})-[:HAS_EPISODE]->(ep:Episode)
        MATCH (ep)-[mention:MENTIONS]->(:Entity)
        DELETE mention
    ''', meeting_id=meeting_id)
    # Phase 2: drop entity-to-entity edges recorded under this meeting.
    self.run_query('''
        MATCH (:Entity)-[r:RELATES_TO|HAS_PROJECT|HAS_METRIC]->(:Entity)
        WHERE r.meeting_id = $meeting_id
        DELETE r
    ''', meeting_id=meeting_id)
    # Phase 3: delete orphan entities no longer mentioned by any episode.
    self.run_query('''
        MATCH (entity:Entity)
        WHERE NOT (entity)<-[:MENTIONS]-(:Episode)
        DETACH DELETE entity
    ''')
    # Phase 4: delete episode and meeting.
    self.run_query('''
        MATCH (m:Meeting {meeting_id: $meeting_id})-[:HAS_EPISODE]->(ep:Episode)
        DETACH DELETE ep, m
    ''', meeting_id=meeting_id)
# ==================== Retrieval ==================== # ==================== Retrieval ====================
@ -533,19 +635,23 @@ class Neo4jGraphStore:
return [] return []
return self.run_query(''' return self.run_query('''
MATCH (n) MATCH (n)
WHERE n:Meeting OR n:Episode OR n:Entity OR n:Fact WHERE n:Meeting OR n:Episode OR n:Entity OR n:Department OR n:Project OR n:Metric
WITH [lbl IN labels(n) WHERE lbl IN ['Meeting','Episode','Entity','Fact']][0] AS kind WITH [lbl IN labels(n) WHERE lbl IN ['Meeting','Episode','Entity','Department','Project','Metric']][0] AS kind
RETURN kind, count(*) AS count ORDER BY count DESC RETURN kind, count(*) AS count ORDER BY count DESC
''') ''')
def get_entity_types(self) -> List[Dict[str, Any]]:
    """Return entity counts per canonical entity type, largest first.

    The database groups by the raw ``entity_type`` property. Several raw
    spellings can map to one canonical type, so the counts are merged here
    after canonicalization and re-sorted. This avoids returning the same
    ``entity_type`` more than once.

    Returns:
        ``[{'entity_type': str, 'count': int}, ...]`` ordered by count
        descending; ``[]`` when the store is disabled.
    """
    if not self.enabled:
        return []
    rows = self.run_query('''
        MATCH (e:Entity)
        WHERE coalesce(e.entity_type, '') <> ''
        RETURN e.entity_type AS entity_type, count(*) AS count ORDER BY count DESC
    ''')
    totals: Dict[str, int] = {}
    for row in rows:
        canonical = _canonical_entity_type(row.get('entity_type', '') or '')
        totals[canonical] = totals.get(canonical, 0) + (row.get('count', 0) or 0)
    # sorted() is stable, so ties keep the order in which types first appeared.
    ranked = sorted(totals.items(), key=lambda item: item[1], reverse=True)
    return [{'entity_type': entity_type, 'count': count} for entity_type, count in ranked]
def get_graph_snapshot( def get_graph_snapshot(
self, self,
@ -559,34 +665,39 @@ class Neo4jGraphStore:
return {'nodes': [], 'edges': [], 'stats': {'enabled': False}} return {'nodes': [], 'edges': [], 'stats': {'enabled': False}}
keyword_terms = _keyword_terms(query) if query else [] keyword_terms = _keyword_terms(query) if query else []
raw_nodes = self.run_query(''' raw_nodes = self.run_query(f'''
MATCH (n) MATCH (n)
WHERE (n:Meeting OR n:Episode OR n:Entity OR n:Fact) WHERE (n:Meeting OR n:Episode OR n:Entity)
AND ($kinds = [] OR [lbl IN labels(n) WHERE lbl IN ['Meeting','Episode','Entity','Fact']][0] IN $kinds) AND ($kinds = [] OR any(lbl IN labels(n) WHERE lbl IN $kinds))
AND ($terms = [] AND ($terms = []
OR (n:Meeting AND any(t IN $terms WHERE toLower(coalesce(n.title,'')) CONTAINS t OR toLower(coalesce(n.summary,'')) CONTAINS t)) OR (n:Meeting AND any(t IN $terms WHERE toLower(coalesce(n.title,'')) CONTAINS t OR toLower(coalesce(n.summary,'')) CONTAINS t))
OR (n:Episode AND any(t IN $terms WHERE toLower(coalesce(n.title,'')) CONTAINS t OR toLower(coalesce(n.content,'')) CONTAINS t)) OR (n:Episode AND any(t IN $terms WHERE toLower(coalesce(n.title,'')) CONTAINS t OR toLower(coalesce(n.content,'')) CONTAINS t))
OR (n:Entity AND any(t IN $terms WHERE toLower(coalesce(n.name,'')) CONTAINS t OR toLower(coalesce(n.summary,'')) CONTAINS t OR toLower(coalesce(n.description,'')) CONTAINS t)) OR (n:Entity AND any(t IN $terms WHERE toLower(coalesce(n.name,'')) CONTAINS t OR toLower(coalesce(n.summary,'')) CONTAINS t OR toLower(coalesce(n.description,'')) CONTAINS t))
OR (n:Fact AND any(t IN $terms WHERE toLower(coalesce(n.fact,'')) CONTAINS t OR toLower(coalesce(n.predicate,'')) CONTAINS t OR toLower(coalesce(n.description,'')) CONTAINS t))
) )
AND ($types = [] OR NOT n:Entity OR coalesce(n.entity_type, '') IN $types) AND ($types = [] OR NOT n:Entity OR coalesce(n.entity_type, '') IN $types)
OPTIONAL MATCH (n)-[r]-() OPTIONAL MATCH (n)-[r]-()
RETURN n.meeting_id AS meeting_id, RETURN n.meeting_id AS meeting_id,
n.episode_id AS episode_id, n.episode_id AS episode_id,
n.name AS entity_name, n.name AS entity_name,
n.fact_id AS fact_id,
n.title AS title, n.title AS title,
n.summary AS summary, n.summary AS summary,
n.date AS date, n.date AS date,
n.entity_type AS entity_type, n.entity_type AS entity_type,
n.description AS description, n.description AS description,
n.predicate AS predicate,
n.fact AS fact,
n.confidence AS confidence,
n.meeting_date AS meeting_date, n.meeting_date AS meeting_date,
[lbl IN labels(n) WHERE lbl IN ['Meeting','Episode','Entity','Fact']][0] AS kind, n.current_value AS current_value,
n.target AS target_value,
n.trend AS trend,
CASE
WHEN n:Meeting THEN 'Meeting'
WHEN n:Episode THEN 'Episode'
WHEN n:Department THEN 'Department'
WHEN n:Project THEN 'Project'
WHEN n:Metric THEN 'Metric'
ELSE 'Entity'
END AS kind,
count(DISTINCT r) AS degree count(DISTINCT r) AS degree
ORDER BY degree DESC, coalesce(n.title, n.name, n.fact) ASC ORDER BY degree DESC, coalesce(n.title, n.name) ASC
LIMIT $limit_nodes LIMIT $limit_nodes
''', ''',
terms=keyword_terms, terms=keyword_terms,
@ -607,29 +718,30 @@ class Neo4jGraphStore:
elif kind == 'Episode': elif kind == 'Episode':
raw_id = row.get('episode_id', '') raw_id = row.get('episode_id', '')
label = row.get('title', '') or raw_id label = row.get('title', '') or raw_id
elif kind == 'Entity': elif kind in ('Entity', 'Department', 'Project', 'Metric'):
raw_id = row.get('entity_name', '') raw_id = row.get('entity_name', '')
label = raw_id label = raw_id
elif kind == 'Fact':
raw_id = row.get('fact_id', '')
label = row.get('predicate', '') or row.get('fact', '') or raw_id
else: else:
continue continue
if not raw_id: if not raw_id:
continue continue
nid = f'{kind}:{raw_id}' nid = f'{kind}:{raw_id}'
all_raw_ids.add(raw_id) all_raw_ids.add(raw_id)
nodes.append({ node = {
'id': nid, 'id': nid,
'label': label, 'label': label,
'kind': kind, 'kind': kind,
'entity_type': row.get('entity_type', '') if kind == 'Entity' else '', 'entity_type': row.get('entity_type', '') if kind in ('Entity', 'Department', 'Project', 'Metric') else '',
'description': row.get('description', '') or row.get('summary', '') or '', 'description': row.get('description', '') or row.get('summary', '') or '',
'date': row.get('date', '') or row.get('meeting_date', '') or '', 'date': row.get('date', '') or row.get('meeting_date', '') or '',
'degree': row.get('degree', 0), 'degree': row.get('degree', 0),
'fact': row.get('fact', '') if kind == 'Fact' else '',
'summary': row.get('summary', '') or '', 'summary': row.get('summary', '') or '',
}) }
if kind == 'Metric':
node['current_value'] = row.get('current_value', '')
node['target'] = row.get('target_value', '')
node['trend'] = row.get('trend', '')
nodes.append(node)
if not nodes: if not nodes:
return {'nodes': [], 'edges': [], 'stats': self.get_stats()} return {'nodes': [], 'edges': [], 'stats': self.get_stats()}
@ -637,44 +749,39 @@ class Neo4jGraphStore:
ids_list = list(all_raw_ids) ids_list = list(all_raw_ids)
edges_raw = self.run_query(''' edges_raw = self.run_query('''
MATCH (s)-[r]->(t) MATCH (s)-[r]->(t)
WHERE type(r) IN ['HAS_EPISODE','MENTIONS','HAS_FACT','FACT_SOURCE','FACT_TARGET'] WHERE type(r) IN ['HAS_EPISODE','MENTIONS','RELATES_TO','HAS_PROJECT','HAS_METRIC']
AND ( AND (
(s:Meeting AND s.meeting_id IN $ids) (s:Meeting AND s.meeting_id IN $ids)
OR (s:Episode AND s.episode_id IN $ids) OR (s:Episode AND s.episode_id IN $ids)
OR (s:Entity AND s.name IN $ids) OR (s:Entity AND s.name IN $ids)
OR (s:Fact AND s.fact_id IN $ids)
) )
AND ( AND (
(t:Meeting AND t.meeting_id IN $ids) (t:Meeting AND t.meeting_id IN $ids)
OR (t:Episode AND t.episode_id IN $ids) OR (t:Episode AND t.episode_id IN $ids)
OR (t:Entity AND t.name IN $ids) OR (t:Entity AND t.name IN $ids)
OR (t:Fact AND t.fact_id IN $ids)
) )
RETURN type(r) AS predicate, RETURN type(r) AS predicate,
r.name AS relation_name,
r.fact AS relation_fact,
r.confidence AS relation_confidence,
r.meeting_date AS relation_date,
r.meeting_id AS relation_meeting_id,
CASE WHEN s:Meeting THEN s.meeting_id CASE WHEN s:Meeting THEN s.meeting_id
WHEN s:Episode THEN s.episode_id WHEN s:Episode THEN s.episode_id
WHEN s:Entity THEN s.name WHEN s:Entity THEN s.name END AS source_raw,
WHEN s:Fact THEN s.fact_id END AS source_raw,
CASE WHEN t:Meeting THEN t.meeting_id CASE WHEN t:Meeting THEN t.meeting_id
WHEN t:Episode THEN t.episode_id WHEN t:Episode THEN t.episode_id
WHEN t:Entity THEN t.name WHEN t:Entity THEN t.name END AS target_raw,
WHEN t:Fact THEN t.fact_id END AS target_raw,
CASE WHEN s:Meeting THEN 'Meeting' WHEN s:Episode THEN 'Episode' CASE WHEN s:Meeting THEN 'Meeting' WHEN s:Episode THEN 'Episode'
WHEN s:Entity THEN 'Entity' WHEN s:Fact THEN 'Fact' END AS source_kind, WHEN s:Department THEN 'Department'
WHEN s:Project THEN 'Project'
WHEN s:Metric THEN 'Metric'
WHEN s:Entity THEN 'Entity' END AS source_kind,
CASE WHEN t:Meeting THEN 'Meeting' WHEN t:Episode THEN 'Episode' CASE WHEN t:Meeting THEN 'Meeting' WHEN t:Episode THEN 'Episode'
WHEN t:Entity THEN 'Entity' WHEN t:Fact THEN 'Fact' END AS target_kind, WHEN t:Department THEN 'Department'
CASE WHEN s:Fact THEN coalesce(s.predicate, '') WHEN t:Project THEN 'Project'
WHEN t:Fact THEN coalesce(t.predicate, '') ELSE '' END AS fact_predicate, WHEN t:Metric THEN 'Metric'
CASE WHEN s:Fact THEN coalesce(s.fact, '') WHEN t:Entity THEN 'Entity' END AS target_kind
WHEN t:Fact THEN coalesce(t.fact, '') ELSE '' END AS fact_text,
CASE WHEN s:Fact THEN coalesce(s.description, '')
WHEN t:Fact THEN coalesce(t.description, '') ELSE '' END AS fact_description,
CASE WHEN s:Fact THEN coalesce(s.confidence, 0.0)
WHEN t:Fact THEN coalesce(t.confidence, 0.0) ELSE 0.0 END AS fact_confidence,
CASE WHEN s:Fact THEN coalesce(s.meeting_date, '')
WHEN t:Fact THEN coalesce(t.meeting_date, '') ELSE '' END AS fact_date,
CASE WHEN s:Fact THEN coalesce(s.meeting_id, '')
WHEN t:Fact THEN coalesce(t.meeting_id, '') ELSE '' END AS fact_meeting_id
LIMIT $limit_edges LIMIT $limit_edges
''', ids=list(all_raw_ids), limit_edges=limit_edges) ''', ids=list(all_raw_ids), limit_edges=limit_edges)
@ -700,11 +807,11 @@ class Neo4jGraphStore:
'source': f'{sk}:{row["source_raw"]}' if sk and row.get('source_raw') else '', 'source': f'{sk}:{row["source_raw"]}' if sk and row.get('source_raw') else '',
'target': f'{tk}:{row["target_raw"]}' if tk and row.get('target_raw') else '', 'target': f'{tk}:{row["target_raw"]}' if tk and row.get('target_raw') else '',
'predicate': row.get('predicate', ''), 'predicate': row.get('predicate', ''),
'fact': row.get('fact_text', '') or row.get('fact_description', '') or '', 'relation_name': row.get('relation_name', ''),
'description': row.get('fact_description', '') or '', 'fact': row.get('relation_fact', '') or '',
'confidence': row.get('fact_confidence', 0.0), 'confidence': row.get('relation_confidence', 0.0),
'date': row.get('fact_date', '') or '', 'date': row.get('relation_date', '') or '',
'meeting_id': row.get('fact_meeting_id', '') or '', 'meeting_id': row.get('relation_meeting_id', '') or '',
}) })
return {'nodes': nodes, 'edges': edges, 'stats': self.get_stats(), 'query': query} return {'nodes': nodes, 'edges': edges, 'stats': self.get_stats(), 'query': query}
@ -729,24 +836,15 @@ class Neo4jGraphStore:
def _load_fact_candidates(self) -> List[Dict[str, Any]]: def _load_fact_candidates(self) -> List[Dict[str, Any]]:
return self.run_query(''' return self.run_query('''
MATCH (ep:Episode)-[:HAS_FACT]->(f:Fact) MATCH (s:Entity)-[r:RELATES_TO]->(t:Entity)
OPTIONAL MATCH (s:Entity)-[:FACT_SOURCE]->(f) OPTIONAL MATCH (ep:Episode)-[:MENTIONS]->(s)
OPTIONAL MATCH (f)-[:FACT_TARGET]->(o:Entity) WITH s, r, t, collect(DISTINCT ep.date) AS dates, collect(DISTINCT ep.title) AS titles
RETURN 'fact' AS kind, RETURN 'fact' AS kind,
coalesce(s.name + ' -[' + coalesce(f.predicate, '') + ']-> ' + o.name, f.fact) AS title, s.name + ' -[' + r.name + ']-> ' + t.name AS title,
coalesce( coalesce(r.fact, '') AS text,
f.description + CASE head(dates) AS date,
WHEN size(coalesce(f.qualifiers, [])) > 0 head(titles) AS meeting_title,
THEN ' | ' + reduce(acc = '', item IN f.qualifiers | r.fact_embedding AS embedding
acc + CASE WHEN acc = '' THEN item ELSE '; ' + item END
)
ELSE ''
END,
f.fact, ''
) AS text,
ep.date AS date,
ep.title AS meeting_title,
f.fact_embedding AS embedding
''') ''')
def _load_entity_candidates(self) -> List[Dict[str, Any]]: def _load_entity_candidates(self) -> List[Dict[str, Any]]:
@ -774,24 +872,21 @@ class Neo4jGraphStore:
@staticmethod @staticmethod
def _entity_summary(entity: dict) -> str: def _entity_summary(entity: dict) -> str:
entity_type = entity.get('entity_type', '').strip() entity_type = _canonical_entity_type(entity.get('entity_type', '').strip())
name = entity.get('name', '').strip() name = entity.get('name', '').strip()
description = entity.get('description', '').strip() description = entity.get('description', '').strip()
parts = [part for part in [entity_type, name, description] if part] parts = [part for part in [entity_type, name, description] if part]
return ' | '.join(parts) return ' | '.join(parts)
@staticmethod @staticmethod
def _fact_text(relation: dict) -> str: def _relation_text(relation: dict) -> str:
subject = relation.get('subject', '').strip() source = relation.get('source_entity_name', '').strip()
predicate = relation.get('predicate', '').strip() rtype = relation.get('relation_type', '').strip()
obj = relation.get('object', '').strip() target = relation.get('target_entity_name', '').strip()
description = relation.get('description', '').strip() fact = relation.get('fact', '').strip() or f'{source} {rtype} {target}'.strip()
fact = relation.get('fact', '').strip() or f'{subject} {predicate} {obj}'.strip()
qualifiers = relation.get('qualifiers', []) qualifiers = relation.get('qualifiers', [])
qualifier_text = '; '.join(item for item in qualifiers if item) qualifier_text = '; '.join(item for item in qualifiers if item)
parts = [fact] parts = [fact]
if description:
parts.append(description)
if qualifier_text: if qualifier_text:
parts.append(qualifier_text) parts.append(qualifier_text)
return '. '.join(parts) return '. '.join(parts)
@ -808,6 +903,7 @@ class Neo4jGraphStore:
'action_items': meeting_data.get('action_items', []), 'action_items': meeting_data.get('action_items', []),
'metrics': meeting_data.get('metrics', []), 'metrics': meeting_data.get('metrics', []),
'decisions': meeting_data.get('decisions', []), 'decisions': meeting_data.get('decisions', []),
'departments': meeting_data.get('departments', []),
'original_text': meeting_data.get('_original_text', ''), 'original_text': meeting_data.get('_original_text', ''),
} }
return json.dumps(payload, ensure_ascii=False) return json.dumps(payload, ensure_ascii=False)

View File

@ -125,7 +125,7 @@ class MeetingProcessor:
# Step 3: 提取标题、日期、参与人等元信息 # Step 3: 提取标题、日期、参与人等元信息
report(3, total_steps, '抽取会议元信息(标题、日期、参与者等)') report(3, total_steps, '抽取会议元信息(标题、日期、参与者等)')
meta_info = self._extract_monolithic(text) meta_info = self._extract_monolithic(text, stream=interactive)
if not meta_info: if not meta_info:
logger.error('Failed to extract meeting metadata') logger.error('Failed to extract meeting metadata')
return None return None
@ -136,9 +136,10 @@ class MeetingProcessor:
# Step 4: 抽取实体节点LLM 调用 1 # Step 4: 抽取实体节点LLM 调用 1
report(4, total_steps, '第 1 步实体抽取:识别会议中提及的实体') report(4, total_steps, '第 1 步实体抽取:识别会议中提及的实体')
use_stream = interactive
previous_episodes = self._get_previous_episodes_context(data_dict) previous_episodes = self._get_previous_episodes_context(data_dict)
extracted_entities = extract_entities_from_text( extracted_entities = extract_entities_from_text(
text, previous_episodes=previous_episodes, stream=True text, previous_episodes=previous_episodes, stream=use_stream
) )
logger.info('Extracted %d entities from meeting', len(extracted_entities)) logger.info('Extracted %d entities from meeting', len(extracted_entities))
if not extracted_entities: if not extracted_entities:
@ -158,7 +159,7 @@ class MeetingProcessor:
text, resolved_entities, text, resolved_entities,
reference_time=reference_time, reference_time=reference_time,
previous_episodes=previous_episodes, previous_episodes=previous_episodes,
stream=True, stream=use_stream,
) )
logger.info('Extracted %d facts from meeting', len(extracted_facts)) logger.info('Extracted %d facts from meeting', len(extracted_facts))
@ -337,9 +338,9 @@ class MeetingProcessor:
normalized = text.strip().replace('\r\n', '\n') normalized = text.strip().replace('\r\n', '\n')
return hashlib.sha256(normalized.encode('utf-8')).hexdigest() return hashlib.sha256(normalized.encode('utf-8')).hexdigest()
def _extract_monolithic(self, text: str) -> Optional[MeetingExtraction]: def _extract_monolithic(self, text: str, *, stream: bool = True) -> Optional[MeetingExtraction]:
try: try:
return monolithic_extract(text, stream=True) return monolithic_extract(text, stream=stream)
except Exception as exc: except Exception as exc:
logger.error('LLM extraction failed: %s', exc) logger.error('LLM extraction failed: %s', exc)
return None return None

View File

@ -47,11 +47,18 @@ def extract_facts(context: dict[str, Any]) -> list[dict]:
}}] }}]
4. relation_type 避免使用"关联""涉及"等空泛词优先使用具体谓词 4. relation_type 避免使用"关联""涉及"等空泛词优先使用具体谓词
负责汇报目标值当前值低于高于要求督导推进支撑依赖计划完成截止于参与隶属于分管协调审批 负责汇报目标值当前值低于高于要求督导推进支撑依赖计划完成截止于参与隶属于分管协调审批
5. fact 必须是一句完整的自然语言事实保留所有具体信息人名数值产品名地点等 5. 层次关系结构隶属使用以下固定 relation_type
HAS_PROJECT: 部门管辖项目Department -> Project
HAS_METRIC: 项目拥有指标Project -> Metric
PART_OF: 实体属于某个上级实体
6. 如果根据上下文可以判断事实的开始/结束时间填入 valid_at / invalid_at 6. 同一对实体之间可能既有层次关系HAS_PROJECT也有事实关系负责汇报需要分别抽取
7. fact 必须是一句完整的自然语言事实保留所有具体信息人名数值产品名地点等
8. 如果根据上下文可以判断事实的开始/结束时间填入 valid_at / invalid_at
""" """
return [ return [
{'role': 'system', 'content': '你是一个专业的事实关系抽取专家。从会议记录中抽取实体间的结构化事实关系。'}, {'role': 'system', 'content': '你是一个专业的事实关系抽取专家。从会议记录中抽取实体间的结构化事实关系。'},

View File

@ -3,7 +3,7 @@ from typing import Any
SYSTEM_PROMPT = ( SYSTEM_PROMPT = (
'你是会议纪要实体抽取专家。' '你是会议纪要实体抽取专家。'
'从会议记录中抽取明确的实体节点,包括人物、组织、地点、项目、指标等。' '从会议记录中抽取明确的实体节点,包括部门Department、项目Project、指标Metric、人物Person、系统System、文档Document等。'
'不要抽取抽象概念、情感、时间日期或泛泛的名词。' '不要抽取抽象概念、情感、时间日期或泛泛的名词。'
) )
@ -45,7 +45,10 @@ def extract_entities(context: dict[str, Any]) -> list[dict]:
6. description 写一段对该实体的简要描述20字以内 6. description 写一段对该实体的简要描述20字以内
7. evidence 从原文中摘录提及该实体的关键短句 7. evidence 从原文中摘录提及该实体的关键短句
注意实体类型建议使用 Person人物Organization组织Location地点Project项目Metric指标System系统Document文档 注意实体类型建议使用 Department部门Project项目Metric指标Person人物System系统Document文档请确保
- 部门Department会议中提到的具体部门名称"技术部""市场部"
- 项目Project部门负责的具体项目名称
- 指标Metric项目中提到的具体量化指标"响应时间""完成率"
""" """
return [ return [
{'role': 'system', 'content': SYSTEM_PROMPT}, {'role': 'system', 'content': SYSTEM_PROMPT},

View File

@ -85,7 +85,7 @@ function renderStats(graph = {}, state = {}) {
{ label: "Neo4j", value: graph.enabled ? "在线" : "离线", icon: "⬡", color: graph.enabled ? "#34c759" : "#b3261e" }, { label: "Neo4j", value: graph.enabled ? "在线" : "离线", icon: "⬡", color: graph.enabled ? "#34c759" : "#b3261e" },
{ label: "会议", value: graph.meetings ?? 0, icon: "📋", color: "#4a90d9" }, { label: "会议", value: graph.meetings ?? 0, icon: "📋", color: "#4a90d9" },
{ label: "实体", value: graph.entities ?? 0, icon: "◆", color: "#53c2da" }, { label: "实体", value: graph.entities ?? 0, icon: "◆", color: "#53c2da" },
{ label: "关系", value: graph.facts ?? 0, icon: "↗", color: "#ff9500" }, { label: "关系", value: graph.relations ?? 0, icon: "↗", color: "#ff9500" },
{ label: "行动项", value: state.action_items_tracked ?? 0, icon: "☐", color: "#7f8bff" }, { label: "行动项", value: state.action_items_tracked ?? 0, icon: "☐", color: "#7f8bff" },
{ label: "指标", value: state.metrics_tracked ?? 0, icon: "📊", color: "#af52de" }, { label: "指标", value: state.metrics_tracked ?? 0, icon: "📊", color: "#af52de" },
]; ];

View File

@ -199,7 +199,6 @@ function renderGraph(payload) {
text.setAttribute("y", r + 16); text.setAttribute("y", r + 16);
text.setAttribute("text-anchor", "middle"); text.setAttribute("text-anchor", "middle");
text.setAttribute("font-size", "11"); text.setAttribute("font-size", "11");
text.setAttribute("fill", "#22264d");
text.setAttribute("data-type", "node-label"); text.setAttribute("data-type", "node-label");
text.textContent = truncate(node.label, TRUNCATE_LENGTH); text.textContent = truncate(node.label, TRUNCATE_LENGTH);
g.appendChild(text); g.appendChild(text);
@ -390,21 +389,22 @@ function renderGraph(payload) {
${node.date ? `<span class="chip">${h(node.date)}</span>` : ""} ${node.date ? `<span class="chip">${h(node.date)}</span>` : ""}
<span class="chip">关系 ${h(related.length)}</span> <span class="chip">关系 ${h(related.length)}</span>
</div>`; </div>`;
} else if (kind === "fact") {
body = `
<p>${h(node.fact || node.description || "暂无描述")}</p>
<div class="chip-row">
${node.date ? `<span class="chip">${h(node.date)}</span>` : ""}
<span class="chip">关系 ${h(related.length)}</span>
</div>`;
} else { } else {
const isMetric = (node.entity_type || "").toLowerCase() === "metric";
body = ` body = `
<p>${h(node.description || "暂无描述")}</p> <p>${h(node.description || "暂无描述")}</p>
<div class="chip-row"> <div class="chip-row">
${node.entity_type ? `<span class="chip">${h(node.entity_type)}</span>` : ""} ${node.entity_type ? `<span class="chip">${h(node.entity_type)}</span>` : ""}
${node.date ? `<span class="chip">${h(node.date)}</span>` : ""} ${node.date ? `<span class="chip">${h(node.date)}</span>` : ""}
<span class="chip">关系 ${h(related.length)}</span> <span class="chip">关系 ${h(related.length)}</span>
</div>`; </div>
${isMetric ? `
<div class="metric-fields">
${node.current_value ? `<p><strong>当前值:</strong>${h(node.current_value)}</p>` : ""}
${node.target ? `<p><strong>目标值:</strong>${h(node.target)}</p>` : ""}
${node.unit ? `<p><strong>单位:</strong>${h(node.unit)}</p>` : ""}
${node.trend ? `<p><strong>趋势:</strong>${h(node.trend)}</p>` : ""}
</div>` : ""}`;
} }
renderInspector(` renderInspector(`
<div class="detail-section"> <div class="detail-section">

View File

@ -28,7 +28,7 @@
<span><i class="legend-dot meeting"></i>会议</span> <span><i class="legend-dot meeting"></i>会议</span>
<span><i class="legend-dot episode"></i>片段</span> <span><i class="legend-dot episode"></i>片段</span>
<span><i class="legend-dot entity"></i>实体</span> <span><i class="legend-dot entity"></i>实体</span>
<span><i class="legend-dot fact"></i>事实</span> <span><i class="legend-dot edge"></i>关系</span>
</div> </div>
</aside> </aside>

View File

@ -230,7 +230,7 @@ input, textarea, select { font: inherit; color: inherit; }
.legend-dot.meeting { background: #3b82f6; } .legend-dot.meeting { background: #3b82f6; }
.legend-dot.episode { background: #10b981; } .legend-dot.episode { background: #10b981; }
.legend-dot.entity { background: #06b6d4; } .legend-dot.entity { background: #06b6d4; }
.legend-dot.fact { background: #f59e0b; } .legend-dot.edge { background: #f59e0b; }
/* ── Main Area ── */ /* ── Main Area ── */
.main { .main {
@ -1102,6 +1102,18 @@ textarea {
margin-bottom: 0; margin-bottom: 0;
} }
.detail-section .metric-fields {
margin-top: 12px;
padding: 8px 10px;
background: #f0f4ff;
border-radius: 6px;
}
.detail-section .metric-fields p {
margin: 2px 0;
font-size: 13px;
}
.detail-section .empty-state { .detail-section .empty-state {
margin: 4px 0; margin: 4px 0;
} }
@ -1116,16 +1128,24 @@ textarea {
transition: opacity 0.15s; transition: opacity 0.15s;
} }
.graph-node--meeting circle { fill: #3b82f6; } .graph-node--meeting circle,
.graph-node--episode circle { fill: #10b981; } .graph-node--meeting text[data-type="node-label"] { fill: #3b82f6; }
.graph-node--entity circle { fill: #06b6d4; } .graph-node--episode circle,
.graph-node--fact circle { fill: #f59e0b; } .graph-node--episode text[data-type="node-label"] { fill: #10b981; }
.graph-node--entity circle,
.graph-node--entity text[data-type="node-label"],
.graph-node--department circle,
.graph-node--department text[data-type="node-label"],
.graph-node--project circle,
.graph-node--project text[data-type="node-label"],
.graph-node--metric circle,
.graph-node--metric text[data-type="node-label"] { fill: #06b6d4; }
.graph-node:hover circle { opacity: 0.8; } .graph-node:hover circle { opacity: 0.8; }
.graph-node text { .graph-node text {
font-size: 11px; font-size: 11px;
fill: var(--text); font-weight: 500;
pointer-events: none; pointer-events: none;
user-select: none; user-select: none;
} }

View File

@ -0,0 +1,186 @@
"""
Migration script: v1 (flat Entity + Fact nodes) -> v2 (composite labels + direct edges)
Steps:
1. Add composite Neo4j labels to existing Entity nodes based on entity_type
2. Convert Fact nodes to RELATES_TO edges between Entity nodes
3. Verify data integrity
"""
import logging
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from meeting_memory.graph_store import graph_store, _canonical_entity_type, _EntityType
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
logger = logging.getLogger('migrate')
def get_type_label_map() -> dict[str, str]:
    """Return the canonical ``entity_type`` value -> Neo4j label mapping.

    Only the six typed enum members are listed here; the mapping is keyed by
    each member's ``.value``.

    NOTE(review): this relies on ``_EntityType`` being importable from
    ``meeting_memory.graph_store`` under exactly that name -- confirm.
    """
    labelled_members = (
        (_EntityType.DEPARTMENT, 'Department'),
        (_EntityType.PROJECT, 'Project'),
        (_EntityType.METRIC, 'Metric'),
        (_EntityType.PERSON, 'Person'),
        (_EntityType.SYSTEM, 'System'),
        (_EntityType.DOCUMENT, 'Document'),
    )
    return {member.value: label for member, label in labelled_members}
def _label_entities(raw_type: str, canonical_type: str, label: str) -> int:
    """Add ``:<label>`` to every Entity whose ``entity_type`` equals *raw_type*.

    The node's ``entity_type`` is rewritten to *canonical_type* in the same
    statement (a no-op when *raw_type* is already canonical).

    Returns the number of Entity nodes that matched *raw_type*; the update
    query is skipped entirely when that number is zero.
    """
    rows = graph_store.run_query(
        'MATCH (e:Entity {entity_type: $etype}) RETURN count(e) AS cnt',
        etype=raw_type,
    )
    count = rows[0]['cnt'] if rows else 0
    if count:
        # The label is spliced into the query text rather than passed as a
        # parameter; it only ever comes from the fixed table returned by
        # get_type_label_map(), never from stored data.
        graph_store.run_query(
            f'MATCH (e:Entity {{entity_type: $etype}}) '
            f'SET e.entity_type = $canonical, e:{label}',
            etype=raw_type,
            canonical=canonical_type,
        )
    return count


def step1_add_composite_labels():
    """Add composite labels (e.g. ``:Department``) to existing Entity nodes.

    Two passes are made:

    1. Entities whose ``entity_type`` is already canonical get the matching
       label.
    2. Entities stored under a legacy spelling get their ``entity_type``
       rewritten to the canonical value and the matching label added.

    The legacy spellings cover the Department and Metric aliases handled
    before, plus ``'organization'`` and ``'项目'`` so that the migration agrees
    with the alias normalisation applied to newly extracted entities.
    """
    type_label_map = get_type_label_map()
    total = 0

    for canonical_type, label in type_label_map.items():
        count = _label_entities(canonical_type, canonical_type, label)
        if count == 0:
            logger.info(' No Entity with entity_type=%s to migrate', canonical_type)
            continue
        logger.info(' Added :%s label to %d Entity nodes', label, count)
        total += count

    # Legacy entity_type spelling -> canonical entity_type.
    legacy_aliases = (
        ('组织', _EntityType.DEPARTMENT.value),
        ('Organization', _EntityType.DEPARTMENT.value),
        ('organization', _EntityType.DEPARTMENT.value),
        ('部门', _EntityType.DEPARTMENT.value),
        ('指标', _EntityType.METRIC.value),
        ('kpi', _EntityType.METRIC.value),
        ('KPI', _EntityType.METRIC.value),
        ('项目', _EntityType.PROJECT.value),
    )
    for alias, canonical_type in legacy_aliases:
        label = type_label_map[canonical_type]
        count = _label_entities(alias, canonical_type, label)
        if count == 0:
            continue
        logger.info(
            ' Redirected %d entities from entity_type=%s -> %s',
            count, alias, label,
        )
        total += count

    logger.info('Step 1 done: %d entities got composite labels', total)
def step2_convert_facts_to_edges():
    """Convert existing Fact nodes to RELATES_TO edges, then remove them.

    Every ``(Entity)-[:FACT_SOURCE]->(Fact)-[:FACT_TARGET]->(Entity)`` path is
    turned into a ``RELATES_TO`` edge between the two entities, keyed by the
    fact's predicate (stored as the edge's ``name``). Because the edge is
    MERGEd on (source, target, name), several facts sharing that triple end up
    on one edge and the last one processed supplies its properties.

    Only Fact nodes that took part in such a path with non-empty entity names
    are deleted afterwards. Fact nodes that could not be converted are left in
    place and reported with a warning instead of being silently destroyed.
    """
    facts = graph_store.run_query('''
        MATCH (s:Entity)-[:FACT_SOURCE]->(f:Fact)-[:FACT_TARGET]->(t:Entity)
        RETURN s.name AS source, t.name AS target,
               f.predicate AS relation_type,
               f.fact AS fact,
               f.qualifiers AS qualifiers,
               f.evidence AS evidence,
               f.confidence AS confidence,
               f.valid_at AS valid_at,
               f.invalid_at AS invalid_at,
               f.meeting_id AS meeting_id,
               f.meeting_date AS meeting_date,
               f.fact_embedding AS fact_embedding
    ''')
    logger.info('Found %d Fact nodes to convert', len(facts))

    converted = 0
    for f in facts:
        # The RETURN clause always yields every key, so a missing property
        # arrives as None rather than as an absent key; `or` supplies the
        # fallback in both cases.
        source = f.get('source') or ''
        target = f.get('target') or ''
        if not source or not target:
            continue
        rtype = f.get('relation_type') or '关联'
        fact_embedding = f.get('fact_embedding') or []

        graph_store.run_query('''
            MATCH (s:Entity {name: $source})
            MATCH (t:Entity {name: $target})
            MERGE (s)-[r:RELATES_TO {name: $rtype}]->(t)
            SET r.fact = $fact,
                r.evidence = $evidence,
                r.qualifiers = $qualifiers,
                r.confidence = $confidence,
                r.valid_at = $valid_at,
                r.invalid_at = $invalid_at,
                r.meeting_id = $meeting_id,
                r.meeting_date = $meeting_date,
                r.updated_at = datetime()
        ''',
            source=source,
            target=target,
            rtype=rtype,
            fact=f.get('fact') or '',
            evidence=f.get('evidence') or '',
            qualifiers=f.get('qualifiers') or [],
            confidence=f.get('confidence') or 0.0,
            valid_at=f.get('valid_at') or '',
            invalid_at=f.get('invalid_at') or '',
            meeting_id=f.get('meeting_id') or '',
            meeting_date=f.get('meeting_date') or '',
        )

        # The embedding is written only when one exists, so edges without an
        # embedding never get an empty-list property.
        if fact_embedding:
            graph_store.run_query('''
                MATCH (s:Entity {name: $source})-[r:RELATES_TO {name: $rtype}]->(t:Entity {name: $target})
                SET r.fact_embedding = $embedding
            ''', source=source, target=target, rtype=rtype, embedding=fact_embedding)
        converted += 1

    # Remove only the Fact nodes selected by the same condition the loop used
    # for conversion; DETACH DELETE drops their relationships with them.
    graph_store.run_query('''
        MATCH (s:Entity)-[:FACT_SOURCE]->(f:Fact)-[:FACT_TARGET]->(t:Entity)
        WHERE coalesce(s.name, '') <> '' AND coalesce(t.name, '') <> ''
        WITH DISTINCT f
        DETACH DELETE f
    ''')

    leftover_rows = graph_store.run_query('MATCH (f:Fact) RETURN count(f) AS cnt')
    leftover = leftover_rows[0]['cnt'] if leftover_rows else 0
    if leftover:
        logger.warning(
            ' %d Fact nodes had no usable source/target entity and were left in place',
            leftover,
        )

    logger.info('Step 2 done: converted %d facts to edges, removed Fact nodes', converted)
def verify():
    """Log post-migration statistics so the result can be checked by eye.

    Nothing is asserted or returned; the function only reads from the graph
    store and writes INFO log lines.
    """
    logger.info('Final stats: %s', graph_store.get_stats())

    type_rows = graph_store.get_entity_types()
    logger.info(
        'Entity types: %s',
        [(row['entity_type'], row['count']) for row in type_rows],
    )

    kind_rows = graph_store.get_graph_kinds()
    logger.info(
        'Graph kinds: %s',
        [(row['kind'], row['count']) for row in kind_rows],
    )

    # Per-label node counts; labels with no nodes are not logged.
    typed_labels = ('Department', 'Project', 'Metric', 'Person', 'System', 'Document')
    for label in typed_labels:
        result = graph_store.run_query(f'MATCH (n:{label}) RETURN count(n) AS cnt')
        if not result:
            continue
        node_total = result[0]['cnt']
        if node_total:
            logger.info(' :%s nodes: %d', label, node_total)

    edge_rows = graph_store.run_query('MATCH ()-[r:RELATES_TO]->() RETURN count(r) AS cnt')
    edge_total = edge_rows[0]['cnt'] if edge_rows else 0
    logger.info(' RELATES_TO edges: %d', edge_total)
if __name__ == '__main__':
    # Script entry point: run the migration steps in order, or exit with
    # status 1 when the graph store reports that it is not enabled.
    if graph_store.enabled:
        logger.info('Starting v1→v2 migration...')
        for migration_step in (
            step1_add_composite_labels,
            step2_convert_facts_to_edges,
            verify,
        ):
            migration_step()
        logger.info('Migration complete')
    else:
        logger.error('Neo4j is not available')
        sys.exit(1)