摘要总结

main
mula.liu 2025-08-26 21:57:16 +08:00
parent 3528ba717d
commit bcf0acc0bf
8 changed files with 429 additions and 156 deletions

View File

@ -1,16 +1,24 @@
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from app.models.models import Meeting, TranscriptSegment, CreateMeetingRequest, UpdateMeetingRequest, SpeakerTagUpdateRequest, BatchSpeakerTagUpdateRequest
from app.models.models import Meeting, TranscriptSegment, CreateMeetingRequest, UpdateMeetingRequest, SpeakerTagUpdateRequest, BatchSpeakerTagUpdateRequest, TranscriptUpdateRequest, BatchTranscriptUpdateRequest
from app.core.database import get_db_connection
from app.core.config import BASE_DIR, UPLOAD_DIR, AUDIO_DIR, MARKDOWN_DIR, ALLOWED_EXTENSIONS, ALLOWED_IMAGE_EXTENSIONS, MAX_FILE_SIZE, MAX_IMAGE_SIZE
from app.services.qiniu_service import qiniu_service
from app.services.llm_service import LLMService
from typing import Optional
from pydantic import BaseModel
import os
import uuid
import shutil
router = APIRouter()
# 实例化LLM服务
llm_service = LLMService()
# 请求模型
class GenerateSummaryRequest(BaseModel):
    """Request body of the generate-summary endpoint."""

    # Optional extra instructions supplied by the caller; empty string by default.
    user_prompt: Optional[str] = ""
@router.get("/meetings", response_model=list[Meeting])
def get_meetings(user_id: Optional[int] = None):
with get_db_connection() as connection:
@ -114,6 +122,7 @@ def get_meeting_details(meeting_id: int):
@router.get("/meetings/{meeting_id}/transcript", response_model=list[TranscriptSegment])
def get_meeting_transcript(meeting_id: int):
"""获取会议的转录内容"""
with get_db_connection() as connection:
cursor = connection.cursor(dictionary=True)
@ -157,20 +166,25 @@ def create_meeting(meeting_request: CreateMeetingRequest):
INSERT INTO meetings (user_id, title, meeting_time, summary)
VALUES (%s, %s, %s, %s)
'''
cursor.execute(meeting_query, (meeting_request.user_id, meeting_request.title, meeting_request.meeting_time, None))
cursor.execute(meeting_query, (
meeting_request.user_id,
meeting_request.title,
meeting_request.meeting_time,
None # summary starts as None
))
meeting_id = cursor.lastrowid
# Add attendees
for attendee_id in meeting_request.attendee_ids:
attendee_query = '''
INSERT INTO attendees (meeting_id, user_id)
INSERT IGNORE INTO attendees (meeting_id, user_id)
VALUES (%s, %s)
ON DUPLICATE KEY UPDATE meeting_id = meeting_id
'''
cursor.execute(attendee_query, (meeting_id, attendee_id))
connection.commit()
return {"meeting_id": meeting_id, "message": "Meeting created successfully"}
return {"message": "Meeting created successfully", "meeting_id": meeting_id}
@router.put("/meetings/{meeting_id}")
def update_meeting(meeting_id: int, meeting_request: UpdateMeetingRequest):
@ -195,7 +209,7 @@ def update_meeting(meeting_id: int, meeting_request: UpdateMeetingRequest):
meeting_id
))
# Update attendees - remove existing ones and add new ones
# Update attendees - remove existing and add new ones
cursor.execute("DELETE FROM attendees WHERE meeting_id = %s", (meeting_id,))
for attendee_id in meeting_request.attendee_ids:
@ -260,17 +274,15 @@ def regenerate_summary(meeting_id: int):
- [ ] 跟进项目进度"""
# Update meeting summary
cursor.execute(
"UPDATE meetings SET summary = %s WHERE meeting_id = %s",
(mock_summary, meeting_id)
)
update_query = "UPDATE meetings SET summary = %s WHERE meeting_id = %s"
cursor.execute(update_query, (mock_summary, meeting_id))
connection.commit()
return {"summary": mock_summary}
return {"message": "Summary regenerated successfully", "summary": mock_summary}
@router.get("/meetings/{meeting_id}/edit", response_model=Meeting)
def get_meeting_for_edit(meeting_id: int):
"""Get meeting details with full attendee information for editing"""
"""获取会议信息用于编辑"""
with get_db_connection() as connection:
cursor = connection.cursor(dictionary=True)
@ -290,14 +302,14 @@ def get_meeting_for_edit(meeting_id: int):
if not meeting:
raise HTTPException(status_code=404, detail="Meeting not found")
# Get attendees with full info for editing
# Get attendees
attendees_query = '''
SELECT u.user_id, u.caption
FROM attendees a
JOIN users u ON a.user_id = u.user_id
WHERE a.meeting_id = %s
'''
cursor.execute(attendees_query, (meeting['meeting_id'],))
cursor.execute(attendees_query, (meeting_id,))
attendees_data = cursor.fetchall()
attendees = [{'user_id': row['user_id'], 'caption': row['caption']} for row in attendees_data]
@ -313,7 +325,7 @@ def get_meeting_for_edit(meeting_id: int):
)
# Add audio file path if exists
if meeting['audio_file_path']:
if meeting.get('audio_file_path'):
meeting_data.audio_file_path = meeting['audio_file_path']
return meeting_data
@ -459,12 +471,12 @@ async def upload_image(
# 发言人标签更新接口
@router.put("/meetings/{meeting_id}/speaker-tags")
def update_speaker_tag(meeting_id: int, request: SpeakerTagUpdateRequest):
"""更新单个发言人标签"""
"""更新单个发言人标签基于原始的speaker_id值"""
try:
with get_db_connection() as connection:
cursor = connection.cursor()
# 更新指定meeting_id和speaker_id的所有记录的speaker_tag
# 只修改speaker_tag保留speaker_id的原始值
update_query = """
UPDATE transcript_segments
SET speaker_tag = %s
@ -483,13 +495,14 @@ def update_speaker_tag(meeting_id: int, request: SpeakerTagUpdateRequest):
@router.put("/meetings/{meeting_id}/speaker-tags/batch")
def batch_update_speaker_tags(meeting_id: int, request: BatchSpeakerTagUpdateRequest):
"""批量更新发言人标签"""
"""批量更新发言人标签基于原始的speaker_id值"""
try:
with get_db_connection() as connection:
cursor = connection.cursor()
total_updated = 0
for update_item in request.updates:
# 只修改speaker_tag保留speaker_id的原始值
update_query = """
UPDATE transcript_segments
SET speaker_tag = %s
@ -503,3 +516,118 @@ def batch_update_speaker_tags(meeting_id: int, request: BatchSpeakerTagUpdateReq
except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to batch update speaker tags: {str(e)}")
# 转录内容更新接口
@router.put("/meetings/{meeting_id}/transcript/batch")
def batch_update_transcript(meeting_id: int, request: BatchTranscriptUpdateRequest):
    """Update the text of several transcript segments of one meeting.

    Each item in ``request.updates`` carries a ``segment_id`` and the new
    ``text_content``.  Segments that do not belong to ``meeting_id`` are left
    untouched: the ``UPDATE`` is scoped by both ``segment_id`` and
    ``meeting_id``, so it simply matches no row for them and no separate
    ownership lookup is required.

    Returns:
        A dict with ``message`` and ``total_updated`` (the sum of the cursor's
        ``rowcount`` over all executed updates).

    Raises:
        HTTPException: status 500 when any database operation fails.
    """
    update_query = """
        UPDATE transcript_segments
        SET text_content = %s
        WHERE segment_id = %s AND meeting_id = %s
    """
    try:
        with get_db_connection() as connection:
            cursor = connection.cursor()
            total_updated = 0
            for update_item in request.updates:
                cursor.execute(
                    update_query,
                    (update_item.text_content, update_item.segment_id, meeting_id),
                )
                total_updated += cursor.rowcount
            # Commit once so the whole batch is applied together.
            connection.commit()
            return {'message': 'Transcript updated successfully', 'total_updated': total_updated}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to update transcript: {str(e)}")
# AI总结相关接口
@router.post("/meetings/{meeting_id}/generate-summary")
def generate_meeting_summary(meeting_id: int, request: GenerateSummaryRequest):
    """Generate an AI summary for a meeting.

    Responds with 404 when no meeting row has the given id.  Otherwise the
    LLM service is asked for a summary using ``request.user_prompt``; an
    ``error`` entry in its result becomes a 500 response, as does any other
    unexpected exception.
    """
    try:
        # Make sure the meeting exists before doing any LLM work.
        with get_db_connection() as conn:
            cur = conn.cursor(dictionary=True)
            cur.execute("SELECT meeting_id FROM meetings WHERE meeting_id = %s", (meeting_id,))
            if not cur.fetchone():
                raise HTTPException(status_code=404, detail="Meeting not found")

        outcome = llm_service.generate_meeting_summary(meeting_id, request.user_prompt)
        if outcome.get("error"):
            raise HTTPException(status_code=500, detail=outcome["error"])

        payload = {
            "message": "Summary generated successfully",
            "summary_id": outcome["summary_id"],
            "content": outcome["content"],
            "meeting_id": meeting_id
        }
        return payload
    except HTTPException:
        # Deliberate HTTP errors (404/500 above) pass through unchanged.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to generate summary: {str(exc)}")
@router.get("/meetings/{meeting_id}/summaries")
def get_meeting_summaries(meeting_id: int):
    """Return the AI summaries recorded for a meeting.

    Responds with 404 when no meeting row has the given id; otherwise returns
    ``{"meeting_id": ..., "summaries": [...]}`` where the list is whatever
    ``llm_service.get_meeting_summaries`` yields.  Unexpected errors become a
    500 response.
    """
    try:
        # Make sure the meeting exists before asking the service for summaries.
        with get_db_connection() as conn:
            cur = conn.cursor(dictionary=True)
            cur.execute("SELECT meeting_id FROM meetings WHERE meeting_id = %s", (meeting_id,))
            if not cur.fetchone():
                raise HTTPException(status_code=404, detail="Meeting not found")

        history = llm_service.get_meeting_summaries(meeting_id)
        return {"meeting_id": meeting_id, "summaries": history}
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to get summaries: {str(exc)}")
@router.get("/meetings/{meeting_id}/summaries/{summary_id}")
def get_summary_detail(meeting_id: int, summary_id: int):
    """Return one summary row of a meeting.

    Looks the row up in ``meeting_summaries`` by ``id`` and ``meeting_id``.
    Responds with 404 when no such row exists and with 500 on any other
    failure.  ``created_at`` is returned in ISO format, or ``None`` when the
    column is empty.

    NOTE(review): the LLM service in this change stores summaries on
    ``meetings.summary``; confirm ``meeting_summaries`` is actually populated.
    """
    try:
        with get_db_connection() as conn:
            cur = conn.cursor(dictionary=True)
            detail_sql = """
            SELECT id, summary_content, user_prompt, created_at
            FROM meeting_summaries
            WHERE id = %s AND meeting_id = %s
            """
            cur.execute(detail_sql, (summary_id, meeting_id))
            row = cur.fetchone()

            if not row:
                raise HTTPException(status_code=404, detail="Summary not found")

            detail = {
                "id": row["id"],
                "meeting_id": meeting_id,
                "content": row["summary_content"],
                "user_prompt": row["user_prompt"],
            }
            if row["created_at"]:
                detail["created_at"] = row["created_at"].isoformat()
            else:
                detail["created_at"] = None
            return detail
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to get summary detail: {str(exc)}")

View File

@ -42,4 +42,28 @@ QINIU_BUCKET = os.getenv('QINIU_BUCKET', 'imeeting')
QINIU_DOMAIN = os.getenv('QINIU_DOMAIN', 't0vogyxkz.hn-bkt.clouddn.com')
# Dashscope (Tongyi Qwen) API Key
DASHSCOPE_API_KEY = os.getenv('DASHSCOPE_API_KEY', 'sk-c2bf06ea56b4491ea3d1e37fdb472b8f')
# NOTE(review): the fallback value below is a literal API key committed to the
# repository. Prefer supplying QWEN_API_KEY through the environment only, and
# rotate this key since it is now part of the history.
QWEN_API_KEY = os.getenv('QWEN_API_KEY', 'sk-c2bf06ea56b4491ea3d1e37fdb472b8f')
# LLM configuration - Alibaba Qwen3 large language model.
# Each os.getenv call names the environment variable that overrides the
# setting; its second argument is the fallback used when the variable is unset.
LLM_CONFIG = {
    'model_name': os.getenv('LLM_MODEL_NAME', 'qwen-plus'),
    'api_url': os.getenv('LLM_API_URL', 'https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation'),
    # Numeric settings are parsed at import time; a non-numeric value raises ValueError.
    'max_tokens': int(os.getenv('LLM_MAX_TOKENS', '2000')),
    'temperature': float(os.getenv('LLM_TEMPERATURE', '0.7')),
    'top_p': float(os.getenv('LLM_TOP_P', '0.9')),
    # Fixed instruction text (not environment-configurable) describing the
    # sections and constraints of the requested meeting summary.
    'system_prompt': """你是一个专业的会议记录分析助手。请根据提供的会议转录内容,生成简洁明了的会议总结。
总结应该包括以下几个部分
1. 会议概述 - 简要说明会议的主要目的和背景
2. 主要讨论点 - 列出会议中讨论的重要话题和内容
3. 决策事项 - 明确记录会议中做出的决定和结论
4. 待办事项 - 列出需要后续跟进的任务和责任人
5. 关键信息 - 其他重要的信息点
要求
- 保持客观中性不添加个人观点
- 使用简洁的中文表达
- 按重要性排序各项内容
- 如果某个部分没有相关内容可以说明"无相关内容"
- 总字数控制在500字以内"""
}

View File

@ -65,3 +65,10 @@ class SpeakerTagUpdateRequest(BaseModel):
class BatchSpeakerTagUpdateRequest(BaseModel):
updates: List[SpeakerTagUpdateRequest]
class TranscriptUpdateRequest(BaseModel):
    """One transcript edit: the segment to change and its new text."""

    # Identifier of the transcript segment to update.
    segment_id: int
    # Replacement text for that segment.
    text_content: str
class BatchTranscriptUpdateRequest(BaseModel):
    """Request body carrying a list of transcript edits."""

    # Individual segment edits.
    updates: List[TranscriptUpdateRequest]

View File

@ -0,0 +1,239 @@
import json
import requests
from typing import Optional, Dict, List
from app.core.config import LLM_CONFIG, QWEN_API_KEY
from app.core.database import get_db_connection
class LLMService:
    """Generate and store meeting summaries through the Alibaba Qwen API.

    Model name, endpoint URL, sampling parameters and the system prompt are
    read from ``LLM_CONFIG``; the API key from ``QWEN_API_KEY``.  A summary is
    stored in the ``summary`` column of the ``meetings`` table, so each
    meeting holds at most one stored summary.
    """

    def __init__(self):
        self.api_key = QWEN_API_KEY
        self.model_name = LLM_CONFIG["model_name"]
        self.api_url = LLM_CONFIG["api_url"]
        self.system_prompt = LLM_CONFIG["system_prompt"]
        self.max_tokens = LLM_CONFIG["max_tokens"]
        self.temperature = LLM_CONFIG["temperature"]
        self.top_p = LLM_CONFIG["top_p"]

    def generate_meeting_summary(self, meeting_id: int, user_prompt: str = "") -> Optional[Dict]:
        """Generate a summary for a meeting and persist it.

        Args:
            meeting_id: Id of the meeting whose transcript is summarised.
            user_prompt: Optional extra instructions added to the prompt.

        Returns:
            On success a dict with ``summary_id``, ``content`` and
            ``meeting_id``.  On any failure (no transcript, API failure,
            persistence failure, unexpected exception) a dict with a single
            ``error`` key.  ``None`` is never returned.
        """
        try:
            transcript_text = self._get_meeting_transcript(meeting_id)
            if not transcript_text:
                return {"error": "无法获取会议转录内容"}

            full_prompt = self._build_prompt(transcript_text, user_prompt)
            response = self._call_llm_api(full_prompt)
            if not response:
                return {"error": "大模型API调用失败"}

            summary_id = self._save_summary_to_db(meeting_id, response, user_prompt)
            if summary_id is None:
                # The save helper signals failure with None; do not report a
                # summary as stored when it was not.
                return {"error": "保存会议总结失败"}

            return {
                "summary_id": summary_id,
                "content": response,
                "meeting_id": meeting_id
            }
        except Exception as e:
            print(f"生成会议总结错误: {e}")
            return {"error": str(e)}

    @staticmethod
    def _format_segments(segments) -> str:
        """Render ``(speaker_tag, start_ms, end_ms, text)`` rows as prompt lines.

        Each row becomes ``[MM:SS] 说话人<tag>: <text>``; rows are joined with
        newlines.  A missing start time is rendered as ``00:00`` instead of
        aborting the whole transcript.
        """
        lines = []
        for speaker_tag, start_time, _end_time, text in segments:
            start_ms = int(start_time or 0)
            minutes, remainder_ms = divmod(start_ms, 60000)
            seconds = remainder_ms // 1000
            lines.append(f"[{minutes:02d}:{seconds:02d}] 说话人{speaker_tag}: {text}")
        return "\n".join(lines)

    def _get_meeting_transcript(self, meeting_id: int) -> str:
        """Load the meeting's transcript segments ordered by start time.

        Returns:
            The formatted transcript, or an empty string when the meeting has
            no segments or the query fails (the error is printed).
        """
        try:
            with get_db_connection() as connection:
                cursor = connection.cursor()
                query = """
                    SELECT speaker_tag, start_time_ms, end_time_ms, text_content
                    FROM transcript_segments
                    WHERE meeting_id = %s
                    ORDER BY start_time_ms
                """
                cursor.execute(query, (meeting_id,))
                segments = cursor.fetchall()
            return self._format_segments(segments)
        except Exception as e:
            print(f"获取会议转录内容错误: {e}")
            return ""

    def _build_prompt(self, transcript_text: str, user_prompt: str) -> str:
        """Concatenate system prompt, optional user request and transcript."""
        parts = [f"{self.system_prompt}\n\n"]
        if user_prompt:
            parts.append(f"用户额外要求:{user_prompt}\n\n")
        parts.append(f"会议转录内容:\n{transcript_text}\n\n请根据以上内容生成会议总结:")
        return "".join(parts)

    def _call_llm_api(self, prompt: str) -> Optional[str]:
        """POST the prompt to the configured endpoint and return the reply text.

        The reply is taken from ``output.text`` when present, otherwise from
        ``output.choices[0].message.content``.

        Returns:
            The generated text, or ``None`` when the request fails or the
            response has neither field (the problem is printed).
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        data = {
            "model": self.model_name,
            "input": {
                "messages": [
                    {
                        "role": "user",
                        "content": prompt
                    }
                ]
            },
            "parameters": {
                "max_tokens": self.max_tokens,
                "temperature": self.temperature,
                "top_p": self.top_p,
                "incremental_output": False
            }
        }
        try:
            response = requests.post(self.api_url, headers=headers, json=data, timeout=60)
            response.raise_for_status()
            result = response.json()

            output = result.get("output")
            if output and output.get("text"):
                return output["text"]
            if output and output.get("choices"):
                return output["choices"][0]["message"]["content"]

            print(f"API响应格式错误: {result}")
            return None
        except requests.exceptions.RequestException as e:
            print(f"API请求错误: {e}")
            return None
        except json.JSONDecodeError as e:
            print(f"JSON解析错误: {e}")
            return None
        except Exception as e:
            print(f"调用大模型API错误: {e}")
            return None

    def _save_summary_to_db(self, meeting_id: int, summary_content: str, user_prompt: str) -> Optional[int]:
        """Write the summary into the ``summary`` column of the meeting row.

        ``user_prompt`` is accepted for interface compatibility but is not
        stored by this method.

        Returns:
            ``meeting_id`` when the update was committed, ``None`` when the
            database operation raised (the error is printed).
        """
        try:
            with get_db_connection() as connection:
                cursor = connection.cursor()
                update_query = """
                    UPDATE meetings
                    SET summary = %s
                    WHERE meeting_id = %s
                """
                cursor.execute(update_query, (summary_content, meeting_id))
                connection.commit()
                print(f"成功保存会议总结到meetings表meeting_id: {meeting_id}")
                return meeting_id
        except Exception as e:
            print(f"保存总结到数据库错误: {e}")
            return None

    def _fetch_summary(self, meeting_id: int) -> Optional[str]:
        """Return the stored summary of the meeting, or ``None`` if absent or empty.

        Database errors propagate to the caller, which decides how to report them.
        """
        with get_db_connection() as connection:
            cursor = connection.cursor()
            query = """
                SELECT summary
                FROM meetings
                WHERE meeting_id = %s
            """
            cursor.execute(query, (meeting_id,))
            row = cursor.fetchone()
        return row[0] if row and row[0] else None

    def get_meeting_summaries(self, meeting_id: int) -> List[Dict]:
        """Return the meeting's current summary wrapped in a list.

        Returns:
            A one-element list ``[{"id", "content", "user_prompt",
            "created_at"}]`` when a summary exists, otherwise an empty list
            (also on database errors, which are printed).  ``user_prompt`` is
            always ``""`` and ``created_at`` always ``None`` because only the
            summary text is read from the ``meetings`` row.
        """
        try:
            summary = self._fetch_summary(meeting_id)
        except Exception as e:
            print(f"获取会议总结错误: {e}")
            return []
        if not summary:
            return []
        return [{
            "id": meeting_id,
            "content": summary,
            "user_prompt": "",
            "created_at": None
        }]

    def get_current_meeting_summary(self, meeting_id: int) -> Optional[str]:
        """Return the meeting's stored summary text.

        Returns:
            The summary string, or ``None`` when there is none or the query
            fails (the error is printed).
        """
        try:
            return self._fetch_summary(meeting_id)
        except Exception as e:
            print(f"获取会议当前总结错误: {e}")
            return None
# 测试代码
if __name__ == '__main__':
    # Manual smoke test: generate a summary for one meeting, then list its summaries.
    sample_meeting_id = 38
    sample_prompt = "请重点关注决策事项和待办任务"

    print("--- 运行LLM服务测试 ---")
    service = LLMService()

    # Generate the summary and report the outcome.
    outcome = service.generate_meeting_summary(sample_meeting_id, sample_prompt)
    if not outcome.get("error"):
        print(f"总结生成成功ID: {outcome.get('summary_id')}")
        print(f"总结内容: {outcome.get('content')[:200]}...")
    else:
        print(f"生成总结失败: {outcome['error']}")

    # Fetch the stored summaries.
    history = service.get_meeting_summaries(sample_meeting_id)
    print(f"获取到 {len(history)} 个历史总结")

    print("--- LLM服务测试完成 ---")

View File

@ -3,12 +3,12 @@ import requests
import json
import dashscope
from dashscope.audio.asr import Transcription
from app.core.config import DASHSCOPE_API_KEY
from app.core.config import QWEN_API_KEY
from app.core.database import get_db_connection
class AIService:
class VoiceService:
def __init__(self):
dashscope.api_key = DASHSCOPE_API_KEY
dashscope.api_key = QWEN_API_KEY
def transcribe(self, file_urls: list[str], meeting_id: int):
print(f"Starting transcription for meeting_id: {meeting_id}, files: {file_urls}")
@ -98,11 +98,11 @@ if __name__ == '__main__':
# 1. Make sure you have a meeting with meeting_id = 1 in your database.
# 2. Make sure the audio file URL is correct and accessible.
test_meeting_id = 38
test_meeting_id = 40
# Please replace with your own publicly accessible audio file URL
test_file_urls = ['http://t0vogyxkz.hn-bkt.clouddn.com/record/meeting_records_2.mp3']
test_file_urls = ['http://t0vogyxkz.hn-bkt.clouddn.com/test/dajiang.m4a']
print("--- Running AI Service Test ---")
ai_service = AIService()
ai_service.transcribe(file_urls=test_file_urls, meeting_id=test_meeting_id)
print("--- AI Service Test Finished ---")
print("--- Running Voice Service Test ---")
voice_service = VoiceService()
voice_service.transcribe(file_urls=test_file_urls, meeting_id=test_meeting_id)
print("--- Voice Service Test Finished ---")

View File

@ -1,104 +0,0 @@
# -*- coding: utf-8 -*-
import os
import sys
import asyncio
from pathlib import Path
# Add the app directory to the path
sys.path.append(os.path.join(os.path.dirname(__file__), 'app'))
# Mock the FastAPI UploadFile
class MockUploadFile:
    """In-memory stand-in exposing ``filename``, ``content``, ``size``, ``read`` and ``file``."""

    def __init__(self, filename, content):
        self.filename = filename
        self.content = content
        self.size = len(content)
        # Offset of the next unread element of ``content``.
        self._file_pos = 0

    async def read(self, size=-1):
        """Return the next ``size`` elements of the content, or all remaining ones for ``-1``."""
        begin = self._file_pos
        read_all = size == -1
        chunk = self.content[begin:] if read_all else self.content[begin:begin + size]
        self._file_pos = len(self.content) if read_all else begin + len(chunk)
        return chunk

    def file(self):
        """Return a fresh ``BytesIO`` over the content, encoding ``str`` content first."""
        from io import BytesIO
        payload = self.content
        if isinstance(payload, str):
            payload = payload.encode()
        return BytesIO(payload)
async def test_audio_upload():
    """Manually exercise the audio upload path against Qiniu.

    Reads a local audio file, writes a copy under ``AUDIO_DIR`` with a random
    name, uploads that copy through ``qiniu_service.upload_audio_file`` and
    prints each intermediate result.  The temporary copy is removed after the
    upload attempt, both on the normal path and in the exception handler.
    """
    from app.api.endpoints.meetings import AUDIO_DIR
    from app.services.qiniu_service import qiniu_service
    # Path to the problematic audio file
    # NOTE(review): absolute path on a developer machine; the function returns
    # early when the file is not present.
    audio_file_path = "/Users/jiliu/工作/projects/imeeting/backend/uploads/audio/31ce039a-f619-4869-91c8-eab934bbd1d4.m4a"
    # Read the content of the audio file
    try:
        with open(audio_file_path, "rb") as f:
            test_content = f.read()
        print(f"Successfully read content from {audio_file_path}")
    except FileNotFoundError:
        print(f"Error: The file was not found at {audio_file_path}")
        return
    # Create mock UploadFile with the real audio content
    mock_file = MockUploadFile("31ce039a-f619-4869-91c8-eab934bbd1d4.m4a", test_content)
    # Create temporary file for upload (simulating the API endpoint)
    file_extension = ".m4a"
    from uuid import uuid4
    temp_filename = f"{uuid4()}{file_extension}"
    temp_path = AUDIO_DIR / temp_filename
    print(f"Creating temporary file at: {temp_path}")
    # Save file temporarily (simulating the API endpoint)
    try:
        # Simulate shutil.copyfileobj(mock_file.file(), open(temp_path, "wb"))
        with open(temp_path, "wb") as buffer:
            buffer.write(mock_file.content)  # content is already bytes
        print(f"Temporary file created successfully. Exists: {temp_path.exists()}")
        print(f"Temporary file size: {temp_path.stat().st_size}")
    except Exception as e:
        print(f"Failed to save temporary file: {str(e)}")
        return
    # Test upload to Qiniu (simulating the API endpoint)
    try:
        print(f"Attempting to upload audio to Qiniu...")
        print(f"Temp file path: {temp_path}")
        print(f"Temp file exists: {temp_path.exists()}")
        # The arguments are the temp file path, the literal 11 and a display
        # name; their meaning is defined by qiniu_service (not visible here).
        success, qiniu_url, error_msg = qiniu_service.upload_audio_file(
            str(temp_path), 11, "test-audio.mp3"
        )
        print(f"Qiniu upload result - success: {success}")
        print(f"Qiniu upload result - url: {qiniu_url}")
        print(f"Qiniu upload result - error: {error_msg}")
        # Clean up temporary file
        if temp_path.exists():
            temp_path.unlink()
            print("Temporary file cleaned up")
        if success:
            print("Upload successful!")
            print(f"URL: {qiniu_url}")
        else:
            print(f"Upload failed: {error_msg}")
    except Exception as e:
        print(f"Exception in audio upload: {str(e)}")
        import traceback
        print(f"Traceback: {traceback.format_exc()}")
        # Clean up temporary file in case of error
        if temp_path.exists():
            temp_path.unlink()
# Run the coroutine to completion when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(test_audio_upload())

View File

@ -1,21 +0,0 @@
# -*- coding: utf-8 -*-
# flake8: noqa
import os

from qiniu import Auth, put_data, put_file_v2, etag
import qiniu.config

# Credentials are read from the environment so that no secret is stored in
# the repository. A missing variable fails immediately with KeyError.
# NOTE(review): the variable names are an assumption; align them with the
# names the project configuration uses for the Qiniu keys.
access_key = os.environ['QINIU_ACCESS_KEY']
secret_key = os.environ['QINIU_SECRET_KEY']

# Build the authentication object.
q = Auth(access_key, secret_key)

# Target bucket.
bucket_name = 'imeeting'

# Object key the file is stored under after upload.
key = 'test/result.json'

# Create an upload token for this bucket/key, valid for 3600 seconds.
token = q.upload_token(bucket_name, key, 3600)

# Local path of the file to upload.
localfile = './uploads/result.json'

ret, info = put_file_v2(token, key, localfile, version='v2')
print(info)

# Verify the service stored the expected key and content hash.
assert ret['key'] == key
assert ret['hash'] == etag(localfile)