增加了上传自动总结
parent
72d9ebdc07
commit
3260b99c6b
|
|
@ -0,0 +1,51 @@
|
|||
# Python相关
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
*.egg-info/
|
||||
dist/
|
||||
build/
|
||||
venv/
|
||||
env/
|
||||
ENV/
|
||||
|
||||
# 用户上传文件(最重要!)
|
||||
uploads/
|
||||
|
||||
# 测试和开发文件
|
||||
test/
|
||||
tests/
|
||||
*.pytest_cache/
|
||||
.coverage
|
||||
htmlcov/
|
||||
|
||||
# Git
|
||||
.git/
|
||||
.gitignore
|
||||
.gitattributes
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# 日志
|
||||
*.log
|
||||
logs/
|
||||
|
||||
# 环境变量
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# 文档
|
||||
*.md
|
||||
docs/
|
||||
|
||||
# 其他
|
||||
.DS_Store
|
||||
*.bak
|
||||
*.tmp
|
||||
17
Dockerfile
17
Dockerfile
|
|
@ -12,18 +12,19 @@ ENV PYTHONUNBUFFERED=1
|
|||
RUN sed -i 's/deb.debian.org/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list.d/debian.sources
|
||||
RUN sed -i 's/security.debian.org/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list.d/debian.sources
|
||||
|
||||
# 安装系统依赖
|
||||
# 复制依赖文件
|
||||
COPY requirements-prod.txt .
|
||||
|
||||
# 安装系统依赖、Python依赖,然后清理(一个RUN命令减少层大小)
|
||||
RUN apt-get update && apt-get install -y \
|
||||
gcc \
|
||||
default-libmysqlclient-dev \
|
||||
pkg-config \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# 复制依赖文件
|
||||
COPY requirements-prod.txt .
|
||||
|
||||
# 安装Python依赖
|
||||
RUN pip install --index-url https://mirrors.aliyun.com/pypi/simple --no-cache-dir -r requirements-prod.txt
|
||||
&& pip install --index-url https://mirrors.aliyun.com/pypi/simple --no-cache-dir -r requirements-prod.txt \
|
||||
&& apt-get purge -y gcc pkg-config \
|
||||
&& apt-get autoremove -y \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
# 复制应用代码
|
||||
COPY . .
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
from fastapi import APIRouter, UploadFile, File, Form, Depends, BackgroundTasks
|
||||
from fastapi import APIRouter, UploadFile, File, Form, Depends, BackgroundTasks, Request, Header
|
||||
from fastapi.responses import StreamingResponse, Response
|
||||
from app.models.models import Meeting, TranscriptSegment, TranscriptionTaskStatus, CreateMeetingRequest, UpdateMeetingRequest, SpeakerTagUpdateRequest, BatchSpeakerTagUpdateRequest, BatchTranscriptUpdateRequest, Tag
|
||||
from app.core.database import get_db_connection
|
||||
from app.core.config import BASE_DIR, AUDIO_DIR, MARKDOWN_DIR, ALLOWED_EXTENSIONS, ALLOWED_IMAGE_EXTENSIONS
|
||||
|
|
@ -11,9 +12,11 @@ from app.core.response import create_api_response
|
|||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel
|
||||
from urllib.parse import quote
|
||||
import os
|
||||
import uuid
|
||||
import shutil
|
||||
import mimetypes
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
|
@ -23,6 +26,194 @@ transcription_service = AsyncTranscriptionService()
|
|||
class GenerateSummaryRequest(BaseModel):
|
||||
user_prompt: Optional[str] = ""
|
||||
|
||||
def _handle_audio_upload(
|
||||
audio_file: UploadFile,
|
||||
meeting_id: int,
|
||||
force_replace_bool: bool,
|
||||
current_user: dict
|
||||
):
|
||||
"""
|
||||
音频上传的公共处理逻辑
|
||||
|
||||
Args:
|
||||
audio_file: 上传的音频文件
|
||||
meeting_id: 会议ID
|
||||
force_replace_bool: 是否强制替换
|
||||
current_user: 当前用户
|
||||
|
||||
Returns:
|
||||
dict: {
|
||||
"success": bool, # 是否成功
|
||||
"needs_confirmation": bool, # 是否需要用户确认
|
||||
"response": dict, # 如果需要返回,这里是响应数据
|
||||
"file_info": dict, # 文件信息 (成功时)
|
||||
"transcription_task_id": str, # 转录任务ID (成功时)
|
||||
"replaced_existing": bool, # 是否替换了现有文件 (成功时)
|
||||
"has_transcription": bool # 原来是否有转录记录 (成功时)
|
||||
}
|
||||
"""
|
||||
# 1. 文件类型验证
|
||||
file_extension = os.path.splitext(audio_file.filename)[1].lower()
|
||||
if file_extension not in ALLOWED_EXTENSIONS:
|
||||
return {
|
||||
"success": False,
|
||||
"response": create_api_response(
|
||||
code="400",
|
||||
message=f"不支持的文件类型。支持的类型: {', '.join(ALLOWED_EXTENSIONS)}"
|
||||
)
|
||||
}
|
||||
|
||||
# 2. 文件大小验证
|
||||
max_file_size = getattr(config_module, 'MAX_FILE_SIZE', 100 * 1024 * 1024)
|
||||
if audio_file.size > max_file_size:
|
||||
return {
|
||||
"success": False,
|
||||
"response": create_api_response(
|
||||
code="400",
|
||||
message=f"文件大小超过 {max_file_size // (1024 * 1024)}MB 限制"
|
||||
)
|
||||
}
|
||||
|
||||
# 3. 权限和已有文件检查
|
||||
try:
|
||||
with get_db_connection() as connection:
|
||||
cursor = connection.cursor(dictionary=True)
|
||||
|
||||
# 检查会议是否存在及权限
|
||||
cursor.execute("SELECT user_id FROM meetings WHERE meeting_id = %s", (meeting_id,))
|
||||
meeting = cursor.fetchone()
|
||||
if not meeting:
|
||||
return {
|
||||
"success": False,
|
||||
"response": create_api_response(code="404", message="会议不存在")
|
||||
}
|
||||
if meeting['user_id'] != current_user['user_id']:
|
||||
return {
|
||||
"success": False,
|
||||
"response": create_api_response(code="403", message="无权限操作此会议")
|
||||
}
|
||||
|
||||
# 检查已有音频文件
|
||||
cursor.execute(
|
||||
"SELECT file_name, file_path, upload_time FROM audio_files WHERE meeting_id = %s",
|
||||
(meeting_id,)
|
||||
)
|
||||
existing_info = cursor.fetchone()
|
||||
|
||||
# 检查是否有转录记录
|
||||
has_transcription = False
|
||||
if existing_info:
|
||||
cursor.execute(
|
||||
"SELECT COUNT(*) as segment_count FROM transcript_segments WHERE meeting_id = %s",
|
||||
(meeting_id,)
|
||||
)
|
||||
has_transcription = cursor.fetchone()['segment_count'] > 0
|
||||
|
||||
cursor.close()
|
||||
except Exception as e:
|
||||
return {
|
||||
"success": False,
|
||||
"response": create_api_response(code="500", message=f"检查已有文件失败: {str(e)}")
|
||||
}
|
||||
|
||||
# 4. 如果已有转录记录且未确认替换,返回提示
|
||||
if existing_info and has_transcription and not force_replace_bool:
|
||||
return {
|
||||
"success": False,
|
||||
"needs_confirmation": True,
|
||||
"response": create_api_response(
|
||||
code="300",
|
||||
message="该会议已有音频文件和转录记录,重新上传将删除现有的转录内容和会议总结",
|
||||
data={
|
||||
"requires_confirmation": True,
|
||||
"existing_file": {
|
||||
"file_name": existing_info['file_name'],
|
||||
"upload_time": existing_info['upload_time'].isoformat() if existing_info['upload_time'] else None
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
# 5. 保存音频文件
|
||||
meeting_dir = AUDIO_DIR / str(meeting_id)
|
||||
meeting_dir.mkdir(exist_ok=True)
|
||||
unique_filename = f"{uuid.uuid4()}{file_extension}"
|
||||
absolute_path = meeting_dir / unique_filename
|
||||
relative_path = absolute_path.relative_to(BASE_DIR)
|
||||
|
||||
try:
|
||||
with open(absolute_path, "wb") as buffer:
|
||||
shutil.copyfileobj(audio_file.file, buffer)
|
||||
except Exception as e:
|
||||
return {
|
||||
"success": False,
|
||||
"response": create_api_response(code="500", message=f"保存文件失败: {str(e)}")
|
||||
}
|
||||
|
||||
transcription_task_id = None
|
||||
replaced_existing = existing_info is not None
|
||||
|
||||
try:
|
||||
# 6. 更新数据库记录
|
||||
with get_db_connection() as connection:
|
||||
cursor = connection.cursor(dictionary=True)
|
||||
|
||||
# 删除旧的音频文件
|
||||
if replaced_existing and force_replace_bool:
|
||||
if existing_info and existing_info['file_path']:
|
||||
old_file_path = BASE_DIR / existing_info['file_path'].lstrip('/')
|
||||
if old_file_path.exists():
|
||||
try:
|
||||
os.remove(old_file_path)
|
||||
print(f"Deleted old audio file: {old_file_path}")
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to delete old file {old_file_path}: {e}")
|
||||
|
||||
# 更新或插入音频文件记录
|
||||
if replaced_existing:
|
||||
cursor.execute(
|
||||
'UPDATE audio_files SET file_name = %s, file_path = %s, file_size = %s, upload_time = NOW(), task_id = NULL WHERE meeting_id = %s',
|
||||
(audio_file.filename, '/' + str(relative_path), audio_file.size, meeting_id)
|
||||
)
|
||||
else:
|
||||
cursor.execute(
|
||||
'INSERT INTO audio_files (meeting_id, file_name, file_path, file_size, upload_time) VALUES (%s, %s, %s, %s, NOW())',
|
||||
(meeting_id, audio_file.filename, '/' + str(relative_path), audio_file.size)
|
||||
)
|
||||
|
||||
connection.commit()
|
||||
cursor.close()
|
||||
|
||||
# 7. 启动转录任务
|
||||
try:
|
||||
transcription_task_id = transcription_service.start_transcription(meeting_id, '/' + str(relative_path))
|
||||
print(f"Transcription task {transcription_task_id} started for meeting {meeting_id}")
|
||||
except Exception as e:
|
||||
print(f"Failed to start transcription: {e}")
|
||||
raise
|
||||
|
||||
except Exception as e:
|
||||
# 出错时清理已上传的文件
|
||||
if os.path.exists(absolute_path):
|
||||
os.remove(absolute_path)
|
||||
return {
|
||||
"success": False,
|
||||
"response": create_api_response(code="500", message=f"处理失败: {str(e)}")
|
||||
}
|
||||
|
||||
# 8. 返回成功结果
|
||||
return {
|
||||
"success": True,
|
||||
"file_info": {
|
||||
"file_name": audio_file.filename,
|
||||
"file_path": '/' + str(relative_path),
|
||||
"file_size": audio_file.size
|
||||
},
|
||||
"transcription_task_id": transcription_task_id,
|
||||
"replaced_existing": replaced_existing,
|
||||
"has_transcription": has_transcription
|
||||
}
|
||||
|
||||
def _process_tags(cursor, tag_string: Optional[str], creator_id: Optional[int] = None) -> List[Tag]:
|
||||
"""
|
||||
处理标签:查询已存在的标签,如果提供了 creator_id 则创建不存在的标签
|
||||
|
|
@ -365,86 +556,72 @@ def get_meeting_for_edit(meeting_id: int, current_user: dict = Depends(get_curre
|
|||
return create_api_response(code="200", message="获取会议编辑信息成功", data=meeting_data)
|
||||
|
||||
@router.post("/meetings/upload-audio")
|
||||
async def upload_audio(audio_file: UploadFile = File(...), meeting_id: int = Form(...), force_replace: str = Form("false"), current_user: dict = Depends(get_current_user)):
|
||||
async def upload_audio(
|
||||
audio_file: UploadFile = File(...),
|
||||
meeting_id: int = Form(...),
|
||||
force_replace: str = Form("false"),
|
||||
auto_summarize: str = Form("true"),
|
||||
background_tasks: BackgroundTasks = None,
|
||||
current_user: dict = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
音频文件上传接口
|
||||
|
||||
上传音频文件并启动转录任务,可选择是否自动生成总结
|
||||
|
||||
Args:
|
||||
audio_file: 音频文件
|
||||
meeting_id: 会议ID
|
||||
force_replace: 是否强制替换("true"/"false")
|
||||
auto_summarize: 是否自动生成总结("true"/"false",默认"true")
|
||||
background_tasks: FastAPI后台任务
|
||||
current_user: 当前登录用户
|
||||
|
||||
Returns:
|
||||
HTTP 300: 需要用户确认(已有转录记录)
|
||||
HTTP 200: 处理成功,返回任务ID
|
||||
HTTP 400/403/404/500: 各种错误情况
|
||||
"""
|
||||
force_replace_bool = force_replace.lower() in ("true", "1", "yes")
|
||||
file_extension = os.path.splitext(audio_file.filename)[1].lower()
|
||||
if file_extension not in ALLOWED_EXTENSIONS:
|
||||
return create_api_response(code="400", message=f"Unsupported file type. Allowed types: {', '.join(ALLOWED_EXTENSIONS)}")
|
||||
max_file_size = getattr(config_module, 'MAX_FILE_SIZE', 100 * 1024 * 1024)
|
||||
if audio_file.size > max_file_size:
|
||||
return create_api_response(code="400", message=f"File size exceeds {max_file_size // (1024 * 1024)}MB limit")
|
||||
try:
|
||||
with get_db_connection() as connection:
|
||||
cursor = connection.cursor(dictionary=True)
|
||||
cursor.execute("SELECT user_id FROM meetings WHERE meeting_id = %s", (meeting_id,))
|
||||
meeting = cursor.fetchone()
|
||||
if not meeting:
|
||||
return create_api_response(code="404", message="Meeting not found")
|
||||
if meeting['user_id'] != current_user['user_id']:
|
||||
return create_api_response(code="403", message="Permission denied")
|
||||
cursor.execute("SELECT file_name, file_path, upload_time FROM audio_files WHERE meeting_id = %s", (meeting_id,))
|
||||
existing_info = cursor.fetchone()
|
||||
has_transcription = False
|
||||
if existing_info:
|
||||
cursor.execute("SELECT COUNT(*) as segment_count FROM transcript_segments WHERE meeting_id = %s", (meeting_id,))
|
||||
has_transcription = cursor.fetchone()['segment_count'] > 0
|
||||
cursor.close()
|
||||
except Exception as e:
|
||||
return create_api_response(code="500", message=f"Failed to check existing files: {str(e)}")
|
||||
if existing_info and has_transcription and not force_replace_bool:
|
||||
return create_api_response(code="300", message="该会议已有音频文件和转录记录,重新上传将删除现有的转录内容", data={
|
||||
"requires_confirmation": True,
|
||||
"existing_file": {
|
||||
"file_name": existing_info['file_name'],
|
||||
"upload_time": existing_info['upload_time'].isoformat() if existing_info['upload_time'] else None
|
||||
}
|
||||
})
|
||||
meeting_dir = AUDIO_DIR / str(meeting_id)
|
||||
meeting_dir.mkdir(exist_ok=True)
|
||||
unique_filename = f"{uuid.uuid4()}{file_extension}"
|
||||
absolute_path = meeting_dir / unique_filename
|
||||
relative_path = absolute_path.relative_to(BASE_DIR)
|
||||
try:
|
||||
with open(absolute_path, "wb") as buffer:
|
||||
shutil.copyfileobj(audio_file.file, buffer)
|
||||
except Exception as e:
|
||||
return create_api_response(code="500", message=f"Failed to save file: {str(e)}")
|
||||
task_id = None
|
||||
replaced_existing = existing_info is not None
|
||||
try:
|
||||
with get_db_connection() as connection:
|
||||
cursor = connection.cursor(dictionary=True)
|
||||
if replaced_existing and force_replace_bool:
|
||||
# 只删除旧的音频文件,转录数据由 start_transcription 统一处理
|
||||
if existing_info and existing_info['file_path']:
|
||||
old_file_path = BASE_DIR / existing_info['file_path'].lstrip('/')
|
||||
if old_file_path.exists():
|
||||
try:
|
||||
os.remove(old_file_path)
|
||||
print(f"Deleted old audio file: {old_file_path}")
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to delete old file {old_file_path}: {e}")
|
||||
if replaced_existing:
|
||||
cursor.execute('UPDATE audio_files SET file_name = %s, file_path = %s, file_size = %s, upload_time = NOW(), task_id = NULL WHERE meeting_id = %s', (audio_file.filename, '/'+str(relative_path), audio_file.size, meeting_id))
|
||||
else:
|
||||
cursor.execute('INSERT INTO audio_files (meeting_id, file_name, file_path, file_size, upload_time) VALUES (%s, %s, %s, %s, NOW())', (meeting_id, audio_file.filename, '/'+str(relative_path), audio_file.size))
|
||||
connection.commit()
|
||||
cursor.close()
|
||||
try:
|
||||
# start_transcription 会自动删除旧的转录数据和任务记录
|
||||
task_id = transcription_service.start_transcription(meeting_id, '/'+str(relative_path))
|
||||
print(f"Transcription task started with ID: {task_id}")
|
||||
except Exception as e:
|
||||
print(f"Failed to start transcription task: {e}")
|
||||
except Exception as e:
|
||||
if os.path.exists(absolute_path):
|
||||
os.remove(absolute_path)
|
||||
return create_api_response(code="500", message=f"Failed to save file info: {str(e)}")
|
||||
return create_api_response(code="200", message="Audio file uploaded successfully" + (" and replaced existing file" if replaced_existing else ""), data={
|
||||
"file_name": audio_file.filename, "file_path": '/'+str(relative_path), "task_id": task_id,
|
||||
"transcription_started": task_id is not None, "replaced_existing": replaced_existing,
|
||||
"previous_transcription_cleared": replaced_existing and has_transcription
|
||||
})
|
||||
auto_summarize_bool = auto_summarize.lower() in ("true", "1", "yes")
|
||||
|
||||
# 调用公共处理方法
|
||||
result = _handle_audio_upload(audio_file, meeting_id, force_replace_bool, current_user)
|
||||
|
||||
# 如果不成功,直接返回响应
|
||||
if not result["success"]:
|
||||
return result["response"]
|
||||
|
||||
# 成功:根据auto_summarize参数决定是否添加监控任务
|
||||
transcription_task_id = result["transcription_task_id"]
|
||||
if auto_summarize_bool and transcription_task_id:
|
||||
background_tasks.add_task(
|
||||
async_meeting_service.monitor_and_auto_summarize,
|
||||
meeting_id,
|
||||
transcription_task_id
|
||||
)
|
||||
print(f"[upload-audio] Auto-summarize enabled, monitor task added for meeting {meeting_id}")
|
||||
message_suffix = ",正在进行转录和总结"
|
||||
else:
|
||||
print(f"[upload-audio] Auto-summarize disabled for meeting {meeting_id}")
|
||||
message_suffix = ""
|
||||
|
||||
# 返回成功响应
|
||||
return create_api_response(
|
||||
code="200",
|
||||
message="Audio file uploaded successfully" +
|
||||
(" and replaced existing file" if result["replaced_existing"] else "") +
|
||||
message_suffix,
|
||||
data={
|
||||
"file_name": result["file_info"]["file_name"],
|
||||
"file_path": result["file_info"]["file_path"],
|
||||
"task_id": transcription_task_id,
|
||||
"transcription_started": transcription_task_id is not None,
|
||||
"auto_summarize": auto_summarize_bool,
|
||||
"replaced_existing": result["replaced_existing"],
|
||||
"previous_transcription_cleared": result["replaced_existing"] and result["has_transcription"]
|
||||
}
|
||||
)
|
||||
|
||||
@router.get("/meetings/{meeting_id}/audio")
|
||||
def get_audio_file(meeting_id: int, current_user: dict = Depends(get_current_user)):
|
||||
|
|
@ -456,6 +633,115 @@ def get_audio_file(meeting_id: int, current_user: dict = Depends(get_current_use
|
|||
return create_api_response(code="404", message="Audio file not found for this meeting")
|
||||
return create_api_response(code="200", message="Audio file found", data=audio_file)
|
||||
|
||||
@router.get("/meetings/{meeting_id}/audio/stream")
|
||||
async def stream_audio_file(
|
||||
meeting_id: int,
|
||||
range: Optional[str] = Header(None, alias="Range")
|
||||
):
|
||||
"""
|
||||
音频文件流式传输端点,支持HTTP Range请求(Safari浏览器必需)
|
||||
无需登录认证,用于前端audio标签直接访问
|
||||
"""
|
||||
# 获取音频文件信息
|
||||
with get_db_connection() as connection:
|
||||
cursor = connection.cursor(dictionary=True)
|
||||
cursor.execute("SELECT file_name, file_path, file_size FROM audio_files WHERE meeting_id = %s", (meeting_id,))
|
||||
audio_file = cursor.fetchone()
|
||||
if not audio_file:
|
||||
return Response(content="Audio file not found", status_code=404)
|
||||
|
||||
# 构建完整文件路径
|
||||
file_path = BASE_DIR / audio_file['file_path'].lstrip('/')
|
||||
if not file_path.exists():
|
||||
return Response(content="Audio file not found on disk", status_code=404)
|
||||
|
||||
# 总是使用实际文件大小(不依赖数据库记录,防止文件被优化后大小不匹配)
|
||||
file_size = os.path.getsize(file_path)
|
||||
file_name = audio_file['file_name']
|
||||
|
||||
# 根据文件扩展名确定MIME类型
|
||||
extension = os.path.splitext(file_name)[1].lower()
|
||||
mime_types = {
|
||||
'.mp3': 'audio/mpeg',
|
||||
'.m4a': 'audio/mp4',
|
||||
'.wav': 'audio/wav',
|
||||
'.mpeg': 'audio/mpeg',
|
||||
'.mp4': 'audio/mp4'
|
||||
}
|
||||
content_type = mime_types.get(extension, 'audio/mpeg')
|
||||
|
||||
# 处理Range请求
|
||||
start = 0
|
||||
end = file_size - 1
|
||||
|
||||
if range:
|
||||
# 解析Range头: "bytes=start-end" 或 "bytes=start-"
|
||||
try:
|
||||
range_spec = range.replace("bytes=", "")
|
||||
if "-" in range_spec:
|
||||
parts = range_spec.split("-")
|
||||
if parts[0]:
|
||||
start = int(parts[0])
|
||||
if parts[1]:
|
||||
end = int(parts[1])
|
||||
except (ValueError, IndexError):
|
||||
pass
|
||||
|
||||
# 确保范围有效
|
||||
if start >= file_size:
|
||||
return Response(
|
||||
content="Range Not Satisfiable",
|
||||
status_code=416,
|
||||
headers={"Content-Range": f"bytes */{file_size}"}
|
||||
)
|
||||
|
||||
end = min(end, file_size - 1)
|
||||
content_length = end - start + 1
|
||||
|
||||
# 对所有文件名统一使用RFC 5987标准的URL编码格式
|
||||
# 这样可以正确处理中文、特殊字符等所有情况
|
||||
encoded_filename = quote(file_name)
|
||||
filename_header = f"inline; filename*=UTF-8''{encoded_filename}"
|
||||
|
||||
# 生成器函数用于流式读取文件
|
||||
def iter_file():
|
||||
with open(file_path, 'rb') as f:
|
||||
f.seek(start)
|
||||
remaining = content_length
|
||||
chunk_size = 64 * 1024 # 64KB chunks
|
||||
while remaining > 0:
|
||||
read_size = min(chunk_size, remaining)
|
||||
data = f.read(read_size)
|
||||
if not data:
|
||||
break
|
||||
remaining -= len(data)
|
||||
yield data
|
||||
|
||||
# 根据是否有Range请求返回不同的响应
|
||||
if range:
|
||||
return StreamingResponse(
|
||||
iter_file(),
|
||||
status_code=206, # Partial Content
|
||||
media_type=content_type,
|
||||
headers={
|
||||
"Content-Range": f"bytes {start}-{end}/{file_size}",
|
||||
"Accept-Ranges": "bytes",
|
||||
"Content-Length": str(content_length),
|
||||
"Content-Disposition": filename_header
|
||||
}
|
||||
)
|
||||
else:
|
||||
return StreamingResponse(
|
||||
iter_file(),
|
||||
status_code=200,
|
||||
media_type=content_type,
|
||||
headers={
|
||||
"Accept-Ranges": "bytes",
|
||||
"Content-Length": str(file_size),
|
||||
"Content-Disposition": filename_header
|
||||
}
|
||||
)
|
||||
|
||||
@router.get("/meetings/{meeting_id}/transcription/status")
|
||||
def get_meeting_transcription_status(meeting_id: int, current_user: dict = Depends(get_current_user)):
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -61,6 +61,12 @@ REDIS_CONFIG = {
|
|||
# Dashscope (Tongyi Qwen) API Key
|
||||
QWEN_API_KEY = os.getenv('QWEN_API_KEY', 'sk-c2bf06ea56b4491ea3d1e37fdb472b8f')
|
||||
|
||||
# 转录轮询配置 - 用于 upload-audio-complete 接口
|
||||
TRANSCRIPTION_POLL_CONFIG = {
|
||||
'poll_interval': int(os.getenv('TRANSCRIPTION_POLL_INTERVAL', '10')), # 轮询间隔:10秒
|
||||
'max_wait_time': int(os.getenv('TRANSCRIPTION_MAX_WAIT_TIME', '1800')), # 最大等待:30分钟
|
||||
}
|
||||
|
||||
# LLM配置 - 阿里Qwen3大模型
|
||||
LLM_CONFIG = {
|
||||
'model_name': os.getenv('LLM_MODEL_NAME', 'qwen-plus'),
|
||||
|
|
|
|||
|
|
@ -8,8 +8,9 @@ from datetime import datetime
|
|||
from typing import Optional, Dict, Any, List
|
||||
|
||||
import redis
|
||||
from app.core.config import REDIS_CONFIG
|
||||
from app.core.config import REDIS_CONFIG, TRANSCRIPTION_POLL_CONFIG
|
||||
from app.core.database import get_db_connection
|
||||
from app.services.async_transcription_service import AsyncTranscriptionService
|
||||
from app.services.llm_service import LLMService
|
||||
|
||||
class AsyncMeetingService:
|
||||
|
|
@ -110,6 +111,93 @@ class AsyncMeetingService:
|
|||
self._update_task_in_db(task_id, 'failed', 0, error_message=error_msg)
|
||||
self._update_task_status_in_redis(task_id, 'failed', 0, error_message=error_msg)
|
||||
|
||||
def monitor_and_auto_summarize(self, meeting_id: int, transcription_task_id: str):
|
||||
"""
|
||||
监控转录任务,完成后自动生成总结
|
||||
此方法设计为由BackgroundTasks调用,在后台运行
|
||||
|
||||
Args:
|
||||
meeting_id: 会议ID
|
||||
transcription_task_id: 转录任务ID
|
||||
|
||||
流程:
|
||||
1. 循环轮询转录任务状态
|
||||
2. 转录成功后自动启动总结任务
|
||||
3. 转录失败或超时则停止轮询并记录日志
|
||||
"""
|
||||
print(f"[Monitor] Started monitoring transcription task {transcription_task_id} for meeting {meeting_id}")
|
||||
|
||||
# 获取配置参数
|
||||
poll_interval = TRANSCRIPTION_POLL_CONFIG['poll_interval']
|
||||
max_wait_time = TRANSCRIPTION_POLL_CONFIG['max_wait_time']
|
||||
max_polls = max_wait_time // poll_interval
|
||||
|
||||
# 延迟导入以避免循环导入
|
||||
transcription_service = AsyncTranscriptionService()
|
||||
|
||||
poll_count = 0
|
||||
|
||||
try:
|
||||
while poll_count < max_polls:
|
||||
poll_count += 1
|
||||
elapsed_time = poll_count * poll_interval
|
||||
|
||||
try:
|
||||
# 查询转录任务状态
|
||||
status_info = transcription_service.get_task_status(transcription_task_id)
|
||||
current_status = status_info.get('status', 'unknown')
|
||||
progress = status_info.get('progress', 0)
|
||||
|
||||
print(f"[Monitor] Poll {poll_count}/{max_polls} - Status: {current_status}, Progress: {progress}%, Elapsed: {elapsed_time}s")
|
||||
|
||||
# 检查转录是否完成
|
||||
if current_status == 'completed':
|
||||
print(f"[Monitor] Transcription completed successfully for meeting {meeting_id}")
|
||||
|
||||
# 启动总结任务
|
||||
try:
|
||||
summary_task_id = self.start_summary_generation(meeting_id, user_prompt="")
|
||||
print(f"[Monitor] Summary task {summary_task_id} started for meeting {meeting_id}")
|
||||
|
||||
# 在后台执行总结任务
|
||||
self._process_task(summary_task_id)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to start summary generation: {e}"
|
||||
print(f"[Monitor] {error_msg}")
|
||||
|
||||
# 监控任务完成,退出循环
|
||||
break
|
||||
|
||||
# 检查转录是否失败
|
||||
elif current_status == 'failed':
|
||||
error_msg = status_info.get('error_message', 'Unknown error')
|
||||
print(f"[Monitor] Transcription failed for meeting {meeting_id}: {error_msg}")
|
||||
# 转录失败,停止监控
|
||||
break
|
||||
|
||||
# 转录还在进行中(pending/processing),继续等待
|
||||
elif current_status in ['pending', 'processing']:
|
||||
# 等待一段时间后继续轮询
|
||||
time.sleep(poll_interval)
|
||||
|
||||
else:
|
||||
# 未知状态
|
||||
print(f"[Monitor] Unknown transcription status: {current_status}")
|
||||
time.sleep(poll_interval)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Monitor] Error checking transcription status: {e}")
|
||||
# 出错后等待一段时间继续尝试
|
||||
time.sleep(poll_interval)
|
||||
|
||||
# 检查是否超时
|
||||
if poll_count >= max_polls:
|
||||
print(f"[Monitor] Transcription monitoring timed out after {max_wait_time}s for meeting {meeting_id}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Monitor] Fatal error in monitor_and_auto_summarize: {e}")
|
||||
|
||||
# --- 会议相关方法 ---
|
||||
|
||||
def _get_meeting_transcript(self, meeting_id: int) -> str:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,275 @@
|
|||
"""
|
||||
测试 upload_audio 接口的 auto_summarize 参数
|
||||
"""
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
|
||||
# 配置
|
||||
BASE_URL = "http://localhost:8000/api"
|
||||
# 请替换为你的有效token
|
||||
AUTH_TOKEN = "your_auth_token_here"
|
||||
# 请替换为你的测试会议ID
|
||||
TEST_MEETING_ID = 1
|
||||
|
||||
# 请求头
|
||||
headers = {
|
||||
"Authorization": f"Bearer {AUTH_TOKEN}"
|
||||
}
|
||||
|
||||
|
||||
def test_upload_audio(auto_summarize=True):
|
||||
"""测试音频上传接口"""
|
||||
print("=" * 60)
|
||||
print(f"测试: upload_audio 接口 (auto_summarize={auto_summarize})")
|
||||
print("=" * 60)
|
||||
|
||||
# 准备测试文件
|
||||
audio_file_path = "test_audio.mp3" # 请替换为实际的音频文件路径
|
||||
|
||||
try:
|
||||
with open(audio_file_path, 'rb') as audio_file:
|
||||
files = {
|
||||
'audio_file': ('test_audio.mp3', audio_file, 'audio/mpeg')
|
||||
}
|
||||
data = {
|
||||
'force_replace': 'false',
|
||||
'auto_summarize': 'true' if auto_summarize else 'false'
|
||||
}
|
||||
|
||||
# 发送请求
|
||||
url = f"{BASE_URL}/meetings/upload-audio"
|
||||
print(f"\n发送请求到: {url}")
|
||||
print(f"参数: auto_summarize={data['auto_summarize']}")
|
||||
response = requests.post(url, headers=headers, files=files, data=data)
|
||||
|
||||
print(f"状态码: {response.status_code}")
|
||||
print(f"响应内容:")
|
||||
print(json.dumps(response.json(), indent=2, ensure_ascii=False))
|
||||
|
||||
# 如果上传成功,获取任务ID
|
||||
if response.status_code == 200:
|
||||
response_data = response.json()
|
||||
if response_data.get('code') == '200':
|
||||
task_id = response_data['data'].get('task_id')
|
||||
auto_sum = response_data['data'].get('auto_summarize')
|
||||
print(f"\n✓ 上传成功! 转录任务ID: {task_id}")
|
||||
print(f" 自动总结: {'开启' if auto_sum else '关闭'}")
|
||||
if auto_sum:
|
||||
print(f" 提示: 音频已上传,后台正在自动进行转录和总结")
|
||||
else:
|
||||
print(f" 提示: 音频已上传,正在进行转录(不会自动总结)")
|
||||
print(f"\n 可以通过以下接口查询状态:")
|
||||
print(f" - 转录状态: GET /meetings/{TEST_MEETING_ID}/transcription/status")
|
||||
print(f" - 总结任务: GET /meetings/{TEST_MEETING_ID}/llm-tasks")
|
||||
print(f" - 会议详情: GET /meetings/{TEST_MEETING_ID}")
|
||||
return True
|
||||
elif response_data.get('code') == '300':
|
||||
print("\n⚠ 需要确认替换现有文件")
|
||||
return False
|
||||
else:
|
||||
print(f"\n✗ 上传失败")
|
||||
return False
|
||||
|
||||
except FileNotFoundError:
|
||||
print(f"\n✗ 错误: 找不到测试音频文件 {audio_file_path}")
|
||||
print("请创建一个测试音频文件或修改 audio_file_path 变量")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"\n✗ 错误: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def test_get_transcription_status():
|
||||
"""测试获取转录状态接口"""
|
||||
print("\n" + "=" * 60)
|
||||
print("测试: 获取转录状态")
|
||||
print("=" * 60)
|
||||
|
||||
url = f"{BASE_URL}/meetings/{TEST_MEETING_ID}/transcription/status"
|
||||
print(f"\n发送请求到: {url}")
|
||||
|
||||
try:
|
||||
response = requests.get(url, headers=headers)
|
||||
print(f"状态码: {response.status_code}")
|
||||
print(f"响应内容:")
|
||||
print(json.dumps(response.json(), indent=2, ensure_ascii=False))
|
||||
|
||||
if response.status_code == 200:
|
||||
response_data = response.json()
|
||||
if response_data.get('code') == '200':
|
||||
data = response_data['data']
|
||||
print(f"\n✓ 获取转录状态成功!")
|
||||
print(f" - 任务ID: {data.get('task_id')}")
|
||||
print(f" - 状态: {data.get('status')}")
|
||||
print(f" - 进度: {data.get('progress')}%")
|
||||
return data.get('status'), data.get('progress')
|
||||
else:
|
||||
print(f"\n✗ 获取状态失败")
|
||||
return None, None
|
||||
|
||||
except Exception as e:
|
||||
print(f"\n✗ 错误: {e}")
|
||||
return None, None
|
||||
|
||||
|
||||
def test_get_llm_tasks():
|
||||
"""测试获取LLM任务列表"""
|
||||
print("\n" + "=" * 60)
|
||||
print("测试: 获取LLM任务列表")
|
||||
print("=" * 60)
|
||||
|
||||
url = f"{BASE_URL}/meetings/{TEST_MEETING_ID}/llm-tasks"
|
||||
print(f"\n发送请求到: {url}")
|
||||
|
||||
try:
|
||||
response = requests.get(url, headers=headers)
|
||||
print(f"状态码: {response.status_code}")
|
||||
print(f"响应内容:")
|
||||
print(json.dumps(response.json(), indent=2, ensure_ascii=False))
|
||||
|
||||
if response.status_code == 200:
|
||||
response_data = response.json()
|
||||
if response_data.get('code') == '200':
|
||||
tasks = response_data['data'].get('tasks', [])
|
||||
print(f"\n✓ 获取LLM任务成功! 共 {len(tasks)} 个任务")
|
||||
if tasks:
|
||||
latest_task = tasks[0]
|
||||
print(f" 最新任务:")
|
||||
print(f" - 任务ID: {latest_task.get('task_id')}")
|
||||
print(f" - 状态: {latest_task.get('status')}")
|
||||
print(f" - 进度: {latest_task.get('progress')}%")
|
||||
return latest_task.get('status'), latest_task.get('progress')
|
||||
return None, None
|
||||
else:
|
||||
print(f"\n✗ 获取任务失败")
|
||||
return None, None
|
||||
|
||||
except Exception as e:
|
||||
print(f"\n✗ 错误: {e}")
|
||||
return None, None
|
||||
|
||||
|
||||
def monitor_progress():
|
||||
"""持续监控处理进度"""
|
||||
print("\n" + "=" * 60)
|
||||
print("持续监控处理进度 (每10秒查询一次)")
|
||||
print("按 Ctrl+C 停止监控")
|
||||
print("=" * 60)
|
||||
|
||||
try:
|
||||
transcription_completed = False
|
||||
summary_completed = False
|
||||
|
||||
while True:
|
||||
print(f"\n[{time.strftime('%H:%M:%S')}] 查询状态...")
|
||||
|
||||
# 查询转录状态
|
||||
trans_status, trans_progress = test_get_transcription_status()
|
||||
|
||||
# 如果转录完成,查询总结状态
|
||||
if trans_status == 'completed' and not transcription_completed:
|
||||
print(f"\n✓ 转录已完成!")
|
||||
transcription_completed = True
|
||||
|
||||
if transcription_completed:
|
||||
summ_status, summ_progress = test_get_llm_tasks()
|
||||
if summ_status == 'completed' and not summary_completed:
|
||||
print(f"\n✓ 总结已完成!")
|
||||
summary_completed = True
|
||||
break
|
||||
elif summ_status == 'failed':
|
||||
print(f"\n✗ 总结失败")
|
||||
break
|
||||
|
||||
# 检查转录是否失败
|
||||
if trans_status == 'failed':
|
||||
print(f"\n✗ 转录失败")
|
||||
break
|
||||
|
||||
# 如果全部完成,退出
|
||||
if transcription_completed and summary_completed:
|
||||
print(f"\n✓ 全部完成!")
|
||||
break
|
||||
|
||||
time.sleep(10)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\n\n⚠ 用户中断监控")
|
||||
except Exception as e:
|
||||
print(f"\n✗ 监控出错: {e}")
|
||||
|
||||
|
||||
def main():
|
||||
"""主函数"""
|
||||
print("\n")
|
||||
print("╔" + "═" * 58 + "╗")
|
||||
print("║" + " " * 12 + "upload_audio 接口测试" + " " * 23 + "║")
|
||||
print("║" + " " * 10 + "(测试 auto_summarize 参数)" + " " * 17 + "║")
|
||||
print("╚" + "═" * 58 + "╝")
|
||||
|
||||
print("\n请确保:")
|
||||
print("1. 后端服务正在运行 (http://localhost:8000)")
|
||||
print("2. 已修改脚本中的 AUTH_TOKEN 和 TEST_MEETING_ID")
|
||||
print("3. 已准备好测试音频文件")
|
||||
|
||||
input("\n按回车键开始测试...")
|
||||
|
||||
# 测试1: 查看当前转录状态
|
||||
test_get_transcription_status()
|
||||
|
||||
# 测试2: 查看当前LLM任务
|
||||
test_get_llm_tasks()
|
||||
|
||||
# 询问要测试哪种模式
|
||||
print("\n" + "-" * 60)
|
||||
print("请选择测试模式:")
|
||||
print("1. 仅转录 (auto_summarize=false)")
|
||||
print("2. 转录+自动总结 (auto_summarize=true)")
|
||||
print("3. 两种模式都测试")
|
||||
choice = input("请输入选项 (1/2/3): ")
|
||||
|
||||
if choice == '1':
|
||||
# 测试:仅转录
|
||||
if test_upload_audio(auto_summarize=False):
|
||||
print("\n⚠ 注意: 此模式下不会自动生成总结")
|
||||
print("如需生成总结,请手动调用: POST /meetings/{meeting_id}/generate-summary-async")
|
||||
elif choice == '2':
|
||||
# 测试:转录+自动总结
|
||||
if test_upload_audio(auto_summarize=True):
|
||||
print("\n" + "-" * 60)
|
||||
choice = input("是否要持续监控处理进度? (y/n): ")
|
||||
if choice.lower() == 'y':
|
||||
monitor_progress()
|
||||
elif choice == '3':
|
||||
# 两种模式都测试
|
||||
print("\n" + "=" * 60)
|
||||
print("测试模式1: 仅转录 (auto_summarize=false)")
|
||||
print("=" * 60)
|
||||
test_upload_audio(auto_summarize=False)
|
||||
|
||||
input("\n按回车键继续测试模式2...")
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("测试模式2: 转录+自动总结 (auto_summarize=true)")
|
||||
print("=" * 60)
|
||||
if test_upload_audio(auto_summarize=True):
|
||||
print("\n" + "-" * 60)
|
||||
choice = input("是否要持续监控处理进度? (y/n): ")
|
||||
if choice.lower() == 'y':
|
||||
monitor_progress()
|
||||
else:
|
||||
print("\n✗ 无效选项")
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("测试完成!")
|
||||
print("=" * 60)
|
||||
print("\n总结:")
|
||||
print("- auto_summarize=false: 只执行转录,不自动生成总结")
|
||||
print("- auto_summarize=true: 执行转录后自动生成总结")
|
||||
print("- 默认值: true (向前兼容)")
|
||||
print("- 现有页面建议设置: auto_summarize=false")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Reference in New Issue