diff --git a/.DS_Store b/.DS_Store index 408a8dd..acab377 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..80da760 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,51 @@ +# Python相关 +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ +venv/ +env/ +ENV/ + +# 用户上传文件(最重要!) +uploads/ + +# 测试和开发文件 +test/ +tests/ +*.pytest_cache/ +.coverage +htmlcov/ + +# Git +.git/ +.gitignore +.gitattributes + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# 日志 +*.log +logs/ + +# 环境变量 +.env.local +.env.*.local + +# 文档 +*.md +docs/ + +# 其他 +.DS_Store +*.bak +*.tmp diff --git a/Dockerfile b/Dockerfile index 98e46b0..ae9c31d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,18 +12,19 @@ ENV PYTHONUNBUFFERED=1 RUN sed -i 's/deb.debian.org/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list.d/debian.sources RUN sed -i 's/security.debian.org/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list.d/debian.sources -# 安装系统依赖 +# 复制依赖文件 +COPY requirements-prod.txt . + +# 安装系统依赖、Python依赖,然后清理(一个RUN命令减少层大小) RUN apt-get update && apt-get install -y \ gcc \ default-libmysqlclient-dev \ pkg-config \ - && rm -rf /var/lib/apt/lists/* - -# 复制依赖文件 -COPY requirements-prod.txt . - -# 安装Python依赖 -RUN pip install --index-url https://mirrors.aliyun.com/pypi/simple --no-cache-dir -r requirements-prod.txt + && pip install --index-url https://mirrors.aliyun.com/pypi/simple --no-cache-dir -r requirements-prod.txt \ + && apt-get purge -y gcc pkg-config \ + && apt-get autoremove -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* # 复制应用代码 COPY . . diff --git a/app.zip b/app.zip index 139faa1..37c3f90 100644 Binary files a/app.zip and b/app.zip differ diff --git a/app/api/endpoints/meetings.py b/app/api/endpoints/meetings.py index 4e88c65..03d5821 100644 --- a/app/api/endpoints/meetings.py +++ b/app/api/endpoints/meetings.py @@ -1,4 +1,5 @@ -from fastapi import APIRouter, UploadFile, File, Form, Depends, BackgroundTasks +from fastapi import APIRouter, UploadFile, File, Form, Depends, BackgroundTasks, Request, Header +from fastapi.responses import StreamingResponse, Response from app.models.models import Meeting, TranscriptSegment, TranscriptionTaskStatus, CreateMeetingRequest, UpdateMeetingRequest, SpeakerTagUpdateRequest, BatchSpeakerTagUpdateRequest, BatchTranscriptUpdateRequest, Tag from app.core.database import get_db_connection from app.core.config import BASE_DIR, AUDIO_DIR, MARKDOWN_DIR, ALLOWED_EXTENSIONS, ALLOWED_IMAGE_EXTENSIONS @@ -11,9 +12,11 @@ from app.core.response import create_api_response from typing import List, Optional from datetime import datetime from pydantic import BaseModel +from urllib.parse import quote import os import uuid import shutil +import mimetypes router = APIRouter() @@ -23,6 +26,194 @@ transcription_service = AsyncTranscriptionService() class GenerateSummaryRequest(BaseModel): user_prompt: Optional[str] = "" +def _handle_audio_upload( + audio_file: UploadFile, + meeting_id: int, + force_replace_bool: bool, + current_user: dict +): + """ + 音频上传的公共处理逻辑 + + Args: + audio_file: 上传的音频文件 + meeting_id: 会议ID + force_replace_bool: 是否强制替换 + current_user: 当前用户 + + Returns: + dict: { + "success": bool, # 是否成功 + "needs_confirmation": bool, # 是否需要用户确认 + "response": dict, # 如果需要返回,这里是响应数据 + "file_info": dict, # 文件信息 (成功时) + "transcription_task_id": str, # 转录任务ID (成功时) + "replaced_existing": bool, # 是否替换了现有文件 (成功时) + "has_transcription": bool # 原来是否有转录记录 (成功时) + } + """ + # 1. 文件类型验证 + file_extension = os.path.splitext(audio_file.filename)[1].lower() + if file_extension not in ALLOWED_EXTENSIONS: + return { + "success": False, + "response": create_api_response( + code="400", + message=f"不支持的文件类型。支持的类型: {', '.join(ALLOWED_EXTENSIONS)}" + ) + } + + # 2. 文件大小验证 + max_file_size = getattr(config_module, 'MAX_FILE_SIZE', 100 * 1024 * 1024) + if audio_file.size > max_file_size: + return { + "success": False, + "response": create_api_response( + code="400", + message=f"文件大小超过 {max_file_size // (1024 * 1024)}MB 限制" + ) + } + + # 3. 权限和已有文件检查 + try: + with get_db_connection() as connection: + cursor = connection.cursor(dictionary=True) + + # 检查会议是否存在及权限 + cursor.execute("SELECT user_id FROM meetings WHERE meeting_id = %s", (meeting_id,)) + meeting = cursor.fetchone() + if not meeting: + return { + "success": False, + "response": create_api_response(code="404", message="会议不存在") + } + if meeting['user_id'] != current_user['user_id']: + return { + "success": False, + "response": create_api_response(code="403", message="无权限操作此会议") + } + + # 检查已有音频文件 + cursor.execute( + "SELECT file_name, file_path, upload_time FROM audio_files WHERE meeting_id = %s", + (meeting_id,) + ) + existing_info = cursor.fetchone() + + # 检查是否有转录记录 + has_transcription = False + if existing_info: + cursor.execute( + "SELECT COUNT(*) as segment_count FROM transcript_segments WHERE meeting_id = %s", + (meeting_id,) + ) + has_transcription = cursor.fetchone()['segment_count'] > 0 + + cursor.close() + except Exception as e: + return { + "success": False, + "response": create_api_response(code="500", message=f"检查已有文件失败: {str(e)}") + } + + # 4. 如果已有转录记录且未确认替换,返回提示 + if existing_info and has_transcription and not force_replace_bool: + return { + "success": False, + "needs_confirmation": True, + "response": create_api_response( + code="300", + message="该会议已有音频文件和转录记录,重新上传将删除现有的转录内容和会议总结", + data={ + "requires_confirmation": True, + "existing_file": { + "file_name": existing_info['file_name'], + "upload_time": existing_info['upload_time'].isoformat() if existing_info['upload_time'] else None + } + } + ) + } + + # 5. 保存音频文件 + meeting_dir = AUDIO_DIR / str(meeting_id) + meeting_dir.mkdir(exist_ok=True) + unique_filename = f"{uuid.uuid4()}{file_extension}" + absolute_path = meeting_dir / unique_filename + relative_path = absolute_path.relative_to(BASE_DIR) + + try: + with open(absolute_path, "wb") as buffer: + shutil.copyfileobj(audio_file.file, buffer) + except Exception as e: + return { + "success": False, + "response": create_api_response(code="500", message=f"保存文件失败: {str(e)}") + } + + transcription_task_id = None + replaced_existing = existing_info is not None + + try: + # 6. 更新数据库记录 + with get_db_connection() as connection: + cursor = connection.cursor(dictionary=True) + + # 删除旧的音频文件 + if replaced_existing and force_replace_bool: + if existing_info and existing_info['file_path']: + old_file_path = BASE_DIR / existing_info['file_path'].lstrip('/') + if old_file_path.exists(): + try: + os.remove(old_file_path) + print(f"Deleted old audio file: {old_file_path}") + except Exception as e: + print(f"Warning: Failed to delete old file {old_file_path}: {e}") + + # 更新或插入音频文件记录 + if replaced_existing: + cursor.execute( + 'UPDATE audio_files SET file_name = %s, file_path = %s, file_size = %s, upload_time = NOW(), task_id = NULL WHERE meeting_id = %s', + (audio_file.filename, '/' + str(relative_path), audio_file.size, meeting_id) + ) + else: + cursor.execute( + 'INSERT INTO audio_files (meeting_id, file_name, file_path, file_size, upload_time) VALUES (%s, %s, %s, %s, NOW())', + (meeting_id, audio_file.filename, '/' + str(relative_path), audio_file.size) + ) + + connection.commit() + cursor.close() + + # 7. 启动转录任务 + try: + transcription_task_id = transcription_service.start_transcription(meeting_id, '/' + str(relative_path)) + print(f"Transcription task {transcription_task_id} started for meeting {meeting_id}") + except Exception as e: + print(f"Failed to start transcription: {e}") + raise + + except Exception as e: + # 出错时清理已上传的文件 + if os.path.exists(absolute_path): + os.remove(absolute_path) + return { + "success": False, + "response": create_api_response(code="500", message=f"处理失败: {str(e)}") + } + + # 8. 返回成功结果 + return { + "success": True, + "file_info": { + "file_name": audio_file.filename, + "file_path": '/' + str(relative_path), + "file_size": audio_file.size + }, + "transcription_task_id": transcription_task_id, + "replaced_existing": replaced_existing, + "has_transcription": has_transcription + } + def _process_tags(cursor, tag_string: Optional[str], creator_id: Optional[int] = None) -> List[Tag]: """ 处理标签:查询已存在的标签,如果提供了 creator_id 则创建不存在的标签 @@ -365,86 +556,72 @@ def get_meeting_for_edit(meeting_id: int, current_user: dict = Depends(get_curre return create_api_response(code="200", message="获取会议编辑信息成功", data=meeting_data) @router.post("/meetings/upload-audio") -async def upload_audio(audio_file: UploadFile = File(...), meeting_id: int = Form(...), force_replace: str = Form("false"), current_user: dict = Depends(get_current_user)): +async def upload_audio( + audio_file: UploadFile = File(...), + meeting_id: int = Form(...), + force_replace: str = Form("false"), + auto_summarize: str = Form("true"), + background_tasks: BackgroundTasks = None, + current_user: dict = Depends(get_current_user) +): + """ + 音频文件上传接口 + + 上传音频文件并启动转录任务,可选择是否自动生成总结 + + Args: + audio_file: 音频文件 + meeting_id: 会议ID + force_replace: 是否强制替换("true"/"false") + auto_summarize: 是否自动生成总结("true"/"false",默认"true") + background_tasks: FastAPI后台任务 + current_user: 当前登录用户 + + Returns: + HTTP 300: 需要用户确认(已有转录记录) + HTTP 200: 处理成功,返回任务ID + HTTP 400/403/404/500: 各种错误情况 + """ force_replace_bool = force_replace.lower() in ("true", "1", "yes") - file_extension = os.path.splitext(audio_file.filename)[1].lower() - if file_extension not in ALLOWED_EXTENSIONS: - return create_api_response(code="400", message=f"Unsupported file type. Allowed types: {', '.join(ALLOWED_EXTENSIONS)}") - max_file_size = getattr(config_module, 'MAX_FILE_SIZE', 100 * 1024 * 1024) - if audio_file.size > max_file_size: - return create_api_response(code="400", message=f"File size exceeds {max_file_size // (1024 * 1024)}MB limit") - try: - with get_db_connection() as connection: - cursor = connection.cursor(dictionary=True) - cursor.execute("SELECT user_id FROM meetings WHERE meeting_id = %s", (meeting_id,)) - meeting = cursor.fetchone() - if not meeting: - return create_api_response(code="404", message="Meeting not found") - if meeting['user_id'] != current_user['user_id']: - return create_api_response(code="403", message="Permission denied") - cursor.execute("SELECT file_name, file_path, upload_time FROM audio_files WHERE meeting_id = %s", (meeting_id,)) - existing_info = cursor.fetchone() - has_transcription = False - if existing_info: - cursor.execute("SELECT COUNT(*) as segment_count FROM transcript_segments WHERE meeting_id = %s", (meeting_id,)) - has_transcription = cursor.fetchone()['segment_count'] > 0 - cursor.close() - except Exception as e: - return create_api_response(code="500", message=f"Failed to check existing files: {str(e)}") - if existing_info and has_transcription and not force_replace_bool: - return create_api_response(code="300", message="该会议已有音频文件和转录记录,重新上传将删除现有的转录内容", data={ - "requires_confirmation": True, - "existing_file": { - "file_name": existing_info['file_name'], - "upload_time": existing_info['upload_time'].isoformat() if existing_info['upload_time'] else None - } - }) - meeting_dir = AUDIO_DIR / str(meeting_id) - meeting_dir.mkdir(exist_ok=True) - unique_filename = f"{uuid.uuid4()}{file_extension}" - absolute_path = meeting_dir / unique_filename - relative_path = absolute_path.relative_to(BASE_DIR) - try: - with open(absolute_path, "wb") as buffer: - shutil.copyfileobj(audio_file.file, buffer) - except Exception as e: - return create_api_response(code="500", message=f"Failed to save file: {str(e)}") - task_id = None - replaced_existing = existing_info is not None - try: - with get_db_connection() as connection: - cursor = connection.cursor(dictionary=True) - if replaced_existing and force_replace_bool: - # 只删除旧的音频文件,转录数据由 start_transcription 统一处理 - if existing_info and existing_info['file_path']: - old_file_path = BASE_DIR / existing_info['file_path'].lstrip('/') - if old_file_path.exists(): - try: - os.remove(old_file_path) - print(f"Deleted old audio file: {old_file_path}") - except Exception as e: - print(f"Warning: Failed to delete old file {old_file_path}: {e}") - if replaced_existing: - cursor.execute('UPDATE audio_files SET file_name = %s, file_path = %s, file_size = %s, upload_time = NOW(), task_id = NULL WHERE meeting_id = %s', (audio_file.filename, '/'+str(relative_path), audio_file.size, meeting_id)) - else: - cursor.execute('INSERT INTO audio_files (meeting_id, file_name, file_path, file_size, upload_time) VALUES (%s, %s, %s, %s, NOW())', (meeting_id, audio_file.filename, '/'+str(relative_path), audio_file.size)) - connection.commit() - cursor.close() - try: - # start_transcription 会自动删除旧的转录数据和任务记录 - task_id = transcription_service.start_transcription(meeting_id, '/'+str(relative_path)) - print(f"Transcription task started with ID: {task_id}") - except Exception as e: - print(f"Failed to start transcription task: {e}") - except Exception as e: - if os.path.exists(absolute_path): - os.remove(absolute_path) - return create_api_response(code="500", message=f"Failed to save file info: {str(e)}") - return create_api_response(code="200", message="Audio file uploaded successfully" + (" and replaced existing file" if replaced_existing else ""), data={ - "file_name": audio_file.filename, "file_path": '/'+str(relative_path), "task_id": task_id, - "transcription_started": task_id is not None, "replaced_existing": replaced_existing, - "previous_transcription_cleared": replaced_existing and has_transcription - }) + auto_summarize_bool = auto_summarize.lower() in ("true", "1", "yes") + + # 调用公共处理方法 + result = _handle_audio_upload(audio_file, meeting_id, force_replace_bool, current_user) + + # 如果不成功,直接返回响应 + if not result["success"]: + return result["response"] + + # 成功:根据auto_summarize参数决定是否添加监控任务 + transcription_task_id = result["transcription_task_id"] + if auto_summarize_bool and transcription_task_id: + background_tasks.add_task( + async_meeting_service.monitor_and_auto_summarize, + meeting_id, + transcription_task_id + ) + print(f"[upload-audio] Auto-summarize enabled, monitor task added for meeting {meeting_id}") + message_suffix = ",正在进行转录和总结" + else: + print(f"[upload-audio] Auto-summarize disabled for meeting {meeting_id}") + message_suffix = "" + + # 返回成功响应 + return create_api_response( + code="200", + message="Audio file uploaded successfully" + + (" and replaced existing file" if result["replaced_existing"] else "") + + message_suffix, + data={ + "file_name": result["file_info"]["file_name"], + "file_path": result["file_info"]["file_path"], + "task_id": transcription_task_id, + "transcription_started": transcription_task_id is not None, + "auto_summarize": auto_summarize_bool, + "replaced_existing": result["replaced_existing"], + "previous_transcription_cleared": result["replaced_existing"] and result["has_transcription"] + } + ) @router.get("/meetings/{meeting_id}/audio") def get_audio_file(meeting_id: int, current_user: dict = Depends(get_current_user)): @@ -456,6 +633,115 @@ def get_audio_file(meeting_id: int, current_user: dict = Depends(get_current_use return create_api_response(code="404", message="Audio file not found for this meeting") return create_api_response(code="200", message="Audio file found", data=audio_file) +@router.get("/meetings/{meeting_id}/audio/stream") +async def stream_audio_file( + meeting_id: int, + range: Optional[str] = Header(None, alias="Range") +): + """ + 音频文件流式传输端点,支持HTTP Range请求(Safari浏览器必需) + 无需登录认证,用于前端audio标签直接访问 + """ + # 获取音频文件信息 + with get_db_connection() as connection: + cursor = connection.cursor(dictionary=True) + cursor.execute("SELECT file_name, file_path, file_size FROM audio_files WHERE meeting_id = %s", (meeting_id,)) + audio_file = cursor.fetchone() + if not audio_file: + return Response(content="Audio file not found", status_code=404) + + # 构建完整文件路径 + file_path = BASE_DIR / audio_file['file_path'].lstrip('/') + if not file_path.exists(): + return Response(content="Audio file not found on disk", status_code=404) + + # 总是使用实际文件大小(不依赖数据库记录,防止文件被优化后大小不匹配) + file_size = os.path.getsize(file_path) + file_name = audio_file['file_name'] + + # 根据文件扩展名确定MIME类型 + extension = os.path.splitext(file_name)[1].lower() + mime_types = { + '.mp3': 'audio/mpeg', + '.m4a': 'audio/mp4', + '.wav': 'audio/wav', + '.mpeg': 'audio/mpeg', + '.mp4': 'audio/mp4' + } + content_type = mime_types.get(extension, 'audio/mpeg') + + # 处理Range请求 + start = 0 + end = file_size - 1 + + if range: + # 解析Range头: "bytes=start-end" 或 "bytes=start-" + try: + range_spec = range.replace("bytes=", "") + if "-" in range_spec: + parts = range_spec.split("-") + if parts[0]: + start = int(parts[0]) + if parts[1]: + end = int(parts[1]) + except (ValueError, IndexError): + pass + + # 确保范围有效 + if start >= file_size: + return Response( + content="Range Not Satisfiable", + status_code=416, + headers={"Content-Range": f"bytes */{file_size}"} + ) + + end = min(end, file_size - 1) + content_length = end - start + 1 + + # 对所有文件名统一使用RFC 5987标准的URL编码格式 + # 这样可以正确处理中文、特殊字符等所有情况 + encoded_filename = quote(file_name) + filename_header = f"inline; filename*=UTF-8''{encoded_filename}" + + # 生成器函数用于流式读取文件 + def iter_file(): + with open(file_path, 'rb') as f: + f.seek(start) + remaining = content_length + chunk_size = 64 * 1024 # 64KB chunks + while remaining > 0: + read_size = min(chunk_size, remaining) + data = f.read(read_size) + if not data: + break + remaining -= len(data) + yield data + + # 根据是否有Range请求返回不同的响应 + if range: + return StreamingResponse( + iter_file(), + status_code=206, # Partial Content + media_type=content_type, + headers={ + "Content-Range": f"bytes {start}-{end}/{file_size}", + "Accept-Ranges": "bytes", + "Content-Length": str(content_length), + "Content-Disposition": filename_header + } + ) + else: + return StreamingResponse( + iter_file(), + status_code=200, + media_type=content_type, + headers={ + "Accept-Ranges": "bytes", + "Content-Length": str(file_size), + "Content-Disposition": filename_header + } + ) + @router.get("/meetings/{meeting_id}/transcription/status") def get_meeting_transcription_status(meeting_id: int, current_user: dict = Depends(get_current_user)): try: diff --git a/app/core/config.py b/app/core/config.py index 959e027..bd0949d 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -61,6 +61,12 @@ REDIS_CONFIG = { # Dashscope (Tongyi Qwen) API Key QWEN_API_KEY = os.getenv('QWEN_API_KEY', 'sk-c2bf06ea56b4491ea3d1e37fdb472b8f') +# 转录轮询配置 - 用于 upload-audio-complete 接口 +TRANSCRIPTION_POLL_CONFIG = { + 'poll_interval': int(os.getenv('TRANSCRIPTION_POLL_INTERVAL', '10')), # 轮询间隔:10秒 + 'max_wait_time': int(os.getenv('TRANSCRIPTION_MAX_WAIT_TIME', '1800')), # 最大等待:30分钟 +} + # LLM配置 - 阿里Qwen3大模型 LLM_CONFIG = { 'model_name': os.getenv('LLM_MODEL_NAME', 'qwen-plus'), diff --git a/app/services/async_meeting_service.py b/app/services/async_meeting_service.py index 7fd9b4b..50358e9 100644 --- a/app/services/async_meeting_service.py +++ b/app/services/async_meeting_service.py @@ -8,8 +8,9 @@ from datetime import datetime from typing import Optional, Dict, Any, List import redis -from app.core.config import REDIS_CONFIG +from app.core.config import REDIS_CONFIG, TRANSCRIPTION_POLL_CONFIG from app.core.database import get_db_connection +from app.services.async_transcription_service import AsyncTranscriptionService from app.services.llm_service import LLMService class AsyncMeetingService: @@ -110,6 +111,93 @@ class AsyncMeetingService: self._update_task_in_db(task_id, 'failed', 0, error_message=error_msg) self._update_task_status_in_redis(task_id, 'failed', 0, error_message=error_msg) + def monitor_and_auto_summarize(self, meeting_id: int, transcription_task_id: str): + """ + 监控转录任务,完成后自动生成总结 + 此方法设计为由BackgroundTasks调用,在后台运行 + + Args: + meeting_id: 会议ID + transcription_task_id: 转录任务ID + + 流程: + 1. 循环轮询转录任务状态 + 2. 转录成功后自动启动总结任务 + 3. 转录失败或超时则停止轮询并记录日志 + """ + print(f"[Monitor] Started monitoring transcription task {transcription_task_id} for meeting {meeting_id}") + + # 获取配置参数 + poll_interval = TRANSCRIPTION_POLL_CONFIG['poll_interval'] + max_wait_time = TRANSCRIPTION_POLL_CONFIG['max_wait_time'] + max_polls = max_wait_time // poll_interval + + # 延迟导入以避免循环导入 + transcription_service = AsyncTranscriptionService() + + poll_count = 0 + + try: + while poll_count < max_polls: + poll_count += 1 + elapsed_time = poll_count * poll_interval + + try: + # 查询转录任务状态 + status_info = transcription_service.get_task_status(transcription_task_id) + current_status = status_info.get('status', 'unknown') + progress = status_info.get('progress', 0) + + print(f"[Monitor] Poll {poll_count}/{max_polls} - Status: {current_status}, Progress: {progress}%, Elapsed: {elapsed_time}s") + + # 检查转录是否完成 + if current_status == 'completed': + print(f"[Monitor] Transcription completed successfully for meeting {meeting_id}") + + # 启动总结任务 + try: + summary_task_id = self.start_summary_generation(meeting_id, user_prompt="") + print(f"[Monitor] Summary task {summary_task_id} started for meeting {meeting_id}") + + # 在后台执行总结任务 + self._process_task(summary_task_id) + + except Exception as e: + error_msg = f"Failed to start summary generation: {e}" + print(f"[Monitor] {error_msg}") + + # 监控任务完成,退出循环 + break + + # 检查转录是否失败 + elif current_status == 'failed': + error_msg = status_info.get('error_message', 'Unknown error') + print(f"[Monitor] Transcription failed for meeting {meeting_id}: {error_msg}") + # 转录失败,停止监控 + break + + # 转录还在进行中(pending/processing),继续等待 + elif current_status in ['pending', 'processing']: + # 等待一段时间后继续轮询 + time.sleep(poll_interval) + + else: + # 未知状态 + print(f"[Monitor] Unknown transcription status: {current_status}") + time.sleep(poll_interval) + + except Exception as e: + print(f"[Monitor] Error checking transcription status: {e}") + # 出错后等待一段时间继续尝试 + time.sleep(poll_interval) + + # 检查是否超时 + if poll_count >= max_polls: + print(f"[Monitor] Transcription monitoring timed out after {max_wait_time}s for meeting {meeting_id}") + + except Exception as e: + print(f"[Monitor] Fatal error in monitor_and_auto_summarize: {e}") + # --- 会议相关方法 --- def _get_meeting_transcript(self, meeting_id: int) -> str: diff --git a/test/test_upload_audio_complete.py b/test/test_upload_audio_complete.py new file mode 100644 index 0000000..d716058 --- /dev/null +++ b/test/test_upload_audio_complete.py @@ -0,0 +1,275 @@ +""" +测试 upload_audio 接口的 auto_summarize 参数 +""" +import requests +import time +import json + +# 配置 +BASE_URL = "http://localhost:8000/api" +# 请替换为你的有效token +AUTH_TOKEN = "your_auth_token_here" +# 请替换为你的测试会议ID +TEST_MEETING_ID = 1 + +# 请求头 +headers = { + "Authorization": f"Bearer {AUTH_TOKEN}" +} + + +def test_upload_audio(auto_summarize=True): + """测试音频上传接口""" + print("=" * 60) + print(f"测试: upload_audio 接口 (auto_summarize={auto_summarize})") + print("=" * 60) + + # 准备测试文件 + audio_file_path = "test_audio.mp3" # 请替换为实际的音频文件路径 + + try: + with open(audio_file_path, 'rb') as audio_file: + files = { + 'audio_file': ('test_audio.mp3', audio_file, 'audio/mpeg') + } + data = { + 'force_replace': 'false', + 'auto_summarize': 'true' if auto_summarize else 'false' + } + + # 发送请求 + url = f"{BASE_URL}/meetings/upload-audio" + print(f"\n发送请求到: {url}") + print(f"参数: auto_summarize={data['auto_summarize']}") + response = requests.post(url, headers=headers, files=files, data=data) + + print(f"状态码: {response.status_code}") + print(f"响应内容:") + print(json.dumps(response.json(), indent=2, ensure_ascii=False)) + + # 如果上传成功,获取任务ID + if response.status_code == 200: + response_data = response.json() + if response_data.get('code') == '200': + task_id = response_data['data'].get('task_id') + auto_sum = response_data['data'].get('auto_summarize') + print(f"\n✓ 上传成功! 转录任务ID: {task_id}") + print(f" 自动总结: {'开启' if auto_sum else '关闭'}") + if auto_sum: + print(f" 提示: 音频已上传,后台正在自动进行转录和总结") + else: + print(f" 提示: 音频已上传,正在进行转录(不会自动总结)") + print(f"\n 可以通过以下接口查询状态:") + print(f" - 转录状态: GET /meetings/{TEST_MEETING_ID}/transcription/status") + print(f" - 总结任务: GET /meetings/{TEST_MEETING_ID}/llm-tasks") + print(f" - 会议详情: GET /meetings/{TEST_MEETING_ID}") + return True + elif response_data.get('code') == '300': + print("\n⚠ 需要确认替换现有文件") + return False + else: + print(f"\n✗ 上传失败") + return False + + except FileNotFoundError: + print(f"\n✗ 错误: 找不到测试音频文件 {audio_file_path}") + print("请创建一个测试音频文件或修改 audio_file_path 变量") + return False + except Exception as e: + print(f"\n✗ 错误: {e}") + return False + + +def test_get_transcription_status(): + """测试获取转录状态接口""" + print("\n" + "=" * 60) + print("测试: 获取转录状态") + print("=" * 60) + + url = f"{BASE_URL}/meetings/{TEST_MEETING_ID}/transcription/status" + print(f"\n发送请求到: {url}") + + try: + response = requests.get(url, headers=headers) + print(f"状态码: {response.status_code}") + print(f"响应内容:") + print(json.dumps(response.json(), indent=2, ensure_ascii=False)) + + if response.status_code == 200: + response_data = response.json() + if response_data.get('code') == '200': + data = response_data['data'] + print(f"\n✓ 获取转录状态成功!") + print(f" - 任务ID: {data.get('task_id')}") + print(f" - 状态: {data.get('status')}") + print(f" - 进度: {data.get('progress')}%") + return data.get('status'), data.get('progress') + else: + print(f"\n✗ 获取状态失败") + return None, None + + except Exception as e: + print(f"\n✗ 错误: {e}") + return None, None + + +def test_get_llm_tasks(): + """测试获取LLM任务列表""" + print("\n" + "=" * 60) + print("测试: 获取LLM任务列表") + print("=" * 60) + + url = f"{BASE_URL}/meetings/{TEST_MEETING_ID}/llm-tasks" + print(f"\n发送请求到: {url}") + + try: + response = requests.get(url, headers=headers) + print(f"状态码: {response.status_code}") + print(f"响应内容:") + print(json.dumps(response.json(), indent=2, ensure_ascii=False)) + + if response.status_code == 200: + response_data = response.json() + if response_data.get('code') == '200': + tasks = response_data['data'].get('tasks', []) + print(f"\n✓ 获取LLM任务成功! 共 {len(tasks)} 个任务") + if tasks: + latest_task = tasks[0] + print(f" 最新任务:") + print(f" - 任务ID: {latest_task.get('task_id')}") + print(f" - 状态: {latest_task.get('status')}") + print(f" - 进度: {latest_task.get('progress')}%") + return latest_task.get('status'), latest_task.get('progress') + return None, None + else: + print(f"\n✗ 获取任务失败") + return None, None + + except Exception as e: + print(f"\n✗ 错误: {e}") + return None, None + + +def monitor_progress(): + """持续监控处理进度""" + print("\n" + "=" * 60) + print("持续监控处理进度 (每10秒查询一次)") + print("按 Ctrl+C 停止监控") + print("=" * 60) + + try: + transcription_completed = False + summary_completed = False + + while True: + print(f"\n[{time.strftime('%H:%M:%S')}] 查询状态...") + + # 查询转录状态 + trans_status, trans_progress = test_get_transcription_status() + + # 如果转录完成,查询总结状态 + if trans_status == 'completed' and not transcription_completed: + print(f"\n✓ 转录已完成!") + transcription_completed = True + + if transcription_completed: + summ_status, summ_progress = test_get_llm_tasks() + if summ_status == 'completed' and not summary_completed: + print(f"\n✓ 总结已完成!") + summary_completed = True + break + elif summ_status == 'failed': + print(f"\n✗ 总结失败") + break + + # 检查转录是否失败 + if trans_status == 'failed': + print(f"\n✗ 转录失败") + break + + # 如果全部完成,退出 + if transcription_completed and summary_completed: + print(f"\n✓ 全部完成!") + break + + time.sleep(10) + + except KeyboardInterrupt: + print("\n\n⚠ 用户中断监控") + except Exception as e: + print(f"\n✗ 监控出错: {e}") + + +def main(): + """主函数""" + print("\n") + print("╔" + "═" * 58 + "╗") + print("║" + " " * 12 + "upload_audio 接口测试" + " " * 23 + "║") + print("║" + " " * 10 + "(测试 auto_summarize 参数)" + " " * 17 + "║") + print("╚" + "═" * 58 + "╝") + + print("\n请确保:") + print("1. 后端服务正在运行 (http://localhost:8000)") + print("2. 已修改脚本中的 AUTH_TOKEN 和 TEST_MEETING_ID") + print("3. 已准备好测试音频文件") + + input("\n按回车键开始测试...") + + # 测试1: 查看当前转录状态 + test_get_transcription_status() + + # 测试2: 查看当前LLM任务 + test_get_llm_tasks() + + # 询问要测试哪种模式 + print("\n" + "-" * 60) + print("请选择测试模式:") + print("1. 仅转录 (auto_summarize=false)") + print("2. 转录+自动总结 (auto_summarize=true)") + print("3. 两种模式都测试") + choice = input("请输入选项 (1/2/3): ") + + if choice == '1': + # 测试:仅转录 + if test_upload_audio(auto_summarize=False): + print("\n⚠ 注意: 此模式下不会自动生成总结") + print("如需生成总结,请手动调用: POST /meetings/{meeting_id}/generate-summary-async") + elif choice == '2': + # 测试:转录+自动总结 + if test_upload_audio(auto_summarize=True): + print("\n" + "-" * 60) + choice = input("是否要持续监控处理进度? (y/n): ") + if choice.lower() == 'y': + monitor_progress() + elif choice == '3': + # 两种模式都测试 + print("\n" + "=" * 60) + print("测试模式1: 仅转录 (auto_summarize=false)") + print("=" * 60) + test_upload_audio(auto_summarize=False) + + input("\n按回车键继续测试模式2...") + + print("\n" + "=" * 60) + print("测试模式2: 转录+自动总结 (auto_summarize=true)") + print("=" * 60) + if test_upload_audio(auto_summarize=True): + print("\n" + "-" * 60) + choice = input("是否要持续监控处理进度? (y/n): ") + if choice.lower() == 'y': + monitor_progress() + else: + print("\n✗ 无效选项") + + print("\n" + "=" * 60) + print("测试完成!") + print("=" * 60) + print("\n总结:") + print("- auto_summarize=false: 只执行转录,不自动生成总结") + print("- auto_summarize=true: 执行转录后自动生成总结") + print("- 默认值: true (向前兼容)") + print("- 现有页面建议设置: auto_summarize=false") + + +if __name__ == "__main__": + main() diff --git a/test_voiceprint_api.py b/test/test_voiceprint_api.py similarity index 100% rename from test_voiceprint_api.py rename to test/test_voiceprint_api.py