1.0.4

2025-11-21 15:47:42 +08:00 · 2025-11-21 15:47:42 +08:00 · 72d9ebdc07
parent 7a3e9ad1c7
commit 72d9ebdc07
6 changed files with 44 additions and 128 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/app.zip
+++ b/app.zip
--- a/app/api/endpoints/meetings.py
+++ b/app/api/endpoints/meetings.py
@ -415,13 +415,13 @@ async def upload_audio(audio_file: UploadFile = File(...), meeting_id: int = For
        with get_db_connection() as connection:
            cursor = connection.cursor(dictionary=True)
            if replaced_existing and force_replace_bool:
-                cursor.execute("DELETE FROM transcript_segments WHERE meeting_id = %s", (meeting_id,))
-                cursor.execute("DELETE FROM transcript_tasks WHERE meeting_id = %s", (meeting_id,))
+                # 只删除旧的音频文件，转录数据由 start_transcription 统一处理
                if existing_info and existing_info['file_path']:
                    old_file_path = BASE_DIR / existing_info['file_path'].lstrip('/')
                    if old_file_path.exists():
                        try:
                            os.remove(old_file_path)
+                            print(f"Deleted old audio file: {old_file_path}")
                        except Exception as e:
                            print(f"Warning: Failed to delete old file {old_file_path}: {e}")
            if replaced_existing:
@ -431,6 +431,7 @@ async def upload_audio(audio_file: UploadFile = File(...), meeting_id: int = For
            connection.commit()
            cursor.close()
        try:
+            # start_transcription 会自动删除旧的转录数据和任务记录
            task_id = transcription_service.start_transcription(meeting_id, '/'+str(relative_path))
            print(f"Transcription task started with ID: {task_id}")
        except Exception as e:
--- a/app/services/async_transcription_service.py
+++ b/app/services/async_transcription_service.py
@ -24,21 +24,43 @@ class AsyncTranscriptionService:
    def start_transcription(self, meeting_id: int, audio_file_path: str) -> str:
        """
        启动异步转录任务
-        
+
        Args:
            meeting_id: 会议ID
            audio_file_path: 音频文件相对路径
-            
+
        Returns:
            str: 业务任务ID
        """
        try:
-            # 构造完整的文件URL
+            # 1. 删除该会议的旧转录数据和任务记录，并清空会议总结
+            print(f"Cleaning old transcription data for meeting_id: {meeting_id}")
+            with get_db_connection() as connection:
+                cursor = connection.cursor()
+
+                # 删除旧的转录文本段落
+                cursor.execute("DELETE FROM transcript_segments WHERE meeting_id = %s", (meeting_id,))
+                deleted_segments = cursor.rowcount
+                print(f"Deleted {deleted_segments} old transcript segments")
+
+                # 删除旧的转录任务记录
+                cursor.execute("DELETE FROM transcript_tasks WHERE meeting_id = %s", (meeting_id,))
+                deleted_tasks = cursor.rowcount
+                print(f"Deleted {deleted_tasks} old transcript tasks")
+
+                # 清空会议总结内容
+                cursor.execute("UPDATE meetings SET summary = NULL WHERE meeting_id = %s", (meeting_id,))
+                print(f"Cleared summary for meeting_id: {meeting_id}")
+
+                connection.commit()
+                cursor.close()
+
+            # 2. 构造完整的文件URL
            file_url = f"{self.base_url}{audio_file_path}"
-            
+
            print(f"Starting transcription for meeting_id: {meeting_id}, file_url: {file_url}")
-            
-            # 调用Paraformer异步API
+
+            # 3. 调用Paraformer异步API
            task_response = Transcription.async_call(
                model='paraformer-v2',
                file_urls=[file_url],
@ -47,15 +69,15 @@ class AsyncTranscriptionService:
                diarization_enabled=True,
                speaker_count=10
            )
-            
+
            if task_response.status_code != HTTPStatus.OK:
                print(f"Failed to start transcription: {task_response.status_code}, {task_response.message}")
                raise Exception(f"Transcription API error: {task_response.message}")
-            
+
            paraformer_task_id = task_response.output.task_id
            business_task_id = str(uuid.uuid4())
-            
-            # 在Redis中存储任务映射
+
+            # 4. 在Redis中存储任务映射
            current_time = datetime.now().isoformat()
            task_data = {
                'business_task_id': business_task_id,
@ -67,17 +89,17 @@ class AsyncTranscriptionService:
                'created_at': current_time,
                'updated_at': current_time
            }
-            
+
            # 存储到Redis，过期时间24小时
            self.redis_client.hset(f"task:{business_task_id}", mapping=task_data)
            self.redis_client.expire(f"task:{business_task_id}", 86400)
-            
-            # 在数据库中创建任务记录
+
+            # 5. 在数据库中创建任务记录
            self._save_task_to_db(business_task_id, paraformer_task_id, meeting_id, audio_file_path)
-            
+
            print(f"Transcription task created: {business_task_id}")
            return business_task_id
-            
+
        except Exception as e:
            print(f"Error starting transcription: {e}")
            raise e
@ -391,9 +413,9 @@ class AsyncTranscriptionService:
                cursor = connection.cursor()
                
                # 清除该会议的现有转录分段
-                delete_query = "DELETE FROM transcript_segments WHERE meeting_id = %s"
-                cursor.execute(delete_query, (meeting_id,))
-                print(f"Deleted existing segments for meeting_id: {meeting_id}")
+                # delete_query = "DELETE FROM transcript_segments WHERE meeting_id = %s"
+                # cursor.execute(delete_query, (meeting_id,))
+                # print(f"Deleted existing segments for meeting_id: {meeting_id}")
                
                # 插入新的转录分段
                insert_query = '''
--- a/app/services/voice_service.py
+++ b/app/services/voice_service.py
@ -1,108 +0,0 @@
-from http import HTTPStatus
-import requests
-import json
-import dashscope
-from dashscope.audio.asr import Transcription
-from app.core.config import QWEN_API_KEY
-from app.core.database import get_db_connection
-
-class VoiceService:
-    def __init__(self):
-        dashscope.api_key = QWEN_API_KEY
-
-    def transcribe(self, file_urls: list[str], meeting_id: int):
-        print(f"Starting transcription for meeting_id: {meeting_id}, files: {file_urls}")
-
-        try:
-            task_response = Transcription.async_call(
-                model='paraformer-v2',
-                file_urls=file_urls,
-                language_hints=['zh', 'en'],
-                disfluency_removal_enabled=True,
-                diarization_enabled=True,
-                speaker_count=10
-            )
-
-            transcribe_response = Transcription.wait(task=task_response.output.task_id)
-
-            if transcribe_response.status_code != HTTPStatus.OK:
-                print(f"Transcription failed: {transcribe_response.status_code}, {transcribe_response.message}")
-                return
-
-            print("Transcription task submitted successfully!")
-            if not (transcribe_response.output and transcribe_response.output.get('results')):
-                print("No transcription results found in the response.")
-                return
-
-            transcription_url = transcribe_response.output['results'][0]['transcription_url']
-            print(f"Fetching transcription from URL: {transcription_url}")
-            
-            response = requests.get(transcription_url)
-            response.raise_for_status()
-            transcription_data = response.json()
-
-            self._save_segments_to_db(transcription_data, meeting_id)
-
-        except requests.exceptions.RequestException as e:
-            print(f"Error fetching transcription from URL: {e}")
-        except json.JSONDecodeError as e:
-            print(f"Error decoding JSON from transcription URL: {e}")
-        except Exception as e:
-            print(f"An unexpected error occurred: {e}")
-
-    def _save_segments_to_db(self, data: dict, meeting_id: int):
-        segments_to_insert = []
-        for transcript in data.get('transcripts', []):
-            for sentence in transcript.get('sentences', []):
-                speaker_id = sentence.get('speaker_id', -1)
-                segments_to_insert.append((
-                    meeting_id,
-                    speaker_id,  # For the new speaker_id column
-                    speaker_id,  # For the speaker_tag column (initial value)
-                    sentence.get('begin_time'),
-                    sentence.get('end_time'),
-                    sentence.get('text')
-                ))
-
-        if not segments_to_insert:
-            print("No segments to save.")
-            return
-
-        try:
-            with get_db_connection() as connection:
-                cursor = connection.cursor()
-                
-                # Clear existing segments for this meeting to avoid duplicates
-                delete_query = "DELETE FROM transcript_segments WHERE meeting_id = %s"
-                cursor.execute(delete_query, (meeting_id,))
-                print(f"Deleted existing segments for meeting_id: {meeting_id}")
-
-                insert_query = '''
-                    INSERT INTO transcript_segments (meeting_id, speaker_id, speaker_tag, start_time_ms, end_time_ms, text_content)
-                    VALUES (%s, %s, %s, %s, %s, %s)
-                '''
-                cursor.executemany(insert_query, segments_to_insert)
-                connection.commit()
-                print(f"Successfully saved {len(segments_to_insert)} segments to the database for meeting_id: {meeting_id}")
-
-        except Exception as e:
-            print(f"Database error: {e}")
-
-# Main method for testing
-if __name__ == '__main__':
-    # This is an example of how to use the service.
-    # You need to provide a valid meeting_id that exists in your database
-    # and a publicly accessible URL for the audio file.
-    
-    # Example usage:
-    # 1. Make sure you have a meeting with meeting_id = 1 in your database.
-    # 2. Make sure the audio file URL is correct and accessible.
-    
-    test_meeting_id = 40
-    # Please replace with your own publicly accessible audio file URL
-    test_file_urls = ['http://t0vogyxkz.hn-bkt.clouddn.com/test/dajiang.m4a']
-    
-    print("--- Running Voice Service Test ---")
-    voice_service = VoiceService()
-    voice_service.transcribe(file_urls=test_file_urls, meeting_id=test_meeting_id)
-    print("--- Voice Service Test Finished ---")
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@ -6,6 +6,7 @@ services:
    ports:
      - "8001:8001"
    environment:
+      - TZ=Asia/Shanghai
      # Python运行环境
      - PYTHONPATH=/app
      - PYTHONUNBUFFERED=1