1.0.4
parent
7a3e9ad1c7
commit
72d9ebdc07
|
|
@ -415,13 +415,13 @@ async def upload_audio(audio_file: UploadFile = File(...), meeting_id: int = For
|
||||||
with get_db_connection() as connection:
|
with get_db_connection() as connection:
|
||||||
cursor = connection.cursor(dictionary=True)
|
cursor = connection.cursor(dictionary=True)
|
||||||
if replaced_existing and force_replace_bool:
|
if replaced_existing and force_replace_bool:
|
||||||
cursor.execute("DELETE FROM transcript_segments WHERE meeting_id = %s", (meeting_id,))
|
# 只删除旧的音频文件,转录数据由 start_transcription 统一处理
|
||||||
cursor.execute("DELETE FROM transcript_tasks WHERE meeting_id = %s", (meeting_id,))
|
|
||||||
if existing_info and existing_info['file_path']:
|
if existing_info and existing_info['file_path']:
|
||||||
old_file_path = BASE_DIR / existing_info['file_path'].lstrip('/')
|
old_file_path = BASE_DIR / existing_info['file_path'].lstrip('/')
|
||||||
if old_file_path.exists():
|
if old_file_path.exists():
|
||||||
try:
|
try:
|
||||||
os.remove(old_file_path)
|
os.remove(old_file_path)
|
||||||
|
print(f"Deleted old audio file: {old_file_path}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Warning: Failed to delete old file {old_file_path}: {e}")
|
print(f"Warning: Failed to delete old file {old_file_path}: {e}")
|
||||||
if replaced_existing:
|
if replaced_existing:
|
||||||
|
|
@ -431,6 +431,7 @@ async def upload_audio(audio_file: UploadFile = File(...), meeting_id: int = For
|
||||||
connection.commit()
|
connection.commit()
|
||||||
cursor.close()
|
cursor.close()
|
||||||
try:
|
try:
|
||||||
|
# start_transcription 会自动删除旧的转录数据和任务记录
|
||||||
task_id = transcription_service.start_transcription(meeting_id, '/'+str(relative_path))
|
task_id = transcription_service.start_transcription(meeting_id, '/'+str(relative_path))
|
||||||
print(f"Transcription task started with ID: {task_id}")
|
print(f"Transcription task started with ID: {task_id}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -24,21 +24,43 @@ class AsyncTranscriptionService:
|
||||||
def start_transcription(self, meeting_id: int, audio_file_path: str) -> str:
|
def start_transcription(self, meeting_id: int, audio_file_path: str) -> str:
|
||||||
"""
|
"""
|
||||||
启动异步转录任务
|
启动异步转录任务
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
meeting_id: 会议ID
|
meeting_id: 会议ID
|
||||||
audio_file_path: 音频文件相对路径
|
audio_file_path: 音频文件相对路径
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: 业务任务ID
|
str: 业务任务ID
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# 构造完整的文件URL
|
# 1. 删除该会议的旧转录数据和任务记录,并清空会议总结
|
||||||
|
print(f"Cleaning old transcription data for meeting_id: {meeting_id}")
|
||||||
|
with get_db_connection() as connection:
|
||||||
|
cursor = connection.cursor()
|
||||||
|
|
||||||
|
# 删除旧的转录文本段落
|
||||||
|
cursor.execute("DELETE FROM transcript_segments WHERE meeting_id = %s", (meeting_id,))
|
||||||
|
deleted_segments = cursor.rowcount
|
||||||
|
print(f"Deleted {deleted_segments} old transcript segments")
|
||||||
|
|
||||||
|
# 删除旧的转录任务记录
|
||||||
|
cursor.execute("DELETE FROM transcript_tasks WHERE meeting_id = %s", (meeting_id,))
|
||||||
|
deleted_tasks = cursor.rowcount
|
||||||
|
print(f"Deleted {deleted_tasks} old transcript tasks")
|
||||||
|
|
||||||
|
# 清空会议总结内容
|
||||||
|
cursor.execute("UPDATE meetings SET summary = NULL WHERE meeting_id = %s", (meeting_id,))
|
||||||
|
print(f"Cleared summary for meeting_id: {meeting_id}")
|
||||||
|
|
||||||
|
connection.commit()
|
||||||
|
cursor.close()
|
||||||
|
|
||||||
|
# 2. 构造完整的文件URL
|
||||||
file_url = f"{self.base_url}{audio_file_path}"
|
file_url = f"{self.base_url}{audio_file_path}"
|
||||||
|
|
||||||
print(f"Starting transcription for meeting_id: {meeting_id}, file_url: {file_url}")
|
print(f"Starting transcription for meeting_id: {meeting_id}, file_url: {file_url}")
|
||||||
|
|
||||||
# 调用Paraformer异步API
|
# 3. 调用Paraformer异步API
|
||||||
task_response = Transcription.async_call(
|
task_response = Transcription.async_call(
|
||||||
model='paraformer-v2',
|
model='paraformer-v2',
|
||||||
file_urls=[file_url],
|
file_urls=[file_url],
|
||||||
|
|
@ -47,15 +69,15 @@ class AsyncTranscriptionService:
|
||||||
diarization_enabled=True,
|
diarization_enabled=True,
|
||||||
speaker_count=10
|
speaker_count=10
|
||||||
)
|
)
|
||||||
|
|
||||||
if task_response.status_code != HTTPStatus.OK:
|
if task_response.status_code != HTTPStatus.OK:
|
||||||
print(f"Failed to start transcription: {task_response.status_code}, {task_response.message}")
|
print(f"Failed to start transcription: {task_response.status_code}, {task_response.message}")
|
||||||
raise Exception(f"Transcription API error: {task_response.message}")
|
raise Exception(f"Transcription API error: {task_response.message}")
|
||||||
|
|
||||||
paraformer_task_id = task_response.output.task_id
|
paraformer_task_id = task_response.output.task_id
|
||||||
business_task_id = str(uuid.uuid4())
|
business_task_id = str(uuid.uuid4())
|
||||||
|
|
||||||
# 在Redis中存储任务映射
|
# 4. 在Redis中存储任务映射
|
||||||
current_time = datetime.now().isoformat()
|
current_time = datetime.now().isoformat()
|
||||||
task_data = {
|
task_data = {
|
||||||
'business_task_id': business_task_id,
|
'business_task_id': business_task_id,
|
||||||
|
|
@ -67,17 +89,17 @@ class AsyncTranscriptionService:
|
||||||
'created_at': current_time,
|
'created_at': current_time,
|
||||||
'updated_at': current_time
|
'updated_at': current_time
|
||||||
}
|
}
|
||||||
|
|
||||||
# 存储到Redis,过期时间24小时
|
# 存储到Redis,过期时间24小时
|
||||||
self.redis_client.hset(f"task:{business_task_id}", mapping=task_data)
|
self.redis_client.hset(f"task:{business_task_id}", mapping=task_data)
|
||||||
self.redis_client.expire(f"task:{business_task_id}", 86400)
|
self.redis_client.expire(f"task:{business_task_id}", 86400)
|
||||||
|
|
||||||
# 在数据库中创建任务记录
|
# 5. 在数据库中创建任务记录
|
||||||
self._save_task_to_db(business_task_id, paraformer_task_id, meeting_id, audio_file_path)
|
self._save_task_to_db(business_task_id, paraformer_task_id, meeting_id, audio_file_path)
|
||||||
|
|
||||||
print(f"Transcription task created: {business_task_id}")
|
print(f"Transcription task created: {business_task_id}")
|
||||||
return business_task_id
|
return business_task_id
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error starting transcription: {e}")
|
print(f"Error starting transcription: {e}")
|
||||||
raise e
|
raise e
|
||||||
|
|
@ -391,9 +413,9 @@ class AsyncTranscriptionService:
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
|
|
||||||
# 清除该会议的现有转录分段
|
# 清除该会议的现有转录分段
|
||||||
delete_query = "DELETE FROM transcript_segments WHERE meeting_id = %s"
|
# delete_query = "DELETE FROM transcript_segments WHERE meeting_id = %s"
|
||||||
cursor.execute(delete_query, (meeting_id,))
|
# cursor.execute(delete_query, (meeting_id,))
|
||||||
print(f"Deleted existing segments for meeting_id: {meeting_id}")
|
# print(f"Deleted existing segments for meeting_id: {meeting_id}")
|
||||||
|
|
||||||
# 插入新的转录分段
|
# 插入新的转录分段
|
||||||
insert_query = '''
|
insert_query = '''
|
||||||
|
|
|
||||||
|
|
@ -1,108 +0,0 @@
|
||||||
from http import HTTPStatus
|
|
||||||
import requests
|
|
||||||
import json
|
|
||||||
import dashscope
|
|
||||||
from dashscope.audio.asr import Transcription
|
|
||||||
from app.core.config import QWEN_API_KEY
|
|
||||||
from app.core.database import get_db_connection
|
|
||||||
|
|
||||||
class VoiceService:
|
|
||||||
def __init__(self):
|
|
||||||
dashscope.api_key = QWEN_API_KEY
|
|
||||||
|
|
||||||
def transcribe(self, file_urls: list[str], meeting_id: int):
|
|
||||||
print(f"Starting transcription for meeting_id: {meeting_id}, files: {file_urls}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
task_response = Transcription.async_call(
|
|
||||||
model='paraformer-v2',
|
|
||||||
file_urls=file_urls,
|
|
||||||
language_hints=['zh', 'en'],
|
|
||||||
disfluency_removal_enabled=True,
|
|
||||||
diarization_enabled=True,
|
|
||||||
speaker_count=10
|
|
||||||
)
|
|
||||||
|
|
||||||
transcribe_response = Transcription.wait(task=task_response.output.task_id)
|
|
||||||
|
|
||||||
if transcribe_response.status_code != HTTPStatus.OK:
|
|
||||||
print(f"Transcription failed: {transcribe_response.status_code}, {transcribe_response.message}")
|
|
||||||
return
|
|
||||||
|
|
||||||
print("Transcription task submitted successfully!")
|
|
||||||
if not (transcribe_response.output and transcribe_response.output.get('results')):
|
|
||||||
print("No transcription results found in the response.")
|
|
||||||
return
|
|
||||||
|
|
||||||
transcription_url = transcribe_response.output['results'][0]['transcription_url']
|
|
||||||
print(f"Fetching transcription from URL: {transcription_url}")
|
|
||||||
|
|
||||||
response = requests.get(transcription_url)
|
|
||||||
response.raise_for_status()
|
|
||||||
transcription_data = response.json()
|
|
||||||
|
|
||||||
self._save_segments_to_db(transcription_data, meeting_id)
|
|
||||||
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
print(f"Error fetching transcription from URL: {e}")
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
print(f"Error decoding JSON from transcription URL: {e}")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"An unexpected error occurred: {e}")
|
|
||||||
|
|
||||||
def _save_segments_to_db(self, data: dict, meeting_id: int):
|
|
||||||
segments_to_insert = []
|
|
||||||
for transcript in data.get('transcripts', []):
|
|
||||||
for sentence in transcript.get('sentences', []):
|
|
||||||
speaker_id = sentence.get('speaker_id', -1)
|
|
||||||
segments_to_insert.append((
|
|
||||||
meeting_id,
|
|
||||||
speaker_id, # For the new speaker_id column
|
|
||||||
speaker_id, # For the speaker_tag column (initial value)
|
|
||||||
sentence.get('begin_time'),
|
|
||||||
sentence.get('end_time'),
|
|
||||||
sentence.get('text')
|
|
||||||
))
|
|
||||||
|
|
||||||
if not segments_to_insert:
|
|
||||||
print("No segments to save.")
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
with get_db_connection() as connection:
|
|
||||||
cursor = connection.cursor()
|
|
||||||
|
|
||||||
# Clear existing segments for this meeting to avoid duplicates
|
|
||||||
delete_query = "DELETE FROM transcript_segments WHERE meeting_id = %s"
|
|
||||||
cursor.execute(delete_query, (meeting_id,))
|
|
||||||
print(f"Deleted existing segments for meeting_id: {meeting_id}")
|
|
||||||
|
|
||||||
insert_query = '''
|
|
||||||
INSERT INTO transcript_segments (meeting_id, speaker_id, speaker_tag, start_time_ms, end_time_ms, text_content)
|
|
||||||
VALUES (%s, %s, %s, %s, %s, %s)
|
|
||||||
'''
|
|
||||||
cursor.executemany(insert_query, segments_to_insert)
|
|
||||||
connection.commit()
|
|
||||||
print(f"Successfully saved {len(segments_to_insert)} segments to the database for meeting_id: {meeting_id}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Database error: {e}")
|
|
||||||
|
|
||||||
# Main method for testing
|
|
||||||
if __name__ == '__main__':
|
|
||||||
# This is an example of how to use the service.
|
|
||||||
# You need to provide a valid meeting_id that exists in your database
|
|
||||||
# and a publicly accessible URL for the audio file.
|
|
||||||
|
|
||||||
# Example usage:
|
|
||||||
# 1. Make sure you have a meeting with meeting_id = 1 in your database.
|
|
||||||
# 2. Make sure the audio file URL is correct and accessible.
|
|
||||||
|
|
||||||
test_meeting_id = 40
|
|
||||||
# Please replace with your own publicly accessible audio file URL
|
|
||||||
test_file_urls = ['http://t0vogyxkz.hn-bkt.clouddn.com/test/dajiang.m4a']
|
|
||||||
|
|
||||||
print("--- Running Voice Service Test ---")
|
|
||||||
voice_service = VoiceService()
|
|
||||||
voice_service.transcribe(file_urls=test_file_urls, meeting_id=test_meeting_id)
|
|
||||||
print("--- Voice Service Test Finished ---")
|
|
||||||
|
|
@ -6,6 +6,7 @@ services:
|
||||||
ports:
|
ports:
|
||||||
- "8001:8001"
|
- "8001:8001"
|
||||||
environment:
|
environment:
|
||||||
|
- TZ=Asia/Shanghai
|
||||||
# Python运行环境
|
# Python运行环境
|
||||||
- PYTHONPATH=/app
|
- PYTHONPATH=/app
|
||||||
- PYTHONUNBUFFERED=1
|
- PYTHONUNBUFFERED=1
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue