import json
from http import HTTPStatus

import dashscope
import requests
from dashscope.audio.asr import Transcription

from app.core.config import DASHSCOPE_API_KEY
from app.core.database import get_db_connection


class AIService:
    """Transcribe meeting audio with DashScope and store the sentences in the DB.

    Errors are reported with ``print`` and never propagated to the caller;
    both public and private methods return ``None``.
    """

    # Upper bound (seconds) for downloading the transcription JSON. Without a
    # timeout ``requests.get`` can block forever on a stalled connection.
    FETCH_TIMEOUT_SECONDS = 30

    def __init__(self):
        # The DashScope SDK reads its API key from this module-level attribute.
        dashscope.api_key = DASHSCOPE_API_KEY

    def transcribe(self, file_urls: list[str], meeting_id: int) -> None:
        """Run a transcription task and persist its sentences for a meeting.

        Submits ``file_urls`` to the ``paraformer-v2`` model, waits for the
        task to finish, downloads the JSON behind the first result's
        ``transcription_url`` and hands it to ``_save_segments_to_db``.

        Args:
            file_urls: URLs of the audio files to transcribe.
            meeting_id: Meeting whose transcript segments are replaced.

        Returns:
            None. Every failure is printed and swallowed.
        """
        print(f"Starting transcription for meeting_id: {meeting_id}, files: {file_urls}")
        try:
            task_response = Transcription.async_call(
                model='paraformer-v2',
                file_urls=file_urls,
                language_hints=['zh', 'en'],
                disfluency_removal_enabled=True,
                diarization_enabled=True,
                speaker_count=10,
            )
            # A rejected submission has no usable task id; report it here
            # instead of failing later on ``output.task_id``.
            if task_response.status_code != HTTPStatus.OK:
                print(f"Transcription submission failed: {task_response.status_code}, {task_response.message}")
                return
            print("Transcription task submitted successfully!")

            transcribe_response = Transcription.wait(task=task_response.output.task_id)
            if transcribe_response.status_code != HTTPStatus.OK:
                print(f"Transcription failed: {transcribe_response.status_code}, {transcribe_response.message}")
                return

            output = transcribe_response.output
            results = output.get('results') if output else None
            if not results:
                print("No transcription results found in the response.")
                return

            # NOTE(review): only the first result is used, so additional
            # entries in ``file_urls`` are not stored — confirm this is intended.
            first_result = results[0]
            transcription_url = first_result.get('transcription_url')
            if not transcription_url:
                print(f"Transcription result has no transcription_url: {first_result}")
                return

            print(f"Fetching transcription from URL: {transcription_url}")
            response = requests.get(transcription_url, timeout=self.FETCH_TIMEOUT_SECONDS)
            response.raise_for_status()
            transcription_data = response.json()

            self._save_segments_to_db(transcription_data, meeting_id)
        # JSON errors are handled first: recent ``requests`` versions raise a
        # decode error that is also a RequestException, which would otherwise
        # be reported as a fetch failure.
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from transcription URL: {e}")
        except requests.exceptions.RequestException as e:
            print(f"Error fetching transcription from URL: {e}")
        except Exception as e:
            # Best-effort boundary: this method never raises to its caller.
            print(f"An unexpected error occurred: {e}")

    def _save_segments_to_db(self, data: dict, meeting_id: int) -> None:
        """Replace the stored transcript segments of a meeting.

        Reads ``data['transcripts'][*]['sentences'][*]`` and writes one row per
        sentence into ``transcript_segments``. Existing rows for ``meeting_id``
        are deleted first; the delete and the inserts are committed together
        and rolled back together on failure.

        Args:
            data: Parsed transcription JSON.
            meeting_id: Meeting the segments belong to.

        Returns:
            None. Database errors are printed and swallowed.
        """
        # ``or []`` also covers keys that are present but null.
        segments_to_insert = [
            (
                meeting_id,
                sentence.get('speaker_id', 'Unknown'),
                sentence.get('begin_time'),
                sentence.get('end_time'),
                sentence.get('text'),
            )
            for transcript in (data.get('transcripts') or [])
            for sentence in (transcript.get('sentences') or [])
        ]

        if not segments_to_insert:
            print("No segments to save.")
            return

        delete_query = "DELETE FROM transcript_segments WHERE meeting_id = %s"
        insert_query = '''
                    INSERT INTO transcript_segments (meeting_id, speaker_tag, start_time_ms, end_time_ms, text_content)
                    VALUES (%s, %s, %s, %s, %s)
                    '''

        try:
            with get_db_connection() as connection:
                cursor = connection.cursor()
                try:
                    # Clear existing segments for this meeting to avoid duplicates.
                    cursor.execute(delete_query, (meeting_id,))
                    print(f"Deleted existing segments for meeting_id: {meeting_id}")

                    cursor.executemany(insert_query, segments_to_insert)
                    connection.commit()
                    print(f"Successfully saved {len(segments_to_insert)} segments to the database for meeting_id: {meeting_id}")
                except Exception:
                    # Undo the DELETE if the insert or commit failed so the
                    # meeting does not lose its previous segments.
                    connection.rollback()
                    raise
                finally:
                    cursor.close()
        except Exception as e:
            print(f"Database error: {e}")


# Manual smoke test; runs only when this file is executed directly.
if __name__ == '__main__':
    # Example usage:
    # 1. Make sure a meeting with meeting_id == test_meeting_id exists in your database.
    # 2. Make sure the audio file URL is correct and publicly accessible.
    test_meeting_id = 37
    # Please replace with your own publicly accessible audio file URL.
    test_file_urls = ['http://t0vogyxkz.hn-bkt.clouddn.com/record/meeting_records_2.mp3']

    print("--- Running AI Service Test ---")
    ai_service = AIService()
    ai_service.transcribe(file_urls=test_file_urls, meeting_id=test_meeting_id)
    print("--- AI Service Test Finished ---")