加入音频的人声分类和语音识别

main
mula.liu 2025-08-25 11:31:24 +08:00
parent 1589fccfd8
commit 06cffe7cfa
32 changed files with 4410 additions and 20 deletions

BIN
app/.DS_Store vendored 100644

Binary file not shown.

BIN
app/api/.DS_Store vendored 100644

Binary file not shown.

View File

@ -0,0 +1,499 @@
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from app.models.models import Meeting, TranscriptSegment, CreateMeetingRequest, UpdateMeetingRequest
from app.core.database import get_db_connection
from app.core.config import UPLOAD_DIR, AUDIO_DIR, MARKDOWN_DIR, ALLOWED_EXTENSIONS, ALLOWED_IMAGE_EXTENSIONS, MAX_FILE_SIZE, MAX_IMAGE_SIZE
from app.services.qiniu_service import qiniu_service
from typing import Optional
import os
import uuid
import shutil
# Router that the meeting endpoints below register themselves on.
router = APIRouter()
@router.get("/meetings", response_model=list[Meeting])
def get_meetings(user_id: Optional[int] = None):
    """List meetings, newest first, each with its attendees.

    Args:
        user_id: When truthy, only meetings created by this user or attended
            by this user are returned; otherwise every meeting is returned.

    Returns:
        A list of ``Meeting`` objects ordered by ``meeting_time`` and then
        ``created_at``, both descending.
    """
    with get_db_connection() as connection:
        cursor = connection.cursor(dictionary=True)
        base_query = '''
            SELECT
                m.meeting_id, m.title, m.meeting_time, m.summary, m.created_at,
                m.user_id as creator_id, u.caption as creator_username
            FROM meetings m
            JOIN users u ON m.user_id = u.user_id
        '''
        if user_id:
            query = f'''
                {base_query}
                LEFT JOIN attendees a ON m.meeting_id = a.meeting_id
                WHERE m.user_id = %s OR a.user_id = %s
                GROUP BY m.meeting_id
                ORDER BY m.meeting_time DESC, m.created_at DESC
            '''
            cursor.execute(query, (user_id, user_id))
        else:
            query = f" {base_query} ORDER BY m.meeting_time DESC, m.created_at DESC"
            cursor.execute(query)
        meetings = cursor.fetchall()

        # Load the attendees of every listed meeting with ONE query instead of
        # issuing a separate query per meeting.
        attendees_by_meeting = {}
        if meetings:
            meeting_ids = [meeting['meeting_id'] for meeting in meetings]
            # Only "%s" placeholders are interpolated; the ids themselves are
            # still passed as bound parameters.
            placeholders = ', '.join(['%s'] * len(meeting_ids))
            attendees_query = f'''
                SELECT a.meeting_id, u.user_id, u.caption
                FROM attendees a
                JOIN users u ON a.user_id = u.user_id
                WHERE a.meeting_id IN ({placeholders})
            '''
            cursor.execute(attendees_query, tuple(meeting_ids))
            for row in cursor.fetchall():
                attendees_by_meeting.setdefault(row['meeting_id'], []).append(
                    {'user_id': row['user_id'], 'caption': row['caption']}
                )

        return [
            Meeting(
                meeting_id=meeting['meeting_id'],
                title=meeting['title'],
                meeting_time=meeting['meeting_time'],
                summary=meeting['summary'],
                created_at=meeting['created_at'],
                attendees=attendees_by_meeting.get(meeting['meeting_id'], []),
                creator_id=meeting['creator_id'],
                creator_username=meeting['creator_username'],
            )
            for meeting in meetings
        ]
@router.get("/meetings/{meeting_id}", response_model=Meeting)
def get_meeting_details(meeting_id: int):
    """Return one meeting with its creator, attendees and audio file path.

    Args:
        meeting_id: Primary key of the meeting to load.

    Raises:
        HTTPException: 404 when no meeting has the given id.
    """
    meeting_sql = '''
        SELECT
            m.meeting_id, m.title, m.meeting_time, m.summary, m.created_at,
            m.user_id as creator_id, u.caption as creator_username,
            af.file_path as audio_file_path
        FROM meetings m
        JOIN users u ON m.user_id = u.user_id
        LEFT JOIN audio_files af ON m.meeting_id = af.meeting_id
        WHERE m.meeting_id = %s
    '''
    attendees_sql = '''
        SELECT u.user_id, u.caption
        FROM attendees a
        JOIN users u ON a.user_id = u.user_id
        WHERE a.meeting_id = %s
    '''
    with get_db_connection() as connection:
        cursor = connection.cursor(dictionary=True)

        cursor.execute(meeting_sql, (meeting_id,))
        record = cursor.fetchone()
        if not record:
            raise HTTPException(status_code=404, detail="Meeting not found")

        cursor.execute(attendees_sql, (record['meeting_id'],))
        attendees = []
        for person in cursor.fetchall():
            attendees.append({'user_id': person['user_id'], 'caption': person['caption']})

        details = Meeting(
            meeting_id=record['meeting_id'],
            title=record['title'],
            meeting_time=record['meeting_time'],
            summary=record['summary'],
            created_at=record['created_at'],
            attendees=attendees,
            creator_id=record['creator_id'],
            creator_username=record['creator_username'],
        )
        # The audio path comes from the LEFT JOIN and is NULL when nothing
        # has been uploaded for the meeting.
        audio_path = record['audio_file_path']
        if audio_path:
            details.audio_file_path = audio_path
        return details
@router.get("/meetings/{meeting_id}/transcript", response_model=list[TranscriptSegment])
def get_meeting_transcript(meeting_id: int):
    """Return the transcript segments of a meeting ordered by start time.

    Raises:
        HTTPException: 404 when no meeting has the given id.
    """
    with get_db_connection() as connection:
        cursor = connection.cursor(dictionary=True)

        # Distinguish "unknown meeting" (404) from "meeting without transcript" ([]).
        cursor.execute("SELECT meeting_id FROM meetings WHERE meeting_id = %s", (meeting_id,))
        meeting_row = cursor.fetchone()
        if not meeting_row:
            raise HTTPException(status_code=404, detail="Meeting not found")

        cursor.execute(
            '''
            SELECT segment_id, meeting_id, speaker_tag, start_time_ms, end_time_ms, text_content
            FROM transcript_segments
            WHERE meeting_id = %s
            ORDER BY start_time_ms ASC
            ''',
            (meeting_id,),
        )
        transcript = []
        for row in cursor.fetchall():
            transcript.append(TranscriptSegment(**row))
        return transcript
@router.post("/meetings")
def create_meeting(meeting_request: CreateMeetingRequest):
    """Create a meeting and register its attendees.

    Args:
        meeting_request: Title, meeting time and attendee ids of the new
            meeting. When the request model carries a ``user_id`` it is
            stored as the creator.

    Returns:
        A dict with the new ``meeting_id`` and a confirmation ``message``.
    """
    # The creator used to be hard-coded to user 1. Take it from the request
    # when the model provides it and keep 1 only as the legacy fallback, so
    # this works with both the old and the new request schema.
    creator_id = getattr(meeting_request, 'user_id', None) or 1

    meeting_query = '''
        INSERT INTO meetings (user_id, title, meeting_time, summary)
        VALUES (%s, %s, %s, %s)
    '''
    # The no-op ON DUPLICATE KEY clause makes repeated attendee ids harmless.
    attendee_query = '''
        INSERT INTO attendees (meeting_id, user_id)
        VALUES (%s, %s)
        ON DUPLICATE KEY UPDATE meeting_id = meeting_id
    '''
    with get_db_connection() as connection:
        cursor = connection.cursor(dictionary=True)
        cursor.execute(
            meeting_query,
            (creator_id, meeting_request.title, meeting_request.meeting_time, None),
        )
        meeting_id = cursor.lastrowid

        for attendee_id in meeting_request.attendee_ids:
            cursor.execute(attendee_query, (meeting_id, attendee_id))

        connection.commit()
        return {"meeting_id": meeting_id, "message": "Meeting created successfully"}
@router.put("/meetings/{meeting_id}")
def update_meeting(meeting_id: int, meeting_request: UpdateMeetingRequest):
    """Overwrite a meeting's title, time, summary and attendee list.

    Args:
        meeting_id: Primary key of the meeting to update.
        meeting_request: New field values and the complete attendee id list.

    Returns:
        A dict with a confirmation ``message``.

    Raises:
        HTTPException: 404 when no meeting has the given id.
    """
    with get_db_connection() as connection:
        cursor = connection.cursor(dictionary=True)

        cursor.execute("SELECT meeting_id FROM meetings WHERE meeting_id = %s", (meeting_id,))
        if not cursor.fetchone():
            raise HTTPException(status_code=404, detail="Meeting not found")

        update_query = '''
            UPDATE meetings
            SET title = %s, meeting_time = %s, summary = %s
            WHERE meeting_id = %s
        '''
        cursor.execute(update_query, (
            meeting_request.title,
            meeting_request.meeting_time,
            meeting_request.summary,
            meeting_id,
        ))

        # Replace the attendee list wholesale: drop the old rows, insert the new.
        cursor.execute("DELETE FROM attendees WHERE meeting_id = %s", (meeting_id,))
        attendee_query = '''
            INSERT INTO attendees (meeting_id, user_id)
            VALUES (%s, %s)
        '''
        # De-duplicate (keeping order) so a repeated id in the request cannot
        # trip the duplicate-key handling that create_meeting relies on.
        for attendee_id in dict.fromkeys(meeting_request.attendee_ids):
            cursor.execute(attendee_query, (meeting_id, attendee_id))

        connection.commit()
        return {"message": "Meeting updated successfully"}
@router.delete("/meetings/{meeting_id}")
def delete_meeting(meeting_id: int):
    """Delete a meeting together with every row that references it.

    Raises:
        HTTPException: 404 when no meeting has the given id.
    """
    # Child tables first, the meeting row last (foreign key constraints).
    delete_statements = (
        "DELETE FROM transcript_segments WHERE meeting_id = %s",
        "DELETE FROM audio_files WHERE meeting_id = %s",
        "DELETE FROM attachments WHERE meeting_id = %s",
        "DELETE FROM attendees WHERE meeting_id = %s",
        "DELETE FROM meetings WHERE meeting_id = %s",
    )
    params = (meeting_id,)

    with get_db_connection() as connection:
        cursor = connection.cursor(dictionary=True)

        cursor.execute("SELECT meeting_id FROM meetings WHERE meeting_id = %s", params)
        existing = cursor.fetchone()
        if not existing:
            raise HTTPException(status_code=404, detail="Meeting not found")

        for statement in delete_statements:
            cursor.execute(statement, params)
        connection.commit()
        return {"message": "Meeting deleted successfully"}
@router.post("/meetings/{meeting_id}/regenerate-summary")
def regenerate_summary(meeting_id: int):
    """Store a placeholder summary for the meeting and return it.

    No AI service is called yet; a fixed markdown document is written to the
    meeting's ``summary`` column.

    Raises:
        HTTPException: 404 when no meeting has the given id.
    """
    # Placeholder content. The markdown lines stay flush-left on purpose:
    # they are part of the string literal.
    mock_summary = """# AI 生成摘要
## 主要议题
- 项目进度回顾
- 技术方案讨论
- 下阶段规划
## 关键决策
- 采用新的技术架构
- 调整项目时间节点
- 分配任务责任
## 后续行动
- [ ] 完成技术方案文档
- [ ] 安排下次会议时间
- [ ] 跟进项目进度"""

    with get_db_connection() as connection:
        cursor = connection.cursor(dictionary=True)

        cursor.execute("SELECT meeting_id FROM meetings WHERE meeting_id = %s", (meeting_id,))
        found = cursor.fetchone()
        if not found:
            raise HTTPException(status_code=404, detail="Meeting not found")

        update_params = (mock_summary, meeting_id)
        cursor.execute("UPDATE meetings SET summary = %s WHERE meeting_id = %s", update_params)
        connection.commit()
        return {"summary": mock_summary}
@router.get("/meetings/{meeting_id}/edit", response_model=Meeting)
def get_meeting_for_edit(meeting_id: int):
    """Load a meeting, its attendees and its audio path for the edit view.

    Raises:
        HTTPException: 404 when no meeting has the given id.
    """
    with get_db_connection() as connection:
        cursor = connection.cursor(dictionary=True)

        cursor.execute(
            '''
            SELECT
                m.meeting_id, m.title, m.meeting_time, m.summary, m.created_at,
                m.user_id as creator_id, u.caption as creator_username,
                af.file_path as audio_file_path
            FROM meetings m
            JOIN users u ON m.user_id = u.user_id
            LEFT JOIN audio_files af ON m.meeting_id = af.meeting_id
            WHERE m.meeting_id = %s
            ''',
            (meeting_id,),
        )
        row = cursor.fetchone()
        if not row:
            raise HTTPException(status_code=404, detail="Meeting not found")

        # Attendees are returned as {user_id, caption} dicts.
        cursor.execute(
            '''
            SELECT u.user_id, u.caption
            FROM attendees a
            JOIN users u ON a.user_id = u.user_id
            WHERE a.meeting_id = %s
            ''',
            (row['meeting_id'],),
        )
        participant_rows = cursor.fetchall()
        participants = [
            {'user_id': entry['user_id'], 'caption': entry['caption']}
            for entry in participant_rows
        ]

        editable = Meeting(
            meeting_id=row['meeting_id'],
            title=row['title'],
            meeting_time=row['meeting_time'],
            summary=row['summary'],
            created_at=row['created_at'],
            attendees=participants,
            creator_id=row['creator_id'],
            creator_username=row['creator_username'],
        )
        # Only set the audio path when an audio row was joined.
        if row['audio_file_path']:
            editable.audio_file_path = row['audio_file_path']
        return editable
@router.post("/meetings/upload-audio")
async def upload_audio(
    audio_file: UploadFile = File(...),
    meeting_id: int = Form(...)
):
    """Upload a meeting's audio recording to Qiniu and record it in the database.

    The uploaded content is spooled to a temporary file under ``AUDIO_DIR``,
    pushed to Qiniu, and the resulting URL is stored in ``audio_files``. The
    temporary file is always removed afterwards.

    Args:
        audio_file: The uploaded recording; its extension must be in
            ``ALLOWED_EXTENSIONS`` and its size at most ``MAX_FILE_SIZE``.
        meeting_id: Id of the meeting the recording belongs to.

    Returns:
        A dict with ``message``, ``file_name``, ``file_path`` and ``qiniu_url``.

    Raises:
        HTTPException: 400 for an unsupported type or oversized file, 404 for
            an unknown meeting, 500 when saving or uploading fails.
    """
    # filename can be missing on malformed multipart requests.
    file_extension = os.path.splitext(audio_file.filename or "")[1].lower()
    if file_extension not in ALLOWED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file type. Allowed types: {', '.join(ALLOWED_EXTENSIONS)}"
        )

    # The declared size may be None; the size on disk is re-checked below.
    if audio_file.size is not None and audio_file.size > MAX_FILE_SIZE:
        raise HTTPException(
            status_code=400,
            detail="File size exceeds 100MB limit"
        )

    with get_db_connection() as connection:
        cursor = connection.cursor(dictionary=True)
        cursor.execute("SELECT meeting_id FROM meetings WHERE meeting_id = %s", (meeting_id,))
        if not cursor.fetchone():
            raise HTTPException(status_code=404, detail="Meeting not found")

    # Spool the client's upload to a uniquely named temporary file. (The
    # previous code uploaded a fixed test file instead of the client's data.)
    temp_path = AUDIO_DIR / f"{uuid.uuid4()}{file_extension}"
    try:
        with open(temp_path, "wb") as buffer:
            shutil.copyfileobj(audio_file.file, buffer)

        stored_size = temp_path.stat().st_size
        if stored_size > MAX_FILE_SIZE:
            raise HTTPException(
                status_code=400,
                detail="File size exceeds 100MB limit"
            )

        success, qiniu_url, error_msg = qiniu_service.upload_audio_file(
            str(temp_path), meeting_id, audio_file.filename
        )
        if not success:
            raise HTTPException(status_code=500, detail=f"Failed to upload to Qiniu: {error_msg}")

        # One audio row per meeting: a re-upload overwrites the previous record.
        insert_query = '''
            INSERT INTO audio_files (meeting_id, file_name, file_path, file_size, upload_time)
            VALUES (%s, %s, %s, %s, NOW())
            ON DUPLICATE KEY UPDATE
                file_name = VALUES(file_name),
                file_path = VALUES(file_path),
                file_size = VALUES(file_size),
                upload_time = VALUES(upload_time)
        '''
        with get_db_connection() as connection:
            cursor = connection.cursor(dictionary=True)
            cursor.execute(insert_query, (meeting_id, audio_file.filename, qiniu_url, stored_size))
            connection.commit()

        return {
            "message": "Audio file uploaded successfully to Qiniu",
            "file_name": audio_file.filename,
            "file_path": qiniu_url,
            "qiniu_url": qiniu_url
        }
    except HTTPException:
        # Keep deliberate HTTP errors (status and detail) intact.
        raise
    except Exception as e:
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}") from e
    finally:
        # Never leave the spooled copy behind, whatever happened above.
        temp_path.unlink(missing_ok=True)
@router.get("/meetings/{meeting_id}/audio")
def get_audio_file(meeting_id: int):
    """Return the stored audio file record of a meeting.

    Returns:
        A dict with ``file_name``, ``file_path``, ``file_size`` and
        ``upload_time``.

    Raises:
        HTTPException: 404 when the meeting has no audio file row.
    """
    exposed_fields = ("file_name", "file_path", "file_size", "upload_time")
    with get_db_connection() as connection:
        cursor = connection.cursor(dictionary=True)
        cursor.execute(
            '''
            SELECT file_name, file_path, file_size, upload_time
            FROM audio_files
            WHERE meeting_id = %s
            ''',
            (meeting_id,),
        )
        record = cursor.fetchone()
        if not record:
            raise HTTPException(status_code=404, detail="Audio file not found for this meeting")
        return {field: record[field] for field in exposed_fields}
@router.post("/meetings/{meeting_id}/upload-image")
async def upload_image(
    meeting_id: int,
    image_file: UploadFile = File(...)
):
    """Upload an image for a meeting's markdown content to Qiniu.

    The image is written to a temporary file under ``MARKDOWN_DIR``, pushed
    to Qiniu and the temporary file is always removed afterwards.

    Args:
        meeting_id: Id of the meeting the image belongs to.
        image_file: The uploaded image; its extension must be in
            ``ALLOWED_IMAGE_EXTENSIONS`` and its size at most ``MAX_IMAGE_SIZE``.

    Returns:
        A dict with ``message``, ``file_name``, ``file_path``, ``url`` and
        ``qiniu_url``.

    Raises:
        HTTPException: 400 for an unsupported type or oversized image, 404
            for an unknown meeting, 500 when saving or uploading fails.
    """
    # filename can be missing on malformed multipart requests.
    file_extension = os.path.splitext(image_file.filename or "")[1].lower()
    if file_extension not in ALLOWED_IMAGE_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported image type. Allowed types: {', '.join(ALLOWED_IMAGE_EXTENSIONS)}"
        )

    # The declared size may be None; the real length is re-checked after reading.
    if image_file.size is not None and image_file.size > MAX_IMAGE_SIZE:
        raise HTTPException(
            status_code=400,
            detail="Image size exceeds 10MB limit"
        )

    with get_db_connection() as connection:
        cursor = connection.cursor(dictionary=True)
        cursor.execute("SELECT meeting_id FROM meetings WHERE meeting_id = %s", (meeting_id,))
        if not cursor.fetchone():
            raise HTTPException(status_code=404, detail="Meeting not found")

    temp_path = MARKDOWN_DIR / f"{uuid.uuid4()}{file_extension}"
    try:
        try:
            contents = await image_file.read()
            if len(contents) > MAX_IMAGE_SIZE:
                raise HTTPException(
                    status_code=400,
                    detail="Image size exceeds 10MB limit"
                )
            with open(temp_path, "wb") as buffer:
                buffer.write(contents)
        except HTTPException:
            raise
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Failed to save temporary image: {str(e)}"
            ) from e

        try:
            success, qiniu_url, error_msg = qiniu_service.upload_markdown_image(
                str(temp_path), meeting_id, image_file.filename
            )
        except Exception as e:
            import traceback
            traceback.print_exc()
            raise HTTPException(status_code=500, detail=f"Image upload failed: {str(e)}") from e

        # Raised outside the try above so the broad handler cannot re-wrap it
        # and garble the detail message.
        if not success:
            raise HTTPException(status_code=500, detail=f"Failed to upload image to Qiniu: {error_msg}")

        return {
            "message": "Image uploaded successfully to Qiniu",
            "file_name": image_file.filename,
            "file_path": qiniu_url,
            "url": qiniu_url,
            "qiniu_url": qiniu_url
        }
    finally:
        # Single cleanup point for both the success and every failure path.
        temp_path.unlink(missing_ok=True)

View File

@ -2,7 +2,8 @@
from fastapi import APIRouter, HTTPException, UploadFile, File, Form from fastapi import APIRouter, HTTPException, UploadFile, File, Form
from app.models.models import Meeting, TranscriptSegment, CreateMeetingRequest, UpdateMeetingRequest from app.models.models import Meeting, TranscriptSegment, CreateMeetingRequest, UpdateMeetingRequest
from app.core.database import get_db_connection from app.core.database import get_db_connection
from app.core.config import UPLOAD_DIR, AUDIO_DIR, MARKDOWN_DIR, ALLOWED_EXTENSIONS, ALLOWED_IMAGE_EXTENSIONS, MAX_FILE_SIZE, MAX_IMAGE_SIZE from app.core.config import BASE_DIR, UPLOAD_DIR, AUDIO_DIR, MARKDOWN_DIR, ALLOWED_EXTENSIONS, ALLOWED_IMAGE_EXTENSIONS, MAX_FILE_SIZE, MAX_IMAGE_SIZE
from app.services.qiniu_service import qiniu_service
from typing import Optional from typing import Optional
import os import os
import uuid import uuid
@ -144,8 +145,7 @@ def create_meeting(meeting_request: CreateMeetingRequest):
INSERT INTO meetings (user_id, title, meeting_time, summary) INSERT INTO meetings (user_id, title, meeting_time, summary)
VALUES (%s, %s, %s, %s) VALUES (%s, %s, %s, %s)
''' '''
# Note: You'll need to pass user_id, for now using hardcoded value cursor.execute(meeting_query, (meeting_request.user_id, meeting_request.title, meeting_request.meeting_time, None))
cursor.execute(meeting_query, (1, meeting_request.title, meeting_request.meeting_time, None))
meeting_id = cursor.lastrowid meeting_id = cursor.lastrowid
# Add attendees # Add attendees
@ -326,16 +326,18 @@ async def upload_audio(
detail="File size exceeds 100MB limit" detail="File size exceeds 100MB limit"
) )
# Create meeting-specific directory
meeting_dir = AUDIO_DIR / str(meeting_id)
meeting_dir.mkdir(exist_ok=True)
# Generate unique filename # Generate unique filename
unique_filename = f"{uuid.uuid4()}{file_extension}" unique_filename = f"{uuid.uuid4()}{file_extension}"
file_path = AUDIO_DIR / unique_filename absolute_path = meeting_dir / unique_filename
relative_path = absolute_path.relative_to(BASE_DIR)
# Store only relative path for database (audio/filename)
relative_path = f"audio/{unique_filename}"
# Save file # Save file
try: try:
with open(file_path, "wb") as buffer: with open(absolute_path, "wb") as buffer:
shutil.copyfileobj(audio_file.file, buffer) shutil.copyfileobj(audio_file.file, buffer)
except Exception as e: except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to save file: {str(e)}") raise HTTPException(status_code=500, detail=f"Failed to save file: {str(e)}")
@ -348,7 +350,7 @@ async def upload_audio(
cursor.execute("SELECT meeting_id FROM meetings WHERE meeting_id = %s", (meeting_id,)) cursor.execute("SELECT meeting_id FROM meetings WHERE meeting_id = %s", (meeting_id,))
if not cursor.fetchone(): if not cursor.fetchone():
# Clean up uploaded file if meeting doesn't exist # Clean up uploaded file if meeting doesn't exist
os.remove(file_path) os.remove(absolute_path)
raise HTTPException(status_code=404, detail="Meeting not found") raise HTTPException(status_code=404, detail="Meeting not found")
# Insert audio file record # Insert audio file record
@ -361,13 +363,13 @@ async def upload_audio(
file_size = VALUES(file_size), file_size = VALUES(file_size),
upload_time = VALUES(upload_time) upload_time = VALUES(upload_time)
''' '''
cursor.execute(insert_query, (meeting_id, audio_file.filename, relative_path, audio_file.size)) cursor.execute(insert_query, (meeting_id, audio_file.filename, '/'+str(relative_path), audio_file.size))
connection.commit() connection.commit()
return { return {
"message": "Audio file uploaded successfully", "message": "Audio file uploaded successfully",
"file_name": audio_file.filename, "file_name": audio_file.filename,
"file_path": relative_path "file_path": '/'+str(relative_path)
} }
@router.get("/meetings/{meeting_id}/audio") @router.get("/meetings/{meeting_id}/audio")
@ -426,14 +428,12 @@ async def upload_image(
# Generate unique filename # Generate unique filename
unique_filename = f"{uuid.uuid4()}{file_extension}" unique_filename = f"{uuid.uuid4()}{file_extension}"
file_path = meeting_dir / unique_filename absolute_path = meeting_dir / unique_filename
relative_path = absolute_path.relative_to(BASE_DIR)
# Store relative path for URL access
relative_path = f"markdown/{meeting_id}/{unique_filename}"
# Save file # Save file
try: try:
with open(file_path, "wb") as buffer: with open(absolute_path, "wb") as buffer:
shutil.copyfileobj(image_file.file, buffer) shutil.copyfileobj(image_file.file, buffer)
except Exception as e: except Exception as e:
raise HTTPException(status_code=500, detail=f"Failed to save image: {str(e)}") raise HTTPException(status_code=500, detail=f"Failed to save image: {str(e)}")
@ -441,6 +441,5 @@ async def upload_image(
return { return {
"message": "Image uploaded successfully", "message": "Image uploaded successfully",
"file_name": image_file.filename, "file_name": image_file.filename,
"file_path": relative_path, "file_path": '/'+ str(relative_path)
"url": f"/uploads/{relative_path}"
} }

View File

@ -33,4 +33,13 @@ API_CONFIG = {
'host': os.getenv('API_HOST', '0.0.0.0'), 'host': os.getenv('API_HOST', '0.0.0.0'),
'port': int(os.getenv('API_PORT', '8000')), 'port': int(os.getenv('API_PORT', '8000')),
'cors_origins': os.getenv('CORS_ORIGINS', 'http://localhost:5173').split(',') 'cors_origins': os.getenv('CORS_ORIGINS', 'http://localhost:5173').split(',')
} }
# Qiniu Cloud configuration; each value can be overridden through the
# environment variable of the same name.
# NOTE(review): the fallback values for the access/secret keys look like real
# credentials committed to source control — confirm, rotate them if so, and
# provide them through the environment only.
QINIU_ACCESS_KEY = os.getenv('QINIU_ACCESS_KEY', 'A0tp96HCtg-wZCughTgi5vc2pJnw3btClwxRE_e8')
QINIU_SECRET_KEY = os.getenv('QINIU_SECRET_KEY', 'Lj-MSHpaVbmzpS86kMIjmwikvYOT9iPBjCk9hm6k')
QINIU_BUCKET = os.getenv('QINIU_BUCKET', 'imeeting')
QINIU_DOMAIN = os.getenv('QINIU_DOMAIN', 't0vogyxkz.hn-bkt.clouddn.com')
# Dashscope (Tongyi Qwen) API Key
# NOTE(review): the fallback value looks like a real API key — confirm and
# rotate it if so; prefer supplying it through DASHSCOPE_API_KEY only.
DASHSCOPE_API_KEY = os.getenv('DASHSCOPE_API_KEY', 'sk-c2bf06ea56b4491ea3d1e37fdb472b8f')

View File

@ -47,6 +47,7 @@ class TranscriptSegment(BaseModel):
text_content: str text_content: str
class CreateMeetingRequest(BaseModel): class CreateMeetingRequest(BaseModel):
user_id: int
title: str title: str
meeting_time: Optional[datetime.datetime] meeting_time: Optional[datetime.datetime]
attendee_ids: list[int] attendee_ids: list[int]

View File

@ -0,0 +1,106 @@
from http import HTTPStatus
import requests
import json
import dashscope
from dashscope.audio.asr import Transcription
from app.core.config import DASHSCOPE_API_KEY
from app.core.database import get_db_connection
class AIService:
    """Transcribes meeting audio with DashScope and stores the result.

    ``transcribe`` submits the audio URLs to the ``paraformer-v2`` model,
    downloads the transcription JSON and replaces the meeting's rows in
    ``transcript_segments``. Failures are reported with ``print`` and never
    raised to the caller.
    """

    # Seconds to wait for the transcription JSON download; without a timeout
    # ``requests.get`` can block forever on a stalled connection.
    RESULT_FETCH_TIMEOUT = 30

    def __init__(self):
        # The DashScope SDK reads the key from this module-level attribute.
        dashscope.api_key = DASHSCOPE_API_KEY

    def transcribe(self, file_urls: list[str], meeting_id: int):
        """Transcribe the given audio URLs and save the segments for a meeting.

        Args:
            file_urls: URLs of the audio files to transcribe.
            meeting_id: Meeting whose transcript segments are replaced.

        Returns:
            None. Errors are printed, not raised.
        """
        print(f"Starting transcription for meeting_id: {meeting_id}, files: {file_urls}")
        try:
            task_response = Transcription.async_call(
                model='paraformer-v2',
                file_urls=file_urls,
                language_hints=['zh', 'en'],
                disfluency_removal_enabled=True,
                diarization_enabled=True,
                speaker_count=10
            )
            # A rejected submission carries no task id; report it instead of
            # failing later with an opaque attribute error.
            if task_response.status_code != HTTPStatus.OK or not task_response.output:
                print(f"Transcription submission failed: {task_response.status_code}, {task_response.message}")
                return

            transcribe_response = Transcription.wait(task=task_response.output.task_id)
            if transcribe_response.status_code != HTTPStatus.OK:
                print(f"Transcription failed: {transcribe_response.status_code}, {transcribe_response.message}")
                return
            print("Transcription task completed successfully!")

            if not (transcribe_response.output and transcribe_response.output.get('results')):
                print("No transcription results found in the response.")
                return

            # NOTE(review): only the first result is consumed, so with several
            # file_urls the remaining transcriptions are ignored — confirm
            # whether multi-file input is expected.
            transcription_url = transcribe_response.output['results'][0]['transcription_url']
            print(f"Fetching transcription from URL: {transcription_url}")
            response = requests.get(transcription_url, timeout=self.RESULT_FETCH_TIMEOUT)
            response.raise_for_status()
            transcription_data = response.json()
            self._save_segments_to_db(transcription_data, meeting_id)
        except requests.exceptions.RequestException as e:
            print(f"Error fetching transcription from URL: {e}")
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from transcription URL: {e}")
        except Exception as e:
            print(f"An unexpected error occurred: {e}")

    @staticmethod
    def _build_segments(data: dict, meeting_id: int) -> list:
        """Flatten transcription JSON into rows for ``transcript_segments``.

        Returns:
            A list of ``(meeting_id, speaker, begin_time, end_time, text)``
            tuples, one per sentence; the speaker is ``'Unknown'`` when the
            sentence has no ``speaker_id``.
        """
        return [
            (
                meeting_id,
                sentence.get('speaker_id', 'Unknown'),
                sentence.get('begin_time'),
                sentence.get('end_time'),
                sentence.get('text'),
            )
            for transcript in data.get('transcripts', [])
            for sentence in transcript.get('sentences', [])
        ]

    def _save_segments_to_db(self, data: dict, meeting_id: int):
        """Replace the meeting's transcript segments with those found in ``data``.

        Nothing is written when ``data`` contains no sentences. Database
        errors are printed, not raised.
        """
        segments_to_insert = self._build_segments(data, meeting_id)
        if not segments_to_insert:
            print("No segments to save.")
            return
        try:
            with get_db_connection() as connection:
                cursor = connection.cursor()
                # Clear existing segments for this meeting to avoid duplicates
                delete_query = "DELETE FROM transcript_segments WHERE meeting_id = %s"
                cursor.execute(delete_query, (meeting_id,))
                print(f"Deleted existing segments for meeting_id: {meeting_id}")
                insert_query = '''
                    INSERT INTO transcript_segments (meeting_id, speaker_tag, start_time_ms, end_time_ms, text_content)
                    VALUES (%s, %s, %s, %s, %s)
                '''
                cursor.executemany(insert_query, segments_to_insert)
                connection.commit()
                print(f"Successfully saved {len(segments_to_insert)} segments to the database for meeting_id: {meeting_id}")
        except Exception as e:
            print(f"Database error: {e}")
# Manual smoke test: run this module directly to transcribe one recording.
if __name__ == '__main__':
    # Prerequisites:
    # 1. A meeting with the meeting_id used below must exist in the database.
    # 2. The audio file URL must be publicly accessible.
    test_meeting_id = 37
    # Replace with your own publicly accessible audio file URL.
    test_file_urls = ['http://t0vogyxkz.hn-bkt.clouddn.com/record/meeting_records_2.mp3']
    print("--- Running AI Service Test ---")
    ai_service = AIService()
    ai_service.transcribe(file_urls=test_file_urls, meeting_id=test_meeting_id)
    print("--- AI Service Test Finished ---")

View File

@ -0,0 +1,147 @@
from qiniu import Auth, put_file_v2, etag, BucketManager
import qiniu.config
import os
import uuid
from typing import Optional, Tuple
from app.core.config import QINIU_ACCESS_KEY, QINIU_SECRET_KEY, QINIU_BUCKET, QINIU_DOMAIN
class QiniuService:
    """Thin wrapper around the Qiniu SDK for uploading and deleting files."""

    def __init__(self):
        self.access_key = QINIU_ACCESS_KEY
        self.secret_key = QINIU_SECRET_KEY
        self.bucket_name = QINIU_BUCKET
        self.domain = QINIU_DOMAIN
        # Missing settings are only reported here; uploads will fail later.
        if not self.access_key or not self.secret_key:
            print("ERROR: 七牛云Access Key或Secret Key为空!")
        if not self.bucket_name:
            print("ERROR: 七牛云Bucket名称为空!")
        if not self.domain:
            print("ERROR: 七牛云Domain为空!")
        self.q = Auth(self.access_key, self.secret_key)

    def _upload(self, file_path: str, meeting_id: int, original_filename: str,
                prefix: str) -> Tuple[bool, str, Optional[str]]:
        """Upload a local file under ``<prefix>/<meeting_id>/<uuid><ext>``.

        Shared implementation of the public upload methods. Never raises:
        every failure is reported through the returned tuple.
        """
        try:
            if not os.path.exists(file_path):
                return False, "", f"File not found: {file_path}"

            # Keep the original extension but use a random name so uploads
            # never overwrite each other.
            file_extension = os.path.splitext(original_filename or "")[1]
            key = f"{prefix}/{meeting_id}/{uuid.uuid4()}{file_extension}"

            # The token is valid for one hour. It is deliberately not logged:
            # it embeds the access key and a signature.
            token = self.q.upload_token(self.bucket_name, key, 3600)
            ret, info = put_file_v2(token, key, file_path)
            print(f"DEBUG: Qiniu upload response - ret: {ret}, info: {info}")

            # Success requires HTTP 200 and the key echoed back.
            if info.status_code == 200 and ret and ret.get('key') == key:
                return True, f"http://{self.domain}/{key}", None

            error_msg = f"Upload failed: status_code={info.status_code}"
            if getattr(info, 'text_body', None):
                error_msg += f", response={info.text_body}"
            return False, "", error_msg
        except Exception as e:
            import traceback
            traceback.print_exc()
            return False, "", f"Upload error: {str(e)}"

    def upload_audio_file(self, file_path: str, meeting_id: int, original_filename: str) -> Tuple[bool, str, Optional[str]]:
        """
        Upload audio file to Qiniu cloud storage
        Args:
            file_path: Local file path
            meeting_id: Meeting ID for directory structure
            original_filename: Original filename
        Returns:
            Tuple of (success: bool, url: str, error_message: Optional[str])
        """
        return self._upload(file_path, meeting_id, original_filename, "record")

    def upload_markdown_image(self, file_path: str, meeting_id: int, original_filename: str) -> Tuple[bool, str, Optional[str]]:
        """
        Upload markdown image to Qiniu cloud storage
        Args:
            file_path: Local file path
            meeting_id: Meeting ID for directory structure
            original_filename: Original filename
        Returns:
            Tuple of (success: bool, url: str, error_message: Optional[str])
        """
        return self._upload(file_path, meeting_id, original_filename, "markdown")

    def delete_file(self, key: str) -> Tuple[bool, Optional[str]]:
        """
        Delete file from Qiniu cloud storage
        Args:
            key: File key in Qiniu storage
        Returns:
            Tuple of (success: bool, error_message: Optional[str])
        """
        try:
            bucket = BucketManager(self.q)
            ret, info = bucket.delete(self.bucket_name, key)
            # The SDK returns an empty dict on success and None on failure,
            # so decide on the HTTP status rather than on ``ret is None``.
            if info.status_code == 200:
                return True, None
            return False, f"Delete failed: {info}"
        except Exception as e:
            return False, f"Delete error: {str(e)}"
# Module-level instance created at import time; other modules import this
# object instead of constructing their own QiniuService.
qiniu_service = QiniuService()

File diff suppressed because it is too large Load Diff

View File

@ -3,4 +3,5 @@ mysql-connector-python
uvicorn[standard] uvicorn[standard]
python-multipart python-multipart
pydantic[email] pydantic[email]
passlib[bcrypt] passlib[bcrypt]
qiniu

104
test_full_upload.py 100644
View File

@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
import os
import sys
import asyncio
from pathlib import Path
# Add the app directory to the path
sys.path.append(os.path.join(os.path.dirname(__file__), 'app'))
# Mock the FastAPI UploadFile
class MockUploadFile:
    """In-memory stand-in for an uploaded file, used by the manual test below.

    Exposes ``filename``, ``content``, ``size``, an async ``read`` that keeps
    a read position, and ``file()`` which returns a fresh ``BytesIO``.
    """

    def __init__(self, filename, content):
        self.filename = filename
        self.content = content
        self.size = len(content)
        self._file_pos = 0

    async def read(self, size=-1):
        """Return up to ``size`` items from the current position (-1 = the rest)."""
        start = self._file_pos
        if size == -1:
            chunk = self.content[start:]
            self._file_pos = len(self.content)
            return chunk
        chunk = self.content[start:start + size]
        self._file_pos = start + len(chunk)
        return chunk

    def file(self):
        """Return the whole content as a new ``BytesIO`` (text is encoded first)."""
        from io import BytesIO
        payload = self.content
        if isinstance(payload, str):
            payload = payload.encode()
        return BytesIO(payload)
async def test_audio_upload():
    """Manually exercise the Qiniu audio upload with a local recording.

    Reads a fixed local audio file, writes a temporary copy into
    ``AUDIO_DIR``, uploads that copy through ``qiniu_service`` and prints the
    outcome. The temporary copy is deleted afterwards.
    """
    from uuid import uuid4
    from app.api.endpoints.meetings import AUDIO_DIR
    from app.services.qiniu_service import qiniu_service

    # Path to the problematic audio file
    audio_file_path = "/Users/jiliu/工作/projects/imeeting/backend/uploads/audio/31ce039a-f619-4869-91c8-eab934bbd1d4.m4a"

    try:
        test_content = Path(audio_file_path).read_bytes()
    except FileNotFoundError:
        print(f"Error: The file was not found at {audio_file_path}")
        return
    print(f"Successfully read content from {audio_file_path}")

    # Wrap the real audio bytes the way the API endpoint would receive them.
    mock_file = MockUploadFile("31ce039a-f619-4869-91c8-eab934bbd1d4.m4a", test_content)

    # Temporary copy, named like the endpoint names its spooled uploads.
    file_extension = ".m4a"
    temp_path = AUDIO_DIR / f"{uuid4()}{file_extension}"
    print(f"Creating temporary file at: {temp_path}")

    try:
        temp_path.write_bytes(mock_file.content)  # content is already bytes
        print(f"Temporary file created successfully. Exists: {temp_path.exists()}")
        print(f"Temporary file size: {temp_path.stat().st_size}")
    except Exception as e:
        print(f"Failed to save temporary file: {str(e)}")
        return

    try:
        print("Attempting to upload audio to Qiniu...")
        print(f"Temp file path: {temp_path}")
        print(f"Temp file exists: {temp_path.exists()}")
        outcome = qiniu_service.upload_audio_file(str(temp_path), 11, "test-audio.mp3")
        success, qiniu_url, error_msg = outcome
        print(f"Qiniu upload result - success: {success}")
        print(f"Qiniu upload result - url: {qiniu_url}")
        print(f"Qiniu upload result - error: {error_msg}")

        if temp_path.exists():
            temp_path.unlink()
            print("Temporary file cleaned up")

        if not success:
            print(f"Upload failed: {error_msg}")
        else:
            print("Upload successful!")
            print(f"URL: {qiniu_url}")
    except Exception as e:
        import traceback
        print(f"Exception in audio upload: {str(e)}")
        print(f"Traceback: {traceback.format_exc()}")
        # Remove the temporary copy on the failure path as well.
        if temp_path.exists():
            temp_path.unlink()
# Script entry point: run the async upload check to completion via
# asyncio.run when this file is executed directly (not on import).
if __name__ == "__main__":
    asyncio.run(test_audio_upload())

82
test_qiniu.py 100644
View File

@ -0,0 +1,82 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
from qiniu import Auth, put_file_v2, BucketManager
# Add app path
sys.path.append(os.path.join(os.path.dirname(__file__), 'app'))
from app.core.config import QINIU_ACCESS_KEY, QINIU_SECRET_KEY, QINIU_BUCKET, QINIU_DOMAIN
def test_qiniu_connection():
    """Run three smoke checks against Qiniu, printing each outcome.

    1. Generate an upload token from the configured key pair.
    2. List the bucket to verify the credentials are accepted. The HTTP
       status is checked explicitly, because a rejected request comes back
       as a response object rather than an exception.
    3. Upload a local test file with ``put_file_v2``.

    Returns:
        bool: ``True`` only if all three steps succeed; ``False`` on the
        first failure, including when the local test file does not exist.
    """
    print("=== 七牛云连接测试 ===")
    print(f"Access Key: {QINIU_ACCESS_KEY[:10]}...")
    # Never echo secret-key material, not even a prefix: only report
    # whether a value is configured.
    print(f"Secret Key: {'**********' if QINIU_SECRET_KEY else '<empty>'}")
    print(f"Bucket: {QINIU_BUCKET}")
    print(f"Domain: {QINIU_DOMAIN}")

    # Create the auth object.
    q = Auth(QINIU_ACCESS_KEY, QINIU_SECRET_KEY)

    # Test 1: generate an upload token.
    try:
        key = "test/connection-test.txt"
        token = q.upload_token(QINIU_BUCKET, key, 3600)
        print(f"✓ Token生成成功: {token[:50]}...")
    except Exception as e:
        print(f"✗ Token生成失败: {e}")
        return False

    # Test 2: list the bucket (checks that authentication is correct).
    try:
        bucket_manager = BucketManager(q)
        ret, eof, info = bucket_manager.list(QINIU_BUCKET, limit=100)
    except Exception as e:
        print(f"✗ Bucket访问失败: {e}")
        return False
    if info.status_code != 200:
        # Auth/bucket errors arrive as a non-200 response, not an exception.
        print(f"✗ Bucket访问失败: status_code: {info.status_code}")
        return False
    print(f"✓ Bucket访问成功, status_code: {info.status_code}")
    if ret:
        print(f"  存储空间中有文件: {len(ret.get('items', []))}")

    # Test 3: upload a small file.
    test_file = "/Users/jiliu/工作/projects/imeeting/backend/uploads/result.json"
    if not os.path.exists(test_file):
        print(f"✗ 测试文件不存在: {test_file}")
        return False

    try:
        key = "test/result1.json"
        token = q.upload_token(QINIU_BUCKET, key, 3600)
        ret, info = put_file_v2(token, key, test_file, version='v2')
        print("上传结果:")
        print(f"  ret: {ret}")
        print(f"  status_code: {info.status_code}")
        print(f"  text_body: {info.text_body}")
        print(f"  url: {info.url}")
        print(f"  req_id: {info.req_id}")
        print(f"  x_log: {info.x_log}")
    except Exception as e:
        print(f"✗ 文件上传异常: {e}")
        import traceback
        traceback.print_exc()
        return False

    if info.status_code != 200:
        print(f"✗ 文件上传失败: {info.status_code}")
        return False

    print("✓ 文件上传成功")
    url = f"http://{QINIU_DOMAIN}/{key}"
    print(f"  访问URL: {url}")
    return True
# Script entry point: run the connection check, print a summary, and
# report the outcome through the process exit status.
if __name__ == "__main__":
    success = test_qiniu_connection()
    if success:
        print("\n🎉 七牛云连接测试成功!")
    else:
        print("\n❌ 七牛云连接测试失败!")
    # Exit non-zero on failure so shells and CI jobs can detect it;
    # previously the script always exited 0.
    sys.exit(0 if success else 1)

21
test_upload.py 100644
View File

@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# flake8: noqa
from qiniu import Auth, put_data ,put_file_v2, etag
import qiniu.config
import os

# Access Key and Secret Key are read from the environment instead of being
# hardcoded. SECURITY: the key pair that used to be embedded here was
# committed to version control and must be treated as compromised -- rotate
# it in the Qiniu console.
access_key = os.environ.get('QINIU_ACCESS_KEY')
secret_key = os.environ.get('QINIU_SECRET_KEY')
if not access_key or not secret_key:
    raise SystemExit('Set QINIU_ACCESS_KEY and QINIU_SECRET_KEY before running this script')
# Build the auth object
q = Auth(access_key, secret_key)
# Bucket to upload into
bucket_name = 'imeeting'
# Object key the file is stored under after upload
key = 'test/result.json'
# Generate the upload token (expiry etc. can be specified); valid for 3600 s
token = q.upload_token(bucket_name, key, 3600)
# Local path of the file to upload
localfile = './uploads/result.json'
ret, info = put_file_v2(token, key, localfile, version='v2')
print(info)
# A failed upload leaves ret as None (presumably per the SDK convention --
# confirm); report the response instead of a TypeError on ret['key'].
if ret is None:
    raise SystemExit(f'Upload failed: {info}')
assert ret['key'] == key
assert ret['hash'] == etag(localfile)

BIN
uploads/.DS_Store vendored

Binary file not shown.

BIN
uploads/audio/.DS_Store vendored 100644

Binary file not shown.

BIN
uploads/markdown/.DS_Store vendored 100644

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 211 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 221 KiB

Binary file not shown.

7
uploads/r.txt 100644
View File

@ -0,0 +1,7 @@
fastapi
mysql-connector-python
uvicorn[standard]
python-multipart
pydantic[email]
passlib[bcrypt]
qiniu

1707
uploads/result.json 100644

File diff suppressed because it is too large Load Diff

BIN
uploads/test_1.xlsx 100644

Binary file not shown.