feat: 扩展后端WebSocket和语音识别功能
- 扩展websocket.py支持语音消息
- 优化asr_service.py语音识别服务
- 更新main.py和requirements.txt
- 更新.env.production配置

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -19,6 +19,7 @@ from database.models import Conversation, Segment
|
||||
from database.models import User as UserModel
|
||||
from services.auth_service import verify_token
|
||||
from services.memoir_state_service import get_or_create_state
|
||||
from services.asr_service import asr_service
|
||||
from fastapi import HTTPException, status
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -28,8 +29,9 @@ class MessageType(str, Enum):
|
||||
"""WebSocket 消息类型"""
|
||||
CONNECT = "connect"
|
||||
AUDIO_CHUNK = "audio_chunk"
|
||||
AUDIO_MESSAGE = "audio_message" # 完整音频消息(类似微信语音)
|
||||
TEXT = "text" # 文本消息
|
||||
TRANSCRIPT = "transcript"
|
||||
TRANSCRIPT = "transcript" # 语音转文字结果
|
||||
AGENT_RESPONSE = "agent_response"
|
||||
TTS_AUDIO = "tts_audio"
|
||||
END_CONVERSATION = "end_conversation"
|
||||
@@ -190,6 +192,70 @@ async def websocket_endpoint(
|
||||
manager=manager
|
||||
)
|
||||
|
||||
elif msg_type == MessageType.AUDIO_MESSAGE:
|
||||
# 处理完整音频消息(类似微信语音)
|
||||
data = message.get("data", {})
|
||||
audio_base64 = data.get("audio_base64", "")
|
||||
audio_duration = data.get("duration", 0)
|
||||
|
||||
if audio_base64:
|
||||
logger.info(f"收到音频消息,时长: {audio_duration}s")
|
||||
|
||||
try:
|
||||
# 1. ASR 转写
|
||||
transcript_text = await asr_service.transcribe(audio_base64)
|
||||
logger.info(f"ASR 转写结果: {transcript_text}")
|
||||
|
||||
# 2. 发送转写结果给客户端
|
||||
await manager.send_message(conversation_id, {
|
||||
"type": MessageType.TRANSCRIPT,
|
||||
"conversation_id": conversation_id,
|
||||
"data": {
|
||||
"text": transcript_text,
|
||||
"audio_duration": audio_duration
|
||||
},
|
||||
"timestamp": datetime.now(timezone.utc).isoformat()
|
||||
})
|
||||
|
||||
# 3. 保存段落到数据库(包含转写文本和音频信息)
|
||||
segment = Segment(
|
||||
id=str(uuid.uuid4()),
|
||||
conversation_id=conversation_id,
|
||||
transcript_text=transcript_text,
|
||||
audio_url=f"audio:{audio_duration}s", # 简化存储,标记为音频消息
|
||||
processed=False
|
||||
)
|
||||
db.add(segment)
|
||||
await db.commit()
|
||||
await db.refresh(segment)
|
||||
await manager.background_runner.queue_message(conversation.user_id, segment.id)
|
||||
|
||||
# 4. Agent 生成回应(基于转写文本)
|
||||
if transcript_text and not transcript_text.startswith("转写失败"):
|
||||
await process_user_message(
|
||||
conversation_id=conversation_id,
|
||||
user_message=transcript_text,
|
||||
conversation=conversation,
|
||||
segment=segment,
|
||||
db=db,
|
||||
manager=manager
|
||||
)
|
||||
else:
|
||||
# 转写失败,发送错误消息
|
||||
await manager.send_message(conversation_id, {
|
||||
"type": MessageType.ERROR,
|
||||
"data": {"message": "语音转写失败,请重试或使用文字输入"},
|
||||
"timestamp": datetime.now(timezone.utc).isoformat()
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"处理音频消息失败: {e}", exc_info=True)
|
||||
await manager.send_message(conversation_id, {
|
||||
"type": MessageType.ERROR,
|
||||
"data": {"message": f"处理音频消息失败: {str(e)}"},
|
||||
"timestamp": datetime.now(timezone.utc).isoformat()
|
||||
})
|
||||
|
||||
elif msg_type == MessageType.END_CONVERSATION:
|
||||
# 结束对话
|
||||
conversation.status = "ended"
|
||||
|
||||
Reference in New Issue
Block a user