fix: 用户开始录音 5s 后 AI 反馈“我在认真听”

This commit is contained in:
yangshilin
2026-03-16 11:24:40 +08:00
parent 981920784f
commit 2070a03d35
9 changed files with 128 additions and 11 deletions

View File

@@ -32,6 +32,7 @@ LEGACY_VOICE_SESSION_ID = "legacy"
class MessageType(str, Enum):
"""WebSocket 消息类型"""
CONNECT = "connect"
RECORDING_STARTED = "recording_started" # 客户端开始录音,用于服务端 5s 后发「我在认真听」
AUDIO_CHUNK = "audio_chunk"
AUDIO_SEGMENT = "audio_segment" # 分段语音消息(长语音持续上传)
AUDIO_MESSAGE = "audio_message" # 完整音频消息(类似微信语音)
@@ -146,6 +147,9 @@ class SegmentStreamState:
buffered_transcripts: Dict[int, Tuple[str, Segment]] = field(default_factory=dict)
consumed_index: int = -1
active_tasks: Set[asyncio.Task] = field(default_factory=set)
# 录音开始约 5s 后只发一次「我在认真听」;若用户提前结束录音则取消待发
listening_feedback_sent: bool = False
listening_feedback_task: Optional[asyncio.Task] = None
def _utc_now() -> datetime:
@@ -257,17 +261,21 @@ async def _get_persisted_contiguous_segment_index(
return contiguous_index
# Delay, in seconds, that _delayed_listening_feedback sleeps before it sends the feedback.
LISTENING_FEEDBACK_DELAY_SEC = 5.0
# User-facing text of the "I'm listening" transition feedback; emitted verbatim as the
# "text" field of the AGENT_RESPONSE payload, so it must not be translated or reworded here.
LISTENING_FEEDBACK_TEXT = "我在认真听,你继续说,我会边听边整理重点。"
async def _send_segment_transition_feedback(
conversation_id: str,
segment_index: int,
manager: ConnectionManager,
) -> None:
"""ASR 处理中先给陪伴式过渡反馈,避免用户感知卡住"""
"""发送一次「我在认真听」陪伴式过渡反馈(由延迟任务调用)"""
await manager.send_message(conversation_id, {
"type": MessageType.AGENT_RESPONSE,
"conversation_id": conversation_id,
"data": {
"text": "我在认真听,你继续说,我会边听边整理重点。",
"text": LISTENING_FEEDBACK_TEXT,
"transition": True,
"segment_index": segment_index,
},
@@ -275,6 +283,22 @@ async def _send_segment_transition_feedback(
})
async def _delayed_listening_feedback(
conversation_id: str,
voice_session_id: str,
manager: ConnectionManager,
) -> None:
"""Send the "I'm listening" feedback once, after a fixed delay from recording start.

Sleeps for LISTENING_FEEDBACK_DELAY_SEC, then checks the segment state for
(conversation_id, voice_session_id): if the feedback was already sent it
returns without doing anything, otherwise it marks it as sent, clears the
stored task handle and sends the transition feedback.

This coroutine does not itself check whether recording has ended; suppressing
the message in that case relies on the task being cancelled from outside
while it is still pending.
"""
# Cancelling the task during this sleep aborts the coroutine before anything is sent.
await asyncio.sleep(LISTENING_FEEDBACK_DELAY_SEC)
# The state is looked up only after the delay, not captured when the task was created.
state = manager.get_or_create_segment_state(conversation_id, voice_session_id)
# The flag check and update happen under state.lock so the "sent" flag is tested and set together.
async with state.lock:
if state.listening_feedback_sent:
return
state.listening_feedback_sent = True
# Drop the stored handle; code that inspects listening_feedback_task will see None from here on.
state.listening_feedback_task = None
# NOTE(review): indentation is not preserved in this view, so it cannot be determined here
# whether this send runs while state.lock is still held — confirm against the real file.
# The segment index passed to the feedback helper is fixed at 0.
await _send_segment_transition_feedback(conversation_id, 0, manager)
async def _process_audio_segment_async(
conversation_id: str,
user_id: str,
@@ -600,6 +624,28 @@ async def websocket_endpoint(
user_message_timestamp=segment.created_at or user_message_timestamp,
)
elif msg_type == MessageType.RECORDING_STARTED:
# 用户点击开始录音:启动 5s 定时器,到时发一次「我在认真听」
data = message.get("data", {})
voice_session_id = _normalize_voice_session_id(data.get("voice_session_id"))
segment_state = manager.get_or_create_segment_state(
conversation_id,
voice_session_id,
)
async with segment_state.lock:
if segment_state.listening_feedback_task is not None and not segment_state.listening_feedback_task.done():
continue # 本会话已有待发任务,不重复
if segment_state.listening_feedback_sent:
continue
delayed_task = asyncio.create_task(
_delayed_listening_feedback(
conversation_id=conversation_id,
voice_session_id=voice_session_id,
manager=manager,
)
)
segment_state.listening_feedback_task = delayed_task
elif msg_type == MessageType.AUDIO_SEGMENT:
# 处理分段语音消息(长语音持续上传)
data = message.get("data", {})
@@ -680,12 +726,13 @@ async def websocket_endpoint(
)
continue
# 先发过渡反馈,减少“等待空白”体感
await _send_segment_transition_feedback(
conversation_id=conversation_id,
segment_index=segment_index,
manager=manager,
)
# 若本段是用户结束录音的最后一段,取消尚未发出的「我在认真听」,避免结束后再说
if is_last:
async with segment_state.lock:
t = segment_state.listening_feedback_task
segment_state.listening_feedback_task = None
if t is not None and not t.done():
t.cancel()
task = asyncio.create_task(
_process_audio_segment_async(