fix(conversation): 修复实时会话 TTS/回复被离屏 WS 抢占
- 列表预热仅预取消息缓存,避免后台 WebSocket 覆盖服务端连接
- RealtimeSession UI 回调按 owner 独占,防止 offscreen 覆盖聊天页
- 列表页聚焦时再 prewarm,会话页 TTS 入队优先 base64
- 管线下发 TTS 同时带 audio_base64 与 audio_url;协议说明同步
- 移除 TTS 排查用前后端调试日志,保留错误/告警
- 补充 WS / RealtimeSession / entry-warmup / 播放器相关单测

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -99,41 +99,10 @@ async def _send_tts_audio(
|
||||
) -> str | None:
|
||||
"""Synthesize TTS, upload to COS, append Redis, send TTS_AUDIO. Returns public URL or None."""
|
||||
current_epoch = _tts_epoch_value(conversation_id)
|
||||
# 长期保留 INFO:TTS 决策与执行链路必须在 INFO 级别全程可见
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio entry conversation_id={} chunk_index={} chunk_total={} "
|
||||
"text_len={} language={} manual={} tts_epoch_start={} current_epoch={} "
|
||||
"enable_tts={} provider={}",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
chunk_total,
|
||||
len(text or ""),
|
||||
language,
|
||||
manual,
|
||||
tts_epoch_start,
|
||||
current_epoch,
|
||||
settings.enable_tts,
|
||||
settings.tts_provider,
|
||||
)
|
||||
# enable_tts:仅禁用「助手回复自动生成 TTS」(want_tts 路径);用户点喇叭(manual=True)仍可合成。
|
||||
if not manual and not settings.enable_tts:
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
|
||||
"url_set=False audio_bytes_len=0 reason=enable_tts_false",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
)
|
||||
return None
|
||||
if current_epoch != tts_epoch_start:
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
|
||||
"url_set=False audio_bytes_len=0 reason=epoch_mismatch_pre_synth "
|
||||
"tts_epoch_start={} current_epoch={}",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
tts_epoch_start,
|
||||
current_epoch,
|
||||
)
|
||||
return None
|
||||
try:
|
||||
tts = get_tts_provider()
|
||||
@@ -148,50 +117,19 @@ async def _send_tts_audio(
|
||||
(text or "")[:30],
|
||||
settings.tts_provider,
|
||||
)
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
|
||||
"url_set=False audio_bytes_len=0 reason=synthesize_empty",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
)
|
||||
return None
|
||||
if _tts_epoch_value(conversation_id) != tts_epoch_start:
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
|
||||
"url_set=False audio_bytes_len={} reason=epoch_mismatch_post_synth",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
len(audio_bytes),
|
||||
)
|
||||
return None
|
||||
ext = _tts_object_ext(settings.tts_codec)
|
||||
content_type = _tts_codec_to_content_type(settings.tts_codec)
|
||||
storage = get_object_storage()
|
||||
key = f"conversations/{conversation_id}/tts/{uuid.uuid4().hex}.{ext}"
|
||||
upload_started = time.perf_counter()
|
||||
logger.debug(
|
||||
"pipeline._send_tts_audio uploading key={} audio_bytes_len={} content_type={}",
|
||||
key,
|
||||
len(audio_bytes),
|
||||
content_type,
|
||||
)
|
||||
public_url = storage.upload(key, audio_bytes, content_type)
|
||||
upload_ms = (time.perf_counter() - upload_started) * 1000
|
||||
# 与 `tts_delivery.apply_presigned_tts_urls_to_messages` / 回忆录图片 presign 一致:下发可播 URL
|
||||
playback_url = storage.get_url(key, expires=TTS_PRESIGNED_EXPIRES_SEC)
|
||||
logger.debug(
|
||||
"pipeline._send_tts_audio uploaded key={} audio_bytes_len={} upload_ms={:.2f} "
|
||||
"public_url_set={} playback_url_set={}",
|
||||
key,
|
||||
len(audio_bytes),
|
||||
upload_ms,
|
||||
bool(public_url),
|
||||
bool(playback_url),
|
||||
)
|
||||
audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
|
||||
payload_data: Dict[str, Any] = {
|
||||
"audio_base64": audio_b64,
|
||||
"format": settings.tts_codec,
|
||||
"audio_base64": base64.b64encode(audio_bytes).decode("utf-8"),
|
||||
"audio_url": playback_url,
|
||||
"index": chunk_index,
|
||||
"total": chunk_total,
|
||||
@@ -200,16 +138,6 @@ async def _send_tts_audio(
|
||||
payload_data["assistant_message_id"] = assistant_message_id
|
||||
if manual:
|
||||
payload_data["manual"] = True
|
||||
logger.debug(
|
||||
"pipeline._send_tts_audio sending TTS_AUDIO conversation_id={} chunk_index={} "
|
||||
"chunk_total={} payload_fields={} audio_b64_len={} manual={}",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
chunk_total,
|
||||
sorted(payload_data.keys()),
|
||||
len(audio_b64),
|
||||
manual,
|
||||
)
|
||||
await manager.send_message(
|
||||
conversation_id,
|
||||
{
|
||||
@@ -219,16 +147,6 @@ async def _send_tts_audio(
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
},
|
||||
)
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=True "
|
||||
"url_set={} audio_bytes_len={} upload_ms={:.2f} manual={}",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
bool(public_url),
|
||||
len(audio_bytes),
|
||||
upload_ms,
|
||||
manual,
|
||||
)
|
||||
return public_url
|
||||
except Exception as e:
|
||||
err_str = str(e)
|
||||
@@ -239,13 +157,6 @@ async def _send_tts_audio(
|
||||
)
|
||||
else:
|
||||
logger.error("TTS synthesize failed: {}", e)
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
|
||||
"url_set=False audio_bytes_len=0 reason=exception err={}",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
type(e).__name__,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
@@ -1035,18 +946,6 @@ async def process_user_message(
|
||||
segment.id,
|
||||
len(user_message or ""),
|
||||
)
|
||||
# 长期保留:TTS 决策入口(pipeline 层);INFO 级别可见所有控制位
|
||||
logger.info(
|
||||
"pipeline.process_user_message entry conversation_id={} segment_id={} "
|
||||
"tts_this_turn={} force_skip_tts={} enable_tts={} provider={} user_language={}",
|
||||
conversation_id,
|
||||
segment.id,
|
||||
tts_this_turn,
|
||||
force_skip_tts,
|
||||
settings.enable_tts,
|
||||
settings.tts_provider,
|
||||
user_language,
|
||||
)
|
||||
is_from_voice = bool(segment.audio_url)
|
||||
voice_session_id = _voice_session_id_from_audio_url(segment.audio_url)
|
||||
audio_dur = getattr(segment, "audio_duration_seconds", None)
|
||||
@@ -1074,21 +973,6 @@ async def process_user_message(
|
||||
skip_tts = bool(turn.skip_tts)
|
||||
want_voice = bool(tts_this_turn) if tts_this_turn is not None else False
|
||||
want_tts = want_voice and settings.enable_tts and not skip_tts
|
||||
# 长期保留 INFO:TTS 决策最终结论;不再被 agent_summary_enabled 门控
|
||||
logger.info(
|
||||
"pipeline.process_user_message tts_decision conversation_id={} segment_id={} "
|
||||
"tts_this_turn={} force_skip_tts={} enable_tts={} skip_tts_from_turn={} "
|
||||
"want_voice={} want_tts={} response_segments={}",
|
||||
conversation_id,
|
||||
segment.id,
|
||||
tts_this_turn,
|
||||
force_skip_tts,
|
||||
settings.enable_tts,
|
||||
skip_tts,
|
||||
want_voice,
|
||||
want_tts,
|
||||
len(turn.messages),
|
||||
)
|
||||
if agent_summary_enabled():
|
||||
logger.info(
|
||||
"pipeline.process_user_message duration_ms={:.2f} "
|
||||
|
||||
Reference in New Issue
Block a user