feat(i18n): persist language preference and thread through chat, memoir, TTS
- Add users.language_preference (Alembic 0018, default zh); capture at signup/SMS only; expose on auth and profile APIs
- Lite English prompts for chat and memoir; localized stage labels and agent names (Life Echo / 岁月知己)
- Tencent TTS: language-aware synthesis, ModelType=1 for 501004, English chunking
- WebSocket pipeline: emit all AGENT_RESPONSE segments when TTS cancels; INFO logs for tts_this_turn and TTS decisions; on-demand TTS logging
- Expo: device language on auth, i18n tiers/agent name, [SPLIT] streaming UX fixes
- Tests for migration, prompts, pipeline, router tts_this_turn, reply segments

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -64,6 +64,12 @@ def _tts_epoch_value(conversation_id: str) -> int:
|
||||
return _tts_cancel_epoch.get(conversation_id, 0)
|
||||
|
||||
|
||||
def _resolve_user_language(user) -> str:
|
||||
"""Return 'en' iff user.language_preference is set to 'en'; default 'zh'."""
|
||||
raw = getattr(user, "language_preference", "zh") if user is not None else "zh"
|
||||
return "en" if str(raw or "zh").strip().lower() == "en" else "zh"
|
||||
|
||||
|
||||
def _tts_object_ext(codec: str) -> str:
|
||||
c = (codec or "mp3").lower().lstrip(".")
|
||||
if c in ("wave",):
|
||||
@@ -89,31 +95,101 @@ async def _send_tts_audio(
|
||||
assistant_message_id: str | None,
|
||||
tts_epoch_start: int,
|
||||
manual: bool = False,
|
||||
language: str = "zh",
|
||||
) -> str | None:
|
||||
"""Synthesize TTS, upload to COS, append Redis, send TTS_AUDIO. Returns public URL or None."""
|
||||
current_epoch = _tts_epoch_value(conversation_id)
|
||||
# 长期保留 INFO:TTS 决策与执行链路必须在 INFO 级别全程可见
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio entry conversation_id={} chunk_index={} chunk_total={} "
|
||||
"text_len={} language={} manual={} tts_epoch_start={} current_epoch={} "
|
||||
"enable_tts={} provider={}",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
chunk_total,
|
||||
len(text or ""),
|
||||
language,
|
||||
manual,
|
||||
tts_epoch_start,
|
||||
current_epoch,
|
||||
settings.enable_tts,
|
||||
settings.tts_provider,
|
||||
)
|
||||
if not settings.enable_tts:
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
|
||||
"url_set=False audio_bytes_len=0 reason=enable_tts_false",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
)
|
||||
return None
|
||||
if _tts_epoch_value(conversation_id) != tts_epoch_start:
|
||||
if current_epoch != tts_epoch_start:
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
|
||||
"url_set=False audio_bytes_len=0 reason=epoch_mismatch_pre_synth "
|
||||
"tts_epoch_start={} current_epoch={}",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
tts_epoch_start,
|
||||
current_epoch,
|
||||
)
|
||||
return None
|
||||
try:
|
||||
tts = get_tts_provider()
|
||||
audio_bytes = await tts.synthesize(text)
|
||||
audio_bytes = await tts.synthesize(text, language=language)
|
||||
if not audio_bytes:
|
||||
logger.warning(
|
||||
"TTS skipped: synthesize returned empty. Check TTS config in .env"
|
||||
"TTS skipped: synthesize returned empty conversation_id={} chunk_index={} "
|
||||
"language={} text_preview={!r} voice_provider={}",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
language,
|
||||
(text or "")[:30],
|
||||
settings.tts_provider,
|
||||
)
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
|
||||
"url_set=False audio_bytes_len=0 reason=synthesize_empty",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
)
|
||||
return None
|
||||
if _tts_epoch_value(conversation_id) != tts_epoch_start:
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
|
||||
"url_set=False audio_bytes_len={} reason=epoch_mismatch_post_synth",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
len(audio_bytes),
|
||||
)
|
||||
return None
|
||||
ext = _tts_object_ext(settings.tts_codec)
|
||||
content_type = _tts_codec_to_content_type(settings.tts_codec)
|
||||
storage = get_object_storage()
|
||||
key = f"conversations/{conversation_id}/tts/{uuid.uuid4().hex}.{ext}"
|
||||
upload_started = time.perf_counter()
|
||||
logger.debug(
|
||||
"pipeline._send_tts_audio uploading key={} audio_bytes_len={} content_type={}",
|
||||
key,
|
||||
len(audio_bytes),
|
||||
content_type,
|
||||
)
|
||||
public_url = storage.upload(key, audio_bytes, content_type)
|
||||
upload_ms = (time.perf_counter() - upload_started) * 1000
|
||||
# 与 `tts_delivery.apply_presigned_tts_urls_to_messages` / 回忆录图片 presign 一致:下发可播 URL
|
||||
playback_url = storage.get_url(key, expires=TTS_PRESIGNED_EXPIRES_SEC)
|
||||
logger.debug(
|
||||
"pipeline._send_tts_audio uploaded key={} audio_bytes_len={} upload_ms={:.2f} "
|
||||
"public_url_set={} playback_url_set={}",
|
||||
key,
|
||||
len(audio_bytes),
|
||||
upload_ms,
|
||||
bool(public_url),
|
||||
bool(playback_url),
|
||||
)
|
||||
audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
|
||||
payload_data: Dict[str, Any] = {
|
||||
"audio_base64": base64.b64encode(audio_bytes).decode("utf-8"),
|
||||
"audio_base64": audio_b64,
|
||||
"format": settings.tts_codec,
|
||||
"audio_url": playback_url,
|
||||
"index": chunk_index,
|
||||
@@ -123,6 +199,16 @@ async def _send_tts_audio(
|
||||
payload_data["assistant_message_id"] = assistant_message_id
|
||||
if manual:
|
||||
payload_data["manual"] = True
|
||||
logger.debug(
|
||||
"pipeline._send_tts_audio sending TTS_AUDIO conversation_id={} chunk_index={} "
|
||||
"chunk_total={} payload_fields={} audio_b64_len={} manual={}",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
chunk_total,
|
||||
sorted(payload_data.keys()),
|
||||
len(audio_b64),
|
||||
manual,
|
||||
)
|
||||
await manager.send_message(
|
||||
conversation_id,
|
||||
{
|
||||
@@ -132,6 +218,16 @@ async def _send_tts_audio(
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
},
|
||||
)
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=True "
|
||||
"url_set={} audio_bytes_len={} upload_ms={:.2f} manual={}",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
bool(public_url),
|
||||
len(audio_bytes),
|
||||
upload_ms,
|
||||
manual,
|
||||
)
|
||||
return public_url
|
||||
except Exception as e:
|
||||
err_str = str(e)
|
||||
@@ -142,6 +238,13 @@ async def _send_tts_audio(
|
||||
)
|
||||
else:
|
||||
logger.error("TTS synthesize failed: {}", e)
|
||||
logger.info(
|
||||
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
|
||||
"url_set=False audio_bytes_len=0 reason=exception err={}",
|
||||
conversation_id,
|
||||
chunk_index,
|
||||
type(e).__name__,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
@@ -155,15 +258,44 @@ async def handle_tts_request_on_demand(
|
||||
db: AsyncSession,
|
||||
) -> tuple[bool, str]:
|
||||
"""用户点喇叭:该段已有 TTS 则预签名下发;否则合成后落库并下发。不重复合成同一段。"""
|
||||
logger.info(
|
||||
"pipeline.handle_tts_request_on_demand entry conversation_id={} user_id={} "
|
||||
"assistant_message_id={} segment_index={} segment_text_len={} enable_tts={} provider={}",
|
||||
conversation_id,
|
||||
user_id,
|
||||
assistant_message_id,
|
||||
segment_index,
|
||||
len(segment_text or ""),
|
||||
settings.enable_tts,
|
||||
settings.tts_provider,
|
||||
)
|
||||
if not settings.enable_tts:
|
||||
logger.info(
|
||||
"pipeline.handle_tts_request_on_demand result ok=False reason=未开启语音合成 "
|
||||
"conversation_id={} assistant_message_id={}",
|
||||
conversation_id,
|
||||
assistant_message_id,
|
||||
)
|
||||
return False, "未开启语音合成"
|
||||
|
||||
conv = await db.get(Conversation, conversation_id)
|
||||
if not conv or conv.user_id != user_id or conv.deleted_at is not None:
|
||||
logger.debug(
|
||||
"pipeline.handle_tts_request_on_demand result ok=False reason=对话不存在或无权访问 "
|
||||
"conversation_id={} user_id={}",
|
||||
conversation_id,
|
||||
user_id,
|
||||
)
|
||||
return False, "对话不存在或无权访问"
|
||||
|
||||
msg = await db.get(ConversationMessage, assistant_message_id)
|
||||
if not msg or msg.conversation_id != conversation_id or msg.role != "ai":
|
||||
logger.debug(
|
||||
"pipeline.handle_tts_request_on_demand result ok=False reason=消息不存在 "
|
||||
"conversation_id={} assistant_message_id={}",
|
||||
conversation_id,
|
||||
assistant_message_id,
|
||||
)
|
||||
return False, "消息不存在"
|
||||
|
||||
# 与客户端 splitMessageParts / segments_from_llm_response 对齐(含无 [SPLIT] 时的段落拆段)
|
||||
@@ -195,6 +327,14 @@ async def handle_tts_request_on_demand(
|
||||
chunk_total = len(parts)
|
||||
|
||||
if existing:
|
||||
logger.info(
|
||||
"pipeline.handle_tts_request_on_demand reuse existing url conversation_id={} "
|
||||
"assistant_message_id={} segment_index={} url_len={}",
|
||||
conversation_id,
|
||||
assistant_message_id,
|
||||
segment_index,
|
||||
len(existing),
|
||||
)
|
||||
storage = get_object_storage()
|
||||
key = extract_cos_object_key_if_owned(existing)
|
||||
try:
|
||||
@@ -222,8 +362,27 @@ async def handle_tts_request_on_demand(
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
},
|
||||
)
|
||||
logger.info(
|
||||
"pipeline.handle_tts_request_on_demand result ok=True reason=existing_reused "
|
||||
"conversation_id={} assistant_message_id={} segment_index={}",
|
||||
conversation_id,
|
||||
assistant_message_id,
|
||||
segment_index,
|
||||
)
|
||||
return True, ""
|
||||
|
||||
logger.info(
|
||||
"pipeline.handle_tts_request_on_demand no existing url, will synthesize "
|
||||
"conversation_id={} assistant_message_id={} segment_index={} canon_len={}",
|
||||
conversation_id,
|
||||
assistant_message_id,
|
||||
segment_index,
|
||||
len(canon),
|
||||
)
|
||||
|
||||
user_obj = await db.get(User, user_id)
|
||||
user_language = _resolve_user_language(user_obj)
|
||||
|
||||
tts_epoch_start = _tts_epoch_value(conversation_id)
|
||||
url_stored = await _send_tts_audio(
|
||||
conversation_id,
|
||||
@@ -233,8 +392,24 @@ async def handle_tts_request_on_demand(
|
||||
assistant_message_id=assistant_message_id,
|
||||
tts_epoch_start=tts_epoch_start,
|
||||
manual=True,
|
||||
language=user_language,
|
||||
)
|
||||
logger.info(
|
||||
"pipeline.handle_tts_request_on_demand _send_tts_audio returned url_stored_set={} "
|
||||
"conversation_id={} assistant_message_id={} segment_index={}",
|
||||
bool(url_stored),
|
||||
conversation_id,
|
||||
assistant_message_id,
|
||||
segment_index,
|
||||
)
|
||||
if not url_stored:
|
||||
logger.info(
|
||||
"pipeline.handle_tts_request_on_demand result ok=False reason=语音合成失败 "
|
||||
"conversation_id={} assistant_message_id={} segment_index={}",
|
||||
conversation_id,
|
||||
assistant_message_id,
|
||||
segment_index,
|
||||
)
|
||||
return False, "语音合成失败"
|
||||
|
||||
while len(urls) <= segment_index:
|
||||
@@ -245,6 +420,13 @@ async def handle_tts_request_on_demand(
|
||||
|
||||
store = ConversationHistoryStore(db)
|
||||
await store._sync_redis_best_effort(conversation_id)
|
||||
logger.info(
|
||||
"pipeline.handle_tts_request_on_demand result ok=True reason=synthesized "
|
||||
"conversation_id={} assistant_message_id={} segment_index={}",
|
||||
conversation_id,
|
||||
assistant_message_id,
|
||||
segment_index,
|
||||
)
|
||||
return True, ""
|
||||
|
||||
|
||||
@@ -852,6 +1034,7 @@ async def process_user_message(
|
||||
"""处理用户消息,生成 Agent 回应。由 ChatOrchestrator 路由到 ProfileAgent 或 InterviewAgent。"""
|
||||
store = ConversationHistoryStore(db)
|
||||
tts_urls: list[str] = []
|
||||
user_language = _resolve_user_language(user)
|
||||
try:
|
||||
logger.info(
|
||||
"process_user_message 开始: conversation_id={} segment_id={} user_chars={}",
|
||||
@@ -859,6 +1042,18 @@ async def process_user_message(
|
||||
segment.id,
|
||||
len(user_message or ""),
|
||||
)
|
||||
# 长期保留:TTS 决策入口(pipeline 层);INFO 级别可见所有控制位
|
||||
logger.info(
|
||||
"pipeline.process_user_message entry conversation_id={} segment_id={} "
|
||||
"tts_this_turn={} force_skip_tts={} enable_tts={} provider={} user_language={}",
|
||||
conversation_id,
|
||||
segment.id,
|
||||
tts_this_turn,
|
||||
force_skip_tts,
|
||||
settings.enable_tts,
|
||||
settings.tts_provider,
|
||||
user_language,
|
||||
)
|
||||
is_from_voice = bool(segment.audio_url)
|
||||
voice_session_id = _voice_session_id_from_audio_url(segment.audio_url)
|
||||
audio_dur = getattr(segment, "audio_duration_seconds", None)
|
||||
@@ -886,6 +1081,21 @@ async def process_user_message(
|
||||
skip_tts = bool(turn.skip_tts)
|
||||
want_voice = bool(tts_this_turn) if tts_this_turn is not None else False
|
||||
want_tts = want_voice and settings.enable_tts and not skip_tts
|
||||
# 长期保留 INFO:TTS 决策最终结论;不再被 agent_summary_enabled 门控
|
||||
logger.info(
|
||||
"pipeline.process_user_message tts_decision conversation_id={} segment_id={} "
|
||||
"tts_this_turn={} force_skip_tts={} enable_tts={} skip_tts_from_turn={} "
|
||||
"want_voice={} want_tts={} response_segments={}",
|
||||
conversation_id,
|
||||
segment.id,
|
||||
tts_this_turn,
|
||||
force_skip_tts,
|
||||
settings.enable_tts,
|
||||
skip_tts,
|
||||
want_voice,
|
||||
want_tts,
|
||||
len(turn.messages),
|
||||
)
|
||||
if agent_summary_enabled():
|
||||
logger.info(
|
||||
"pipeline.process_user_message duration_ms={:.2f} "
|
||||
@@ -952,21 +1162,55 @@ async def process_user_message(
|
||||
ai_msg_id = turn_ids.assistant_message_id
|
||||
tts_epoch_start = _tts_epoch_value(conversation_id)
|
||||
n = len(responses)
|
||||
# tts_cancelled 仅用于跳过后续 TTS 合成;AGENT_RESPONSE 必须为每段完整下发,
|
||||
# 否则 FE 会停留在 "正在回复…" 或丢失尾段文本。
|
||||
tts_cancelled = False
|
||||
for i, response_text in enumerate(responses):
|
||||
url_for_segment: Optional[str] = None
|
||||
if want_tts:
|
||||
if want_tts and not tts_cancelled:
|
||||
if _tts_epoch_value(conversation_id) != tts_epoch_start:
|
||||
break
|
||||
url_for_segment = await _send_tts_audio(
|
||||
tts_cancelled = True
|
||||
logger.info(
|
||||
"pipeline.process_user_message segment={}/{} tts_branch=skip_cancelled "
|
||||
"tts_cancelled={} conversation_id={}",
|
||||
i,
|
||||
n,
|
||||
tts_cancelled,
|
||||
conversation_id,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"pipeline.process_user_message segment={}/{} tts_branch=synthesize "
|
||||
"tts_cancelled={} conversation_id={}",
|
||||
i,
|
||||
n,
|
||||
tts_cancelled,
|
||||
conversation_id,
|
||||
)
|
||||
url_for_segment = await _send_tts_audio(
|
||||
conversation_id,
|
||||
response_text,
|
||||
chunk_index=i,
|
||||
chunk_total=n,
|
||||
assistant_message_id=ai_msg_id,
|
||||
tts_epoch_start=tts_epoch_start,
|
||||
language=user_language,
|
||||
)
|
||||
if url_for_segment:
|
||||
tts_urls.append(url_for_segment)
|
||||
if _tts_epoch_value(conversation_id) != tts_epoch_start:
|
||||
tts_cancelled = True
|
||||
else:
|
||||
logger.info(
|
||||
"pipeline.process_user_message segment={}/{} tts_branch={} "
|
||||
"tts_cancelled={} want_tts={} conversation_id={}",
|
||||
i,
|
||||
n,
|
||||
"skip_cancelled" if tts_cancelled else "skip_no_tts",
|
||||
tts_cancelled,
|
||||
want_tts,
|
||||
conversation_id,
|
||||
response_text,
|
||||
chunk_index=i,
|
||||
chunk_total=n,
|
||||
assistant_message_id=ai_msg_id,
|
||||
tts_epoch_start=tts_epoch_start,
|
||||
)
|
||||
if url_for_segment:
|
||||
tts_urls.append(url_for_segment)
|
||||
|
||||
await manager.send_message(
|
||||
conversation_id,
|
||||
@@ -983,8 +1227,6 @@ async def process_user_message(
|
||||
},
|
||||
)
|
||||
|
||||
if _tts_epoch_value(conversation_id) != tts_epoch_start:
|
||||
break
|
||||
if i < n - 1:
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user