feat(i18n): persist language preference and thread through chat, memoir, TTS

- Add users.language_preference (Alembic 0018, default zh); capture at signup/SMS
  only; expose on auth and profile APIs
- Lite English prompts for chat and memoir; localized stage labels and agent
  names (Life Echo / 岁月知己)
- Tencent TTS: language-aware synthesis, ModelType=1 for 501004, English chunking
- WebSocket pipeline: emit all AGENT_RESPONSE segments when TTS cancels; INFO logs
  for tts_this_turn and TTS decisions; on-demand TTS logging
- Expo: device language on auth, i18n tiers/agent name, [SPLIT] streaming UX fixes
- Tests for migration, prompts, pipeline, router tts_this_turn, reply segments

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Kevin
2026-05-11 16:16:49 +08:00
parent 5ce29aad64
commit ccdc4e4277
64 changed files with 3233 additions and 208 deletions

View File

@@ -64,6 +64,12 @@ def _tts_epoch_value(conversation_id: str) -> int:
return _tts_cancel_epoch.get(conversation_id, 0)
def _resolve_user_language(user) -> str:
"""Return 'en' iff user.language_preference is set to 'en'; default 'zh'."""
raw = getattr(user, "language_preference", "zh") if user is not None else "zh"
return "en" if str(raw or "zh").strip().lower() == "en" else "zh"
def _tts_object_ext(codec: str) -> str:
c = (codec or "mp3").lower().lstrip(".")
if c in ("wave",):
@@ -89,31 +95,101 @@ async def _send_tts_audio(
assistant_message_id: str | None,
tts_epoch_start: int,
manual: bool = False,
language: str = "zh",
) -> str | None:
"""Synthesize TTS, upload to COS, append Redis, send TTS_AUDIO. Returns public URL or None."""
current_epoch = _tts_epoch_value(conversation_id)
# 长期保留 INFO：TTS 决策与执行链路必须在 INFO 级别全程可见
logger.info(
"pipeline._send_tts_audio entry conversation_id={} chunk_index={} chunk_total={} "
"text_len={} language={} manual={} tts_epoch_start={} current_epoch={} "
"enable_tts={} provider={}",
conversation_id,
chunk_index,
chunk_total,
len(text or ""),
language,
manual,
tts_epoch_start,
current_epoch,
settings.enable_tts,
settings.tts_provider,
)
if not settings.enable_tts:
logger.info(
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
"url_set=False audio_bytes_len=0 reason=enable_tts_false",
conversation_id,
chunk_index,
)
return None
if _tts_epoch_value(conversation_id) != tts_epoch_start:
if current_epoch != tts_epoch_start:
logger.info(
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
"url_set=False audio_bytes_len=0 reason=epoch_mismatch_pre_synth "
"tts_epoch_start={} current_epoch={}",
conversation_id,
chunk_index,
tts_epoch_start,
current_epoch,
)
return None
try:
tts = get_tts_provider()
audio_bytes = await tts.synthesize(text)
audio_bytes = await tts.synthesize(text, language=language)
if not audio_bytes:
logger.warning(
"TTS skipped: synthesize returned empty. Check TTS config in .env"
"TTS skipped: synthesize returned empty conversation_id={} chunk_index={} "
"language={} text_preview={!r} voice_provider={}",
conversation_id,
chunk_index,
language,
(text or "")[:30],
settings.tts_provider,
)
logger.info(
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
"url_set=False audio_bytes_len=0 reason=synthesize_empty",
conversation_id,
chunk_index,
)
return None
if _tts_epoch_value(conversation_id) != tts_epoch_start:
logger.info(
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
"url_set=False audio_bytes_len={} reason=epoch_mismatch_post_synth",
conversation_id,
chunk_index,
len(audio_bytes),
)
return None
ext = _tts_object_ext(settings.tts_codec)
content_type = _tts_codec_to_content_type(settings.tts_codec)
storage = get_object_storage()
key = f"conversations/{conversation_id}/tts/{uuid.uuid4().hex}.{ext}"
upload_started = time.perf_counter()
logger.debug(
"pipeline._send_tts_audio uploading key={} audio_bytes_len={} content_type={}",
key,
len(audio_bytes),
content_type,
)
public_url = storage.upload(key, audio_bytes, content_type)
upload_ms = (time.perf_counter() - upload_started) * 1000
# 与 `tts_delivery.apply_presigned_tts_urls_to_messages` / 回忆录图片 presign 一致:下发可播 URL
playback_url = storage.get_url(key, expires=TTS_PRESIGNED_EXPIRES_SEC)
logger.debug(
"pipeline._send_tts_audio uploaded key={} audio_bytes_len={} upload_ms={:.2f} "
"public_url_set={} playback_url_set={}",
key,
len(audio_bytes),
upload_ms,
bool(public_url),
bool(playback_url),
)
audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
payload_data: Dict[str, Any] = {
"audio_base64": base64.b64encode(audio_bytes).decode("utf-8"),
"audio_base64": audio_b64,
"format": settings.tts_codec,
"audio_url": playback_url,
"index": chunk_index,
@@ -123,6 +199,16 @@ async def _send_tts_audio(
payload_data["assistant_message_id"] = assistant_message_id
if manual:
payload_data["manual"] = True
logger.debug(
"pipeline._send_tts_audio sending TTS_AUDIO conversation_id={} chunk_index={} "
"chunk_total={} payload_fields={} audio_b64_len={} manual={}",
conversation_id,
chunk_index,
chunk_total,
sorted(payload_data.keys()),
len(audio_b64),
manual,
)
await manager.send_message(
conversation_id,
{
@@ -132,6 +218,16 @@ async def _send_tts_audio(
"timestamp": datetime.now(timezone.utc).isoformat(),
},
)
logger.info(
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=True "
"url_set={} audio_bytes_len={} upload_ms={:.2f} manual={}",
conversation_id,
chunk_index,
bool(public_url),
len(audio_bytes),
upload_ms,
manual,
)
return public_url
except Exception as e:
err_str = str(e)
@@ -142,6 +238,13 @@ async def _send_tts_audio(
)
else:
logger.error("TTS synthesize failed: {}", e)
logger.info(
"pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
"url_set=False audio_bytes_len=0 reason=exception err={}",
conversation_id,
chunk_index,
type(e).__name__,
)
return None
@@ -155,15 +258,44 @@ async def handle_tts_request_on_demand(
db: AsyncSession,
) -> tuple[bool, str]:
"""用户点喇叭:该段已有 TTS 则预签名下发;否则合成后落库并下发。不重复合成同一段。"""
logger.info(
"pipeline.handle_tts_request_on_demand entry conversation_id={} user_id={} "
"assistant_message_id={} segment_index={} segment_text_len={} enable_tts={} provider={}",
conversation_id,
user_id,
assistant_message_id,
segment_index,
len(segment_text or ""),
settings.enable_tts,
settings.tts_provider,
)
if not settings.enable_tts:
logger.info(
"pipeline.handle_tts_request_on_demand result ok=False reason=未开启语音合成 "
"conversation_id={} assistant_message_id={}",
conversation_id,
assistant_message_id,
)
return False, "未开启语音合成"
conv = await db.get(Conversation, conversation_id)
if not conv or conv.user_id != user_id or conv.deleted_at is not None:
logger.debug(
"pipeline.handle_tts_request_on_demand result ok=False reason=对话不存在或无权访问 "
"conversation_id={} user_id={}",
conversation_id,
user_id,
)
return False, "对话不存在或无权访问"
msg = await db.get(ConversationMessage, assistant_message_id)
if not msg or msg.conversation_id != conversation_id or msg.role != "ai":
logger.debug(
"pipeline.handle_tts_request_on_demand result ok=False reason=消息不存在 "
"conversation_id={} assistant_message_id={}",
conversation_id,
assistant_message_id,
)
return False, "消息不存在"
# 与客户端 splitMessageParts / segments_from_llm_response 对齐(含无 [SPLIT] 时的段落拆段)
@@ -195,6 +327,14 @@ async def handle_tts_request_on_demand(
chunk_total = len(parts)
if existing:
logger.info(
"pipeline.handle_tts_request_on_demand reuse existing url conversation_id={} "
"assistant_message_id={} segment_index={} url_len={}",
conversation_id,
assistant_message_id,
segment_index,
len(existing),
)
storage = get_object_storage()
key = extract_cos_object_key_if_owned(existing)
try:
@@ -222,8 +362,27 @@ async def handle_tts_request_on_demand(
"timestamp": datetime.now(timezone.utc).isoformat(),
},
)
logger.info(
"pipeline.handle_tts_request_on_demand result ok=True reason=existing_reused "
"conversation_id={} assistant_message_id={} segment_index={}",
conversation_id,
assistant_message_id,
segment_index,
)
return True, ""
logger.info(
"pipeline.handle_tts_request_on_demand no existing url, will synthesize "
"conversation_id={} assistant_message_id={} segment_index={} canon_len={}",
conversation_id,
assistant_message_id,
segment_index,
len(canon),
)
user_obj = await db.get(User, user_id)
user_language = _resolve_user_language(user_obj)
tts_epoch_start = _tts_epoch_value(conversation_id)
url_stored = await _send_tts_audio(
conversation_id,
@@ -233,8 +392,24 @@ async def handle_tts_request_on_demand(
assistant_message_id=assistant_message_id,
tts_epoch_start=tts_epoch_start,
manual=True,
language=user_language,
)
logger.info(
"pipeline.handle_tts_request_on_demand _send_tts_audio returned url_stored_set={} "
"conversation_id={} assistant_message_id={} segment_index={}",
bool(url_stored),
conversation_id,
assistant_message_id,
segment_index,
)
if not url_stored:
logger.info(
"pipeline.handle_tts_request_on_demand result ok=False reason=语音合成失败 "
"conversation_id={} assistant_message_id={} segment_index={}",
conversation_id,
assistant_message_id,
segment_index,
)
return False, "语音合成失败"
while len(urls) <= segment_index:
@@ -245,6 +420,13 @@ async def handle_tts_request_on_demand(
store = ConversationHistoryStore(db)
await store._sync_redis_best_effort(conversation_id)
logger.info(
"pipeline.handle_tts_request_on_demand result ok=True reason=synthesized "
"conversation_id={} assistant_message_id={} segment_index={}",
conversation_id,
assistant_message_id,
segment_index,
)
return True, ""
@@ -852,6 +1034,7 @@ async def process_user_message(
"""处理用户消息,生成 Agent 回应。由 ChatOrchestrator 路由到 ProfileAgent 或 InterviewAgent。"""
store = ConversationHistoryStore(db)
tts_urls: list[str] = []
user_language = _resolve_user_language(user)
try:
logger.info(
"process_user_message 开始: conversation_id={} segment_id={} user_chars={}",
@@ -859,6 +1042,18 @@ async def process_user_message(
segment.id,
len(user_message or ""),
)
# 长期保留：TTS 决策入口（pipeline 层），INFO 级别可见所有控制位
logger.info(
"pipeline.process_user_message entry conversation_id={} segment_id={} "
"tts_this_turn={} force_skip_tts={} enable_tts={} provider={} user_language={}",
conversation_id,
segment.id,
tts_this_turn,
force_skip_tts,
settings.enable_tts,
settings.tts_provider,
user_language,
)
is_from_voice = bool(segment.audio_url)
voice_session_id = _voice_session_id_from_audio_url(segment.audio_url)
audio_dur = getattr(segment, "audio_duration_seconds", None)
@@ -886,6 +1081,21 @@ async def process_user_message(
skip_tts = bool(turn.skip_tts)
want_voice = bool(tts_this_turn) if tts_this_turn is not None else False
want_tts = want_voice and settings.enable_tts and not skip_tts
# 长期保留 INFO：TTS 决策最终结论;不再被 agent_summary_enabled 门控
logger.info(
"pipeline.process_user_message tts_decision conversation_id={} segment_id={} "
"tts_this_turn={} force_skip_tts={} enable_tts={} skip_tts_from_turn={} "
"want_voice={} want_tts={} response_segments={}",
conversation_id,
segment.id,
tts_this_turn,
force_skip_tts,
settings.enable_tts,
skip_tts,
want_voice,
want_tts,
len(turn.messages),
)
if agent_summary_enabled():
logger.info(
"pipeline.process_user_message duration_ms={:.2f} "
@@ -952,21 +1162,55 @@ async def process_user_message(
ai_msg_id = turn_ids.assistant_message_id
tts_epoch_start = _tts_epoch_value(conversation_id)
n = len(responses)
# tts_cancelled 仅用于跳过后续 TTS 合成；AGENT_RESPONSE 必须为每段完整下发,
# 否则 FE 会停留在 "正在回复…" 或丢失尾段文本。
tts_cancelled = False
for i, response_text in enumerate(responses):
url_for_segment: Optional[str] = None
if want_tts:
if want_tts and not tts_cancelled:
if _tts_epoch_value(conversation_id) != tts_epoch_start:
break
url_for_segment = await _send_tts_audio(
tts_cancelled = True
logger.info(
"pipeline.process_user_message segment={}/{} tts_branch=skip_cancelled "
"tts_cancelled={} conversation_id={}",
i,
n,
tts_cancelled,
conversation_id,
)
else:
logger.info(
"pipeline.process_user_message segment={}/{} tts_branch=synthesize "
"tts_cancelled={} conversation_id={}",
i,
n,
tts_cancelled,
conversation_id,
)
url_for_segment = await _send_tts_audio(
conversation_id,
response_text,
chunk_index=i,
chunk_total=n,
assistant_message_id=ai_msg_id,
tts_epoch_start=tts_epoch_start,
language=user_language,
)
if url_for_segment:
tts_urls.append(url_for_segment)
if _tts_epoch_value(conversation_id) != tts_epoch_start:
tts_cancelled = True
else:
logger.info(
"pipeline.process_user_message segment={}/{} tts_branch={} "
"tts_cancelled={} want_tts={} conversation_id={}",
i,
n,
"skip_cancelled" if tts_cancelled else "skip_no_tts",
tts_cancelled,
want_tts,
conversation_id,
response_text,
chunk_index=i,
chunk_total=n,
assistant_message_id=ai_msg_id,
tts_epoch_start=tts_epoch_start,
)
if url_for_segment:
tts_urls.append(url_for_segment)
await manager.send_message(
conversation_id,
@@ -983,8 +1227,6 @@ async def process_user_message(
},
)
if _tts_epoch_value(conversation_id) != tts_epoch_start:
break
if i < n - 1:
await asyncio.sleep(0.5)