feat(i18n): persist language preference and thread through chat, memoir, TTS

- Add users.language_preference (Alembic 0018, default zh); capture at signup/SMS only; expose on auth and profile APIs
- Lite English prompts for chat and memoir; localized stage labels and agent names (Life Echo / 岁月知己)
- Tencent TTS: language-aware synthesis, ModelType=1 for 501004, English chunking
- WebSocket pipeline: emit all AGENT_RESPONSE segments when TTS cancels; INFO logs for tts_this_turn and TTS decisions; on-demand TTS logging
- Expo: device language on auth, i18n tiers/agent name, [SPLIT] streaming UX fixes
- Tests for migration, prompts, pipeline, router tts_this_turn, reply segments

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-11 16:16:49 +08:00
parent 5ce29aad64
commit ccdc4e4277
64 changed files with 3233 additions and 208 deletions
--- a/api/app/features/conversation/ws/pipeline.py
+++ b/api/app/features/conversation/ws/pipeline.py
@@ -64,6 +64,12 @@ def _tts_epoch_value(conversation_id: str) -> int:
    return _tts_cancel_epoch.get(conversation_id, 0)


+def _resolve_user_language(user) -> str:
+    """Return 'en' iff user.language_preference is set to 'en'; default 'zh'.
+
+    The comparison is case-insensitive and ignores surrounding whitespace.
+    A ``None`` user, a missing attribute, or an empty/``None`` value all
+    resolve to 'zh'; any value other than 'en' also resolves to 'zh'.
+    """
+    # The getattr default covers objects without the attribute; the guard covers user=None.
+    raw = getattr(user, "language_preference", "zh") if user is not None else "zh"
+    # `raw or "zh"` maps None / "" to the default before normalizing.
+    return "en" if str(raw or "zh").strip().lower() == "en" else "zh"
+
+
 def _tts_object_ext(codec: str) -> str:
    c = (codec or "mp3").lower().lstrip(".")
    if c in ("wave",):
@@ -89,31 +95,101 @@ async def _send_tts_audio(
    assistant_message_id: str | None,
    tts_epoch_start: int,
    manual: bool = False,
+    language: str = "zh",
 ) -> str | None:
    """Synthesize TTS, upload to COS, append Redis, send TTS_AUDIO. Returns public URL or None."""
+    current_epoch = _tts_epoch_value(conversation_id)
+    # 长期保留 INFO：TTS 决策与执行链路必须在 INFO 级别全程可见
+    logger.info(
+        "pipeline._send_tts_audio entry conversation_id={} chunk_index={} chunk_total={} "
+        "text_len={} language={} manual={} tts_epoch_start={} current_epoch={} "
+        "enable_tts={} provider={}",
+        conversation_id,
+        chunk_index,
+        chunk_total,
+        len(text or ""),
+        language,
+        manual,
+        tts_epoch_start,
+        current_epoch,
+        settings.enable_tts,
+        settings.tts_provider,
+    )
    if not settings.enable_tts:
+        logger.info(
+            "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
+            "url_set=False audio_bytes_len=0 reason=enable_tts_false",
+            conversation_id,
+            chunk_index,
+        )
        return None
-    if _tts_epoch_value(conversation_id) != tts_epoch_start:
+    if current_epoch != tts_epoch_start:
+        logger.info(
+            "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
+            "url_set=False audio_bytes_len=0 reason=epoch_mismatch_pre_synth "
+            "tts_epoch_start={} current_epoch={}",
+            conversation_id,
+            chunk_index,
+            tts_epoch_start,
+            current_epoch,
+        )
        return None
    try:
        tts = get_tts_provider()
-        audio_bytes = await tts.synthesize(text)
+        audio_bytes = await tts.synthesize(text, language=language)
        if not audio_bytes:
            logger.warning(
-                "TTS skipped: synthesize returned empty. Check TTS config in .env"
+                "TTS skipped: synthesize returned empty conversation_id={} chunk_index={} "
+                "language={} text_preview={!r} voice_provider={}",
+                conversation_id,
+                chunk_index,
+                language,
+                (text or "")[:30],
+                settings.tts_provider,
+            )
+            logger.info(
+                "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
+                "url_set=False audio_bytes_len=0 reason=synthesize_empty",
+                conversation_id,
+                chunk_index,
            )
            return None
        if _tts_epoch_value(conversation_id) != tts_epoch_start:
+            logger.info(
+                "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
+                "url_set=False audio_bytes_len={} reason=epoch_mismatch_post_synth",
+                conversation_id,
+                chunk_index,
+                len(audio_bytes),
+            )
            return None
        ext = _tts_object_ext(settings.tts_codec)
        content_type = _tts_codec_to_content_type(settings.tts_codec)
        storage = get_object_storage()
        key = f"conversations/{conversation_id}/tts/{uuid.uuid4().hex}.{ext}"
+        upload_started = time.perf_counter()
+        logger.debug(
+            "pipeline._send_tts_audio uploading key={} audio_bytes_len={} content_type={}",
+            key,
+            len(audio_bytes),
+            content_type,
+        )
        public_url = storage.upload(key, audio_bytes, content_type)
+        upload_ms = (time.perf_counter() - upload_started) * 1000
        # 与 `tts_delivery.apply_presigned_tts_urls_to_messages` / 回忆录图片 presign 一致：下发可播 URL
        playback_url = storage.get_url(key, expires=TTS_PRESIGNED_EXPIRES_SEC)
+        logger.debug(
+            "pipeline._send_tts_audio uploaded key={} audio_bytes_len={} upload_ms={:.2f} "
+            "public_url_set={} playback_url_set={}",
+            key,
+            len(audio_bytes),
+            upload_ms,
+            bool(public_url),
+            bool(playback_url),
+        )
+        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
        payload_data: Dict[str, Any] = {
-            "audio_base64": base64.b64encode(audio_bytes).decode("utf-8"),
+            "audio_base64": audio_b64,
            "format": settings.tts_codec,
            "audio_url": playback_url,
            "index": chunk_index,
@@ -123,6 +199,16 @@ async def _send_tts_audio(
            payload_data["assistant_message_id"] = assistant_message_id
        if manual:
            payload_data["manual"] = True
+        logger.debug(
+            "pipeline._send_tts_audio sending TTS_AUDIO conversation_id={} chunk_index={} "
+            "chunk_total={} payload_fields={} audio_b64_len={} manual={}",
+            conversation_id,
+            chunk_index,
+            chunk_total,
+            sorted(payload_data.keys()),
+            len(audio_b64),
+            manual,
+        )
        await manager.send_message(
            conversation_id,
            {
@@ -132,6 +218,16 @@ async def _send_tts_audio(
                "timestamp": datetime.now(timezone.utc).isoformat(),
            },
        )
+        logger.info(
+            "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=True "
+            "url_set={} audio_bytes_len={} upload_ms={:.2f} manual={}",
+            conversation_id,
+            chunk_index,
+            bool(public_url),
+            len(audio_bytes),
+            upload_ms,
+            manual,
+        )
        return public_url
    except Exception as e:
        err_str = str(e)
@@ -142,6 +238,13 @@ async def _send_tts_audio(
            )
        else:
            logger.error("TTS synthesize failed: {}", e)
+        logger.info(
+            "pipeline._send_tts_audio result conversation_id={} chunk_index={} ok=False "
+            "url_set=False audio_bytes_len=0 reason=exception err={}",
+            conversation_id,
+            chunk_index,
+            type(e).__name__,
+        )
        return None


@@ -155,15 +258,44 @@ async def handle_tts_request_on_demand(
    db: AsyncSession,
 ) -> tuple[bool, str]:
    """用户点喇叭：该段已有 TTS 则预签名下发；否则合成后落库并下发。不重复合成同一段。"""
+    logger.info(
+        "pipeline.handle_tts_request_on_demand entry conversation_id={} user_id={} "
+        "assistant_message_id={} segment_index={} segment_text_len={} enable_tts={} provider={}",
+        conversation_id,
+        user_id,
+        assistant_message_id,
+        segment_index,
+        len(segment_text or ""),
+        settings.enable_tts,
+        settings.tts_provider,
+    )
    if not settings.enable_tts:
+        logger.info(
+            "pipeline.handle_tts_request_on_demand result ok=False reason=未开启语音合成 "
+            "conversation_id={} assistant_message_id={}",
+            conversation_id,
+            assistant_message_id,
+        )
        return False, "未开启语音合成"

    conv = await db.get(Conversation, conversation_id)
    if not conv or conv.user_id != user_id or conv.deleted_at is not None:
+        logger.debug(
+            "pipeline.handle_tts_request_on_demand result ok=False reason=对话不存在或无权访问 "
+            "conversation_id={} user_id={}",
+            conversation_id,
+            user_id,
+        )
        return False, "对话不存在或无权访问"

    msg = await db.get(ConversationMessage, assistant_message_id)
    if not msg or msg.conversation_id != conversation_id or msg.role != "ai":
+        logger.debug(
+            "pipeline.handle_tts_request_on_demand result ok=False reason=消息不存在 "
+            "conversation_id={} assistant_message_id={}",
+            conversation_id,
+            assistant_message_id,
+        )
        return False, "消息不存在"

    # 与客户端 splitMessageParts / segments_from_llm_response 对齐（含无 [SPLIT] 时的段落拆段）
@@ -195,6 +327,14 @@ async def handle_tts_request_on_demand(
    chunk_total = len(parts)

    if existing:
+        logger.info(
+            "pipeline.handle_tts_request_on_demand reuse existing url conversation_id={} "
+            "assistant_message_id={} segment_index={} url_len={}",
+            conversation_id,
+            assistant_message_id,
+            segment_index,
+            len(existing),
+        )
        storage = get_object_storage()
        key = extract_cos_object_key_if_owned(existing)
        try:
@@ -222,8 +362,27 @@ async def handle_tts_request_on_demand(
                "timestamp": datetime.now(timezone.utc).isoformat(),
            },
        )
+        logger.info(
+            "pipeline.handle_tts_request_on_demand result ok=True reason=existing_reused "
+            "conversation_id={} assistant_message_id={} segment_index={}",
+            conversation_id,
+            assistant_message_id,
+            segment_index,
+        )
        return True, ""

+    logger.info(
+        "pipeline.handle_tts_request_on_demand no existing url, will synthesize "
+        "conversation_id={} assistant_message_id={} segment_index={} canon_len={}",
+        conversation_id,
+        assistant_message_id,
+        segment_index,
+        len(canon),
+    )
+
+    user_obj = await db.get(User, user_id)
+    user_language = _resolve_user_language(user_obj)
+
    tts_epoch_start = _tts_epoch_value(conversation_id)
    url_stored = await _send_tts_audio(
        conversation_id,
@@ -233,8 +392,24 @@ async def handle_tts_request_on_demand(
        assistant_message_id=assistant_message_id,
        tts_epoch_start=tts_epoch_start,
        manual=True,
+        language=user_language,
+    )
+    logger.info(
+        "pipeline.handle_tts_request_on_demand _send_tts_audio returned url_stored_set={} "
+        "conversation_id={} assistant_message_id={} segment_index={}",
+        bool(url_stored),
+        conversation_id,
+        assistant_message_id,
+        segment_index,
    )
    if not url_stored:
+        logger.info(
+            "pipeline.handle_tts_request_on_demand result ok=False reason=语音合成失败 "
+            "conversation_id={} assistant_message_id={} segment_index={}",
+            conversation_id,
+            assistant_message_id,
+            segment_index,
+        )
        return False, "语音合成失败"

    while len(urls) <= segment_index:
@@ -245,6 +420,13 @@ async def handle_tts_request_on_demand(

    store = ConversationHistoryStore(db)
    await store._sync_redis_best_effort(conversation_id)
+    logger.info(
+        "pipeline.handle_tts_request_on_demand result ok=True reason=synthesized "
+        "conversation_id={} assistant_message_id={} segment_index={}",
+        conversation_id,
+        assistant_message_id,
+        segment_index,
+    )
    return True, ""


@@ -852,6 +1034,7 @@ async def process_user_message(
    """处理用户消息，生成 Agent 回应。由 ChatOrchestrator 路由到 ProfileAgent 或 InterviewAgent。"""
    store = ConversationHistoryStore(db)
    tts_urls: list[str] = []
+    user_language = _resolve_user_language(user)
    try:
        logger.info(
            "process_user_message 开始: conversation_id={} segment_id={} user_chars={}",
@@ -859,6 +1042,18 @@ async def process_user_message(
            segment.id,
            len(user_message or ""),
        )
+        # 长期保留：TTS 决策入口（pipeline 层）；INFO 级别可见所有控制位
+        logger.info(
+            "pipeline.process_user_message entry conversation_id={} segment_id={} "
+            "tts_this_turn={} force_skip_tts={} enable_tts={} provider={} user_language={}",
+            conversation_id,
+            segment.id,
+            tts_this_turn,
+            force_skip_tts,
+            settings.enable_tts,
+            settings.tts_provider,
+            user_language,
+        )
        is_from_voice = bool(segment.audio_url)
        voice_session_id = _voice_session_id_from_audio_url(segment.audio_url)
        audio_dur = getattr(segment, "audio_duration_seconds", None)
@@ -886,6 +1081,21 @@ async def process_user_message(
        skip_tts = bool(turn.skip_tts)
        want_voice = bool(tts_this_turn) if tts_this_turn is not None else False
        want_tts = want_voice and settings.enable_tts and not skip_tts
+        # 长期保留 INFO：TTS 决策最终结论；不再被 agent_summary_enabled 门控
+        logger.info(
+            "pipeline.process_user_message tts_decision conversation_id={} segment_id={} "
+            "tts_this_turn={} force_skip_tts={} enable_tts={} skip_tts_from_turn={} "
+            "want_voice={} want_tts={} response_segments={}",
+            conversation_id,
+            segment.id,
+            tts_this_turn,
+            force_skip_tts,
+            settings.enable_tts,
+            skip_tts,
+            want_voice,
+            want_tts,
+            len(turn.messages),
+        )
        if agent_summary_enabled():
            logger.info(
                "pipeline.process_user_message duration_ms={:.2f} "
@@ -952,21 +1162,55 @@ async def process_user_message(
        ai_msg_id = turn_ids.assistant_message_id
        tts_epoch_start = _tts_epoch_value(conversation_id)
        n = len(responses)
+        # tts_cancelled 仅用于跳过后续 TTS 合成；AGENT_RESPONSE 必须为每段完整下发，
+        # 否则 FE 会停留在 "正在回复…" 或丢失尾段文本。
+        tts_cancelled = False
        for i, response_text in enumerate(responses):
            url_for_segment: Optional[str] = None
-            if want_tts:
+            if want_tts and not tts_cancelled:
                if _tts_epoch_value(conversation_id) != tts_epoch_start:
-                    break
-                url_for_segment = await _send_tts_audio(
+                    tts_cancelled = True
+                    logger.info(
+                        "pipeline.process_user_message segment={}/{} tts_branch=skip_cancelled "
+                        "tts_cancelled={} conversation_id={}",
+                        i,
+                        n,
+                        tts_cancelled,
+                        conversation_id,
+                    )
+                else:
+                    logger.info(
+                        "pipeline.process_user_message segment={}/{} tts_branch=synthesize "
+                        "tts_cancelled={} conversation_id={}",
+                        i,
+                        n,
+                        tts_cancelled,
+                        conversation_id,
+                    )
+                    url_for_segment = await _send_tts_audio(
+                        conversation_id,
+                        response_text,
+                        chunk_index=i,
+                        chunk_total=n,
+                        assistant_message_id=ai_msg_id,
+                        tts_epoch_start=tts_epoch_start,
+                        language=user_language,
+                    )
+                    if url_for_segment:
+                        tts_urls.append(url_for_segment)
+                    if _tts_epoch_value(conversation_id) != tts_epoch_start:
+                        tts_cancelled = True
+            else:
+                logger.info(
+                    "pipeline.process_user_message segment={}/{} tts_branch={} "
+                    "tts_cancelled={} want_tts={} conversation_id={}",
+                    i,
+                    n,
+                    "skip_cancelled" if tts_cancelled else "skip_no_tts",
+                    tts_cancelled,
+                    want_tts,
                    conversation_id,
-                    response_text,
-                    chunk_index=i,
-                    chunk_total=n,
-                    assistant_message_id=ai_msg_id,
-                    tts_epoch_start=tts_epoch_start,
                )
-                if url_for_segment:
-                    tts_urls.append(url_for_segment)

            await manager.send_message(
                conversation_id,
@@ -983,8 +1227,6 @@ async def process_user_message(
                },
            )

-            if _tts_epoch_value(conversation_id) != tts_epoch_start:
-                break
            if i < n - 1:
                await asyncio.sleep(0.5)