feat/tts (#15)

Co-authored-by: Kevin <kevin@brighteng.org>
2026-03-19 09:11:25 +08:00
parent faf7607bf9
commit 92b7848c48
5 changed files with 192 additions and 1 deletions
--- a/api/app/features/conversation/ws/pipeline.py
+++ b/api/app/features/conversation/ws/pipeline.py
@@ -26,11 +26,31 @@ from app.features.conversation.ws.profile_collector import (
    get_missing_profile_fields,
 )
 from app.features.user.models import User
-from app.core.dependencies import get_asr_provider
+from app.core.config import settings
+from app.core.dependencies import get_asr_provider, get_tts_provider
 from app.features.memoir.state_service import get_or_create_state

 logger = get_logger(__name__)

+
+async def _send_tts_audio(conversation_id: str, text: str) -> None:
+    """Synthesize ``text`` to speech and send it as a TTS_AUDIO message.
+
+    Best-effort: any exception raised while synthesizing or sending is
+    logged and not re-raised, so callers can await this after sending the
+    text reply without guarding it themselves.
+
+    Args:
+        conversation_id: Identifier passed to ``manager.send_message`` and
+            echoed in the message payload.
+        text: Text handed to the TTS provider. Empty or whitespace-only
+            text is skipped without calling the provider.
+    """
+    try:
+        # Nothing to speak: avoid a pointless provider round trip.
+        if not text or not text.strip():
+            return
+        tts = get_tts_provider()
+        audio_bytes = await tts.synthesize(text)
+        # A falsy result means no audio was produced, so nothing is sent.
+        if not audio_bytes:
+            return
+        await manager.send_message(conversation_id, {
+            "type": MessageType.TTS_AUDIO,
+            "conversation_id": conversation_id,
+            "data": {
+                # Raw audio bytes are base64-encoded into a text field.
+                "audio_base64": base64.b64encode(audio_bytes).decode("utf-8"),
+                "format": settings.tts_codec,
+            },
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+        })
+    except Exception as e:
+        # exc_info=True keeps the traceback alongside the message.
+        # NOTE(review): assumes get_logger returns a stdlib-compatible
+        # logger that honours exc_info -- confirm.
+        logger.error("TTS synthesize failed: %s", e, exc_info=True)
+
 # ── Agent 实例（从 ConnectionManager 移出） ─────────────────────
 conversation_agent = ConversationAgent()
 memory_agent = MemoryAgent()
@@ -447,6 +467,7 @@ async def process_user_message(
                        "data": {"text": response_text, "index": i, "total": len(responses)},
                        "timestamp": datetime.now(timezone.utc).isoformat(),
                    })
+                    await _send_tts_audio(conversation_id, response_text)
                    if i < len(responses) - 1:
                        await asyncio.sleep(0.5)
                return
@@ -498,6 +519,7 @@ async def process_user_message(
                "data": {"text": response_text, "index": i, "total": len(responses)},
                "timestamp": datetime.now(timezone.utc).isoformat(),
            })
+            await _send_tts_audio(conversation_id, response_text)
            if i < len(responses) - 1:
                await asyncio.sleep(0.5)