fix(conversation): 离屏不丢回复、列表预热 WS 与非阻塞进入聊天

- 后端：文本/转写后 AI 生成改为独立任务，避免断连取消整轮；按需 TTS 等与 WS 改动
- 前端：RealtimeSession 重绑 UI 时恢复流式 buffer；列表 onPressIn/挂载预热、已有会话立即 push
- 同步会话相关类型、i18n、测试与 env/资源等累计改动

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-08 17:28:31 +08:00
parent 5dac3efd52
commit d0c26242db
44 changed files with 1209 additions and 212 deletions
--- a/api/.env.example
+++ b/api/.env.example
@@ -11,7 +11,8 @@
 # =============================================================================
 # Docker Compose（宿主机独立 Caddy 反代到本 API）
 # =============================================================================
-# 映射到宿主机的端口，默认 8000；与同机其它项目冲突时改为未占用端口，并在独立 Caddy 的 Caddyfile 中 reverse_proxy 到 127.0.0.1:该端口。
+# 映射到宿主机的端口：不设置则由 Docker 随机分配，避免与同机其它项目冲突；随机时用 `docker compose port api 8000` 查看。
+# 需固定端口时取消下行注释并改为未占用端口，Caddyfile 中 reverse_proxy 到 127.0.0.1:该端口。
 # LIFE_ECHO_API_HOST_PORT=8000
 # 若 Caddy 跑在独立容器且非 host 网络，不要用 127.0.0.1，应把 Caddy 加入与本 compose 相同的 Docker 网络，并对 http://life-echo-api-prod:8000 做 reverse_proxy。

@@ -114,11 +115,11 @@ EMBEDDING_MODEL=embedding-3
 # =============================================================================
 # Database
 # =============================================================================
-# 本地开发:
-# DATABASE_URL=postgresql://postgres:postgres@localhost:5432/life_echo
+# 本地开发（docker-compose.dev.yml 固定宿主端口 48291，避免与本机 5432 冲突）
+# DATABASE_URL=postgresql://postgres:postgres@localhost:48291/life_echo
 # Docker / 服务端（主机名一般为 compose 服务名 postgres）:
 # DATABASE_URL=postgresql://postgres:postgres@postgres:5432/life_echo
-DATABASE_URL=postgresql://postgres:postgres@localhost:5432/life_echo
+DATABASE_URL=postgresql://postgres:postgres@localhost:48291/life_echo
 # 启动时 Alembic（main.py）；生产可设 ALEMBIC_STARTUP_FAIL_FAST=true，迁移失败则拒绝启动
 # ALEMBIC_RUN_ON_STARTUP=true
 # ALEMBIC_STARTUP_FAIL_FAST=false
@@ -128,11 +129,11 @@ DATABASE_URL=postgresql://postgres:postgres@localhost:5432/life_echo
 # =============================================================================
 # Redis
 # =============================================================================
-# 本地开发:
-# REDIS_URL=redis://localhost:6379/0
+# 本地开发（docker-compose.dev.yml 固定宿主端口 48307，避免与本机 6379 冲突）
+# REDIS_URL=redis://localhost:48307/0
 # Docker / 服务端:
 # REDIS_URL=redis://redis:6379/0
-REDIS_URL=redis://localhost:6379/0
+REDIS_URL=redis://localhost:48307/0
 REDIS_SESSION_TTL=86400

 # Celery：ingest 后 Memory LLM 富化任务投递队列（须被 worker 消费；见 README）
@@ -236,9 +237,11 @@ TENCENT_SECRET_ID=your_tencent_asr_secret_id
 TENCENT_SECRET_KEY=your_tencent_asr_secret_key

 # =============================================================================
-# TTS（文字转语音，Agent 回复播音）— 与 ASR 独立
+# TTS（文字转语音，Agent 回复朗读）— 与 ASR 独立
 # =============================================================================
-# ENABLE_TTS：仅控制是否合成并下发 TTS_AUDIO；不影响用户语音转写（ASR）
+# ENABLE_TTS：是否启用「助手回复朗读」服务端能力（TTS 适配器与密钥配置）。关则永远不合成。
+# 每轮是否实际合成：由客户端在 WebSocket `text` / `audio_segment` / `audio_message` 的 `data.tts_this_turn` 控制（未传或 false 仅返回文字）。
+# 若 ENABLE_TTS=true 且该轮 `tts_this_turn=true`：每一段助手文案先下发 `tts_audio`，再下发对应段的 `agent_response`。
 ENABLE_TTS=true
 TTS_PROVIDER=tencent
 # 仅 TTS_PROVIDER=openai 时需要
--- a/api/.env.production
+++ b/api/.env.production
@@ -189,9 +189,11 @@ TENCENT_SECRET_ID=AKIDa2ILCwUr56uVt31oU0JOHxPfGhvvkLiq
 TENCENT_SECRET_KEY=xiFbjlZ9XheS2NWYLvHRPAh2A5nGYcR2

 # =============================================================================
-# TTS（文字转语音，Agent 回复播音）— 与 ASR 独立
+# TTS（文字转语音，Agent 回复朗读）— 与 ASR 独立
 # =============================================================================
-# ENABLE_TTS：仅控制是否合成并下发 TTS_AUDIO；不影响用户语音转写（ASR）
+# ENABLE_TTS：是否启用「助手回复朗读」服务端能力（TTS 适配器与密钥配置）。关则永远不合成。
+# 每轮是否实际合成：由客户端在 WebSocket `text` / `audio_segment` / `audio_message` 的 `data.tts_this_turn` 控制（未传或 false 仅返回文字）。
+# 若 ENABLE_TTS=true 且该轮 `tts_this_turn=true`：每一段助手文案先下发 `tts_audio`，再下发对应段的 `agent_response`。
 ENABLE_TTS=true
 TTS_PROVIDER=tencent
 # 仅 TTS_PROVIDER=openai 时需要（填控制台密钥；勿在注释行写 =your_* 以免旧版 CI 误匹配）
--- a/api/.env.staging
+++ b/api/.env.staging
@@ -119,9 +119,11 @@ TENCENT_SECRET_ID=your_tencent_asr_secret_id
 TENCENT_SECRET_KEY=your_tencent_asr_secret_key

 # =============================================================================
-# TTS（文字转语音，Agent 回复播音）— 与 ASR 独立
+# TTS（文字转语音，Agent 回复朗读）— 与 ASR 独立
 # =============================================================================
-# ENABLE_TTS：仅控制是否合成并下发 TTS_AUDIO；不影响用户语音转写（ASR）
+# ENABLE_TTS：是否启用「助手回复朗读」服务端能力（TTS 适配器与密钥配置）。关则永远不合成。
+# 每轮是否实际合成：由客户端在 WebSocket `text` / `audio_segment` / `audio_message` 的 `data.tts_this_turn` 控制（未传或 false 仅返回文字）。
+# 若 ENABLE_TTS=true 且该轮 `tts_this_turn=true`：每一段助手文案先下发 `tts_audio`，再下发对应段的 `agent_response`。
 ENABLE_TTS=true
 TTS_PROVIDER=tencent
 # 仅 TTS_PROVIDER=openai 时需要
--- a/api/README.md
+++ b/api/README.md
@@ -90,11 +90,11 @@ LLM_BASE_URL=https://api.your-llm-provider.com  # 可选
 LLM_MODEL=your-model-name  # 可选，默认 deepseek-chat
 LLM_TEMPERATURE=0.7  # 可选，默认 0.7

-# 数据库配置（PostgreSQL，推荐）
-DATABASE_URL=postgresql://postgres:postgres@localhost:5432/life_echo
+# 数据库配置（本地用 docker-compose.dev.yml 时为固定端口 48291，见下文「本地开发」）
+DATABASE_URL=postgresql://postgres:postgres@localhost:48291/life_echo

-# Redis 配置
-REDIS_URL=redis://localhost:6379/0
+# Redis 配置（本地 compose.dev 固定端口 48307）
+REDIS_URL=redis://localhost:48307/0

 # 认证配置
 SECRET_KEY=your-secret-key-here  # JWT签名密钥（建议使用随机字符串）
@@ -152,9 +152,9 @@ docker compose -f docker-compose.dev.yml up -d
 # 2. 安装依赖
 pip install -r requirements.txt

-# 3. 配置环境变量
-export DATABASE_URL=postgresql://postgres:postgres@localhost:5432/life_echo
-export REDIS_URL=redis://localhost:6379/0
+# 3. 配置环境变量（与 docker-compose.dev.yml 固定宿主端口一致：Postgres 48291、Redis 48307）
+export DATABASE_URL=postgresql://postgres:postgres@localhost:48291/life_echo
+export REDIS_URL=redis://localhost:48307/0

 # 4. 启动 API（终端 1）
 uvicorn main:app --reload --host 0.0.0.0 --port 8000
--- a/api/app/features/conversation/service.py
+++ b/api/app/features/conversation/service.py
@@ -96,6 +96,9 @@ def _build_messages_from_history(
            tts = msg.get("ttsAudioUrls")
            if isinstance(tts, list) and tts:
                item["ttsAudioUrls"] = [x for x in tts if isinstance(x, str)]
+        dm = msg.get("durableMessageId")
+        if isinstance(dm, str) and dm:
+            item["durableMessageId"] = dm
        messages.append(item)
    return messages

--- a/api/app/features/conversation/session_history.py
+++ b/api/app/features/conversation/session_history.py
@@ -18,6 +18,7 @@ def conversation_messages_to_redis_history(
            "content": row.content,
            "messageType": row.message_type,
            "timestamp": row.created_at.isoformat() if row.created_at else None,
+            "durableMessageId": row.id,
        }
        if row.voice_session_id:
            item["voiceSessionId"] = row.voice_session_id
--- a/api/app/features/conversation/tts_delivery.py
+++ b/api/app/features/conversation/tts_delivery.py
@@ -9,9 +9,15 @@

 from __future__ import annotations

-from app.core.cos_url_keys import presign_tts_urls_for_playback
+from app.core.cos_url_keys import (
+    TTS_PRESIGNED_EXPIRES_SEC,
+    extract_cos_object_key_if_owned,
+)
+from app.core.logging import get_logger
 from app.ports.storage import ObjectStorage

+logger = get_logger(__name__)
+

 def apply_presigned_tts_urls_to_messages(
    messages: list[dict],
@@ -24,5 +30,26 @@ def apply_presigned_tts_urls_to_messages(
        tts = m.get("ttsAudioUrls")
        if not isinstance(tts, list) or not tts:
            continue
-        str_urls = [x for x in tts if isinstance(x, str)]
-        m["ttsAudioUrls"] = presign_tts_urls_for_playback(str_urls, storage)
+        out: list[str] = []
+        for x in tts:
+            if not isinstance(x, str):
+                out.append("")
+                continue
+            s = x.strip()
+            if not s:
+                out.append("")
+                continue
+            key = extract_cos_object_key_if_owned(s)
+            if key:
+                try:
+                    out.append(storage.get_url(key, expires=TTS_PRESIGNED_EXPIRES_SEC))
+                except Exception as exc:
+                    logger.warning(
+                        "presign tts url failed, keeping original url: key={} err={}",
+                        key,
+                        exc,
+                    )
+                    out.append(s)
+            else:
+                out.append(s)
+        m["ttsAudioUrls"] = out
--- a/api/app/features/conversation/ws/message_types.py
+++ b/api/app/features/conversation/ws/message_types.py
@@ -17,6 +17,7 @@ class MessageType(str, Enum):
    AGENT_RESPONSE = "agent_response"
    TTS_AUDIO = "tts_audio"
    TTS_CANCEL = "tts_cancel"
+    TTS_REQUEST = "tts_request"
    PING = "ping"
    PONG = "pong"
    END_CONVERSATION = "end_conversation"
--- a/api/app/features/conversation/ws/pipeline.py
+++ b/api/app/features/conversation/ws/pipeline.py
@@ -18,9 +18,13 @@ from sqlalchemy import select, update
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.agents.chat import ChatOrchestrator
+from app.agents.chat.reply_limits import segments_from_llm_response
 from app.core.agent_logging import agent_summary_enabled
 from app.core.config import settings
-from app.core.cos_url_keys import TTS_PRESIGNED_EXPIRES_SEC
+from app.core.cos_url_keys import (
+    TTS_PRESIGNED_EXPIRES_SEC,
+    extract_cos_object_key_if_owned,
+)
 from app.core.db import AsyncSessionLocal
 from app.core.dependencies import get_asr_provider, get_object_storage, get_tts_provider
 from app.features.conversation.chat_turn import (
@@ -33,7 +37,7 @@ from app.features.conversation.history_store import (
    ConversationHistoryStore,
 )
 from app.features.conversation.lineage_schemas import DialogueLineage
-from app.features.conversation.models import Conversation, Segment
+from app.features.conversation.models import Conversation, ConversationMessage, Segment
 from app.features.conversation.ws.connection_manager import manager
 from app.features.conversation.ws.message_types import MessageType
 from app.features.conversation.ws.profile_collector import (
@@ -84,6 +88,7 @@ async def _send_tts_audio(
    chunk_total: int,
    assistant_message_id: str | None,
    tts_epoch_start: int,
+    manual: bool = False,
 ) -> str | None:
    """Synthesize TTS, upload to COS, append Redis, send TTS_AUDIO. Returns public URL or None."""
    if not settings.enable_tts:
@@ -116,6 +121,8 @@ async def _send_tts_audio(
        }
        if assistant_message_id:
            payload_data["assistant_message_id"] = assistant_message_id
+        if manual:
+            payload_data["manual"] = True
        await manager.send_message(
            conversation_id,
            {
@@ -138,6 +145,109 @@ async def _send_tts_audio(
        return None


+async def handle_tts_request_on_demand(
+    *,
+    conversation_id: str,
+    user_id: str,
+    assistant_message_id: str,
+    segment_index: int,
+    segment_text: str | None,
+    db: AsyncSession,
+) -> tuple[bool, str]:
+    """用户点喇叭：该段已有 TTS 则预签名下发；否则合成后落库并下发。不重复合成同一段。"""
+    if not settings.enable_tts:
+        return False, "未开启语音合成"
+
+    conv = await db.get(Conversation, conversation_id)
+    if not conv or conv.user_id != user_id or conv.deleted_at is not None:
+        return False, "对话不存在或无权访问"
+
+    msg = await db.get(ConversationMessage, assistant_message_id)
+    if not msg or msg.conversation_id != conversation_id or msg.role != "ai":
+        return False, "消息不存在"
+
+    # 与客户端 splitMessageParts / segments_from_llm_response 对齐（含无 [SPLIT] 时的段落拆段）
+    parts = segments_from_llm_response(msg.content or "", max_segments=3)
+    if segment_index < 0 or segment_index >= len(parts):
+        return False, "分段序号无效"
+
+    canon = (parts[segment_index] or "").strip()
+    if not canon:
+        return False, "该段无朗读文本"
+    if segment_text and segment_text.strip() and segment_text.strip() != canon:
+        logger.debug(
+            "按需 TTS: 客户端传入 segment_text 与规范化后 canon 不完全一致，已按 segment_index 朗读 canon "
+            "(client_len={} canon_len={})",
+            len(segment_text.strip()),
+            len(canon),
+        )
+
+    urls: List[str] = []
+    for x in msg.tts_audio_urls or []:
+        if isinstance(x, str) and x.strip():
+            urls.append(x)
+        else:
+            urls.append("")
+    while len(urls) < len(parts):
+        urls.append("")
+
+    existing = urls[segment_index].strip() if segment_index < len(urls) else ""
+    chunk_total = len(parts)
+
+    if existing:
+        storage = get_object_storage()
+        key = extract_cos_object_key_if_owned(existing)
+        try:
+            playback_url = (
+                storage.get_url(key, expires=TTS_PRESIGNED_EXPIRES_SEC)
+                if key
+                else existing
+            )
+        except Exception as exc:
+            logger.warning("按需 TTS 预签名失败: {}", exc)
+            playback_url = existing
+        await manager.send_message(
+            conversation_id,
+            {
+                "type": MessageType.TTS_AUDIO,
+                "conversation_id": conversation_id,
+                "data": {
+                    "audio_url": playback_url,
+                    "format": settings.tts_codec,
+                    "index": segment_index,
+                    "total": chunk_total,
+                    "assistant_message_id": assistant_message_id,
+                    "manual": True,
+                },
+                "timestamp": datetime.now(timezone.utc).isoformat(),
+            },
+        )
+        return True, ""
+
+    tts_epoch_start = _tts_epoch_value(conversation_id)
+    url_stored = await _send_tts_audio(
+        conversation_id,
+        canon,
+        chunk_index=segment_index,
+        chunk_total=chunk_total,
+        assistant_message_id=assistant_message_id,
+        tts_epoch_start=tts_epoch_start,
+        manual=True,
+    )
+    if not url_stored:
+        return False, "语音合成失败"
+
+    while len(urls) <= segment_index:
+        urls.append("")
+    urls[segment_index] = url_stored
+    msg.tts_audio_urls = urls
+    await db.commit()
+
+    store = ConversationHistoryStore(db)
+    await store._sync_redis_best_effort(conversation_id)
+    return True, ""
+
+
 # ── Agent 实例（从 ConnectionManager 移出） ─────────────────────
 chat_orchestrator = ChatOrchestrator()
 chat_turn_service = ChatTurnService(chat_orchestrator)
@@ -153,6 +263,8 @@ class SegmentStreamState:
    """会话内分段处理状态（用于并行 ASR + 有序聚合）"""

    lock: asyncio.Lock = field(default_factory=asyncio.Lock)
+    #: 本条语音会话最近一次分段上行携带的本轮朗读开关（客户端每段一致即可）
+    tts_this_turn: bool = False
    pending_indices: Set[int] = field(default_factory=set)
    processed_indices: Set[int] = field(default_factory=set)
    buffered_transcripts: Dict[int, Tuple[str, Segment]] = field(default_factory=dict)
@@ -163,6 +275,43 @@ class SegmentStreamState:


 _segment_states: Dict[Tuple[str, str], SegmentStreamState] = {}
+_user_response_tasks: Dict[str, Set[asyncio.Task]] = {}
+_user_response_locks: Dict[str, asyncio.Lock] = {}
+
+
+def _get_user_response_lock(conversation_id: str) -> asyncio.Lock:
+    lock = _user_response_locks.get(conversation_id)
+    if lock is None:
+        lock = asyncio.Lock()
+        _user_response_locks[conversation_id] = lock
+    return lock
+
+
+def register_user_response_task(conversation_id: str, task: asyncio.Task) -> None:
+    tasks = _user_response_tasks.setdefault(conversation_id, set())
+    tasks.add(task)
+
+    def _cleanup(done_task: asyncio.Task) -> None:
+        tasks.discard(done_task)
+        if not tasks:
+            _user_response_tasks.pop(conversation_id, None)
+            _user_response_locks.pop(conversation_id, None)
+        if done_task.cancelled():
+            logger.warning(
+                "用户回复后台任务被取消 conversation_id={}",
+                conversation_id,
+            )
+            return
+        exc = done_task.exception()
+        if exc:
+            logger.error(
+                "用户回复后台任务异常 conversation_id={}: {}",
+                conversation_id,
+                exc,
+                exc_info=True,
+            )
+
+    task.add_done_callback(_cleanup)


 def get_or_create_segment_state(
@@ -432,9 +581,13 @@ async def process_audio_segment(
    audio_base64: str,
    audio_duration: int,
    is_last: bool,
+    *,
+    tts_this_turn: bool = False,
 ) -> None:
    """分段语音的异步处理：并行 ASR + 幂等落库 + 有序聚合触发 Agent。"""
    state = get_or_create_segment_state(conversation_id, voice_session_id)
+    async with state.lock:
+        state.tts_this_turn = bool(tts_this_turn)
    logger.info(
        "process_audio_segment 开始: conversation_id={} voice_session_id={} "
        "segment_index={} is_last={} duration_s={} audio_b64_len={}",
@@ -588,6 +741,7 @@ async def process_audio_segment(
                )

            ready_segments: List[Tuple[int, str, Segment]] = []
+            tts_flag_this_voice_session = False
            async with state.lock:
                state.processed_indices.add(segment_index)
                state.buffered_transcripts[segment_index] = (
@@ -602,6 +756,8 @@ async def process_audio_segment(
                    state.consumed_index = next_index
                    next_index += 1

+                tts_flag_this_voice_session = bool(state.tts_this_turn)
+
            for _, ordered_text, ordered_segment in ready_segments:
                await process_user_message(
                    conversation_id=conversation_id,
@@ -612,6 +768,7 @@ async def process_audio_segment(
                    user=user,
                    user_message_timestamp=ordered_segment.created_at
                    or user_message_timestamp,
+                    tts_this_turn=tts_flag_this_voice_session,
                )

    except Exception as e:
@@ -638,6 +795,48 @@ async def process_audio_segment(
 # ── 用户消息处理 ────────────────────────────────────────────────


+async def process_persisted_user_segment_response(
+    *,
+    conversation_id: str,
+    user_id: str,
+    segment_id: str,
+    tts_this_turn: bool = False,
+) -> None:
+    """后台继续生成已落库用户段落的助手回复；即使 WS 页面退出也要完成落库。"""
+    lock = _get_user_response_lock(conversation_id)
+    async with lock:
+        async with AsyncSessionLocal() as db:
+            conversation = await db.get(Conversation, conversation_id)
+            user = await db.get(User, user_id)
+            segment = await db.get(Segment, segment_id)
+            if (
+                not conversation
+                or conversation.deleted_at is not None
+                or conversation.user_id != user_id
+                or not user
+                or not segment
+                or segment.conversation_id != conversation_id
+            ):
+                logger.warning(
+                    "跳过用户回复后台任务: conversation_id={} segment_id={} user_id={}",
+                    conversation_id,
+                    segment_id,
+                    user_id,
+                )
+                return
+            await process_user_message(
+                conversation_id=conversation_id,
+                user_message=segment.user_input_text or "",
+                conversation=conversation,
+                segment=segment,
+                db=db,
+                user=user,
+                user_message_timestamp=segment.created_at
+                or conversation.last_message_at,
+                tts_this_turn=tts_this_turn,
+            )
+
+
 async def process_user_message(
    conversation_id: str,
    user_message: str,
@@ -648,6 +847,7 @@ async def process_user_message(
    user_message_timestamp: Optional[datetime] = None,
    *,
    force_skip_tts: bool = False,
+    tts_this_turn: Optional[bool] = None,
 ) -> None:
    """处理用户消息，生成 Agent 回应。由 ChatOrchestrator 路由到 ProfileAgent 或 InterviewAgent。"""
    store = ConversationHistoryStore(db)
@@ -682,20 +882,23 @@ async def process_user_message(
                get_filled_profile_fields_fn=get_filled_profile_fields,
            ),
        )
+        responses = turn.messages
+        skip_tts = bool(turn.skip_tts)
+        want_voice = bool(tts_this_turn) if tts_this_turn is not None else False
+        want_tts = want_voice and settings.enable_tts and not skip_tts
        if agent_summary_enabled():
            logger.info(
                "pipeline.process_user_message duration_ms={:.2f} "
                "conversation_id={} segment_id={} user_msg_len={} "
-                "response_segments={} skip_tts={}",
+                "response_segments={} skip_tts={} want_tts={}",
                (time.perf_counter() - t_pipeline) * 1000,
                conversation_id,
                segment.id,
                len(user_message or ""),
                len(turn.messages),
                turn.skip_tts,
+                want_tts,
            )
-        responses = turn.messages
-        skip_tts = bool(turn.skip_tts)

        segment.agent_response = AI_RESPONSE_SEGMENT_JOIN.join(responses)
        _mark_conversation_active(conversation)
@@ -750,6 +953,21 @@ async def process_user_message(
        tts_epoch_start = _tts_epoch_value(conversation_id)
        n = len(responses)
        for i, response_text in enumerate(responses):
+            url_for_segment: Optional[str] = None
+            if want_tts:
+                if _tts_epoch_value(conversation_id) != tts_epoch_start:
+                    break
+                url_for_segment = await _send_tts_audio(
+                    conversation_id,
+                    response_text,
+                    chunk_index=i,
+                    chunk_total=n,
+                    assistant_message_id=ai_msg_id,
+                    tts_epoch_start=tts_epoch_start,
+                )
+                if url_for_segment:
+                    tts_urls.append(url_for_segment)
+
            await manager.send_message(
                conversation_id,
                {
@@ -764,20 +982,7 @@ async def process_user_message(
                    "timestamp": datetime.now(timezone.utc).isoformat(),
                },
            )
-            url = None
-            if not skip_tts:
-                if _tts_epoch_value(conversation_id) != tts_epoch_start:
-                    break
-                url = await _send_tts_audio(
-                    conversation_id,
-                    response_text,
-                    chunk_index=i,
-                    chunk_total=n,
-                    assistant_message_id=ai_msg_id,
-                    tts_epoch_start=tts_epoch_start,
-                )
-            if url:
-                tts_urls.append(url)
+
            if _tts_epoch_value(conversation_id) != tts_epoch_start:
                break
            if i < n - 1:
--- a/api/app/features/conversation/ws/protocol.md
+++ b/api/app/features/conversation/ws/protocol.md
@@ -1,25 +1,35 @@
 # WebSocket 消息协议

 ## 连接
- URL: /ws/conversation/{conversation_id}?token={jwt_access_token}
- 鉴权: query 参数 token，JWT access_token
+
+- URL: `/ws/conversation/{conversation_id}?token={jwt_access_token}`
+- 鉴权: query 参数 `token`，JWT `access_token`

 ## 消息类型 (client → server)
- TEXT: 文本消息
- AUDIO_SEGMENT: 语音分段
- AUDIO_MESSAGE: 完整语音消息
- TRANSCRIBE_ONLY: 仅转写不回复
- END_CONVERSATION: 结束对话
+
+- `TEXT`：文本消息。`data.text` 必填。可选 `data.tts_this_turn`（布尔）：仅当其为 `true`、服务端 `ENABLE_TTS` 已开启且本轮未被标记 `skip_tts` 时，才对该轮助手回复逐段合成 TTS；缺省或为 `false` 时不合成，仅返回文字。**开启本轮 TTS 时，服务端对每个助手分段先推送 `tts_audio`，再推送该段 `agent_response`**，便于客户端先收到音频再展示同段文字。
+- `AUDIO_SEGMENT`：语音分段。`data` 含 `audio_base64`、`segment_index`、`voice_session_id` / `client_segment_id`、`is_last`、`duration`。可选同上 `tts_this_turn`。
+- `AUDIO_MESSAGE`：整段音频（单次 ASR + 对话）。同上可选 `tts_this_turn`。
+- `TRANSCRIBE_ONLY`：仅转写不回复
+- `TTS_CANCEL`：取消当前轮未完成的分段合成与下发
+- `TTS_REQUEST`：用户点击某一助手气泡「朗读」且该段尚无 TTS 时下发。`data` 含 `assistant_message_id`（落库 `conversation_messages.id`）、`segment_index`（该条助手正文分段后的下标，从 0 开始；分段规则与服务端 `segments_from_llm_response` 一致，含无 `[SPLIT]` 时的段落拆段）、可选 `segment_text`（仅用于比对：与服务端分段正文不一致时只记录日志，仍按 `segment_index` 对应的正文朗读）。服务端若该段已有 URL 则只做预签名后推送 `tts_audio`（`data.manual=true`），**不重复合成**；否则合成并落库后再推送（同样带 `data.manual=true`）。
+- `END_CONVERSATION`：结束对话
+- `PING` / `PONG`：心跳（客户端也可用 JSON `{"type":"ping"}`）

 ## 消息类型 (server → client)
- TRANSCRIPT: ASR 转写结果
- AGENT_RESPONSE: AI 回复文本
- TTS_AUDIO: 语音合成音频 (base64)
- MEMOIR_UPDATE: 回忆录更新通知
- ERROR: 错误信息
+
+- `TRANSCRIPT`: ASR 转写结果
+- `AGENT_RESPONSE`: AI 回复文本分段
+- `TTS_AUDIO`: 语音合成结果（可与 `COS` 签名 URL、`base64` 并存）。按需朗读（`TTS_REQUEST`）成功时 `data.manual` 为 `true`，提示客户端应播放（即使用户未开「本轮 Speak」）。
+- `MEMOIR_UPDATE`: 回忆录更新通知
+- `ERROR`: 错误信息

 ## 状态流转
-CONNECT → (TEXT|AUDIO_*) ↔ (TRANSCRIPT|AGENT_RESPONSE|TTS_AUDIO) → END_CONVERSATION
+
+`CONNECT → (TEXT|AUDIO_*) ↔ (TRANSCRIPT|AGENT_RESPONSE|[TTS_AUDIO]) → END_CONVERSATION`
+
+同一连接内消息顺序稳定；本轮朗读模式（`tts_this_turn=true`）下，每个助手分段的 `tts_audio` 先于对应的 `agent_response` 下发。

 ## 重连
-客户端断连后可用相同 conversation_id 重连，历史消息从 Redis 恢复。
+
+客户端断连后可用相同 `conversation_id` 重连，历史消息从 Redis / HTTP 缓存恢复。
--- a/api/app/features/conversation/ws/router.py
+++ b/api/app/features/conversation/ws/router.py
@@ -28,11 +28,13 @@ from app.features.conversation.ws.pipeline import (
    chat_orchestrator,
    cleanup_segment_states,
    get_or_create_segment_state,
+    handle_tts_request_on_demand,
    memoir_ingest_scheduler,
    process_audio_segment,
    process_conversation_segments,
-    process_user_message,
+    process_persisted_user_segment_response,
    register_segment_task,
+    register_user_response_task,
 )
 from app.features.conversation.ws.profile_collector import get_missing_profile_fields
 from app.features.conversation.ws.quota_guard import check_ws_quota
@@ -276,7 +278,9 @@ async def websocket_endpoint(
                        )

                    if msg_type == MessageType.TEXT:
-                        text_message = message.get("data", {}).get("text", "")
+                        data = message.get("data") or {}
+                        text_message = data.get("text", "")
+                        tts_this_turn = bool(data.get("tts_this_turn"))

                        if text_message:
                            can_send, quota_msg = await check_ws_quota(
@@ -303,23 +307,21 @@ async def websocket_endpoint(
                                user_id,
                                text_message,
                            )
-                            user_message_timestamp = conversation.last_message_at
                            await memoir_ingest_scheduler.queue_segment(
                                conversation.user_id,
                                segment.id,
                                text_char_count=len(text_message.strip()),
                            )

-                            await process_user_message(
-                                conversation_id=conversation_id,
-                                user_message=text_message,
-                                conversation=conversation,
-                                segment=segment,
-                                db=db,
-                                user=user,
-                                user_message_timestamp=segment.created_at
-                                or user_message_timestamp,
+                            task = asyncio.create_task(
+                                process_persisted_user_segment_response(
+                                    conversation_id=conversation_id,
+                                    user_id=user_id,
+                                    segment_id=segment.id,
+                                    tts_this_turn=tts_this_turn,
+                                )
                            )
+                            register_user_response_task(conversation_id, task)

                    elif msg_type == MessageType.RECORDING_STARTED:
                        data = message.get("data", {})
@@ -486,6 +488,7 @@ async def websocket_endpoint(
                                audio_base64=audio_base64,
                                audio_duration=audio_duration,
                                is_last=is_last,
+                                tts_this_turn=bool(data.get("tts_this_turn")),
                            )
                        )
                        register_segment_task(conversation_id, voice_session_id, task)
@@ -494,6 +497,7 @@ async def websocket_endpoint(
                        data = message.get("data", {})
                        audio_base64 = data.get("audio_base64", "")
                        audio_duration = data.get("duration", 0)
+                        tts_this_turn = bool(data.get("tts_this_turn"))

                        if audio_base64:
                            can_send, quota_msg = await check_ws_quota(
@@ -564,7 +568,6 @@ async def websocket_endpoint(
                                        audio_duration_seconds=ads if ads > 0 else None,
                                    )
                                )
-                                user_message_timestamp = conversation.last_message_at
                                await memoir_ingest_scheduler.queue_segment(
                                    conversation.user_id,
                                    segment.id,
@@ -572,16 +575,15 @@ async def websocket_endpoint(
                                )

                                if asr_text and not asr_text.startswith("转写失败"):
-                                    await process_user_message(
-                                        conversation_id=conversation_id,
-                                        user_message=asr_text,
-                                        conversation=conversation,
-                                        segment=segment,
-                                        db=db,
-                                        user=user,
-                                        user_message_timestamp=segment.created_at
-                                        or user_message_timestamp,
+                                    task = asyncio.create_task(
+                                        process_persisted_user_segment_response(
+                                            conversation_id=conversation_id,
+                                            user_id=user_id,
+                                            segment_id=segment.id,
+                                            tts_this_turn=tts_this_turn,
+                                        )
                                    )
+                                    register_user_response_task(conversation_id, task)
                                else:
                                    await manager.send_message(
                                        conversation_id,
@@ -651,6 +653,51 @@ async def websocket_endpoint(
                    elif msg_type == MessageType.TTS_CANCEL:
                        bump_tts_cancel_epoch(conversation_id)

+                    elif msg_type == MessageType.TTS_REQUEST:
+                        data = message.get("data") or {}
+                        aid = data.get("assistant_message_id") or data.get(
+                            "assistantMessageId"
+                        )
+                        if not aid or not str(aid).strip():
+                            await manager.send_message(
+                                conversation_id,
+                                {
+                                    "type": MessageType.ERROR,
+                                    "data": {"message": "缺少助手消息 id"},
+                                    "timestamp": datetime.now(timezone.utc).isoformat(),
+                                },
+                            )
+                            continue
+                        try:
+                            seg_idx = int(
+                                data.get("segment_index", data.get("segmentIndex", 0))
+                            )
+                        except (TypeError, ValueError):
+                            seg_idx = 0
+                        st = data.get("segment_text") or data.get("segmentText")
+                        st_val: str | None
+                        if st is None:
+                            st_val = None
+                        else:
+                            st_val = str(st).strip() or None
+                        ok, err_msg = await handle_tts_request_on_demand(
+                            conversation_id=conversation_id,
+                            user_id=user_id,
+                            assistant_message_id=str(aid).strip(),
+                            segment_index=seg_idx,
+                            segment_text=st_val,
+                            db=db,
+                        )
+                        if not ok:
+                            await manager.send_message(
+                                conversation_id,
+                                {
+                                    "type": MessageType.ERROR,
+                                    "data": {"message": err_msg or "朗读请求失败"},
+                                    "timestamp": datetime.now(timezone.utc).isoformat(),
+                                },
+                            )
+
                    elif msg_type == MessageType.END_CONVERSATION:
                        await conversation_service.end(conversation_id, user_id)

--- a/api/app/features/user/router.py
+++ b/api/app/features/user/router.py
@@ -66,6 +66,11 @@ async def update_user_profile(
    current_user: User = Depends(get_current_user),
    service: UserService = Depends(get_user_service),
 ):
+    logger.info(
+        "更新用户档案 user_id={} fields={}",
+        current_user.id,
+        sorted(body.model_fields_set),
+    )
    return await service.update_profile(current_user.id, body)


--- a/api/app/features/user/service.py
+++ b/api/app/features/user/service.py
@@ -46,14 +46,9 @@ class UserService:
        user = await repo.get_user_by_id(user_id, self._db)
        if not user:
            raise ValueError("用户不存在")
-        if body.birth_year is not None:
-            user.birth_year = body.birth_year
-        if body.birth_place is not None:
-            user.birth_place = body.birth_place
-        if body.grew_up_place is not None:
-            user.grew_up_place = body.grew_up_place
-        if body.occupation is not None:
-            user.occupation = body.occupation
+        for field in ("birth_year", "birth_place", "grew_up_place", "occupation"):
+            if field in body.model_fields_set:
+                setattr(user, field, getattr(body, field))
        await self._db.commit()
        await self._db.refresh(user)
        return _user_to_profile(user)
--- a/api/development.sh
+++ b/api/development.sh
@@ -168,6 +168,7 @@ start_infra() {
  cd "${ROOT_DIR}"
  docker compose -f docker-compose.dev.yml up -d
  INFRA_STARTED=1
+  print_ok "PostgreSQL 127.0.0.1:48291，Redis 127.0.0.1:48307（见 docker-compose.dev.yml / .env.example）"
  print_ok "基础设施已就绪"
 }

@@ -236,7 +237,7 @@ print_alembic_failure_hint() {

  log_output="$(sed -n '1,200p' "${log_file}")"
  if [[ "${log_output}" == *'could not translate host name "postgres"'* ]] || [[ "${log_output}" == *"Name or service not known"* ]]; then
-    print_warn "看起来 DATABASE_URL 指向了容器内主机名；在宿主机运行时请改用 localhost:5432"
+    print_warn "看起来 DATABASE_URL 指向了容器内主机名；在宿主机运行时请改用 localhost:48291（见 docker-compose.dev.yml）"
  elif [[ "${log_output}" == *"Connection refused"* ]] || [[ "${log_output}" == *"could not connect to server"* ]]; then
    print_warn "PostgreSQL 连接被拒绝；请确认容器已启动且 DATABASE_URL 与 docker-compose.dev.yml 暴露端口一致"
  elif [[ "${log_output}" == *"password authentication failed"* ]]; then
--- a/api/docker-compose.dev.yml
+++ b/api/docker-compose.dev.yml
@@ -1,5 +1,9 @@
 # 开发环境 Docker Compose
 # 使用方法: docker compose -f docker-compose.dev.yml up -d
+#
+# 宿主端口为项目约定的固定高位端口（避免与本机常用 5432/6379 冲突），与本仓库 .env.example 对齐：
+#   PostgreSQL  127.0.0.1:48291 → 容器 5432
+#   Redis       127.0.0.1:48307 → 容器 6379

 services:
  # PostgreSQL 数据库（pg17 + pgvector，memory 模块需要 vector 类型）
@@ -7,7 +11,7 @@ services:
    image: pgvector/pgvector:pg17
    container_name: life-echo-postgres-dev
    ports:
-      - "5432:5432"
+      - "127.0.0.1:48291:5432"
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
@@ -26,7 +30,7 @@ services:
    image: redis:7-alpine
    container_name: life-echo-redis-dev
    ports:
-      - "6379:6379"
+      - "127.0.0.1:48307:6379"
    volumes:
      - redis_data_dev:/data
    command: redis-server --appendonly yes
--- a/api/docker-compose.yml
+++ b/api/docker-compose.yml
@@ -4,7 +4,8 @@ services:
    image: m.daocloud.io/docker.io/pgvector/pgvector:pg17
    container_name: life-echo-postgres
    ports:
-      - "127.0.0.1:5432:5432"  # 仅绑定 localhost，通过 SSH 隧道访问
+      # 宿主机端口随机，避免与本机其它 PostgreSQL 冲突；查询: docker compose port postgres 5432
+      - "127.0.0.1::5432"
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-postgres}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
@@ -56,10 +57,10 @@ services:
      dockerfile: Dockerfile
    image: life-echo-api:latest
    container_name: life-echo-api-prod
-    # 独立 Caddy（宿主机或其它 compose）经 HTTPS 反代；仅绑定本机回环，避免与机上其它项目端口直接对公网。
-    # 若与 Cosmetic 等共用主机且 8000 已被占用，在 .env 中设置 LIFE_ECHO_API_HOST_PORT=其它端口并在 Caddyfile 中一致。
+    # 独立 Caddy 反代；绑定本机回环。未设置 LIFE_ECHO_API_HOST_PORT 时宿主机端口随机，避免与机上其它服务冲突。
+    # 需固定端口时（例如 Caddyfile）：在 .env 设置 LIFE_ECHO_API_HOST_PORT=8000；随机时查询: docker compose port api 8000
    ports:
-      - "127.0.0.1:${LIFE_ECHO_API_HOST_PORT:-8000}:8000"
+      - "127.0.0.1:${LIFE_ECHO_API_HOST_PORT:-}:8000"
    env_file:
      - .env
    environment:
--- a/api/docs/本地开发环境配置.md
+++ b/api/docs/本地开发环境配置.md
@@ -24,15 +24,20 @@

 ## 快速开始

-### 1. 启动 Redis
+### 1. 启动 PostgreSQL / Redis

-使用 Docker Compose 启动 Redis：
+使用开发用 Docker Compose 一键启动数据库与缓存：

 ```bash
 cd api
 docker compose -f docker-compose.dev.yml up -d
 ```

+开发 compose 使用 **固定的** 本机端口映射（与 `api/.env.example` 一致，避免与本机默认的 5432 / 6379 端口冲突）：
+
+- PostgreSQL：`127.0.0.1:48291` → 容器内 `5432`
+- Redis：`127.0.0.1:48307` → 容器内 `6379`
+
 验证 Redis 是否运行：

 ```bash
@@ -61,12 +66,12 @@ DEEPSEEK_BASE_URL=https://api.deepseek.com
 # LLM_MODEL=gpt-4
 # LLM_BASE_URL=https://api.openai.com

-# Redis 配置
-REDIS_URL=redis://localhost:6379/0
+# Redis 配置（宿主 48307，见 docker-compose.dev.yml）
+REDIS_URL=redis://localhost:48307/0
 REDIS_SESSION_TTL=86400  # 会话过期时间（秒），默认 24 小时

-# 数据库配置（PostgreSQL，与线上一致）
-DATABASE_URL=postgresql://postgres:postgres@localhost:5432/life_echo
+# 数据库配置（宿主 48291，见 docker-compose.dev.yml）
+DATABASE_URL=postgresql://postgres:postgres@localhost:48291/life_echo

 # JWT 配置
 SECRET_KEY=your-secret-key-change-in-production
@@ -114,7 +119,7 @@ celery -A tasks.celery_app worker --loglevel=info --concurrency=2
 - 对话的实时响应通过异步 LLM 调用生成
 - 会话历史存储在 Redis 中

-### Redis (端口 6379)
+### Redis（宿主 48307 → 容器内 6379，见 docker-compose.dev.yml）

 - 存储对话会话历史（支持多实例部署）
 - 作为 Celery 的消息队列
@@ -169,12 +174,12 @@ docker compose up -d --scale celery-worker=3
 ### Redis 连接失败

 ```
-Redis 连接失败: Error connecting to redis://localhost:6379/0
+Redis 连接失败: Error connecting to redis://localhost:48307/0
 ```

 **解决方法**：
 1. 确认 Redis 容器正在运行：`docker ps | grep redis`
-2. 检查 `REDIS_URL` 环境变量是否正确
+2. 检查 `REDIS_URL` 是否为 `redis://localhost:48307/0`（端口须与 `docker-compose.dev.yml` 中的宿主端口映射一致）
 3. 如果在 Docker 内运行 API，使用 `redis://redis:6379/0`

 ### Celery 任务不执行