feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验

- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测
2026-03-26 12:13:36 +08:00
parent 49b089354c
commit a3f61fcc0f
94 changed files with 3332 additions and 672 deletions
--- a/api/app/features/conversation/ws/pipeline.py
+++ b/api/app/features/conversation/ws/pipeline.py
@@ -2,6 +2,7 @@

 import asyncio
 import base64
+import time
 import uuid
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
@@ -16,6 +17,7 @@ from sqlalchemy import select, update
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.agents.chat import ChatOrchestrator
+from app.core.agent_logging import agent_summary_enabled
 from app.core.config import settings
 from app.core.db import AsyncSessionLocal
 from app.core.dependencies import get_asr_provider, get_object_storage, get_tts_provider
@@ -93,11 +95,11 @@ async def _send_tts_audio(
        err_str = str(e)
        if "PkgExhausted" in err_str:
            logger.warning(
-                "TTS skipped: 腾讯云语音合成资源包已用尽，请在控制台购买或开通后付费: %s",
+                "TTS skipped: 腾讯云语音合成资源包已用尽，请在控制台购买或开通后付费: {}",
                err_str[:100],
            )
        else:
-            logger.error("TTS synthesize failed: %s", e)
+            logger.error("TTS synthesize failed: {}", e)
        return None


@@ -438,20 +440,20 @@ async def process_audio_segment(
                async with state.lock:
                    state.processed_indices.add(segment_index)
                logger.debug(
-                    "分段已存在，按幂等跳过: conversation_id=%s voice_session_id=%s "
-                    "segment_index=%s segment_id=%s transcript=%s",
+                    "分段已存在，按幂等跳过: conversation_id={} voice_session_id={} "
+                    "segment_index={} segment_id={} transcript={}",
                    conversation_id,
                    voice_session_id,
                    segment_index,
                    existing_segment.id,
-                    existing_segment.transcript_text or "",
+                    existing_segment.user_input_text or "",
                )
                return
            else:
                segment = Segment(
                    id=str(uuid.uuid4()),
                    conversation_id=conversation_id,
-                    transcript_text=transcript_text or "",
+                    user_input_text=transcript_text or "",
                    audio_url=_build_segment_audio_url(voice_session_id, segment_index),
                    audio_duration_seconds=audio_duration
                    if audio_duration > 0
@@ -531,6 +533,7 @@ async def process_user_message(
        is_from_voice = bool(segment.audio_url)
        voice_session_id = _voice_session_id_from_audio_url(segment.audio_url)
        audio_dur = getattr(segment, "audio_duration_seconds", None)
+        t_pipeline = time.perf_counter()
        turn = await chat_orchestrator.process_user_message(
            conversation_id=conversation_id,
            user_message=user_message,
@@ -545,6 +548,18 @@ async def process_user_message(
            user_message_timestamp=user_message_timestamp,
            audio_duration_seconds=audio_dur,
        )
+        if agent_summary_enabled():
+            logger.info(
+                "pipeline.process_user_message duration_ms={:.2f} "
+                "conversation_id={} segment_id={} user_msg_len={} "
+                "response_segments={} skip_tts={}",
+                (time.perf_counter() - t_pipeline) * 1000,
+                conversation_id,
+                segment.id,
+                len(user_message or ""),
+                len(turn.messages),
+                turn.skip_tts,
+            )
        responses = turn.messages
        skip_tts = turn.skip_tts

@@ -618,7 +633,7 @@ async def process_user_message(
                )
                await db.commit()
            except Exception as persist_error:
-                logger.warning("补写 TTS 元数据失败: %s", persist_error)
+                logger.warning("补写 TTS 元数据失败: {}", persist_error)
        logger.error(f"处理用户消息失败: {e}", exc_info=True)
        if conversation_id in manager.active_connections:
            try:
--- a/api/app/features/conversation/ws/router.py
+++ b/api/app/features/conversation/ws/router.py
@@ -218,7 +218,7 @@ async def websocket_endpoint(
                try:
                    if websocket.application_state != WebSocketState.CONNECTED:
                        logger.debug(
-                            "WebSocket 已非连接状态，退出循环: conversation_id=%s",
+                            "WebSocket 已非连接状态，退出循环: conversation_id={}",
                            conversation_id,
                        )
                        break
@@ -251,7 +251,7 @@ async def websocket_endpoint(
                            segment = Segment(
                                id=str(uuid.uuid4()),
                                conversation_id=conversation_id,
-                                transcript_text=text_message,
+                                user_input_text=text_message,
                                processed=False,
                            )
                            db.add(segment)
@@ -414,8 +414,8 @@ async def websocket_endpoint(

                        if not should_process:
                            logger.debug(
-                                "收到重复分段，跳过: conversation_id=%s voice_session_id=%s "
-                                "segment_index=%s audio_b64_len=%s duration=%s",
+                                "收到重复分段，跳过: conversation_id={} voice_session_id={} "
+                                "segment_index={} audio_b64_len={} duration={}",
                                conversation_id,
                                voice_session_id,
                                segment_index,
@@ -470,7 +470,7 @@ async def websocket_endpoint(
                                continue

                            logger.debug(
-                                "收到音频消息: conversation_id=%s duration_s=%s",
+                                "收到音频消息: conversation_id={} duration_s={}",
                                conversation_id,
                                audio_duration,
                            )
@@ -478,18 +478,16 @@ async def websocket_endpoint(
                            try:
                                asr = get_asr_provider()
                                audio_bytes = base64.b64decode(audio_base64)
-                                transcript_text = await asr.transcribe(
-                                    audio_bytes, "m4a"
+                                asr_text = await asr.transcribe(audio_bytes, "m4a")
+                                logger.debug(
+                                    "ASR 转写完成: conversation_id={} chars={}",
+                                    conversation_id,
+                                    len(asr_text or ""),
                                )
                                logger.debug(
-                                    "ASR 转写完成: conversation_id=%s chars=%s",
+                                    "ASR 转写全文: conversation_id={} text={}",
                                    conversation_id,
-                                    len(transcript_text or ""),
-                                )
-                                logger.debug(
-                                    "ASR 转写全文: conversation_id=%s text=%s",
-                                    conversation_id,
-                                    transcript_text,
+                                    asr_text,
                                )

                                await manager.send_message(
@@ -498,7 +496,7 @@ async def websocket_endpoint(
                                        "type": MessageType.TRANSCRIPT,
                                        "conversation_id": conversation_id,
                                        "data": {
-                                            "text": transcript_text,
+                                            "text": asr_text,
                                            "audio_duration": audio_duration,
                                        },
                                        "timestamp": datetime.now(
@@ -514,7 +512,7 @@ async def websocket_endpoint(
                                segment = Segment(
                                    id=str(uuid.uuid4()),
                                    conversation_id=conversation_id,
-                                    transcript_text=transcript_text,
+                                    user_input_text=asr_text,
                                    audio_url=f"audio:{audio_duration}s",
                                    audio_duration_seconds=ads if ads > 0 else None,
                                    processed=False,
@@ -529,12 +527,10 @@ async def websocket_endpoint(
                                    conversation.user_id, segment.id
                                )

-                                if transcript_text and not transcript_text.startswith(
-                                    "转写失败"
-                                ):
+                                if asr_text and not asr_text.startswith("转写失败"):
                                    await process_user_message(
                                        conversation_id=conversation_id,
-                                        user_message=transcript_text,
+                                        user_message=asr_text,
                                        conversation=conversation,
                                        segment=segment,
                                        db=db,
@@ -587,13 +583,13 @@ async def websocket_endpoint(
                        try:
                            asr = get_asr_provider()
                            audio_bytes = base64.b64decode(audio_base64)
-                            transcript_text = await asr.transcribe(audio_bytes, "m4a")
+                            asr_text = await asr.transcribe(audio_bytes, "m4a")
                            await manager.send_message(
                                conversation_id,
                                {
                                    "type": MessageType.TRANSCRIPT,
                                    "conversation_id": conversation_id,
-                                    "data": {"text": transcript_text or ""},
+                                    "data": {"text": asr_text or ""},
                                    "timestamp": datetime.now(timezone.utc).isoformat(),
                                },
                            )
@@ -637,7 +633,7 @@ async def websocket_endpoint(
                        and "not connected" in error_msg.lower()
                    ):
                        logger.debug(
-                            "WebSocket 连接已断开或未就绪: conversation_id=%s error=%s",
+                            "WebSocket 连接已断开或未就绪: conversation_id={} error={}",
                            conversation_id,
                            error_msg,
                        )
@@ -661,7 +657,7 @@ async def websocket_endpoint(
                                break
                except WebSocketDisconnect:
                    logger.debug(
-                        "WebSocket 断开连接: conversation_id=%s", conversation_id
+                        "WebSocket 断开连接: conversation_id={}", conversation_id
                    )
                    break
                except Exception as e:
@@ -681,7 +677,7 @@ async def websocket_endpoint(
                            break

        except WebSocketDisconnect:
-            logger.debug("WebSocket 断开连接: conversation_id=%s", conversation_id)
+            logger.debug("WebSocket 断开连接: conversation_id={}", conversation_id)
            await manager.disconnect(conversation_id)
            cleanup_segment_states(conversation_id)
        except Exception as e: