修复版本1.0.7的若干问题 (#11)

* fix/ 0:00 audio ui

* fix/ persist memoir image state and collapse voice history

  Keep generated chapter images from staying in processing after successful uploads, and restore segmented voice recordings as a single audio message when reopening conversations.

  Made-with: Cursor

* fix/ persist local conversation state and stabilize voice UI

  Keep CreateMemory conversations driven by Room so recent text and audio survive page exits, and prevent stale 0:00 voice bubbles while list ordering follows the latest local message time.

  Made-with: Cursor

* fix/ server-side root cause for conversation list time and message timestamps

  - Add Conversation.last_message_at column with migration and index
  - Update last_message_at on text message, audio segment, and AI response
  - Sort conversation list by COALESCE(last_message_at, started_at) DESC
  - Return real per-message timestamps from Redis history instead of now()
  - Pass user_message_timestamp through agent pipeline to avoid LLM delay skew
  - Remove all debug logging from server, client, and CI workflow
  - Restore import json in conversation_agent (was broken by debug removal)
  - Client: remove DebugRuntimeLogger, stop sending transcript as text message

  Made-with: Cursor

---------

Co-authored-by: Kevin <kevin@brighteng.org>
2026-03-14 23:58:46 +08:00
parent 9636c059d0
commit c2ce4c61f1
29 changed files with 1041 additions and 216 deletions
--- a/api/routers/websocket.py
+++ b/api/routers/websocket.py
@@ -35,7 +35,7 @@ class MessageType(str, Enum):
    AUDIO_CHUNK = "audio_chunk"
    AUDIO_SEGMENT = "audio_segment"  # 分段语音消息（长语音持续上传）
    AUDIO_MESSAGE = "audio_message"  # 完整音频消息（类似微信语音）
-    TRANSCRIBE_ONLY = "transcribe_only"  # 仅转写，不落库、不触发 Agent，用于「转文字」发送
+    TRANSCRIBE_ONLY = "transcribe_only"  # 仅转写，不落库、不触发 Agent，只返回转写结果
    TEXT = "text"  # 文本消息
    TRANSCRIPT = "transcript"  # 语音转文字结果
    AGENT_RESPONSE = "agent_response"
@@ -148,6 +148,16 @@ class SegmentStreamState:
    active_tasks: Set[asyncio.Task] = field(default_factory=set)


+def _utc_now() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+def _mark_conversation_active(conversation: Conversation, at: Optional[datetime] = None) -> datetime:
+    activity_time = at or _utc_now()
+    conversation.last_message_at = activity_time
+    return activity_time
+
+
 def _normalize_voice_session_id(voice_session_id: Optional[str]) -> str:
    if voice_session_id:
        return str(voice_session_id)
@@ -183,6 +193,13 @@ def _extract_segment_scope(audio_url: Optional[str]) -> Optional[Tuple[str, int]
        return None


+def _voice_session_id_from_audio_url(audio_url: Optional[str]) -> Optional[str]:
+    scope = _extract_segment_scope(audio_url)
+    if scope:
+        return scope[0]
+    return None
+
+
 def _is_transcribe_failure(transcript_text: Optional[str]) -> bool:
    if not transcript_text:
        return True
@@ -357,6 +374,7 @@ async def _process_audio_segment_async(
                    processed=False,
                )
                db.add(segment)
+                user_message_timestamp = _mark_conversation_active(conversation)
                await db.commit()
                await db.refresh(segment)
                await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -383,6 +401,7 @@ async def _process_audio_segment_async(
                    db=db,
                    manager=manager,
                    user=user,
+                    user_message_timestamp=ordered_segment.created_at or user_message_timestamp,
                )

            break
@@ -564,6 +583,7 @@ async def websocket_endpoint(
                                processed=False
                            )
                            db.add(segment)
+                            user_message_timestamp = _mark_conversation_active(conversation)
                            await db.commit()
                            await db.refresh(segment)
                            await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -576,8 +596,9 @@ async def websocket_endpoint(
                                segment=segment,
                                db=db,
                                manager=manager,
-                                    user=user,
-                                )
+                                user=user,
+                                user_message_timestamp=segment.created_at or user_message_timestamp,
+                            )

                    elif msg_type == MessageType.AUDIO_SEGMENT:
                        # 处理分段语音消息（长语音持续上传）
@@ -726,6 +747,7 @@ async def websocket_endpoint(
                                    processed=False
                                )
                                db.add(segment)
+                                user_message_timestamp = _mark_conversation_active(conversation)
                                await db.commit()
                                await db.refresh(segment)
                                await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -740,6 +762,7 @@ async def websocket_endpoint(
                                        db=db,
                                        manager=manager,
                                        user=user,
+                                        user_message_timestamp=segment.created_at or user_message_timestamp,
                                    )
                                else:
                                    # 转写失败，发送错误消息
@@ -758,7 +781,7 @@ async def websocket_endpoint(
                                })
                    
                    elif msg_type == MessageType.TRANSCRIBE_ONLY:
-                        # 仅转写：不落库、不触发 Agent，用于客户端「转文字」后发文本
+                        # 仅转写：不落库、不触发 Agent，只把识别结果返回给客户端
                        data = message.get("data", {})
                        audio_base64 = data.get("audio_base64", "")
                        if not audio_base64:
@@ -906,6 +929,7 @@ async def process_user_message(
    db: AsyncSession,
    manager: ConnectionManager,
    user: UserModel = None,
+    user_message_timestamp: Optional[datetime] = None,
 ) -> None:
    """
    处理用户消息，生成Agent回应（异步版本）
@@ -936,9 +960,12 @@ async def process_user_message(
                    filled_fields=filled,
                    nickname=user.nickname or "",
                    is_from_voice=is_from_voice,
+                    voice_session_id=_voice_session_id_from_audio_url(segment.audio_url),
+                    user_message_timestamp=user_message_timestamp,
                )

                segment.agent_response = "\n\n".join(responses)
+                _mark_conversation_active(conversation)
                await db.commit()

                for i, response_text in enumerate(responses):
@@ -987,9 +1014,12 @@ async def process_user_message(
            memoir_state=state,
            user_profile_context=user_profile_context,
            is_from_voice=is_from_voice,
+            voice_session_id=_voice_session_id_from_audio_url(segment.audio_url),
+            user_message_timestamp=user_message_timestamp,
        )

        segment.agent_response = "\n\n".join(responses)
+        _mark_conversation_active(conversation)
        await db.commit()

        for i, response_text in enumerate(responses):