修复版本1.0.7的若干问题 (#11)

* fix/ 0:00 audio ui

* fix/ persist memoir image state and collapse voice history

  Keep generated chapter images from staying in processing after successful uploads, and restore segmented voice recordings as a single audio message when reopening conversations.

  Made-with: Cursor

* fix/ persist local conversation state and stabilize voice UI

  Keep CreateMemory conversations driven by Room so recent text and audio survive page exits, and prevent stale 0:00 voice bubbles while list ordering follows the latest local message time.

  Made-with: Cursor

* fix/ server-side root cause for conversation list time and message timestamps

  - Add Conversation.last_message_at column with migration and index
  - Update last_message_at on text message, audio segment, and AI response
  - Sort conversation list by COALESCE(last_message_at, started_at) DESC
  - Return real per-message timestamps from Redis history instead of now()
  - Pass user_message_timestamp through agent pipeline to avoid LLM delay skew
  - Remove all debug logging from server, client, and CI workflow
  - Restore import json in conversation_agent (was broken by debug removal)
  - Client: remove DebugRuntimeLogger, stop sending transcript as text message

  Made-with: Cursor

---------

Co-authored-by: Kevin <kevin@brighteng.org>
2026-03-14 23:58:46 +08:00
parent 9636c059d0
commit c2ce4c61f1
29 changed files with 1041 additions and 216 deletions
--- a/api/routers/conversations.py
+++ b/api/routers/conversations.py
@@ -6,7 +6,7 @@ from typing import List, Optional
 from fastapi import APIRouter, Depends, HTTPException, Query, Body
 from pydantic import BaseModel
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy import select
+from sqlalchemy import func, select
 import uuid

 from database import get_async_db, Conversation, Segment, User
@@ -17,6 +17,65 @@ from database.models import User as UserModel
 router = APIRouter(prefix="/api/conversations", tags=["conversations"])


+def _datetime_to_timestamp_ms(value: datetime | None) -> int:
+    if value is None:
+        return int(datetime.now(timezone.utc).timestamp() * 1000)
+    if value.tzinfo is None:
+        value = value.replace(tzinfo=timezone.utc)
+    return int(value.timestamp() * 1000)
+
+
+def _message_timestamp_ms(msg: dict, fallback: datetime | None) -> int:
+    raw_timestamp = msg.get("timestamp")
+    if isinstance(raw_timestamp, (int, float)):
+        return int(raw_timestamp)
+    if isinstance(raw_timestamp, str):
+        try:
+            return int(datetime.fromisoformat(raw_timestamp.replace("Z", "+00:00")).timestamp() * 1000)
+        except ValueError:
+            pass
+    return _datetime_to_timestamp_ms(fallback)
+
+
+def _latest_message_time_ms(conversation: ConversationModel, history: list[dict]) -> int:
+    if conversation.last_message_at:
+        return _datetime_to_timestamp_ms(conversation.last_message_at)
+    if history:
+        return _message_timestamp_ms(history[-1], conversation.started_at)
+    return _datetime_to_timestamp_ms(conversation.started_at)
+
+
+def _build_messages_from_history(
+    conversation_id: str,
+    history: list[dict],
+    fallback_timestamp: datetime | None,
+) -> list[dict]:
+    messages: list[dict] = []
+    seen_audio_sessions: set[str] = set()
+
+    for idx, msg in enumerate(history):
+        role = msg.get("role")
+        message_type = msg.get("messageType", "text")
+        voice_session_id = msg.get("voiceSessionId")
+        if role == "human" and message_type == "audio" and voice_session_id:
+            if voice_session_id in seen_audio_sessions:
+                continue
+            seen_audio_sessions.add(voice_session_id)
+
+        messages.append(
+            {
+                "id": f"{conversation_id}_msg_{idx}",
+                "conversationId": conversation_id,
+                "content": msg.get("content", ""),
+                "senderType": "user" if role == "human" else "assistant",
+                "timestamp": _message_timestamp_ms(msg, fallback_timestamp),
+                "messageType": message_type,
+            }
+        )
+
+    return messages
+
+
@router.get("")
 async def get_conversations(
    current_user: UserModel = Depends(get_current_user),
@@ -25,7 +84,7 @@ async def get_conversations(
    """获取当前用户的所有对话列表（需要认证）"""
    stmt = select(ConversationModel).where(
        ConversationModel.user_id == current_user.id
-    ).order_by(ConversationModel.started_at.desc())
+    ).order_by(func.coalesce(ConversationModel.last_message_at, ConversationModel.started_at).desc())
    result = await db.execute(stmt)
    conversations = result.scalars().all()
    
@@ -35,11 +94,12 @@ async def get_conversations(
    for conv in conversations:
        # 从Redis获取最新消息预览
        latest_message = None
+        history: list[dict] = []
        try:
            history = await redis_service.get_conversation_history(conv.id)
            if history:
                latest_message = history[-1].get("content", "")[:50]  # 取前50个字符
-        except:
+        except Exception:
            pass
        
        conversation_list.append({
@@ -47,7 +107,7 @@ async def get_conversations(
            "title": conv.summary[:30] if conv.summary else "岁月知己",  # 使用summary作为标题，如果没有则使用默认标题
            "avatarUrl": None,
            "latestMessagePreview": latest_message or conv.summary,
-            "latestMessageTime": int(conv.started_at.timestamp() * 1000) if conv.started_at else int(datetime.now(timezone.utc).timestamp() * 1000),
+            "latestMessageTime": _latest_message_time_ms(conv, history),
            "unreadCount": 0,
            "isDefaultAssistant": conv.summary is None  # 如果没有summary，则认为是默认助手
        })
@@ -187,18 +247,12 @@ async def get_messages(
    from services.redis_service import redis_service
    try:
        history = await redis_service.get_conversation_history(conversation_id)
-        messages = []
-        for idx, msg in enumerate(history):
-            messages.append({
-                "id": f"{conversation_id}_msg_{idx}",
-                "conversationId": conversation_id,
-                "content": msg.get("content", ""),
-                "senderType": "user" if msg.get("role") == "human" else "assistant",
-                "timestamp": int(datetime.now(timezone.utc).timestamp() * 1000),  # Redis中没有时间戳，使用当前时间
-                "messageType": msg.get("messageType", "text"),  # 保留语音消息类型，使重新进入时仍显示为语音条
-            })
-        return messages
-    except Exception as e:
+        return _build_messages_from_history(
+            conversation_id=conversation_id,
+            history=history,
+            fallback_timestamp=conversation.started_at,
+        )
+    except Exception:
        # 如果Redis中没有数据，返回空列表
        return []

--- a/api/routers/websocket.py
+++ b/api/routers/websocket.py
@@ -35,7 +35,7 @@ class MessageType(str, Enum):
    AUDIO_CHUNK = "audio_chunk"
    AUDIO_SEGMENT = "audio_segment"  # 分段语音消息（长语音持续上传）
    AUDIO_MESSAGE = "audio_message"  # 完整音频消息（类似微信语音）
-    TRANSCRIBE_ONLY = "transcribe_only"  # 仅转写，不落库、不触发 Agent，用于「转文字」发送
+    TRANSCRIBE_ONLY = "transcribe_only"  # 仅转写，不落库、不触发 Agent，只返回转写结果
    TEXT = "text"  # 文本消息
    TRANSCRIPT = "transcript"  # 语音转文字结果
    AGENT_RESPONSE = "agent_response"
@@ -148,6 +148,16 @@ class SegmentStreamState:
    active_tasks: Set[asyncio.Task] = field(default_factory=set)


+def _utc_now() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+def _mark_conversation_active(conversation: Conversation, at: Optional[datetime] = None) -> datetime:
+    activity_time = at or _utc_now()
+    conversation.last_message_at = activity_time
+    return activity_time
+
+
 def _normalize_voice_session_id(voice_session_id: Optional[str]) -> str:
    if voice_session_id:
        return str(voice_session_id)
@@ -183,6 +193,13 @@ def _extract_segment_scope(audio_url: Optional[str]) -> Optional[Tuple[str, int]
        return None


+def _voice_session_id_from_audio_url(audio_url: Optional[str]) -> Optional[str]:
+    scope = _extract_segment_scope(audio_url)
+    if scope:
+        return scope[0]
+    return None
+
+
 def _is_transcribe_failure(transcript_text: Optional[str]) -> bool:
    if not transcript_text:
        return True
@@ -357,6 +374,7 @@ async def _process_audio_segment_async(
                    processed=False,
                )
                db.add(segment)
+                user_message_timestamp = _mark_conversation_active(conversation)
                await db.commit()
                await db.refresh(segment)
                await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -383,6 +401,7 @@ async def _process_audio_segment_async(
                    db=db,
                    manager=manager,
                    user=user,
+                    user_message_timestamp=ordered_segment.created_at or user_message_timestamp,
                )

            break
@@ -564,6 +583,7 @@ async def websocket_endpoint(
                                processed=False
                            )
                            db.add(segment)
+                            user_message_timestamp = _mark_conversation_active(conversation)
                            await db.commit()
                            await db.refresh(segment)
                            await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -576,8 +596,9 @@ async def websocket_endpoint(
                                segment=segment,
                                db=db,
                                manager=manager,
-                                    user=user,
-                                )
+                                user=user,
+                                user_message_timestamp=segment.created_at or user_message_timestamp,
+                            )

                    elif msg_type == MessageType.AUDIO_SEGMENT:
                        # 处理分段语音消息（长语音持续上传）
@@ -726,6 +747,7 @@ async def websocket_endpoint(
                                    processed=False
                                )
                                db.add(segment)
+                                user_message_timestamp = _mark_conversation_active(conversation)
                                await db.commit()
                                await db.refresh(segment)
                                await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -740,6 +762,7 @@ async def websocket_endpoint(
                                        db=db,
                                        manager=manager,
                                        user=user,
+                                        user_message_timestamp=segment.created_at or user_message_timestamp,
                                    )
                                else:
                                    # 转写失败，发送错误消息
@@ -758,7 +781,7 @@ async def websocket_endpoint(
                                })
                    
                    elif msg_type == MessageType.TRANSCRIBE_ONLY:
-                        # 仅转写：不落库、不触发 Agent，用于客户端「转文字」后发文本
+                        # 仅转写：不落库、不触发 Agent，只把识别结果返回给客户端
                        data = message.get("data", {})
                        audio_base64 = data.get("audio_base64", "")
                        if not audio_base64:
@@ -906,6 +929,7 @@ async def process_user_message(
    db: AsyncSession,
    manager: ConnectionManager,
    user: UserModel = None,
+    user_message_timestamp: Optional[datetime] = None,
 ) -> None:
    """
    处理用户消息，生成Agent回应（异步版本）
@@ -936,9 +960,12 @@ async def process_user_message(
                    filled_fields=filled,
                    nickname=user.nickname or "",
                    is_from_voice=is_from_voice,
+                    voice_session_id=_voice_session_id_from_audio_url(segment.audio_url),
+                    user_message_timestamp=user_message_timestamp,
                )

                segment.agent_response = "\n\n".join(responses)
+                _mark_conversation_active(conversation)
                await db.commit()

                for i, response_text in enumerate(responses):
@@ -987,9 +1014,12 @@ async def process_user_message(
            memoir_state=state,
            user_profile_context=user_profile_context,
            is_from_voice=is_from_voice,
+            voice_session_id=_voice_session_id_from_audio_url(segment.audio_url),
+            user_message_timestamp=user_message_timestamp,
        )

        segment.agent_response = "\n\n".join(responses)
+        _mark_conversation_active(conversation)
        await db.commit()

        for i, response_text in enumerate(responses):