修复版本1.0.7的若干问题 (#11)

* fix/ voice message UI showing a stale 0:00 duration

* fix/ persist memoir image state and collapse voice history

Keep generated chapter images from staying in processing after successful uploads, and restore segmented voice recordings as a single audio message when reopening conversations.

Made-with: Cursor

* fix/ persist local conversation state and stabilize voice UI

Keep CreateMemory conversations driven by Room so recent text and audio survive page exits, and prevent stale 0:00 voice bubbles while list ordering follows the latest local message time.

Made-with: Cursor

* fix/ server-side root cause for conversation list time and message timestamps

- Add Conversation.last_message_at column with migration and index
- Update last_message_at on text message, audio segment, and AI response
- Sort conversation list by COALESCE(last_message_at, started_at) DESC
- Return real per-message timestamps from Redis history instead of now()
- Pass user_message_timestamp through agent pipeline to avoid LLM delay skew
- Remove all debug logging from server, client, and CI workflow
- Restore import json in conversation_agent (was broken by debug removal)
- Client: remove DebugRuntimeLogger, stop sending transcript as text message

Made-with: Cursor

---------

Co-authored-by: Kevin <kevin@brighteng.org>
This commit is contained in:
Sully
2026-03-14 23:58:46 +08:00
committed by GitHub
parent 9636c059d0
commit c2ce4c61f1
29 changed files with 1041 additions and 216 deletions

View File

@@ -6,7 +6,7 @@ from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Body
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from sqlalchemy import func, select
import uuid
from database import get_async_db, Conversation, Segment, User
@@ -17,6 +17,65 @@ from database.models import User as UserModel
router = APIRouter(prefix="/api/conversations", tags=["conversations"])
def _datetime_to_timestamp_ms(value: datetime | None) -> int:
if value is None:
return int(datetime.now(timezone.utc).timestamp() * 1000)
if value.tzinfo is None:
value = value.replace(tzinfo=timezone.utc)
return int(value.timestamp() * 1000)
def _message_timestamp_ms(msg: dict, fallback: datetime | None) -> int:
raw_timestamp = msg.get("timestamp")
if isinstance(raw_timestamp, (int, float)):
return int(raw_timestamp)
if isinstance(raw_timestamp, str):
try:
return int(datetime.fromisoformat(raw_timestamp.replace("Z", "+00:00")).timestamp() * 1000)
except ValueError:
pass
return _datetime_to_timestamp_ms(fallback)
def _latest_message_time_ms(conversation: ConversationModel, history: list[dict]) -> int:
    """Return the conversation's latest activity time in epoch milliseconds.

    Sources are tried in order: ``conversation.last_message_at`` when set,
    then the timestamp of the last entry in ``history`` (with
    ``conversation.started_at`` as its fallback), and finally
    ``conversation.started_at`` alone.
    """
    last_activity = conversation.last_message_at
    if last_activity:
        return _datetime_to_timestamp_ms(last_activity)
    if not history:
        return _datetime_to_timestamp_ms(conversation.started_at)
    return _message_timestamp_ms(history[-1], conversation.started_at)
def _build_messages_from_history(
conversation_id: str,
history: list[dict],
fallback_timestamp: datetime | None,
) -> list[dict]:
messages: list[dict] = []
seen_audio_sessions: set[str] = set()
for idx, msg in enumerate(history):
role = msg.get("role")
message_type = msg.get("messageType", "text")
voice_session_id = msg.get("voiceSessionId")
if role == "human" and message_type == "audio" and voice_session_id:
if voice_session_id in seen_audio_sessions:
continue
seen_audio_sessions.add(voice_session_id)
messages.append(
{
"id": f"{conversation_id}_msg_{idx}",
"conversationId": conversation_id,
"content": msg.get("content", ""),
"senderType": "user" if role == "human" else "assistant",
"timestamp": _message_timestamp_ms(msg, fallback_timestamp),
"messageType": message_type,
}
)
return messages
@router.get("")
async def get_conversations(
current_user: UserModel = Depends(get_current_user),
@@ -25,7 +84,7 @@ async def get_conversations(
"""获取当前用户的所有对话列表(需要认证)"""
stmt = select(ConversationModel).where(
ConversationModel.user_id == current_user.id
).order_by(ConversationModel.started_at.desc())
).order_by(func.coalesce(ConversationModel.last_message_at, ConversationModel.started_at).desc())
result = await db.execute(stmt)
conversations = result.scalars().all()
@@ -35,11 +94,12 @@ async def get_conversations(
for conv in conversations:
# 从Redis获取最新消息预览
latest_message = None
history: list[dict] = []
try:
history = await redis_service.get_conversation_history(conv.id)
if history:
latest_message = history[-1].get("content", "")[:50] # 取前50个字符
except:
except Exception:
pass
conversation_list.append({
@@ -47,7 +107,7 @@ async def get_conversations(
"title": conv.summary[:30] if conv.summary else "岁月知己", # 使用summary作为标题如果没有则使用默认标题
"avatarUrl": None,
"latestMessagePreview": latest_message or conv.summary,
"latestMessageTime": int(conv.started_at.timestamp() * 1000) if conv.started_at else int(datetime.now(timezone.utc).timestamp() * 1000),
"latestMessageTime": _latest_message_time_ms(conv, history),
"unreadCount": 0,
"isDefaultAssistant": conv.summary is None # 如果没有summary则认为是默认助手
})
@@ -187,18 +247,12 @@ async def get_messages(
from services.redis_service import redis_service
try:
history = await redis_service.get_conversation_history(conversation_id)
messages = []
for idx, msg in enumerate(history):
messages.append({
"id": f"{conversation_id}_msg_{idx}",
"conversationId": conversation_id,
"content": msg.get("content", ""),
"senderType": "user" if msg.get("role") == "human" else "assistant",
"timestamp": int(datetime.now(timezone.utc).timestamp() * 1000), # Redis中没有时间戳使用当前时间
"messageType": msg.get("messageType", "text"), # 保留语音消息类型,使重新进入时仍显示为语音条
})
return messages
except Exception as e:
return _build_messages_from_history(
conversation_id=conversation_id,
history=history,
fallback_timestamp=conversation.started_at,
)
except Exception:
# 如果Redis中没有数据返回空列表
return []

View File

@@ -35,7 +35,7 @@ class MessageType(str, Enum):
AUDIO_CHUNK = "audio_chunk"
AUDIO_SEGMENT = "audio_segment" # 分段语音消息(长语音持续上传)
AUDIO_MESSAGE = "audio_message" # 完整音频消息(类似微信语音)
TRANSCRIBE_ONLY = "transcribe_only" # 仅转写,不落库、不触发 Agent用于「转文字」发送
TRANSCRIBE_ONLY = "transcribe_only" # 仅转写,不落库、不触发 Agent只返回转写结果
TEXT = "text" # 文本消息
TRANSCRIPT = "transcript" # 语音转文字结果
AGENT_RESPONSE = "agent_response"
@@ -148,6 +148,16 @@ class SegmentStreamState:
active_tasks: Set[asyncio.Task] = field(default_factory=set)
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(tz=timezone.utc)
def _mark_conversation_active(conversation: Conversation, at: Optional[datetime] = None) -> datetime:
    """Stamp ``conversation.last_message_at`` and return the time used.

    Args:
        conversation: Object whose ``last_message_at`` attribute is set.
            This function does not commit or flush anything itself.
        at: Activity time to record; when omitted the current UTC time
            from ``_utc_now()`` is used.

    Returns:
        The datetime that was written to ``last_message_at``.
    """
    activity_time = at
    if not activity_time:
        activity_time = _utc_now()
    conversation.last_message_at = activity_time
    return activity_time
def _normalize_voice_session_id(voice_session_id: Optional[str]) -> str:
if voice_session_id:
return str(voice_session_id)
@@ -183,6 +193,13 @@ def _extract_segment_scope(audio_url: Optional[str]) -> Optional[Tuple[str, int]
return None
def _voice_session_id_from_audio_url(audio_url: Optional[str]) -> Optional[str]:
    """Return the first element of the segment scope parsed from ``audio_url``.

    Delegates parsing to ``_extract_segment_scope``; when that yields a falsy
    result (e.g. ``None``), this returns ``None``.
    """
    segment_scope = _extract_segment_scope(audio_url)
    return segment_scope[0] if segment_scope else None
def _is_transcribe_failure(transcript_text: Optional[str]) -> bool:
if not transcript_text:
return True
@@ -357,6 +374,7 @@ async def _process_audio_segment_async(
processed=False,
)
db.add(segment)
user_message_timestamp = _mark_conversation_active(conversation)
await db.commit()
await db.refresh(segment)
await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -383,6 +401,7 @@ async def _process_audio_segment_async(
db=db,
manager=manager,
user=user,
user_message_timestamp=ordered_segment.created_at or user_message_timestamp,
)
break
@@ -564,6 +583,7 @@ async def websocket_endpoint(
processed=False
)
db.add(segment)
user_message_timestamp = _mark_conversation_active(conversation)
await db.commit()
await db.refresh(segment)
await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -576,8 +596,9 @@ async def websocket_endpoint(
segment=segment,
db=db,
manager=manager,
user=user,
)
user=user,
user_message_timestamp=segment.created_at or user_message_timestamp,
)
elif msg_type == MessageType.AUDIO_SEGMENT:
# 处理分段语音消息(长语音持续上传)
@@ -726,6 +747,7 @@ async def websocket_endpoint(
processed=False
)
db.add(segment)
user_message_timestamp = _mark_conversation_active(conversation)
await db.commit()
await db.refresh(segment)
await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -740,6 +762,7 @@ async def websocket_endpoint(
db=db,
manager=manager,
user=user,
user_message_timestamp=segment.created_at or user_message_timestamp,
)
else:
# 转写失败,发送错误消息
@@ -758,7 +781,7 @@ async def websocket_endpoint(
})
elif msg_type == MessageType.TRANSCRIBE_ONLY:
# 仅转写:不落库、不触发 Agent用于客户端「转文字」后发文本
# 仅转写:不落库、不触发 Agent只把识别结果返回给客户端
data = message.get("data", {})
audio_base64 = data.get("audio_base64", "")
if not audio_base64:
@@ -906,6 +929,7 @@ async def process_user_message(
db: AsyncSession,
manager: ConnectionManager,
user: UserModel = None,
user_message_timestamp: Optional[datetime] = None,
) -> None:
"""
处理用户消息生成Agent回应异步版本
@@ -936,9 +960,12 @@ async def process_user_message(
filled_fields=filled,
nickname=user.nickname or "",
is_from_voice=is_from_voice,
voice_session_id=_voice_session_id_from_audio_url(segment.audio_url),
user_message_timestamp=user_message_timestamp,
)
segment.agent_response = "\n\n".join(responses)
_mark_conversation_active(conversation)
await db.commit()
for i, response_text in enumerate(responses):
@@ -987,9 +1014,12 @@ async def process_user_message(
memoir_state=state,
user_profile_context=user_profile_context,
is_from_voice=is_from_voice,
voice_session_id=_voice_session_id_from_audio_url(segment.audio_url),
user_message_timestamp=user_message_timestamp,
)
segment.agent_response = "\n\n".join(responses)
_mark_conversation_active(conversation)
await db.commit()
for i, response_text in enumerate(responses):