修复版本1.0.7的若干问题 (#11)

* fix/ audio UI showing a stale 0:00 duration

* fix/ persist memoir image state and collapse voice history

Keep generated chapter images from staying in processing after successful uploads, and restore segmented voice recordings as a single audio message when reopening conversations.

Made-with: Cursor

* fix/ persist local conversation state and stabilize voice UI

Keep CreateMemory conversations driven by Room so recent text and audio survive page exits, and prevent stale 0:00 voice bubbles while list ordering follows the latest local message time.

Made-with: Cursor

* fix/ server-side root cause for conversation list time and message timestamps

- Add Conversation.last_message_at column with migration and index
- Update last_message_at on text message, audio segment, and AI response
- Sort conversation list by COALESCE(last_message_at, started_at) DESC
- Return real per-message timestamps from Redis history instead of now()
- Pass user_message_timestamp through agent pipeline to avoid LLM delay skew
- Remove all debug logging from server, client, and CI workflow
- Restore import json in conversation_agent (was broken by debug removal)
- Client: remove DebugRuntimeLogger, stop sending transcript as text message

Made-with: Cursor

---------

Co-authored-by: Kevin <kevin@brighteng.org>
This commit is contained in:
Sully
2026-03-14 23:58:46 +08:00
committed by GitHub
parent 9636c059d0
commit c2ce4c61f1
29 changed files with 1041 additions and 216 deletions

View File

@@ -35,7 +35,7 @@ class MessageType(str, Enum):
AUDIO_CHUNK = "audio_chunk"
AUDIO_SEGMENT = "audio_segment" # 分段语音消息(长语音持续上传)
AUDIO_MESSAGE = "audio_message" # 完整音频消息(类似微信语音)
TRANSCRIBE_ONLY = "transcribe_only" # 仅转写,不落库、不触发 Agent用于「转文字」发送
TRANSCRIBE_ONLY = "transcribe_only" # 仅转写,不落库、不触发 Agent只返回转写结果
TEXT = "text" # 文本消息
TRANSCRIPT = "transcript" # 语音转文字结果
AGENT_RESPONSE = "agent_response"
@@ -148,6 +148,16 @@ class SegmentStreamState:
active_tasks: Set[asyncio.Task] = field(default_factory=set)
def _utc_now() -> datetime:
return datetime.now(timezone.utc)
def _mark_conversation_active(conversation: Conversation, at: Optional[datetime] = None) -> datetime:
    """Record an activity time on ``conversation.last_message_at`` and return it.

    Args:
        conversation: Object whose ``last_message_at`` attribute is overwritten.
        at: Activity time to record. When omitted (``None``), the value
            returned by ``_utc_now()`` is used instead.

    Returns:
        The datetime that was assigned to ``conversation.last_message_at``.

    Note:
        Only the in-memory attribute is assigned here; this function does not
        flush or commit anything, so persisting the change is left to the
        caller.
    """
    # Explicit None check: the fallback applies only when no time was supplied.
    activity_time = at if at is not None else _utc_now()
    conversation.last_message_at = activity_time
    return activity_time
def _normalize_voice_session_id(voice_session_id: Optional[str]) -> str:
if voice_session_id:
return str(voice_session_id)
@@ -183,6 +193,13 @@ def _extract_segment_scope(audio_url: Optional[str]) -> Optional[Tuple[str, int]
return None
def _voice_session_id_from_audio_url(audio_url: Optional[str]) -> Optional[str]:
    """Return the first element of the segment scope parsed from ``audio_url``.

    Parsing is delegated to ``_extract_segment_scope``. When that helper
    returns a falsy result (e.g. ``None``), this function returns ``None``.

    Args:
        audio_url: URL to inspect; passed through unchanged to the helper.

    Returns:
        ``scope[0]`` of the parsed scope, or ``None`` when no scope was found.
        NOTE(review): presumably ``scope[0]`` is the voice session id, as the
        function name suggests — confirm against ``_extract_segment_scope``.
    """
    scope = _extract_segment_scope(audio_url)
    return scope[0] if scope else None
def _is_transcribe_failure(transcript_text: Optional[str]) -> bool:
if not transcript_text:
return True
@@ -357,6 +374,7 @@ async def _process_audio_segment_async(
processed=False,
)
db.add(segment)
user_message_timestamp = _mark_conversation_active(conversation)
await db.commit()
await db.refresh(segment)
await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -383,6 +401,7 @@ async def _process_audio_segment_async(
db=db,
manager=manager,
user=user,
user_message_timestamp=ordered_segment.created_at or user_message_timestamp,
)
break
@@ -564,6 +583,7 @@ async def websocket_endpoint(
processed=False
)
db.add(segment)
user_message_timestamp = _mark_conversation_active(conversation)
await db.commit()
await db.refresh(segment)
await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -576,8 +596,9 @@ async def websocket_endpoint(
segment=segment,
db=db,
manager=manager,
user=user,
)
user=user,
user_message_timestamp=segment.created_at or user_message_timestamp,
)
elif msg_type == MessageType.AUDIO_SEGMENT:
# 处理分段语音消息(长语音持续上传)
@@ -726,6 +747,7 @@ async def websocket_endpoint(
processed=False
)
db.add(segment)
user_message_timestamp = _mark_conversation_active(conversation)
await db.commit()
await db.refresh(segment)
await manager.background_runner.queue_message(conversation.user_id, segment.id)
@@ -740,6 +762,7 @@ async def websocket_endpoint(
db=db,
manager=manager,
user=user,
user_message_timestamp=segment.created_at or user_message_timestamp,
)
else:
# 转写失败,发送错误消息
@@ -758,7 +781,7 @@ async def websocket_endpoint(
})
elif msg_type == MessageType.TRANSCRIBE_ONLY:
# 仅转写:不落库、不触发 Agent用于客户端「转文字」后发文本
# 仅转写:不落库、不触发 Agent,只把识别结果返回给客户端
data = message.get("data", {})
audio_base64 = data.get("audio_base64", "")
if not audio_base64:
@@ -906,6 +929,7 @@ async def process_user_message(
db: AsyncSession,
manager: ConnectionManager,
user: UserModel = None,
user_message_timestamp: Optional[datetime] = None,
) -> None:
"""
处理用户消息生成Agent回应异步版本
@@ -936,9 +960,12 @@ async def process_user_message(
filled_fields=filled,
nickname=user.nickname or "",
is_from_voice=is_from_voice,
voice_session_id=_voice_session_id_from_audio_url(segment.audio_url),
user_message_timestamp=user_message_timestamp,
)
segment.agent_response = "\n\n".join(responses)
_mark_conversation_active(conversation)
await db.commit()
for i, response_text in enumerate(responses):
@@ -987,9 +1014,12 @@ async def process_user_message(
memoir_state=state,
user_profile_context=user_profile_context,
is_from_voice=is_from_voice,
voice_session_id=_voice_session_id_from_audio_url(segment.audio_url),
user_message_timestamp=user_message_timestamp,
)
segment.agent_response = "\n\n".join(responses)
_mark_conversation_active(conversation)
await db.commit()
for i, response_text in enumerate(responses):