feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本(Alembic 0002)
- Chat: 阶段检测/阶段提示/回复限制,编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent,叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints;Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测
This commit is contained in:
@@ -51,7 +51,7 @@ class ConversationHistoryStore:
|
||||
try:
|
||||
await self._sync_redis_from_db(conversation_id)
|
||||
except Exception as exc:
|
||||
logger.warning("conversation history cache sync skipped: %s", exc)
|
||||
logger.warning("conversation history cache sync skipped: {}", exc)
|
||||
|
||||
async def record_ai_only_turn(
|
||||
self, conversation_id: str, responses: list[str]
|
||||
|
||||
@@ -45,7 +45,8 @@ class Segment(Base):
|
||||
id = Column(String, primary_key=True)
|
||||
conversation_id = Column(String, ForeignKey("conversations.id"), nullable=False)
|
||||
audio_url = Column(String, nullable=True)
|
||||
transcript_text = Column(Text, nullable=False)
|
||||
# 用户输入正文:语音 ASR 结果或键盘输入(历史列名 transcript_text)
|
||||
user_input_text = Column(Text, nullable=False)
|
||||
audio_duration_seconds = Column(Integer, nullable=True)
|
||||
created_at = Column(DateTime(timezone=True), default=utc_now)
|
||||
processed = Column(Boolean, default=False)
|
||||
|
||||
@@ -101,5 +101,5 @@ async def organize_conversation(
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.exception("提交整理任务失败: %s", e)
|
||||
logger.exception("提交整理任务失败: {}", e)
|
||||
raise HTTPException(status_code=500, detail=f"提交整理任务失败: {str(e)}")
|
||||
|
||||
@@ -119,7 +119,7 @@ class ConversationService:
|
||||
try:
|
||||
history = await redis_service.get_conversation_history(conversation_id)
|
||||
except Exception as exc:
|
||||
logger.warning("conversation history cache read skipped: %s", exc)
|
||||
logger.warning("conversation history cache read skipped: {}", exc)
|
||||
history = []
|
||||
if history:
|
||||
return history
|
||||
@@ -130,7 +130,7 @@ class ConversationService:
|
||||
try:
|
||||
await redis_service.set_conversation_history(conversation_id, rebuilt)
|
||||
except Exception as exc:
|
||||
logger.warning("conversation history cache write skipped: %s", exc)
|
||||
logger.warning("conversation history cache write skipped: {}", exc)
|
||||
return rebuilt
|
||||
|
||||
return []
|
||||
@@ -271,7 +271,7 @@ class ConversationService:
|
||||
segment_ids = [s.id for s in segments]
|
||||
process_memoir_segments.delay(conv.user_id, segment_ids)
|
||||
logger.info(
|
||||
"手动触发对话整理: conversation_id=%s, segments=%s",
|
||||
"手动触发对话整理: conversation_id={}, segments={}",
|
||||
conversation_id,
|
||||
len(segment_ids),
|
||||
)
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
@@ -16,6 +17,7 @@ from sqlalchemy import select, update
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.chat import ChatOrchestrator
|
||||
from app.core.agent_logging import agent_summary_enabled
|
||||
from app.core.config import settings
|
||||
from app.core.db import AsyncSessionLocal
|
||||
from app.core.dependencies import get_asr_provider, get_object_storage, get_tts_provider
|
||||
@@ -93,11 +95,11 @@ async def _send_tts_audio(
|
||||
err_str = str(e)
|
||||
if "PkgExhausted" in err_str:
|
||||
logger.warning(
|
||||
"TTS skipped: 腾讯云语音合成资源包已用尽,请在控制台购买或开通后付费: %s",
|
||||
"TTS skipped: 腾讯云语音合成资源包已用尽,请在控制台购买或开通后付费: {}",
|
||||
err_str[:100],
|
||||
)
|
||||
else:
|
||||
logger.error("TTS synthesize failed: %s", e)
|
||||
logger.error("TTS synthesize failed: {}", e)
|
||||
return None
|
||||
|
||||
|
||||
@@ -438,20 +440,20 @@ async def process_audio_segment(
|
||||
async with state.lock:
|
||||
state.processed_indices.add(segment_index)
|
||||
logger.debug(
|
||||
"分段已存在,按幂等跳过: conversation_id=%s voice_session_id=%s "
|
||||
"segment_index=%s segment_id=%s transcript=%s",
|
||||
"分段已存在,按幂等跳过: conversation_id={} voice_session_id={} "
|
||||
"segment_index={} segment_id={} transcript={}",
|
||||
conversation_id,
|
||||
voice_session_id,
|
||||
segment_index,
|
||||
existing_segment.id,
|
||||
existing_segment.transcript_text or "",
|
||||
existing_segment.user_input_text or "",
|
||||
)
|
||||
return
|
||||
else:
|
||||
segment = Segment(
|
||||
id=str(uuid.uuid4()),
|
||||
conversation_id=conversation_id,
|
||||
transcript_text=transcript_text or "",
|
||||
user_input_text=transcript_text or "",
|
||||
audio_url=_build_segment_audio_url(voice_session_id, segment_index),
|
||||
audio_duration_seconds=audio_duration
|
||||
if audio_duration > 0
|
||||
@@ -531,6 +533,7 @@ async def process_user_message(
|
||||
is_from_voice = bool(segment.audio_url)
|
||||
voice_session_id = _voice_session_id_from_audio_url(segment.audio_url)
|
||||
audio_dur = getattr(segment, "audio_duration_seconds", None)
|
||||
t_pipeline = time.perf_counter()
|
||||
turn = await chat_orchestrator.process_user_message(
|
||||
conversation_id=conversation_id,
|
||||
user_message=user_message,
|
||||
@@ -545,6 +548,18 @@ async def process_user_message(
|
||||
user_message_timestamp=user_message_timestamp,
|
||||
audio_duration_seconds=audio_dur,
|
||||
)
|
||||
if agent_summary_enabled():
|
||||
logger.info(
|
||||
"pipeline.process_user_message duration_ms={:.2f} "
|
||||
"conversation_id={} segment_id={} user_msg_len={} "
|
||||
"response_segments={} skip_tts={}",
|
||||
(time.perf_counter() - t_pipeline) * 1000,
|
||||
conversation_id,
|
||||
segment.id,
|
||||
len(user_message or ""),
|
||||
len(turn.messages),
|
||||
turn.skip_tts,
|
||||
)
|
||||
responses = turn.messages
|
||||
skip_tts = turn.skip_tts
|
||||
|
||||
@@ -618,7 +633,7 @@ async def process_user_message(
|
||||
)
|
||||
await db.commit()
|
||||
except Exception as persist_error:
|
||||
logger.warning("补写 TTS 元数据失败: %s", persist_error)
|
||||
logger.warning("补写 TTS 元数据失败: {}", persist_error)
|
||||
logger.error(f"处理用户消息失败: {e}", exc_info=True)
|
||||
if conversation_id in manager.active_connections:
|
||||
try:
|
||||
|
||||
@@ -218,7 +218,7 @@ async def websocket_endpoint(
|
||||
try:
|
||||
if websocket.application_state != WebSocketState.CONNECTED:
|
||||
logger.debug(
|
||||
"WebSocket 已非连接状态,退出循环: conversation_id=%s",
|
||||
"WebSocket 已非连接状态,退出循环: conversation_id={}",
|
||||
conversation_id,
|
||||
)
|
||||
break
|
||||
@@ -251,7 +251,7 @@ async def websocket_endpoint(
|
||||
segment = Segment(
|
||||
id=str(uuid.uuid4()),
|
||||
conversation_id=conversation_id,
|
||||
transcript_text=text_message,
|
||||
user_input_text=text_message,
|
||||
processed=False,
|
||||
)
|
||||
db.add(segment)
|
||||
@@ -414,8 +414,8 @@ async def websocket_endpoint(
|
||||
|
||||
if not should_process:
|
||||
logger.debug(
|
||||
"收到重复分段,跳过: conversation_id=%s voice_session_id=%s "
|
||||
"segment_index=%s audio_b64_len=%s duration=%s",
|
||||
"收到重复分段,跳过: conversation_id={} voice_session_id={} "
|
||||
"segment_index={} audio_b64_len={} duration={}",
|
||||
conversation_id,
|
||||
voice_session_id,
|
||||
segment_index,
|
||||
@@ -470,7 +470,7 @@ async def websocket_endpoint(
|
||||
continue
|
||||
|
||||
logger.debug(
|
||||
"收到音频消息: conversation_id=%s duration_s=%s",
|
||||
"收到音频消息: conversation_id={} duration_s={}",
|
||||
conversation_id,
|
||||
audio_duration,
|
||||
)
|
||||
@@ -478,18 +478,16 @@ async def websocket_endpoint(
|
||||
try:
|
||||
asr = get_asr_provider()
|
||||
audio_bytes = base64.b64decode(audio_base64)
|
||||
transcript_text = await asr.transcribe(
|
||||
audio_bytes, "m4a"
|
||||
asr_text = await asr.transcribe(audio_bytes, "m4a")
|
||||
logger.debug(
|
||||
"ASR 转写完成: conversation_id={} chars={}",
|
||||
conversation_id,
|
||||
len(asr_text or ""),
|
||||
)
|
||||
logger.debug(
|
||||
"ASR 转写完成: conversation_id=%s chars=%s",
|
||||
"ASR 转写全文: conversation_id={} text={}",
|
||||
conversation_id,
|
||||
len(transcript_text or ""),
|
||||
)
|
||||
logger.debug(
|
||||
"ASR 转写全文: conversation_id=%s text=%s",
|
||||
conversation_id,
|
||||
transcript_text,
|
||||
asr_text,
|
||||
)
|
||||
|
||||
await manager.send_message(
|
||||
@@ -498,7 +496,7 @@ async def websocket_endpoint(
|
||||
"type": MessageType.TRANSCRIPT,
|
||||
"conversation_id": conversation_id,
|
||||
"data": {
|
||||
"text": transcript_text,
|
||||
"text": asr_text,
|
||||
"audio_duration": audio_duration,
|
||||
},
|
||||
"timestamp": datetime.now(
|
||||
@@ -514,7 +512,7 @@ async def websocket_endpoint(
|
||||
segment = Segment(
|
||||
id=str(uuid.uuid4()),
|
||||
conversation_id=conversation_id,
|
||||
transcript_text=transcript_text,
|
||||
user_input_text=asr_text,
|
||||
audio_url=f"audio:{audio_duration}s",
|
||||
audio_duration_seconds=ads if ads > 0 else None,
|
||||
processed=False,
|
||||
@@ -529,12 +527,10 @@ async def websocket_endpoint(
|
||||
conversation.user_id, segment.id
|
||||
)
|
||||
|
||||
if transcript_text and not transcript_text.startswith(
|
||||
"转写失败"
|
||||
):
|
||||
if asr_text and not asr_text.startswith("转写失败"):
|
||||
await process_user_message(
|
||||
conversation_id=conversation_id,
|
||||
user_message=transcript_text,
|
||||
user_message=asr_text,
|
||||
conversation=conversation,
|
||||
segment=segment,
|
||||
db=db,
|
||||
@@ -587,13 +583,13 @@ async def websocket_endpoint(
|
||||
try:
|
||||
asr = get_asr_provider()
|
||||
audio_bytes = base64.b64decode(audio_base64)
|
||||
transcript_text = await asr.transcribe(audio_bytes, "m4a")
|
||||
asr_text = await asr.transcribe(audio_bytes, "m4a")
|
||||
await manager.send_message(
|
||||
conversation_id,
|
||||
{
|
||||
"type": MessageType.TRANSCRIPT,
|
||||
"conversation_id": conversation_id,
|
||||
"data": {"text": transcript_text or ""},
|
||||
"data": {"text": asr_text or ""},
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
},
|
||||
)
|
||||
@@ -637,7 +633,7 @@ async def websocket_endpoint(
|
||||
and "not connected" in error_msg.lower()
|
||||
):
|
||||
logger.debug(
|
||||
"WebSocket 连接已断开或未就绪: conversation_id=%s error=%s",
|
||||
"WebSocket 连接已断开或未就绪: conversation_id={} error={}",
|
||||
conversation_id,
|
||||
error_msg,
|
||||
)
|
||||
@@ -661,7 +657,7 @@ async def websocket_endpoint(
|
||||
break
|
||||
except WebSocketDisconnect:
|
||||
logger.debug(
|
||||
"WebSocket 断开连接: conversation_id=%s", conversation_id
|
||||
"WebSocket 断开连接: conversation_id={}", conversation_id
|
||||
)
|
||||
break
|
||||
except Exception as e:
|
||||
@@ -681,7 +677,7 @@ async def websocket_endpoint(
|
||||
break
|
||||
|
||||
except WebSocketDisconnect:
|
||||
logger.debug("WebSocket 断开连接: conversation_id=%s", conversation_id)
|
||||
logger.debug("WebSocket 断开连接: conversation_id={}", conversation_id)
|
||||
await manager.disconnect(conversation_id)
|
||||
cleanup_segment_states(conversation_id)
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user