feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI

数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组装评审用的证据包。
内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/judge 相关脚本与文档有配套调整。
app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。
工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了…
This commit is contained in:
Kevin
2026-04-08 15:37:09 +08:00
parent 6772e1269c
commit 309a051038
109 changed files with 4125 additions and 858 deletions

View File

@@ -27,6 +27,7 @@ from app.features.conversation.history_store import (
AI_RESPONSE_SEGMENT_JOIN,
ConversationHistoryStore,
)
from app.features.conversation.lineage_schemas import DialogueLineage
from app.features.conversation.models import Conversation, Segment
from app.features.conversation.ws.connection_manager import manager
from app.features.conversation.ws.message_types import MessageType
@@ -37,6 +38,7 @@ from app.features.conversation.ws.profile_collector import (
)
from app.features.memoir.background_runner import BackgroundTaskRunner
from app.features.user.models import User
from app.ports.asr import ASRTranscriptionError
logger = get_logger(__name__)
@@ -492,7 +494,16 @@ async def process_audio_segment(
conversation_id,
segment_index,
)
transcript_text = await _transcribe_long_audio(audio_bytes, fmt="m4a")
try:
transcript_text = await _transcribe_long_audio(audio_bytes, fmt="m4a")
except ASRTranscriptionError as e:
logger.warning(
"ASR 转写失败 segment_index={} conversation_id={}: {}",
segment_index,
conversation_id,
e,
)
transcript_text = ""
await manager.send_message(
conversation_id,
{
@@ -511,12 +522,12 @@ async def process_audio_segment(
if _is_transcribe_failure(transcript_text):
detail = (transcript_text or "").strip()
if detail.startswith("转写失败"):
user_msg = f"分段 {segment_index} {detail}"
elif not detail:
user_msg = f"分段 {segment_index} 转写失败:未识别到内容(请检查后端 ASR 配置)"
if not detail:
user_msg = (
f"分段 {segment_index} 未识别到语音内容,请重试或检查麦克风与网络"
)
else:
user_msg = f"分段 {segment_index} 转写失败:{detail[:400]}"
user_msg = f"分段 {segment_index} 语音识别失败,请稍后再试"
await manager.send_message(
conversation_id,
{
@@ -607,7 +618,7 @@ async def process_audio_segment(
{
"type": MessageType.ERROR,
"data": {
"message": f"分段处理失败: {str(e)}",
"message": "语音分段处理遇到问题,请重试",
"segment_index": segment_index,
},
"timestamp": datetime.now(timezone.utc).isoformat(),
@@ -677,7 +688,7 @@ async def process_user_message(
segment.agent_response = AI_RESPONSE_SEGMENT_JOIN.join(responses)
_mark_conversation_active(conversation)
ai_msg_id = await store.record_human_ai_turn(
turn_ids = await store.record_human_ai_turn(
conversation_id=conversation_id,
user_message=user_message,
responses=responses,
@@ -687,8 +698,11 @@ async def process_user_message(
audio_duration_seconds=audio_dur,
tts_audio_urls=None,
segment_id=segment.id,
memory_retrieval_trace=getattr(
turn, "memory_retrieval_trace", None
),
)
if not ai_msg_id:
if not turn_ids:
logger.warning(
"process_user_message: 无有效助手段落responses 为空conversation_id={} segment_id={}",
conversation_id,
@@ -707,6 +721,23 @@ async def process_user_message(
)
return
lineage = DialogueLineage.for_single_turn(
conversation_id=conversation_id,
user_message_id=turn_ids.human_message_id,
assistant_message_id=turn_ids.assistant_message_id,
segment_ids=[str(segment.id)],
)
await db.execute(
update(Segment)
.where(Segment.id == segment.id)
.values(
user_message_id=turn_ids.human_message_id,
lineage_json=lineage.model_dump(mode="json"),
)
)
await db.commit()
ai_msg_id = turn_ids.assistant_message_id
tts_epoch_start = _tts_epoch_value(conversation_id)
n = len(responses)
for i, response_text in enumerate(responses):
@@ -779,7 +810,7 @@ async def process_user_message(
conversation_id,
{
"type": MessageType.ERROR,
"data": {"message": f"生成回应失败: {str(e)}"},
"data": {"message": "生成回应时遇到问题,请稍后再试"},
"timestamp": datetime.now(timezone.utc).isoformat(),
},
)

View File

@@ -5,15 +5,14 @@ WebSocket 路由:实时对话通信
import asyncio
import base64
import uuid
from datetime import datetime, timezone
from fastapi import WebSocket, WebSocketDisconnect, status
from starlette.websockets import WebSocketState
from app.agents.chat.background_voice import infer_background_voice
from app.agents.stage_constants import STAGE_TO_ORDER
from app.agents.chat.prompts_profile import format_user_profile_context
from app.agents.stage_constants import STAGE_TO_ORDER
from app.core.db import AsyncSessionLocal
from app.core.dependencies import get_asr_provider
from app.core.logging import get_logger