feat: 回忆录证据血缘与内部评测可追溯，顺带对齐本地评测台与 CI

数据库与模型：新增多版迁移（章节证据快照、对话血缘、记忆事实/时间线 lineage 等），把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。业务链路：会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照；新增章节证据快照与评测侧 EvalTraceService 等模块，方便组装评审用的证据包。内部评测：自动化 run 与手工 memoir 评审共用可追溯证据；rubric/judge 相关脚本与文档有配套调整。app-eval-web：Memoir/实验详情里能展开看证据摘要与 evidence_trace（含对话轮次 id）；Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致，避免改端口后页面连错服务。工程杂项：GitHub Actions / 仓库说明有更新；各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾；新增/扩充了……（提交说明在此处被截断）
2026-04-08 15:37:09 +08:00
parent 6772e1269c
commit 309a051038
109 changed files with 4125 additions and 858 deletions
--- a/api/app/features/conversation/ws/pipeline.py
+++ b/api/app/features/conversation/ws/pipeline.py
@@ -27,6 +27,7 @@ from app.features.conversation.history_store import (
    AI_RESPONSE_SEGMENT_JOIN,
    ConversationHistoryStore,
 )
+from app.features.conversation.lineage_schemas import DialogueLineage
 from app.features.conversation.models import Conversation, Segment
 from app.features.conversation.ws.connection_manager import manager
 from app.features.conversation.ws.message_types import MessageType
@@ -37,6 +38,7 @@ from app.features.conversation.ws.profile_collector import (
 )
 from app.features.memoir.background_runner import BackgroundTaskRunner
 from app.features.user.models import User
+from app.ports.asr import ASRTranscriptionError

 logger = get_logger(__name__)

@@ -492,7 +494,16 @@ async def process_audio_segment(
                    conversation_id,
                    segment_index,
                )
-            transcript_text = await _transcribe_long_audio(audio_bytes, fmt="m4a")
+            try:
+                transcript_text = await _transcribe_long_audio(audio_bytes, fmt="m4a")
+            except ASRTranscriptionError as e:
+                logger.warning(
+                    "ASR 转写失败 segment_index={} conversation_id={}: {}",
+                    segment_index,
+                    conversation_id,
+                    e,
+                )
+                transcript_text = ""
            await manager.send_message(
                conversation_id,
                {
@@ -511,12 +522,12 @@ async def process_audio_segment(

            if _is_transcribe_failure(transcript_text):
                detail = (transcript_text or "").strip()
-                if detail.startswith("转写失败"):
-                    user_msg = f"分段 {segment_index} {detail}"
-                elif not detail:
-                    user_msg = f"分段 {segment_index} 转写失败：未识别到内容（请检查后端 ASR 配置）"
+                if not detail:
+                    user_msg = (
+                        f"分段 {segment_index} 未识别到语音内容，请重试或检查麦克风与网络"
+                    )
                else:
-                    user_msg = f"分段 {segment_index} 转写失败：{detail[:400]}"
+                    user_msg = f"分段 {segment_index} 语音识别失败，请稍后再试"
                await manager.send_message(
                    conversation_id,
                    {
@@ -607,7 +618,7 @@ async def process_audio_segment(
            {
                "type": MessageType.ERROR,
                "data": {
-                    "message": f"分段处理失败: {str(e)}",
+                    "message": "语音分段处理遇到问题，请重试",
                    "segment_index": segment_index,
                },
                "timestamp": datetime.now(timezone.utc).isoformat(),
@@ -677,7 +688,7 @@ async def process_user_message(

        segment.agent_response = AI_RESPONSE_SEGMENT_JOIN.join(responses)
        _mark_conversation_active(conversation)
-        ai_msg_id = await store.record_human_ai_turn(
+        turn_ids = await store.record_human_ai_turn(
            conversation_id=conversation_id,
            user_message=user_message,
            responses=responses,
@@ -687,8 +698,11 @@ async def process_user_message(
            audio_duration_seconds=audio_dur,
            tts_audio_urls=None,
            segment_id=segment.id,
+            memory_retrieval_trace=getattr(
+                turn, "memory_retrieval_trace", None
+            ),
        )
-        if not ai_msg_id:
+        if not turn_ids:
            logger.warning(
                "process_user_message: 无有效助手段落（responses 为空），conversation_id={} segment_id={}",
                conversation_id,
@@ -707,6 +721,23 @@ async def process_user_message(
                )
            return

+        lineage = DialogueLineage.for_single_turn(
+            conversation_id=conversation_id,
+            user_message_id=turn_ids.human_message_id,
+            assistant_message_id=turn_ids.assistant_message_id,
+            segment_ids=[str(segment.id)],
+        )
+        await db.execute(
+            update(Segment)
+            .where(Segment.id == segment.id)
+            .values(
+                user_message_id=turn_ids.human_message_id,
+                lineage_json=lineage.model_dump(mode="json"),
+            )
+        )
+        await db.commit()
+
+        ai_msg_id = turn_ids.assistant_message_id
        tts_epoch_start = _tts_epoch_value(conversation_id)
        n = len(responses)
        for i, response_text in enumerate(responses):
@@ -779,7 +810,7 @@ async def process_user_message(
                    conversation_id,
                    {
                        "type": MessageType.ERROR,
-                        "data": {"message": f"生成回应失败: {str(e)}"},
+                        "data": {"message": "生成回应时遇到问题，请稍后再试"},
                        "timestamp": datetime.now(timezone.utc).isoformat(),
                    },
                )
--- a/api/app/features/conversation/ws/router.py
+++ b/api/app/features/conversation/ws/router.py
@@ -5,15 +5,14 @@ WebSocket 路由：实时对话通信

 import asyncio
 import base64
-import uuid
 from datetime import datetime, timezone

 from fastapi import WebSocket, WebSocketDisconnect, status
 from starlette.websockets import WebSocketState

 from app.agents.chat.background_voice import infer_background_voice
-from app.agents.stage_constants import STAGE_TO_ORDER
 from app.agents.chat.prompts_profile import format_user_profile_context
+from app.agents.stage_constants import STAGE_TO_ORDER
 from app.core.db import AsyncSessionLocal
 from app.core.dependencies import get_asr_provider
 from app.core.logging import get_logger