feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库：新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等。后端 - 记忆：新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数；新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果，支持空 query 时是否仍带 rolling 等开关；repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新。后端 - 对话 WS：增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确；助手多段回复持久化使用统一分隔符，与分段逻辑一致。后端 - Agent：reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发。后端 - 回忆录任务：transcript ingest 记录 source_id；任务成功结…（提交说明在此处被截断）
2026-03-27 16:01:28 +08:00
parent 1374f6e8f5
commit e4bf0710c7
70 changed files with 3404 additions and 557 deletions
--- a/app-expo/src/features/conversation/hooks.ts
+++ b/app-expo/src/features/conversation/hooks.ts
@@ -1,5 +1,5 @@
 import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
-import { File } from 'expo-file-system';
+import { File, Paths } from 'expo-file-system';
 import { useCallback, useEffect, useRef, useState } from 'react';

 import type { WsConnectionState } from '@/core/ws/types';
@@ -20,6 +20,48 @@ import {
 } from './types';
 import { voiceSegmentStore } from '@/features/voice/voice-segment-store';

+/** Expo `File` needs a canonical `file://` URI; some recording APIs return a bare absolute path. */
+function ensureFileUri(uri: string): string {
+  const u = uri.trim();
+  if (u.startsWith('file://')) return u; // already a file:// URI
+  if (u.startsWith('/')) return `file://${u}`; // bare absolute path: prepend the scheme
+  return u; // anything else is returned trimmed, otherwise unchanged
+}
+
+function guessAudioExtension(uri: string): string { // returns the trailing ".ext" of `uri`, or '.m4a' when there is none
+  const pathOnly = uri.split('?')[0] ?? uri; // drop everything from the first '?'
+  const m = /\.[^/.]+$/u.exec(pathOnly); // last '.' followed only by characters other than '/' and '.'
+  return m ? m[0] : '.m4a';
+}
+
+/**
+ * Reads a recording as base64 using the main-package `File`/`Paths` API (see the Expo docs example on interop between the new File API and the legacy readAsStringAsync).
+ * Copies the file to a uniquely named file under the cache directory before calling `base64()`, to avoid occasionally reading stale or wrong content straight from the source path; throws `Error('recording file missing')` when the source does not exist.
+ */
+async function readRecordingPayload(uri: string): Promise<string> {
+  const resolved = ensureFileUri(uri);
+  const source = new File(resolved);
+  if (!source.exists) {
+    throw new Error('recording file missing');
+  }
+  const stagedName = `voice-upload-${Date.now()}-${Math.random().toString(36).slice(2, 10)}${guessAudioExtension(resolved)}`;
+  const staged = new File(Paths.cache, stagedName);
+  try {
+    source.copy(staged);
+  } catch {
+    return await source.base64(); // copy failed: fall back to reading the source file directly
+  }
+  try {
+    return await staged.base64();
+  } finally {
+    try {
+      staged.delete(); // remove the staged copy whether or not base64() succeeded
+    } catch {
+      // ignore: cleanup of the staged copy is best-effort
+    }
+  }
+}
+
 // ─── Query hooks ───

 // TODO: 连接不上后端时 isLoading 可能一直为 true，需加超时或展示错误态
@@ -265,8 +307,7 @@ export function useRealtimeSession({
      if (durationSec < MIN_RECORDING_DURATION_SEC) return false;

      try {
-        const file = new File(uri);
-        const base64 = await file.base64();
+        const base64 = await readRecordingPayload(uri);
        if (!base64) return false;

        const voiceSessionId = generateUUID();
--- a/app-expo/src/features/conversation/message-split.ts
+++ b/app-expo/src/features/conversation/message-split.ts
@@ -1,16 +1,58 @@
 /**
 * 与后端 / LLM 约定：多条助手消息用 [SPLIT] 分隔（大小写不敏感）。
 * 分隔符为边界，不包含在气泡正文中。
+ *
+ * LLM 常输出全角括号、括号内空格或零宽字符，需先规范化再匹配。
 */

-export const MESSAGE_SPLIT_REGEX = /\[SPLIT\]/i;
+const ZERO_WIDTH = /[\u200B-\u200D\uFEFF]/g; // zero-width characters U+200B..U+200D and U+FEFF (global match)

-/** 历史/已落库消息：拆成非空片段，各渲染为一个气泡 */
-export function splitMessageParts(content: string): string[] {
-  return content
+/** Meant to mirror the backend's `segments_from_llm_response`: split on [SPLIT] first, then on blank-line paragraph breaks */
+const MIN_PARAGRAPH_CHARS = 12; // paragraphs shorter than this (after trimming) are discarded by the paragraph fallback
+const PARA_BREAK = /\n\s*\n+/; // a newline, optional whitespace, then one or more newlines
+
+function splitToPartsNormalized(content: string): string[] { // normalize, split on [SPLIT], else fall back to paragraphs
+  const normalized = normalizeAssistantContentForSplit(content);
+  let parts = normalized
    .split(MESSAGE_SPLIT_REGEX)
    .map((s) => s.trim())
    .filter((s) => s.length > 0);
+  if (parts.length > 1) return parts; // explicit markers produced several non-empty parts
+  if (parts.length === 1) {
+    const only = parts[0]!;
+    if (!only.includes('\n\n')) return parts; // NOTE(review): narrower than PARA_BREAK (misses "\n \n" and "\r\n\r\n") — confirm intended
+    const paras = only
+      .split(PARA_BREAK)
+      .map((s) => s.trim())
+      .filter((s) => s.length >= MIN_PARAGRAPH_CHARS); // NOTE(review): shorter paragraphs are dropped from the result — confirm intended
+    if (paras.length >= 2) return paras.slice(0, 3); // at most three paragraphs are kept; later ones are discarded
+  }
+  return parts; // zero parts, or a single part that could not be split into >= 2 paragraphs
+}
+
+/** Exported for tests; the same pre-processing that is applied before splitting */
+export function normalizeAssistantContentForSplit(content: string): string {
+  let s = String(content ?? ''); // null/undefined become ''
+  s = s.replace(ZERO_WIDTH, ''); // strip zero-width characters
+  s = s.replace(/\uFF3B/g, '[').replace(/\uFF3D/g, ']'); // fullwidth square brackets -> ASCII
+  s = s.replace(/\u3010/g, '[').replace(/\u3011/g, ']'); // lenticular brackets -> ASCII
+  return s;
+}
+
+/** Allows `[\s*SPLIT\s*]`, so `[ SPLIT ]`, line breaks inside the brackets, etc. also match */
+export const MESSAGE_SPLIT_REGEX = /\[\s*SPLIT\s*\]/i; // case-insensitive, non-global
+
+/** Meant to align with the backend-persisted assistant message id + segment index (optimistic list / TTS binding) */
+export function assistantSegmentMessageId(
+  assistantMessageId: string,
+  segmentIndex: number,
+): string {
+  return `${assistantMessageId}_seg_${segmentIndex}`; // "<assistantMessageId>_seg_<segmentIndex>"
+}
+
+/** Historical / already-persisted messages: split into non-empty parts, each rendered as its own bubble */
+export function splitMessageParts(content: string): string[] {
+  return splitToPartsNormalized(String(content ?? '')); // null/undefined content is treated as ''
 }

 /**
@@ -18,7 +60,25 @@ export function splitMessageParts(content: string): string[] {
 * 仍能拆成两段（上一段完成气泡 + 下一段空流式气泡）。
 */
 export function splitStreamingSegments(content: string): string[] {
-  return content.split(MESSAGE_SPLIT_REGEX).map((s) => s.trim());
+  const raw = String(content ?? ''); // null/undefined content is treated as ''
+  const normalized = normalizeAssistantContentForSplit(raw);
+  const byMark = normalized.split(MESSAGE_SPLIT_REGEX).map((s) => s.trim()); // empty segments are kept here (no length filter)
+  let segments: string[];
+  if (byMark.length > 1) {
+    segments = byMark; // at least one [SPLIT] marker was found
+  } else {
+    const only = byMark[0] ?? '';
+    if (only.includes('\n\n')) { // NOTE(review): narrower than PARA_BREAK (misses "\n \n" and "\r\n\r\n") — confirm intended
+      const paras = only
+        .split(PARA_BREAK)
+        .map((s) => s.trim())
+        .filter((s) => s.length >= MIN_PARAGRAPH_CHARS); // paragraphs shorter than the minimum are dropped
+      segments = paras.length >= 2 ? paras.slice(0, 3) : [only]; // at most three paragraphs, otherwise the whole text as one segment
+    } else {
+      segments = [only];
+    }
+  }
+  return segments;
 }

 /** 会话列表预览：取最后一条子消息的前若干字 */
--- a/app-expo/src/features/conversation/realtime-session.ts
+++ b/app-expo/src/features/conversation/realtime-session.ts
@@ -8,7 +8,7 @@ import {
 import type { WsConnectionState, WsEvent } from '@/core/ws/types';

 import { handleWsEvent } from './event-handlers';
-import { lastSegmentPreview } from './message-split';
+import { assistantSegmentMessageId, lastSegmentPreview } from './message-split';
 import { conversationKeys } from './query-keys';
 import type { ConversationListItem, MessageItem } from './types';

@@ -57,6 +57,8 @@ export class RealtimeSession {
  private unsubState: (() => void) | null = null;

  private streamingBuffer = '';
+  /** For a single-segment reply whose event carries `assistant_message_id`: used as the message id when writing to the cache */
+  private pendingAssistantMessageId: string | null = null;

  constructor(options: RealtimeSessionOptions) {
    this.client = new WsClient(options.conversationId);
@@ -82,7 +84,7 @@ export class RealtimeSession {
  }

  dispose(): void {
-    this.commitStreamingBuffer();
+    this.flushStreamingBufferIfPending();
    this.unsubEvent?.();
    this.unsubState?.();
    this.client.dispose();
@@ -160,40 +162,87 @@ export class RealtimeSession {
  };

  /**
-   * Accumulates agent_response chunks into streamingBuffer.
-   * Only commits the final aggregated message to Query cache
-   * when the last chunk arrives (index >= total - 1).
-   * Individual chunks are forwarded to onStreamingText for UI display.
+   * Multi-segment replies already split on [SPLIT] by the server: each segment arrives as its own agent_response (total>1),
+   * and is written to the cache one by one here, matching per-segment TTS one-to-one.
+   * A single segment (total===1) still goes through the streaming buffer and becomes one message at the end.
   */
  private handleAgentChunk(
    event: Extract<WsEvent, { kind: 'agent_response' }>,
  ): void {
+    if (event.isTransition) { // transition events are committed immediately as their own message
+      this.commitOneAssistantMessage(
+        event.text,
+        `${this.conversationId}_agent_${Date.now()}`,
+      );
+      this.onStreamingText?.(event.text, true);
+      return;
+    }
+
+    const total = event.total ?? 1; // a missing total is treated as a single segment
+    const index = event.index ?? 0; // a missing index is treated as the first segment
+
+    if (total > 1) {
+      const id =
+        event.assistantMessageId != null
+          ? assistantSegmentMessageId(event.assistantMessageId, index)
+          : `${this.conversationId}_agent_${Date.now()}_${index}`; // locally generated id when the event carries none
+      this.commitOneAssistantMessage(event.text, id);
+      this.onStreamingText?.(event.text, true);
+      return;
+    }
+
+    if (event.assistantMessageId) {
+      this.pendingAssistantMessageId = event.assistantMessageId; // remembered until the buffer is committed
+    }
+
    this.streamingBuffer += event.text;

-    const isComplete =
-      event.index !== undefined &&
-      event.total !== undefined &&
-      event.index >= event.total - 1;
+    // Uses the coerced index/total: when the server sends only text and omits index/total, the old logic got isComplete=false and never committed
+    const isComplete = index >= total - 1; // NOTE(review): total <= 1 on this path, so this is true for any index >= 0 — confirm single-segment replies never span several events

    this.onStreamingText?.(this.streamingBuffer, isComplete);

    if (isComplete) {
-      this.commitStreamingBuffer();
+      const id =
+        this.pendingAssistantMessageId ??
+        `${this.conversationId}_agent_${Date.now()}`;
+      this.commitStreamingBufferWithId(id);
+      this.pendingAssistantMessageId = null; // reset so the id is not reused for a later reply
    }
  }

-  private commitStreamingBuffer(): void {
-    if (!this.streamingBuffer) return;
+  private commitOneAssistantMessage(content: string, id: string): void { // appends one assistant text message to the cached message list
+    const messagesKey = conversationKeys.messages(this.conversationId);
+    this.queryClient.setQueryData<MessageItem[]>(messagesKey, (old) => {
+      const message: MessageItem = {
+        id,
+        conversationId: this.conversationId,
+        content,
+        senderType: 'assistant',
+        timestamp: Date.now(),
+        messageType: 'text',
+      };
+      return [...(old ?? []), message]; // appended without checking for an existing entry with the same id
+    });
+    this.updateConversationListPreview(content); // keep the conversation list preview in sync with the new message
+  }
+
+  private commitStreamingBufferWithId(messageId: string): void {
+    // 允许空字符串：否则服务端下发 text="" 时永不写入缓存，表现为「无回复」
+    if (this.streamingBuffer === undefined || this.streamingBuffer === null) {
+      return;
+    }

    const fullText = this.streamingBuffer;
    this.streamingBuffer = '';
+    const content = fullText.trim().length > 0 ? fullText : '…';

    const messagesKey = conversationKeys.messages(this.conversationId);
    this.queryClient.setQueryData<MessageItem[]>(messagesKey, (old) => {
      const message: MessageItem = {
-        id: `${this.conversationId}_agent_${Date.now()}`,
+        id: messageId,
        conversationId: this.conversationId,
-        content: fullText,
+        content,
        senderType: 'assistant',
        timestamp: Date.now(),
        messageType: 'text',
@@ -201,6 +250,10 @@ export class RealtimeSession {
      return [...(old ?? []), message];
    });

+    this.updateConversationListPreview(content);
+  }
+
+  private updateConversationListPreview(latestContent: string): void {
    this.queryClient.setQueryData<ConversationListItem[]>(
      conversationKeys.lists(),
      (old) => {
@@ -209,7 +262,7 @@ export class RealtimeSession {
          item.id === this.conversationId
            ? {
                ...item,
-                latestMessagePreview: lastSegmentPreview(fullText, 50),
+                latestMessagePreview: lastSegmentPreview(latestContent, 50),
                latestMessageTime: Date.now(),
              }
            : item,
@@ -217,4 +270,13 @@ export class RealtimeSession {
      },
    );
  }
+
+  private flushStreamingBufferIfPending(): void { // commits whatever text is still buffered; called from dispose()
+    if (!this.streamingBuffer) return; // nothing buffered (empty string): do not create a message
+    const id =
+      this.pendingAssistantMessageId ??
+      `${this.conversationId}_agent_${Date.now()}`; // locally generated id when no server id was recorded
+    this.commitStreamingBufferWithId(id);
+    this.pendingAssistantMessageId = null;
+  }
 }
--- a/app-expo/src/features/voice/hooks/use-player.ts
+++ b/app-expo/src/features/voice/hooks/use-player.ts
@@ -47,7 +47,13 @@ export function usePlayer(): UsePlayerResult {
      typeof currentSource === 'string' &&
      (currentSource.startsWith('https://') ||
        currentSource.startsWith('http://'));
-    return { downloadFirst: remote };
+    return {
+      downloadFirst: remote,
+      // Expo's native player deactivates AVAudioSession on pause by default.
+      // We manage session ownership centrally via audioFocus, so keep it active
+      // until audioFocus.release() explicitly tears it down.
+      keepAudioSessionActive: true,
+    };
  }, [currentSource]);

  const player = useAudioPlayer(currentSource, playerOptions);
@@ -76,7 +82,7 @@ export function usePlayer(): UsePlayerResult {
        setCurrentSource(null);
        setStatus('idle');
        setQueueLength(0);
-        await audioFocus.release();
+        await audioFocus.releaseIfOwnedBy('player');
        return;
      }

@@ -170,7 +176,7 @@ export function usePlayer(): UsePlayerResult {
      setCurrentPlaybackItem(null);
      setCurrentSource(null);
      setStatus('idle');
-      await audioFocus.release();
+      await audioFocus.releaseIfOwnedBy('player');
      await playNext();
    },
    [player, playNext],
@@ -189,7 +195,7 @@ export function usePlayer(): UsePlayerResult {
    setCurrentPlaybackItem(null);
    setCurrentSource(null);
    setStatus('idle');
-    await audioFocus.release();
+    await audioFocus.releaseIfOwnedBy('player');
  }, [player]);

  return {
--- a/app-expo/src/features/voice/hooks/use-recorder.ts
+++ b/app-expo/src/features/voice/hooks/use-recorder.ts
@@ -1,12 +1,12 @@
 import { useCallback, useEffect, useRef, useState } from 'react';

 import { VoiceRecorder } from '../recorder';
-import type { RecorderStatus } from '../types';
+import type { RecorderStartResult, RecorderStatus } from '../types';

 interface UseRecorderResult {
  status: RecorderStatus;
  durationMs: number;
-  start: () => Promise<boolean>;
+  start: () => Promise<RecorderStartResult>; // resolves to a structured result (ok / reason / errorMessage) instead of a bare boolean
  stop: () => Promise<{ uri: string; durationMs: number } | null>;
  cancel: () => Promise<void>;
 }
@@ -44,16 +44,21 @@ export function useRecorder(

  const start = useCallback(async () => {
    const recorder = recorderRef.current;
-    if (!recorder) return false;
+    if (!recorder) {
+      return {
+        ok: false,
+        reason: 'recorder_unavailable',
+      } as const;
+    }

-    const ok = await recorder.start();
-    if (ok) {
+    const result = await recorder.start();
+    if (result.ok) {
      setDurationMs(0);
      durationTimer.current = setInterval(() => {
        setDurationMs(recorder.getDurationMs());
      }, 200);
    }
-    return ok;
+    return result;
  }, []);

  const stop = useCallback(async () => {
--- a/app-expo/src/features/voice/recorder.ts
+++ b/app-expo/src/features/voice/recorder.ts
@@ -2,12 +2,13 @@ import { Platform } from 'react-native';

 import {
  AudioModule,
+  AudioQuality,
+  IOSOutputFormat,
  type RecordingOptions,
-  RecordingPresets,
 } from 'expo-audio';
 import { audioFocus } from '@/core/audio/audio-focus';

-import type { RecorderStatus } from './types';
+import type { RecorderStartResult, RecorderStatus } from './types';

 // Native module exposes AudioRecorder as constructor; ESLint import/namespace doesn't resolve it
 // eslint-disable-next-line import/namespace -- AudioModule.AudioRecorder exists at runtime
@@ -16,6 +17,30 @@ const AudioRecorderCtor = AudioModule.AudioRecorder;
 type StatusListener = (status: RecorderStatus) => void;
 type RecordingCompleteListener = (uri: string, durationMs: number) => void;

+/**
+ * Tencent SentenceRecognition is currently called with `EngSerViceType=16k_zh`
+ * and `VoiceFormat=m4a`, so record speech in that shape directly instead of
+ * relying on Expo's default 44.1 kHz stereo preset.
+ */
+export const VOICE_RECORDING_OPTIONS: RecordingOptions = {
+  extension: '.m4a',
+  sampleRate: 16_000, // 16 kHz
+  numberOfChannels: 1, // mono
+  bitRate: 32_000,
+  android: {
+    outputFormat: 'mpeg4',
+    audioEncoder: 'aac',
+  },
+  ios: {
+    outputFormat: IOSOutputFormat.MPEG4AAC,
+    audioQuality: AudioQuality.HIGH,
+  },
+  web: {
+    mimeType: 'audio/webm', // NOTE(review): web records webm while `extension` above is '.m4a' — confirm the backend handles it
+    bitsPerSecond: 32_000,
+  },
+};
+
 /** Platform-specific recording options (expo-audio internal createRecordingOptions logic). */
 function createRecordingOptions(
  options: RecordingOptions,
@@ -32,6 +57,15 @@ function createRecordingOptions(
  return { ...common, ...options.web };
 }

+export function buildVoiceRecordingOptions(): Partial<RecordingOptions> { // VOICE_RECORDING_OPTIONS resolved for the current platform
+  return createRecordingOptions(VOICE_RECORDING_OPTIONS);
+}
+
+function getErrorMessage(error: unknown): string { // best-effort message text for a caught value of unknown type
+  if (error instanceof Error) return error.message;
+  return String(error); // non-Error throwables are stringified
+}
+
 /**
 * Class-level wrapper over expo-audio recording.
 * No React dependency — hooks/ layer adapts this to React.
@@ -55,29 +89,35 @@ export class VoiceRecorder {
    return result.granted;
  }

-  async start(): Promise<boolean> {
-    if (this.status !== 'idle') return false;
+  async start(): Promise<RecorderStartResult> { // starts recording; failures are returned as { ok: false, reason } rather than thrown
+    if (this.status !== 'idle') {
+      return { ok: false, reason: 'prepare_failed' }; // NOTE(review): "not idle" is reported as prepare_failed; the reason union has no dedicated value — confirm
+    }

    const hasPermission = await this.requestPermission();
-    if (!hasPermission) return false;
+    if (!hasPermission) {
+      return { ok: false, reason: 'permission_denied' };
+    }

    const acquired = await audioFocus.acquireForRecording();
-    if (!acquired) return false;
+    if (!acquired) {
+      return { ok: false, reason: 'audio_focus_unavailable' };
+    }

    this.setStatus('preparing'); // NOTE(review): status stays 'idle' across the awaits above, so overlapping start() calls may both reach here — confirm

    try {
-      this.recorder = new AudioRecorderCtor(
-        createRecordingOptions(RecordingPresets.HIGH_QUALITY),
-      );
+      this.recorder = new AudioRecorderCtor(buildVoiceRecordingOptions());
      await this.recorder.prepareToRecordAsync();
      this.recorder.record();
      this.startTime = Date.now();
      this.setStatus('recording');
-      return true;
-    } catch {
+      return { ok: true };
+    } catch (error) {
+      const errorMessage = getErrorMessage(error);
+      console.warn('VoiceRecorder.start failed during prepare', errorMessage);
      await this.cleanup();
-      return false;
+      return { ok: false, reason: 'prepare_failed', errorMessage }; // the underlying error text is passed back to the caller
    }
  }

@@ -151,6 +191,6 @@ export class VoiceRecorder {
    this.recorder = null;
    this.startTime = 0;
    this.setStatus('idle');
-    await audioFocus.release();
+    await audioFocus.releaseIfOwnedBy('recorder');
  }
 }
--- a/app-expo/src/features/voice/types.ts
+++ b/app-expo/src/features/voice/types.ts
@@ -2,6 +2,18 @@

 export type RecorderStatus = 'idle' | 'preparing' | 'recording' | 'stopping';

+export type RecorderStartFailureReason =
+  | 'permission_denied' // VoiceRecorder.start: requestPermission() was not granted
+  | 'audio_focus_unavailable' // VoiceRecorder.start: audioFocus.acquireForRecording() returned false
+  | 'prepare_failed' // VoiceRecorder.start: recorder not idle, or creating/preparing the recorder threw
+  | 'recorder_unavailable'; // useRecorder.start: no recorder instance in the ref
+
+export interface RecorderStartResult {
+  ok: boolean; // true when recording started
+  reason?: RecorderStartFailureReason; // set on the failure results returned by start()
+  errorMessage?: string; // text of the caught error; only set on the prepare_failed result from the catch branch
+}
+
 // ─── Segmenter ───

 export type SegmentStrategy = 'fixed-duration';