feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数
- 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符,与分段逻辑一致

后端 - Agent
- reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id;任务成功结…(提交说明在此处被截断)
This commit is contained in:
Kevin
2026-03-27 16:01:28 +08:00
parent 1374f6e8f5
commit e4bf0710c7
70 changed files with 3404 additions and 557 deletions

View File

@@ -1,5 +1,5 @@
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
import { File } from 'expo-file-system';
import { File, Paths } from 'expo-file-system';
import { useCallback, useEffect, useRef, useState } from 'react';
import type { WsConnectionState } from '@/core/ws/types';
@@ -20,6 +20,48 @@ import {
} from './types';
import { voiceSegmentStore } from '@/features/voice/voice-segment-store';
/**
 * Expo's `File` needs a well-formed `file://` URI, but some recording APIs
 * return a bare absolute path.
 *
 * The input is trimmed; a value starting with `/` gets the `file://` scheme
 * prepended, and anything else (including an existing `file://` URI) is
 * returned as the trimmed string.
 */
function ensureFileUri(uri: string): string {
  const trimmed = uri.trim();
  // A leading "/" can never coincide with a "file://" prefix, so one check suffices.
  return trimmed.startsWith('/') ? `file://${trimmed}` : trimmed;
}
/**
 * Returns the trailing file extension of `uri` (leading dot included), looking
 * only at the part before the first `?`.
 *
 * Falls back to `.m4a` when the text after the last dot is empty or contains a
 * `/`, or when there is no dot at all.
 */
function guessAudioExtension(uri: string): string {
  const [beforeQuery] = uri.split('?');
  const extensionMatch = (beforeQuery ?? uri).match(/\.[^/.]+$/u);
  if (extensionMatch === null) {
    return '.m4a';
  }
  return extensionMatch[0];
}
/**
 * Reads a recording as base64 using the main-package `File` / `Paths` API
 * (see the Expo docs on interop between the new `File` and the legacy
 * `readAsStringAsync`).
 *
 * The source is first copied to a uniquely named file under the cache
 * directory and the copy is read, to avoid occasionally reading stale or wrong
 * content straight from the source path. If the copy step throws, the source
 * is read directly instead. The staged copy is deleted afterwards on a
 * best-effort basis.
 *
 * @param uri Recording location; a bare absolute path is converted to `file://`.
 * @returns The file content encoded as base64.
 * @throws Error with message `recording file missing` when the source does not exist.
 */
async function readRecordingPayload(uri: string): Promise<string> {
  const sourceUri = ensureFileUri(uri);
  const original = new File(sourceUri);
  if (!original.exists) {
    throw new Error('recording file missing');
  }

  // Timestamp + random suffix keeps concurrent uploads from colliding in cache.
  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
  const copyTarget = new File(
    Paths.cache,
    `voice-upload-${uniqueSuffix}${guessAudioExtension(sourceUri)}`,
  );

  let copied = true;
  try {
    original.copy(copyTarget);
  } catch {
    copied = false;
  }
  if (!copied) {
    // Staging failed: fall back to reading the source in place.
    return await original.base64();
  }

  try {
    return await copyTarget.base64();
  } finally {
    try {
      copyTarget.delete();
    } catch {
      // ignore: a leftover cache file is harmless
    }
  }
}
// ─── Query hooks ───
// TODO: 连接不上后端时 isLoading 可能一直为 true,需加超时或展示错误态
@@ -265,8 +307,7 @@ export function useRealtimeSession({
if (durationSec < MIN_RECORDING_DURATION_SEC) return false;
try {
const file = new File(uri);
const base64 = await file.base64();
const base64 = await readRecordingPayload(uri);
if (!base64) return false;
const voiceSessionId = generateUUID();

View File

@@ -1,16 +1,58 @@
/**
* 与后端 / LLM 约定:多条助手消息用 [SPLIT] 分隔(大小写不敏感)。
* 分隔符为边界,不包含在气泡正文中。
*
* LLM 常输出全角括号、括号内空格或零宽字符,需先规范化再匹配。
*/
export const MESSAGE_SPLIT_REGEX = /\[SPLIT\]/i;
const ZERO_WIDTH = /[\u200B-\u200D\uFEFF]/g;
/** 历史/已落库消息:拆成非空片段,各渲染为一个气泡 */
export function splitMessageParts(content: string): string[] {
return content
/** Paragraphs shorter than this (after trimming) are not kept as segments. */
const MIN_PARAGRAPH_CHARS = 12;
/** Paragraph break: a newline, optional whitespace, then one or more newlines. */
const PARA_BREAK = /\n\s*\n+/;

/**
 * Splits assistant content into non-empty parts. Intended to mirror the
 * backend's `segments_from_llm_response`: split on [SPLIT] first, then fall
 * back to blank-line paragraphs.
 *
 * - More than one marker-delimited part: those parts are returned.
 * - Exactly one part containing `\n\n`: it is split on `PARA_BREAK`; if at
 *   least two paragraphs of `MIN_PARAGRAPH_CHARS` or more remain, the first
 *   three are returned.
 * - Otherwise the marker-delimited result (zero or one part) is returned.
 *
 * NOTE(review): in the paragraph fallback, paragraphs shorter than
 * `MIN_PARAGRAPH_CHARS` and any paragraph after the third are dropped rather
 * than merged — confirm this matches the backend and is intended for display.
 * NOTE(review): the `'\n\n'` pre-check is stricter than `PARA_BREAK`
 * (e.g. `"\n \n"` or CRLF pairs never reach the paragraph split) — confirm.
 */
function splitToPartsNormalized(content: string): string[] {
  const markerParts = normalizeAssistantContentForSplit(content)
    .split(MESSAGE_SPLIT_REGEX)
    .map((piece) => piece.trim())
    .filter((piece) => piece.length > 0);

  // Explicit markers win; an empty result has nothing left to split.
  if (markerParts.length !== 1) return markerParts;

  const single = markerParts[0];
  if (single === undefined || !single.includes('\n\n')) return markerParts;

  const paragraphs = single
    .split(PARA_BREAK)
    .map((piece) => piece.trim())
    .filter((piece) => piece.length >= MIN_PARAGRAPH_CHARS);

  return paragraphs.length >= 2 ? paragraphs.slice(0, 3) : markerParts;
}
/**
 * Pre-processing applied before splitting; exported for tests.
 *
 * Coerces the input to a string (null/undefined become `''`), removes the
 * characters matched by `ZERO_WIDTH`, and maps the bracket look-alikes
 * U+FF3B / U+3010 to `[` and U+FF3D / U+3011 to `]`.
 */
export function normalizeAssistantContentForSplit(content: string): string {
  return String(content ?? '')
    .replace(ZERO_WIDTH, '')
    .replace(/\uFF3B/g, '[')
    .replace(/\uFF3D/g, ']')
    .replace(/\u3010/g, '[')
    .replace(/\u3011/g, ']');
}
/**
 * Matches the split marker case-insensitively, allowing optional whitespace
 * (including newlines) inside the brackets, e.g. `[ SPLIT ]`.
 */
export const MESSAGE_SPLIT_REGEX = /\[\s*SPLIT\s*\]/i;
/**
 * Builds the id of one segment of an assistant message as
 * `<assistantMessageId>_seg_<segmentIndex>`.
 *
 * Meant to line up with the backend's persisted assistant message id plus
 * segment number (used for the optimistic list and TTS binding).
 */
export function assistantSegmentMessageId(
  assistantMessageId: string,
  segmentIndex: number,
): string {
  const segmentSuffix = `_seg_${segmentIndex}`;
  return `${assistantMessageId}${segmentSuffix}`;
}
/**
 * History / already-persisted messages: split into non-empty parts, each of
 * which is rendered as its own bubble. Null/undefined content is treated as
 * an empty string.
 */
export function splitMessageParts(content: string): string[] {
  const text = String(content ?? '');
  return splitToPartsNormalized(text);
}
/**
@@ -18,7 +60,25 @@ export function splitMessageParts(content: string): string[] {
* 仍能拆成两段(上一段完成气泡 + 下一段空流式气泡)。
*/
export function splitStreamingSegments(content: string): string[] {
return content.split(MESSAGE_SPLIT_REGEX).map((s) => s.trim());
const raw = String(content ?? '');
const normalized = normalizeAssistantContentForSplit(raw);
const byMark = normalized.split(MESSAGE_SPLIT_REGEX).map((s) => s.trim());
let segments: string[];
if (byMark.length > 1) {
segments = byMark;
} else {
const only = byMark[0] ?? '';
if (only.includes('\n\n')) {
const paras = only
.split(PARA_BREAK)
.map((s) => s.trim())
.filter((s) => s.length >= MIN_PARAGRAPH_CHARS);
segments = paras.length >= 2 ? paras.slice(0, 3) : [only];
} else {
segments = [only];
}
}
return segments;
}
/** 会话列表预览:取最后一条子消息的前若干字 */

View File

@@ -8,7 +8,7 @@ import {
import type { WsConnectionState, WsEvent } from '@/core/ws/types';
import { handleWsEvent } from './event-handlers';
import { lastSegmentPreview } from './message-split';
import { assistantSegmentMessageId, lastSegmentPreview } from './message-split';
import { conversationKeys } from './query-keys';
import type { ConversationListItem, MessageItem } from './types';
@@ -57,6 +57,8 @@ export class RealtimeSession {
private unsubState: (() => void) | null = null;
private streamingBuffer = '';
/** 单段回复且服务端带 `assistant_message_id` 时用于落缓存 id */
private pendingAssistantMessageId: string | null = null;
constructor(options: RealtimeSessionOptions) {
this.client = new WsClient(options.conversationId);
@@ -82,7 +84,7 @@ export class RealtimeSession {
}
dispose(): void {
this.commitStreamingBuffer();
this.flushStreamingBufferIfPending();
this.unsubEvent?.();
this.unsubState?.();
this.client.dispose();
@@ -160,40 +162,87 @@ export class RealtimeSession {
};
/**
* Accumulates agent_response chunks into streamingBuffer.
* Only commits the final aggregated message to Query cache
* when the last chunk arrives (index >= total - 1).
* Individual chunks are forwarded to onStreamingText for UI display.
* 服务端已按 [SPLIT] 拆好的多段:每段一条独立 agent_responsetotal>1
* 这里逐条写入缓存,与逐段 TTS 一一对应。
* 单段total===1仍走流式 buffer结束时一条消息。
*/
private handleAgentChunk(
event: Extract<WsEvent, { kind: 'agent_response' }>,
): void {
if (event.isTransition) {
this.commitOneAssistantMessage(
event.text,
`${this.conversationId}_agent_${Date.now()}`,
);
this.onStreamingText?.(event.text, true);
return;
}
const total = event.total ?? 1;
const index = event.index ?? 0;
if (total > 1) {
const id =
event.assistantMessageId != null
? assistantSegmentMessageId(event.assistantMessageId, index)
: `${this.conversationId}_agent_${Date.now()}_${index}`;
this.commitOneAssistantMessage(event.text, id);
this.onStreamingText?.(event.text, true);
return;
}
if (event.assistantMessageId) {
this.pendingAssistantMessageId = event.assistantMessageId;
}
this.streamingBuffer += event.text;
const isComplete =
event.index !== undefined &&
event.total !== undefined &&
event.index >= event.total - 1;
// 与 coerced index/total 对齐:若服务端只带 text、省略 index/total旧逻辑会 isComplete=false永远不落库
const isComplete = index >= total - 1;
this.onStreamingText?.(this.streamingBuffer, isComplete);
if (isComplete) {
this.commitStreamingBuffer();
const id =
this.pendingAssistantMessageId ??
`${this.conversationId}_agent_${Date.now()}`;
this.commitStreamingBufferWithId(id);
this.pendingAssistantMessageId = null;
}
}
private commitStreamingBuffer(): void {
if (!this.streamingBuffer) return;
/**
 * Appends one assistant text message to this conversation's cached message
 * list, then refreshes the conversation-list preview with the same content.
 *
 * @param content Message body, stored as given.
 * @param id Cache id assigned to the appended message.
 */
private commitOneAssistantMessage(content: string, id: string): void {
  this.queryClient.setQueryData<MessageItem[]>(
    conversationKeys.messages(this.conversationId),
    (previous) => {
      // Built inside the updater so the timestamp is taken when the cache is written.
      const appended: MessageItem = {
        id,
        conversationId: this.conversationId,
        content,
        senderType: 'assistant',
        timestamp: Date.now(),
        messageType: 'text',
      };
      return (previous ?? []).concat(appended);
    },
  );

  this.updateConversationListPreview(content);
}
private commitStreamingBufferWithId(messageId: string): void {
// 允许空字符串:否则服务端下发 text="" 时永不写入缓存,表现为「无回复」
if (this.streamingBuffer === undefined || this.streamingBuffer === null) {
return;
}
const fullText = this.streamingBuffer;
this.streamingBuffer = '';
const content = fullText.trim().length > 0 ? fullText : '…';
const messagesKey = conversationKeys.messages(this.conversationId);
this.queryClient.setQueryData<MessageItem[]>(messagesKey, (old) => {
const message: MessageItem = {
id: `${this.conversationId}_agent_${Date.now()}`,
id: messageId,
conversationId: this.conversationId,
content: fullText,
content,
senderType: 'assistant',
timestamp: Date.now(),
messageType: 'text',
@@ -201,6 +250,10 @@ export class RealtimeSession {
return [...(old ?? []), message];
});
this.updateConversationListPreview(content);
}
private updateConversationListPreview(latestContent: string): void {
this.queryClient.setQueryData<ConversationListItem[]>(
conversationKeys.lists(),
(old) => {
@@ -209,7 +262,7 @@ export class RealtimeSession {
item.id === this.conversationId
? {
...item,
latestMessagePreview: lastSegmentPreview(fullText, 50),
latestMessagePreview: lastSegmentPreview(latestContent, 50),
latestMessageTime: Date.now(),
}
: item,
@@ -217,4 +270,13 @@ export class RealtimeSession {
},
);
}
/**
 * Commits whatever text is still sitting in the streaming buffer; does
 * nothing when the buffer is empty.
 *
 * The message id is the pending assistant message id when one was recorded,
 * otherwise a generated `<conversationId>_agent_<timestamp>` id. The pending
 * id is cleared afterwards.
 */
private flushStreamingBufferIfPending(): void {
  if (this.streamingBuffer) {
    const messageId =
      this.pendingAssistantMessageId ??
      `${this.conversationId}_agent_${Date.now()}`;
    this.commitStreamingBufferWithId(messageId);
    this.pendingAssistantMessageId = null;
  }
}
}

View File

@@ -47,7 +47,13 @@ export function usePlayer(): UsePlayerResult {
typeof currentSource === 'string' &&
(currentSource.startsWith('https://') ||
currentSource.startsWith('http://'));
return { downloadFirst: remote };
return {
downloadFirst: remote,
// Expo's native player deactivates AVAudioSession on pause by default.
// We manage session ownership centrally via audioFocus, so keep it active
// until audioFocus.release() explicitly tears it down.
keepAudioSessionActive: true,
};
}, [currentSource]);
const player = useAudioPlayer(currentSource, playerOptions);
@@ -76,7 +82,7 @@ export function usePlayer(): UsePlayerResult {
setCurrentSource(null);
setStatus('idle');
setQueueLength(0);
await audioFocus.release();
await audioFocus.releaseIfOwnedBy('player');
return;
}
@@ -170,7 +176,7 @@ export function usePlayer(): UsePlayerResult {
setCurrentPlaybackItem(null);
setCurrentSource(null);
setStatus('idle');
await audioFocus.release();
await audioFocus.releaseIfOwnedBy('player');
await playNext();
},
[player, playNext],
@@ -189,7 +195,7 @@ export function usePlayer(): UsePlayerResult {
setCurrentPlaybackItem(null);
setCurrentSource(null);
setStatus('idle');
await audioFocus.release();
await audioFocus.releaseIfOwnedBy('player');
}, [player]);
return {

View File

@@ -1,12 +1,12 @@
import { useCallback, useEffect, useRef, useState } from 'react';
import { VoiceRecorder } from '../recorder';
import type { RecorderStatus } from '../types';
import type { RecorderStartResult, RecorderStatus } from '../types';
interface UseRecorderResult {
status: RecorderStatus;
durationMs: number;
start: () => Promise<boolean>;
start: () => Promise<RecorderStartResult>;
stop: () => Promise<{ uri: string; durationMs: number } | null>;
cancel: () => Promise<void>;
}
@@ -44,16 +44,21 @@ export function useRecorder(
const start = useCallback(async () => {
const recorder = recorderRef.current;
if (!recorder) return false;
if (!recorder) {
return {
ok: false,
reason: 'recorder_unavailable',
} as const;
}
const ok = await recorder.start();
if (ok) {
const result = await recorder.start();
if (result.ok) {
setDurationMs(0);
durationTimer.current = setInterval(() => {
setDurationMs(recorder.getDurationMs());
}, 200);
}
return ok;
return result;
}, []);
const stop = useCallback(async () => {

View File

@@ -2,12 +2,13 @@ import { Platform } from 'react-native';
import {
AudioModule,
AudioQuality,
IOSOutputFormat,
type RecordingOptions,
RecordingPresets,
} from 'expo-audio';
import { audioFocus } from '@/core/audio/audio-focus';
import type { RecorderStatus } from './types';
import type { RecorderStartResult, RecorderStatus } from './types';
// Native module exposes AudioRecorder as constructor; ESLint import/namespace doesn't resolve it
// eslint-disable-next-line import/namespace -- AudioModule.AudioRecorder exists at runtime
@@ -16,6 +17,30 @@ const AudioRecorderCtor = AudioModule.AudioRecorder;
type StatusListener = (status: RecorderStatus) => void;
type RecordingCompleteListener = (uri: string, durationMs: number) => void;
/**
 * Recording options used for voice capture.
 *
 * Tencent SentenceRecognition is currently called with `EngSerViceType=16k_zh`
 * and `VoiceFormat=m4a`, so record speech in that shape directly instead of
 * relying on Expo's default 44.1 kHz stereo preset.
 *
 * NOTE(review): the `web` entry records `audio/webm` while `extension` is
 * `.m4a` — confirm the web path is either unused for ASR uploads or that the
 * server accepts webm there.
 */
export const VOICE_RECORDING_OPTIONS: RecordingOptions = {
extension: '.m4a',
// 16 kHz, single channel: the shape named by the `16k_zh` engine above.
sampleRate: 16_000,
numberOfChannels: 1,
bitRate: 32_000,
android: {
outputFormat: 'mpeg4',
audioEncoder: 'aac',
},
ios: {
outputFormat: IOSOutputFormat.MPEG4AAC,
audioQuality: AudioQuality.HIGH,
},
web: {
mimeType: 'audio/webm',
bitsPerSecond: 32_000,
},
};
/** Platform-specific recording options (expo-audio internal createRecordingOptions logic). */
function createRecordingOptions(
options: RecordingOptions,
@@ -32,6 +57,15 @@ function createRecordingOptions(
return { ...common, ...options.web };
}
/**
 * Returns `VOICE_RECORDING_OPTIONS` passed through `createRecordingOptions`,
 * i.e. the options object handed to the recorder constructor.
 */
export function buildVoiceRecordingOptions(): Partial<RecordingOptions> {
  const platformOptions = createRecordingOptions(VOICE_RECORDING_OPTIONS);
  return platformOptions;
}
/**
 * Extracts a human-readable message from a caught value.
 *
 * - `Error` instances yield their `message`.
 * - Other objects with a string `message` property (e.g. error-like values
 *   that are not `instanceof Error`) yield that property.
 * - Plain objects that would stringify to the useless `"[object Object]"` are
 *   serialized as JSON when possible.
 * - Everything else goes through `String(...)`.
 *
 * @param error The value caught by a `catch` clause.
 * @returns A message suitable for logs and for `RecorderStartResult.errorMessage`.
 */
function getErrorMessage(error: unknown): string {
  if (error instanceof Error) return error.message;

  if (typeof error === 'object' && error !== null) {
    const candidate = (error as { message?: unknown }).message;
    if (typeof candidate === 'string') return candidate;
  }

  const text = String(error);
  if (text !== '[object Object]') return text;

  try {
    // JSON.stringify can return undefined or throw (circular refs); keep the fallback.
    const serialized: unknown = JSON.stringify(error);
    return typeof serialized === 'string' ? serialized : text;
  } catch {
    return text;
  }
}
/**
* Class-level wrapper over expo-audio recording.
* No React dependency — hooks/ layer adapts this to React.
@@ -55,29 +89,35 @@ export class VoiceRecorder {
return result.granted;
}
async start(): Promise<boolean> {
if (this.status !== 'idle') return false;
async start(): Promise<RecorderStartResult> {
if (this.status !== 'idle') {
return { ok: false, reason: 'prepare_failed' };
}
const hasPermission = await this.requestPermission();
if (!hasPermission) return false;
if (!hasPermission) {
return { ok: false, reason: 'permission_denied' };
}
const acquired = await audioFocus.acquireForRecording();
if (!acquired) return false;
if (!acquired) {
return { ok: false, reason: 'audio_focus_unavailable' };
}
this.setStatus('preparing');
try {
this.recorder = new AudioRecorderCtor(
createRecordingOptions(RecordingPresets.HIGH_QUALITY),
);
this.recorder = new AudioRecorderCtor(buildVoiceRecordingOptions());
await this.recorder.prepareToRecordAsync();
this.recorder.record();
this.startTime = Date.now();
this.setStatus('recording');
return true;
} catch {
return { ok: true };
} catch (error) {
const errorMessage = getErrorMessage(error);
console.warn('VoiceRecorder.start failed during prepare', errorMessage);
await this.cleanup();
return false;
return { ok: false, reason: 'prepare_failed', errorMessage };
}
}
@@ -151,6 +191,6 @@ export class VoiceRecorder {
this.recorder = null;
this.startTime = 0;
this.setStatus('idle');
await audioFocus.release();
await audioFocus.releaseIfOwnedBy('recorder');
}
}

View File

@@ -2,6 +2,18 @@
export type RecorderStatus = 'idle' | 'preparing' | 'recording' | 'stopping';
/**
 * Why starting a recording failed.
 *
 * - `permission_denied`: the recording permission request was not granted.
 * - `audio_focus_unavailable`: audio focus could not be acquired for recording.
 * - `prepare_failed`: preparing/starting the native recorder threw; also
 *   returned when `start()` is called while the recorder is not idle.
 * - `recorder_unavailable`: the hook had no recorder instance to start.
 */
export type RecorderStartFailureReason =
| 'permission_denied'
| 'audio_focus_unavailable'
| 'prepare_failed'
| 'recorder_unavailable';
/**
 * Outcome of an attempt to start recording.
 *
 * NOTE(review): `reason` and `errorMessage` are only populated on failure in
 * the code visible here; a discriminated union on `ok` would encode that, but
 * confirm all callers before changing the shape.
 */
export interface RecorderStartResult {
// True when recording started.
ok: boolean;
// Failure category; set when `ok` is false.
reason?: RecorderStartFailureReason;
// Message of the underlying error, when one was caught.
errorMessage?: string;
}
// ─── Segmenter ───
export type SegmentStrategy = 'fixed-duration';