# api/app/features/conversation/ws/pipeline.py

"""核心消息处理管道：Agent 调用、ASR 转写、分段有序聚合"""

import asyncio
import base64
import time
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple

from app.core.logging import get_logger

if TYPE_CHECKING:
    from app.features.quota.service import QuotaService

from sqlalchemy import select, update
from sqlalchemy.ext.asyncio import AsyncSession

from app.agents.chat import ChatOrchestrator
from app.core.agent_logging import agent_summary_enabled
from app.core.config import settings
from app.core.cos_url_keys import TTS_PRESIGNED_EXPIRES_SEC
from app.core.db import AsyncSessionLocal
from app.core.dependencies import get_asr_provider, get_object_storage, get_tts_provider
from app.features.conversation.history_store import (
    AI_RESPONSE_SEGMENT_JOIN,
    ConversationHistoryStore,
)
from app.features.conversation.models import Conversation, Segment
from app.features.conversation.ws.connection_manager import manager
from app.features.conversation.ws.message_types import MessageType
from app.features.conversation.ws.profile_collector import (
    apply_extracted_profile,
    get_filled_profile_fields,
    get_missing_profile_fields,
)
from app.features.memoir.background_runner import BackgroundTaskRunner
from app.features.user.models import User

# Module-level logger for this pipeline module.
logger = get_logger(__name__)

# Per-conversation TTS cancel counter. It is incremented when the client sends
# tts_cancel; the TTS loop in process_user_message compares the value before and
# after synthesis to short-circuit the remaining chunks.
# NOTE(review): nothing visible in this module removes entries from this dict —
# confirm whether it is cleaned up elsewhere when a conversation ends.
_tts_cancel_epoch: dict[str, int] = {}


def bump_tts_cancel_epoch(conversation_id: str) -> None:
    """Advance the TTS cancel epoch of ``conversation_id`` by one (a missing entry counts as 0)."""
    current_epoch = _tts_cancel_epoch.get(conversation_id, 0)
    _tts_cancel_epoch[conversation_id] = current_epoch + 1


def _tts_epoch_value(conversation_id: str) -> int:
    """Return the current TTS cancel epoch for ``conversation_id`` (0 when it was never bumped)."""
    try:
        return _tts_cancel_epoch[conversation_id]
    except KeyError:
        return 0


def _tts_object_ext(codec: str) -> str:
    """Map a TTS codec name to the file extension used for the stored object.

    The codec is lower-cased and leading dots are removed; ``"wave"`` becomes
    ``"wav"`` and an empty/missing codec falls back to ``"mp3"``.
    """
    normalized = (codec or "mp3").lower().lstrip(".")
    if not normalized:
        return "mp3"
    return "wav" if normalized == "wave" else normalized


def _tts_codec_to_content_type(codec: str) -> str:
    """Return the MIME type for a TTS codec name.

    ``mp3`` maps to ``audio/mpeg``, ``wav``/``wave`` to ``audio/wav``; anything
    else is reported as ``application/octet-stream``. A missing codec is
    treated as ``mp3``.
    """
    normalized = (codec or "mp3").lower().lstrip(".")
    mime_by_codec = {
        "mp3": "audio/mpeg",
        "wav": "audio/wav",
        "wave": "audio/wav",
    }
    return mime_by_codec.get(normalized, "application/octet-stream")


async def _send_tts_audio(
    conversation_id: str,
    text: str,
    *,
    chunk_index: int,
    chunk_total: int,
    assistant_message_id: str | None,
    tts_epoch_start: int,
) -> str | None:
    """Synthesize one TTS chunk, upload it to object storage and push a TTS_AUDIO message.

    Args:
        conversation_id: Conversation whose connection receives the audio.
        text: Text to synthesize.
        chunk_index: Position of this chunk, sent to the client as ``index``.
        chunk_total: Number of chunks in the reply, sent to the client as ``total``.
        assistant_message_id: When truthy, included in the payload as
            ``assistant_message_id``.
        tts_epoch_start: Cancel epoch captured by the caller; if the current epoch
            differs, the chunk is dropped.

    Returns:
        The value returned by ``storage.upload`` for the new object, or ``None`` when
        TTS is disabled, the epoch changed (cancel), synthesis returned no audio, or
        any step raised (errors are logged, never propagated).
    """
    if not settings.enable_tts:
        return None
    # Cancelled before we even started: skip the synthesis call.
    if _tts_epoch_value(conversation_id) != tts_epoch_start:
        return None
    try:
        tts = get_tts_provider()
        audio_bytes = await tts.synthesize(text)
        if not audio_bytes:
            logger.warning(
                "TTS skipped: synthesize returned empty. Check TTS config in .env"
            )
            return None
        # Re-check after the await: a cancel may have arrived while synthesizing,
        # in which case nothing is uploaded or sent.
        if _tts_epoch_value(conversation_id) != tts_epoch_start:
            return None
        ext = _tts_object_ext(settings.tts_codec)
        content_type = _tts_codec_to_content_type(settings.tts_codec)
        storage = get_object_storage()
        key = f"conversations/{conversation_id}/tts/{uuid.uuid4().hex}.{ext}"
        # NOTE(review): upload/get_url are called without await — if they perform
        # blocking network I/O they will stall the event loop; confirm.
        public_url = storage.upload(key, audio_bytes, content_type)
        # Same approach as `tts_delivery.apply_presigned_tts_urls_to_messages` and the
        # memoir image presign: deliver a playable (presigned) URL to the client.
        playback_url = storage.get_url(key, expires=TTS_PRESIGNED_EXPIRES_SEC)
        # The payload carries both the inline base64 audio and the playback URL.
        payload_data: Dict[str, Any] = {
            "audio_base64": base64.b64encode(audio_bytes).decode("utf-8"),
            "format": settings.tts_codec,
            "audio_url": playback_url,
            "index": chunk_index,
            "total": chunk_total,
        }
        if assistant_message_id:
            payload_data["assistant_message_id"] = assistant_message_id
        await manager.send_message(
            conversation_id,
            {
                "type": MessageType.TTS_AUDIO,
                "conversation_id": conversation_id,
                "data": payload_data,
                "timestamp": datetime.now(timezone.utc).isoformat(),
            },
        )
        # The caller receives the upload result, not the presigned playback URL.
        return public_url
    except Exception as e:
        # Best-effort: any failure (synthesis, upload or send) only skips this chunk.
        err_str = str(e)
        if "PkgExhausted" in err_str:
            # The provider error text contains "PkgExhausted": logged as a warning
            # (resource package used up) instead of an error.
            logger.warning(
                "TTS skipped: 腾讯云语音合成资源包已用尽，请在控制台购买或开通后付费: {}",
                err_str[:100],
            )
        else:
            logger.error("TTS synthesize failed: {}", e)
        return None


# ── Agent instances (moved out of ConnectionManager) ─────────────────────
# Module-level singletons, created at import time and shared by all conversations.
chat_orchestrator = ChatOrchestrator()
background_runner = BackgroundTaskRunner()


# ── 分段流状态 ──────────────────────────────────────────────────


@dataclass
class SegmentStreamState:
    """Per-voice-session segment state (used for parallel ASR + ordered aggregation)."""

    # Guards the mutable fields below across concurrently running segment tasks.
    lock: asyncio.Lock = field(default_factory=asyncio.Lock)
    # process_audio_segment discards its index from this set when it finishes;
    # presumably the caller adds the index when a segment arrives — not visible here.
    pending_indices: Set[int] = field(default_factory=set)
    # Indices that were persisted or found already persisted (idempotent skip).
    processed_indices: Set[int] = field(default_factory=set)
    # index -> (transcript text, Segment row) waiting for all lower indices to arrive.
    buffered_transcripts: Dict[int, Tuple[str, Segment]] = field(default_factory=dict)
    # Highest index already released in order; -1 means nothing has been released yet.
    consumed_index: int = -1
    # Tasks registered through register_segment_task that have not completed.
    active_tasks: Set[asyncio.Task] = field(default_factory=set)
    # Set by _delayed_listening_feedback so the feedback is sent at most once.
    listening_feedback_sent: bool = False
    # Handle of the delayed feedback task; reset to None once the feedback fires.
    listening_feedback_task: Optional[asyncio.Task] = None


# Registry of segment stream states keyed by (conversation_id, voice_session_id).
_segment_states: Dict[Tuple[str, str], SegmentStreamState] = {}


def get_or_create_segment_state(
    conversation_id: str,
    voice_session_id: str,
) -> SegmentStreamState:
    """Return the state for (conversation_id, voice_session_id), creating it on first use."""
    key = (conversation_id, voice_session_id)
    state = _segment_states.get(key)
    if state is None:
        state = SegmentStreamState()
        _segment_states[key] = state
    return state


def register_segment_task(
    conversation_id: str,
    voice_session_id: str,
    task: asyncio.Task,
) -> None:
    """Track a segment-processing task and clean up after it completes.

    The task is added to the voice session's ``active_tasks``. When it finishes,
    it is removed again; if no task remains and the conversation is no longer
    connected, the whole state entry is dropped. A task that ended with an
    exception is logged.

    Args:
        conversation_id: Conversation the task belongs to.
        voice_session_id: Voice session the task belongs to.
        task: The running asyncio task that processes one audio segment.
    """
    state_key = (conversation_id, voice_session_id)
    state = get_or_create_segment_state(conversation_id, voice_session_id)
    state.active_tasks.add(task)

    def _cleanup(done_task: asyncio.Task) -> None:
        state.active_tasks.discard(done_task)
        # Only drop the state once the client is gone and nothing is running.
        if not state.active_tasks and conversation_id not in manager.active_connections:
            _segment_states.pop(state_key, None)
        # exception() raises CancelledError on a cancelled task, so bail out first.
        if done_task.cancelled():
            return
        exc = done_task.exception()
        if exc:
            # A done-callback runs outside any ``except`` block, so ``exc_info=True``
            # would find no active exception. Pass the exception object itself so
            # the traceback is attached to the log record.
            logger.error(
                "分段处理任务异常 "
                f"(conversation_id={conversation_id}, voice_session_id={voice_session_id}): {exc}",
                exc_info=exc,
            )

    task.add_done_callback(_cleanup)


def cleanup_segment_states(conversation_id: str) -> None:
    """Drop this conversation's segment states that have no active tasks (called after disconnect)."""
    # Iterate over a snapshot of the keys so entries can be removed while looping.
    for state_key in list(_segment_states):
        if state_key[0] != conversation_id:
            continue
        if _segment_states[state_key].active_tasks:
            continue
        _segment_states.pop(state_key, None)


# ── 工具函数 ────────────────────────────────────────────────────


def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(tz=timezone.utc)


def _mark_conversation_active(
    conversation: Conversation, at: Optional[datetime] = None
) -> datetime:
    """Set ``conversation.last_message_at`` to ``at`` (or the current UTC time) and return it."""
    if at:
        stamp = at
    else:
        stamp = _utc_now()
    conversation.last_message_at = stamp
    return stamp


def _voice_session_id_from_client_segment_id(
    client_segment_id: Optional[str],
) -> Optional[str]:
    """Return the part of ``client_segment_id`` before its last ``-``.

    Returns ``None`` when the id is empty, has no ``-``, or has nothing before
    the last ``-``.
    """
    if not client_segment_id or "-" not in client_segment_id:
        return None
    session_part = client_segment_id.rsplit("-", 1)[0]
    return session_part or None


def _build_segment_audio_url(voice_session_id: str, segment_index: int) -> str:
    """Build the idempotency key stored in ``Segment.audio_url``.

    The key has the form ``audio-segment:{voice_session_id}:{segment_index}``.
    """
    return "audio-segment:{}:{}".format(voice_session_id, segment_index)


def _extract_segment_scope(audio_url: Optional[str]) -> Optional[Tuple[str, int]]:
    """Parse ``audio-segment:{session_id}:{index}`` into ``(session_id, index)``.

    Returns ``None`` when the value is empty, lacks the prefix, has no ``:``
    between session id and index, has a blank session id, or has a
    non-integer index. The session id is stripped of surrounding whitespace.
    """
    marker = "audio-segment:"
    if not audio_url or not audio_url.startswith(marker):
        return None
    # Split on the LAST colon so session ids may themselves contain ':'.
    session_part, colon, index_part = audio_url[len(marker) :].rpartition(":")
    session_id = str(session_part).strip()
    if not colon or not session_id:
        return None
    try:
        index = int(index_part)
    except ValueError:
        return None
    return (session_id, index)


def _voice_session_id_from_audio_url(audio_url: Optional[str]) -> Optional[str]:
    """Return the voice session id encoded in ``audio_url``, or ``None`` if it cannot be parsed."""
    parsed = _extract_segment_scope(audio_url)
    return parsed[0] if parsed else None


def _is_transcribe_failure(transcript_text: Optional[str]) -> bool:
    """Report whether a transcript is empty/missing or starts with the failure marker "转写失败"."""
    return not transcript_text or transcript_text.startswith("转写失败")


async def _find_existing_segment_by_index(
    db: AsyncSession,
    conversation_id: str,
    voice_session_id: str,
    segment_index: int,
) -> Optional[Segment]:
    """Return the newest Segment already stored for this segment index, or ``None``.

    Rows are matched on ``conversation_id`` and the idempotency key built by
    ``_build_segment_audio_url`` and ordered by ``created_at`` descending.
    """
    target_url = _build_segment_audio_url(voice_session_id, segment_index)
    query = (
        select(Segment)
        .where(
            Segment.conversation_id == conversation_id,
            Segment.audio_url == target_url,
        )
        .order_by(Segment.created_at.desc())
    )
    rows = (await db.execute(query)).scalars().all()
    # Re-validate each row in Python and return the first one that matches.
    return next(
        (
            row
            for row in rows
            if row.conversation_id == conversation_id and row.audio_url == target_url
        ),
        None,
    )


async def _get_persisted_contiguous_segment_index(
    db: AsyncSession,
    conversation_id: str,
    voice_session_id: str,
) -> int:
    """Return the highest index N such that segments 0..N of this voice session are all persisted.

    Used to restore ordering state after a reconnect.

    Args:
        db: Session used for the lookup.
        conversation_id: Conversation owning the segments.
        voice_session_id: Voice session whose segment indices are inspected.

    Returns:
        The largest contiguous persisted index starting from 0, or ``-1`` when
        segment 0 has not been persisted.
    """
    # Narrow the query to this voice session in the database instead of loading
    # every segment of the conversation. The prefix mirrors the key layout
    # "audio-segment:{voice_session_id}:{index}"; autoescape keeps '%' / '_' inside
    # the session id from acting as LIKE wildcards.
    session_prefix = f"audio-segment:{voice_session_id}:"
    stmt = select(Segment).where(
        Segment.conversation_id == conversation_id,
        Segment.audio_url.startswith(session_prefix, autoescape=True),
    )
    result = await db.execute(stmt)

    persisted_indices: Set[int] = set()
    for item in result.scalars().all():
        # The exact Python-side checks stay authoritative; the SQL filter is only
        # a pre-selection.
        if item.conversation_id != conversation_id:
            continue
        segment_scope = _extract_segment_scope(item.audio_url)
        if not segment_scope:
            continue
        item_voice_session_id, item_index = segment_scope
        if item_voice_session_id != voice_session_id:
            continue
        persisted_indices.add(item_index)

    # Walk upward from 0 until the first gap.
    contiguous_index = -1
    while contiguous_index + 1 in persisted_indices:
        contiguous_index += 1
    return contiguous_index


# ── 过渡反馈 ────────────────────────────────────────────────────

# Seconds _delayed_listening_feedback sleeps before sending the feedback.
LISTENING_FEEDBACK_DELAY_SEC = 5.0
# User-facing text of the transition feedback (sent verbatim to the client).
LISTENING_FEEDBACK_TEXT = "我在认真听，你继续说，我会边听边整理重点。"


async def _send_segment_transition_feedback(
    conversation_id: str,
    segment_index: int,
) -> None:
    """Send one "I'm listening" transition message (invoked by the delayed feedback task)."""
    feedback_data = {
        "text": LISTENING_FEEDBACK_TEXT,
        "transition": True,
        "segment_index": segment_index,
    }
    envelope = {
        "type": MessageType.AGENT_RESPONSE,
        "conversation_id": conversation_id,
        "data": feedback_data,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    await manager.send_message(conversation_id, envelope)


async def _delayed_listening_feedback(
    conversation_id: str,
    voice_session_id: str,
) -> None:
    """Wait LISTENING_FEEDBACK_DELAY_SEC, then send the listening feedback at most once per voice session.

    NOTE(review): nothing in this function checks whether the user has already
    stopped recording; presumably the caller cancels this task (kept in
    ``state.listening_feedback_task``) in that case — confirm.
    """
    await asyncio.sleep(LISTENING_FEEDBACK_DELAY_SEC)
    # NOTE(review): this re-creates the state entry if it was cleaned up during the sleep.
    state = get_or_create_segment_state(conversation_id, voice_session_id)
    async with state.lock:
        if state.listening_feedback_sent:
            return
        # Mark as sent and drop the task handle while still holding the lock.
        state.listening_feedback_sent = True
        state.listening_feedback_task = None
    # Sent after releasing the lock; the segment index reported to the client is always 0.
    await _send_segment_transition_feedback(conversation_id, 0)


# ── 分段语音异步处理 ────────────────────────────────────────────


async def process_audio_segment(
    conversation_id: str,
    user_id: str,
    voice_session_id: str,
    segment_index: int,
    audio_base64: str,
    audio_duration: int,
    is_last: bool,
) -> None:
    """Process one voice segment: ASR, idempotent persistence, then in-order hand-off to the agent.

    Segments of a voice session may be processed concurrently. Each transcript is
    buffered under its index, and only the contiguous run following
    ``state.consumed_index`` is passed to ``process_user_message``.

    Args:
        conversation_id: Conversation receiving the segment.
        user_id: Id used to load the ``User`` row.
        voice_session_id: Voice session the segment belongs to.
        segment_index: Position of the segment within the voice session.
        audio_base64: Base64-encoded audio; transcribed with ``format="m4a"``.
        audio_duration: Duration reported by the client; stored only when > 0.
        is_last: Echoed back to the client in the TRANSCRIPT message.

    Errors are reported to the client as ERROR messages and logged; nothing is raised
    from the main body.
    """
    state = get_or_create_segment_state(conversation_id, voice_session_id)
    logger.info(
        "process_audio_segment 开始: conversation_id={} voice_session_id={} "
        "segment_index={} is_last={} duration_s={} audio_b64_len={}",
        conversation_id,
        voice_session_id,
        segment_index,
        is_last,
        audio_duration,
        len(audio_base64 or ""),
    )

    try:
        # NOTE(review): this session stays open across the ASR call and the agent turn below.
        async with AsyncSessionLocal() as db:
            conversation = await db.get(Conversation, conversation_id)
            user = await db.get(User, user_id)
            # Missing or soft-deleted conversation: tell the client and stop.
            if not conversation or conversation.deleted_at is not None:
                await manager.send_message(
                    conversation_id,
                    {
                        "type": MessageType.ERROR,
                        "data": {"message": "对话不存在，分段处理已取消"},
                        "timestamp": datetime.now(timezone.utc).isoformat(),
                    },
                )
                return
            if not user:
                await manager.send_message(
                    conversation_id,
                    {
                        "type": MessageType.ERROR,
                        "data": {"message": "用户不存在，分段处理已取消"},
                        "timestamp": datetime.now(timezone.utc).isoformat(),
                    },
                )
                return

            # A completely fresh state (nothing consumed, processed or buffered) may belong
            # to a reconnect, so it is primed from the database below.
            async with state.lock:
                should_prime_state = (
                    state.consumed_index < 0
                    and not state.processed_indices
                    and not state.buffered_transcripts
                )

            if should_prime_state:
                persisted_contiguous_index = (
                    await _get_persisted_contiguous_segment_index(
                        db=db,
                        conversation_id=conversation_id,
                        voice_session_id=voice_session_id,
                    )
                )
                if persisted_contiguous_index >= 0:
                    async with state.lock:
                        # max() keeps any progress another task made while the lock was released.
                        state.consumed_index = max(
                            state.consumed_index, persisted_contiguous_index
                        )

            # Invalid base64 is treated as empty audio rather than raising.
            try:
                audio_bytes = base64.b64decode(audio_base64)
            except Exception:
                audio_bytes = b""
            if not audio_bytes:
                logger.warning(
                    "process_audio_segment: 解码后音频为空 conversation_id={} segment_index={}",
                    conversation_id,
                    segment_index,
                )
            # NOTE(review): empty audio is still sent to the ASR provider, and ASR runs before
            # the idempotency lookup further down, so duplicates are transcribed again.
            transcript_text = await get_asr_provider().transcribe(
                audio_bytes, format="m4a"
            )
            # The transcript is sent to the client before the failure check, so failure
            # texts are delivered as TRANSCRIPT too.
            await manager.send_message(
                conversation_id,
                {
                    "type": MessageType.TRANSCRIPT,
                    "conversation_id": conversation_id,
                    "data": {
                        "text": transcript_text or "",
                        "audio_duration": audio_duration,
                        "voice_session_id": voice_session_id,
                        "segment_index": segment_index,
                        "is_last": is_last,
                    },
                    "timestamp": datetime.now(timezone.utc).isoformat(),
                },
            )

            if _is_transcribe_failure(transcript_text):
                detail = (transcript_text or "").strip()
                if detail.startswith("转写失败"):
                    user_msg = f"分段 {segment_index} {detail}"
                elif not detail:
                    user_msg = f"分段 {segment_index} 转写失败：未识别到内容（请检查后端 ASR 配置）"
                else:
                    user_msg = f"分段 {segment_index} 转写失败：{detail[:400]}"
                await manager.send_message(
                    conversation_id,
                    {
                        "type": MessageType.ERROR,
                        "data": {
                            "message": user_msg,
                            "segment_index": segment_index,
                        },
                        "timestamp": datetime.now(timezone.utc).isoformat(),
                    },
                )
                # NOTE(review): returning here leaves consumed_index untouched, so later
                # indices stay buffered until this index is processed successfully
                # (e.g. resent by the client) — confirm this is intended.
                return

            # Idempotency: a segment with the same (voice session, index) key is not stored twice.
            # NOTE(review): the lookup and the insert are not atomic; two concurrent tasks
            # for the same index could both insert.
            existing_segment = await _find_existing_segment_by_index(
                db=db,
                conversation_id=conversation_id,
                voice_session_id=voice_session_id,
                segment_index=segment_index,
            )
            if existing_segment:
                async with state.lock:
                    state.processed_indices.add(segment_index)
                logger.debug(
                    "分段已存在，按幂等跳过: conversation_id={} voice_session_id={} "
                    "segment_index={} segment_id={} transcript={}",
                    conversation_id,
                    voice_session_id,
                    segment_index,
                    existing_segment.id,
                    existing_segment.user_input_text or "",
                )
                return
            else:
                segment = Segment(
                    id=str(uuid.uuid4()),
                    conversation_id=conversation_id,
                    user_input_text=transcript_text or "",
                    audio_url=_build_segment_audio_url(voice_session_id, segment_index),
                    audio_duration_seconds=audio_duration
                    if audio_duration > 0
                    else None,
                    processed=False,
                )
                db.add(segment)
                user_message_timestamp = _mark_conversation_active(conversation)
                await db.commit()
                await db.refresh(segment)
                # Hand the new segment to the background runner.
                await background_runner.queue_message(conversation.user_id, segment.id)

            # Buffer this transcript, then release every consecutive index that is now available.
            ready_segments: List[Tuple[int, str, Segment]] = []
            async with state.lock:
                state.processed_indices.add(segment_index)
                state.buffered_transcripts[segment_index] = (
                    transcript_text or "",
                    segment,
                )

                next_index = state.consumed_index + 1
                while next_index in state.buffered_transcripts:
                    text, seg = state.buffered_transcripts.pop(next_index)
                    ready_segments.append((next_index, text, seg))
                    state.consumed_index = next_index
                    next_index += 1

            # The agent turns run outside the lock, one released segment after another.
            # Released Segment objects may have been created by another task's session,
            # but they are processed with this task's db session.
            for _, ordered_text, ordered_segment in ready_segments:
                await process_user_message(
                    conversation_id=conversation_id,
                    user_message=ordered_text,
                    conversation=conversation,
                    segment=ordered_segment,
                    db=db,
                    user=user,
                    user_message_timestamp=ordered_segment.created_at
                    or user_message_timestamp,
                )

    except Exception as e:
        logger.error(
            f"处理语音分段失败: conversation_id={conversation_id}, segment_index={segment_index}, error={e}",
            exc_info=True,
        )
        # NOTE(review): the raw exception text is forwarded to the client.
        await manager.send_message(
            conversation_id,
            {
                "type": MessageType.ERROR,
                "data": {
                    "message": f"分段处理失败: {str(e)}",
                    "segment_index": segment_index,
                },
                "timestamp": datetime.now(timezone.utc).isoformat(),
            },
        )
    finally:
        # Always clear the pending marker, whatever path was taken above.
        async with state.lock:
            state.pending_indices.discard(segment_index)


# ── 用户消息处理 ────────────────────────────────────────────────


async def _persist_tts_audio_urls(
    store: ConversationHistoryStore,
    db: AsyncSession,
    conversation_id: str,
    segment_id: str,
    tts_urls: list[str],
) -> None:
    """Attach the TTS URLs to the stored AI message and to the Segment row, then commit."""
    await store.attach_ai_tts_audio_urls(
        conversation_id,
        tts_audio_urls=tts_urls,
        segment_id=segment_id,
    )
    await db.execute(
        update(Segment).where(Segment.id == segment_id).values(tts_audio_urls=tts_urls)
    )
    await db.commit()


async def process_user_message(
    conversation_id: str,
    user_message: str,
    conversation: Conversation,
    segment: Segment,
    db: AsyncSession,
    user: Optional[User] = None,
    user_message_timestamp: Optional[datetime] = None,
) -> None:
    """Run one user turn through ChatOrchestrator and deliver the reply (text + optional TTS).

    Steps: call the orchestrator, record the human/AI turn in the history store,
    send each reply part as AGENT_RESPONSE (followed by its TTS audio unless TTS is
    skipped), then persist the collected TTS URLs.

    Args:
        conversation_id: Conversation being processed.
        user_message: Text of the user turn (typed or transcribed).
        conversation: Loaded conversation row; its ``last_message_at`` is updated.
        segment: Segment row for this turn; ``agent_response`` is set on it.
        db: Session used by the history store and for the TTS URL update.
        user: User row passed through to the orchestrator (may be ``None``).
        user_message_timestamp: Timestamp recorded for the user message.

    Exceptions are logged and reported to the client as an ERROR message; they are
    not propagated to the caller.
    """
    store = ConversationHistoryStore(db)
    tts_urls: list[str] = []
    try:
        logger.info(
            "process_user_message 开始: conversation_id={} segment_id={} user_chars={}",
            conversation_id,
            segment.id,
            len(user_message or ""),
        )
        # A segment with an audio_url is treated as coming from voice.
        is_from_voice = bool(segment.audio_url)
        voice_session_id = _voice_session_id_from_audio_url(segment.audio_url)
        audio_dur = getattr(segment, "audio_duration_seconds", None)
        t_pipeline = time.perf_counter()
        turn = await chat_orchestrator.process_user_message(
            conversation_id=conversation_id,
            user_message=user_message,
            user=user,
            conversation=conversation,
            is_from_voice=is_from_voice,
            voice_session_id=voice_session_id,
            db=db,
            apply_extracted_profile_fn=apply_extracted_profile,
            get_missing_profile_fields_fn=get_missing_profile_fields,
            get_filled_profile_fields_fn=get_filled_profile_fields,
            user_message_timestamp=user_message_timestamp,
            audio_duration_seconds=audio_dur,
        )
        if agent_summary_enabled():
            logger.info(
                "pipeline.process_user_message duration_ms={:.2f} "
                "conversation_id={} segment_id={} user_msg_len={} "
                "response_segments={} skip_tts={}",
                (time.perf_counter() - t_pipeline) * 1000,
                conversation_id,
                segment.id,
                len(user_message or ""),
                len(turn.messages),
                turn.skip_tts,
            )
        responses = turn.messages
        skip_tts = turn.skip_tts

        # NOTE(review): this function only commits when TTS URLs were produced; confirm
        # that record_human_ai_turn commits agent_response / last_message_at otherwise.
        segment.agent_response = AI_RESPONSE_SEGMENT_JOIN.join(responses)
        _mark_conversation_active(conversation)
        ai_msg_id = await store.record_human_ai_turn(
            conversation_id=conversation_id,
            user_message=user_message,
            responses=responses,
            user_message_timestamp=user_message_timestamp,
            is_from_voice=is_from_voice,
            voice_session_id=voice_session_id,
            audio_duration_seconds=audio_dur,
            tts_audio_urls=None,
            segment_id=segment.id,
        )
        if not ai_msg_id:
            # No assistant message was recorded: tell the client and stop.
            logger.warning(
                "process_user_message: 无有效助手段落（responses 为空），conversation_id={} segment_id={}",
                conversation_id,
                segment.id,
            )
            if conversation_id in manager.active_connections:
                await manager.send_message(
                    conversation_id,
                    {
                        "type": MessageType.ERROR,
                        "data": {
                            "message": "未生成回复，请重试或稍后再试",
                        },
                        "timestamp": datetime.now(timezone.utc).isoformat(),
                    },
                )
            return

        # Snapshot the cancel epoch; any later change means the client cancelled TTS.
        tts_epoch_start = _tts_epoch_value(conversation_id)
        n = len(responses)
        for i, response_text in enumerate(responses):
            await manager.send_message(
                conversation_id,
                {
                    "type": MessageType.AGENT_RESPONSE,
                    "conversation_id": conversation_id,
                    "data": {
                        "text": response_text,
                        "index": i,
                        "total": n,
                        "assistant_message_id": ai_msg_id,
                    },
                    "timestamp": datetime.now(timezone.utc).isoformat(),
                },
            )
            url = None
            if not skip_tts:
                if _tts_epoch_value(conversation_id) != tts_epoch_start:
                    break
                url = await _send_tts_audio(
                    conversation_id,
                    response_text,
                    chunk_index=i,
                    chunk_total=n,
                    assistant_message_id=ai_msg_id,
                    tts_epoch_start=tts_epoch_start,
                )
            if url:
                tts_urls.append(url)
            # NOTE(review): a cancel breaks the whole loop, so the remaining text parts
            # are not sent over the socket either — confirm this is intended.
            if _tts_epoch_value(conversation_id) != tts_epoch_start:
                break
            # Short pause between consecutive reply parts.
            if i < n - 1:
                await asyncio.sleep(0.5)

        if tts_urls:
            await _persist_tts_audio_urls(
                store, db, conversation_id, segment.id, tts_urls
            )

    except Exception as e:
        # Keep the URLs of audio that was already uploaded even though the turn failed.
        if tts_urls:
            try:
                await _persist_tts_audio_urls(
                    store, db, conversation_id, segment.id, tts_urls
                )
            except Exception as persist_error:
                logger.warning("补写 TTS 元数据失败: {}", persist_error)
        logger.error(f"处理用户消息失败: {e}", exc_info=True)
        if conversation_id in manager.active_connections:
            try:
                await manager.send_message(
                    conversation_id,
                    {
                        "type": MessageType.ERROR,
                        "data": {"message": f"生成回应失败: {str(e)}"},
                        "timestamp": datetime.now(timezone.utc).isoformat(),
                    },
                )
            except Exception as send_error:
                logger.warning(f"发送错误消息失败: {send_error}")


# ── 对话结束处理 ────────────────────────────────────────────────


async def process_conversation_segments(
    conversation_id: str, db: AsyncSession, quota_service: "QuotaService"
):
    """
    Submit the conversation's unprocessed segments for chapter generation (called when a conversation ends).

    Most processing has already happened incrementally through Celery tasks; this
    submits whatever is still unprocessed in one Celery task and then flushes the
    background runner. The quota check goes through the injected ``quota_service``
    instead of importing quota internals directly.
    """
    conversation = await db.get(Conversation, conversation_id)
    if not conversation or conversation.deleted_at is not None:
        return

    pending_query = select(Segment).where(
        Segment.conversation_id == conversation_id,
        Segment.processed == False,
    )
    unprocessed = (await db.execute(pending_query)).scalars().all()

    # Decide whether a Celery task should be submitted; every path below ends in the
    # single flush at the bottom.
    should_submit = bool(unprocessed)
    if should_submit:
        owner = await db.get(User, conversation.user_id)
        if owner:
            quota_ok, _ = await quota_service.check_can_submit_organize(
                owner.id, owner.subscription_type
            )
            if not quota_ok:
                logger.info(
                    f"用户 {owner.id} 章节配额已用尽，跳过提交整理任务: conversation_id={conversation_id}"
                )
                should_submit = False

    if should_submit:
        pending_ids = [item.id for item in unprocessed]
        try:
            # Imported lazily; an import failure is handled like a submit failure.
            from app.tasks.memoir_tasks import process_memoir_segments

            process_memoir_segments.delay(conversation.user_id, pending_ids)
            logger.info(
                f"对话结束，提交 Celery 任务: conversation_id={conversation_id}, segments={len(pending_ids)}"
            )
        except Exception as e:
            logger.error(f"提交 Celery 任务失败: {e}")

    await background_runner.flush_pending(conversation.user_id)
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								"""核心消息处理管道：Agent 调用、ASR 转写、分段有序聚合"""
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								import asyncio
 								import base64
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								import time
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								import uuid
 								from dataclasses import dataclass, field
 								from datetime import datetime, timezone
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
-												fix/various fixes

											
										
										
											2026-03-20 15:15:35 +08:00
+								from app.core.logging import get_logger
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								if TYPE_CHECKING:
 								    from app.features.quota.service import QuotaService
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								from sqlalchemy import select, update
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								from sqlalchemy.ext.asyncio import AsyncSession
-												feat & refactor: 重构agents目录结构；AI回复模块agent结构封装

											
										
										
											2026-03-19 10:36:55 +08:00
+								from app.agents.chat import ChatOrchestrator
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								from app.core.agent_logging import agent_summary_enabled
-												fix/various fixes

											
										
										
											2026-03-20 15:15:35 +08:00
+								from app.core.config import settings
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								from app.core.cos_url_keys import TTS_PRESIGNED_EXPIRES_SEC
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								from app.core.db import AsyncSessionLocal
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								from app.core.dependencies import get_asr_provider, get_object_storage, get_tts_provider
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								from app.features.conversation.history_store import (
 								    AI_RESPONSE_SEGMENT_JOIN,
 								    ConversationHistoryStore,
 								)
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								from app.features.conversation.models import Conversation, Segment
 								from app.features.conversation.ws.connection_manager import manager
-												refactor(api,expo): 多智能体与会话收敛、回忆录兼容层移除、后端测试集大幅删减

- 对齐「多智能体收敛」与「回忆录 stories-first / markdown-first」方向：收紧运行时契约、
  删除过渡兼容路径与双轨逻辑，并同步更新客户端与文档。

- Chat：以 ChatOrchestrator 为实时编排入口；删除独立 conversation_agent，精简 prompts。
- Memoir：删除 memory_agent；MemoirOrchestrator、classification / story_route 与 prompts 收敛到
  prepare_batches + run_story_pipeline_for_category_batch 主链路。
- 将 agents 侧 processor 迁入 feature 层为 background_runner，并移除 features 下重复/过时
  processor 封装。

- 新增 history_store，强化「conversation_messages 为 DB 真源、Redis 为缓存」模型。
- 调整 models、repo、service、session_history；精简 WS message_types，重构 pipeline 与 router。

- 移除章节占位、整章再生等旧路径；章节列表与封面逻辑要求 story 关联；收紧 cover 资格与
  enqueue。
- helpers、repo、service、router、reading_segment_materialize、story_pipeline_sync、pdf_service
  等按 canonical markdown / cover_asset_id 收缩；删除 memoir_images/provider 等冗余。
- tasks：memoir_tasks、chapter_cover_tasks 等大幅瘦身；story_image_tasks 等与当前图片任务对齐。

- core：config、logging、redis、task_tracker 小幅调整。
- auth / user / payment / quota：路由或服务侧删减过时接口或逻辑（如 payment router 行数减少）。

- pyproject.toml、development.sh、.env.example / .env.production、README 等同步说明或变量。

- Alembic 0001_initial_schema 微调（与当前 schema 叙事一致的小改动）。

- 回忆录：types / mappers / api、章节页与 memoir 页与后端契约对齐；markdown-renderer 调整。
- 语音：删除 voice/player，voice-segment-store 相应精简。

- api/tests：删除 conftest 及绝大部分既有测试文件（websocket_baseline、conversation、memoir
  图片、PDF、SMS 等），属有意收缩/待按 backend-test-system 重建的信号。
- docs：新增多智能体收敛与移除兼容层计划摘要；更新 story-first 设计、backend-test-system、
  multi-agent-refactor-plan、实施总结等。

BREAKING CHANGE: 后端对外契约、回忆录章节字段与若干路由/任务行为已变更；大量 API 测试被移除，
  CI 若依赖这些用例需按新策略补测或调整流水线。

											
										
										
											2026-03-22 16:45:57 +08:00
+								from app.features.conversation.ws.message_types import MessageType
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								from app.features.conversation.ws.profile_collector import (
 								    apply_extracted_profile,
 								    get_filled_profile_fields,
 								    get_missing_profile_fields,
 								)
-												refactor(api,expo): 多智能体与会话收敛、回忆录兼容层移除、后端测试集大幅删减

- 对齐「多智能体收敛」与「回忆录 stories-first / markdown-first」方向：收紧运行时契约、
  删除过渡兼容路径与双轨逻辑，并同步更新客户端与文档。

- Chat：以 ChatOrchestrator 为实时编排入口；删除独立 conversation_agent，精简 prompts。
- Memoir：删除 memory_agent；MemoirOrchestrator、classification / story_route 与 prompts 收敛到
  prepare_batches + run_story_pipeline_for_category_batch 主链路。
- 将 agents 侧 processor 迁入 feature 层为 background_runner，并移除 features 下重复/过时
  processor 封装。

- 新增 history_store，强化「conversation_messages 为 DB 真源、Redis 为缓存」模型。
- 调整 models、repo、service、session_history；精简 WS message_types，重构 pipeline 与 router。

- 移除章节占位、整章再生等旧路径；章节列表与封面逻辑要求 story 关联；收紧 cover 资格与
  enqueue。
- helpers、repo、service、router、reading_segment_materialize、story_pipeline_sync、pdf_service
  等按 canonical markdown / cover_asset_id 收缩；删除 memoir_images/provider 等冗余。
- tasks：memoir_tasks、chapter_cover_tasks 等大幅瘦身；story_image_tasks 等与当前图片任务对齐。

- core：config、logging、redis、task_tracker 小幅调整。
- auth / user / payment / quota：路由或服务侧删减过时接口或逻辑（如 payment router 行数减少）。

- pyproject.toml、development.sh、.env.example / .env.production、README 等同步说明或变量。

- Alembic 0001_initial_schema 微调（与当前 schema 叙事一致的小改动）。

- 回忆录：types / mappers / api、章节页与 memoir 页与后端契约对齐；markdown-renderer 调整。
- 语音：删除 voice/player，voice-segment-store 相应精简。

- api/tests：删除 conftest 及绝大部分既有测试文件（websocket_baseline、conversation、memoir
  图片、PDF、SMS 等），属有意收缩/待按 backend-test-system 重建的信号。
- docs：新增多智能体收敛与移除兼容层计划摘要；更新 story-first 设计、backend-test-system、
  multi-agent-refactor-plan、实施总结等。

BREAKING CHANGE: 后端对外契约、回忆录章节字段与若干路由/任务行为已变更；大量 API 测试被移除，
  CI 若依赖这些用例需按新策略补测或调整流水线。

											
										
										
											2026-03-22 16:45:57 +08:00
+								from app.features.memoir.background_runner import BackgroundTaskRunner
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								from app.features.user.models import User
 								logger = get_logger(__name__)
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								# 客户端发送 tts_cancel 时递增；process_user_message 内 TTS 循环与合成前后对照，用于短路剩余片段
 								_tts_cancel_epoch: dict[str, int] = {}
 								def bump_tts_cancel_epoch(conversation_id: str) -> None:
 								    """Advance the TTS cancel epoch stored for ``conversation_id`` by one.

 								    A conversation without a stored epoch counts as being at 0, so its
 								    first bump stores 1.
 								    """
 								    previous_epoch = _tts_cancel_epoch.get(conversation_id, 0)
 								    _tts_cancel_epoch[conversation_id] = previous_epoch + 1
 								def _tts_epoch_value(conversation_id: str) -> int:
 								    """Return the current TTS cancel epoch for ``conversation_id`` (0 if none is stored)."""
 								    try:
 								        return _tts_cancel_epoch[conversation_id]
 								    except KeyError:
 								        return 0
-												feat/ 添加app-expo三种环境切换，待测试 调整tts

											
										
										
											2026-03-19 09:58:02 +08:00
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								def _tts_object_ext(codec: str) -> str:
 								    c = (codec or "mp3").lower().lstrip(".")
 								    if c in ("wave",):
 								        return "wav"
 								    return c if c else "mp3"
 								def _tts_codec_to_content_type(codec: str) -> str:
 								    c = (codec or "mp3").lower().lstrip(".")
 								    if c == "mp3":
 								        return "audio/mpeg"
 								    if c in ("wav", "wave"):
 								        return "audio/wav"
 								    return "application/octet-stream"
 								async def _send_tts_audio(
 								    conversation_id: str,
 								    text: str,
 								    *,
 								    chunk_index: int,
 								    chunk_total: int,
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								    assistant_message_id: str | None,
 								    tts_epoch_start: int,
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								) -> str | None:
 								    """Synthesize TTS, upload to COS, append Redis, send TTS_AUDIO. Returns public URL or None."""
-												fix/various fixes

											
										
										
											2026-03-20 15:15:35 +08:00
+								    if not settings.enable_tts:
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								        return None
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								    if _tts_epoch_value(conversation_id) != tts_epoch_start:
 								        return None
-												feat/ 添加app-expo三种环境切换，待测试 调整tts

											
										
										
											2026-03-19 09:58:02 +08:00
+								    try:
 								        tts = get_tts_provider()
 								        audio_bytes = await tts.synthesize(text)
 								        if not audio_bytes:
 								            logger.warning(
 								                "TTS skipped: synthesize returned empty. Check TTS config in .env"
 								            )
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								            return None
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								        if _tts_epoch_value(conversation_id) != tts_epoch_start:
 								            return None
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								        ext = _tts_object_ext(settings.tts_codec)
 								        content_type = _tts_codec_to_content_type(settings.tts_codec)
 								        storage = get_object_storage()
 								        key = f"conversations/{conversation_id}/tts/{uuid.uuid4().hex}.{ext}"
 								        public_url = storage.upload(key, audio_bytes, content_type)
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								        # 与 `tts_delivery.apply_presigned_tts_urls_to_messages` / 回忆录图片 presign 一致：下发可播 URL
 								        playback_url = storage.get_url(key, expires=TTS_PRESIGNED_EXPIRES_SEC)
 								        payload_data: Dict[str, Any] = {
 								            "audio_base64": base64.b64encode(audio_bytes).decode("utf-8"),
 								            "format": settings.tts_codec,
 								            "audio_url": playback_url,
 								            "index": chunk_index,
 								            "total": chunk_total,
 								        }
 								        if assistant_message_id:
 								            payload_data["assistant_message_id"] = assistant_message_id
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								        await manager.send_message(
 								            conversation_id,
 								            {
 								                "type": MessageType.TTS_AUDIO,
 								                "conversation_id": conversation_id,
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								                "data": payload_data,
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								                "timestamp": datetime.now(timezone.utc).isoformat(),
-												feat/ 添加app-expo三种环境切换，待测试 调整tts

											
										
										
											2026-03-19 09:58:02 +08:00
+								            },
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								        )
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								        return public_url
-												feat/ 添加app-expo三种环境切换，待测试 调整tts

											
										
										
											2026-03-19 09:58:02 +08:00
+								    except Exception as e:
 								        err_str = str(e)
 								        if "PkgExhausted" in err_str:
 								            logger.warning(
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								                "TTS skipped: 腾讯云语音合成资源包已用尽，请在控制台购买或开通后付费: {}",
-												feat/ 添加app-expo三种环境切换，待测试 调整tts

											
										
										
											2026-03-19 09:58:02 +08:00
+								                err_str[:100],
 								            )
 								        else:
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								            logger.error("TTS synthesize failed: {}", e)
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								        return None
-												feat/ 添加app-expo三种环境切换，待测试 调整tts

											
										
										
											2026-03-19 09:58:02 +08:00
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								# ── Agent 实例（从 ConnectionManager 移出） ─────────────────────
-												feat & refactor: 重构agents目录结构；AI回复模块agent结构封装

											
										
										
											2026-03-19 10:36:55 +08:00
+								chat_orchestrator = ChatOrchestrator()
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								background_runner = BackgroundTaskRunner()
 								# ── 分段流状态 ──────────────────────────────────────────────────
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								@dataclass
 								class SegmentStreamState:
 								    """会话内分段处理状态（用于并行 ASR + 有序聚合）"""
 								    lock: asyncio.Lock = field(default_factory=asyncio.Lock)
 								    pending_indices: Set[int] = field(default_factory=set)
 								    processed_indices: Set[int] = field(default_factory=set)
 								    buffered_transcripts: Dict[int, Tuple[str, Segment]] = field(default_factory=dict)
 								    consumed_index: int = -1
 								    active_tasks: Set[asyncio.Task] = field(default_factory=set)
 								    listening_feedback_sent: bool = False
 								    listening_feedback_task: Optional[asyncio.Task] = None
 								# Module-level registry of segment stream states, keyed by
 								# (conversation_id, voice_session_id).
 								_segment_states: Dict[Tuple[str, str], SegmentStreamState] = {}
 								def get_or_create_segment_state(
 								    conversation_id: str,
 								    voice_session_id: str,
 								) -> SegmentStreamState:
 								    """Return the stream state for this conversation and voice session.

 								    A fresh ``SegmentStreamState`` is created and registered the first time a
 								    given ``(conversation_id, voice_session_id)`` pair is requested.
 								    """
 								    lookup_key = (conversation_id, voice_session_id)
 								    existing = _segment_states.get(lookup_key)
 								    if existing is None:
 								        # Build the state only on a miss so no throwaway instance is created.
 								        existing = SegmentStreamState()
 								        _segment_states[lookup_key] = existing
 								    return existing
 								def register_segment_task(
 								    conversation_id: str,
 								    voice_session_id: str,
 								    task: asyncio.Task,
 								) -> None:
 								    """Track ``task`` as an active segment task and arrange its cleanup.

 								    The task is added to ``active_tasks`` of the state belonging to
 								    ``(conversation_id, voice_session_id)``. When the task finishes, a
 								    done-callback removes it again, drops the whole state entry once no task
 								    is left and the conversation is no longer in
 								    ``manager.active_connections``, and logs the exception the task ended
 								    with, if any.
 								    """
 								    state_key = (conversation_id, voice_session_id)
 								    state = get_or_create_segment_state(conversation_id, voice_session_id)
 								    state.active_tasks.add(task)

 								    def _cleanup(done_task: asyncio.Task) -> None:
 								        state.active_tasks.discard(done_task)
 								        if not state.active_tasks and conversation_id not in manager.active_connections:
 								            _segment_states.pop(state_key, None)
 								        # Task.exception() raises CancelledError for a cancelled task, so stop first.
 								        if done_task.cancelled():
 								            return
 								        exc = done_task.exception()
 								        if exc:
 								            # Values go in as arguments, matching the `{}` placeholder style of the
 								            # other log calls in this file, so braces inside the exception text are
 								            # never re-read as format fields. The exception object is passed
 								            # explicitly because no exception is being handled inside a done-callback.
 								            logger.error(
 								                "分段处理任务异常 (conversation_id={}, voice_session_id={}): {}",
 								                conversation_id,
 								                voice_session_id,
 								                exc,
 								                exc_info=exc,
 								            )

 								    task.add_done_callback(_cleanup)
 								def cleanup_segment_states(conversation_id: str) -> None:
 								    """Drop segment states of ``conversation_id`` that have no active tasks left."""
 								    # Walk a snapshot so entries can be removed from the registry on the fly.
 								    for state_key, state in list(_segment_states.items()):
 								        if state_key[0] != conversation_id or state.active_tasks:
 								            continue
 								        _segment_states.pop(state_key, None)
 								# ── 工具函数 ────────────────────────────────────────────────────
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								def _utc_now() -> datetime:
 								    return datetime.now(timezone.utc)
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								def _mark_conversation_active(
 								    conversation: Conversation, at: Optional[datetime] = None
 								) -> datetime:
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								    activity_time = at or _utc_now()
 								    conversation.last_message_at = activity_time
 								    return activity_time
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								def _voice_session_id_from_client_segment_id(
 								    client_segment_id: Optional[str],
 								) -> Optional[str]:
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								    if not client_segment_id:
 								        return None
 								    session_id, separator, _ = client_segment_id.rpartition("-")
 								    if separator and session_id:
 								        return session_id
 								    return None
 								def _build_segment_audio_url(voice_session_id: str, segment_index: int) -> str:
 								    """构建分段语音的幂等标识（conversation_id + voice_session_id + segment_index）。"""
 								    return f"audio-segment:{voice_session_id}:{segment_index}"
 								def _extract_segment_scope(audio_url: Optional[str]) -> Optional[Tuple[str, int]]:
-												refactor(api,expo): 多智能体与会话收敛、回忆录兼容层移除、后端测试集大幅删减

- 对齐「多智能体收敛」与「回忆录 stories-first / markdown-first」方向：收紧运行时契约、
  删除过渡兼容路径与双轨逻辑，并同步更新客户端与文档。

- Chat：以 ChatOrchestrator 为实时编排入口；删除独立 conversation_agent，精简 prompts。
- Memoir：删除 memory_agent；MemoirOrchestrator、classification / story_route 与 prompts 收敛到
  prepare_batches + run_story_pipeline_for_category_batch 主链路。
- 将 agents 侧 processor 迁入 feature 层为 background_runner，并移除 features 下重复/过时
  processor 封装。

- 新增 history_store，强化「conversation_messages 为 DB 真源、Redis 为缓存」模型。
- 调整 models、repo、service、session_history；精简 WS message_types，重构 pipeline 与 router。

- 移除章节占位、整章再生等旧路径；章节列表与封面逻辑要求 story 关联；收紧 cover 资格与
  enqueue。
- helpers、repo、service、router、reading_segment_materialize、story_pipeline_sync、pdf_service
  等按 canonical markdown / cover_asset_id 收缩；删除 memoir_images/provider 等冗余。
- tasks：memoir_tasks、chapter_cover_tasks 等大幅瘦身；story_image_tasks 等与当前图片任务对齐。

- core：config、logging、redis、task_tracker 小幅调整。
- auth / user / payment / quota：路由或服务侧删减过时接口或逻辑（如 payment router 行数减少）。

- pyproject.toml、development.sh、.env.example / .env.production、README 等同步说明或变量。

- Alembic 0001_initial_schema 微调（与当前 schema 叙事一致的小改动）。

- 回忆录：types / mappers / api、章节页与 memoir 页与后端契约对齐；markdown-renderer 调整。
- 语音：删除 voice/player，voice-segment-store 相应精简。

- api/tests：删除 conftest 及绝大部分既有测试文件（websocket_baseline、conversation、memoir
  图片、PDF、SMS 等），属有意收缩/待按 backend-test-system 重建的信号。
- docs：新增多智能体收敛与移除兼容层计划摘要；更新 story-first 设计、backend-test-system、
  multi-agent-refactor-plan、实施总结等。

BREAKING CHANGE: 后端对外契约、回忆录章节字段与若干路由/任务行为已变更；大量 API 测试被移除，
  CI 若依赖这些用例需按新策略补测或调整流水线。

											
										
										
											2026-03-22 16:45:57 +08:00
+								    """从 audio_url 解析 voice_session_id 与 segment_index（audio-segment:{session_id}:{index}）。"""
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								    prefix = "audio-segment:"
 								    if not audio_url or not audio_url.startswith(prefix):
 								        return None
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								    payload = audio_url[len(prefix) :]
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								    voice_session_id_raw, separator, segment_index_raw = payload.rpartition(":")
-												refactor(api,expo): 多智能体与会话收敛、回忆录兼容层移除、后端测试集大幅删减

- 对齐「多智能体收敛」与「回忆录 stories-first / markdown-first」方向：收紧运行时契约、
  删除过渡兼容路径与双轨逻辑，并同步更新客户端与文档。

- Chat：以 ChatOrchestrator 为实时编排入口；删除独立 conversation_agent，精简 prompts。
- Memoir：删除 memory_agent；MemoirOrchestrator、classification / story_route 与 prompts 收敛到
  prepare_batches + run_story_pipeline_for_category_batch 主链路。
- 将 agents 侧 processor 迁入 feature 层为 background_runner，并移除 features 下重复/过时
  processor 封装。

- 新增 history_store，强化「conversation_messages 为 DB 真源、Redis 为缓存」模型。
- 调整 models、repo、service、session_history；精简 WS message_types，重构 pipeline 与 router。

- 移除章节占位、整章再生等旧路径；章节列表与封面逻辑要求 story 关联；收紧 cover 资格与
  enqueue。
- helpers、repo、service、router、reading_segment_materialize、story_pipeline_sync、pdf_service
  等按 canonical markdown / cover_asset_id 收缩；删除 memoir_images/provider 等冗余。
- tasks：memoir_tasks、chapter_cover_tasks 等大幅瘦身；story_image_tasks 等与当前图片任务对齐。

- core：config、logging、redis、task_tracker 小幅调整。
- auth / user / payment / quota：路由或服务侧删减过时接口或逻辑（如 payment router 行数减少）。

- pyproject.toml、development.sh、.env.example / .env.production、README 等同步说明或变量。

- Alembic 0001_initial_schema 微调（与当前 schema 叙事一致的小改动）。

- 回忆录：types / mappers / api、章节页与 memoir 页与后端契约对齐；markdown-renderer 调整。
- 语音：删除 voice/player，voice-segment-store 相应精简。

- api/tests：删除 conftest 及绝大部分既有测试文件（websocket_baseline、conversation、memoir
  图片、PDF、SMS 等），属有意收缩/待按 backend-test-system 重建的信号。
- docs：新增多智能体收敛与移除兼容层计划摘要；更新 story-first 设计、backend-test-system、
  multi-agent-refactor-plan、实施总结等。

BREAKING CHANGE: 后端对外契约、回忆录章节字段与若干路由/任务行为已变更；大量 API 测试被移除，
  CI 若依赖这些用例需按新策略补测或调整流水线。

											
										
										
											2026-03-22 16:45:57 +08:00
+								    if not separator:
 								        return None
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								    try:
-												refactor(api,expo): 多智能体与会话收敛、回忆录兼容层移除、后端测试集大幅删减

- 对齐「多智能体收敛」与「回忆录 stories-first / markdown-first」方向：收紧运行时契约、
  删除过渡兼容路径与双轨逻辑，并同步更新客户端与文档。

- Chat：以 ChatOrchestrator 为实时编排入口；删除独立 conversation_agent，精简 prompts。
- Memoir：删除 memory_agent；MemoirOrchestrator、classification / story_route 与 prompts 收敛到
  prepare_batches + run_story_pipeline_for_category_batch 主链路。
- 将 agents 侧 processor 迁入 feature 层为 background_runner，并移除 features 下重复/过时
  processor 封装。

- 新增 history_store，强化「conversation_messages 为 DB 真源、Redis 为缓存」模型。
- 调整 models、repo、service、session_history；精简 WS message_types，重构 pipeline 与 router。

- 移除章节占位、整章再生等旧路径；章节列表与封面逻辑要求 story 关联；收紧 cover 资格与
  enqueue。
- helpers、repo、service、router、reading_segment_materialize、story_pipeline_sync、pdf_service
  等按 canonical markdown / cover_asset_id 收缩；删除 memoir_images/provider 等冗余。
- tasks：memoir_tasks、chapter_cover_tasks 等大幅瘦身；story_image_tasks 等与当前图片任务对齐。

- core：config、logging、redis、task_tracker 小幅调整。
- auth / user / payment / quota：路由或服务侧删减过时接口或逻辑（如 payment router 行数减少）。

- pyproject.toml、development.sh、.env.example / .env.production、README 等同步说明或变量。

- Alembic 0001_initial_schema 微调（与当前 schema 叙事一致的小改动）。

- 回忆录：types / mappers / api、章节页与 memoir 页与后端契约对齐；markdown-renderer 调整。
- 语音：删除 voice/player，voice-segment-store 相应精简。

- api/tests：删除 conftest 及绝大部分既有测试文件（websocket_baseline、conversation、memoir
  图片、PDF、SMS 等），属有意收缩/待按 backend-test-system 重建的信号。
- docs：新增多智能体收敛与移除兼容层计划摘要；更新 story-first 设计、backend-test-system、
  multi-agent-refactor-plan、实施总结等。

BREAKING CHANGE: 后端对外契约、回忆录章节字段与若干路由/任务行为已变更；大量 API 测试被移除，
  CI 若依赖这些用例需按新策略补测或调整流水线。

											
										
										
											2026-03-22 16:45:57 +08:00
+								        sid = str(voice_session_id_raw).strip()
 								        if not sid:
 								            return None
 								        return (sid, int(segment_index_raw))
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								    except ValueError:
 								        return None
 								def _voice_session_id_from_audio_url(audio_url: Optional[str]) -> Optional[str]:
 								    """Return the voice session id parsed from ``audio_url``, or ``None`` if it cannot be parsed."""
 								    parsed_scope = _extract_segment_scope(audio_url)
 								    return parsed_scope[0] if parsed_scope else None
 								def _is_transcribe_failure(transcript_text: Optional[str]) -> bool:
 								    if not transcript_text:
 								        return True
 								    return transcript_text.startswith("转写失败")
 								async def _find_existing_segment_by_index(
 								    db: AsyncSession,
 								    conversation_id: str,
 								    voice_session_id: str,
 								    segment_index: int,
 								) -> Optional[Segment]:
 								    segment_audio_url = _build_segment_audio_url(voice_session_id, segment_index)
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								    stmt = (
 								        select(Segment)
 								        .where(
 								            Segment.conversation_id == conversation_id,
 								            Segment.audio_url == segment_audio_url,
 								        )
 								        .order_by(Segment.created_at.desc())
 								    )
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								    result = await db.execute(stmt)
 								    candidates = result.scalars().all()
 								    for item in candidates:
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								        if (
 								            item.conversation_id == conversation_id
 								            and item.audio_url == segment_audio_url
 								        ):
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            return item
 								    return None
 								async def _get_persisted_contiguous_segment_index(
 								    db: AsyncSession,
 								    conversation_id: str,
 								    voice_session_id: str,
 								) -> int:
 								    """Return the highest contiguous segment index stored for a voice session.

 								    Used to restore the consumed position after a reconnect. Indices are
 								    parsed from each segment's ``audio_url``; counting starts at 0 and stops
 								    at the first gap.

 								    Args:
 								        db: Session used to query ``Segment`` rows.
 								        conversation_id: Conversation whose segments are inspected.
 								        voice_session_id: Voice session the segments must belong to.

 								    Returns:
 								        The largest ``k`` such that indices ``0..k`` are all stored, or ``-1``
 								        when index 0 is missing.
 								    """
 								    # Let the database discard rows of other voice sessions instead of loading
 								    # every segment of the conversation. autoescape keeps `%` / `_` inside the
 								    # session id from acting as LIKE wildcards.
 								    session_prefix = f"audio-segment:{voice_session_id}:"
 								    stmt = select(Segment).where(
 								        Segment.conversation_id == conversation_id,
 								        Segment.audio_url.startswith(session_prefix, autoescape=True),
 								    )
 								    result = await db.execute(stmt)
 								    persisted_indices: Set[int] = set()
 								    for item in result.scalars().all():
 								        # The SQL filter is only a pre-filter; the exact checks stay in Python.
 								        if item.conversation_id != conversation_id:
 								            continue
 								        segment_scope = _extract_segment_scope(item.audio_url)
 								        if not segment_scope:
 								            continue
 								        item_voice_session_id, item_index = segment_scope
 								        if item_voice_session_id == voice_session_id:
 								            persisted_indices.add(item_index)
 								    contiguous_index = -1
 								    while contiguous_index + 1 in persisted_indices:
 								        contiguous_index += 1
 								    return contiguous_index
 								# ── 过渡反馈 ────────────────────────────────────────────────────
 								# Seconds the delayed listening-feedback task sleeps before sending.
 								LISTENING_FEEDBACK_DELAY_SEC = 5.0
 								# Text sent to the client as the transition feedback message (roughly:
 								# "I'm listening carefully, go on; I'll sort out the key points as I listen.").
 								LISTENING_FEEDBACK_TEXT = "我在认真听，你继续说，我会边听边整理重点。"
 								async def _send_segment_transition_feedback(
 								    conversation_id: str,
 								    segment_index: int,
 								) -> None:
 								    """发送一次「我在认真听」陪伴式过渡反馈（由延迟任务调用）。"""
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								    await manager.send_message(
 								        conversation_id,
 								        {
 								            "type": MessageType.AGENT_RESPONSE,
 								            "conversation_id": conversation_id,
 								            "data": {
 								                "text": LISTENING_FEEDBACK_TEXT,
 								                "transition": True,
 								                "segment_index": segment_index,
 								            },
 								            "timestamp": datetime.now(timezone.utc).isoformat(),
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								        },
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								    )
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
 								async def _delayed_listening_feedback(
 								    conversation_id: str,
 								    voice_session_id: str,
 								) -> None:
 								    """Send the "I'm listening" transition feedback once, after a fixed delay.

 								    Sleeps for ``LISTENING_FEEDBACK_DELAY_SEC`` seconds, then sends the
 								    feedback unless the state for ``(conversation_id, voice_session_id)``
 								    already records it as sent.

 								    NOTE(review): the original docstring also states that nothing is sent
 								    when the user has already stopped recording. No such check is visible in
 								    this function; presumably the caller cancels this task in that case.
 								    Confirm against the caller.
 								    """
 								    await asyncio.sleep(LISTENING_FEEDBACK_DELAY_SEC)
 								    state = get_or_create_segment_state(conversation_id, voice_session_id)
 								    async with state.lock:
 								        # Already sent for this state: nothing left to do.
 								        if state.listening_feedback_sent:
 								            return
 								        # Mark as sent and clear the task reference while holding the lock.
 								        state.listening_feedback_sent = True
 								        state.listening_feedback_task = None
 								    # The message goes out after the lock is released; the segment index
 								    # reported with it is always 0.
 								    await _send_segment_transition_feedback(conversation_id, 0)
 								# ── 分段语音异步处理 ────────────────────────────────────────────
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
async def process_audio_segment(
    conversation_id: str,
    user_id: str,
    voice_session_id: str,
    segment_index: int,
    audio_base64: str,
    audio_duration: int,
    is_last: bool,
) -> None:
    """Handle one uploaded voice segment: transcribe, persist once, dispatch in order.

    Steps, in order:

    1. Load the conversation and user; abort with an ERROR frame if either is
       missing (or the conversation is soft-deleted).
    2. If the in-memory segment state is still untouched, seed its
       ``consumed_index`` from the contiguous segment index already persisted.
    3. Base64-decode the audio, run ASR on it and push a TRANSCRIPT frame.
    4. On a transcription failure, push an ERROR frame and stop.
    5. If a segment with this index already exists, skip it (idempotent replay).
       Otherwise insert a new ``Segment``, commit, and queue it on the
       background runner.
    6. Buffer the transcript, then hand every now-contiguous segment to
       ``process_user_message`` in ascending index order.

    Any exception is logged and reported to the client as an ERROR frame; it is
    not re-raised. ``segment_index`` is always removed from
    ``state.pending_indices`` on exit.

    Args:
        conversation_id: Conversation the segment belongs to.
        user_id: Owner of the conversation.
        voice_session_id: Identifier grouping the segments of one recording.
        segment_index: Position of this segment within the voice session.
        audio_base64: Base64-encoded audio payload (sent to ASR as ``m4a``).
        audio_duration: Reported duration; stored only when greater than zero.
        is_last: Echoed back to the client in the TRANSCRIPT frame.
    """
    state = get_or_create_segment_state(conversation_id, voice_session_id)
    logger.info(
        "process_audio_segment 开始: conversation_id={} voice_session_id={} "
        "segment_index={} is_last={} duration_s={} audio_b64_len={}",
        conversation_id,
        voice_session_id,
        segment_index,
        is_last,
        audio_duration,
        len(audio_base64 or ""),
    )

    async def _send_error(message: str, *, with_index: bool = False) -> None:
        # Push an ERROR frame to the client, optionally tagged with this segment's index.
        data: Dict[str, Any] = {"message": message}
        if with_index:
            data["segment_index"] = segment_index
        await manager.send_message(
            conversation_id,
            {
                "type": MessageType.ERROR,
                "data": data,
                "timestamp": datetime.now(timezone.utc).isoformat(),
            },
        )

    try:
        async with AsyncSessionLocal() as db:
            conversation = await db.get(Conversation, conversation_id)
            user = await db.get(User, user_id)
            if not conversation or conversation.deleted_at is not None:
                await _send_error("对话不存在，分段处理已取消")
                return
            if not user:
                await _send_error("用户不存在，分段处理已取消")
                return

            # Only a completely untouched state is primed from the database.
            # The check is made under the lock; the query itself runs outside it.
            async with state.lock:
                should_prime_state = (
                    state.consumed_index < 0
                    and not state.processed_indices
                    and not state.buffered_transcripts
                )
            if should_prime_state:
                persisted_contiguous_index = (
                    await _get_persisted_contiguous_segment_index(
                        db=db,
                        conversation_id=conversation_id,
                        voice_session_id=voice_session_id,
                    )
                )
                if persisted_contiguous_index >= 0:
                    async with state.lock:
                        # max() keeps any progress made while the query was running.
                        state.consumed_index = max(
                            state.consumed_index, persisted_contiguous_index
                        )

            # b64decode raises TypeError for non-string/bytes input and ValueError
            # (including binascii.Error) for malformed data; both degrade to
            # empty audio rather than aborting here.
            try:
                audio_bytes = base64.b64decode(audio_base64)
            except (TypeError, ValueError):
                audio_bytes = b""
            if not audio_bytes:
                # Logged only: the empty payload is still passed to ASR below.
                logger.warning(
                    "process_audio_segment: 解码后音频为空 conversation_id={} segment_index={}",
                    conversation_id,
                    segment_index,
                )

            transcript_text = await get_asr_provider().transcribe(
                audio_bytes, format="m4a"
            )
            # The transcript frame is sent before the failure check, so the client
            # receives it even when the text turns out to be a failure marker.
            await manager.send_message(
                conversation_id,
                {
                    "type": MessageType.TRANSCRIPT,
                    "conversation_id": conversation_id,
                    "data": {
                        "text": transcript_text or "",
                        "audio_duration": audio_duration,
                        "voice_session_id": voice_session_id,
                        "segment_index": segment_index,
                        "is_last": is_last,
                    },
                    "timestamp": datetime.now(timezone.utc).isoformat(),
                },
            )

            if _is_transcribe_failure(transcript_text):
                detail = (transcript_text or "").strip()
                if detail.startswith("转写失败"):
                    user_msg = f"分段 {segment_index} {detail}"
                elif not detail:
                    user_msg = f"分段 {segment_index} 转写失败：未识别到内容（请检查后端 ASR 配置）"
                else:
                    user_msg = f"分段 {segment_index} 转写失败：{detail[:400]}"
                await _send_error(user_msg, with_index=True)
                # NOTE(review): this path neither buffers nor consumes
                # segment_index, so later segments of the same voice session
                # appear to stay buffered because the drain loop below only
                # advances over contiguous indices. Confirm whether the state
                # is reset elsewhere.
                return

            existing_segment = await _find_existing_segment_by_index(
                db=db,
                conversation_id=conversation_id,
                voice_session_id=voice_session_id,
                segment_index=segment_index,
            )
            if existing_segment:
                # Idempotent replay: record the index as processed and stop
                # without inserting or dispatching again.
                async with state.lock:
                    state.processed_indices.add(segment_index)
                logger.debug(
                    "分段已存在，按幂等跳过: conversation_id={} voice_session_id={} "
                    "segment_index={} segment_id={} transcript={}",
                    conversation_id,
                    voice_session_id,
                    segment_index,
                    existing_segment.id,
                    existing_segment.user_input_text or "",
                )
                return

            segment = Segment(
                id=str(uuid.uuid4()),
                conversation_id=conversation_id,
                user_input_text=transcript_text or "",
                audio_url=_build_segment_audio_url(voice_session_id, segment_index),
                audio_duration_seconds=audio_duration if audio_duration > 0 else None,
                processed=False,
            )
            db.add(segment)
            user_message_timestamp = _mark_conversation_active(conversation)
            await db.commit()
            await db.refresh(segment)
            await background_runner.queue_message(conversation.user_id, segment.id)

            # Buffer this transcript, then drain every index that directly
            # follows the last consumed one so dispatch order is ascending.
            ready_segments: List[Tuple[int, str, Segment]] = []
            async with state.lock:
                state.processed_indices.add(segment_index)
                state.buffered_transcripts[segment_index] = (
                    transcript_text or "",
                    segment,
                )
                next_index = state.consumed_index + 1
                while next_index in state.buffered_transcripts:
                    text, seg = state.buffered_transcripts.pop(next_index)
                    ready_segments.append((next_index, text, seg))
                    state.consumed_index = next_index
                    next_index += 1

            # Dispatch happens outside the lock.
            for _, ordered_text, ordered_segment in ready_segments:
                await process_user_message(
                    conversation_id=conversation_id,
                    user_message=ordered_text,
                    conversation=conversation,
                    segment=ordered_segment,
                    db=db,
                    user=user,
                    user_message_timestamp=ordered_segment.created_at
                    or user_message_timestamp,
                )
    except Exception as e:
        # Top-level boundary for the segment: log, tell the client, do not re-raise.
        # The exception is passed as an argument rather than pre-formatted into
        # the template, matching the other log calls in this function and
        # keeping any braces in the exception text out of the template.
        logger.error(
            "处理语音分段失败: conversation_id={}, segment_index={}, error={}",
            conversation_id,
            segment_index,
            e,
            exc_info=True,
        )
        await _send_error(f"分段处理失败: {str(e)}", with_index=True)
    finally:
        async with state.lock:
            state.pending_indices.discard(segment_index)
 								# ── 用户消息处理 ────────────────────────────────────────────────
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								async def process_user_message(
 								    conversation_id: str,
 								    user_message: str,
 								    conversation: Conversation,
 								    segment: Segment,
 								    db: AsyncSession,
 								    user: User = None,
 								    user_message_timestamp: Optional[datetime] = None,
 								) -> None:
-												feat & refactor: 重构agents目录结构；AI回复模块agent结构封装

											
										
										
											2026-03-19 10:36:55 +08:00
+								    """处理用户消息，生成 Agent 回应。由 ChatOrchestrator 路由到 ProfileAgent 或 InterviewAgent。"""
-												refactor(api,expo): 多智能体与会话收敛、回忆录兼容层移除、后端测试集大幅删减

- 对齐「多智能体收敛」与「回忆录 stories-first / markdown-first」方向：收紧运行时契约、
  删除过渡兼容路径与双轨逻辑，并同步更新客户端与文档。

- Chat：以 ChatOrchestrator 为实时编排入口；删除独立 conversation_agent，精简 prompts。
- Memoir：删除 memory_agent；MemoirOrchestrator、classification / story_route 与 prompts 收敛到
  prepare_batches + run_story_pipeline_for_category_batch 主链路。
- 将 agents 侧 processor 迁入 feature 层为 background_runner，并移除 features 下重复/过时
  processor 封装。

- 新增 history_store，强化「conversation_messages 为 DB 真源、Redis 为缓存」模型。
- 调整 models、repo、service、session_history；精简 WS message_types，重构 pipeline 与 router。

- 移除章节占位、整章再生等旧路径；章节列表与封面逻辑要求 story 关联；收紧 cover 资格与
  enqueue。
- helpers、repo、service、router、reading_segment_materialize、story_pipeline_sync、pdf_service
  等按 canonical markdown / cover_asset_id 收缩；删除 memoir_images/provider 等冗余。
- tasks：memoir_tasks、chapter_cover_tasks 等大幅瘦身；story_image_tasks 等与当前图片任务对齐。

- core：config、logging、redis、task_tracker 小幅调整。
- auth / user / payment / quota：路由或服务侧删减过时接口或逻辑（如 payment router 行数减少）。

- pyproject.toml、development.sh、.env.example / .env.production、README 等同步说明或变量。

- Alembic 0001_initial_schema 微调（与当前 schema 叙事一致的小改动）。

- 回忆录：types / mappers / api、章节页与 memoir 页与后端契约对齐；markdown-renderer 调整。
- 语音：删除 voice/player，voice-segment-store 相应精简。

- api/tests：删除 conftest 及绝大部分既有测试文件（websocket_baseline、conversation、memoir
  图片、PDF、SMS 等），属有意收缩/待按 backend-test-system 重建的信号。
- docs：新增多智能体收敛与移除兼容层计划摘要；更新 story-first 设计、backend-test-system、
  multi-agent-refactor-plan、实施总结等。

BREAKING CHANGE: 后端对外契约、回忆录章节字段与若干路由/任务行为已变更；大量 API 测试被移除，
  CI 若依赖这些用例需按新策略补测或调整流水线。

											
										
										
											2026-03-22 16:45:57 +08:00
+								    store = ConversationHistoryStore(db)
 								    tts_urls: list[str] = []
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								    try:
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								        logger.info(
 								            "process_user_message 开始: conversation_id={} segment_id={} user_chars={}",
 								            conversation_id,
 								            segment.id,
 								            len(user_message or ""),
 								        )
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								        is_from_voice = bool(segment.audio_url)
-												feat & refactor: 重构agents目录结构；AI回复模块agent结构封装

											
										
										
											2026-03-19 10:36:55 +08:00
+								        voice_session_id = _voice_session_id_from_audio_url(segment.audio_url)
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								        audio_dur = getattr(segment, "audio_duration_seconds", None)
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								        t_pipeline = time.perf_counter()
-												修复一些已知问题

											
										
										
											2026-03-20 17:25:42 +08:00
+								        turn = await chat_orchestrator.process_user_message(
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            conversation_id=conversation_id,
 								            user_message=user_message,
-												feat & refactor: 重构agents目录结构；AI回复模块agent结构封装

											
										
										
											2026-03-19 10:36:55 +08:00
+								            user=user,
 								            conversation=conversation,
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            is_from_voice=is_from_voice,
-												feat & refactor: 重构agents目录结构；AI回复模块agent结构封装

											
										
										
											2026-03-19 10:36:55 +08:00
+								            voice_session_id=voice_session_id,
 								            db=db,
 								            apply_extracted_profile_fn=apply_extracted_profile,
 								            get_missing_profile_fields_fn=get_missing_profile_fields,
 								            get_filled_profile_fields_fn=get_filled_profile_fields,
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            user_message_timestamp=user_message_timestamp,
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								            audio_duration_seconds=audio_dur,
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								        )
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								        if agent_summary_enabled():
 								            logger.info(
 								                "pipeline.process_user_message duration_ms={:.2f} "
 								                "conversation_id={} segment_id={} user_msg_len={} "
 								                "response_segments={} skip_tts={}",
 								                (time.perf_counter() - t_pipeline) * 1000,
 								                conversation_id,
 								                segment.id,
 								                len(user_message or ""),
 								                len(turn.messages),
 								                turn.skip_tts,
 								            )
-												修复一些已知问题

											
										
										
											2026-03-20 17:25:42 +08:00
+								        responses = turn.messages
 								        skip_tts = turn.skip_tts
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								        segment.agent_response = AI_RESPONSE_SEGMENT_JOIN.join(responses)
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								        _mark_conversation_active(conversation)
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								        ai_msg_id = await store.record_human_ai_turn(
-												refactor(api,expo): 多智能体与会话收敛、回忆录兼容层移除、后端测试集大幅删减

- 对齐「多智能体收敛」与「回忆录 stories-first / markdown-first」方向：收紧运行时契约、
  删除过渡兼容路径与双轨逻辑，并同步更新客户端与文档。

- Chat：以 ChatOrchestrator 为实时编排入口；删除独立 conversation_agent，精简 prompts。
- Memoir：删除 memory_agent；MemoirOrchestrator、classification / story_route 与 prompts 收敛到
  prepare_batches + run_story_pipeline_for_category_batch 主链路。
- 将 agents 侧 processor 迁入 feature 层为 background_runner，并移除 features 下重复/过时
  processor 封装。

- 新增 history_store，强化「conversation_messages 为 DB 真源、Redis 为缓存」模型。
- 调整 models、repo、service、session_history；精简 WS message_types，重构 pipeline 与 router。

- 移除章节占位、整章再生等旧路径；章节列表与封面逻辑要求 story 关联；收紧 cover 资格与
  enqueue。
- helpers、repo、service、router、reading_segment_materialize、story_pipeline_sync、pdf_service
  等按 canonical markdown / cover_asset_id 收缩；删除 memoir_images/provider 等冗余。
- tasks：memoir_tasks、chapter_cover_tasks 等大幅瘦身；story_image_tasks 等与当前图片任务对齐。

- core：config、logging、redis、task_tracker 小幅调整。
- auth / user / payment / quota：路由或服务侧删减过时接口或逻辑（如 payment router 行数减少）。

- pyproject.toml、development.sh、.env.example / .env.production、README 等同步说明或变量。

- Alembic 0001_initial_schema 微调（与当前 schema 叙事一致的小改动）。

- 回忆录：types / mappers / api、章节页与 memoir 页与后端契约对齐；markdown-renderer 调整。
- 语音：删除 voice/player，voice-segment-store 相应精简。

- api/tests：删除 conftest 及绝大部分既有测试文件（websocket_baseline、conversation、memoir
  图片、PDF、SMS 等），属有意收缩/待按 backend-test-system 重建的信号。
- docs：新增多智能体收敛与移除兼容层计划摘要；更新 story-first 设计、backend-test-system、
  multi-agent-refactor-plan、实施总结等。

BREAKING CHANGE: 后端对外契约、回忆录章节字段与若干路由/任务行为已变更；大量 API 测试被移除，
  CI 若依赖这些用例需按新策略补测或调整流水线。

											
										
										
											2026-03-22 16:45:57 +08:00
+								            conversation_id=conversation_id,
 								            user_message=user_message,
 								            responses=responses,
 								            user_message_timestamp=user_message_timestamp,
 								            is_from_voice=is_from_voice,
 								            voice_session_id=voice_session_id,
 								            audio_duration_seconds=audio_dur,
 								            tts_audio_urls=None,
 								            segment_id=segment.id,
 								        )
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								        if not ai_msg_id:
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结?

											
										
										
											2026-03-27 16:01:28 +08:00
+								            logger.warning(
 								                "process_user_message: 无有效助手段落（responses 为空），conversation_id={} segment_id={}",
 								                conversation_id,
 								                segment.id,
 								            )
 								            if conversation_id in manager.active_connections:
 								                await manager.send_message(
 								                    conversation_id,
 								                    {
 								                        "type": MessageType.ERROR,
 								                        "data": {
 								                            "message": "未生成回复，请重试或稍后再试",
 								                        },
 								                        "timestamp": datetime.now(timezone.utc).isoformat(),
 								                    },
 								                )
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								            return
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								        tts_epoch_start = _tts_epoch_value(conversation_id)
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								        n = len(responses)
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								        for i, response_text in enumerate(responses):
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								            await manager.send_message(
 								                conversation_id,
 								                {
 								                    "type": MessageType.AGENT_RESPONSE,
 								                    "conversation_id": conversation_id,
 								                    "data": {
 								                        "text": response_text,
 								                        "index": i,
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								                        "total": n,
-												feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路

数据库
- 新增迁移 0003：timeline_events.memory_source_id 外键 → memory_sources，便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化（摘要/事实/时间线），可配置开关与最大字符数
- 新增证据包组装：合并 chunk、摘要、事实、时间线、故事等检索结果；支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展；文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG；分段 ASR 日志与空音频处理；转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符，与分段逻辑一致

后端 - Agent
- reply_limits：按 [SPLIT] 与段落拆段，并保证非空 fallback，供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id；任务成功结……（原文此处截断）

											
										
										
											2026-03-27 16:01:28 +08:00
+								                        "assistant_message_id": ai_msg_id,
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								                    },
 								                    "timestamp": datetime.now(timezone.utc).isoformat(),
 								                },
 								            )
-												修复一些已知问题

											
										
										
											2026-03-20 17:25:42 +08:00
+								            url = None
 								            if not skip_tts:
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								                if _tts_epoch_value(conversation_id) != tts_epoch_start:
 								                    break
-												修复一些已知问题

											
										
										
											2026-03-20 17:25:42 +08:00
+								                url = await _send_tts_audio(
 								                    conversation_id,
 								                    response_text,
 								                    chunk_index=i,
 								                    chunk_total=n,
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								                    assistant_message_id=ai_msg_id,
 								                    tts_epoch_start=tts_epoch_start,
-												修复一些已知问题

											
										
										
											2026-03-20 17:25:42 +08:00
+								                )
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								            if url:
 								                tts_urls.append(url)
-												feat(conversation): TTS 投递与 WebSocket 管线；客户端播放门禁与会话页联动；COS 键与迁移脚本调整

											
										
										
											2026-03-26 15:51:24 +08:00
+								            if _tts_epoch_value(conversation_id) != tts_epoch_start:
 								                break
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
+								            if i < n - 1:
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								                await asyncio.sleep(0.5)
-												refactor(api,expo): 多智能体与会话收敛、回忆录兼容层移除、后端测试集大幅删减

- 对齐「多智能体收敛」与「回忆录 stories-first / markdown-first」方向：收紧运行时契约、
  删除过渡兼容路径与双轨逻辑，并同步更新客户端与文档。

- Chat：以 ChatOrchestrator 为实时编排入口；删除独立 conversation_agent，精简 prompts。
- Memoir：删除 memory_agent；MemoirOrchestrator、classification / story_route 与 prompts 收敛到
  prepare_batches + run_story_pipeline_for_category_batch 主链路。
- 将 agents 侧 processor 迁入 feature 层为 background_runner，并移除 features 下重复/过时
  processor 封装。

- 新增 history_store，强化「conversation_messages 为 DB 真源、Redis 为缓存」模型。
- 调整 models、repo、service、session_history；精简 WS message_types，重构 pipeline 与 router。

- 移除章节占位、整章再生等旧路径；章节列表与封面逻辑要求 story 关联；收紧 cover 资格与
  enqueue。
- helpers、repo、service、router、reading_segment_materialize、story_pipeline_sync、pdf_service
  等按 canonical markdown / cover_asset_id 收缩；删除 memoir_images/provider 等冗余。
- tasks：memoir_tasks、chapter_cover_tasks 等大幅瘦身；story_image_tasks 等与当前图片任务对齐。

- core：config、logging、redis、task_tracker 小幅调整。
- auth / user / payment / quota：路由或服务侧删减过时接口或逻辑（如 payment router 行数减少）。

- pyproject.toml、development.sh、.env.example / .env.production、README 等同步说明或变量。

- Alembic 0001_initial_schema 微调（与当前 schema 叙事一致的小改动）。

- 回忆录：types / mappers / api、章节页与 memoir 页与后端契约对齐；markdown-renderer 调整。
- 语音：删除 voice/player，voice-segment-store 相应精简。

- api/tests：删除 conftest 及绝大部分既有测试文件（websocket_baseline、conversation、memoir
  图片、PDF、SMS 等），属有意收缩/待按 backend-test-system 重建的信号。
- docs：新增多智能体收敛与移除兼容层计划摘要；更新 story-first 设计、backend-test-system、
  multi-agent-refactor-plan、实施总结等。

BREAKING CHANGE: 后端对外契约、回忆录章节字段与若干路由/任务行为已变更；大量 API 测试被移除，
  CI 若依赖这些用例需按新策略补测或调整流水线。

											
										
										
											2026-03-22 16:45:57 +08:00
+								        if tts_urls:
 								            await store.attach_ai_tts_audio_urls(
 								                conversation_id,
 								                tts_audio_urls=tts_urls,
 								                segment_id=segment.id,
 								            )
 								            await db.execute(
 								                update(Segment)
 								                .where(Segment.id == segment.id)
 								                .values(tts_audio_urls=tts_urls)
 								            )
 								            await db.commit()
-												修复：CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致

新增：TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试

											
										
										
											2026-03-20 16:36:42 +08:00
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								    except Exception as e:
-												refactor(api,expo): 多智能体与会话收敛、回忆录兼容层移除、后端测试集大幅删减

- 对齐「多智能体收敛」与「回忆录 stories-first / markdown-first」方向：收紧运行时契约、
  删除过渡兼容路径与双轨逻辑，并同步更新客户端与文档。

- Chat：以 ChatOrchestrator 为实时编排入口；删除独立 conversation_agent，精简 prompts。
- Memoir：删除 memory_agent；MemoirOrchestrator、classification / story_route 与 prompts 收敛到
  prepare_batches + run_story_pipeline_for_category_batch 主链路。
- 将 agents 侧 processor 迁入 feature 层为 background_runner，并移除 features 下重复/过时
  processor 封装。

- 新增 history_store，强化「conversation_messages 为 DB 真源、Redis 为缓存」模型。
- 调整 models、repo、service、session_history；精简 WS message_types，重构 pipeline 与 router。

- 移除章节占位、整章再生等旧路径；章节列表与封面逻辑要求 story 关联；收紧 cover 资格与
  enqueue。
- helpers、repo、service、router、reading_segment_materialize、story_pipeline_sync、pdf_service
  等按 canonical markdown / cover_asset_id 收缩；删除 memoir_images/provider 等冗余。
- tasks：memoir_tasks、chapter_cover_tasks 等大幅瘦身；story_image_tasks 等与当前图片任务对齐。

- core：config、logging、redis、task_tracker 小幅调整。
- auth / user / payment / quota：路由或服务侧删减过时接口或逻辑（如 payment router 行数减少）。

- pyproject.toml、development.sh、.env.example / .env.production、README 等同步说明或变量。

- Alembic 0001_initial_schema 微调（与当前 schema 叙事一致的小改动）。

- 回忆录：types / mappers / api、章节页与 memoir 页与后端契约对齐；markdown-renderer 调整。
- 语音：删除 voice/player，voice-segment-store 相应精简。

- api/tests：删除 conftest 及绝大部分既有测试文件（websocket_baseline、conversation、memoir
  图片、PDF、SMS 等），属有意收缩/待按 backend-test-system 重建的信号。
- docs：新增多智能体收敛与移除兼容层计划摘要；更新 story-first 设计、backend-test-system、
  multi-agent-refactor-plan、实施总结等。

BREAKING CHANGE: 后端对外契约、回忆录章节字段与若干路由/任务行为已变更；大量 API 测试被移除，
  CI 若依赖这些用例需按新策略补测或调整流水线。

											
										
										
											2026-03-22 16:45:57 +08:00
+								        if tts_urls:
 								            try:
 								                await store.attach_ai_tts_audio_urls(
 								                    conversation_id,
 								                    tts_audio_urls=tts_urls,
 								                    segment_id=segment.id,
 								                )
 								                await db.execute(
 								                    update(Segment)
 								                    .where(Segment.id == segment.id)
 								                    .values(tts_audio_urls=tts_urls)
 								                )
 								                await db.commit()
 								            except Exception as persist_error:
-												feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验
- DB: segments 用户输入文本（Alembic 0002）
- Chat: 阶段检测/阶段提示/回复限制，编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent，叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints；Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测

											
										
										
											2026-03-26 12:13:36 +08:00
+								                logger.warning("补写 TTS 元数据失败: {}", persist_error)
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								        logger.error(f"处理用户消息失败: {e}", exc_info=True)
 								        if conversation_id in manager.active_connections:
 								            try:
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
+								                await manager.send_message(
 								                    conversation_id,
 								                    {
 								                        "type": MessageType.ERROR,
 								                        "data": {"message": f"生成回应失败: {str(e)}"},
 								                        "timestamp": datetime.now(timezone.utc).isoformat(),
 								                    },
 								                )
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
+								            except Exception as send_error:
 								                logger.warning(f"发送错误消息失败: {send_error}")
 								# ── 对话结束处理 ────────────────────────────────────────────────
-												chore/ 删除无用文件

											
										
										
											2026-03-19 14:36:14 +08:00
-												Merge branch 'refactor/backend-architecture' into development

											
										
										
											2026-03-18 17:18:23 +08:00
async def process_conversation_segments(
    conversation_id: str, db: AsyncSession, quota_service: "QuotaService"
):
    """Submit a conversation's unprocessed segments to Celery when it ends.

    Most memoir processing is already done incrementally by Celery tasks;
    this function immediately hands every still-unprocessed segment of the
    conversation to the ``process_memoir_segments`` task and then flushes
    the background runner for the owning user.

    The quota check goes through the injected ``quota_service`` so that this
    module does not import quota internals directly.

    Args:
        conversation_id: Primary key of the conversation that just ended.
        db: Async SQLAlchemy session used for all lookups.
        quota_service: Object exposing ``check_can_submit_organize``.

    Returns:
        None. Failures while submitting the Celery task are logged and
        swallowed; the background runner is still flushed afterwards.
    """
    conversation = await db.get(Conversation, conversation_id)
    if not conversation or conversation.deleted_at is not None:
        # Missing conversation, or one with deleted_at set: nothing to
        # submit and nothing to flush.
        return

    owner_id = conversation.user_id

    pending_stmt = select(Segment).where(
        Segment.conversation_id == conversation_id,
        # Use the SQLAlchemy operator instead of a Python `== False`
        # comparison.
        Segment.processed.is_(False),
    )
    pending_segments = (await db.execute(pending_stmt)).scalars().all()
    if not pending_segments:
        await background_runner.flush_pending(owner_id)
        return

    user = await db.get(User, owner_id)
    if user:
        # The quota check only runs when the user row exists; when it does
        # not, the segments are still submitted below.
        can_submit, _ = await quota_service.check_can_submit_organize(
            user.id, user.subscription_type
        )
        if not can_submit:
            logger.info(
                f"用户 {user.id} 章节配额已用尽，跳过提交整理任务: conversation_id={conversation_id}"
            )
            await background_runner.flush_pending(owner_id)
            return

    segment_ids = [seg.id for seg in pending_segments]
    try:
        # Imported inside the try block, so an import failure is logged
        # like any other submission failure instead of propagating.
        from app.tasks.memoir_tasks import process_memoir_segments

        process_memoir_segments.delay(owner_id, segment_ids)
        logger.info(
            f"对话结束，提交 Celery 任务: conversation_id={conversation_id}, segments={len(segment_ids)}"
        )
    except Exception as e:
        # Best effort: a failed submission must not prevent the flush below.
        logger.error(f"提交 Celery 任务失败: {e}")

    await background_runner.flush_pending(owner_id)