life-echo/api/routers/websocket.py

"""
WebSocket 路由：实时对话通信
"""
import uuid
from datetime import datetime, timezone
from enum import Enum
from typing import Dict

from fastapi import WebSocket, WebSocketDisconnect, HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from agents import ConversationAgent, MemoryAgent
from agents.prompts import ConversationStage
from database import get_async_db
from database.models import Conversation, Segment
from services.asr_service import asr_service
from services.tts_service import tts_service


class MessageType(str, Enum):
    """WebSocket 消息类型"""
    CONNECT = "connect"
    AUDIO_CHUNK = "audio_chunk"
    TRANSCRIPT = "transcript"
    AGENT_RESPONSE = "agent_response"
    TTS_AUDIO = "tts_audio"
    END_CONVERSATION = "end_conversation"
    ERROR = "error"


# 连接管理
class ConnectionManager:
    """WebSocket 连接管理器"""

    def __init__(self):
        self.active_connections: Dict[str, WebSocket] = {}
        self.conversation_agents: Dict[str, ConversationAgent] = {}
        self.memory_agent = MemoryAgent()

    async def connect(self, websocket: WebSocket, conversation_id: str):
        """建立连接"""
        await websocket.accept()
        self.active_connections[conversation_id] = websocket
        self.conversation_agents[conversation_id] = ConversationAgent()

    def disconnect(self, conversation_id: str):
        """断开连接"""
        if conversation_id in self.active_connections:
            del self.active_connections[conversation_id]
        if conversation_id in self.conversation_agents:
            self.conversation_agents[conversation_id].clear_memory(conversation_id)
            del self.conversation_agents[conversation_id]

    async def send_message(self, conversation_id: str, message: dict):
        """发送消息"""
        if conversation_id in self.active_connections:
            websocket = self.active_connections[conversation_id]
            await websocket.send_json(message)

    async def receive_message(self, conversation_id: str) -> dict:
        """接收消息"""
        if conversation_id in self.active_connections:
            websocket = self.active_connections[conversation_id]
            return await websocket.receive_json()
        raise HTTPException(status_code=404, detail="Connection not found")


manager = ConnectionManager()


async def websocket_endpoint(websocket: WebSocket, conversation_id: str):
    """
    WebSocket 端点：处理实时对话

    Args:
        websocket: WebSocket 连接
        conversation_id: 对话 ID
    """
    await manager.connect(websocket, conversation_id)

    try:
        # 发送连接确认
        await manager.send_message(conversation_id, {
            "type": MessageType.CONNECT,
            "conversation_id": conversation_id,
            "data": {"status": "connected"},
            "timestamp": datetime.now(timezone.utc).isoformat()
        })

        # 从数据库获取对话信息
        async for db in get_async_db():
            conversation = await db.get(Conversation, conversation_id)
            if not conversation:
                # 如果对话不存在，创建新对话
                from database.models import User as UserModel
                # 假设用户 ID 从连接参数获取（实际应该从认证获取）
                user_id = "default_user"  # TODO: 从认证获取实际用户 ID

                conversation = Conversation(
                    id=conversation_id,
                    user_id=user_id,
                    started_at=datetime.now(timezone.utc),
                    status="active"
                )
                db.add(conversation)
                await db.commit()

            current_stage = ConversationStage(conversation.conversation_stage) if conversation.conversation_stage else ConversationStage.CHILDHOOD

            # 主循环：处理消息
            while True:
                try:
                    message = await websocket.receive_json()
                    msg_type = message.get("type")

                    if msg_type == MessageType.AUDIO_CHUNK:
                        # 处理音频块
                        audio_data = message.get("data", {}).get("audio_base64", "")

                        # 调用 ASR 服务转文字
                        transcript = await asr_service.transcribe(audio_data)

                        # 保存段落到数据库
                        segment = Segment(
                            id=str(uuid.uuid4()),
                            conversation_id=conversation_id,
                            transcript_text=transcript,
                            processed=False
                        )
                        db.add(segment)
                        await db.commit()

                        # 发送转写结果
                        await manager.send_message(conversation_id, {
                            "type": MessageType.TRANSCRIPT,
                            "conversation_id": conversation_id,
                            "data": {"text": transcript},
                            "timestamp": datetime.now(timezone.utc).isoformat()
                        })

                        # Agent 生成回应
                        agent = manager.conversation_agents.get(conversation_id)
                        if agent:
                            # 检测对话阶段
                            detected_stage = agent.detect_stage(conversation_id, transcript)
                            if detected_stage != current_stage:
                                current_stage = detected_stage
                                conversation.conversation_stage = current_stage.value
                                await db.commit()

                            # 获取已聊话题
                            stmt_segments = select(Segment).where(
                                Segment.conversation_id == conversation_id
                            ).order_by(Segment.created_at)
                            result_segments = await db.execute(stmt_segments)
                            previous_segments = result_segments.scalars().all()
                            covered_topics = [seg.topic_category for seg in previous_segments if seg.topic_category]

                            # 生成回应
                            response = agent.generate_response(
                                conversation_id=conversation_id,
                                user_message=transcript,
                                current_stage=current_stage,
                                covered_topics=covered_topics
                            )

                            # 更新段落的 Agent 回应
                            segment.agent_response = response
                            await db.commit()

                            # 发送 Agent 回应
                            await manager.send_message(conversation_id, {
                                "type": MessageType.AGENT_RESPONSE,
                                "conversation_id": conversation_id,
                                "data": {"text": response},
                                "timestamp": datetime.now(timezone.utc).isoformat()
                            })

                            # 调用 TTS 服务生成音频
                            tts_audio = await tts_service.synthesize(response)

                            # 发送 TTS 音频
                            await manager.send_message(conversation_id, {
                                "type": MessageType.TTS_AUDIO,
                                "conversation_id": conversation_id,
                                "data": {"audio_base64": tts_audio},
                                "timestamp": datetime.now(timezone.utc).isoformat()
                            })

                    elif msg_type == MessageType.END_CONVERSATION:
                        # 结束对话
                        conversation.status = "ended"
                        conversation.ended_at = datetime.now(timezone.utc)
                        await db.commit()

                        # 触发整理 Agent
                        await process_conversation_segments(conversation_id, db)

                        await manager.send_message(conversation_id, {
                            "type": MessageType.END_CONVERSATION,
                            "conversation_id": conversation_id,
                            "data": {"status": "ended"},
                            "timestamp": datetime.now(timezone.utc).isoformat()
                        })
                        break

                except Exception as e:
                    await manager.send_message(conversation_id, {
                        "type": MessageType.ERROR,
                        "data": {"message": str(e)},
                        "timestamp": datetime.now(timezone.utc).isoformat()
                    })

    except WebSocketDisconnect:
        manager.disconnect(conversation_id)
    except Exception:
        manager.disconnect(conversation_id)
        raise


async def process_conversation_segments(conversation_id: str, db: AsyncSession):
    """
    处理对话段落，生成章节

    Args:
        conversation_id: 对话 ID
        db: 数据库会话
    """
    # 获取所有未处理的段落
    stmt = select(Segment).where(
        Segment.conversation_id == conversation_id,
        Segment.processed == False
    )
    result = await db.execute(stmt)
    segments = result.scalars().all()

    if not segments:
        return

    # 准备段落数据
    segments_data = [
        {"transcript_text": seg.transcript_text}
        for seg in segments
    ]

    # 调用整理 Agent
    memory_agent = manager.memory_agent
    chapters_data = memory_agent.process_segments(segments_data)

    # 保存章节到数据库
    from database import Chapter
    conversation = await db.get(Conversation, conversation_id)

    for category, chapter_data in chapters_data.items():
        chapter = Chapter(
            id=str(uuid.uuid4()),
            user_id=conversation.user_id,
            title=chapter_data["title"],
            content=chapter_data["content"],
            order_index=chapter_data.get("order_index", 999),
            status="completed",
            category=category,
            images=chapter_data.get("image_suggestions", [])
        )
        db.add(chapter)

    # 标记段落为已处理
    for seg in segments:
        seg.processed = True

    await db.commit()