life-echo/api/app/features/conversation/history_store.py

"""Durable conversation turn persistence + Redis cache sync (feature layer).

PostgreSQL is the source of truth for conversation history. Each write path
commits via ``transactional()`` first; ``_sync_redis_best_effort`` runs only
after a successful DB commit. Redis sync failures are logged as warnings and
do not roll back durable state. A brief "DB has data, cache missing" window is
expected under Redis outages; WS reconnect and ``load_canonical_history`` read
from DB and self-heal the cache on the next successful sync.
"""

from __future__ import annotations

import uuid
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Any

from sqlalchemy.ext.asyncio import AsyncSession

from app.core import redis as redis_core
from app.core.db import transactional
from app.core.logging import get_logger
from app.features.conversation import repo
from app.features.conversation.lineage_schemas import DialogueLineage
from app.features.conversation.models import ConversationMessage, Segment
from app.features.conversation.session_history import (
    conversation_messages_to_redis_history,
)

logger = get_logger(__name__)

# Contract with the LLM / client: multi-part assistant messages are joined with
# [SPLIT] so they can be split back into separate chat bubbles and TTS segments.
AI_RESPONSE_SEGMENT_JOIN = "[SPLIT]"


@dataclass(frozen=True)
class HumanAiTurnIds:
    """Durable ids for one user + assistant pair in conversation_messages.

    Immutable (frozen dataclass). Returned by the ``record_human_ai_turn*``
    methods of ``ConversationHistoryStore`` once both rows have been written.
    """

    # Id of the row stored with role="human".
    human_message_id: str
    # Id of the row stored with role="ai".
    assistant_message_id: str


def _utc_now() -> datetime:
    """Return the current moment as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)


class ConversationHistoryStore:
    def __init__(self, db: AsyncSession) -> None:
        """Bind the store to the ``AsyncSession`` its methods pass to ``repo``."""
        self._db = db

    async def load_canonical_history(
        self, conversation_id: str
    ) -> list[dict[str, Any]]:
        rows = await repo.get_conversation_messages(conversation_id, self._db)
        return conversation_messages_to_redis_history(rows)

    async def _touch_conversation(
        self, conversation_id: str, *, occurred_at: datetime
    ) -> None:
        conversation = await repo.get_conversation(conversation_id, self._db)
        if conversation is None:
            return
        current = getattr(conversation, "last_message_at", None)
        if current is None or current < occurred_at:
            conversation.last_message_at = occurred_at

    async def _sync_redis_from_db(self, conversation_id: str) -> None:
        hist = await self.load_canonical_history(conversation_id)
        await redis_core.redis_service.set_conversation_history(conversation_id, hist)

    async def _sync_redis_best_effort(self, conversation_id: str) -> None:
        try:
            await self._sync_redis_from_db(conversation_id)
        except Exception as exc:
            logger.warning("conversation history cache sync skipped: {}", exc)

    async def record_ai_only_turn(
        self, conversation_id: str, responses: list[str]
    ) -> str | None:
        if not responses:
            return None
        combined = AI_RESPONSE_SEGMENT_JOIN.join(responses)
        created_at = _utc_now()
        msg = ConversationMessage(
            id=str(uuid.uuid4()),
            conversation_id=conversation_id,
            role="ai",
            content=combined,
            message_type="text",
            created_at=created_at,
        )
        async with transactional(self._db):
            repo.add_conversation_message(msg, self._db)
            await self._touch_conversation(conversation_id, occurred_at=created_at)
        await self._sync_redis_best_effort(conversation_id)
        return msg.id

    async def record_human_ai_turn(
        self,
        conversation_id: str,
        user_message: str,
        responses: list[str],
        *,
        user_message_timestamp: datetime | None,
        is_from_voice: bool,
        voice_session_id: str | None,
        audio_duration_seconds: int | None,
        tts_audio_urls: list[str] | None,
        segment_id: str | None,
        memory_retrieval_trace: dict | None = None,
    ) -> HumanAiTurnIds | None:
        if not responses:
            return None
        human_ts = user_message_timestamp or _utc_now()
        if human_ts.tzinfo is None:
            human_ts = human_ts.replace(tzinfo=timezone.utc)
        ai_ts = human_ts + timedelta(microseconds=1)
        human_type = "audio" if is_from_voice else "text"
        human = ConversationMessage(
            id=str(uuid.uuid4()),
            conversation_id=conversation_id,
            role="human",
            content=user_message,
            message_type=human_type,
            voice_session_id=voice_session_id,
            duration_seconds=audio_duration_seconds
            if audio_duration_seconds is not None and audio_duration_seconds > 0
            else None,
            segment_id=segment_id,
            created_at=human_ts,
        )
        combined = AI_RESPONSE_SEGMENT_JOIN.join(responses)
        ai = ConversationMessage(
            id=str(uuid.uuid4()),
            conversation_id=conversation_id,
            role="ai",
            content=combined,
            message_type="text",
            tts_audio_urls=tts_audio_urls if tts_audio_urls else None,
            segment_id=segment_id,
            created_at=ai_ts,
            memory_retrieval_trace_json=memory_retrieval_trace,
        )
        async with transactional(self._db):
            repo.add_conversation_message(human, self._db)
            repo.add_conversation_message(ai, self._db)
            await self._touch_conversation(conversation_id, occurred_at=ai_ts)
        await self._sync_redis_best_effort(conversation_id)
        return HumanAiTurnIds(
            human_message_id=str(human.id),
            assistant_message_id=str(ai.id),
        )

    async def record_human_ai_turn_with_segment(
        self,
        conversation_id: str,
        user_message: str,
        responses: list[str],
        segment: Segment,
        *,
        user_message_timestamp: datetime | None,
        is_from_voice: bool,
        voice_session_id: str | None,
        audio_duration_seconds: int | None,
        agent_response: str,
        memory_retrieval_trace: dict | None = None,
    ) -> HumanAiTurnIds | None:
        """Persist human/ai messages and segment metadata in one transaction."""
        if not responses:
            return None
        human_ts = user_message_timestamp or _utc_now()
        if human_ts.tzinfo is None:
            human_ts = human_ts.replace(tzinfo=timezone.utc)
        ai_ts = human_ts + timedelta(microseconds=1)
        human_type = "audio" if is_from_voice else "text"
        segment_id = str(segment.id)
        human = ConversationMessage(
            id=str(uuid.uuid4()),
            conversation_id=conversation_id,
            role="human",
            content=user_message,
            message_type=human_type,
            voice_session_id=voice_session_id,
            duration_seconds=audio_duration_seconds
            if audio_duration_seconds is not None and audio_duration_seconds > 0
            else None,
            segment_id=segment_id,
            created_at=human_ts,
        )
        combined = AI_RESPONSE_SEGMENT_JOIN.join(responses)
        ai = ConversationMessage(
            id=str(uuid.uuid4()),
            conversation_id=conversation_id,
            role="ai",
            content=combined,
            message_type="text",
            segment_id=segment_id,
            created_at=ai_ts,
            memory_retrieval_trace_json=memory_retrieval_trace,
        )
        async with transactional(self._db):
            repo.add_conversation_message(human, self._db)
            repo.add_conversation_message(ai, self._db)
            # Postgres: segments.user_message_id FK must exist before segment UPDATE;
            # SQLAlchemy may otherwise flush the dirty segment row before message INSERTs.
            await self._db.flush()
            await self._touch_conversation(conversation_id, occurred_at=ai_ts)
            segment.agent_response = agent_response
            segment.user_message_id = str(human.id)
            segment.lineage_json = DialogueLineage.for_single_turn(
                conversation_id=conversation_id,
                user_message_id=str(human.id),
                assistant_message_id=str(ai.id),
                segment_ids=[segment_id],
            ).model_dump(mode="json")
        await self._sync_redis_best_effort(conversation_id)
        return HumanAiTurnIds(
            human_message_id=str(human.id),
            assistant_message_id=str(ai.id),
        )

    async def attach_ai_tts_for_turn(
        self,
        conversation_id: str,
        *,
        tts_audio_urls: list[str],
        segment: Segment,
    ) -> None:
        """Update latest AI message and segment TTS URLs in one transaction."""
        if not tts_audio_urls:
            return
        segment_id = str(segment.id)
        async with transactional(self._db):
            row = await repo.set_latest_ai_message_tts_audio_urls(
                conversation_id,
                self._db,
                tts_audio_urls=tts_audio_urls,
                segment_id=segment_id,
            )
            if row is None:
                return
            segment.tts_audio_urls = list(tts_audio_urls)
        await self._sync_redis_best_effort(conversation_id)

    async def attach_ai_tts_audio_urls(
        self,
        conversation_id: str,
        *,
        tts_audio_urls: list[str],
        segment_id: str | None = None,
    ) -> None:
        if not tts_audio_urls:
            return
        async with transactional(self._db):
            row = await repo.set_latest_ai_message_tts_audio_urls(
                conversation_id,
                self._db,
                tts_audio_urls=tts_audio_urls,
                segment_id=segment_id,
            )
        if row is None:
            return
        await self._sync_redis_best_effort(conversation_id)