# api/app/features/memoir/story_pipeline_sync.py

"""
Celery 用：按批次将 transcript 写入 Story，并物化 Chapter canonical_markdown。
"""

from __future__ import annotations

import uuid
from typing import Any

from sqlalchemy import select
from sqlalchemy.orm import Session, joinedload

from app.agents.memoir.narrative_agent import NarrativeAgent
from app.agents.memoir.prompts import STAGE_TO_ORDER, format_evidence_chunks_for_prompt
from app.agents.memoir.story_route_agent import StoryRouteAgent
from app.agents.state_schema import MemoirStateSchema
from app.core.logging import get_logger
from app.features.memoir.cover_eligibility import chapter_needs_cover_enqueue
from app.features.memoir.helpers import _chapter_markdown
from app.features.memoir.memoir_images.settings import MemoirImageSettings
from app.features.memoir.models import Chapter
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
from app.features.memoir.repo import compose_chapter_from_story_links_sync
from app.features.story.models import Story
from app.features.story.sync_write import (
    append_story_version_sync,
    create_story_with_version_sync,
    ensure_chapter_story_link_sync,
    list_active_stories_for_user_sync,
)
from app.features.memory.repo import retrieve_evidence_sync

logger = get_logger(__name__)


def _is_json_narrative(text: str) -> bool:
    """Heuristically tell whether *text* looks like a JSON narrative payload.

    The check is purely textual (nothing is parsed): after trimming
    surrounding whitespace the text must begin with ``{`` and contain the
    substring ``paragraphs`` somewhere. Empty, blank, or ``None`` input
    yields ``False``.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return False
    if stripped[0] != "{":
        return False
    return stripped.find("paragraphs") != -1


# A generated narrative shorter than this fraction of the text it is compared
# against (and that is not a JSON payload) is treated as a length anomaly.
_MIN_NARRATIVE_LENGTH_RATIO = 0.8


def _empty_evidence() -> dict[str, list]:
    """Return the empty evidence payload used when evidence retrieval fails."""
    return {
        "relevant_chunks": [],
        "relevant_summaries": [],
        "relevant_facts": [],
        "timeline_hints": [],
        "relevant_stories": [],
    }


def _extract_slot_snippets(
    state: MemoirStateSchema, chapter_category: str
) -> dict[str, str]:
    """Collect the non-empty ``snippet`` of every slot stored for the category.

    A slot value may expose ``snippet`` as an attribute or, when it is a
    plain dict, as a key; slots without a truthy snippet are skipped.
    """
    stage_slots = state.slots.get(chapter_category, {}) or {}
    snippets: dict[str, str] = {}
    for key, value in stage_slots.items():
        snippet = getattr(value, "snippet", None) or (
            value.get("snippet") if isinstance(value, dict) else None
        )
        if snippet:
            snippets[key] = snippet
    return snippets


def _looks_truncated(narrative_raw: str, baseline: str) -> bool:
    """Return True when a non-JSON narrative is much shorter than *baseline*.

    An empty *baseline* never triggers the check.
    """
    return (
        bool(baseline)
        and not _is_json_narrative(narrative_raw)
        and len(narrative_raw) < len(baseline) * _MIN_NARRATIVE_LENGTH_RATIO
    )


def _merge_source_ids(existing: list | None, new_ids: list) -> list:
    """Merge segment ids, dropping duplicates while keeping first-seen order."""
    # dict.fromkeys de-duplicates like set() but keeps insertion order, so the
    # stored list is deterministic from run to run.
    return list(dict.fromkeys([*(existing or []), *new_ids]))


def run_story_pipeline_for_category_batch(
    session: Session,
    *,
    user_id: str,
    chapter_category: str,
    category_segments: list,
    state: MemoirStateSchema,
    user_profile: str,
    user_birth_year: int | None,
    llm: Any,
) -> tuple[Chapter | None, bool, set[str]]:
    """Write one category's batch of transcripts into a Story and refresh its Chapter.

    Steps, in order:

    1. Join the segments' ``transcript_text`` and try to retrieve related
       evidence; a retrieval failure is logged and treated as "no evidence".
    2. Load the user's active chapter for ``chapter_category`` (if any) and
       pick a title: the chapter's own, or a freshly generated one.
    3. Ask the route agent whether the batch should be appended to one of the
       user's active stories or become a new story.
    4. Generate the narrative, apply the length-anomaly fallbacks, and convert
       the result to markdown (falling back to the raw transcript if empty).
    5. Create the chapter if missing, otherwise merge the segment ids into it.
    6. Append a story version or create a new story, link it to the chapter,
       and recompose the chapter from its story links.

    This function itself only flushes the session; it does not commit.

    Args:
        session: Synchronous SQLAlchemy session used for all reads and writes.
        user_id: Owner of the chapter and stories.
        chapter_category: Category (stage) the batch belongs to.
        category_segments: Segment objects exposing ``transcript_text`` and ``id``.
        state: Memoir state whose ``slots`` mapping is read for snippets.
        user_profile: Profile text forwarded to the narrative agent.
        user_birth_year: Birth year forwarded to the narrative agent, if known.
        llm: LLM handle forwarded to the agents.

    Returns:
        ``(chapter, needs_cover_enqueue, story_ids_to_dispatch_after_commit)``.
    """
    narrative_agent = NarrativeAgent()
    route_agent = StoryRouteAgent()

    combined_text = "\n\n".join(
        seg.transcript_text or "" for seg in category_segments
    )
    source_ids = [seg.id for seg in category_segments]

    # Evidence is best-effort: a failure must not abort the batch.
    try:
        evidence = retrieve_evidence_sync(session, user_id, combined_text, top_k=10)
    except Exception as e:
        logger.warning("Evidence 检索跳过: %s", e)
        evidence = _empty_evidence()

    evidence_text = format_evidence_chunks_for_prompt(evidence)
    has_evidence = bool(evidence_text.strip())
    new_content_input = (
        f"{combined_text}\n\n【相关记忆摘录】\n{evidence_text}"
        if has_evidence
        else combined_text
    )

    chapter_query = (
        select(Chapter)
        .where(
            Chapter.user_id == user_id,
            Chapter.category == chapter_category,
            Chapter.is_active == True,  # noqa: E712
        )
        .options(
            joinedload(Chapter.images),
            joinedload(Chapter.story_links),
        )
    )
    # .unique() is applied because collections are joined-eager-loaded.
    chapter = session.execute(chapter_query).unique().scalar_one_or_none()

    slot_snippets = _extract_slot_snippets(state, chapter_category)
    existing_chapter_md = _chapter_markdown(chapter) if chapter else ""

    if chapter:
        title = chapter.title
    else:
        # Fall back to the default title when the generated title is empty.
        title = narrative_agent.generate_title(
            stage=chapter_category,
            emotion="neutral",
            slots=slot_snippets,
            user_profile=user_profile,
            birth_year=user_birth_year,
            llm=llm,
        ) or f"{chapter_category} 回忆"

    candidates = list_active_stories_for_user_sync(session, user_id)
    valid_ids = {s.id for s in candidates}

    # The router sees the evidence without the section header used for the
    # narrative prompt.
    batch_for_route = (
        f"{combined_text}\n\n{evidence_text}" if has_evidence else combined_text
    )
    route = route_agent.decide(
        chapter_category=chapter_category,
        chapter_title=title,
        batch_transcript=batch_for_route,
        candidate_stories=candidates,
        llm=llm,
        valid_story_ids=valid_ids,
    )

    # Only append when the routed story exists and belongs to this user;
    # otherwise fall through to creating a new story.
    target_story_id: str | None = None
    existing_for_narrative = ""
    if route.decision == "append_story" and route.target_story_id:
        routed_story = session.get(Story, route.target_story_id)
        if routed_story and routed_story.user_id == user_id:
            target_story_id = routed_story.id
            existing_for_narrative = (routed_story.canonical_markdown or "").strip()

    narrative_raw = narrative_agent.generate_narrative(
        stage=chapter_category,
        slots=slot_snippets,
        new_content=new_content_input,
        existing_content=existing_for_narrative,
        user_profile=user_profile,
        birth_year=user_birth_year,
        llm=llm,
    )

    # Length-anomaly fallbacks: keep the previous text and append the raw
    # transcript instead of the generated narrative.
    if existing_for_narrative:
        if _looks_truncated(narrative_raw, existing_for_narrative):
            logger.warning("叙事长度异常: 回退为原文追加")
            narrative_raw = f"{existing_for_narrative}\n\n{combined_text}"
    elif _looks_truncated(narrative_raw, existing_chapter_md):
        # NOTE(review): here the narrative was generated without existing
        # content but is compared against the whole chapter markdown, and the
        # fallback text becomes the body of a new story. Confirm this is the
        # intended behavior and does not duplicate chapter content.
        logger.warning(
            "章节级长度异常: 回退为 transcript 追加, category=%s",
            chapter_category,
        )
        narrative_raw = f"{existing_chapter_md}\n\n{combined_text}"

    md = narrative_to_markdown(narrative_raw)
    if not md.strip():
        md = combined_text.strip()

    if not chapter:
        chapter = Chapter(
            id=str(uuid.uuid4()),
            user_id=user_id,
            title=title,
            order_index=STAGE_TO_ORDER.get(chapter_category, 999),
            status="completed",
            category=chapter_category,
            cover_image=None,
            is_new=True,
            source_segments=source_ids,
        )
        session.add(chapter)
        # Flush before the story link below references chapter.id.
        session.flush()
    else:
        chapter.source_segments = _merge_source_ids(
            chapter.source_segments, source_ids
        )
        chapter.is_new = True

    if target_story_id is not None:
        append_story_version_sync(session, target_story_id, md)
        story_id = target_story_id
    else:
        story_title = (route.new_story_title or "").strip()
        if not story_title:
            story_title = narrative_agent.generate_title(
                stage=chapter_category,
                emotion="neutral",
                slots=slot_snippets,
                user_profile=user_profile,
                birth_year=user_birth_year,
                llm=llm,
            )
        new_story = create_story_with_version_sync(
            session,
            user_id=user_id,
            title=story_title,
            canonical_markdown=md,
            stage=chapter_category,
        )
        story_id = new_story.id

    dispatch_ids: set[str] = {story_id}
    ensure_chapter_story_link_sync(session, chapter_id=chapter.id, story_id=story_id)

    compose_chapter_from_story_links_sync(session, chapter.id)
    session.flush()

    image_settings = MemoirImageSettings.from_env()
    needs_cover = image_settings.enabled and chapter_needs_cover_enqueue(chapter)

    return chapter, needs_cover, dispatch_ids