"""
For Celery: write transcripts into a Story batch by batch, and materialize the
Chapter canonical_markdown.
"""

from __future__ import annotations

import uuid
from typing import Any

from sqlalchemy import select
from sqlalchemy.orm import Session, joinedload

from app.agents.memoir.narrative_agent import NarrativeAgent
from app.agents.memoir.prompts import STAGE_TO_ORDER, format_evidence_chunks_for_prompt
from app.agents.memoir.story_route_agent import StoryRouteAgent
from app.agents.state_schema import MemoirStateSchema
from app.core.logging import get_logger
from app.features.memoir.cover_eligibility import chapter_needs_cover_enqueue
from app.features.memoir.helpers import _chapter_markdown
from app.features.memoir.memoir_images.settings import MemoirImageSettings
from app.features.memoir.models import Chapter
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
from app.features.memoir.repo import compose_chapter_from_story_links_sync
from app.features.story.models import Story
from app.features.story.sync_write import (
    append_story_version_sync,
    create_story_with_version_sync,
    ensure_chapter_story_link_sync,
    list_active_stories_for_user_sync,
)
from app.features.memory.repo import retrieve_evidence_sync

logger = get_logger(__name__)

# A non-JSON narrative shorter than this fraction of the text it is supposed to
# extend is treated as a truncated/failed generation.
_LENGTH_FALLBACK_RATIO = 0.8

# Order index used when the category has no entry in STAGE_TO_ORDER.
_DEFAULT_ORDER_INDEX = 999


def _is_json_narrative(text: str | None) -> bool:
    """Return True when *text* looks like a JSON narrative payload.

    The check is purely textual: after stripping whitespace the text must
    start with ``{`` and contain the substring ``paragraphs``. Empty, blank
    or ``None`` input yields False.
    """
    stripped = (text or "").strip()
    return stripped.startswith("{") and "paragraphs" in stripped


def _retrieve_evidence_or_empty(session: Session, user_id: str, query_text: str) -> Any:
    """Fetch evidence for the prompt, falling back to an empty result on any error.

    Retrieval is best-effort: a failure is logged and the pipeline continues
    without evidence.
    """
    try:
        return retrieve_evidence_sync(session, user_id, query_text, top_k=10)
    except Exception as e:
        # NOTE(review): if the failure is a database error raised on `session`,
        # the surrounding transaction may be left unusable for the queries that
        # follow -- consider wrapping the call in a savepoint; confirm against
        # retrieve_evidence_sync.
        logger.warning("Evidence 检索跳过: %s", e)
        return {
            "relevant_chunks": [],
            "relevant_summaries": [],
            "relevant_facts": [],
            "timeline_hints": [],
            "relevant_stories": [],
        }


def _collect_slot_snippets(state: MemoirStateSchema, chapter_category: str) -> dict[str, str]:
    """Map slot key -> non-empty snippet for the slots stored under *chapter_category*.

    A slot value may expose ``snippet`` as an attribute or, when it is a dict,
    as a key; slots without a truthy snippet are skipped.
    """
    snippets: dict[str, str] = {}
    stage_slots = state.slots.get(chapter_category, {}) or {}
    for key, value in stage_slots.items():
        snippet = getattr(value, "snippet", None)
        if not snippet and isinstance(value, dict):
            snippet = value.get("snippet")
        if snippet:
            snippets[key] = snippet
    return snippets


def _is_suspiciously_short(narrative_raw: str, baseline: str) -> bool:
    """Return True when a non-JSON narrative is much shorter than *baseline*."""
    return (
        bool(baseline)
        and not _is_json_narrative(narrative_raw)
        and len(narrative_raw) < len(baseline) * _LENGTH_FALLBACK_RATIO
    )


def _merge_source_ids(existing: list | None, new_ids: list) -> list:
    """Concatenate and de-duplicate ids while keeping first-seen order.

    ``dict.fromkeys`` is used instead of ``set`` so the stored list has a
    stable order from run to run.
    """
    return list(dict.fromkeys([*(existing or []), *new_ids]))


def run_story_pipeline_for_category_batch(
    session: Session,
    *,
    user_id: str,
    chapter_category: str,
    category_segments: list,
    state: MemoirStateSchema,
    user_profile: str,
    user_birth_year: int | None,
    llm: Any,
) -> tuple[Chapter | None, bool, set[str]]:
    """Write one category's batch of transcript segments into a Story and its Chapter.

    Steps, in order: retrieve evidence (best-effort), load the user's active
    chapter for the category, ask the route agent whether to append to an
    existing story or create a new one, generate the narrative, create or
    update the chapter, write the story version, link story and chapter, and
    recompose the chapter from its story links. The session is flushed but
    not committed here.

    Args:
        session: SQLAlchemy session used for all reads and writes.
        user_id: Owner of the chapter and stories.
        chapter_category: Category (stage) the batch belongs to.
        category_segments: Segments of this batch; each must expose
            ``transcript_text`` and ``id``.
        state: Memoir state; ``state.slots[chapter_category]`` supplies the
            slot snippets passed to the agents.
        user_profile: Passed through to the narrative agent.
        user_birth_year: Passed through to the narrative agent.
        llm: LLM handle passed through to the agents.

    Returns:
        ``(chapter, needs_cover_enqueue, story_ids_to_dispatch_after_commit)``.
        When *category_segments* is empty nothing is generated or written and
        ``(None, False, set())`` is returned.
    """
    if not category_segments:
        # Nothing to narrate: avoid LLM calls and an empty Story/Chapter.
        logger.warning(
            "run_story_pipeline_for_category_batch called without segments, category=%s",
            chapter_category,
        )
        return None, False, set()

    narrative_agent = NarrativeAgent()
    route_agent = StoryRouteAgent()
    dispatch_ids: set[str] = set()

    combined_text = "\n\n".join(seg.transcript_text or "" for seg in category_segments)
    source_ids = [seg.id for seg in category_segments]

    evidence = _retrieve_evidence_or_empty(session, user_id, combined_text)
    evidence_text = format_evidence_chunks_for_prompt(evidence)
    has_evidence = bool(evidence_text.strip())
    new_content_input = (
        f"{combined_text}\n\n【相关记忆摘录】\n{evidence_text}" if has_evidence else combined_text
    )
    batch_for_route = f"{combined_text}\n\n{evidence_text}" if has_evidence else combined_text

    stmt_chapter = (
        select(Chapter)
        .where(
            Chapter.user_id == user_id,
            Chapter.category == chapter_category,
            Chapter.is_active == True,  # noqa: E712
        )
        .options(
            joinedload(Chapter.images),
            joinedload(Chapter.story_links),
        )
    )
    # .unique() is required because the joined collection loads duplicate rows.
    chapter = session.execute(stmt_chapter).unique().scalar_one_or_none()

    slot_snippets = _collect_slot_snippets(state, chapter_category)

    if chapter is None:
        existing_chapter_md = ""
        title = narrative_agent.generate_title(
            stage=chapter_category,
            emotion="neutral",
            slots=slot_snippets,
            user_profile=user_profile,
            birth_year=user_birth_year,
            llm=llm,
        )
    else:
        existing_chapter_md = _chapter_markdown(chapter)
        title = chapter.title

    candidates = list_active_stories_for_user_sync(session, user_id)
    route = route_agent.decide(
        chapter_category=chapter_category,
        chapter_title=title,
        batch_transcript=batch_for_route,
        candidate_stories=candidates,
        llm=llm,
        valid_story_ids={story.id for story in candidates},
    )

    target_story_id: str | None = None
    existing_for_narrative = ""
    if route.decision == "append_story" and route.target_story_id:
        target_story = session.get(Story, route.target_story_id)
        # Only append to a story that exists and belongs to this user;
        # otherwise fall through to creating a new story.
        if target_story and target_story.user_id == user_id:
            target_story_id = target_story.id
            existing_for_narrative = (target_story.canonical_markdown or "").strip()

    narrative_raw = narrative_agent.generate_narrative(
        stage=chapter_category,
        slots=slot_snippets,
        new_content=new_content_input,
        existing_content=existing_for_narrative,
        user_profile=user_profile,
        birth_year=user_birth_year,
        llm=llm,
    )

    if existing_for_narrative:
        if _is_suspiciously_short(narrative_raw, existing_for_narrative):
            logger.warning("叙事长度异常: 回退为原文追加")
            narrative_raw = f"{existing_for_narrative}\n\n{combined_text}"
    elif _is_suspiciously_short(narrative_raw, existing_chapter_md):
        # NOTE(review): on this path a new story is created below, so this
        # fallback stores the whole existing chapter markdown plus the raw
        # transcript as that story's content -- confirm this is intended.
        logger.warning(
            "章节级长度异常: 回退为 transcript 追加, category=%s",
            chapter_category,
        )
        narrative_raw = f"{existing_chapter_md}\n\n{combined_text}"

    md = narrative_to_markdown(narrative_raw)
    if not md.strip():
        # Never persist an empty story body; fall back to the raw transcript.
        md = combined_text.strip()

    if chapter is None:
        chapter = Chapter(
            id=str(uuid.uuid4()),
            user_id=user_id,
            title=title,
            order_index=STAGE_TO_ORDER.get(chapter_category, _DEFAULT_ORDER_INDEX),
            status="completed",
            category=chapter_category,
            cover_image=None,
            is_new=True,
            source_segments=source_ids,
        )
        session.add(chapter)
        # Flush so the chapter row exists before the story link is created.
        session.flush()
    else:
        chapter.source_segments = _merge_source_ids(chapter.source_segments, source_ids)
        chapter.is_new = True

    if target_story_id is not None:
        append_story_version_sync(session, target_story_id, md)
        linked_story_id = target_story_id
    else:
        story_title = (route.new_story_title or "").strip()
        if not story_title:
            story_title = narrative_agent.generate_title(
                stage=chapter_category,
                emotion="neutral",
                slots=slot_snippets,
                user_profile=user_profile,
                birth_year=user_birth_year,
                llm=llm,
            )
        new_story = create_story_with_version_sync(
            session,
            user_id=user_id,
            title=story_title,
            canonical_markdown=md,
            stage=chapter_category,
        )
        linked_story_id = new_story.id

    dispatch_ids.add(linked_story_id)
    ensure_chapter_story_link_sync(session, chapter_id=chapter.id, story_id=linked_story_id)

    compose_chapter_from_story_links_sync(session, chapter.id)
    session.flush()

    image_settings = MemoirImageSettings.from_env()
    # bool() keeps the declared return type even if `enabled` is not a bool.
    needs_cover = bool(image_settings.enabled and chapter_needs_cover_enqueue(chapter))
    return chapter, needs_cover, dispatch_ids