"""
For Celery workers: write a batch of transcript segments into Story rows and
materialize the owning Chapter's canonical_markdown.
"""

from __future__ import annotations

import json
import uuid
from typing import Any

from sqlalchemy import select
from sqlalchemy.orm import Session, joinedload

from app.agents.memoir.narrative_agent import NarrativeAgent
from app.agents.memoir.prompts import (
    STAGE_TO_ORDER,
    format_evidence_chunks_for_prompt,
    format_narrative_user_content,
)
from app.agents.memoir.story_route_agent import (
    PLAN_BATCH_MAX_SEGMENTS,
    StoryBatchPlan,
    StoryRouteAgent,
)
from app.agents.state_schema import MemoirStateSchema
from app.core.config import settings
from app.core.logging import get_logger
from app.features.memoir.cover_eligibility import chapter_needs_cover_enqueue
from app.features.memoir.memoir_images.settings import MemoirImageSettings
from app.features.memoir.models import Chapter
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
from app.features.memoir.repo import (
    compose_chapter_from_story_links_sync,
    reorder_chapter_story_links_by_life_order_sync,
)
from app.features.memory.repo import retrieve_evidence_sync
from app.features.story.models import Story
from app.features.story.sync_write import (
    append_story_version_sync,
    create_story_with_version_sync,
    ensure_chapter_story_link_sync,
    list_active_stories_for_user_sync,
)

logger = get_logger(__name__)

# Per-field character cap applied by the fidelity fallback payload.
_FALLBACK_MAX_CHARS = 15000
# Oral texts shorter than this are compared to the output by raw length only.
_SHORT_ORAL_CHARS = 12
# Merge-shrink guard: only applies when the existing story is longer than
# _MERGE_SHRINK_MIN_EXISTING_CHARS and the merged output is below
# _MERGE_SHRINK_RATIO of it.
_MERGE_SHRINK_MIN_EXISTING_CHARS = 400
_MERGE_SHRINK_RATIO = 0.35
# Non-JSON output shorter than this fraction of the existing story is treated
# as a length anomaly.
_PLAIN_TEXT_SHRINK_RATIO = 0.8


def _fidelity_fallback_json(oral: str, existing_canonical: str | None) -> str:
    """Build the safe fallback narrative JSON used when the fidelity check fails.

    When continuing an existing story the payload keeps the old text plus the
    new oral text as two paragraphs, so the story is never reduced to a single
    sentence. Each field is stripped and capped at ``_FALLBACK_MAX_CHARS``.

    NOTE(review): the cap also truncates the *existing* story text; confirm
    that losing the tail of a very long story is acceptable here.
    """
    o = (oral or "").strip()[:_FALLBACK_MAX_CHARS]
    ex = (existing_canonical or "").strip()[:_FALLBACK_MAX_CHARS]
    if ex and o:
        paragraphs = [{"content": ex}, {"content": o}]
    elif ex:
        paragraphs = [{"content": ex}]
    else:
        paragraphs = [{"content": o}]
    return json.dumps({"paragraphs": paragraphs}, ensure_ascii=False)


def _gate_narrative_fidelity(
    oral_text: str,
    narrative_raw: str,
    llm: Any,
    *,
    existing_canonical: str | None = None,
) -> str:
    """Run the fidelity check on the narrative JSON.

    Returns ``narrative_raw`` unchanged when the check is disabled, when no
    ``llm`` is supplied, when the check passes, or when there is neither oral
    text nor existing text to fall back to. Otherwise returns the fallback
    JSON built from the existing story (if any) plus the oral text.
    """
    # Imported at call time, as in the original (presumably to avoid an import
    # cycle or an eager agent import -- confirm before hoisting).
    from app.agents.memoir.fidelity_check_agent import FidelityCheckAgent

    if not settings.memoir_fidelity_check_enabled or not llm:
        return narrative_raw

    agent = FidelityCheckAgent()
    ex = (existing_canonical or "").strip() or None
    if agent.passes(
        oral_text=oral_text,
        narrative_json=narrative_raw,
        llm=llm,
        existing_canonical_markdown=ex,
    ):
        return narrative_raw

    logger.warning(
        "event=fidelity_gate_fallback oral_len={} merge={}",
        len((oral_text or "").strip()),
        bool(ex),
    )
    o = (oral_text or "").strip()
    if not o and not ex:
        # Nothing to fall back to; keep whatever the model produced.
        return narrative_raw
    return _fidelity_fallback_json(o, ex)


def _should_fallback_to_transcript(md: str, oral: str) -> bool:
    """Tell whether the model output is too short relative to the oral text.

    Guards against over-compression (e.g. a whole passage collapsing into just
    "1999"). Returns False when there is no oral text and True when the output
    is empty. For very short oral text the output only has to be at least as
    long as the oral text; otherwise it must reach
    ``max(memoir_narrative_fallback_min_chars, len(oral) * ratio)``.
    """
    o = (oral or "").strip()
    if not o:
        return False
    m = (md or "").strip()
    if not m:
        return True
    if len(o) < _SHORT_ORAL_CHARS:
        return len(m) < len(o)
    ratio = float(settings.memoir_narrative_fallback_body_ratio)
    min_abs = int(settings.memoir_narrative_fallback_min_chars)
    threshold = max(min_abs, int(len(o) * ratio))
    return len(m) < threshold


def _coalesce_story_markdown(
    md: str,
    oral: str,
    existing_for_narrative: str,
) -> str:
    """Pick the body that is actually persisted.

    For empty or too-short output, fall back to the oral text; when continuing
    a story, keep "existing story + this oral text" instead of dropping the
    old body. Otherwise return the stripped markdown.
    """
    o = (oral or "").strip()
    ex = (existing_for_narrative or "").strip()
    m = (md or "").strip()
    if not m:
        if ex and o:
            return f"{ex}\n\n{o}"
        if o:
            return o
        return ex
    if o and _should_fallback_to_transcript(m, o):
        if ex:
            return f"{ex}\n\n{o}"
        return o
    return m


def _is_json_narrative(text: str) -> bool:
    """Heuristic: the text looks like a narrative JSON object.

    True when the stripped text starts with ``{`` and contains the substring
    ``paragraphs``; no JSON parsing is attempted.
    """
    if not text or not text.strip():
        return False
    s = text.strip()
    return s.startswith("{") and "paragraphs" in s


def _ordered_text_for_segment_ids(
    category_segments: list, segment_ids: list[str]
) -> str:
    """Join the texts of ``segment_ids`` in the given order, blank-line separated.

    Ids that are not present in ``category_segments`` contribute an empty
    string.
    """
    id_to_text = {seg.id: (seg.user_input_text or "") for seg in category_segments}
    return "\n\n".join(id_to_text.get(sid, "") for sid in segment_ids)


def _apply_narrative_fallbacks(
    narrative_raw: str,
    combined_unit_text: str,
    existing_for_narrative: str,
    *,
    chapter_category: str,
) -> str:
    """Replace suspicious model output with a conservative text.

    Three guards are applied in order; the first one that triggers decides the
    return value, otherwise ``narrative_raw`` is returned unchanged:

    1. merge shrink -- JSON output for a continued story is far shorter than
       the existing story: return existing story + oral text;
    2. length anomaly -- non-JSON output is shorter than 80% of the existing
       story: return existing story + oral text (unstripped, as before);
    3. body too short versus the oral text: return existing story + oral text
       when continuing, else the oral text alone.
    """
    # A whole-story merge (JSON) that shrank abnormally: fall back to the old
    # text plus this oral text so the overwrite does not lose content.
    if existing_for_narrative and _is_json_narrative(narrative_raw):
        merged_md = narrative_to_markdown(narrative_raw).strip()
        ex = (existing_for_narrative or "").strip()
        if (
            ex
            and len(ex) > _MERGE_SHRINK_MIN_EXISTING_CHARS
            and len(merged_md) < len(ex) * _MERGE_SHRINK_RATIO
        ):
            logger.warning(
                "event=narrative_fallback reason=merge_shrink action=append_oral "
                "chapter_category={}",
                chapter_category,
            )
            return f"{ex}\n\n{combined_unit_text.strip()}"

    if (
        existing_for_narrative
        and not _is_json_narrative(narrative_raw)
        and len(narrative_raw) < len(existing_for_narrative) * _PLAIN_TEXT_SHRINK_RATIO
    ):
        logger.warning(
            "event=narrative_fallback reason=length_anomaly action=append_raw "
            "chapter_category={}",
            chapter_category,
        )
        return f"{existing_for_narrative}\n\n{combined_unit_text}"

    # Never write the chapter-level canonical (several stories concatenated)
    # into a single Story: that would stuff the whole chapter into one story,
    # and if that story is linked to several chapters their reading views would
    # bleed into each other. For a new story, prefer a short body over
    # concatenating existing_chapter_md.
    md_check = narrative_to_markdown(narrative_raw).strip()
    oral = (combined_unit_text or "").strip()
    ex_fb = (existing_for_narrative or "").strip()
    if oral and _should_fallback_to_transcript(md_check, oral):
        if ex_fb:
            logger.warning(
                "event=narrative_fallback reason=body_too_short_vs_oral_merge "
                "chapter_category={} oral_len={} md_len={}",
                chapter_category,
                len(oral),
                len(md_check),
            )
            return f"{ex_fb}\n\n{oral}"
        logger.warning(
            "event=narrative_fallback reason=body_too_short_vs_oral "
            "chapter_category={} oral_len={} md_len={}",
            chapter_category,
            len(oral),
            len(md_check),
        )
        return oral
    return narrative_raw


def _active_chapter_stmt(user_id: str, chapter_category: str):
    """Build the SELECT for the user's active chapter of one category.

    Images and story links are eagerly loaded with ``joinedload``, so the
    result must be passed through ``.unique()`` before fetching.
    """
    return (
        select(Chapter)
        .where(
            Chapter.user_id == user_id,
            Chapter.category == chapter_category,
            Chapter.is_active == True,  # noqa: E712
        )
        .options(
            joinedload(Chapter.images),
            joinedload(Chapter.story_links),
        )
    )


def _ensure_chapter_record(
    session: Session,
    *,
    user_id: str,
    chapter_category: str,
    title: str,
    source_ids: list[str],
    calculated_order_index: int,
) -> Chapter:
    """Fetch the active chapter for the category, creating it when missing.

    A new chapter is created with ``status="completed"`` and ``is_new=True``.
    An existing chapter gets ``source_ids`` merged into ``source_segments``
    (duplicates removed, first-seen order kept) and ``is_new`` set to True.
    The session is flushed in both cases.
    """
    chapter = (
        session.execute(_active_chapter_stmt(user_id, chapter_category))
        .unique()
        .scalar_one_or_none()
    )
    if not chapter:
        chapter = Chapter(
            id=str(uuid.uuid4()),
            user_id=user_id,
            title=title,
            order_index=calculated_order_index,
            status="completed",
            category=chapter_category,
            is_new=True,
            source_segments=source_ids,
        )
        session.add(chapter)
    else:
        # dict.fromkeys de-duplicates while keeping insertion order; the
        # previous list(set(...)) produced a different order from run to run.
        chapter.source_segments = list(
            dict.fromkeys((chapter.source_segments or []) + source_ids)
        )
        chapter.is_new = True
    session.flush()
    return chapter


def _write_story_unit(
    session: Session,
    *,
    decision: str,
    requested_story_id: str | None,
    new_story_title: str | None,
    oral_text: str,
    evidence_text: str,
    chapter: Chapter,
    chapter_category: str,
    slot_snippets: dict[str, str],
    user_id: str,
    user_profile: str,
    user_birth_year: int | None,
    llm: Any,
    narrative_agent: NarrativeAgent,
) -> str:
    """Generate the narrative for one unit of oral text and persist it.

    Appends a new version to the requested story when the route decision is
    ``"append_story"`` and that story exists and belongs to ``user_id``;
    otherwise creates a new story. Either way the story is linked to
    ``chapter``. Returns the id of the story that was written.
    """
    new_content_input = format_narrative_user_content(oral_text, evidence_text)

    target_story_id: str | None = None
    existing_for_narrative = ""
    if decision == "append_story" and requested_story_id:
        st = session.get(Story, requested_story_id)
        # Only append to a story that exists and is owned by this user; any
        # other outcome falls through to creating a new story.
        if st and st.user_id == user_id:
            target_story_id = st.id
            existing_for_narrative = (st.canonical_markdown or "").strip()

    narrative_raw = narrative_agent.generate_narrative(
        stage=chapter_category,
        slots=slot_snippets,
        new_content=new_content_input,
        existing_content=existing_for_narrative,
        user_profile=user_profile,
        birth_year=user_birth_year,
        llm=llm,
    )
    narrative_raw = _gate_narrative_fidelity(
        oral_text,
        narrative_raw,
        llm,
        existing_canonical=existing_for_narrative or None,
    )
    narrative_raw = _apply_narrative_fallbacks(
        narrative_raw,
        oral_text,
        existing_for_narrative,
        chapter_category=chapter_category,
    )
    md = _coalesce_story_markdown(
        narrative_to_markdown(narrative_raw).strip(),
        oral_text.strip(),
        existing_for_narrative or "",
    )

    if target_story_id is not None:
        append_story_version_sync(session, target_story_id, md)
        story_id = target_story_id
    else:
        story_title = (new_story_title or "").strip()
        if not story_title:
            story_title = narrative_agent.generate_title(
                stage=chapter_category,
                emotion="neutral",
                slots=slot_snippets,
                user_profile=user_profile,
                birth_year=user_birth_year,
                llm=llm,
            )
        created = create_story_with_version_sync(
            session,
            user_id=user_id,
            title=story_title,
            canonical_markdown=md,
            stage=chapter_category,
        )
        story_id = created.id

    ensure_chapter_story_link_sync(
        session, chapter_id=chapter.id, story_id=story_id
    )
    return story_id


def _run_batch_plan_writes(
    session: Session,
    *,
    plan: StoryBatchPlan,
    category_segments: list,
    chapter: Chapter,
    chapter_category: str,
    evidence_text: str,
    slot_snippets: dict[str, str],
    user_id: str,
    user_profile: str,
    user_birth_year: int | None,
    llm: Any,
    narrative_agent: NarrativeAgent,
) -> set[str]:
    """Write every unit of a batch plan and return the ids of touched stories.

    NOTE(review): a plan with no units writes nothing, so the batch transcript
    is not persisted in that case -- confirm plan_batch never returns one.
    """
    dispatch_ids: set[str] = set()
    for unit in plan.units:
        unit_text = _ordered_text_for_segment_ids(category_segments, unit.segment_ids)
        story_id = _write_story_unit(
            session,
            decision=unit.decision,
            requested_story_id=unit.target_story_id,
            new_story_title=unit.new_story_title,
            oral_text=unit_text,
            evidence_text=evidence_text,
            chapter=chapter,
            chapter_category=chapter_category,
            slot_snippets=slot_snippets,
            user_id=user_id,
            user_profile=user_profile,
            user_birth_year=user_birth_year,
            llm=llm,
            narrative_agent=narrative_agent,
        )
        dispatch_ids.add(story_id)
    return dispatch_ids


def run_story_pipeline_for_category_batch(
    session: Session,
    *,
    user_id: str,
    chapter_category: str,
    category_segments: list,
    state: MemoirStateSchema,
    user_profile: str,
    user_birth_year: int | None,
    llm: Any,
) -> tuple[Chapter | None, bool, set[str]]:
    """Route one category's transcript segments into stories and rebuild the chapter.

    Steps: retrieve evidence (best effort), pick or generate the chapter
    title, route the batch (multi-unit plan when an ``llm`` is available and
    the batch has between 2 and ``PLAN_BATCH_MAX_SEGMENTS`` segments, single
    decision otherwise), write the stories, then reorder the chapter's story
    links and recompose the chapter.

    Returns:
        ``(chapter, needs_cover_enqueue, story_ids_to_dispatch_after_commit)``.
    """
    narrative_agent = NarrativeAgent()
    route_agent = StoryRouteAgent()
    dispatch_ids: set[str] = set()

    segment_texts = [seg.user_input_text or "" for seg in category_segments]
    combined_text = "\n\n".join(segment_texts)
    source_ids = [seg.id for seg in category_segments]

    # Evidence is optional context: on any failure continue with none.
    # NOTE(review): if the failure was a database error the session may need a
    # rollback before further use -- confirm how retrieve_evidence_sync fails.
    try:
        evidence = retrieve_evidence_sync(session, user_id, combined_text, top_k=10)
    except Exception as e:
        logger.warning("Evidence 检索跳过: {}", e)
        evidence = {
            "relevant_chunks": [],
            "relevant_summaries": [],
            "relevant_facts": [],
            "timeline_hints": [],
            "relevant_stories": [],
        }
    evidence_text = format_evidence_chunks_for_prompt(evidence)

    chapter = (
        session.execute(_active_chapter_stmt(user_id, chapter_category))
        .unique()
        .scalar_one_or_none()
    )

    # Slot values may be objects exposing ``.snippet`` or plain dicts.
    slot_snippets: dict[str, str] = {}
    stage_slots = state.slots.get(chapter_category, {}) or {}
    for key, value in stage_slots.items():
        snip = getattr(value, "snippet", None) or (
            value.get("snippet") if isinstance(value, dict) else None
        )
        if snip:
            slot_snippets[key] = snip

    if chapter:
        title = chapter.title
    else:
        generated_title = narrative_agent.generate_title(
            stage=chapter_category,
            emotion="neutral",
            slots=slot_snippets,
            user_profile=user_profile,
            birth_year=user_birth_year,
            llm=llm,
        )
        # The placeholder used to be overwritten unconditionally; it now backs
        # up an empty generated title.
        title = generated_title or f"{chapter_category} 回忆"

    candidates = list_active_stories_for_user_sync(session, user_id)
    valid_ids = {s.id for s in candidates}
    calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999)

    use_batch_plan = bool(llm) and (
        2 <= len(category_segments) <= PLAN_BATCH_MAX_SEGMENTS
    )
    plan: StoryBatchPlan | None = None
    if use_batch_plan:
        segs = [(seg.id, seg.user_input_text or "") for seg in category_segments]
        plan = route_agent.plan_batch(
            chapter_category=chapter_category,
            chapter_title=title,
            segments=segs,
            candidate_stories=candidates,
            llm=llm,
            valid_story_ids=valid_ids,
        )

    # Looked up again rather than reusing ``chapter``, as in the original.
    chapter = _ensure_chapter_record(
        session,
        user_id=user_id,
        chapter_category=chapter_category,
        title=title,
        source_ids=source_ids,
        calculated_order_index=calculated_order_index,
    )

    if plan is not None:
        dispatch_ids = _run_batch_plan_writes(
            session,
            plan=plan,
            category_segments=category_segments,
            chapter=chapter,
            chapter_category=chapter_category,
            evidence_text=evidence_text,
            slot_snippets=slot_snippets,
            user_id=user_id,
            user_profile=user_profile,
            user_birth_year=user_birth_year,
            llm=llm,
            narrative_agent=narrative_agent,
        )
    else:
        # Single-decision routing sees the transcript plus evidence, if any.
        batch_for_route = (
            f"{combined_text}\n\n{evidence_text}"
            if evidence_text.strip()
            else combined_text
        )
        route = route_agent.decide(
            chapter_category=chapter_category,
            chapter_title=title,
            batch_transcript=batch_for_route,
            candidate_stories=candidates,
            llm=llm,
            valid_story_ids=valid_ids,
        )
        story_id = _write_story_unit(
            session,
            decision=route.decision,
            requested_story_id=route.target_story_id,
            new_story_title=route.new_story_title,
            oral_text=combined_text,
            evidence_text=evidence_text,
            chapter=chapter,
            chapter_category=chapter_category,
            slot_snippets=slot_snippets,
            user_id=user_id,
            user_profile=user_profile,
            user_birth_year=user_birth_year,
            llm=llm,
            narrative_agent=narrative_agent,
        )
        dispatch_ids.add(story_id)

    reorder_chapter_story_links_by_life_order_sync(session, chapter.id)
    compose_chapter_from_story_links_sync(session, chapter.id)
    session.flush()

    image_settings = MemoirImageSettings.from_env()
    # bool() keeps the second tuple element in line with the annotation.
    needs_cover = bool(
        image_settings.enabled and chapter_needs_cover_enqueue(chapter)
    )
    return chapter, needs_cover, dispatch_ids