237 lines
7.6 KiB
Python
237 lines
7.6 KiB
Python
|
|
"""
|
|||
|
|
Celery 用:按批次将 transcript 写入 Story,并物化 Chapter canonical_markdown。
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import uuid
|
|||
|
|
from typing import Any
|
|||
|
|
|
|||
|
|
from sqlalchemy import select
|
|||
|
|
from sqlalchemy.orm import Session, joinedload
|
|||
|
|
|
|||
|
|
from app.agents.memoir.narrative_agent import NarrativeAgent
|
|||
|
|
from app.agents.memoir.prompts import STAGE_TO_ORDER, format_evidence_chunks_for_prompt
|
|||
|
|
from app.agents.memoir.story_route_agent import StoryRouteAgent
|
|||
|
|
from app.agents.state_schema import MemoirStateSchema
|
|||
|
|
from app.core.logging import get_logger
|
|||
|
|
from app.features.memoir.cover_eligibility import chapter_needs_cover_enqueue
|
|||
|
|
from app.features.memoir.helpers import _chapter_markdown
|
|||
|
|
from app.features.memoir.memoir_images.settings import MemoirImageSettings
|
|||
|
|
from app.features.memoir.models import Chapter
|
|||
|
|
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
|
|||
|
|
from app.features.memoir.repo import compose_chapter_from_story_links_sync
|
|||
|
|
from app.features.story.models import Story
|
|||
|
|
from app.features.story.sync_write import (
|
|||
|
|
append_story_version_sync,
|
|||
|
|
create_story_with_version_sync,
|
|||
|
|
ensure_chapter_story_link_sync,
|
|||
|
|
list_active_stories_for_user_sync,
|
|||
|
|
)
|
|||
|
|
from app.features.memory.repo import retrieve_evidence_sync
|
|||
|
|
|
|||
|
|
logger = get_logger(__name__)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _is_json_narrative(text: str) -> bool:
    """Heuristically report whether *text* looks like a JSON narrative payload.

    The check is purely textual: after trimming surrounding whitespace the
    text must open with ``{`` and contain the substring ``paragraphs``.
    No JSON parsing is attempted. Falsy or whitespace-only input yields
    ``False``.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return False
    opens_like_object = stripped.startswith("{")
    return opens_like_object and "paragraphs" in stripped
|
|||
|
|
|
|||
|
|
|
|||
|
|
# A plain-text narrative shorter than this fraction of the content it was
# supposed to extend is treated as a truncated/failed generation.
_MIN_LENGTH_RATIO = 0.8


def _empty_evidence() -> dict[str, list]:
    """Return the evidence payload used when retrieval fails."""
    return {
        "relevant_chunks": [],
        "relevant_summaries": [],
        "relevant_facts": [],
        "timeline_hints": [],
        "relevant_stories": [],
    }


def _collect_slot_snippets(stage_slots: dict) -> dict[str, str]:
    """Extract the non-empty ``snippet`` of every slot (object or dict form)."""
    snippets: dict[str, str] = {}
    for key, value in stage_slots.items():
        snip = getattr(value, "snippet", None) or (
            value.get("snippet") if isinstance(value, dict) else None
        )
        if snip:
            snippets[key] = snip
    return snippets


def _looks_truncated(narrative: str, baseline: str) -> bool:
    """Tell whether a plain-text narrative is suspiciously shorter than *baseline*."""
    return (
        bool(baseline)
        and not _is_json_narrative(narrative)
        and len(narrative) < len(baseline) * _MIN_LENGTH_RATIO
    )


def _merge_source_ids(existing: list | None, new_ids: list) -> list:
    """Union two id lists, dropping duplicates while keeping first-seen order."""
    return list(dict.fromkeys([*(existing or []), *new_ids]))


def run_story_pipeline_for_category_batch(
    session: Session,
    *,
    user_id: str,
    chapter_category: str,
    category_segments: list,
    state: MemoirStateSchema,
    user_profile: str,
    user_birth_year: int | None,
    llm: Any,
) -> tuple[Chapter | None, bool, set[str]]:
    """Write one category's batch of transcript segments into a Story and its Chapter.

    Steps, in order:

    1. Join the segments' ``transcript_text`` and retrieve supporting evidence
       (best effort: a retrieval failure is logged and replaced by an empty
       evidence payload).
    2. Load the user's active chapter for ``chapter_category``; when none
       exists, generate a title for the chapter that will be created.
    3. Ask the route agent whether the batch appends to an existing story or
       starts a new one.
    4. Generate the narrative, falling back to raw concatenation when a
       plain-text result is much shorter than the content it should extend.
    5. Create or update the chapter, append to / create the story, link the
       two, and recompose the chapter from its story links.

    Args:
        session: SQLAlchemy session; this function adds and flushes but does
            not commit.
        user_id: Owner of the chapter and stories.
        chapter_category: Stage/category key; also used to look up slots in
            ``state.slots`` and the order index in ``STAGE_TO_ORDER``.
        category_segments: Objects exposing ``transcript_text`` and ``id``.
        state: Memoir state whose ``slots`` mapping is read for snippets.
        user_profile: Passed through to the narrative agent.
        user_birth_year: Passed through to the narrative agent.
        llm: Passed through to the narrative and route agents.

    Returns:
        ``(chapter, needs_cover_enqueue, story_ids_to_dispatch_after_commit)``.
    """
    narrative_agent = NarrativeAgent()
    route_agent = StoryRouteAgent()
    dispatch_ids: set[str] = set()

    combined_text = "\n\n".join(
        seg.transcript_text or "" for seg in category_segments
    )
    source_ids = [seg.id for seg in category_segments]

    # Evidence is optional context, so any retrieval failure is tolerated.
    # NOTE(review): if the failure is a database error the session's
    # transaction may need a rollback/savepoint before the queries below can
    # run — confirm retrieve_evidence_sync leaves the session usable.
    try:
        evidence = retrieve_evidence_sync(session, user_id, combined_text, top_k=10)
    except Exception as e:
        logger.warning("Evidence 检索跳过: %s", e)
        evidence = _empty_evidence()

    evidence_text = format_evidence_chunks_for_prompt(evidence)
    has_evidence = bool(evidence_text.strip())
    new_content_input = (
        f"{combined_text}\n\n【相关记忆摘录】\n{evidence_text}"
        if has_evidence
        else combined_text
    )

    stmt_chapter = (
        select(Chapter)
        .where(
            Chapter.user_id == user_id,
            Chapter.category == chapter_category,
            Chapter.is_active == True,  # noqa: E712
        )
        .options(
            joinedload(Chapter.images),
            joinedload(Chapter.story_links),
        )
    )
    # .unique() is applied before reading the scalar because the statement
    # joined-loads collections.
    chapter = session.execute(stmt_chapter).unique().scalar_one_or_none()

    slot_snippets = _collect_slot_snippets(
        state.slots.get(chapter_category, {}) or {}
    )

    if chapter:
        title = chapter.title
        existing_chapter_md = _chapter_markdown(chapter)
    else:
        existing_chapter_md = ""
        # Fall back to the generic title when the agent returns nothing, so a
        # new chapter never ends up with an empty title.
        title = narrative_agent.generate_title(
            stage=chapter_category,
            emotion="neutral",
            slots=slot_snippets,
            user_profile=user_profile,
            birth_year=user_birth_year,
            llm=llm,
        ) or f"{chapter_category} 回忆"

    candidates = list_active_stories_for_user_sync(session, user_id)
    valid_ids = {s.id for s in candidates}

    batch_for_route = (
        f"{combined_text}\n\n{evidence_text}" if has_evidence else combined_text
    )
    route = route_agent.decide(
        chapter_category=chapter_category,
        chapter_title=title,
        batch_transcript=batch_for_route,
        candidate_stories=candidates,
        llm=llm,
        valid_story_ids=valid_ids,
    )

    # Only append when the routed story exists and belongs to this user;
    # otherwise fall through to creating a new story.
    target_story_id: str | None = None
    existing_for_narrative = ""
    if route.decision == "append_story" and route.target_story_id:
        st = session.get(Story, route.target_story_id)
        if st and st.user_id == user_id:
            target_story_id = st.id
            existing_for_narrative = (st.canonical_markdown or "").strip()

    # Normalise a missing result to "" so the length checks below cannot
    # raise; an empty narrative ends up using the raw transcript.
    narrative_raw = narrative_agent.generate_narrative(
        stage=chapter_category,
        slots=slot_snippets,
        new_content=new_content_input,
        existing_content=existing_for_narrative,
        user_profile=user_profile,
        birth_year=user_birth_year,
        llm=llm,
    ) or ""

    if existing_for_narrative:
        # Story-level guard: the rewrite should not be much shorter than the
        # story it extends.
        if _looks_truncated(narrative_raw, existing_for_narrative):
            logger.warning("叙事长度异常: 回退为原文追加")
            narrative_raw = f"{existing_for_narrative}\n\n{combined_text}"
    elif _looks_truncated(narrative_raw, existing_chapter_md):
        # Chapter-level guard, used when there is no story content to compare.
        # NOTE(review): in the new-story path this copies the whole chapter
        # markdown into the new story — confirm that recomposing the chapter
        # from its story links does not then duplicate content.
        logger.warning(
            "章节级长度异常: 回退为 transcript 追加, category=%s",
            chapter_category,
        )
        narrative_raw = f"{existing_chapter_md}\n\n{combined_text}"

    md = narrative_to_markdown(narrative_raw)
    if not md.strip():
        md = combined_text.strip()

    calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999)

    if not chapter:
        chapter = Chapter(
            id=str(uuid.uuid4()),
            user_id=user_id,
            title=title,
            order_index=calculated_order_index,
            status="completed",
            category=chapter_category,
            cover_image=None,
            is_new=True,
            source_segments=source_ids,
        )
        session.add(chapter)
        # Flush so the chapter row exists before story links reference it.
        session.flush()
    else:
        # Order-preserving union keeps the stored list deterministic.
        chapter.source_segments = _merge_source_ids(
            chapter.source_segments, source_ids
        )
        chapter.is_new = True

    if target_story_id is not None:
        append_story_version_sync(session, target_story_id, md)
        linked_story_id = target_story_id
    else:
        story_title = (route.new_story_title or "").strip()
        if not story_title:
            story_title = narrative_agent.generate_title(
                stage=chapter_category,
                emotion="neutral",
                slots=slot_snippets,
                user_profile=user_profile,
                birth_year=user_birth_year,
                llm=llm,
            )
        st = create_story_with_version_sync(
            session,
            user_id=user_id,
            title=story_title,
            canonical_markdown=md,
            stage=chapter_category,
        )
        linked_story_id = st.id

    dispatch_ids.add(linked_story_id)
    ensure_chapter_story_link_sync(
        session, chapter_id=chapter.id, story_id=linked_story_id
    )

    compose_chapter_from_story_links_sync(session, chapter.id)
    session.flush()

    image_settings = MemoirImageSettings.from_env()
    needs_cover = image_settings.enabled and chapter_needs_cover_enqueue(chapter)

    return chapter, needs_cover, dispatch_ids
|