feat: 生成回忆录agent结构封装

This commit is contained in:
yangshilin
2026-03-19 10:38:11 +08:00
parent b16bb2b96c
commit 4a1d6f0dcc
10 changed files with 881 additions and 227 deletions

View File

@@ -27,23 +27,20 @@ from app.features.user.models import User
from app.core.dependencies import get_llm_provider
from app.agents.state_schema import MemoirStateSchema, SlotData, default_state
from app.agents.prompts.memory_prompts import (
get_creative_title_prompt,
get_narrative_prompt,
get_state_extraction_prompt,
get_chapter_classification_prompt,
inject_image_placeholder_template,
STAGE_TO_ORDER,
CHAPTER_CATEGORIES,
get_narrative_prompt,
inject_image_placeholder_template,
)
from app.agents.memoir import MemoirOrchestrator
from app.agents.memoir.narrative_agent import NarrativeAgent
from app.agents.memoir.placeholder_agent import inject_placeholders
from app.agents.prompts.profile_prompts import format_user_profile_context
import hashlib
from app.features.memoir.memoir_images.parser import (
build_initial_image_assets,
parse_image_placeholders,
split_narrative_to_sections,
)
from app.features.memoir.memoir_images.json_payload import extract_json_payload
import hashlib
from app.core.dependencies import get_image_generator
from app.features.memoir.memoir_images.prompting import MemoirImagePromptService
from app.features.memoir.memoir_images.schema import (
@@ -469,56 +466,6 @@ def _normalize_image_bytes_for_storage(image_bytes: bytes) -> bytes:
return output.getvalue()
# Keyword lists for each of the five life stages. _detect_stage scans these in
# insertion order and returns the first stage that has any keyword occurring
# as a substring of the message.
STAGE_KEYWORDS = {
"childhood": ["童年", "小时候", "出生", "家乡", "小镇"],
"education": ["上学", "学校", "老师", "同学", "教育", "大学"],
"career": ["工作", "职业", "事业", "公司", "同事", "创业"],
"family": ["伴侣", "孩子", "家庭", "家人", "结婚", "父母"],
"belief": ["信念", "价值观", "座右铭", "坚持", "原则"],
}
# Maps each of the 5 stages to its default chapter category (one of 8); used
# as the fallback when LLM classification fails.
_STAGE_TO_DEFAULT_CATEGORY = {
"childhood": "childhood",
"education": "education",
"career": "career_early",
"family": "family",
"belief": "beliefs",
}
def _detect_stage(user_message: str, fallback_stage: str) -> str:
"""检测消息所属的 5-stage 阶段(用于状态跟踪)"""
message = user_message.lower()
for stage, keywords in STAGE_KEYWORDS.items():
if any(word in message for word in keywords):
return stage
return fallback_stage
def _classify_chapter_category(text: str, fallback_stage: str, llm=None) -> str | None:
    """Classify content into one of the eight chapter categories.

    The LLM is tried first when one is supplied. If it fails, or answers with
    a label that is not in ``CHAPTER_CATEGORIES``, the 5-stage keyword
    detection is used and its result is mapped to a default category.

    Args:
        text: Transcript text to classify. ``None`` is treated as empty text.
        fallback_stage: Stage used when keyword detection finds no match.
        llm: Optional object exposing ``invoke(prompt)`` whose result has a
            ``content`` string attribute.

    Returns:
        A chapter category, or ``None`` when the LLM answers ``"none"``
        (content judged to have no memoir value).
    """
    # Segments may lack a transcript; normalise so slicing and keyword
    # matching below cannot fail on None.
    text = text or ""

    if llm:
        try:
            prompt = get_chapter_classification_prompt(text)
            response = llm.invoke(prompt)
            # Models often wrap a one-word answer in quotes or backticks;
            # strip them so an otherwise valid label is still recognised.
            category = response.content.strip().strip("\"'`").strip().lower()
            if category == "none":
                logger.info("LLM 判定内容无回忆录价值,跳过: %s...", text[:80])
                return None
            if category in CHAPTER_CATEGORIES:
                return category
            # Make the silent fall-through visible for diagnosis.
            logger.warning("LLM 返回未知章节类别: %r,回退到关键词映射", category)
        except Exception as e:
            # Best-effort: any LLM failure falls back to keyword mapping.
            logger.warning("LLM 章节分类失败: %s", e)

    stage = _detect_stage(text, fallback_stage)
    default_category = _STAGE_TO_DEFAULT_CATEGORY.get(fallback_stage, "childhood")
    return _STAGE_TO_DEFAULT_CATEGORY.get(stage, default_category)
def _coerce_state(model: MemoirState) -> MemoirStateSchema:
"""将数据库模型转换为 Schema"""
return MemoirStateSchema.model_validate(
@@ -628,174 +575,141 @@ def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
grew_up_place=user_obj.grew_up_place,
occupation=user_obj.occupation,
)
# 分两步处理:
# 1) 5-stage 状态跟踪(slots)
# 2) 8-category 章节分类(chapter creation)
category_to_segments: Dict[str, List[Segment]] = {}
for segment in segments:
text = segment.transcript_text
detected_stage = _detect_stage(text, state.current_stage)
narrative_agent = NarrativeAgent()
# 提取 slots(5-stage 状态跟踪)
extracted_slots = {}
if llm:
try:
prompt = get_state_extraction_prompt(
user_message=text,
current_stage=state.current_stage,
stage_slots=state.slots.get(detected_stage, {}),
)
response = llm.invoke(prompt)
parsed = json.loads(extract_json_payload(response.content))
detected_stage = parsed.get("detected_stage", detected_stage)
extracted_slots = parsed.get("slots", {}) or {}
except (json.JSONDecodeError, Exception) as e:
logger.warning(f"LLM 解析失败: {e}")
def _process_category(
chapter_category: str,
category_segments: List,
state: MemoirStateSchema,
profile: str,
birth_year,
llm,
):
"""单章节处理NarrativeAgent 生成标题+叙事PlaceholderInjectAgent 注入,持久化"""
segment_texts = [seg.transcript_text or "" for seg in category_segments]
combined_text = "\n\n".join(segment_texts)
source_ids = [seg.id for seg in category_segments]
for slot_name, snippet in extracted_slots.items():
state = _update_slot_sync(
stmt_chapter = (
select(Chapter)
.where(
Chapter.user_id == user_id,
Chapter.category == chapter_category,
Chapter.is_active == True,
)
.options(
joinedload(Chapter.sections).joinedload(ChapterSection.image_record),
joinedload(Chapter.images),
)
)
result_chapter = db.execute(stmt_chapter)
chapter = result_chapter.unique().scalar_one_or_none()
slot_snippets = {}
stage_slots = state.slots.get(chapter_category, {}) or {}
for key, value in stage_slots.items():
snip = getattr(value, "snippet", None) or (value.get("snippet") if isinstance(value, dict) else None)
if snip:
slot_snippets[key] = snip
title = chapter.title if chapter else f"{chapter_category} 回忆"
existing_content = ""
if chapter and getattr(chapter, "sections", None):
existing_content = "\n\n".join(
s.content for s in sorted(chapter.sections, key=lambda x: x.order_index) if (s.content or "").strip()
)
narrative = combined_text
if not chapter:
title = narrative_agent.generate_title(
stage=chapter_category,
emotion="neutral",
slots=slot_snippets,
user_profile=profile,
birth_year=birth_year,
llm=llm,
)
new_narrative = narrative_agent.generate_narrative(
stage=chapter_category,
slots=slot_snippets,
new_content=combined_text,
existing_content=existing_content,
user_profile=profile,
birth_year=birth_year,
llm=llm,
)
if existing_content:
narrative = f"{existing_content}\n\n{new_narrative}"
else:
narrative = new_narrative
if existing_content and len(narrative) < len(existing_content) * 0.8:
logger.warning(
"内容长度异常: existing=%d, new=%d, category=%s. 回退为追加模式",
len(existing_content),
len(narrative),
chapter_category,
)
narrative = f"{existing_content}\n\n{combined_text}"
narrative = inject_placeholders(narrative)
calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999)
chapter = _save_narrative_to_sections(
db,
chapter,
narrative,
title=title,
category=chapter_category,
order_index=calculated_order_index,
source_segments=source_ids,
user_id=user_id,
)
db.flush()
db.refresh(chapter)
has_images = image_settings.enabled and (
_chapter_has_any_section_images_to_generate(chapter)
or _chapter_has_cover_to_generate(chapter)
)
stmt_book = select(Book).where(Book.user_id == user_id).order_by(Book.updated_at.desc())
result_book = db.execute(stmt_book)
book = result_book.scalar_one_or_none()
if not book:
book = Book(
id=str(uuid.uuid4()),
user_id=user_id,
stage=detected_stage,
slot_name=slot_name,
snippet=snippet,
segment_ids=[segment.id],
db=db,
title="我的回忆录",
total_pages=0,
total_words=0,
cover_image_url=None,
)
db.add(book)
book.has_update = True
book.last_update_chapter_id = chapter.id
# 8-category 章节分类
chapter_category = _classify_chapter_category(text, detected_stage, llm)
if chapter_category is None:
logger.info(f"段落无回忆录价值,跳过: segment_id={segment.id}")
continue
category_to_segments.setdefault(chapter_category, []).append(segment)
return chapter, has_images
# 按 8 分类生成章节内容
for chapter_category, category_segments in category_to_segments.items():
if not _acquire_chapter_lock(user_id, chapter_category):
logger.warning(f"章节锁竞争: user={user_id}, category={chapter_category}, 延迟重试")
raise self.retry(countdown=10)
try:
segment_texts = [seg.transcript_text for seg in category_segments]
combined_text = "\n\n".join(segment_texts)
source_ids = [seg.id for seg in category_segments]
def _raise_retry():
raise self.retry(countdown=10)
# 查找 active 章节(被清除的章节不继续更新,而是创建新的),并预加载 sections、images
stmt_chapter = (
select(Chapter)
.where(
Chapter.user_id == user_id,
Chapter.category == chapter_category,
Chapter.is_active == True,
)
.options(
joinedload(Chapter.sections).joinedload(ChapterSection.image_record),
joinedload(Chapter.images),
)
)
result_chapter = db.execute(stmt_chapter)
chapter = result_chapter.unique().scalar_one_or_none()
# 获取 slot snippets
slot_snippets = {
key: value.snippet
for key, value in (state.slots.get(chapter_category, {}) or {}).items()
if value.snippet
}
# 生成标题和内容;已有章节的正文从 sections 拼接
title = chapter.title if chapter else f"{chapter_category} 回忆"
existing_content = ""
if chapter and getattr(chapter, "sections", None):
existing_content = "\n\n".join(
s.content for s in sorted(chapter.sections, key=lambda x: x.order_index) if (s.content or "").strip()
)
narrative = combined_text
if llm:
try:
if not chapter:
title_prompt = get_creative_title_prompt(
stage=chapter_category,
emotion="neutral",
slots=slot_snippets,
user_profile=user_profile,
birth_year=user_birth_year,
)
title_response = llm.invoke(title_prompt)
title = title_response.content.strip().strip('"')
narrative_prompt = get_narrative_prompt(
stage=chapter_category,
slots=slot_snippets,
new_content=combined_text,
existing_content=existing_content,
user_profile=user_profile,
birth_year=user_birth_year,
)
narrative_response = llm.invoke(narrative_prompt)
new_narrative = narrative_response.content.strip()
# 追加而非替换
if existing_content:
narrative = f"{existing_content}\n\n{new_narrative}"
else:
narrative = new_narrative
except Exception as e:
logger.warning(f"LLM 生成失败: {e}")
if existing_content:
narrative = f"{existing_content}\n\n{combined_text}"
# 安全检查:新内容不应比旧内容短
if existing_content and len(narrative) < len(existing_content) * 0.8:
logger.warning(
f"内容长度异常: existing={len(existing_content)}, "
f"new={len(narrative)}, category={chapter_category}. 回退为追加模式"
)
narrative = f"{existing_content}\n\n{combined_text}"
# 入库前:占位符位置用正则匹配后拼上固定模板
narrative = inject_image_placeholder_template(narrative)
calculated_order_index = STAGE_TO_ORDER.get(chapter_category, 999)
# 写入 sections(拆段 + 每段配图占位),新建或覆盖该章下所有 sections
chapter = _save_narrative_to_sections(
db,
chapter,
narrative,
title=title,
category=chapter_category,
order_index=calculated_order_index,
source_segments=source_ids,
user_id=user_id,
)
db.flush()
db.refresh(chapter)
if image_settings.enabled and (
_chapter_has_any_section_images_to_generate(chapter)
or _chapter_has_cover_to_generate(chapter)
):
chapters_to_enqueue.add(chapter.id)
# 更新 Book
stmt_book = select(Book).where(Book.user_id == user_id).order_by(Book.updated_at.desc())
result_book = db.execute(stmt_book)
book = result_book.scalar_one_or_none()
if not book:
book = Book(
id=str(uuid.uuid4()),
user_id=user_id,
title="我的回忆录",
total_pages=0,
total_words=0,
cover_image_url=None,
)
db.add(book)
book.has_update = True
book.last_update_chapter_id = chapter.id
finally:
_release_chapter_lock(user_id, chapter_category)
memoir_orchestrator = MemoirOrchestrator()
chapters_to_enqueue, _ = memoir_orchestrator.run(
segments=segments,
llm=llm,
user_profile=user_profile,
user_birth_year=user_birth_year,
get_or_create_state=lambda: _get_or_create_state_sync(user_id, db),
update_slot=lambda stage, slot_name, snippet, seg_ids: _update_slot_sync(
user_id, stage, slot_name, snippet, seg_ids, db
),
acquire_lock=lambda stage: _acquire_chapter_lock(user_id, stage),
release_lock=lambda stage: _release_chapter_lock(user_id, stage),
process_category=_process_category,
raise_retry=_raise_retry,
)
# 标记段落为已处理
for seg in segments: