feat(memoir): 路由阶段不要求标题,按正文字数门闸延迟 LLM 标题

- 从 story 路由 prompt/校验中移除 new_story_title,改由叙事管线在正文足够长时生成
- 新增 story_title_min_body_chars;短正文使用章节类别占位标题
- CATEGORY_TO_CHAT_STAGE 对齐访谈 state.slots 的 stage 键
- 删除相对口述长度的叙事回退,仅保留 merge JSON 极端缩水类 fallback
- evidence_format:解析 object_json 并优化事实条目标点符号
- 更新 narrative / experience 相关单测
This commit is contained in:
Kevin
2026-04-02 14:38:40 +08:00
parent bb16d3a5c9
commit 3ae39838c0
8 changed files with 125 additions and 132 deletions

View File

@@ -19,7 +19,11 @@ from app.agents.memoir.prompts import (
format_evidence_chunks_for_prompt,
format_narrative_user_content,
)
from app.agents.stage_constants import STAGE_TO_ORDER
from app.agents.stage_constants import (
CATEGORY_TO_CHAT_STAGE,
CHAPTER_CATEGORIES,
STAGE_TO_ORDER,
)
from app.agents.memoir.story_route_agent import (
PLAN_BATCH_MAX_SEGMENTS,
StoryBatchPlan,
@@ -53,6 +57,38 @@ from app.features.story.sync_write import (
logger = get_logger(__name__)
def _placeholder_title(chapter_category: str) -> str:
    """Return the placeholder story title for *chapter_category*.

    The category is looked up in ``CHAPTER_CATEGORIES``; when it has no
    entry there, the category string itself is returned unchanged.
    """
    fallback = chapter_category
    return CHAPTER_CATEGORIES.get(chapter_category, fallback)
def _maybe_generate_title(
    narrative_agent: "NarrativeAgent",
    *,
    chapter_category: str,
    md: str,
    slot_snippets: dict[str, str],
    user_profile: str,
    user_birth_year: int | None,
    llm: Any,
) -> str:
    """Return an LLM-generated title for long bodies, else a placeholder.

    The stripped length of ``md`` is compared against
    ``settings.story_title_min_body_chars``. Below that threshold the
    result of ``_placeholder_title(chapter_category)`` is returned and
    ``narrative_agent`` is not called.

    Otherwise ``narrative_agent.generate_title`` is called with a copy of
    ``slot_snippets``. The first 300 characters of the stripped body are
    added under ``"content_excerpt"`` unless the caller already supplied
    that key. The emotion passed to the agent is always ``"neutral"``.
    """
    body = (md or "").strip()
    if len(body) < settings.story_title_min_body_chars:
        return _placeholder_title(chapter_category)

    # Work on a copy so the caller's slot mapping is never mutated.
    title_slots = dict(slot_snippets)
    excerpt = body[:300]
    if excerpt:
        # A caller-provided excerpt takes precedence over the derived one.
        title_slots.setdefault("content_excerpt", excerpt)

    return narrative_agent.generate_title(
        stage=chapter_category,
        emotion="neutral",
        slots=title_slots,
        user_profile=user_profile,
        birth_year=user_birth_year,
        llm=llm,
    )
def _route_segment_texts(category_segments: list) -> list[tuple[str, str]]:
"""批量路由 plan_batch每段仅做规则归一避免 N 次 LLM。"""
out: list[tuple[str, str]] = []
@@ -122,28 +158,12 @@ def _gate_narrative_fidelity(
return _fidelity_fallback_json(o, ex), "fidelity_failed"
def _should_fallback_to_transcript(md: str, oral: str) -> bool:
"""模型输出相对口述极度过短时才回退仅防极端压缩如「1999」"""
o = (oral or "").strip()
if not o:
return False
m = (md or "").strip()
if not m:
return True
if len(o) < 12:
return len(m) < len(o)
ratio = float(settings.memoir_narrative_fallback_body_ratio)
min_abs = int(settings.memoir_narrative_fallback_min_chars)
threshold = max(min_abs, int(len(o) * ratio))
return len(m) < threshold
def _coalesce_story_markdown(
md: str,
oral: str,
existing_for_narrative: str,
) -> str:
"""落库前对齐正文:空输出或过短回退时,续写场景保留「已有故事 + 本段口述」。"""
"""落库前对齐正文:空输出续写场景保留「已有故事 + 本段口述」。"""
o = (oral or "").strip()
ex = (existing_for_narrative or "").strip()
m = (md or "").strip()
@@ -153,10 +173,6 @@ def _coalesce_story_markdown(
if o:
return o
return ex
if o and _should_fallback_to_transcript(m, o):
if ex:
return f"{ex}\n\n{o}"
return o
return m
@@ -181,8 +197,10 @@ def _apply_narrative_fallbacks(
*,
chapter_category: str,
) -> tuple[str, str]:
"""返回 (文本, fallback_type);无改写时为 none。"""
# 整篇合并JSON输出异常缩水回退为旧文 + 本段口述,避免覆盖丢失
"""返回 (文本, fallback_type);无改写时为 none。
仅防 merge/append 场景下模型输出极端缩水(丢旧内容),不再按口述字数比例回退。
"""
if existing_for_narrative and _is_json_narrative(narrative_raw):
merged_md = narrative_to_markdown(narrative_raw).strip()
ex = (existing_for_narrative or "").strip()
@@ -209,28 +227,6 @@ def _apply_narrative_fallbacks(
"coalesce_to_old_plus_oral",
)
md_check = narrative_to_markdown(narrative_raw).strip()
oral = (combined_unit_text or "").strip()
ex_fb = (existing_for_narrative or "").strip()
if oral and _should_fallback_to_transcript(md_check, oral):
if ex_fb:
logger.warning(
"event=narrative_fallback reason=body_too_short_vs_oral_merge "
"chapter_category={} oral_len={} md_len={}",
chapter_category,
len(oral),
len(md_check),
)
return f"{ex_fb}\n\n{oral}", "coalesce_to_old_plus_oral"
logger.warning(
"event=narrative_fallback reason=body_too_short_vs_oral "
"chapter_category={} oral_len={} md_len={}",
chapter_category,
len(oral),
len(md_check),
)
return oral, "coalesce_to_oral"
return narrative_raw, "none"
@@ -404,16 +400,15 @@ def _run_batch_plan_writes(
sid_log = target_story_id
is_append = True
else:
story_title = (unit.new_story_title or "").strip()
if not story_title:
story_title = narrative_agent.generate_title(
stage=chapter_category,
emotion="neutral",
slots=slot_snippets,
user_profile=user_profile,
birth_year=user_birth_year,
llm=llm,
)
story_title = _maybe_generate_title(
narrative_agent,
chapter_category=chapter_category,
md=md,
slot_snippets=slot_snippets,
user_profile=user_profile,
user_birth_year=user_birth_year,
llm=llm,
)
st = create_story_with_version_sync(
session,
user_id=user_id,
@@ -519,7 +514,8 @@ def run_story_pipeline_for_category_batch(
chapter = session.execute(stmt_chapter).unique().scalar_one_or_none()
slot_snippets: dict[str, str] = {}
stage_slots = state.slots.get(chapter_category, {}) or {}
chat_stage = CATEGORY_TO_CHAT_STAGE.get(chapter_category, chapter_category)
stage_slots = state.slots.get(chat_stage, {}) or {}
for key, value in stage_slots.items():
snip = getattr(value, "snippet", None) or (
value.get("snippet") if isinstance(value, dict) else None
@@ -527,17 +523,7 @@ def run_story_pipeline_for_category_batch(
if snip:
slot_snippets[key] = snip
title = chapter.title if chapter else f"{chapter_category} 回忆"
if not chapter:
title = narrative_agent.generate_title(
stage=chapter_category,
emotion="neutral",
slots=slot_snippets,
user_profile=user_profile,
birth_year=user_birth_year,
llm=llm,
)
title = chapter.title if chapter else _placeholder_title(chapter_category)
# 仅同 chapter_category(story.stage)的 Story 可作为 append 候选,避免跨章节链接导致多章内容相同
all_stories = list_active_stories_for_user_sync(session, user_id)
@@ -684,16 +670,15 @@ def run_story_pipeline_for_category_batch(
sid_log = target_story_id
is_append = True
else:
story_title = (route.new_story_title or "").strip()
if not story_title:
story_title = narrative_agent.generate_title(
stage=chapter_category,
emotion="neutral",
slots=slot_snippets,
user_profile=user_profile,
birth_year=user_birth_year,
llm=llm,
)
story_title = _maybe_generate_title(
narrative_agent,
chapter_category=chapter_category,
md=md,
slot_snippets=slot_snippets,
user_profile=user_profile,
user_birth_year=user_birth_year,
llm=llm,
)
st = create_story_with_version_sync(
session,
user_id=user_id,