feat(memoir): 路由阶段不要求标题,按正文字数门闸延迟 LLM 标题
- 从 story 路由 prompt/校验中移除 new_story_title,改由叙事管线在正文足够长时生成
- 新增 story_title_min_body_chars;短正文使用章节类别占位标题
- CATEGORY_TO_CHAT_STAGE 对齐访谈 state.slots 的 stage 键
- 删除相对口述长度的叙事回退,仅保留 merge JSON 极端缩水类 fallback
- evidence_format:解析 object_json 并优化事实条目标点符号
- 更新 narrative / experience 相关单测
This commit is contained in:
@@ -401,8 +401,6 @@ def get_story_route_prompt(
|
|||||||
|
|
||||||
「故事」在此指:**可独立讲述的一段人生经历**——单一主题或同一事件链;不要假设本批里包含多个互不相关的故事(多段由系统其它步骤处理)。
|
「故事」在此指:**可独立讲述的一段人生经历**——单一主题或同一事件链;不要假设本批里包含多个互不相关的故事(多段由系统其它步骤处理)。
|
||||||
|
|
||||||
**new_story_title 与 reason 只能依据口述中已有信息概括,不得编造口述未出现的人、事、地、物。**
|
|
||||||
|
|
||||||
**路由边界(必须遵守)**:仅根据下方「本批口述合并文本」判断 new_story 与 append_story;不得将系统检索摘要、记忆摘录、图谱事实或其它非用户口述材料当作本批口述内容来匹配候选故事。
|
**路由边界(必须遵守)**:仅根据下方「本批口述合并文本」判断 new_story 与 append_story;不得将系统检索摘要、记忆摘录、图谱事实或其它非用户口述材料当作本批口述内容来匹配候选故事。
|
||||||
|
|
||||||
当前章节(写作容器):
|
当前章节(写作容器):
|
||||||
@@ -419,13 +417,11 @@ def get_story_route_prompt(
|
|||||||
{{
|
{{
|
||||||
"decision": "new_story" | "append_story",
|
"decision": "new_story" | "append_story",
|
||||||
"target_story_id": "<uuid 或 null;append 时必填且必须来自候选>",
|
"target_story_id": "<uuid 或 null;append 时必填且必须来自候选>",
|
||||||
"new_story_title": "<短标题,6-20 字;new_story 时必填,append 时可 null>",
|
|
||||||
"reason": "<一句中文理由>"
|
"reason": "<一句中文理由>"
|
||||||
}}
|
}}
|
||||||
|
|
||||||
规则:
|
规则:
|
||||||
- 若无法自信匹配某一候选,选 new_story
|
- 若无法自信匹配某一候选,选 new_story
|
||||||
- new_story_title 应概括本批新内容,不要与候选标题重复
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@@ -444,8 +440,6 @@ def get_story_batch_plan_prompt(
|
|||||||
## 「故事」定义(必须遵守)
|
## 「故事」定义(必须遵守)
|
||||||
一段「故事」= **可独立讲述的一段人生经历**:单一主题或同一事件链,能单独成篇。若话题切换、时间线跳到另一件事、人物/主线明显变化,应作为**新的故事**(new_story),而不是塞进同一段 append。
|
一段「故事」= **可独立讲述的一段人生经历**:单一主题或同一事件链,能单独成篇。若话题切换、时间线跳到另一件事、人物/主线明显变化,应作为**新的故事**(new_story),而不是塞进同一段 append。
|
||||||
|
|
||||||
**new_story_title 与 reason 只能依据各 segment 文本中已有信息,不得编造口述未出现的事实。**
|
|
||||||
|
|
||||||
## 任务
|
## 任务
|
||||||
将本批 segment **划分为连续若干块**(每块包含至少一个 segment,顺序不能打乱;每个 segment 必须恰好属于一块)。对每一块决定:
|
将本批 segment **划分为连续若干块**(每块包含至少一个 segment,顺序不能打乱;每个 segment 必须恰好属于一块)。对每一块决定:
|
||||||
- **append_story**:内容明显延续、补充**某一已有候选故事**的主题与时间线,且能对应到具体 candidate id
|
- **append_story**:内容明显延续、补充**某一已有候选故事**的主题与时间线,且能对应到具体 candidate id
|
||||||
@@ -468,7 +462,6 @@ def get_story_batch_plan_prompt(
|
|||||||
"segment_ids": ["<按顺序列出本块包含的 segment id>"],
|
"segment_ids": ["<按顺序列出本块包含的 segment id>"],
|
||||||
"decision": "new_story" | "append_story",
|
"decision": "new_story" | "append_story",
|
||||||
"target_story_id": "<uuid 或 null;append 时必填且必须来自候选>",
|
"target_story_id": "<uuid 或 null;append 时必填且必须来自候选>",
|
||||||
"new_story_title": "<短标题,6-20 字;new_story 时必填,append 时可 null>",
|
|
||||||
"reason": "<一句中文理由,可选>"
|
"reason": "<一句中文理由,可选>"
|
||||||
}}
|
}}
|
||||||
]
|
]
|
||||||
@@ -477,7 +470,6 @@ def get_story_batch_plan_prompt(
|
|||||||
规则:
|
规则:
|
||||||
- `units` 中所有 `segment_ids` 拼接后,必须**不重不漏**地覆盖本批全部 id,且顺序与【本批口述片段】数组一致
|
- `units` 中所有 `segment_ids` 拼接后,必须**不重不漏**地覆盖本批全部 id,且顺序与【本批口述片段】数组一致
|
||||||
- 若无法自信匹配某一候选,对该块选 new_story
|
- 若无法自信匹配某一候选,对该块选 new_story
|
||||||
- new_story_title 应概括该块内容,不要与候选标题重复
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -116,7 +116,8 @@ def validate_story_batch_plan(
|
|||||||
valid_story_ids: set[str],
|
valid_story_ids: set[str],
|
||||||
) -> tuple[bool, str | None]:
|
) -> tuple[bool, str | None]:
|
||||||
"""
|
"""
|
||||||
校验:segment 全覆盖、顺序一致、append 目标合法、new_story 有标题。
|
校验:segment 全覆盖、顺序一致、append 目标合法。
|
||||||
|
标题由 NarrativeAgent 延迟生成,路由阶段不再要求 new_story_title。
|
||||||
返回 (ok, error_code)。
|
返回 (ok, error_code)。
|
||||||
"""
|
"""
|
||||||
if not plan.units:
|
if not plan.units:
|
||||||
@@ -135,10 +136,6 @@ def validate_story_batch_plan(
|
|||||||
tid = u.target_story_id
|
tid = u.target_story_id
|
||||||
if not tid or tid not in valid_story_ids:
|
if not tid or tid not in valid_story_ids:
|
||||||
return False, "invalid_append_target"
|
return False, "invalid_append_target"
|
||||||
else:
|
|
||||||
title = (u.new_story_title or "").strip()
|
|
||||||
if not title:
|
|
||||||
return False, "missing_new_title"
|
|
||||||
return True, None
|
return True, None
|
||||||
|
|
||||||
|
|
||||||
@@ -196,10 +193,6 @@ class StoryRouteAgent:
|
|||||||
new_story_title=decision.new_story_title,
|
new_story_title=decision.new_story_title,
|
||||||
reason="invalid_target",
|
reason="invalid_target",
|
||||||
)
|
)
|
||||||
if decision.decision == "new_story" and not (
|
|
||||||
decision.new_story_title and decision.new_story_title.strip()
|
|
||||||
):
|
|
||||||
decision.new_story_title = None
|
|
||||||
return decision
|
return decision
|
||||||
|
|
||||||
def plan_batch(
|
def plan_batch(
|
||||||
|
|||||||
@@ -61,3 +61,14 @@ STAGE_TO_ORDER = {
|
|||||||
"beliefs": 6,
|
"beliefs": 6,
|
||||||
"summary": 7,
|
"summary": 7,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CATEGORY_TO_CHAT_STAGE: dict[str, str] = {
|
||||||
|
"childhood": "childhood",
|
||||||
|
"education": "education",
|
||||||
|
"career_early": "career",
|
||||||
|
"career_achievement": "career",
|
||||||
|
"career_challenge": "career",
|
||||||
|
"family": "family",
|
||||||
|
"beliefs": "belief",
|
||||||
|
"summary": "belief",
|
||||||
|
}
|
||||||
|
|||||||
@@ -205,9 +205,8 @@ class Settings(BaseSettings):
|
|||||||
evidence_top_k_default: int = Field(default=10, ge=1, le=50)
|
evidence_top_k_default: int = Field(default=10, ge=1, le=50)
|
||||||
evidence_top_k_large_batch: int = Field(default=5, ge=1, le=50)
|
evidence_top_k_large_batch: int = Field(default=5, ge=1, le=50)
|
||||||
evidence_large_batch_threshold: int = Field(default=3, ge=1, le=100)
|
evidence_large_batch_threshold: int = Field(default=3, ge=1, le=100)
|
||||||
# 叙事输出相对口述极端过短才回退(仅防极端压缩;0.3 = 模型输出不到口述 30% 才触发)
|
# Story/Chapter 标题在正文达到此字数后才由 LLM 生成;之前用占位符
|
||||||
memoir_narrative_fallback_body_ratio: float = 0.3
|
story_title_min_body_chars: int = Field(default=60, ge=0, le=10_000)
|
||||||
memoir_narrative_fallback_min_chars: int = 15
|
|
||||||
# 回忆录 Celery:累计 strip 后口述字数未达此值则暂缓提交(0=关闭,仅防抖后提交)
|
# 回忆录 Celery:累计 strip 后口述字数未达此值则暂缓提交(0=关闭,仅防抖后提交)
|
||||||
memoir_segment_batch_min_chars: int = Field(default=50, ge=0, le=50_000)
|
memoir_segment_batch_min_chars: int = Field(default=50, ge=0, le=50_000)
|
||||||
# 本批首条 segment 入队起最长等待(秒),超时则提交(即使字数不足)
|
# 本批首条 segment 入队起最长等待(秒),超时则提交(即使字数不足)
|
||||||
|
|||||||
@@ -19,7 +19,11 @@ from app.agents.memoir.prompts import (
|
|||||||
format_evidence_chunks_for_prompt,
|
format_evidence_chunks_for_prompt,
|
||||||
format_narrative_user_content,
|
format_narrative_user_content,
|
||||||
)
|
)
|
||||||
from app.agents.stage_constants import STAGE_TO_ORDER
|
from app.agents.stage_constants import (
|
||||||
|
CATEGORY_TO_CHAT_STAGE,
|
||||||
|
CHAPTER_CATEGORIES,
|
||||||
|
STAGE_TO_ORDER,
|
||||||
|
)
|
||||||
from app.agents.memoir.story_route_agent import (
|
from app.agents.memoir.story_route_agent import (
|
||||||
PLAN_BATCH_MAX_SEGMENTS,
|
PLAN_BATCH_MAX_SEGMENTS,
|
||||||
StoryBatchPlan,
|
StoryBatchPlan,
|
||||||
@@ -53,6 +57,38 @@ from app.features.story.sync_write import (
|
|||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _placeholder_title(chapter_category: str) -> str:
    """Return the placeholder title used for a chapter category.

    The category is looked up in ``CHAPTER_CATEGORIES``; when it has no
    entry there, the category key itself is returned unchanged.
    """
    fallback = chapter_category
    label = CHAPTER_CATEGORIES.get(chapter_category, fallback)
    return label
|
||||||
|
|
||||||
|
|
||||||
|
def _maybe_generate_title(
    narrative_agent: "NarrativeAgent",
    *,
    chapter_category: str,
    md: str,
    slot_snippets: dict[str, str],
    user_profile: str,
    user_birth_year: int | None,
    llm: Any,
) -> str:
    """Return an LLM-generated title once the body is long enough, else a placeholder.

    The stripped length of ``md`` is compared against
    ``settings.story_title_min_body_chars``. Shorter bodies get
    ``_placeholder_title(chapter_category)`` and the agent is not called.

    Otherwise ``narrative_agent.generate_title`` is called with a copy of
    ``slot_snippets`` (the caller's dict is never mutated). The first 300
    characters of the stripped body are added under ``content_excerpt``
    unless that key is already present.

    If the agent returns an empty or whitespace-only title, the placeholder
    is returned instead so a story never ends up with a blank title.
    """
    body = (md or "").strip()
    if len(body) < settings.story_title_min_body_chars:
        return _placeholder_title(chapter_category)

    merged_slots = dict(slot_snippets)
    excerpt = body[:300]
    if excerpt:
        # Keep a caller-supplied excerpt if one already exists.
        merged_slots.setdefault("content_excerpt", excerpt)

    title = narrative_agent.generate_title(
        stage=chapter_category,
        emotion="neutral",
        slots=merged_slots,
        user_profile=user_profile,
        birth_year=user_birth_year,
        llm=llm,
    )
    if title and str(title).strip():
        return title
    # Blank result from the agent: fall back rather than store an empty title.
    return _placeholder_title(chapter_category)
|
||||||
|
|
||||||
|
|
||||||
def _route_segment_texts(category_segments: list) -> list[tuple[str, str]]:
|
def _route_segment_texts(category_segments: list) -> list[tuple[str, str]]:
|
||||||
"""批量路由 plan_batch:每段仅做规则归一,避免 N 次 LLM。"""
|
"""批量路由 plan_batch:每段仅做规则归一,避免 N 次 LLM。"""
|
||||||
out: list[tuple[str, str]] = []
|
out: list[tuple[str, str]] = []
|
||||||
@@ -122,28 +158,12 @@ def _gate_narrative_fidelity(
|
|||||||
return _fidelity_fallback_json(o, ex), "fidelity_failed"
|
return _fidelity_fallback_json(o, ex), "fidelity_failed"
|
||||||
|
|
||||||
|
|
||||||
def _should_fallback_to_transcript(md: str, oral: str) -> bool:
|
|
||||||
"""模型输出相对口述极度过短时才回退(仅防极端压缩如「1999」)。"""
|
|
||||||
o = (oral or "").strip()
|
|
||||||
if not o:
|
|
||||||
return False
|
|
||||||
m = (md or "").strip()
|
|
||||||
if not m:
|
|
||||||
return True
|
|
||||||
if len(o) < 12:
|
|
||||||
return len(m) < len(o)
|
|
||||||
ratio = float(settings.memoir_narrative_fallback_body_ratio)
|
|
||||||
min_abs = int(settings.memoir_narrative_fallback_min_chars)
|
|
||||||
threshold = max(min_abs, int(len(o) * ratio))
|
|
||||||
return len(m) < threshold
|
|
||||||
|
|
||||||
|
|
||||||
def _coalesce_story_markdown(
|
def _coalesce_story_markdown(
|
||||||
md: str,
|
md: str,
|
||||||
oral: str,
|
oral: str,
|
||||||
existing_for_narrative: str,
|
existing_for_narrative: str,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""落库前对齐正文:空输出或过短回退时,续写场景保留「已有故事 + 本段口述」。"""
|
"""落库前对齐正文:空输出时续写场景保留「已有故事 + 本段口述」。"""
|
||||||
o = (oral or "").strip()
|
o = (oral or "").strip()
|
||||||
ex = (existing_for_narrative or "").strip()
|
ex = (existing_for_narrative or "").strip()
|
||||||
m = (md or "").strip()
|
m = (md or "").strip()
|
||||||
@@ -153,10 +173,6 @@ def _coalesce_story_markdown(
|
|||||||
if o:
|
if o:
|
||||||
return o
|
return o
|
||||||
return ex
|
return ex
|
||||||
if o and _should_fallback_to_transcript(m, o):
|
|
||||||
if ex:
|
|
||||||
return f"{ex}\n\n{o}"
|
|
||||||
return o
|
|
||||||
return m
|
return m
|
||||||
|
|
||||||
|
|
||||||
@@ -181,8 +197,10 @@ def _apply_narrative_fallbacks(
|
|||||||
*,
|
*,
|
||||||
chapter_category: str,
|
chapter_category: str,
|
||||||
) -> tuple[str, str]:
|
) -> tuple[str, str]:
|
||||||
"""返回 (文本, fallback_type);无改写时为 none。"""
|
"""返回 (文本, fallback_type);无改写时为 none。
|
||||||
# 整篇合并(JSON)输出异常缩水:回退为旧文 + 本段口述,避免覆盖丢失
|
|
||||||
|
仅防 merge/append 场景下模型输出极端缩水(丢旧内容),不再按口述字数比例回退。
|
||||||
|
"""
|
||||||
if existing_for_narrative and _is_json_narrative(narrative_raw):
|
if existing_for_narrative and _is_json_narrative(narrative_raw):
|
||||||
merged_md = narrative_to_markdown(narrative_raw).strip()
|
merged_md = narrative_to_markdown(narrative_raw).strip()
|
||||||
ex = (existing_for_narrative or "").strip()
|
ex = (existing_for_narrative or "").strip()
|
||||||
@@ -209,28 +227,6 @@ def _apply_narrative_fallbacks(
|
|||||||
"coalesce_to_old_plus_oral",
|
"coalesce_to_old_plus_oral",
|
||||||
)
|
)
|
||||||
|
|
||||||
md_check = narrative_to_markdown(narrative_raw).strip()
|
|
||||||
oral = (combined_unit_text or "").strip()
|
|
||||||
ex_fb = (existing_for_narrative or "").strip()
|
|
||||||
if oral and _should_fallback_to_transcript(md_check, oral):
|
|
||||||
if ex_fb:
|
|
||||||
logger.warning(
|
|
||||||
"event=narrative_fallback reason=body_too_short_vs_oral_merge "
|
|
||||||
"chapter_category={} oral_len={} md_len={}",
|
|
||||||
chapter_category,
|
|
||||||
len(oral),
|
|
||||||
len(md_check),
|
|
||||||
)
|
|
||||||
return f"{ex_fb}\n\n{oral}", "coalesce_to_old_plus_oral"
|
|
||||||
logger.warning(
|
|
||||||
"event=narrative_fallback reason=body_too_short_vs_oral "
|
|
||||||
"chapter_category={} oral_len={} md_len={}",
|
|
||||||
chapter_category,
|
|
||||||
len(oral),
|
|
||||||
len(md_check),
|
|
||||||
)
|
|
||||||
return oral, "coalesce_to_oral"
|
|
||||||
|
|
||||||
return narrative_raw, "none"
|
return narrative_raw, "none"
|
||||||
|
|
||||||
|
|
||||||
@@ -404,14 +400,13 @@ def _run_batch_plan_writes(
|
|||||||
sid_log = target_story_id
|
sid_log = target_story_id
|
||||||
is_append = True
|
is_append = True
|
||||||
else:
|
else:
|
||||||
story_title = (unit.new_story_title or "").strip()
|
story_title = _maybe_generate_title(
|
||||||
if not story_title:
|
narrative_agent,
|
||||||
story_title = narrative_agent.generate_title(
|
chapter_category=chapter_category,
|
||||||
stage=chapter_category,
|
md=md,
|
||||||
emotion="neutral",
|
slot_snippets=slot_snippets,
|
||||||
slots=slot_snippets,
|
|
||||||
user_profile=user_profile,
|
user_profile=user_profile,
|
||||||
birth_year=user_birth_year,
|
user_birth_year=user_birth_year,
|
||||||
llm=llm,
|
llm=llm,
|
||||||
)
|
)
|
||||||
st = create_story_with_version_sync(
|
st = create_story_with_version_sync(
|
||||||
@@ -519,7 +514,8 @@ def run_story_pipeline_for_category_batch(
|
|||||||
chapter = session.execute(stmt_chapter).unique().scalar_one_or_none()
|
chapter = session.execute(stmt_chapter).unique().scalar_one_or_none()
|
||||||
|
|
||||||
slot_snippets: dict[str, str] = {}
|
slot_snippets: dict[str, str] = {}
|
||||||
stage_slots = state.slots.get(chapter_category, {}) or {}
|
chat_stage = CATEGORY_TO_CHAT_STAGE.get(chapter_category, chapter_category)
|
||||||
|
stage_slots = state.slots.get(chat_stage, {}) or {}
|
||||||
for key, value in stage_slots.items():
|
for key, value in stage_slots.items():
|
||||||
snip = getattr(value, "snippet", None) or (
|
snip = getattr(value, "snippet", None) or (
|
||||||
value.get("snippet") if isinstance(value, dict) else None
|
value.get("snippet") if isinstance(value, dict) else None
|
||||||
@@ -527,17 +523,7 @@ def run_story_pipeline_for_category_batch(
|
|||||||
if snip:
|
if snip:
|
||||||
slot_snippets[key] = snip
|
slot_snippets[key] = snip
|
||||||
|
|
||||||
title = chapter.title if chapter else f"{chapter_category} 回忆"
|
title = chapter.title if chapter else _placeholder_title(chapter_category)
|
||||||
|
|
||||||
if not chapter:
|
|
||||||
title = narrative_agent.generate_title(
|
|
||||||
stage=chapter_category,
|
|
||||||
emotion="neutral",
|
|
||||||
slots=slot_snippets,
|
|
||||||
user_profile=user_profile,
|
|
||||||
birth_year=user_birth_year,
|
|
||||||
llm=llm,
|
|
||||||
)
|
|
||||||
|
|
||||||
# 仅同 chapter_category(story.stage)的 Story 可作为 append 候选,避免跨章节链接导致多章内容相同
|
# 仅同 chapter_category(story.stage)的 Story 可作为 append 候选,避免跨章节链接导致多章内容相同
|
||||||
all_stories = list_active_stories_for_user_sync(session, user_id)
|
all_stories = list_active_stories_for_user_sync(session, user_id)
|
||||||
@@ -684,14 +670,13 @@ def run_story_pipeline_for_category_batch(
|
|||||||
sid_log = target_story_id
|
sid_log = target_story_id
|
||||||
is_append = True
|
is_append = True
|
||||||
else:
|
else:
|
||||||
story_title = (route.new_story_title or "").strip()
|
story_title = _maybe_generate_title(
|
||||||
if not story_title:
|
narrative_agent,
|
||||||
story_title = narrative_agent.generate_title(
|
chapter_category=chapter_category,
|
||||||
stage=chapter_category,
|
md=md,
|
||||||
emotion="neutral",
|
slot_snippets=slot_snippets,
|
||||||
slots=slot_snippets,
|
|
||||||
user_profile=user_profile,
|
user_profile=user_profile,
|
||||||
birth_year=user_birth_year,
|
user_birth_year=user_birth_year,
|
||||||
llm=llm,
|
llm=llm,
|
||||||
)
|
)
|
||||||
st = create_story_with_version_sync(
|
st = create_story_with_version_sync(
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
@@ -46,6 +47,25 @@ def dedupe_evidence_chunk_rows(chunks: list) -> list:
|
|||||||
return [x[1] for x in kept]
|
return [x[1] for x in kept]
|
||||||
|
|
||||||
|
|
||||||
|
def _flatten_object_json(obj_raw: object) -> str:
    """Extract readable text from a fact's ``object_json`` value.

    Accepted inputs:

    * ``dict`` -- the ``"value"`` entry rendered with ``str`` when it is
      present, not ``None`` and not empty; otherwise the remaining truthy
      entries joined as ``"k=v, k=v"``.
    * ``str`` -- stripped. If it starts with ``{`` and parses as a JSON
      object, it is flattened exactly like the equivalent ``dict``. If it
      does not parse, or parses to something other than an object, the
      stripped string is returned as-is.
    * anything else -- ``str(obj_raw)`` when truthy, else ``""``.

    A ``None`` value is treated as missing, so it never renders as the
    literal text ``"None"``.
    """
    if isinstance(obj_raw, str):
        text = obj_raw.strip()
        if not text.startswith("{"):
            return text
        try:
            parsed = json.loads(text)
        except (json.JSONDecodeError, TypeError):
            # Looks like JSON but is not; keep the raw text.
            return text
        if not isinstance(parsed, dict):
            return text
        # Same rendering rules as a dict passed in directly.
        return _flatten_object_json(parsed)

    if isinstance(obj_raw, dict):
        value = obj_raw.get("value")
        if value is not None:
            rendered = str(value)
            if rendered:
                return rendered
        return ", ".join(f"{k}={v}" for k, v in obj_raw.items() if v)

    return str(obj_raw) if obj_raw else ""
|
||||||
|
|
||||||
|
|
||||||
def format_evidence_chunks_for_prompt(evidence: dict) -> str:
|
def format_evidence_chunks_for_prompt(evidence: dict) -> str:
|
||||||
"""将 retrieve_evidence / retrieve_evidence_sync 结果格式化为简短文本,供叙事与访谈 prompt 使用。
|
"""将 retrieve_evidence / retrieve_evidence_sync 结果格式化为简短文本,供叙事与访谈 prompt 使用。
|
||||||
|
|
||||||
@@ -75,11 +95,15 @@ def format_evidence_chunks_for_prompt(evidence: dict) -> str:
|
|||||||
if isinstance(f, dict):
|
if isinstance(f, dict):
|
||||||
subj = f.get("subject", "")
|
subj = f.get("subject", "")
|
||||||
pred = f.get("predicate", "")
|
pred = f.get("predicate", "")
|
||||||
obj = f.get("object_json", "")
|
obj_raw = f.get("object_json", "")
|
||||||
|
obj = _flatten_object_json(obj_raw)
|
||||||
if subj or pred:
|
if subj or pred:
|
||||||
parts.append(f"{subj} {pred} {obj}")
|
if obj:
|
||||||
|
parts.append(f"{subj}:{pred}({obj})")
|
||||||
else:
|
else:
|
||||||
parts.append(f"{getattr(f, 'subject', '')} {getattr(f, 'predicate', '')}")
|
parts.append(f"{subj}:{pred}")
|
||||||
|
else:
|
||||||
|
parts.append(f"{getattr(f, 'subject', '')}:{getattr(f, 'predicate', '')}")
|
||||||
for t in timeline[:5]:
|
for t in timeline[:5]:
|
||||||
if isinstance(t, dict):
|
if isinstance(t, dict):
|
||||||
title = (t.get("title") or "").strip()
|
title = (t.get("title") or "").strip()
|
||||||
|
|||||||
@@ -186,12 +186,6 @@ class TestMemoirStyleRegressions:
|
|||||||
)
|
)
|
||||||
assert "文采服务于真实" in prompt or "虚构描写" in prompt
|
assert "文采服务于真实" in prompt or "虚构描写" in prompt
|
||||||
|
|
||||||
def test_fallback_ratio_is_lenient(self) -> None:
|
|
||||||
"""fallback 阈值应该宽松——只有极端压缩才触发,正常书面化改写不触发。"""
|
|
||||||
oral = "我一九九九年出生在上海,后来搬到苏州。小学时爷爷常带我去河边散步。"
|
|
||||||
half_length_md = oral[: len(oral) // 2 + 5]
|
|
||||||
assert not sps._should_fallback_to_transcript(half_length_md, oral)
|
|
||||||
|
|
||||||
def test_merge_shrink_only_on_extreme_loss(self) -> None:
|
def test_merge_shrink_only_on_extreme_loss(self) -> None:
|
||||||
"""合并场景只有在极端缩水时才触发 fallback,不因正常重组而退回。"""
|
"""合并场景只有在极端缩水时才触发 fallback,不因正常重组而退回。"""
|
||||||
existing = "这是一段已有的故事正文,讲述了童年在河边的回忆。" * 20
|
existing = "这是一段已有的故事正文,讲述了童年在河边的回忆。" * 20
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
"""叙事分区、口述过短回退、配图字数门闸(纯函数/无 DB)。"""
|
"""叙事分区、merge_shrink 回退、配图字数门闸(纯函数/无 DB)。"""
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from app.agents.memoir.prompts import format_narrative_user_content
|
from app.agents.memoir.prompts import format_narrative_user_content
|
||||||
from app.features.memoir import story_pipeline_sync as sps
|
from app.features.memoir import story_pipeline_sync as sps
|
||||||
@@ -18,16 +16,6 @@ def test_format_narrative_user_content_with_evidence() -> None:
|
|||||||
assert "非本段口述" in out
|
assert "非本段口述" in out
|
||||||
|
|
||||||
|
|
||||||
def test_should_fallback_to_transcript_short_md(
|
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
|
||||||
) -> None:
|
|
||||||
monkeypatch.setattr(sps.settings, "memoir_narrative_fallback_body_ratio", 0.5)
|
|
||||||
monkeypatch.setattr(sps.settings, "memoir_narrative_fallback_min_chars", 20)
|
|
||||||
oral = "我一九九九年出生在上海,后来全家搬到苏州生活了好几年。"
|
|
||||||
assert sps._should_fallback_to_transcript("1999", oral) is True
|
|
||||||
assert sps._should_fallback_to_transcript(oral, oral) is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_apply_narrative_fallbacks_merge_shrink_appends_oral() -> None:
|
def test_apply_narrative_fallbacks_merge_shrink_appends_oral() -> None:
|
||||||
"""整篇合并 JSON 输出过短:保留旧文并拼本段口述。"""
|
"""整篇合并 JSON 输出过短:保留旧文并拼本段口述。"""
|
||||||
long_existing = "x" * 500
|
long_existing = "x" * 500
|
||||||
@@ -42,20 +30,27 @@ def test_apply_narrative_fallbacks_merge_shrink_appends_oral() -> None:
|
|||||||
assert "新口述补充" in out
|
assert "新口述补充" in out
|
||||||
|
|
||||||
|
|
||||||
def test_apply_narrative_fallbacks_json_too_short_returns_oral(
|
def test_apply_narrative_fallbacks_short_output_no_longer_falls_back() -> None:
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
"""短口述的正常改写不应被回退到口述原文。"""
|
||||||
) -> None:
|
|
||||||
monkeypatch.setattr(sps.settings, "memoir_narrative_fallback_body_ratio", 0.5)
|
|
||||||
monkeypatch.setattr(sps.settings, "memoir_narrative_fallback_min_chars", 20)
|
|
||||||
oral = "我1999年出生在上海,小学时爷爷常带我去河边散步。"
|
oral = "我1999年出生在上海,小学时爷爷常带我去河边散步。"
|
||||||
raw = '{"paragraphs": [{"content": "1999"}]}'
|
raw = '{"paragraphs": [{"content": "1999年,我出生在上海。"}]}'
|
||||||
out, _ft = sps._apply_narrative_fallbacks(
|
out, ft = sps._apply_narrative_fallbacks(
|
||||||
raw,
|
raw, oral, "", chapter_category="childhood"
|
||||||
oral,
|
|
||||||
"",
|
|
||||||
chapter_category="childhood",
|
|
||||||
)
|
)
|
||||||
assert out.strip() == oral
|
assert ft == "none"
|
||||||
|
assert "1999" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_coalesce_story_markdown_empty_md_falls_back_to_oral() -> None:
    """An empty model output still falls back to the oral transcript."""
    oral_text = "口述原文"
    result = sps._coalesce_story_markdown("", oral_text, "")
    assert result == oral_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_coalesce_story_markdown_nonempty_md_kept() -> None:
    """A non-empty rewrite is kept even when it is shorter than the oral text."""
    rewritten = "改写后的短文本"
    result = sps._coalesce_story_markdown(rewritten, "原始口述比较长的一段话", "")
    assert result == rewritten
|
||||||
|
|
||||||
|
|
||||||
def test_memoir_image_settings_min_body_field() -> None:
|
def test_memoir_image_settings_min_body_field() -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user