- 新增 utterance_substance:短时/应答/元话语可跳过记忆检索、阶段 LLM 与资料抽取 LLM;可配置 - 输入归一化:LLM 模式默认仅语音/ASR;配置项写入 .env.example - Memoir Phase1:可选 batch LLM 一次性抽取+分类(失败回退逐段);Extraction 空槽位时阶段与 current_stage 对齐,prompt 约束收紧 - 叙事与忠实度:narrative_safety、证据重叠/场合锚点、标题 slots 与履历短语 grounded;fidelity 解析失败 fail-open 可配置 - 章节管线:锁 TTL 上调、锁竞争 Celery 重试、Phase2 immediate singleflight 等;story_pipeline_sync / chapter_compose / memoir_tasks 联动 - Memory:compaction / repo / summarizer / evidence 小修;事实 FTS 未命中是否回退最近事实可配置 - 新增 memoir_pipeline_trace;补充 memoir_reliability 文档与多项回归/门控测试
66 lines
2.6 KiB
Python
66 lines
2.6 KiB
Python
"""叙事边界:伪 JSON、prompt 标记泄漏启发式。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from app.features.memoir.narrative_to_markdown import narrative_to_markdown
|
|
from app.features.memoir import narrative_safety as ns
|
|
from app.features.memoir import story_pipeline_sync as sps
|
|
|
|
|
|
def test_narrative_to_markdown_malformed_json_with_paragraphs_shell_returns_empty() -> (
    None
):
    """Broken JSON must never be used as body text.

    The fixture opens like a ``{"paragraphs": ...}`` payload but is cut off
    mid-array. The converter is expected to yield an empty string; per the
    original note, the caller is then supposed to fall back to the oral or
    previous text.
    """
    broken_payload = '{"paragraphs": [broken'

    # Guard the fixture itself: it has to look like a paragraphs JSON shell,
    # otherwise the final assertion would not exercise the intended path.
    looks_like_json_object = broken_payload.strip().startswith("{")
    assert looks_like_json_object
    assert "paragraphs" in broken_payload

    rendered = narrative_to_markdown(broken_payload)
    assert rendered == ""
|
|
|
|
|
|
def test_narrative_to_markdown_valid_paragraphs_preserved() -> None:
    """Well-formed paragraph JSON keeps each paragraph's content in the output."""
    payload = '{"paragraphs": [{"content": "第一段"}, {"content": "第二段"}]}'
    rendered = narrative_to_markdown(payload)

    # Checked in document order so a failure points at the first missing one.
    for expected_fragment in ("第一段", "第二段"):
        assert expected_fragment in rendered
|
|
|
|
|
|
def test_body_contains_prompt_artifact_detects_evidence_marker() -> None:
    """A body that embeds the bracketed evidence-excerpt header is flagged."""
    leaked_text = "前文\n【仅供参考的相关记忆摘录(不得把其中具体事实写成本轮亲历经历)】\nfoo"
    flagged = ns.body_contains_prompt_artifact(leaked_text)
    # Identity check on purpose: the helper must return the bool ``True``,
    # not merely a truthy value.
    assert flagged is True
|
|
|
|
|
|
def test_body_contains_prompt_artifact_clean() -> None:
    """Plain narrative text with no prompt markers is not flagged."""
    clean_text = "我在河边长大。"
    flagged = ns.body_contains_prompt_artifact(clean_text)
    # Identity check on purpose: the helper must return the bool ``False``.
    assert flagged is False
|
|
|
|
|
|
def test_evidence_leak_heuristic_flags_long_shared_substring() -> None:
    """A long run shared by evidence and body yields a score >= ``min_len``.

    The evidence string appears verbatim inside the body, so the shared run
    is far longer than the threshold. The scorer is called with only the body
    and the evidence; the previously declared ``oral`` local was never passed
    anywhere and has been removed.
    """
    # Single source of truth for the threshold, so the value handed to the
    # scorer and the value asserted against cannot drift apart.
    min_len = 14
    ev = "独有细节abcdefghijklmnopqrstuvwxyz独有"
    body = "中间夹着独有细节abcdefghijklmnopqrstuvwxyz独有结尾"

    score = ns.evidence_substring_leak_score(body, ev, min_len=min_len)
    assert score >= min_len
|
|
|
|
|
|
def test_evidence_scene_anchor_leak_detects_dinner_not_in_oral() -> None:
    """A scene detail found only in the evidence is reported as a leak.

    The evidence mentions a dinner gathering the evening before; the oral
    account does not, yet the generated body places the talk at that dinner.
    """
    oral_text = "我们聊了我要去南京了,成家,结婚生子。"
    evidence_text = "我们大伙前一天晚上还在聚餐,聊了我要去南京了。"
    generated_body = "回想起来,那晚聚餐时聊到了我将要前往南京以及成家、结婚生子的话题。"

    # The fourth positional argument is passed as an empty string, exactly as
    # in the original call.
    leaked = ns.evidence_scene_anchor_leak(
        generated_body, evidence_text, oral_text, ""
    )
    assert leaked is True
|
|
|
|
|
|
def test_evidence_scene_anchor_no_flag_when_oral_has_anchor() -> None:
    """No leak is reported when the oral account already contains the scene.

    The dinner is mentioned in both the oral text and the evidence, and the
    body is identical to the oral text.
    """
    oral_text = "前一晚聚餐时我们聊了很多。"
    evidence_text = "摘录里也写了聚餐。"
    generated_body = "前一晚聚餐时我们聊了很多。"

    # The fourth positional argument is passed as an empty string, exactly as
    # in the original call.
    leaked = ns.evidence_scene_anchor_leak(
        generated_body, evidence_text, oral_text, ""
    )
    assert leaked is False
|
|
|
|
|
|
def test_strip_ungrounded_title_drops_career_segment() -> None:
    """A title segment with no support in the source text is removed.

    The source text only talks about family life, while the raw title also
    carries a career phrase. The cleaned title is expected to lose the career
    phrase and keep the family part.
    """
    source_text = "我与妻子和孩子之间,从不为琐碎小事置气。"
    raw_title = "晋升旅长后 · 家庭中的沟通"

    cleaned_title = sps._strip_ungrounded_title_segments(
        raw_title,
        source_text,
        chapter_category="family",
    )

    assert "晋升旅长" not in cleaned_title
    assert "家庭" in cleaned_title
|