Files
life-echo/api/tests/test_experience_regressions.py
Kevin 3ae39838c0 feat(memoir): 路由阶段不要求标题,按正文字数门闸延迟 LLM 标题
- 从 story 路由 prompt/校验中移除 new_story_title,改由叙事管线在正文足够长时生成
- 新增 story_title_min_body_chars;短正文使用章节类别占位标题
- CATEGORY_TO_CHAT_STAGE 对齐访谈 state.slots 的 stage 键
- 删除相对口述长度的叙事回退,仅保留 merge JSON 极端缩水类 fallback
- evidence_format:解析 object_json 并优化事实条目标点符号
- 更新 narrative / experience 相关单测
2026-04-02 14:38:40 +08:00

203 lines
8.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""面向体验的回归测试:保护"聊得下去""回忆录有文笔"两个核心目标。
与 test_interview_prompts / test_interview_reply_length 不同,这组测试不验证字面规则,
而是验证体验目标的必要条件是否成立。改 agent 后如果这里挂了,说明体验方向可能在退步。
"""
from types import SimpleNamespace
import pytest
from app.agents.chat.interview_reply_length import (
ReplyLengthMode,
compute_reply_plan,
heuristic_likely_emotional,
heuristic_likely_new_detail,
)
from app.agents.chat.prompts_conversation import (
get_guided_conversation_prompt,
get_opening_prompt,
)
from app.agents.memoir.prompts import (
get_creative_title_json_prompt,
get_narrative_editor_system_prompt,
get_narrative_json_prompt,
)
from app.features.memoir import story_pipeline_sync as sps
def _fake_settings(**overrides: object) -> SimpleNamespace:
base = {
"chat_interview_max_tokens": 380,
"chat_interview_max_segments": 2,
"chat_interview_max_chars_per_segment": 260,
"chat_interview_brief_max_tokens": 260,
"chat_interview_brief_max_chars_per_segment": 200,
"chat_interview_expanded_max_tokens": 520,
"chat_interview_expanded_max_chars_per_segment": 380,
}
base.update(overrides)
return SimpleNamespace(**base)
# ── 聊天体验回归 ──────────────────────────────────────────────────
class TestChatExperienceRegressions:
    """Guard the 'conversation keeps flowing' experience goal."""

    def test_emotional_short_message_not_brief(self) -> None:
        """A strongly emotional message must not be squeezed into brief mode.

        The model needs enough room to respond to the emotion.
        """
        message = "我妈走了以后,我真的很难过"
        plan = compute_reply_plan(
            message,
            background_voice=None,
            settings=_fake_settings(),
        )
        assert plan.mode != ReplyLengthMode.brief
        assert heuristic_likely_emotional(message) is True

    def test_emotional_medium_message_gets_expanded(self) -> None:
        """A medium-length emotional message should get the expanded tier."""
        msg = "那年我奶奶去世的时候,我在外地上学,没来得及见最后一面,到现在想起来还是特别难过"
        # Sanity check that the fixture really is "medium length".
        assert len(msg) >= 40
        plan = compute_reply_plan(
            msg,
            background_voice=None,
            settings=_fake_settings(),
        )
        assert plan.mode == ReplyLengthMode.expanded

    def test_new_detail_triggers_followup_hint_in_prompt(self) -> None:
        """A new name or relationship should make the prompt ask for a follow-up.

        The prompt must explicitly request a follow-up question rather than
        only an emotional remark.
        """
        prompt = get_guided_conversation_prompt(
            current_stage="childhood",
            empty_slots=["place", "people"],
            filled_slots={},
            user_message="那个女生叫小芳,是我同桌",
            conversation_turn_total=2,
            same_topic_turns=2,
            all_stages_coverage=None,
            detected_user_stage="childhood",
            user_profile_context="",
            persona="default",
        )
        assert "本轮判定" in prompt
        assert "追问" in prompt

    def test_emotional_prompt_prioritizes_empathy(self) -> None:
        """With an emotional user message the prompt should mention emotion."""
        prompt = get_guided_conversation_prompt(
            current_stage="family",
            empty_slots=["relationship"],
            filled_slots={},
            user_message="想起我妈,心酸",
            conversation_turn_total=3,
            same_topic_turns=1,
            all_stages_coverage=None,
            detected_user_stage="family",
            user_profile_context="",
            persona="default",
        )
        assert "情绪" in prompt

    def test_chit_chat_does_not_force_memoir_question(self) -> None:
        """Small talk should not force a memoir follow-up question."""
        prompt = get_guided_conversation_prompt(
            current_stage="childhood",
            empty_slots=["place"],
            filled_slots={},
            user_message="今天天气真好哈哈",
            conversation_turn_total=0,
            same_topic_turns=0,
            all_stages_coverage=None,
            detected_user_stage="childhood",
            user_profile_context="",
            persona="default",
        )
        assert "偏闲聊" in prompt
        assert "陪聊" in prompt

    def test_topic_switch_not_triggered_at_3_turns(self) -> None:
        """Three turns on one topic must not trigger a topic switch.

        The user may still want to continue.
        """
        prompt = get_guided_conversation_prompt(
            current_stage="childhood",
            empty_slots=["place", "people", "emotion"],
            filled_slots={"daily_life": "放学后去河边玩"},
            user_message="对啊,那条河特别浅",
            conversation_turn_total=4,
            same_topic_turns=3,
            all_stages_coverage=None,
            detected_user_stage="childhood",
            user_profile_context="",
            persona="default",
        )
        assert "聊得差不多了" not in prompt

    def test_prompt_intro_mentions_empathy_first(self) -> None:
        """The prompt should stress 'meet the user first', not 'limit the length'."""
        prompt = get_guided_conversation_prompt(
            current_stage="childhood",
            empty_slots=["place"],
            filled_slots={},
            user_message="小时候家里穷",
            conversation_turn_total=0,
            same_topic_turns=0,
            all_stages_coverage=None,
            detected_user_stage="childhood",
            user_profile_context="",
            persona="default",
        )
        assert "接住" in prompt
# ── 回忆录文风回归 ──────────────────────────────────────────────────
class TestMemoirStyleRegressions:
    """Guard the 'the memoir reads well' experience goal."""

    def test_title_prompt_allows_literary_expression(self) -> None:
        """The title prompt should forbid fabrication, not literary expression."""
        prompt = get_creative_title_json_prompt(
            stage="childhood",
            emotion="warm",
            slots={"place": "湖南老家", "turning_event": "爷爷背我过河"},
        )
        assert "禁止虚构" in prompt
        # The needle is CJK text with no letter case, so no case folding is needed.
        assert "平实" not in prompt

    def test_title_prompt_uses_facts_only_not_plain(self) -> None:
        """The title prompt should take the facts_only route, not the plain one.

        facts_only allows literary flair, whereas plain demands plain wording.
        """
        prompt = get_creative_title_json_prompt(
            stage="childhood",
            emotion="warm",
            slots={"place": "老家"},
        )
        assert "优雅" in prompt or "书面语" in prompt or "文采" in prompt

    def test_narrative_prompt_encourages_literary_quality(self) -> None:
        """The narrative prompt should encourage warm written prose.

        It should aim higher than merely recording events clearly.
        """
        sys_prompt = get_narrative_editor_system_prompt()
        assert "温度" in sys_prompt or "优雅" in sys_prompt
        assert "画面感" in sys_prompt or "生动" in sys_prompt

    def test_narrative_json_prompt_allows_emotion_rendering(self) -> None:
        """The narrative JSON prompt should allow emotional rendering.

        This holds only as long as no new facts are introduced.
        """
        prompt = get_narrative_json_prompt(
            stage="childhood",
            slots={"turning_event": "爷爷背我过河"},
            new_content="【本段用户口述】\n那年下大雨,爷爷背我过河,鞋全湿了,他一直笑。",
        )
        assert "文采服务于真实" in prompt or "虚构描写" in prompt

    def test_merge_shrink_only_on_extreme_loss(self) -> None:
        """A merge should fall back only on extreme shrinkage.

        A normal reorganisation of the text must not trigger the fallback.
        """
        import json

        existing = "这是一段已有的故事正文,讲述了童年在河边的回忆。" * 20
        assert len(existing) > 400
        # Simulate a merge result that kept only half of the existing body.
        half_content = existing[: len(existing) // 2]
        raw = json.dumps(
            {"paragraphs": [{"content": half_content}]},
            ensure_ascii=False,
        )
        # Only the fallback tag matters here; the narrative itself is unused.
        _narrative, fallback_tag = sps._apply_narrative_fallbacks(
            raw, "新的口述补充", existing, chapter_category="childhood"
        )
        assert fallback_tag == "none"