Files
life-echo/api/tests/test_reply_segments.py

44 lines
1.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""segments_from_llm_response与客户端 split 规则对齐的单元校验。"""
from app.agents.chat.reply_limits import (
nonempty_segments_or_fallback,
segments_from_llm_response,
strip_markdown_for_chat,
)
def test_split_marker():
    """A reply containing an explicit ``[SPLIT]`` marker yields one segment per side."""
    segments = segments_from_llm_response("a[SPLIT]b", max_segments=3)
    assert segments == ["a", "b"]
def test_paragraph_fallback_when_no_marker():
    """With no ``[SPLIT]`` marker, two paragraphs separated by a blank line become two segments."""
    first_paragraph = "太为你高兴了!在上海大剧院的舞台绽放,聚光灯下的你。"
    second_paragraph = "说到舞台,我忽然想起你黄浦江边的童年。从看着江水流淌,到在舞台上演绎别人的悲欢。"
    # The only separator in the reply is the blank line between the paragraphs.
    reply = f"{first_paragraph}\n\n{second_paragraph}"
    segments = segments_from_llm_response(reply, max_segments=3)
    assert segments == [first_paragraph, second_paragraph]
def test_short_paragraphs_not_split():
    """A reply made of very short paragraphs comes back as a single, unmodified segment."""
    # NOTE(review): the length threshold that decides "short" lives in
    # reply_limits and is not visible from this test module.
    reply = "a\n\nb"
    segments = segments_from_llm_response(reply, max_segments=3)
    assert segments == [reply]
def test_nonempty_fallback_when_all_blank():
    """When every segment is empty or whitespace-only, the fallback text is the sole result."""
    blank_segments = ["", " "]
    result = nonempty_segments_or_fallback(blank_segments, fallback="ok")
    assert result == ["ok"]
def test_split_marker_strips_markdown():
    """Bold and italic markup is removed from each segment produced by a ``[SPLIT]`` marker."""
    marked_up_reply = "**A**[SPLIT]_B_"
    segments = segments_from_llm_response(marked_up_reply, max_segments=3)
    assert segments == ["A", "B"]
def test_paragraph_split_strips_markdown():
    """Paragraph-based splitting also strips markdown: bold markers go, links keep only their text."""
    bold_paragraph = "**太为你高兴了!在上海大剧院的舞台绽放,聚光灯下的你。**"
    link_paragraph = "[详情](https://e.com)说到舞台,我忽然想起你黄浦江边的童年。"
    expected_segments = [
        "太为你高兴了!在上海大剧院的舞台绽放,聚光灯下的你。",
        "详情说到舞台,我忽然想起你黄浦江边的童年。",
    ]
    reply = f"{bold_paragraph}\n\n{link_paragraph}"
    segments = segments_from_llm_response(reply, max_segments=3)
    assert segments == expected_segments
def test_strip_markdown_for_chat_preserves_split_token():
    """Stripping markdown must leave the literal ``[SPLIT]`` token in the text."""
    stripped = strip_markdown_for_chat("a **b** [SPLIT] c")
    assert "[SPLIT]" in stripped