# life-echo/api/tests/test_reply_segments.py

"""segments_from_llm_response: unit checks aligned with the client-side split rules."""

from app.agents.chat.reply_limits import (
    nonempty_segments_or_fallback,
    segments_from_llm_response,
    strip_leading_en_period_ack_for_chat,
    strip_markdown_for_chat,
    strip_parenthetical_asides_for_chat,
)


def test_split_marker():
    """An explicit ``[SPLIT]`` marker separates the reply into two segments."""
    segments = segments_from_llm_response("a[SPLIT]b", max_segments=3)
    assert segments == ["a", "b"]


def test_paragraph_fallback_when_no_marker():
    """With no marker, two sentence-length paragraphs become two segments."""
    paragraphs = [
        "太为你高兴了！在上海大剧院的舞台绽放，聚光灯下的你。",
        "说到舞台，我忽然想起你黄浦江边的童年。从看着江水流淌，到在舞台上演绎别人的悲欢。",
    ]
    # The paragraphs are joined by a blank line, with no [SPLIT] marker.
    reply = "\n\n".join(paragraphs)
    assert segments_from_llm_response(reply, max_segments=3) == paragraphs


def test_short_paragraphs_not_split():
    """Two one-character paragraphs come back together as a single segment."""
    reply = "a\n\nb"
    segments = segments_from_llm_response(reply, max_segments=3)
    # The whole reply, blank line included, is expected as the only segment.
    assert segments == [reply]


def test_nonempty_fallback_when_all_blank():
    """Empty and whitespace-only segments are replaced by the fallback text."""
    blank_segments = ["", "  "]
    result = nonempty_segments_or_fallback(blank_segments, fallback="ok")
    assert result == ["ok"]


def test_split_marker_strips_markdown():
    """Bold and italic markers are removed from marker-split segments."""
    reply = "**A**[SPLIT]_B_"
    segments = segments_from_llm_response(reply, max_segments=3)
    assert segments == ["A", "B"]


def test_paragraph_split_strips_markdown():
    """Bold markers and link syntax are removed from paragraph-split segments."""
    bold_paragraph = "**太为你高兴了！在上海大剧院的舞台绽放，聚光灯下的你。**"
    link_paragraph = "[详情](https://e.com)说到舞台，我忽然想起你黄浦江边的童年。"
    expected = [
        "太为你高兴了！在上海大剧院的舞台绽放，聚光灯下的你。",
        # Only the link label is expected to survive; the URL is dropped.
        "详情说到舞台，我忽然想起你黄浦江边的童年。",
    ]
    reply = "\n\n".join((bold_paragraph, link_paragraph))
    assert segments_from_llm_response(reply, max_segments=3) == expected


def test_strip_markdown_for_chat_preserves_split_token():
    """The ``[SPLIT]`` token is still present after markdown stripping."""
    cleaned = strip_markdown_for_chat("a **b** [SPLIT] c")
    assert "[SPLIT]" in cleaned


def test_strip_parenthetical_removes_stage_directions():
    """Full-width and ASCII parenthetical asides are removed from the text."""
    cases = (
        ("你好（轻轻笑） lately", "你好 lately"),
        ("(sigh) okay", "okay"),
        ("a（一）（二）b", "ab"),
    )
    for raw, expected in cases:
        assert strip_parenthetical_asides_for_chat(raw) == expected


def test_segments_strip_parentheticals_before_split():
    """A parenthetical aside is stripped and the reply is still split on the marker."""
    reply = "先说（轻轻笑）承接[SPLIT]再问一句"
    segments = segments_from_llm_response(reply, max_segments=3)
    assert segments == ["先说承接", "再问一句"]


def test_strip_parenthetical_multiple_passes():
    """Several separate asides in one string are all removed."""
    stripped = strip_parenthetical_asides_for_chat("a（一）b（二）c")
    assert stripped == "abc"


def test_strip_leading_en_period_ack():
    """A leading "嗯。"-style acknowledgement is dropped; a mid-sentence one is kept."""
    cases = (
        ("嗯。后面正文", "后面正文"),
        ("嗯嗯。后面", "后面"),
        # Whitespace around the leading acknowledgement is removed too.
        ("  嗯。  第二句", "第二句"),
        # Not at the start of the text, so the input is expected unchanged.
        ("句中嗯。不打头", "句中嗯。不打头"),
    )
    for raw, expected in cases:
        assert strip_leading_en_period_ack_for_chat(raw) == expected


def test_segments_strip_leading_en_ack():
    """The leading acknowledgement is stripped from a lone reply and from each split segment."""
    cases = (
        ("嗯。只有一句", ["只有一句"]),
        ("嗯。A[SPLIT]嗯。B", ["A", "B"]),
    )
    for reply, expected in cases:
        assert segments_from_llm_response(reply, max_segments=3) == expected