"""segments_from_llm_response:与客户端 split 规则对齐的单元校验。""" from app.agents.chat.reply_limits import ( nonempty_segments_or_fallback, segments_from_llm_response, strip_leading_en_period_ack_for_chat, strip_markdown_for_chat, strip_parenthetical_asides_for_chat, ) def test_split_marker(): assert segments_from_llm_response("a[SPLIT]b", max_segments=3) == ["a", "b"] def test_paragraph_fallback_when_no_marker(): a = "太为你高兴了!在上海大剧院的舞台绽放,聚光灯下的你。" b = "说到舞台,我忽然想起你黄浦江边的童年。从看着江水流淌,到在舞台上演绎别人的悲欢。" assert segments_from_llm_response(f"{a}\n\n{b}", max_segments=3) == [a, b] def test_short_paragraphs_not_split(): t = "a\n\nb" assert segments_from_llm_response(t, max_segments=3) == [t] def test_nonempty_fallback_when_all_blank(): assert nonempty_segments_or_fallback(["", " "], fallback="ok") == ["ok"] def test_split_marker_strips_markdown(): assert segments_from_llm_response("**A**[SPLIT]_B_", max_segments=3) == ["A", "B"] def test_paragraph_split_strips_markdown(): a = "**太为你高兴了!在上海大剧院的舞台绽放,聚光灯下的你。**" b = "[详情](https://e.com)说到舞台,我忽然想起你黄浦江边的童年。" assert segments_from_llm_response(f"{a}\n\n{b}", max_segments=3) == [ "太为你高兴了!在上海大剧院的舞台绽放,聚光灯下的你。", "详情说到舞台,我忽然想起你黄浦江边的童年。", ] def test_strip_markdown_for_chat_preserves_split_token(): assert "[SPLIT]" in strip_markdown_for_chat("a **b** [SPLIT] c") def test_strip_parenthetical_removes_stage_directions(): assert strip_parenthetical_asides_for_chat("你好(轻轻笑) lately") == "你好 lately" assert strip_parenthetical_asides_for_chat("(sigh) okay") == "okay" assert strip_parenthetical_asides_for_chat("a(一)(二)b") == "ab" def test_segments_strip_parentheticals_before_split(): assert segments_from_llm_response( "先说(轻轻笑)承接[SPLIT]再问一句", max_segments=3 ) == ["先说承接", "再问一句"] def test_strip_parenthetical_multiple_passes(): assert strip_parenthetical_asides_for_chat("a(一)b(二)c") == "abc" def test_strip_leading_en_period_ack(): assert strip_leading_en_period_ack_for_chat("嗯。后面正文") == "后面正文" assert strip_leading_en_period_ack_for_chat("嗯嗯。后面") == "后面" assert strip_leading_en_period_ack_for_chat(" 嗯。 第二句") == "第二句" assert strip_leading_en_period_ack_for_chat("句中嗯。不打头") == "句中嗯。不打头" def test_segments_strip_leading_en_ack(): assert segments_from_llm_response("嗯。只有一句", max_segments=3) == ["只有一句"] assert segments_from_llm_response("嗯。A[SPLIT]嗯。B", max_segments=3) == ["A", "B"]