# life-echo/api/tests/test_interview_reply_length.py

"""访谈回复长度策略：分桶与 InterviewAgent 的 max_tokens / 截断联动。"""

from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from app.agents.chat.interview_reply_length import (
    ReplyLengthMode,
    bump_reply_plan_for_background_voice,
    compute_reply_plan,
)
from app.agents.state_schema import MemoirStateSchema


def _fake_settings(**overrides: object) -> SimpleNamespace:
    """Build a stand-in settings object for the reply-length tests.

    Starts from a fixed set of ``chat_interview_*`` defaults and layers
    ``overrides`` on top, so a caller can replace a default or attach an
    attribute that the defaults do not define.

    Args:
        **overrides: Attribute names and values that take precedence over
            the defaults.

    Returns:
        A ``SimpleNamespace`` exposing the merged values as attributes.
    """
    defaults = {
        "chat_interview_max_tokens": 380,
        "chat_interview_max_segments": 2,
        "chat_interview_max_chars_per_segment": 260,
        "chat_interview_brief_max_tokens": 260,
        "chat_interview_brief_max_chars_per_segment": 200,
        "chat_interview_expanded_max_tokens": 520,
        "chat_interview_expanded_max_chars_per_segment": 380,
    }
    # Later entries win, so overrides shadow the defaults they name.
    return SimpleNamespace(**{**defaults, **overrides})


def test_strategy_brief_when_very_short() -> None:
    """A 5-character message is planned as ``brief`` with the brief limits."""
    settings = _fake_settings()
    plan = compute_reply_plan("x" * 5, background_voice=None, settings=settings)

    assert plan.mode == ReplyLengthMode.brief
    # Limits come from the chat_interview_brief_* values in the fake settings.
    assert (plan.max_tokens, plan.max_chars_per_segment) == (260, 200)


def test_strategy_standard_mid_length() -> None:
    """A 50-character message is planned as ``standard`` with the base limits."""
    settings = _fake_settings()
    plan = compute_reply_plan("x" * 50, background_voice=None, settings=settings)

    assert plan.mode == ReplyLengthMode.standard
    # Limits come from the non-prefixed chat_interview_* values.
    assert (plan.max_tokens, plan.max_chars_per_segment) == (380, 260)


def test_strategy_long_chit_stays_standard() -> None:
    """A long chit-chat message (at least 80 chars) keeps the standard budget."""
    chatter = "".join(["今天天气真好哈哈"] * 11)
    # Guard: the message must be long enough to reach the 80-char branch.
    assert len(chatter) >= 80

    plan = compute_reply_plan(
        chatter,
        background_voice=None,
        settings=_fake_settings(),
    )

    assert plan.mode == ReplyLengthMode.standard
    assert plan.max_tokens == 380


def test_strategy_long_with_new_detail_expanded() -> None:
    """A 120-char message opening with a new-detail phrase is ``expanded``."""
    opening = "第一次认识他"
    # Pad the opening phrase with filler up to exactly 120 characters.
    message = opening.ljust(120, "x")
    assert len(message) == 120

    plan = compute_reply_plan(
        message,
        background_voice=None,
        settings=_fake_settings(),
    )

    assert plan.mode == ReplyLengthMode.expanded
    # Limits come from the chat_interview_expanded_* values.
    assert (plan.max_tokens, plan.max_chars_per_segment) == (520, 380)


def test_strategy_boundary_len_20_brief_len_21_standard() -> None:
    """Length 20 is still ``brief``; length 21 is already ``standard``."""
    plans = {
        length: compute_reply_plan(
            "x" * length,
            background_voice=None,
            settings=_fake_settings(),
        )
        for length in (20, 21)
    }

    assert plans[20].mode == ReplyLengthMode.brief
    assert plans[21].mode == ReplyLengthMode.standard


def test_bump_standard_only_for_cadre_military() -> None:
    """Bumping raises a standard plan's limits but leaves a brief plan's tokens.

    A standard plan bumped for the ``cadre`` voice gains the configured extra
    tokens and chars; a brief plan bumped for the ``military`` voice keeps its
    ``max_tokens``.
    """
    extra = 40
    bump_overrides = {
        "chat_interview_cadre_military_standard_extra_tokens": extra,
        "chat_interview_cadre_military_standard_extra_chars": extra,
    }

    # Standard plan: computed with plain settings, bumped with the extras.
    standard_plan = compute_reply_plan(
        "x" * 50,
        background_voice=None,
        settings=_fake_settings(),
    )
    bumped_plan = bump_reply_plan_for_background_voice(
        standard_plan,
        background_voice="cadre",
        settings=_fake_settings(**bump_overrides),
    )
    assert bumped_plan.max_tokens == standard_plan.max_tokens + extra
    assert (
        bumped_plan.max_chars_per_segment
        == standard_plan.max_chars_per_segment + extra
    )

    # Brief plan: the same extras are configured, yet max_tokens is unchanged.
    brief_plan = compute_reply_plan(
        "x" * 5,
        background_voice=None,
        settings=_fake_settings(**bump_overrides),
    )
    unchanged_plan = bump_reply_plan_for_background_voice(
        brief_plan,
        background_voice="military",
        settings=_fake_settings(**bump_overrides),
    )
    assert unchanged_plan.max_tokens == brief_plan.max_tokens


def test_plan_short_information_rich_is_standard_not_brief() -> None:
    """Short but information-dense input maps to ``standard``, not ``brief``.

    The sentence carries dense anchors (e.g. 「那年」 "that year", 「我爸」
    "my dad"), so it must not be squeezed into the brief bucket.
    """
    plan = compute_reply_plan(
        "那年我爸突然病了",
        background_voice=None,
        settings=_fake_settings(),
    )

    assert plan.information_rich is True or plan.mode != ReplyLengthMode.standard
    assert plan.mode == ReplyLengthMode.standard
    assert plan.information_rich is True


def test_plan_long_chit_stays_standard_not_expanded() -> None:
    """A long, clearly chit-chat message is ``standard``, not ``expanded``.

    Length alone must not push the plan into the expanded bucket.
    """
    chatter = "".join(["今天天气真好哈哈"] * 11)
    # Guard: the message must be long enough to reach the 80-char branch.
    assert len(chatter) >= 80

    plan = compute_reply_plan(
        chatter,
        background_voice=None,
        settings=_fake_settings(),
    )

    assert plan.mode == ReplyLengthMode.standard
    assert plan.likely_chit_chat is True


def test_strategy_boundary_len_79_standard_len_80_long_branch() -> None:
    """Filler-only messages of length 79 and 80 both plan as ``standard``."""
    modes = [
        compute_reply_plan(
            "x" * length,
            background_voice=None,
            settings=_fake_settings(),
        ).mode
        for length in (79, 80)
    ]

    below_boundary, at_boundary = modes
    assert below_boundary == ReplyLengthMode.standard
    assert at_boundary == ReplyLengthMode.standard


@pytest.mark.asyncio
async def test_interview_agent_passes_strategy_to_bind_and_truncate() -> None:
    """One strategy feeds ``llm.bind(max_tokens=...)`` and ``truncate_chat_segments``.

    The LLM is replaced by mocks and history lookup is patched to return an
    empty list; the test then checks the ``max_tokens`` handed to ``bind`` and
    the length of every returned segment.
    """
    from app.agents.chat import interview_agent as agent_module

    # The bound LLM answers with a two-part reply separated by [SPLIT].
    llm_reply = MagicMock(content="你好，后来呢？[SPLIT]还有吗？")
    bound_llm = MagicMock()
    bound_llm.ainvoke = AsyncMock(return_value=llm_reply)
    fake_llm = MagicMock()
    fake_llm.bind = MagicMock(return_value=bound_llm)

    agent = agent_module.InterviewAgent()
    agent.llm = fake_llm

    memoir_state = MemoirStateSchema(
        stage_order=["childhood"],
        current_stage="childhood",
        covered_stages=[],
        slots={"childhood": {}},
    )

    empty_history = AsyncMock(return_value=[])
    with patch(
        "app.agents.chat.interview_agent.get_history_messages",
        new=empty_history,
    ):
        turn = await agent.generate_response_with_state(
            conversation_id="c1",
            user_message="x" * 100 + "第一次认识他",
            memoir_state=memoir_state,
        )

    fake_llm.bind.assert_called_once()
    bind_kwargs = fake_llm.bind.call_args.kwargs
    # NOTE(review): 520 / 380 equal the expanded limits in this file's fake
    # settings; the agent presumably reads the real settings — confirm the
    # defaults match.
    assert bind_kwargs["max_tokens"] == 520

    assert len(turn.messages) >= 1
    assert all(len(segment) <= 380 for segment in turn.messages)