# life-echo/api/tests/test_judge_service.py

"""评审服务：保留真实失败原因，便于 internal eval 页面排障。"""

import pytest

from app.core.llm_call import LLMCallError
from app.features.evaluation.judge_schemas import ConversationJudgeOutput
from app.features.evaluation.judge_service import EvalJudgeService


def _conversation_payload() -> dict:
    """Return a raw conversation-judge payload used as a test fixture.

    The payload holds sixteen integer dimension scores (which add up to 100),
    a matching ``total_score`` of ``100.0`` and a short rationale string.
    A fresh dict is built on every call, so callers may mutate the result.
    """
    # Per-dimension scores, kept in the same order as the original literal.
    dimension_scores = dict(
        emotion_carry=10,
        empathy_depth=8,
        emotion_safety=6,
        emotion_guidance=6,
        fact_mining=8,
        info_completeness_guide=8,
        info_depth_mining=9,
        persona_understanding=7,
        persona_consistency_verify=4,
        persona_expression_guide=4,
        interview_structure=6,
        context_memory=5,
        rhythm_control=4,
        question_quality=7,
        follow_up_depth=5,
        non_leading=3,
    )
    # Summary fields appended after the individual scores.
    payload = {**dimension_scores}
    payload["total_score"] = 100.0
    payload["rationale"] = "整体表现稳定。"
    return payload


@pytest.mark.asyncio
async def test_judge_conversation_result_preserves_validation_error(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A failing LLM call must surface its reason in ``result.error``.

    The module-level ``allm_json_call`` used by the judge service is replaced
    with a stub that always raises ``LLMCallError``. The service is expected
    to return a result with no output and an error string that carries both
    the validation-failure label and the original message.
    """

    async def _raise_validation_error(*_args, **_kwargs):
        # Arguments are ignored; the stub only simulates a failed call.
        failure = LLMCallError(
            "validation",
            "pydantic validation failed: total_score mismatch",
        )
        raise failure

    monkeypatch.setattr(
        "app.features.evaluation.judge_service.allm_json_call",
        _raise_validation_error,
    )

    # The constructor argument is a bare placeholder object; the patched call
    # raises before this test needs anything from it.
    service = EvalJudgeService(object())
    transcript = "[Turn 1]\n用户: hi\nAI: hello"
    outcome = await service.judge_conversation_result(full_transcript=transcript)

    assert outcome.output is None
    assert outcome.error is not None
    # Both the label and the underlying detail must be preserved.
    for fragment in ("结果校验失败", "total_score mismatch"):
        assert fragment in outcome.error


@pytest.mark.asyncio
async def test_judge_conversation_wrapper_keeps_legacy_shape(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``judge_conversation`` must hand back the judge output itself.

    The patched ``allm_json_call`` returns a pre-validated
    ``ConversationJudgeOutput``; the wrapper is expected to return a value
    equal to that model.
    """
    canned_output = ConversationJudgeOutput.model_validate(_conversation_payload())

    async def _return_canned(*_args, **_kwargs):
        # Arguments are ignored; always answer with the prepared model.
        return canned_output

    monkeypatch.setattr(
        "app.features.evaluation.judge_service.allm_json_call",
        _return_canned,
    )

    service = EvalJudgeService(object())
    transcript = "[Turn 1]\n用户: hi\nAI: hello"
    returned = await service.judge_conversation(full_transcript=transcript)

    assert returned == canned_output
"""Evaluation judge prompt assembly tests."""

from app.features.evaluation.judge_service import _build_memoir_judge_prompt


def test_build_memoir_prompt_includes_source_and_reference_evidence() -> None:
    """The memoir judge prompt must contain every evidence section heading.

    All four inputs are supplied, and the resulting prompt is checked for the
    section markers as well as the verbatim source transcript text.
    """
    transcript_text = "用户: 我后来去了深圳工作。"
    builder_inputs = {
        "memoir_markdown": "# 当前正文\n他后来去了南方。",
        "source_transcript": transcript_text,
        "reference_memoir_markdown": "# 导出基线\n他去了深圳。",
        "evidence_notes": "必须严格核对真实性。",
    }
    prompt = _build_memoir_judge_prompt(**builder_inputs)

    # Checked in order, so the first missing fragment is the one reported.
    expected_fragments = (
        "【评审说明】",
        "【原始访谈/证据】",
        transcript_text,
        "【参考基线/导出成稿】",
        "【当前回忆录正文】",
    )
    for fragment in expected_fragments:
        assert fragment in prompt


def test_build_memoir_prompt_requires_conservative_scoring_without_evidence() -> None:
    """With only the memoir text given, the prompt must demand caution.

    The prompt is built from ``memoir_markdown`` alone and is expected to
    state that no source-interview evidence is available and that scoring
    must be conservative.
    """
    memoir_only = "# 当前正文\n他后来去了南方。"
    prompt = _build_memoir_judge_prompt(memoir_markdown=memoir_only)

    required_notices = ("无可用原始访谈证据", "必须保守打分")
    for notice in required_notices:
        assert notice in prompt