- Merge internal-eval into development.sh (single Celery/infra); internal-eval.sh wraps with LIFE_ECHO_WITH_INTERNAL_EVAL; EVAL_ATTACH_ONLY for attaching 8001 when :8000 is already up; document in api/docs/internal-eval.md. - Evaluation: transcript_for_judge, judge error surfacing, rubric/schema tweaks, execution_service and router updates; tests for judge and composite eval. - Memory: ingest nested transaction for embedding/enrichment rollback safety. - Conversation WS: logger.exception for pipeline errors (avoid loguru KeyError). - app-eval-web: Playground saved replays, dialogue turns helper, hash user_id for Memoir; Memoir chapter baseline↔DB row compare with title heuristics; Stories page (#memoir-stories); Markdown + copy buttons; toolbar/panel UI; react-markdown; development proxy and fixture updates.
53 lines
1.5 KiB
Python
53 lines
1.5 KiB
Python
"""评测 transcript 格式化。"""
|
|
|
|
from types import SimpleNamespace
|
|
|
|
from app.features.evaluation.transcript_for_judge import (
|
|
format_eval_turn_block,
|
|
format_export_turns_with_labels,
|
|
format_session_messages_with_turn_labels,
|
|
pair_session_messages_to_turns,
|
|
)
|
|
|
|
|
|
def test_format_eval_turn_block_numbering() -> None:
    """Check that turn index 0 is labelled "[Turn 1]" and both speaker lines appear."""
    rendered = format_eval_turn_block(0, "你好", "我在呢")
    # Each fragment must show up somewhere in the formatted block.
    for expected in ("[Turn 1]", "用户: 你好", "AI: 我在呢"):
        assert expected in rendered
|
|
|
|
|
|
def test_split_token_normalized_in_ai() -> None:
    """Check that a "[SPLIT]" marker in the AI text comes out as a newline."""
    rendered = format_eval_turn_block(1, "u", "a[SPLIT]b")
    expected_ai_line = "AI: a\nb"
    assert expected_ai_line in rendered
|
|
|
|
|
|
def test_export_turns_labels() -> None:
    """Check that two exported (user, AI) pairs get "[Turn 1]" and "[Turn 2]" labels."""
    turns = [("u1", "a1"), ("u2", "a2")]
    labelled = format_export_turns_with_labels(turns)
    assert all(label in labelled for label in ("[Turn 1]", "[Turn 2]"))
|
|
|
|
|
|
def test_pair_session_messages_to_turns() -> None:
    """Check that a system/human/assistant sequence yields one (human, assistant) turn."""
    roles_and_contents = [
        ("system", "x"),
        ("human", "hi"),
        ("assistant", "yo"),
    ]
    # Lightweight stand-ins exposing only the .role and .content attributes.
    messages = [
        SimpleNamespace(role=role, content=content)
        for role, content in roles_and_contents
    ]
    turns = pair_session_messages_to_turns(messages)
    assert turns == [("hi", "yo")]
|
|
|
|
|
|
def test_pair_session_messages_trailing_human() -> None:
    """Check that a lone human message is paired with an empty AI reply."""
    lone_human = SimpleNamespace(role="human", content="only")
    assert pair_session_messages_to_turns([lone_human]) == [("only", "")]
|
|
|
|
|
|
def test_session_messages_with_turn_labels() -> None:
|
|
msgs = [
|
|
SimpleNamespace(role="human", content="你好"),
|
|
SimpleNamespace(role="assistant", content="我在"),
|
|
]
|
|
t = format_session_messages_with_turn_labels(msgs)
|
|
assert "[Turn 1]" in t
|
|
assert "用户: 你好" in t
|