feat(eval): internal-eval stack, judge fixes, and eval web overhaul
- Merge internal-eval into development.sh (single Celery/infra); internal-eval.sh wraps with LIFE_ECHO_WITH_INTERNAL_EVAL; EVAL_ATTACH_ONLY for attaching 8001 when :8000 is already up; document in api/docs/internal-eval.md.
- Evaluation: transcript_for_judge, judge error surfacing, rubric/schema tweaks, execution_service and router updates; tests for judge and composite eval.
- Memory: ingest nested transaction for embedding/enrichment rollback safety.
- Conversation WS: logger.exception for pipeline errors (avoid loguru KeyError).
- app-eval-web: Playground saved replays, dialogue turns helper, hash user_id for Memoir; Memoir chapter baseline↔DB row compare with title heuristics; Stories page (#memoir-stories); Markdown + copy buttons; toolbar/panel UI; react-markdown; development proxy and fixture updates.
This commit is contained in:
52
api/tests/test_transcript_for_judge.py
Normal file
52
api/tests/test_transcript_for_judge.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""评测 transcript 格式化。"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
from app.features.evaluation.transcript_for_judge import (
|
||||
format_eval_turn_block,
|
||||
format_export_turns_with_labels,
|
||||
format_session_messages_with_turn_labels,
|
||||
pair_session_messages_to_turns,
|
||||
)
|
||||
|
||||
|
||||
def test_format_eval_turn_block_numbering() -> None:
    """Turn index 0 should be labelled "[Turn 1]" and show both speaker lines."""
    block = format_eval_turn_block(0, "你好", "我在呢")
    # Each fragment is checked independently so a failure names the missing piece.
    for fragment in ("[Turn 1]", "用户: 你好", "AI: 我在呢"):
        assert fragment in block
|
||||
|
||||
|
||||
def test_split_token_normalized_in_ai() -> None:
    """A "[SPLIT]" marker inside the AI reply should be rendered as a newline."""
    ai_reply = "a[SPLIT]b"
    rendered = format_eval_turn_block(1, "u", ai_reply)
    assert "AI: a\nb" in rendered
|
||||
|
||||
|
||||
def test_export_turns_labels() -> None:
    """Two exported (user, ai) pairs should produce "[Turn 1]" and "[Turn 2]" labels."""
    exported_pairs = [("u1", "a1"), ("u2", "a2")]
    transcript = format_export_turns_with_labels(exported_pairs)
    for label in ("[Turn 1]", "[Turn 2]"):
        assert label in transcript
|
||||
|
||||
|
||||
def test_pair_session_messages_to_turns() -> None:
    """A system message followed by a human/assistant exchange yields one turn."""
    role_and_content = (
        ("system", "x"),
        ("human", "hi"),
        ("assistant", "yo"),
    )
    # Lightweight stand-ins exposing only the `role` and `content` attributes.
    history = [
        SimpleNamespace(role=role, content=content)
        for role, content in role_and_content
    ]
    turns = pair_session_messages_to_turns(history)
    assert turns == [("hi", "yo")]
|
||||
|
||||
|
||||
def test_pair_session_messages_trailing_human() -> None:
    """A lone human message is paired with an empty assistant reply."""
    unanswered = SimpleNamespace(role="human", content="only")
    turns = pair_session_messages_to_turns([unanswered])
    assert turns == [("only", "")]
|
||||
|
||||
|
||||
def test_session_messages_with_turn_labels() -> None:
    """A human/assistant exchange is formatted with a turn label and the user line."""
    human = SimpleNamespace(role="human", content="你好")
    assistant = SimpleNamespace(role="assistant", content="我在")
    transcript = format_session_messages_with_turn_labels([human, assistant])
    for fragment in ("[Turn 1]", "用户: 你好"):
        assert fragment in transcript
|
||||
Reference in New Issue
Block a user