Files
life-echo/api/app/features/evaluation/transcript_for_judge.py
Kevin 99543d04c6 feat(eval): internal-eval stack, judge fixes, and eval web overhaul
- Merge internal-eval into development.sh (single Celery/infra); internal-eval.sh
  wraps with LIFE_ECHO_WITH_INTERNAL_EVAL; EVAL_ATTACH_ONLY for attaching 8001
  when :8000 is already up; document in api/docs/internal-eval.md.
- Evaluation: transcript_for_judge, judge error surfacing, rubric/schema tweaks,
  execution_service and router updates; tests for judge and composite eval.
- Memory: ingest nested transaction for embedding/enrichment rollback safety.
- Conversation WS: logger.exception for pipeline errors (avoid loguru KeyError).
- app-eval-web: Playground saved replays, dialogue turns helper, hash user_id
  for Memoir; Memoir chapter baseline↔DB row compare with title heuristics;
  Stories page (#memoir-stories); Markdown + copy buttons; toolbar/panel UI;
  react-markdown; development proxy and fixture updates.
2026-04-07 17:18:47 +08:00

79 lines
2.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""评测用对话文本格式化(稳定 Turn 标签、便于评审引用)。"""
from __future__ import annotations
from typing import Any, Protocol
class _MessageLike(Protocol):
    """Structural type for message objects that expose ``role`` and ``content``."""

    # Speaker role; the functions in this module lower-case it and compare it
    # against "human", "ai", "assistant" and "system". May be None.
    role: str | None
    # Message text; may be None.
    content: str | None
def assistant_text_for_eval_display(raw: str) -> str:
    """Return assistant text prepared for judges and transcript display.

    Every literal ``[SPLIT]`` marker is turned into a newline so the marker
    does not get in the way of a judge reading the text. A falsy ``raw``
    (``None`` or ``""``) yields an empty string.
    """
    text = raw if raw else ""
    # Splitting on the marker and re-joining with newlines replaces each occurrence.
    return "\n".join(text.split("[SPLIT]"))
def format_eval_turn_block(turn_index_0: int, user: str, assistant: str) -> str:
    """Render one replayed or excerpted turn as a labelled text block.

    ``turn_index_0`` is zero-based; the emitted ``[Turn k]`` label is one-based.
    The user text is stripped; the assistant text is passed through
    ``assistant_text_for_eval_display`` and then stripped. A falsy ``user``
    is rendered as an empty string.
    """
    turn_number = int(turn_index_0) + 1
    user_text = (user or "").strip()
    ai_text = assistant_text_for_eval_display(assistant).strip()
    return f"[Turn {turn_number}]\n用户: {user_text}\nAI: {ai_text}"
def format_export_turns_with_labels(turns: list[tuple[str, str]]) -> str:
    """Format exported fixture turns, one ``(user, ai)`` pair per turn.

    Each pair is rendered with ``format_eval_turn_block`` using its position
    in ``turns`` as the zero-based turn index, and the rendered blocks are
    separated by a blank line.
    """
    return "\n\n".join(
        format_eval_turn_block(index, user_text, ai_text)
        for index, (user_text, ai_text) in enumerate(turns)
    )
def pair_session_messages_to_turns(messages: list[_MessageLike] | list[Any]) -> list[tuple[str, str]]:
    """Pair a sequence of conversation messages into ``(user, assistant)`` turns.

    The pairing rules are the same as in
    ``format_session_messages_with_turn_labels``:

    * ``role`` and ``content`` are read with ``getattr``; the role is compared
      case-insensitively and the content is stripped.
    * ``system`` messages, messages with any other unrecognised role, and
      non-human messages with empty content are ignored.
    * A ``human`` message becomes the pending user text; a later ``human``
      message replaces one that has not been answered yet.
    * Each ``ai``/``assistant`` message closes a turn using the pending user
      text, or ``""`` when there is none.

    In addition, when the sequence ends with a human message that has no
    assistant reply, a final ``(user, "")`` turn is appended so that the UI and
    the review stay aligned.
    """
    turns: list[tuple[str, str]] = []
    awaiting_reply: str | None = None
    for message in messages:
        role = (getattr(message, "role", None) or "").lower()
        text = (getattr(message, "content", None) or "").strip()
        if role == "human":
            # Empty human text is still recorded as a pending turn.
            awaiting_reply = text
            continue
        if role not in ("ai", "assistant") or not text:
            continue
        turns.append((awaiting_reply or "", text))
        awaiting_reply = None
    if awaiting_reply is not None:
        turns.append((awaiting_reply, ""))
    return turns
def format_session_messages_with_turn_labels(messages: list[_MessageLike] | list[Any]) -> str:
    """Format session messages as labelled turns, pairing each human message with the reply after it.

    ``role`` and ``content`` are read with ``getattr``; the role is compared
    case-insensitively and the content is stripped. ``system`` messages,
    messages with any other unrecognised role, and non-human messages with
    empty content are ignored. A ``human`` message becomes the pending user
    text (a later ``human`` message replaces an unanswered one), and each
    ``ai``/``assistant`` message emits one block via ``format_eval_turn_block``
    using the pending user text, or ``""`` when there is none.

    A human message that is never followed by an assistant reply produces no
    block. Blocks are separated by a blank line.
    """
    rendered: list[str] = []
    awaiting_reply: str | None = None
    for message in messages:
        role = (getattr(message, "role", None) or "").lower()
        text = (getattr(message, "content", None) or "").strip()
        if role == "human":
            awaiting_reply = text
            continue
        if role not in ("ai", "assistant") or not text:
            continue
        # The zero-based turn index equals the number of blocks emitted so far.
        rendered.append(format_eval_turn_block(len(rendered), awaiting_reply or "", text))
        awaiting_reply = None
    return "\n\n".join(rendered)