"""Conversation text formatting for evaluation (stable Turn labels that reviewers can cite)."""

from __future__ import annotations
|
||
|
||
from typing import Any, Protocol
|
||
|
||
|
||
class _MessageLike(Protocol):
    """Structural type for the message objects handled by this module.

    Any object exposing these two attributes is accepted; both may be
    ``None``, and readers in this module treat ``None`` as an empty string.
    """

    # Speaker role; compared case-insensitively by the functions that read it.
    role: str | None
    # Message text; stripped of surrounding whitespace before use.
    content: str | None
|
||
|
||
|
||
def assistant_text_for_eval_display(raw: str | None) -> str:
    """Return assistant text prepared for judge review and transcript display.

    Each literal ``[SPLIT]`` marker is replaced with a newline so the marker
    itself does not distract the judge when reading the transcript.

    Args:
        raw: Raw assistant output. ``None`` and the empty string both yield
            an empty result.

    Returns:
        The text with every ``[SPLIT]`` occurrence turned into a newline.
    """
    return (raw or "").replace("[SPLIT]", "\n")
|
||
|
||
|
||
def format_eval_turn_block(
    turn_index_0: int, user: str | None, assistant: str | None
) -> str:
    """Render a single turn for replay or excerpting.

    The block is headed by ``[Turn k]``, where ``k`` counts from 1.

    Args:
        turn_index_0: Zero-based turn index; the label shows this value + 1.
        user: User text; ``None`` is treated as empty, surrounding whitespace
            is stripped.
        assistant: Assistant text; it is passed through
            ``assistant_text_for_eval_display`` and then stripped.

    Returns:
        A three-line block: the turn label, the user line and the AI line.
    """
    user_text = (user or "").strip()
    ai_text = assistant_text_for_eval_display(assistant).strip()
    turn_number = int(turn_index_0) + 1
    return f"[Turn {turn_number}]\n用户: {user_text}\nAI: {ai_text}"
|
||
|
||
|
||
def format_export_turns_with_labels(turns: list[tuple[str, str]]) -> str:
    """Format a user-export fixture given as ``(user, ai)`` pairs, one per turn.

    Args:
        turns: Turns in order; the position in the list becomes the turn
            label (first entry is ``[Turn 1]``).

    Returns:
        All turn blocks joined by one blank line; an empty string when
        ``turns`` is empty.
    """
    return "\n\n".join(
        format_eval_turn_block(index, user, ai)
        for index, (user, ai) in enumerate(turns)
    )
|
||
|
||
|
||
def pair_session_messages_to_turns(
    messages: list[_MessageLike] | list[Any],
) -> list[tuple[str, str]]:
    """Pair a message sequence into ``(user, assistant)`` turns.

    Pairing follows the same rules as
    ``format_session_messages_with_turn_labels``. In addition, when the
    sequence ends with a human message that has no assistant reply after it,
    a final ``(user, "")`` turn is appended so the UI and the review stay
    aligned.

    Roles are matched case-insensitively. Only ``human`` and
    ``ai``/``assistant`` messages take part; ``system`` and any other role
    are ignored, as are assistant messages whose content is empty.

    Args:
        messages: Objects exposing ``role`` and ``content`` attributes;
            a missing or ``None`` attribute is treated as empty.

    Returns:
        The turns in order, with both strings stripped of surrounding
        whitespace.
    """
    turns: list[tuple[str, str]] = []
    pending_user: str | None = None
    for message in messages:
        role = (getattr(message, "role", None) or "").lower()
        body = (getattr(message, "content", None) or "").strip()
        if role == "human":
            # Stored even when empty so the turn still exists; a later human
            # message replaces one that has not been answered yet.
            pending_user = body
        elif role in ("ai", "assistant") and body:
            # An assistant reply without a preceding human gets an empty user.
            turns.append((pending_user or "", body))
            pending_user = None
    if pending_user is not None:
        # Trailing human message with no reply.
        turns.append((pending_user, ""))
    return turns
|
||
|
||
|
||
def format_session_messages_with_turn_labels(
    messages: list[_MessageLike] | list[Any],
) -> str:
    """Format a message sequence, merging each human→assistant pair into a turn.

    Messages are processed in order. Roles are matched case-insensitively;
    only ``human`` and ``ai``/``assistant`` messages take part, while
    ``system`` and any other role are ignored, as are assistant messages
    whose content is empty. A human message that never receives an assistant
    reply produces no block.

    Args:
        messages: Objects exposing ``role`` and ``content`` attributes;
            a missing or ``None`` attribute is treated as empty.

    Returns:
        The turn blocks joined by one blank line; an empty string when no
        assistant message with content is present.
    """
    blocks: list[str] = []
    pending_user: str | None = None
    for message in messages:
        role = (getattr(message, "role", None) or "").lower()
        body = (getattr(message, "content", None) or "").strip()
        if role == "human":
            # A later human message replaces one that has not been answered.
            pending_user = body
        elif role in ("ai", "assistant") and body:
            # len(blocks) is the zero-based index of the turn being added.
            blocks.append(
                format_eval_turn_block(len(blocks), pending_user or "", body)
            )
            pending_user = None
    return "\n\n".join(blocks)
|