Files
life-echo/api/app/features/evaluation/eval_trace_format.py
Kevin 309a051038 feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI
数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组评审用的证据包。
内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/ judge 相关脚本与文档有配套调整。
app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。
工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了?
2026-04-08 15:37:09 +08:00

276 lines
8.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""将证据闭包格式化为评审可读文本,并记录截断/丢弃区块(可审计)。"""
from __future__ import annotations
from app.features.conversation.models import Segment
from app.features.evaluation.eval_trace_schemas import (
ChapterEvidenceBundle,
EvidenceFormatMeta,
FormattedMemoirEvidence,
StoryEvidenceBundle,
)
from app.features.memory.models import (
MemoryChunk,
MemoryFact,
MemorySummary,
TimelineEvent,
)
# Kept in line with judge_service._MEMOIR_EVIDENCE_MAX: the interview transcript and
# the structured evidence each get their own budget so the total length stays bounded.
# Both values are compared against len() of text in this module, i.e. character counts.
_MEMOIR_TRANSCRIPT_CAP = 12_000
_MEMOIR_STRUCTURED_CAP = 12_000
def _approx_tokens(chars: int) -> int:
    """Return a rough token estimate for ``chars`` characters.

    Uses a flat four-characters-per-token ratio (floor division) and never
    returns a negative number.
    """
    estimate = chars // 4
    return estimate if estimate > 0 else 0
def _segment_message_id_header(seg: Segment) -> str:
    """Build the message-id suffix shown in a segment's transcript heading.

    IDs are taken from the first entry of ``seg.lineage_json["turns"]`` when
    that entry is a dict. If no user message id is found there, the segment's
    own ``user_message_id`` attribute is used instead. Returns ``""`` when
    neither id is available.
    """

    def _clean(raw: object) -> str | None:
        # Falsy values and whitespace-only strings both count as "missing".
        text = str(raw or "").strip()
        return text or None

    user_id: str | None = None
    assistant_id: str | None = None

    lineage = getattr(seg, "lineage_json", None)
    turns = lineage.get("turns") if isinstance(lineage, dict) else None
    if isinstance(turns, list) and turns and isinstance(turns[0], dict):
        first_turn = turns[0]
        user_id = _clean(first_turn.get("user_message_id"))
        assistant_id = _clean(first_turn.get("assistant_message_id"))

    if user_id is None:
        # Fall back to the id stored directly on the segment, if any.
        fallback = getattr(seg, "user_message_id", None)
        if fallback:
            user_id = str(fallback)

    labelled = (("user_msg", user_id), ("assistant_msg", assistant_id))
    return " · ".join(f"{label}={value}" for label, value in labelled if value)
def build_segment_transcript(
    segments: list[Segment],
    ai_by_segment: dict[str, str],
) -> str:
    """Render one transcript block per segment (user utterance + AI reply).

    Args:
        segments: Segments to render, in output order; numbering starts at 1.
        ai_by_segment: AI reply text keyed by ``str(segment.id)``. A missing or
            empty entry falls back to the segment's ``agent_response``.

    Returns:
        The per-segment blocks joined by blank lines, or ``""`` when
        ``segments`` is empty.
    """
    blocks: list[str] = []
    for i, seg in enumerate(segments, start=1):
        uid = str(seg.id)
        user_txt = (seg.user_input_text or "").strip()
        ai_txt = (ai_by_segment.get(uid) or seg.agent_response or "").strip()
        id_extra = _segment_message_id_header(seg)
        head = (
            f"### Segment {i} · id={uid} · conversation={seg.conversation_id}"
            + (f" · {id_extra}" if id_extra else "")
        )
        body_u = f"用户: {user_txt}" if user_txt else "用户: (空)"
        # The placeholder now closes its parenthesis, matching the user-side
        # placeholder above.
        body_a = f"AI: {ai_txt}" if ai_txt else "AI: (无日志/无 agent_response)"
        blocks.append(f"{head}\n{body_u}\n{body_a}")
    return "\n\n".join(blocks)
def build_structured_evidence_text(
    *,
    chunks: list[MemoryChunk],
    facts: list[MemoryFact],
    events: list[TimelineEvent],
    summaries: list[MemorySummary],
    max_chars: int = _MEMOIR_STRUCTURED_CAP,
) -> tuple[str, bool, list[str]]:
    """Render structured memory evidence as titled sections within a budget.

    Sections are emitted in a fixed order: chunks, facts, timeline, summaries.
    A section that would push the running total past ``max_chars`` is dropped
    as a whole and its tag is recorded; a later, smaller section may still fit.

    Args:
        chunks: Memory chunks; each ``content`` is clipped to 1200 characters.
        facts: Memory facts, rendered as ``subject · predicate``.
        events: Timeline events; each ``description`` is clipped to 400
            characters.
        summaries: Memory summaries; each ``content`` is clipped to 2000
            characters.
        max_chars: Character budget for the combined text.

    Returns:
        ``(text, truncated, dropped_section_tags)``. ``truncated`` is ``True``
        only when at least one whole section was dropped. Clipping a single
        over-long item is marked inline with ``…`` and does not set the flag.
    """
    parts: list[str] = []
    dropped: list[str] = []
    used = 0
    truncated = False

    def _clip(text: str | None, limit: int) -> str:
        # Trim whitespace and cap the length. A clipped value ends with "…" so
        # a reviewer can tell it apart from a complete one (the previous code
        # appended an empty string, leaving the cut invisible).
        cleaned = (text or "").strip()
        if len(cleaned) > limit:
            return cleaned[:limit] + "…"
        return cleaned

    def _add_section(title: str, body: str) -> None:
        nonlocal used, truncated
        block = f"{title}\n{body}".strip()
        if not block:
            return
        # +2 accounts for the "\n\n" separator used when sections are joined.
        if used + len(block) + 2 > max_chars:
            truncated = True
            # The tag is the title without its surrounding 【】 brackets.
            dropped.append(title.strip("【】").split("·")[0].strip())
            return
        parts.append(block)
        used += len(block) + 2

    if chunks:
        lines = [f"- chunk `{c.id}`: {_clip(c.content, 1200)}" for c in chunks]
        _add_section("【记忆片段 chunks】", "\n".join(lines))

    if facts:
        lines = []
        for fact in facts:
            subj = (fact.subject or "").strip()
            pred = (fact.predicate or "").strip()
            # Stripping " ·" removes the dangling separator when the subject
            # or predicate is empty.
            lines.append(
                f"- fact `{fact.id}` ({fact.fact_type}): {subj} · {pred}".strip(" ·")
            )
        _add_section("【记忆事实 facts】", "\n".join(lines))

    if events:
        lines = []
        for e in events:
            lines.append(
                f"- timeline `{e.id}`: {e.title} ({e.event_year or e.event_date or ''})"
            )
            if e.description:
                lines.append(f" {_clip(e.description, 400)}")
        _add_section("【时间线 timeline】", "\n".join(lines))

    if summaries:
        lines = [
            f"- summary `{s.id}` ({s.summary_type}): {_clip(s.content, 2000)}"
            for s in summaries
        ]
        _add_section("【摘要 summaries】", "\n".join(lines))

    return "\n\n".join(parts).strip(), truncated, dropped
def evidence_summary_line(
    *,
    lineage_tier: str,
    segment_n: int,
    conv_n: int,
    chunk_n: int,
    fact_n: int,
    tl_n: int,
    sum_n: int,
    notes: list[str],
) -> str:
    """Return a one-line ``key=value; ...`` summary of an evidence bundle.

    The lineage tier comes first, followed by the six counters in a fixed
    order. When ``notes`` is non-empty, at most its first three entries are
    appended as a trailing ``notes=...`` field.
    """
    labelled = (
        ("tier", lineage_tier),
        ("segments", segment_n),
        ("conversations", conv_n),
        ("chunks", chunk_n),
        ("facts", fact_n),
        ("timeline", tl_n),
        ("summaries", sum_n),
    )
    fields = [f"{key}={value}" for key, value in labelled]
    if notes:
        shown = "; ".join(notes[:3])
        fields.append("notes=" + shown)
    return "; ".join(fields)
def _format_bundle_for_judge(
    bundle: ChapterEvidenceBundle | StoryEvidenceBundle,
    *,
    transcript: str,
    chunks: list[MemoryChunk],
    facts: list[MemoryFact],
    events: list[TimelineEvent],
    summaries: list[MemorySummary],
) -> FormattedMemoirEvidence:
    """Shared formatter behind the chapter and story entry points.

    Caps the transcript at ``_MEMOIR_TRANSCRIPT_CAP`` characters, renders the
    structured evidence within ``_MEMOIR_STRUCTURED_CAP``, and records what was
    truncated or dropped in the returned ``format_meta``.
    """
    dropped: list[str] = []
    truncated = False

    t_in = transcript.strip()
    if len(t_in) > _MEMOIR_TRANSCRIPT_CAP:
        truncated = True
        dropped.append("source_transcript_tail")
        # The marker is appended after the cut, so the stored transcript is
        # slightly longer than the cap itself.
        t_in = t_in[:_MEMOIR_TRANSCRIPT_CAP] + "\n\n…(原始对话证据已截断)"

    struct, s_trunc, s_drop = build_structured_evidence_text(
        chunks=chunks,
        facts=facts,
        events=events,
        summaries=summaries,
        max_chars=_MEMOIR_STRUCTURED_CAP,
    )
    if s_trunc:
        truncated = True
        dropped.extend(s_drop)

    meta = EvidenceFormatMeta(
        truncated=truncated,
        # De-duplicated and sorted so the audit record is deterministic.
        dropped_sections=sorted(set(dropped)),
        included_token_estimate=_approx_tokens(len(t_in) + len(struct)),
        transcript_chars_included=len(t_in),
        structured_evidence_chars_included=len(struct),
    )
    summary = evidence_summary_line(
        lineage_tier=bundle.lineage_tier,
        segment_n=len(bundle.segment_ids),
        conv_n=len(bundle.conversation_ids),
        chunk_n=len(bundle.memory_chunk_ids),
        fact_n=len(bundle.memory_fact_ids),
        tl_n=len(bundle.timeline_event_ids),
        sum_n=len(bundle.summary_ids),
        notes=bundle.notes,
    )
    return FormattedMemoirEvidence(
        source_transcript=t_in,
        structured_evidence=struct,
        format_meta=meta,
        evidence_summary=summary,
    )


def format_chapter_for_judge(
    bundle: ChapterEvidenceBundle,
    *,
    transcript: str,
    chunks: list[MemoryChunk],
    facts: list[MemoryFact],
    events: list[TimelineEvent],
    summaries: list[MemorySummary],
) -> FormattedMemoirEvidence:
    """Format a chapter's evidence bundle into judge-readable text.

    Args:
        bundle: Chapter evidence bundle; its id lists and notes feed the
            one-line evidence summary.
        transcript: Interview transcript text; stripped and capped.
        chunks: Memory chunks to render as structured evidence.
        facts: Memory facts to render as structured evidence.
        events: Timeline events to render as structured evidence.
        summaries: Memory summaries to render as structured evidence.

    Returns:
        The formatted transcript and structured evidence, together with
        truncation metadata and a one-line summary.
    """
    return _format_bundle_for_judge(
        bundle,
        transcript=transcript,
        chunks=chunks,
        facts=facts,
        events=events,
        summaries=summaries,
    )


def format_story_for_judge(
    bundle: StoryEvidenceBundle,
    *,
    transcript: str,
    chunks: list[MemoryChunk],
    facts: list[MemoryFact],
    events: list[TimelineEvent],
    summaries: list[MemorySummary],
) -> FormattedMemoirEvidence:
    """Format a story's evidence bundle into judge-readable text.

    Args:
        bundle: Story evidence bundle; its id lists and notes feed the
            one-line evidence summary.
        transcript: Interview transcript text; stripped and capped.
        chunks: Memory chunks to render as structured evidence.
        facts: Memory facts to render as structured evidence.
        events: Timeline events to render as structured evidence.
        summaries: Memory summaries to render as structured evidence.

    Returns:
        The formatted transcript and structured evidence, together with
        truncation metadata and a one-line summary.
    """
    return _format_bundle_for_judge(
        bundle,
        transcript=transcript,
        chunks=chunks,
        facts=facts,
        events=events,
        summaries=summaries,
    )