Files
life-echo/api/app/features/evaluation/eval_trace_service.py
Kevin 309a051038 feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI
数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组评审用的证据包。
内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/ judge 相关脚本与文档有配套调整。
app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。
工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了?
2026-04-08 15:37:09 +08:00

517 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""组装 Chapter/Story 评测证据闭包并格式化为评审输入。"""
from __future__ import annotations
from typing import Literal
from sqlalchemy.ext.asyncio import AsyncSession
from app.features.conversation import repo as conversation_repo
from app.features.conversation.lineage_schemas import aggregate_lineage_from_segments
from app.features.evaluation.eval_trace_format import (
build_segment_transcript,
format_chapter_for_judge,
format_story_for_judge,
)
from app.features.evaluation.eval_trace_repo import (
fetch_ai_messages_for_segments,
fetch_memory_closure_for_conversations,
fetch_segments_for_user,
get_chapter_for_eval_trace,
get_story_for_eval_trace,
list_chapter_ids_for_story,
load_chunks_by_ids,
load_facts_by_ids,
load_summaries_by_ids,
load_timeline_by_ids,
normalize_source_segment_ids,
story_link_ids_by_type,
)
from app.features.evaluation.eval_trace_schemas import (
ChapterEvidenceBundle,
FormattedMemoirEvidence,
StoryEvidenceBundle,
)
from app.features.memoir.chapter_evidence_snapshot import (
EVIDENCE_SNAPSHOT_SCHEMA_VERSION,
)
from app.features.memoir.models import Chapter
from app.features.story.models import Story, StoryVersion
_MAX_EVIDENCE_CONVERSATIONS = 8
_MAX_EVIDENCE_TRANSCRIPT_CHARS = 16_000
def _segments_in_order(segments: list, segment_ids: list[str]) -> list:
order = {str(sid): i for i, sid in enumerate(segment_ids)}
return sorted(segments, key=lambda s: order.get(str(s.id), 9999))
def _trim_fallback_transcript(text: str) -> str:
s = (text or "").strip()
if len(s) <= _MAX_EVIDENCE_TRANSCRIPT_CHARS:
return s
return f"{s[:_MAX_EVIDENCE_TRANSCRIPT_CHARS]}\n\n…(访谈证据已截断)"
async def fallback_user_transcript_evidence(db: AsyncSession, user_id: str) -> str:
"""legacy最近若干会话全文仅作 fallback调用方须声明 tier=fallback"""
conversations = await conversation_repo.get_user_conversations(user_id, db)
if not conversations:
return ""
parts: list[str] = []
for conv in reversed(conversations[:_MAX_EVIDENCE_CONVERSATIONS]):
rows = await conversation_repo.get_conversation_messages(str(conv.id), db)
blocks: list[str] = []
for row in rows:
role = str(row.role or "").lower()
body = (row.content or "").strip()
if not body:
continue
label = "用户" if role == "human" else "AI"
blocks.append(f"{label}: {body}")
transcript = "\n\n".join(blocks)
if transcript:
parts.append(f"## 会话 {str(conv.id)}\n{transcript}")
return _trim_fallback_transcript("\n\n".join(parts))
class EvalTraceService:
def __init__(self, db: AsyncSession) -> None:
self._db = db
async def _story_dialogue_lineage(
self,
st: Story,
segments: list,
segment_ids_ordered: list[str],
) -> dict | None:
if getattr(st, "current_version_id", None):
ver = await self._db.get(StoryVersion, st.current_version_id)
if ver and isinstance(getattr(ver, "lineage_json", None), dict):
lj = ver.lineage_json
if lj.get("turns"):
return lj
if segments and segment_ids_ordered:
ordered = _segments_in_order(segments, segment_ids_ordered)
conv_ids = sorted(
{str(s.conversation_id) for s in segments if s.conversation_id}
)
return aggregate_lineage_from_segments(
ordered, conversation_id_fallback=conv_ids[0] if conv_ids else None
)
return None
def _chapter_closure_tier(
self,
*,
segment_ids_resolved: list[str],
chunk_ids: list[str],
fact_ids: list[str],
tl_ids: list[str],
sum_ids: list[str],
) -> Literal["strict", "partial", "fallback"]:
has_seg = bool(segment_ids_resolved)
has_mem = bool(chunk_ids or fact_ids or tl_ids or sum_ids)
if has_seg and has_mem:
return "strict"
if has_seg:
return "partial"
if has_mem:
return "partial"
return "fallback"
async def build_chapter_bundle(self, user_id: str, chapter: Chapter) -> ChapterEvidenceBundle:
notes: list[str] = []
live_segment_ids = normalize_source_segment_ids(
getattr(chapter, "source_segments", None)
)
row = getattr(chapter, "current_evidence_snapshot", None)
row_has_closure = bool(
(row and (row.segment_ids or []))
or (row and (row.memory_chunk_ids or row.memory_fact_ids or row.timeline_event_ids or row.summary_ids))
)
if (
row is not None
and str(row.user_id) == str(user_id)
and str(row.chapter_id) == str(chapter.id)
and int(row.schema_version or 0) == EVIDENCE_SNAPSHOT_SCHEMA_VERSION
and row_has_closure
):
segment_ids = [
str(x) for x in (row.segment_ids or []) if str(x).strip()
]
conv_ids = sorted(
{str(x) for x in (row.conversation_ids or []) if str(x).strip()}
)
chunk_ids = [
str(x) for x in (row.memory_chunk_ids or []) if str(x).strip()
]
fact_ids = [str(x) for x in (row.memory_fact_ids or []) if str(x).strip()]
tl_ids = [
str(x) for x in (row.timeline_event_ids or []) if str(x).strip()
]
sum_ids = [str(x) for x in (row.summary_ids or []) if str(x).strip()]
notes.extend([str(x) for x in (row.notes or []) if x])
notes.append("evidence_from_chapter_evidence_snapshot_table")
tier = self._chapter_closure_tier(
segment_ids_resolved=segment_ids,
chunk_ids=chunk_ids,
fact_ids=fact_ids,
tl_ids=tl_ids,
sum_ids=sum_ids,
)
if live_segment_ids and set(live_segment_ids) != set(segment_ids):
notes.append("live_source_segments_differ_from_snapshot_reconcile_in_pipeline")
dlg = getattr(row, "message_lineage_json", None)
return ChapterEvidenceBundle(
user_id=user_id,
chapter_id=str(chapter.id),
segment_ids=segment_ids,
conversation_ids=conv_ids,
memory_chunk_ids=chunk_ids,
memory_fact_ids=fact_ids,
timeline_event_ids=tl_ids,
summary_ids=sum_ids,
lineage_tier=tier,
notes=notes,
dialogue_lineage=dlg if isinstance(dlg, dict) else None,
)
snap = getattr(chapter, "evidence_bundle_json", None)
snap_uid = str(snap.get("user_id") or "") if isinstance(snap, dict) else ""
snap_has_closure = bool(
(isinstance(snap, dict) and (snap.get("segment_ids") or []))
or (
isinstance(snap, dict)
and (
snap.get("memory_chunk_ids")
or snap.get("memory_fact_ids")
or snap.get("timeline_event_ids")
or snap.get("summary_ids")
)
)
)
use_snap = (
isinstance(snap, dict)
and int(snap.get("schema_version") or 0) == EVIDENCE_SNAPSHOT_SCHEMA_VERSION
and str(snap.get("chapter_id") or "") == str(chapter.id)
and (not snap_uid or snap_uid == str(user_id))
and snap_has_closure
)
if use_snap and isinstance(snap, dict):
segment_ids = [str(x) for x in (snap.get("segment_ids") or []) if str(x).strip()]
conv_ids = sorted(
{str(x) for x in (snap.get("conversation_ids") or []) if str(x).strip()}
)
chunk_ids = [str(x) for x in (snap.get("memory_chunk_ids") or []) if str(x).strip()]
fact_ids = [str(x) for x in (snap.get("memory_fact_ids") or []) if str(x).strip()]
tl_ids = [str(x) for x in (snap.get("timeline_event_ids") or []) if str(x).strip()]
sum_ids = [str(x) for x in (snap.get("summary_ids") or []) if str(x).strip()]
notes.extend([str(x) for x in (snap.get("notes") or []) if x])
notes.append("evidence_from_chapter_evidence_bundle_json_column")
tier = self._chapter_closure_tier(
segment_ids_resolved=segment_ids,
chunk_ids=chunk_ids,
fact_ids=fact_ids,
tl_ids=tl_ids,
sum_ids=sum_ids,
)
if live_segment_ids and set(live_segment_ids) != set(segment_ids):
notes.append("live_source_segments_differ_from_snapshot_reconcile_in_pipeline")
snap_dlg = snap.get("message_lineage_json") if isinstance(snap, dict) else None
return ChapterEvidenceBundle(
user_id=user_id,
chapter_id=str(chapter.id),
segment_ids=segment_ids,
conversation_ids=conv_ids,
memory_chunk_ids=chunk_ids,
memory_fact_ids=fact_ids,
timeline_event_ids=tl_ids,
summary_ids=sum_ids,
lineage_tier=tier,
notes=notes,
dialogue_lineage=snap_dlg if isinstance(snap_dlg, dict) else None,
)
segment_ids = live_segment_ids
if not segment_ids:
notes.append("no_source_segments")
notes.append("fallback_lineage_transcript_pending")
return ChapterEvidenceBundle(
user_id=user_id,
chapter_id=str(chapter.id),
segment_ids=[],
conversation_ids=[],
lineage_tier="fallback",
notes=notes,
dialogue_lineage=None,
)
segments = await fetch_segments_for_user(
self._db, user_id=user_id, segment_ids=segment_ids
)
resolved_seg_ids = [s.id for s in segments] or segment_ids
if len(segments) < len(segment_ids):
notes.append("some_segments_missing_or_foreign_user")
conv_ids = sorted({str(s.conversation_id) for s in segments if s.conversation_id})
chunk_ids, fact_ids, tl_ids, sum_ids = await fetch_memory_closure_for_conversations(
self._db, user_id=user_id, conversation_ids=conv_ids
)
tier = self._chapter_closure_tier(
segment_ids_resolved=resolved_seg_ids,
chunk_ids=chunk_ids,
fact_ids=fact_ids,
tl_ids=tl_ids,
sum_ids=sum_ids,
)
if tier == "partial":
notes.append(
"chapter_source_segments_union_semantics=partial_lineage_until_snapshot"
)
elif tier == "strict":
notes.append("chapter_lineage_strict_segments_plus_memory_closure")
segs_ord = _segments_in_order(segments, resolved_seg_ids)
dlg_live = aggregate_lineage_from_segments(
segs_ord, conversation_id_fallback=conv_ids[0] if conv_ids else None
)
return ChapterEvidenceBundle(
user_id=user_id,
chapter_id=str(chapter.id),
segment_ids=resolved_seg_ids,
conversation_ids=conv_ids,
memory_chunk_ids=chunk_ids,
memory_fact_ids=fact_ids,
timeline_event_ids=tl_ids,
summary_ids=sum_ids,
lineage_tier=tier,
notes=notes,
dialogue_lineage=dlg_live,
)
async def format_chapter_bundle(
self, bundle: ChapterEvidenceBundle
) -> tuple[FormattedMemoirEvidence, ChapterEvidenceBundle]:
"""若 tier=fallback调用方应先将要并入 transcripts 写入 session此处只负责 segment 路径。"""
if bundle.lineage_tier == "fallback":
ft = await fallback_user_transcript_evidence(self._db, bundle.user_id)
notes = list(bundle.notes)
notes.append("used_legacy_recent_conversations_transcript")
bundle = bundle.model_copy(update={"notes": notes})
formatted = format_chapter_for_judge(
bundle,
transcript=ft,
chunks=[],
facts=[],
events=[],
summaries=[],
)
return formatted, bundle
segs = await fetch_segments_for_user(
self._db, user_id=bundle.user_id, segment_ids=bundle.segment_ids
)
ai_map = await fetch_ai_messages_for_segments(
self._db, user_id=bundle.user_id, segment_ids=[s.id for s in segs]
)
transcript = build_segment_transcript(segs, ai_map)
chunks = await load_chunks_by_ids(
self._db, user_id=bundle.user_id, chunk_ids=bundle.memory_chunk_ids
)
facts = await load_facts_by_ids(
self._db, user_id=bundle.user_id, fact_ids=bundle.memory_fact_ids
)
events = await load_timeline_by_ids(
self._db, user_id=bundle.user_id, event_ids=bundle.timeline_event_ids
)
summaries = await load_summaries_by_ids(
self._db, user_id=bundle.user_id, summary_ids=bundle.summary_ids
)
formatted = format_chapter_for_judge(
bundle,
transcript=transcript,
chunks=chunks,
facts=facts,
events=events,
summaries=summaries,
)
return formatted, bundle
async def build_story_bundle(self, user_id: str, story_id: str) -> StoryEvidenceBundle:
st = await get_story_for_eval_trace(self._db, user_id=user_id, story_id=story_id)
if not st:
return StoryEvidenceBundle(
user_id=user_id,
story_id=story_id,
lineage_tier="fallback",
notes=["story_not_found"],
dialogue_lineage=None,
)
links = list(st.evidence_links or [])
lc, lf, lt, ls = story_link_ids_by_type(links)
notes: list[str] = []
chapter_ids = await list_chapter_ids_for_story(
self._db, user_id=user_id, story_id=str(st.id)
)
if lc or lf or lt or ls:
# 结构化以 link 为准;会话级 transcript 尝试从挂靠章节 source_segments 收缩
seg_ids: list[str] = []
conv_ids: list[str] = []
for cid in chapter_ids:
ch = await get_chapter_for_eval_trace(
self._db, user_id=user_id, chapter_id=cid
)
if not ch:
continue
seg_ids.extend(normalize_source_segment_ids(ch.source_segments))
# 保序去重
seen_s: set[str] = set()
dedup_seg: list[str] = []
for s in seg_ids:
if s not in seen_s:
seen_s.add(s)
dedup_seg.append(s)
segments = await fetch_segments_for_user(
self._db, user_id=user_id, segment_ids=dedup_seg
)
conv_ids = sorted({str(s.conversation_id) for s in segments if s.conversation_id})
if dedup_seg and not segments:
notes.append("chapter_segment_ids_unresolved")
if conv_ids:
notes.append("transcript_from_chapter_source_segments")
else:
notes.append("no_chapter_segments_for_transcript_context")
bound_transcript = bool(segments)
story_tier: Literal["strict", "partial", "fallback"] = "strict"
if (lc or lf or lt or ls) and not bound_transcript:
notes.append("structured_evidence_without_bound_transcript")
story_tier = "partial"
dlg = await self._story_dialogue_lineage(st, segments, dedup_seg)
return StoryEvidenceBundle(
user_id=user_id,
story_id=str(st.id),
segment_ids=[s.id for s in segments] or dedup_seg,
conversation_ids=conv_ids,
memory_chunk_ids=lc,
memory_fact_ids=lf,
timeline_event_ids=lt,
summary_ids=ls,
lineage_tier=story_tier,
notes=notes,
augmented_with_chapter_context=bool(chapter_ids),
story_link_evidence_count=len(links),
fallback_chapter_ids=chapter_ids,
dialogue_lineage=dlg,
)
# 无 StoryEvidenceLink由章节 source_segments 推导 partial再不行则 fallback
seg_ids = []
conv_ids: list[str] = []
for cid in chapter_ids:
ch = await get_chapter_for_eval_trace(
self._db, user_id=user_id, chapter_id=cid
)
if not ch:
continue
seg_ids.extend(normalize_source_segment_ids(ch.source_segments))
seen_s = set()
dedup_seg = []
for s in seg_ids:
if s not in seen_s:
seen_s.add(s)
dedup_seg.append(s)
if dedup_seg:
segments = await fetch_segments_for_user(
self._db, user_id=user_id, segment_ids=dedup_seg
)
conv_ids = sorted({str(s.conversation_id) for s in segments if s.conversation_id})
chunk_ids, fact_ids, tl_ids, sum_ids = (
await fetch_memory_closure_for_conversations(
self._db, user_id=user_id, conversation_ids=conv_ids
)
)
notes.append("fallback_lineage_no_story_evidence_links")
notes.append("augmented_with_chapter_context")
dlg2 = await self._story_dialogue_lineage(st, segments, dedup_seg)
return StoryEvidenceBundle(
user_id=user_id,
story_id=str(st.id),
segment_ids=[s.id for s in segments] or dedup_seg,
conversation_ids=conv_ids,
memory_chunk_ids=chunk_ids,
memory_fact_ids=fact_ids,
timeline_event_ids=tl_ids,
summary_ids=sum_ids,
lineage_tier="partial",
notes=notes,
augmented_with_chapter_context=True,
story_link_evidence_count=0,
fallback_chapter_ids=chapter_ids,
dialogue_lineage=dlg2,
)
notes.append("no_story_evidence_links_and_no_chapter_segments")
notes.append("fallback_lineage_transcript_pending")
dlg3 = await self._story_dialogue_lineage(st, [], [])
return StoryEvidenceBundle(
user_id=user_id,
story_id=str(st.id),
lineage_tier="fallback",
notes=notes,
story_link_evidence_count=0,
fallback_chapter_ids=chapter_ids,
dialogue_lineage=dlg3,
)
async def format_story_bundle(
self, bundle: StoryEvidenceBundle
) -> tuple[FormattedMemoirEvidence, StoryEvidenceBundle]:
if bundle.lineage_tier == "fallback":
ft = await fallback_user_transcript_evidence(self._db, bundle.user_id)
notes = list(bundle.notes)
notes.append("used_legacy_recent_conversations_transcript")
bundle = bundle.model_copy(update={"notes": notes})
formatted = format_story_for_judge(
bundle,
transcript=ft,
chunks=[],
facts=[],
events=[],
summaries=[],
)
return formatted, bundle
segs = await fetch_segments_for_user(
self._db, user_id=bundle.user_id, segment_ids=bundle.segment_ids
)
ai_map = await fetch_ai_messages_for_segments(
self._db, user_id=bundle.user_id, segment_ids=[s.id for s in segs]
)
transcript = build_segment_transcript(segs, ai_map)
chunks = await load_chunks_by_ids(
self._db, user_id=bundle.user_id, chunk_ids=bundle.memory_chunk_ids
)
facts = await load_facts_by_ids(
self._db, user_id=bundle.user_id, fact_ids=bundle.memory_fact_ids
)
events = await load_timeline_by_ids(
self._db, user_id=bundle.user_id, event_ids=bundle.timeline_event_ids
)
summaries = await load_summaries_by_ids(
self._db, user_id=bundle.user_id, summary_ids=bundle.summary_ids
)
formatted = format_story_for_judge(
bundle,
transcript=transcript,
chunks=chunks,
facts=facts,
events=events,
summaries=summaries,
)
return formatted, bundle