feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI
数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。 业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组装评审用的证据包。 内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/judge 相关脚本与文档有配套调整。 app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。 工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了……
This commit is contained in:
@@ -91,28 +91,45 @@ def _build_memoir_judge_prompt(
|
||||
*,
|
||||
memoir_markdown: str,
|
||||
source_transcript: str = "",
|
||||
structured_evidence: str = "",
|
||||
reference_memoir_markdown: str = "",
|
||||
evidence_notes: str = "",
|
||||
) -> str:
|
||||
"""Assemble an evidence-aware memoir judging prompt."""
|
||||
source = (source_transcript or "").strip()
|
||||
struct = (structured_evidence or "").strip()
|
||||
reference = (reference_memoir_markdown or "").strip()
|
||||
notes = (evidence_notes or "").strip()
|
||||
sections = [
|
||||
MEMOIR_JUDGE_INSTRUCTIONS,
|
||||
"",
|
||||
"【证据与输入顺序】以下区块按优先级给出:评审说明(若有)→ 原始访谈证据 → 参考基线(若有)→ 待评成稿。**真实性相关细项必须以原始访谈证据为准。**",
|
||||
"【证据与输入顺序】以下区块按优先级给出:"
|
||||
"评审说明(若有)→ 原始访谈/对话证据(segment 绑定)→ 结构化记忆证据(chunk/fact/timeline/summary)"
|
||||
"→ 参考基线(若有)→ 待评成稿。**真实性、覆盖率、可追溯性以「artifact 绑定证据闭包」为准**;"
|
||||
"若存在 `lineage_tier=fallback` 或证据不足,须保守打分并写 `insufficient_evidence`。",
|
||||
"",
|
||||
]
|
||||
if notes:
|
||||
sections.extend(["【评审说明】", notes[:1200], ""])
|
||||
if source:
|
||||
sections.extend(["【原始访谈/证据】", source[:_MEMOIR_EVIDENCE_MAX], ""])
|
||||
sections.extend(["【原始访谈/对话证据】", source[:_MEMOIR_EVIDENCE_MAX], ""])
|
||||
else:
|
||||
sections.extend(
|
||||
[
|
||||
"【原始访谈/证据】",
|
||||
"无可用原始访谈证据。对于记忆忠实度、事实准确性、事实覆盖率、记忆可追溯性,必须保守打分,不得凭空高分。",
|
||||
"【原始访谈/对话证据】",
|
||||
"无可用局部对话证据。对于记忆忠实度、事实准确性、事实覆盖率、记忆可追溯性,必须保守打分,不得凭空高分。",
|
||||
"",
|
||||
]
|
||||
)
|
||||
if struct:
|
||||
sections.extend(
|
||||
["【结构化记忆证据】", struct[:_MEMOIR_EVIDENCE_MAX], ""]
|
||||
)
|
||||
else:
|
||||
sections.extend(
|
||||
[
|
||||
"【结构化记忆证据】",
|
||||
"(本 artifact 未绑定或未解析到 chunk/fact/timeline/summary 证据。)",
|
||||
"",
|
||||
]
|
||||
)
|
||||
@@ -268,12 +285,14 @@ class EvalJudgeService:
|
||||
*,
|
||||
memoir_markdown: str,
|
||||
source_transcript: str = "",
|
||||
structured_evidence: str = "",
|
||||
reference_memoir_markdown: str = "",
|
||||
evidence_notes: str = "",
|
||||
) -> MemoirJudgeOutput | None:
|
||||
result = await self.judge_memoir_result(
|
||||
memoir_markdown=memoir_markdown,
|
||||
source_transcript=source_transcript,
|
||||
structured_evidence=structured_evidence,
|
||||
reference_memoir_markdown=reference_memoir_markdown,
|
||||
evidence_notes=evidence_notes,
|
||||
)
|
||||
@@ -284,6 +303,7 @@ class EvalJudgeService:
|
||||
*,
|
||||
memoir_markdown: str,
|
||||
source_transcript: str = "",
|
||||
structured_evidence: str = "",
|
||||
reference_memoir_markdown: str = "",
|
||||
evidence_notes: str = "",
|
||||
) -> JudgeCallResult[MemoirJudgeOutput]:
|
||||
@@ -292,6 +312,7 @@ class EvalJudgeService:
|
||||
prompt = _build_memoir_judge_prompt(
|
||||
memoir_markdown=memoir_markdown,
|
||||
source_transcript=source_transcript,
|
||||
structured_evidence=structured_evidence,
|
||||
reference_memoir_markdown=reference_memoir_markdown,
|
||||
evidence_notes=evidence_notes,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user