Merge branch 'eval/elapsed-time-memoir-batch-chunk' into development

This commit is contained in:
Kevin
2026-04-10 10:27:41 +08:00
66 changed files with 5246 additions and 705 deletions

View File

@@ -30,10 +30,7 @@ TJudgeOutput = TypeVar(
_TURN_MAX = 768
_CONV_JUDGE_JSON_MAX = 2048
_CONV_HEADER = "【完整对话】(每轮以 `[Turn k]` 开头)\n\n"
_MEMOIR_MAX = 12000
_MEMOIR_JSON_MAX = 1536
_COMPARE_STREAM_MAX = 6144
_MEMOIR_EVIDENCE_MAX = 12000
def _eval_judge_prompt_char_pool_for_context(context_window_tokens: int) -> int:
@@ -251,10 +248,12 @@ def _build_memoir_judge_prompt(
"若存在 `lineage_tier=fallback` 或证据不足,须保守打分并写 `insufficient_evidence`。",
"",
]
ev_cap = max(1, int(settings.eval_judge_memoir_evidence_max_chars))
body_cap = max(1, int(settings.eval_judge_memoir_body_max_chars))
if notes:
sections.extend(["【评审说明】", notes[:1200], ""])
if source:
sections.extend(["【原始访谈/对话证据】", source[:_MEMOIR_EVIDENCE_MAX], ""])
sections.extend(["【原始访谈/对话证据】", source[:ev_cap], ""])
else:
sections.extend(
[
@@ -274,8 +273,8 @@ def _build_memoir_judge_prompt(
]
)
if reference:
sections.extend(["【参考基线/导出成稿】", reference[:_MEMOIR_EVIDENCE_MAX], ""])
sections.extend(["【当前回忆录正文】", memoir_markdown[:_MEMOIR_MAX]])
sections.extend(["【参考基线/导出成稿】", reference[:ev_cap], ""])
sections.extend(["【当前回忆录正文】", memoir_markdown[:body_cap]])
return "\n".join(sections)
@@ -493,11 +492,15 @@ class EvalJudgeService:
self._llm,
prompt,
MemoirJudgeOutput,
max_tokens=_MEMOIR_JSON_MAX,
max_tokens=max(512, int(settings.eval_judge_memoir_completion_max_tokens)),
agent="EvalJudgeService.judge_memoir",
)
return JudgeCallResult(output=out)
except LLMCallError as e:
error = _judge_error_message(e)
logger.warning("memoir judge failed: {}", error)
# 回忆录评审在 INFO 也要可见(eval-web 排障);非异常路径、不刷堆栈
logger.info(
"event=eval_memoir_judge_llm_call_failed agent=EvalJudgeService.judge_memoir msg={}",
error,
)
return JudgeCallResult(output=None, error=error)