Merge branch 'eval/elapsed-time-memoir-batch-chunk' into development

This commit is contained in:
Kevin
2026-04-10 10:27:41 +08:00
66 changed files with 5246 additions and 705 deletions

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
from app.core.config import settings
from app.features.conversation.models import Segment
from app.features.evaluation.eval_trace_schemas import (
ChapterEvidenceBundle,
@@ -16,9 +17,10 @@ from app.features.memory.models import (
TimelineEvent,
)
# 与 judge_service._MEMOIR_EVIDENCE_MAX 对齐:访谈与结构化证据分预算,避免总长失控
_MEMOIR_TRANSCRIPT_CAP = 12_000
_MEMOIR_STRUCTURED_CAP = 12_000
def _memoir_evidence_char_cap() -> int:
    """Return the character cap used for memoir evidence text.

    The value is read from ``settings.eval_judge_memoir_evidence_max_chars``
    and coerced to ``int``. A configured value below 1000 is raised to 1000,
    so the returned cap is never smaller than that floor.
    """
    floor = 1000
    configured = int(settings.eval_judge_memoir_evidence_max_chars)
    # Equivalent to max(floor, configured): keep the floor unless the
    # configured value is strictly larger.
    return configured if configured > floor else floor
def _approx_tokens(chars: int) -> int:
@@ -74,11 +76,12 @@ def build_structured_evidence_text(
facts: list[MemoryFact],
events: list[TimelineEvent],
summaries: list[MemorySummary],
max_chars: int = _MEMOIR_STRUCTURED_CAP,
max_chars: int | None = None,
) -> tuple[str, bool, list[str]]:
"""
结构化记忆证据块;返回 (text, truncated, dropped_section_tags)。
"""
cap = max_chars if max_chars is not None else _memoir_evidence_char_cap()
parts: list[str] = []
dropped: list[str] = []
used = 0
@@ -89,7 +92,7 @@ def build_structured_evidence_text(
block = f"{title}\n{body}".strip()
if not block:
return
if used + len(block) + 2 > max_chars:
if used + len(block) + 2 > cap:
truncated = True
dropped.append(title.strip("【】").split("·")[0].strip())
return
@@ -171,23 +174,22 @@ def format_chapter_for_judge(
events: list[TimelineEvent],
summaries: list[MemorySummary],
) -> FormattedMemoirEvidence:
t_cap = _MEMOIR_TRANSCRIPT_CAP
s_cap = _MEMOIR_STRUCTURED_CAP
ev_cap = _memoir_evidence_char_cap()
dropped: list[str] = []
truncated = False
t_in = transcript.strip()
if len(t_in) > t_cap:
if len(t_in) > ev_cap:
truncated = True
dropped.append("source_transcript_tail")
t_in = t_in[:t_cap] + "\n\n…(原始对话证据已截断)"
t_in = t_in[:ev_cap] + "\n\n…(原始对话证据已截断)"
struct, s_trunc, s_drop = build_structured_evidence_text(
chunks=chunks,
facts=facts,
events=events,
summaries=summaries,
max_chars=s_cap,
max_chars=ev_cap,
)
if s_trunc:
truncated = True
@@ -227,23 +229,22 @@ def format_story_for_judge(
events: list[TimelineEvent],
summaries: list[MemorySummary],
) -> FormattedMemoirEvidence:
t_cap = _MEMOIR_TRANSCRIPT_CAP
s_cap = _MEMOIR_STRUCTURED_CAP
ev_cap = _memoir_evidence_char_cap()
dropped: list[str] = []
truncated = False
t_in = transcript.strip()
if len(t_in) > t_cap:
if len(t_in) > ev_cap:
truncated = True
dropped.append("source_transcript_tail")
t_in = t_in[:t_cap] + "\n\n…(原始对话证据已截断)"
t_in = t_in[:ev_cap] + "\n\n…(原始对话证据已截断)"
struct, s_trunc, s_drop = build_structured_evidence_text(
chunks=chunks,
facts=facts,
events=events,
summaries=summaries,
max_chars=s_cap,
max_chars=ev_cap,
)
if s_trunc:
truncated = True