Merge branch 'eval/elapsed-time-memoir-batch-chunk' into development
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.core.config import settings
|
||||
from app.features.conversation.models import Segment
|
||||
from app.features.evaluation.eval_trace_schemas import (
|
||||
ChapterEvidenceBundle,
|
||||
@@ -16,9 +17,10 @@ from app.features.memory.models import (
|
||||
TimelineEvent,
|
||||
)
|
||||
|
||||
# 与 judge_service._MEMOIR_EVIDENCE_MAX 对齐:访谈与结构化证据分预算,避免总长失控
|
||||
_MEMOIR_TRANSCRIPT_CAP = 12_000
|
||||
_MEMOIR_STRUCTURED_CAP = 12_000
|
||||
|
||||
def _memoir_evidence_char_cap() -> int:
    """Return the character cap used for memoir evidence text.

    The cap is read from ``settings.eval_judge_memoir_evidence_max_chars``,
    coerced to ``int``, and never allowed to fall below 1000 characters.
    """
    floor = 1000
    configured = int(settings.eval_judge_memoir_evidence_max_chars)
    # Keep a minimum budget even when the configured value is smaller.
    return configured if configured > floor else floor
|
||||
|
||||
|
||||
def _approx_tokens(chars: int) -> int:
|
||||
@@ -74,11 +76,12 @@ def build_structured_evidence_text(
|
||||
facts: list[MemoryFact],
|
||||
events: list[TimelineEvent],
|
||||
summaries: list[MemorySummary],
|
||||
max_chars: int = _MEMOIR_STRUCTURED_CAP,
|
||||
max_chars: int | None = None,
|
||||
) -> tuple[str, bool, list[str]]:
|
||||
"""
|
||||
结构化记忆证据块;返回 (text, truncated, dropped_section_tags)。
|
||||
"""
|
||||
cap = max_chars if max_chars is not None else _memoir_evidence_char_cap()
|
||||
parts: list[str] = []
|
||||
dropped: list[str] = []
|
||||
used = 0
|
||||
@@ -89,7 +92,7 @@ def build_structured_evidence_text(
|
||||
block = f"{title}\n{body}".strip()
|
||||
if not block:
|
||||
return
|
||||
if used + len(block) + 2 > max_chars:
|
||||
if used + len(block) + 2 > cap:
|
||||
truncated = True
|
||||
dropped.append(title.strip("【】").split("·")[0].strip())
|
||||
return
|
||||
@@ -171,23 +174,22 @@ def format_chapter_for_judge(
|
||||
events: list[TimelineEvent],
|
||||
summaries: list[MemorySummary],
|
||||
) -> FormattedMemoirEvidence:
|
||||
t_cap = _MEMOIR_TRANSCRIPT_CAP
|
||||
s_cap = _MEMOIR_STRUCTURED_CAP
|
||||
ev_cap = _memoir_evidence_char_cap()
|
||||
dropped: list[str] = []
|
||||
truncated = False
|
||||
|
||||
t_in = transcript.strip()
|
||||
if len(t_in) > t_cap:
|
||||
if len(t_in) > ev_cap:
|
||||
truncated = True
|
||||
dropped.append("source_transcript_tail")
|
||||
t_in = t_in[:t_cap] + "\n\n…(原始对话证据已截断)"
|
||||
t_in = t_in[:ev_cap] + "\n\n…(原始对话证据已截断)"
|
||||
|
||||
struct, s_trunc, s_drop = build_structured_evidence_text(
|
||||
chunks=chunks,
|
||||
facts=facts,
|
||||
events=events,
|
||||
summaries=summaries,
|
||||
max_chars=s_cap,
|
||||
max_chars=ev_cap,
|
||||
)
|
||||
if s_trunc:
|
||||
truncated = True
|
||||
@@ -227,23 +229,22 @@ def format_story_for_judge(
|
||||
events: list[TimelineEvent],
|
||||
summaries: list[MemorySummary],
|
||||
) -> FormattedMemoirEvidence:
|
||||
t_cap = _MEMOIR_TRANSCRIPT_CAP
|
||||
s_cap = _MEMOIR_STRUCTURED_CAP
|
||||
ev_cap = _memoir_evidence_char_cap()
|
||||
dropped: list[str] = []
|
||||
truncated = False
|
||||
|
||||
t_in = transcript.strip()
|
||||
if len(t_in) > t_cap:
|
||||
if len(t_in) > ev_cap:
|
||||
truncated = True
|
||||
dropped.append("source_transcript_tail")
|
||||
t_in = t_in[:t_cap] + "\n\n…(原始对话证据已截断)"
|
||||
t_in = t_in[:ev_cap] + "\n\n…(原始对话证据已截断)"
|
||||
|
||||
struct, s_trunc, s_drop = build_structured_evidence_text(
|
||||
chunks=chunks,
|
||||
facts=facts,
|
||||
events=events,
|
||||
summaries=summaries,
|
||||
max_chars=s_cap,
|
||||
max_chars=ev_cap,
|
||||
)
|
||||
if s_trunc:
|
||||
truncated = True
|
||||
|
||||
Reference in New Issue
Block a user