feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
This commit is contained in:
@@ -216,7 +216,7 @@ class ChatOrchestrator:
|
||||
messages=responses, skip_tts=False, memory_retrieval_trace=None
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"资料收集处理失败: {e}", exc_info=True)
|
||||
logger.exception("资料收集处理失败: {}", e)
|
||||
return AgentChatTurn(
|
||||
messages=["不好意思刚才没接住,你再说一遍好吗?"],
|
||||
skip_tts=False,
|
||||
|
||||
@@ -6,7 +6,7 @@ from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Callable, Dict, List
|
||||
|
||||
from app.agents.memoir.prompts import get_batch_memoir_phase1_prep_prompt
|
||||
from app.agents.memoir.schemas import BatchPhase1LLMOutput
|
||||
@@ -135,7 +135,7 @@ def _run_batch_phase1_prep_chunk_with_bisect(
|
||||
if merged.keys() != expected:
|
||||
raise ValueError(
|
||||
"batch phase1 chunked bisect merge: segment ids do not match input"
|
||||
)
|
||||
) from None
|
||||
return merged
|
||||
|
||||
|
||||
@@ -145,6 +145,7 @@ def run_batch_phase1_prep_chunked(
|
||||
llm: Any,
|
||||
*,
|
||||
chunk_size: int,
|
||||
on_chunk: Callable[[int, int], None] | None = None,
|
||||
) -> Dict[str, BatchPhase1SegmentRow]:
|
||||
"""
|
||||
将 segments 按 chunk_size 切片多次调用 Phase1 批处理 LLM,合并 by_id。
|
||||
@@ -161,13 +162,16 @@ def run_batch_phase1_prep_chunked(
|
||||
chunk_idx = i // chunk_size + 1
|
||||
sub = segments[i : i + chunk_size]
|
||||
logger.info(
|
||||
"event=batch_phase1_chunk chunk_idx={}/{} segment_count={} batch_path=chunked",
|
||||
"event=batch_phase1_chunk chunk_idx={}/{} segment_count={} batch_path=chunked "
|
||||
"msg=Phase1 批处理分块调用",
|
||||
chunk_idx,
|
||||
total_chunks,
|
||||
len(sub),
|
||||
)
|
||||
part = _run_batch_phase1_prep_chunk_with_bisect(sub, state, llm)
|
||||
merged.update(part)
|
||||
if on_chunk is not None:
|
||||
on_chunk(chunk_idx, total_chunks)
|
||||
expected = {str(s.id) for s in segments}
|
||||
if merged.keys() != expected:
|
||||
missing = expected - merged.keys()
|
||||
|
||||
@@ -8,7 +8,7 @@ from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Dict, List, Set, Tuple
|
||||
from typing import Any, Callable, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from app.agents.memoir.batch_phase1_prep import (
|
||||
STAGE_ALLOWED_SLOTS,
|
||||
@@ -63,6 +63,7 @@ class MemoirOrchestrator:
|
||||
get_or_create_state: Callable[[], MemoirStateSchema],
|
||||
update_slot: Callable[[str, str, str, List[str]], MemoirStateSchema],
|
||||
llm_fast: Any | None = None,
|
||||
on_phase1_chunk: Optional[Callable[[int, int], None]] = None,
|
||||
) -> PreparedMemoirBatches:
|
||||
"""
|
||||
遍历 segments:Extraction → slot 更新 → Classification → 按 category 分桶。
|
||||
@@ -89,15 +90,19 @@ class MemoirOrchestrator:
|
||||
state=state,
|
||||
classify_extract_llm=classify_extract_llm,
|
||||
update_slot=update_slot,
|
||||
on_phase1_chunk=on_phase1_chunk,
|
||||
)
|
||||
logger.info(
|
||||
"event=phase1_batch_path_used segment_count={}",
|
||||
"event=phase1_batch_path_used segment_count={} "
|
||||
"msg=Phase1 批处理 LLM 路径已使用",
|
||||
len(segments),
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"MemoirOrchestrator.prepare_batches batch LLM 失败,回退逐段: {}",
|
||||
"event=phase1_batch_path_fallback segment_count={} exc={} "
|
||||
"msg=Phase1 批处理失败,回退逐段",
|
||||
len(segments),
|
||||
e,
|
||||
)
|
||||
|
||||
@@ -172,6 +177,7 @@ class MemoirOrchestrator:
|
||||
state: MemoirStateSchema,
|
||||
classify_extract_llm: Any,
|
||||
update_slot: Callable[[str, str, str, List[str]], MemoirStateSchema],
|
||||
on_phase1_chunk: Optional[Callable[[int, int], None]] = None,
|
||||
) -> PreparedMemoirBatches:
|
||||
category_to_segments: Dict[str, List[Segment]] = {}
|
||||
segment_skip_story_ids: Set[str] = set()
|
||||
@@ -182,6 +188,7 @@ class MemoirOrchestrator:
|
||||
state,
|
||||
classify_extract_llm,
|
||||
chunk_size=int(settings.memoir_phase1_batch_llm_chunk_size),
|
||||
on_chunk=on_phase1_chunk,
|
||||
)
|
||||
|
||||
for segment in segments:
|
||||
@@ -294,6 +301,7 @@ class MemoirOrchestrator:
|
||||
llm_fast=llm_fast,
|
||||
get_or_create_state=get_or_create_state,
|
||||
update_slot=update_slot,
|
||||
on_phase1_chunk=None,
|
||||
)
|
||||
state = prepared.state
|
||||
chapters_to_enqueue: Set[str] = set()
|
||||
|
||||
Reference in New Issue
Block a user