feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
parent b0251e5b26
commit ac49bc7f23
59 changed files with 4773 additions and 696 deletions
--- a/api/app/agents/memoir/orchestrator.py
+++ b/api/app/agents/memoir/orchestrator.py
@@ -8,7 +8,7 @@ from __future__ import annotations

 import time
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, List, Set, Tuple
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple

 from app.agents.memoir.batch_phase1_prep import (
    STAGE_ALLOWED_SLOTS,
@@ -63,6 +63,7 @@ class MemoirOrchestrator:
        get_or_create_state: Callable[[], MemoirStateSchema],
        update_slot: Callable[[str, str, str, List[str]], MemoirStateSchema],
        llm_fast: Any | None = None,
+        on_phase1_chunk: Optional[Callable[[int, int], None]] = None,
    ) -> PreparedMemoirBatches:
        """
        遍历 segments：Extraction → slot 更新 → Classification → 按 category 分桶。
@@ -89,15 +90,19 @@ class MemoirOrchestrator:
                    state=state,
                    classify_extract_llm=classify_extract_llm,
                    update_slot=update_slot,
+                    on_phase1_chunk=on_phase1_chunk,
                )
                logger.info(
-                    "event=phase1_batch_path_used segment_count={}",
+                    "event=phase1_batch_path_used segment_count={} "
+                    "msg=Phase1 批处理 LLM 路径已使用",
                    len(segments),
                )
                return result
            except Exception as e:
                logger.warning(
-                    "MemoirOrchestrator.prepare_batches batch LLM 失败，回退逐段: {}",
+                    "event=phase1_batch_path_fallback segment_count={} exc={} "
+                    "msg=Phase1 批处理失败，回退逐段",
+                    len(segments),
                    e,
                )

@@ -172,6 +177,7 @@ class MemoirOrchestrator:
        state: MemoirStateSchema,
        classify_extract_llm: Any,
        update_slot: Callable[[str, str, str, List[str]], MemoirStateSchema],
+        on_phase1_chunk: Optional[Callable[[int, int], None]] = None,
    ) -> PreparedMemoirBatches:
        category_to_segments: Dict[str, List[Segment]] = {}
        segment_skip_story_ids: Set[str] = set()
@@ -182,6 +188,7 @@ class MemoirOrchestrator:
            state,
            classify_extract_llm,
            chunk_size=int(settings.memoir_phase1_batch_llm_chunk_size),
+            on_chunk=on_phase1_chunk,
        )

        for segment in segments:
@@ -294,6 +301,7 @@ class MemoirOrchestrator:
            llm_fast=llm_fast,
            get_or_create_state=get_or_create_state,
            update_slot=update_slot,
+            on_phase1_chunk=None,
        )
        state = prepared.state
        chapters_to_enqueue: Set[str] = set()