feat(eval): server-side replay/phase1 timing + memoir phase1 batch chunking

- Replay and memoir-submit responses include started/finished UTC and elapsed_ms; Phase1 poll exposes Redis-backed submit time and elapsed_ms_since_submit.
- Phase1 batch LLM splits segments by memoir_phase1_batch_llm_chunk_size with bisect fallback per chunk; Playground shows server timings.

Made-with: Cursor
2026-04-09 13:38:53 +08:00
parent 064ad2161d
commit b0251e5b26
14 changed files with 544 additions and 14 deletions
--- a/api/app/agents/memoir/batch_phase1_prep.py
+++ b/api/app/agents/memoir/batch_phase1_prep.py
@@ -4,6 +4,7 @@ Phase1 批处理：一次 LLM 调用完成多段的抽取 + 章节分类（与

 from __future__ import annotations

+import math
 from dataclasses import dataclass
 from typing import Any, Dict, List

@@ -107,3 +108,76 @@ def run_batch_phase1_prep(
        logger.warning("batch phase1 id mismatch missing={} extra={}", missing, extra)
        raise ValueError("batch phase1 response segment ids do not match input")
    return by_id
+
+
+def _run_batch_phase1_prep_chunk_with_bisect(
+    segments: List[Segment],
+    state: MemoirStateSchema,
+    llm: Any,
+) -> Dict[str, BatchPhase1SegmentRow]:
+    """Run one chunk in a single LLM call; on ValueError (e.g. truncated output) halve the chunk and retry, down to single segments."""
+    try:
+        return run_batch_phase1_prep(segments, state, llm)
+    except ValueError:
+        if len(segments) <= 1:  # a single segment cannot be split further; propagate the error
+            raise
+        mid = len(segments) // 2
+        if mid < 1:  # defensive only: len(segments) >= 2 here, so mid is always >= 1
+            raise
+        left = _run_batch_phase1_prep_chunk_with_bisect(
+            segments[:mid], state, llm
+        )
+        right = _run_batch_phase1_prep_chunk_with_bisect(
+            segments[mid:], state, llm
+        )
+        merged = {**left, **right}  # on a duplicate id the right half's row wins
+        expected = {str(s.id) for s in segments}  # result keys are compared against str(segment.id)
+        if merged.keys() != expected:  # sanity check: the two halves together must cover exactly the input ids
+            raise ValueError(
+                "batch phase1 chunked bisect merge: segment ids do not match input"
+            )
+        return merged
+
+
+def run_batch_phase1_prep_chunked(
+    segments: List[Segment],
+    state: MemoirStateSchema,
+    llm: Any,
+    *,
+    chunk_size: int,
+) -> Dict[str, BatchPhase1SegmentRow]:
+    """
+    Slice segments into pieces of chunk_size, call the Phase1 batch LLM once per piece, and merge the rows by id.
+    A failing piece falls back to bisection within it (down to single segments); per the original note this is meant to hand off to the orchestrator's outer per-segment fallback (not visible here — confirm).
+    """
+    if not segments:  # nothing to process: no LLM call is made
+        return {}
+    if chunk_size < 1:  # clamp non-positive sizes so range() below gets a valid step
+        chunk_size = 1
+    n = len(segments)
+    total_chunks = max(1, math.ceil(n / chunk_size))  # only used in the log line below
+    merged: Dict[str, BatchPhase1SegmentRow] = {}
+    for i in range(0, n, chunk_size):
+        chunk_idx = i // chunk_size + 1  # 1-based chunk number for logging
+        sub = segments[i : i + chunk_size]
+        logger.info(
+            "event=batch_phase1_chunk chunk_idx={}/{} segment_count={} batch_path=chunked",
+            chunk_idx,
+            total_chunks,
+            len(sub),
+        )
+        part = _run_batch_phase1_prep_chunk_with_bisect(sub, state, llm)  # a ValueError from here is not caught in this function
+        merged.update(part)  # later chunks overwrite earlier rows that share an id
+    expected = {str(s.id) for s in segments}  # result keys are compared against str(segment.id)
+    if merged.keys() != expected:  # final check across all chunks, e.g. catches ids collapsed by overwrites
+        missing = expected - merged.keys()
+        extra = merged.keys() - expected
+        logger.warning(
+            "batch phase1 chunked id mismatch missing={} extra={}",
+            missing,
+            extra,
+        )
+        raise ValueError(
+            "batch phase1 chunked: merged segment ids do not match input"
+        )
+    return merged
--- a/api/app/agents/memoir/orchestrator.py
+++ b/api/app/agents/memoir/orchestrator.py
@@ -12,7 +12,7 @@ from typing import Any, Callable, Dict, List, Set, Tuple

 from app.agents.memoir.batch_phase1_prep import (
    STAGE_ALLOWED_SLOTS,
-    run_batch_phase1_prep,
+    run_batch_phase1_prep_chunked,
 )
 from app.agents.memoir.classification_agent import (
    ClassificationAgent,
@@ -177,7 +177,12 @@ class MemoirOrchestrator:
        segment_skip_story_ids: Set[str] = set()
        segment_chapter_category: Dict[str, str] = {}

-        by_id = run_batch_phase1_prep(segments, state, classify_extract_llm)
+        by_id = run_batch_phase1_prep_chunked(
+            segments,
+            state,
+            classify_extract_llm,
+            chunk_size=int(settings.memoir_phase1_batch_llm_chunk_size),
+        )

        for segment in segments:
            text = segment.user_input_text or ""