feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
parent b0251e5b26
commit ac49bc7f23
59 changed files with 4773 additions and 696 deletions
--- a/api/tests/test_memoir_pipeline_progress.py
+++ b/api/tests/test_memoir_pipeline_progress.py
@@ -0,0 +1,105 @@
+"""memoir_pipeline_progress: merge and read logic (using a fake Redis client)."""
+
+import json
+
+import pytest
+
+import app.core.memoir_pipeline_progress as mpp
+
+
+class _FakeRedis:  # in-memory stand-in for a Redis client; only get/setex are implemented
+    def __init__(self) -> None:
+        self.store: dict[str, str] = {}  # key -> last value written via setex
+
+    def get(self, key: str) -> str | None:
+        return self.store.get(key)  # None when the key was never written
+
+    def setex(self, key: str, _ttl: int, value: str) -> None:
+        self.store[key] = value  # the TTL argument is accepted but ignored
+
+
+@pytest.fixture
+def fake_redis(monkeypatch: pytest.MonkeyPatch) -> _FakeRedis:
+    fr = _FakeRedis()
+    monkeypatch.setattr(mpp, "_client", fr)  # replace the module's `_client` with the fake
+    return fr  # returned so tests can inspect `store` directly
+
+
+def test_merge_pipeline_run_creates_doc(fake_redis: _FakeRedis) -> None:
+    mpp.merge_pipeline_run("cid-1", {"phase1": {"step": "memory_ingest"}})
+    raw = fake_redis.store.get("memoir_pipeline_run:cid-1")  # key is prefix + correlation id
+    assert raw  # a first merge on an empty store must write a document
+    doc = json.loads(raw)
+    assert doc["memoir_correlation_id"] == "cid-1"  # the id is also stored inside the document
+    assert doc["phase1"]["step"] == "memory_ingest"
+
+
+def test_merge_phase2_merges_by_task_id(fake_redis: _FakeRedis) -> None:
+    mpp.merge_pipeline_run(  # first write: one phase2 entry with all three fields
+        "cid-2",
+        {
+            "phase2": [
+                {"chapter_category": "a", "task_id": "t1", "status": "enqueued"},
+            ],
+        },
+    )
+    mpp.merge_pipeline_run(  # second write: same task_id, carries only a new status
+        "cid-2",
+        {"phase2": [{"task_id": "t1", "status": "running"}]},
+    )
+    raw = fake_redis.store["memoir_pipeline_run:cid-2"]
+    doc = json.loads(raw)
+    assert len(doc["phase2"]) == 1  # merged into the existing entry, not appended
+    assert doc["phase2"][0]["task_id"] == "t1"
+    assert doc["phase2"][0]["status"] == "running"  # status comes from the second write
+    assert doc["phase2"][0]["chapter_category"] == "a"  # field absent from the update is kept
+
+
+def test_merge_fanout_lists_merge_by_id(fake_redis: _FakeRedis) -> None:
+    mpp.merge_pipeline_run(  # first write: one story_images entry with story_id and task_id
+        "cid-3",
+        {
+            "fanout": {
+                "story_images": [
+                    {"story_id": "s1", "task_id": "img1", "status": "enqueued"},
+                ],
+            },
+        },
+    )
+    mpp.merge_pipeline_run(  # second write: same story_id, no task_id, new status
+        "cid-3",
+        {
+            "fanout": {
+                "story_images": [
+                    {"story_id": "s1", "status": "success"},
+                ],
+            },
+        },
+    )
+    doc = json.loads(fake_redis.store["memoir_pipeline_run:cid-3"])
+    assert len(doc["fanout"]["story_images"]) == 1  # matched to the existing entry, not appended
+    assert doc["fanout"]["story_images"][0]["task_id"] == "img1"  # kept from the first write
+    assert doc["fanout"]["story_images"][0]["status"] == "success"  # taken from the second write
+
+
+def test_init_and_index_resolve(fake_redis: _FakeRedis) -> None:
+    mpp.init_pipeline_run_from_phase1(
+        "user-a", "cid-4", "p1tid", segment_count=3
+    )
+    cid = mpp.resolve_correlation_id_for_phase1_task("p1tid")  # look up by the phase-1 task id
+    assert cid == "cid-4"  # must map back to the correlation id passed to init
+    snap = mpp.get_pipeline_run_for_eval(
+        "user-a", phase1_task_id="p1tid"
+    )
+    assert snap is not None  # same first argument as init ("user-a"), so a snapshot is expected
+    assert snap["user_id"] == "user-a"
+    assert snap["phase1"]["task_id"] == "p1tid"
+
+
+def test_get_pipeline_run_for_eval_user_mismatch(fake_redis: _FakeRedis) -> None:
+    mpp.init_pipeline_run_from_phase1(
+        "user-a", "cid-5", "p1b", segment_count=1
+    )
+    assert (  # a different first argument ("other" vs "user-a") must yield None
+        mpp.get_pipeline_run_for_eval("other", phase1_task_id="p1b") is None
+    )