feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
This commit is contained in:
Kevin
2026-04-10 10:23:43 +08:00
parent b0251e5b26
commit ac49bc7f23
59 changed files with 4773 additions and 696 deletions

View File

@@ -0,0 +1,105 @@
"""Tests for memoir_pipeline_progress merge and read logic (using a fake Redis client)."""
import json
import pytest
import app.core.memoir_pipeline_progress as mpp
class _FakeRedis:
def __init__(self) -> None:
self.store: dict[str, str] = {}
def get(self, key: str) -> str | None:
return self.store.get(key)
def setex(self, key: str, _ttl: int, value: str) -> None:
self.store[key] = value
@pytest.fixture
def fake_redis(monkeypatch: pytest.MonkeyPatch) -> _FakeRedis:
    """Install a fresh ``_FakeRedis`` as ``mpp._client`` and return it.

    The attribute is replaced through ``monkeypatch`` so the swap is scoped
    to the requesting test. The fake is returned so the test can read its
    ``store`` directly.
    """
    fr = _FakeRedis()
    monkeypatch.setattr(mpp, "_client", fr)
    return fr
def test_merge_pipeline_run_creates_doc(fake_redis: _FakeRedis) -> None:
    """A first merge for ``cid-1`` writes a JSON document to the fake store.

    The document is expected under the key ``memoir_pipeline_run:cid-1`` and
    must carry both the correlation id and the merged ``phase1`` payload.
    """
    mpp.merge_pipeline_run("cid-1", {"phase1": {"step": "memory_ingest"}})
    raw = fake_redis.store.get("memoir_pipeline_run:cid-1")
    # Fails on both a missing key (None) and an empty payload.
    assert raw
    doc = json.loads(raw)
    assert doc["memoir_correlation_id"] == "cid-1"
    assert doc["phase1"]["step"] == "memory_ingest"
def test_merge_phase2_merges_by_task_id(fake_redis: _FakeRedis) -> None:
    """Two ``phase2`` merges sharing a ``task_id`` collapse into one entry.

    The second merge updates ``status`` while the ``chapter_category`` set by
    the first merge must still be present afterwards.
    """
    mpp.merge_pipeline_run(
        "cid-2",
        {
            "phase2": [
                {"chapter_category": "a", "task_id": "t1", "status": "enqueued"},
            ],
        },
    )
    # Second update for the same task carries only the changed field.
    mpp.merge_pipeline_run(
        "cid-2",
        {"phase2": [{"task_id": "t1", "status": "running"}]},
    )
    raw = fake_redis.store["memoir_pipeline_run:cid-2"]
    doc = json.loads(raw)
    assert len(doc["phase2"]) == 1
    assert doc["phase2"][0]["task_id"] == "t1"
    assert doc["phase2"][0]["status"] == "running"
    assert doc["phase2"][0]["chapter_category"] == "a"
def test_merge_fanout_lists_merge_by_id(fake_redis: _FakeRedis) -> None:
    """Two ``fanout.story_images`` merges for story ``s1`` yield one entry.

    The second merge omits ``task_id``; the test expects the earlier
    ``task_id`` to survive while ``status`` is updated to ``success``.
    """
    mpp.merge_pipeline_run(
        "cid-3",
        {
            "fanout": {
                "story_images": [
                    {"story_id": "s1", "task_id": "img1", "status": "enqueued"},
                ],
            },
        },
    )
    # Same story again, identified by story_id only.
    mpp.merge_pipeline_run(
        "cid-3",
        {
            "fanout": {
                "story_images": [
                    {"story_id": "s1", "status": "success"},
                ],
            },
        },
    )
    doc = json.loads(fake_redis.store["memoir_pipeline_run:cid-3"])
    assert len(doc["fanout"]["story_images"]) == 1
    assert doc["fanout"]["story_images"][0]["task_id"] == "img1"
    assert doc["fanout"]["story_images"][0]["status"] == "success"
def test_init_and_index_resolve(fake_redis: _FakeRedis) -> None:
    """After init, the phase-1 task id resolves to its run and snapshot.

    Checks that ``p1tid`` resolves to correlation id ``cid-4`` and that the
    snapshot fetched for ``user-a`` exposes the user id and phase-1 task id.
    """
    mpp.init_pipeline_run_from_phase1(
        "user-a", "cid-4", "p1tid", segment_count=3
    )
    cid = mpp.resolve_correlation_id_for_phase1_task("p1tid")
    assert cid == "cid-4"
    snap = mpp.get_pipeline_run_for_eval(
        "user-a", phase1_task_id="p1tid"
    )
    assert snap is not None
    assert snap["user_id"] == "user-a"
    assert snap["phase1"]["task_id"] == "p1tid"
def test_get_pipeline_run_for_eval_user_mismatch(fake_redis: _FakeRedis) -> None:
    """A run initialised for ``user-a`` is not returned to a different user.

    Looking up the same phase-1 task id as user ``other`` must give ``None``.
    """
    mpp.init_pipeline_run_from_phase1(
        "user-a", "cid-5", "p1b", segment_count=1
    )
    assert (
        mpp.get_pipeline_run_for_eval("other", phase1_task_id="p1b") is None
    )