feat(eval): memoir A/B chapter judging and eval-web parity with dialogue
- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas. - Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error. - MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings. - app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS. - Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014. - Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
This commit is contained in:
105
api/tests/test_memoir_pipeline_progress.py
Normal file
105
api/tests/test_memoir_pipeline_progress.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""memoir_pipeline_progress:合并与读取逻辑(假 Redis 客户端)。"""
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
import app.core.memoir_pipeline_progress as mpp
|
||||
|
||||
|
||||
class _FakeRedis:
|
||||
def __init__(self) -> None:
|
||||
self.store: dict[str, str] = {}
|
||||
|
||||
def get(self, key: str) -> str | None:
|
||||
return self.store.get(key)
|
||||
|
||||
def setex(self, key: str, _ttl: int, value: str) -> None:
|
||||
self.store[key] = value
|
||||
|
||||
|
||||
@pytest.fixture
def fake_redis(monkeypatch: pytest.MonkeyPatch) -> _FakeRedis:
    """Replace ``mpp._client`` with a fresh ``_FakeRedis`` and return it.

    The patch is applied through ``monkeypatch``, so it is undone when the
    requesting test finishes.
    """
    fr = _FakeRedis()
    monkeypatch.setattr(mpp, "_client", fr)
    return fr
|
||||
|
||||
|
||||
def test_merge_pipeline_run_creates_doc(fake_redis: _FakeRedis) -> None:
    """A first merge for a correlation id writes a JSON document to the store.

    The document is expected under ``memoir_pipeline_run:<cid>`` and must
    carry both the correlation id and the merged ``phase1`` payload.
    """
    mpp.merge_pipeline_run("cid-1", {"phase1": {"step": "memory_ingest"}})
    raw = fake_redis.store.get("memoir_pipeline_run:cid-1")
    assert raw
    doc = json.loads(raw)
    assert doc["memoir_correlation_id"] == "cid-1"
    assert doc["phase1"]["step"] == "memory_ingest"
|
||||
|
||||
|
||||
def test_merge_phase2_merges_by_task_id(fake_redis: _FakeRedis) -> None:
    """A second ``phase2`` entry with the same ``task_id`` updates in place.

    After the second merge there must still be a single entry: its status
    is overwritten while the earlier ``chapter_category`` field is kept.
    """
    mpp.merge_pipeline_run(
        "cid-2",
        {
            "phase2": [
                {"chapter_category": "a", "task_id": "t1", "status": "enqueued"},
            ],
        },
    )
    mpp.merge_pipeline_run(
        "cid-2",
        {"phase2": [{"task_id": "t1", "status": "running"}]},
    )
    raw = fake_redis.store["memoir_pipeline_run:cid-2"]
    doc = json.loads(raw)
    assert len(doc["phase2"]) == 1
    assert doc["phase2"][0]["task_id"] == "t1"
    assert doc["phase2"][0]["status"] == "running"
    assert doc["phase2"][0]["chapter_category"] == "a"
|
||||
|
||||
|
||||
def test_merge_fanout_lists_merge_by_id(fake_redis: _FakeRedis) -> None:
    """Entries in ``fanout.story_images`` are merged rather than appended.

    The second payload repeats ``story_id`` "s1" without a ``task_id``; the
    stored list must still hold one entry, with the original ``task_id``
    and the updated status.
    """
    mpp.merge_pipeline_run(
        "cid-3",
        {
            "fanout": {
                "story_images": [
                    {"story_id": "s1", "task_id": "img1", "status": "enqueued"},
                ],
            },
        },
    )
    mpp.merge_pipeline_run(
        "cid-3",
        {
            "fanout": {
                "story_images": [
                    {"story_id": "s1", "status": "success"},
                ],
            },
        },
    )
    doc = json.loads(fake_redis.store["memoir_pipeline_run:cid-3"])
    assert len(doc["fanout"]["story_images"]) == 1
    assert doc["fanout"]["story_images"][0]["task_id"] == "img1"
    assert doc["fanout"]["story_images"][0]["status"] == "success"
|
||||
|
||||
|
||||
def test_init_and_index_resolve(fake_redis: _FakeRedis) -> None:
    """A run initialised from phase 1 can be found again by its task id.

    Checks that the phase-1 task id resolves to the correlation id, and
    that the eval snapshot for the same user exposes the user id and the
    phase-1 task id.
    """
    mpp.init_pipeline_run_from_phase1(
        "user-a", "cid-4", "p1tid", segment_count=3
    )
    cid = mpp.resolve_correlation_id_for_phase1_task("p1tid")
    assert cid == "cid-4"
    snap = mpp.get_pipeline_run_for_eval(
        "user-a", phase1_task_id="p1tid"
    )
    assert snap is not None
    assert snap["user_id"] == "user-a"
    assert snap["phase1"]["task_id"] == "p1tid"
|
||||
|
||||
|
||||
def test_get_pipeline_run_for_eval_user_mismatch(fake_redis: _FakeRedis) -> None:
    """The eval lookup returns ``None`` when queried with a different user id.

    The run is created for "user-a"; asking for it as "other" with the same
    phase-1 task id must not return the snapshot.
    """
    mpp.init_pipeline_run_from_phase1(
        "user-a", "cid-5", "p1b", segment_count=1
    )
    assert (
        mpp.get_pipeline_run_for_eval("other", phase1_task_id="p1b") is None
    )
|
||||
Reference in New Issue
Block a user