feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.
- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.
- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.
- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.
- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.
- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
2026-04-10 10:23:43 +08:00
parent b0251e5b26
commit ac49bc7f23
59 changed files with 4773 additions and 696 deletions
--- a/api/tests/evaluation/test_memoir_pipeline_run_router.py
+++ b/api/tests/evaluation/test_memoir_pipeline_run_router.py
@@ -0,0 +1,101 @@
+"""GET /users/{user_id}/memoir-pipeline-run (snapshot read)."""
+
+import pytest
+from httpx import ASGITransport, AsyncClient
+
+from app.features.evaluation.internal_auth import get_internal_eval_principal
+
+
+@pytest.mark.asyncio
+async def test_memoir_pipeline_run_ok_by_phase1_task(  # Happy path: lookup by phase1_task_id returns 200 and the stubbed snapshot.
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from fastapi import FastAPI
+
+    monkeypatch.setattr(  # Pin the internal-eval API key setting to a known value for this test.
+        "app.core.config.settings.internal_eval_api_key",
+        "secret",
+        raising=False,  # Do not fail if the settings object has no such attribute yet.
+    )
+    from app.features.evaluation.router import router  # NOTE(review): imported after the patch above, presumably on purpose — confirm.
+
+    def _fake_eval(user_id: str, **kwargs: object):  # Stub for get_pipeline_run_for_eval; also checks the arguments the router passes in.
+        assert user_id == "u1"
+        assert kwargs.get("phase1_task_id") == "tid-z"  # The query parameter must arrive as a keyword argument.
+        return {  # Canned snapshot; the response assertions at the end read from it.
+            "memoir_correlation_id": "cid-z",
+            "user_id": "u1",
+            "started_at_utc": "2026-04-09T00:00:00Z",
+            "phase1": {"task_id": "tid-z", "status": "running", "step": "started"},
+            "phase2": [],
+            "fanout": {
+                "story_images": [],
+                "recompose_chapters": [],
+                "memory_enrichment": [],
+                "quality_pass": None,
+                "compaction": None,
+            },
+        }
+
+    monkeypatch.setattr(  # Swap in the stub under the name looked up in the router module.
+        "app.features.evaluation.router.get_pipeline_run_for_eval",
+        _fake_eval,
+    )
+
+    app = FastAPI()  # Minimal app that mounts only the evaluation router.
+    app.include_router(router, prefix="/internal/api/evaluation")
+
+    async def _override_auth():  # Replacement auth dependency: always returns a principal.
+        from app.features.evaluation.internal_auth import InternalEvalPrincipal
+
+        return InternalEvalPrincipal()
+
+    app.dependency_overrides[get_internal_eval_principal] = _override_auth  # Route auth resolves to the replacement above.
+
+    transport = ASGITransport(app=app)  # Requests are dispatched to the app in-process.
+    async with AsyncClient(transport=transport, base_url="http://t") as client:
+        r = await client.get(
+            "/internal/api/evaluation/users/u1/memoir-pipeline-run",
+            headers={"X-Internal-Eval-Key": "secret"},  # NOTE(review): likely unused while the auth dependency is overridden — confirm.
+            params={"phase1_task_id": "tid-z"},
+        )
+    assert r.status_code == 200
+    body = r.json()
+    assert body["memoir_correlation_id"] == "cid-z"  # Both values originate from the stub's canned snapshot.
+    assert body["phase1"]["task_id"] == "tid-z"
+
+
+@pytest.mark.asyncio
+async def test_memoir_pipeline_run_400_both_ids(  # Error path: sending both phase1_task_id and memoir_correlation_id must yield 400.
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from fastapi import FastAPI
+
+    monkeypatch.setattr(  # Pin the internal-eval API key setting to a known value for this test.
+        "app.core.config.settings.internal_eval_api_key",
+        "secret",
+        raising=False,  # Do not fail if the settings object has no such attribute yet.
+    )
+    from app.features.evaluation.router import router  # NOTE(review): imported after the patch above, presumably on purpose — confirm.
+
+    app = FastAPI()  # Minimal app that mounts only the evaluation router.
+    app.include_router(router, prefix="/internal/api/evaluation")
+
+    async def _override_auth():  # Replacement auth dependency: always returns a principal.
+        from app.features.evaluation.internal_auth import InternalEvalPrincipal
+
+        return InternalEvalPrincipal()
+
+    app.dependency_overrides[get_internal_eval_principal] = _override_auth  # Route auth resolves to the replacement above.
+
+    transport = ASGITransport(app=app)  # Requests are dispatched to the app in-process.
+    async with AsyncClient(transport=transport, base_url="http://t") as client:
+        r = await client.get(
+            "/internal/api/evaluation/users/u1/memoir-pipeline-run",
+            headers={"X-Internal-Eval-Key": "secret"},  # NOTE(review): likely unused while the auth dependency is overridden — confirm.
+            params={  # Both identifiers are supplied at once; the test expects this combination to be rejected.
+                "phase1_task_id": "a",
+                "memoir_correlation_id": "b",
+            },
+        )
+    assert r.status_code == 400  # NOTE(review): get_pipeline_run_for_eval is not stubbed here, so the 400 presumably precedes any lookup — confirm.