feat(eval): memoir A/B chapter judging and eval-web parity with dialogue

- Judge baseline excerpt and library chapter separately; build_memoir_compare_summary for gate, nine-dim and leaf deltas.

- Memoir SSE chapter payload: baseline_judge, compare_summary, baseline_judge_error.

- MemoirJudgeOutput: loose score coercion and post-validate clamp; memoir judge prompt caps from settings.

- app-eval-web: two-column MemoirScoreCard layout, MemoirCompareSummary, chapter blocks and CSS.

- Add memoir_compare_summary, log_events, celery_log_context, memoir_pipeline_progress; tests and migration 0014.

- Misc: memory/evidence and enrichment paths, task/orchestrator updates, internal-eval docs, env examples.
This commit is contained in:
Kevin
2026-04-10 10:23:43 +08:00
parent b0251e5b26
commit ac49bc7f23
59 changed files with 4773 additions and 696 deletions

View File

@@ -0,0 +1,101 @@
"""GET /users/{user_id}/memoir-pipeline-run快照读取"""
import pytest
from httpx import ASGITransport, AsyncClient
from app.features.evaluation.internal_auth import get_internal_eval_principal
@pytest.mark.asyncio
async def test_memoir_pipeline_run_ok_by_phase1_task(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Query the memoir-pipeline-run route by ``phase1_task_id`` and expect 200.

    The router's ``get_pipeline_run_for_eval`` is swapped for a stub that
    returns a fixed snapshot, and the internal-eval auth dependency is
    overridden on a throwaway FastAPI app. The test then checks that the
    response body carries the stub's correlation id and phase-1 task id.
    """
    from fastapi import FastAPI

    monkeypatch.setattr(
        "app.core.config.settings.internal_eval_api_key",
        "secret",
        raising=False,
    )

    from app.features.evaluation.router import router

    def _stub_pipeline_run(user_id: str, **kwargs: object):
        # Fail unless the stub receives the path user and the queried task id.
        assert user_id == "u1"
        assert kwargs.get("phase1_task_id") == "tid-z"
        phase1_state = {"task_id": "tid-z", "status": "running", "step": "started"}
        fanout_state = {
            "story_images": [],
            "recompose_chapters": [],
            "memory_enrichment": [],
            "quality_pass": None,
            "compaction": None,
        }
        return {
            "memoir_correlation_id": "cid-z",
            "user_id": "u1",
            "started_at_utc": "2026-04-09T00:00:00Z",
            "phase1": phase1_state,
            "phase2": [],
            "fanout": fanout_state,
        }

    monkeypatch.setattr(
        "app.features.evaluation.router.get_pipeline_run_for_eval",
        _stub_pipeline_run,
    )

    api = FastAPI()
    api.include_router(router, prefix="/internal/api/evaluation")

    async def _allow_internal_principal():
        from app.features.evaluation.internal_auth import InternalEvalPrincipal

        return InternalEvalPrincipal()

    # Replace the auth dependency so the route always sees a principal.
    api.dependency_overrides[get_internal_eval_principal] = _allow_internal_principal

    async with AsyncClient(
        transport=ASGITransport(app=api),
        base_url="http://t",
    ) as http:
        response = await http.get(
            "/internal/api/evaluation/users/u1/memoir-pipeline-run",
            headers={"X-Internal-Eval-Key": "secret"},
            params={"phase1_task_id": "tid-z"},
        )

    assert response.status_code == 200
    payload = response.json()
    assert payload["memoir_correlation_id"] == "cid-z"
    assert payload["phase1"]["task_id"] == "tid-z"
@pytest.mark.asyncio
async def test_memoir_pipeline_run_400_both_ids(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Send both ``phase1_task_id`` and ``memoir_correlation_id``; expect 400.

    The route is mounted on a throwaway FastAPI app with the internal-eval
    auth dependency overridden, then called with both identifiers supplied
    in the query string.
    """
    from fastapi import FastAPI

    monkeypatch.setattr(
        "app.core.config.settings.internal_eval_api_key",
        "secret",
        raising=False,
    )

    from app.features.evaluation.router import router

    api = FastAPI()
    api.include_router(router, prefix="/internal/api/evaluation")

    async def _allow_internal_principal():
        from app.features.evaluation.internal_auth import InternalEvalPrincipal

        return InternalEvalPrincipal()

    # Replace the auth dependency so the route always sees a principal.
    api.dependency_overrides[get_internal_eval_principal] = _allow_internal_principal

    both_identifiers = {
        "phase1_task_id": "a",
        "memoir_correlation_id": "b",
    }
    async with AsyncClient(
        transport=ASGITransport(app=api),
        base_url="http://t",
    ) as http:
        response = await http.get(
            "/internal/api/evaluation/users/u1/memoir-pipeline-run",
            headers={"X-Internal-Eval-Key": "secret"},
            params=both_identifiers,
        )

    assert response.status_code == 400