feat(evaluation): memoir readiness, judge/replay updates, eval web playground

Add memoir_readiness_service and router tests; extend judge schemas/services, replay_service, and conversation rubric; align story route agent, payload, prompts, and story_pipeline_sync; update agent logging, config, and DI. Document internal-eval; add replayDraft util and PlaygroundPage changes in app-eval-web.
This commit is contained in:
Kevin
2026-04-08 09:38:07 +08:00
parent 99543d04c6
commit 6772e1269c
26 changed files with 1255 additions and 124 deletions

View File

@@ -127,6 +127,21 @@ def test_total_budget_downgrades_tail_rows(monkeypatch):
assert any("preview" in row and "body_for_route" not in row for row in data)
def test_opening_snippet_when_no_summary_but_body():
    """Check that a story with an empty summary still gets an opening snippet.

    Builds one story whose ``summary`` is empty and whose
    ``canonical_markdown`` holds body text, runs it through
    ``build_route_candidate_rows``, and asserts that the first row has an
    ``opening_snippet`` entry containing text taken from the body.
    """
    # The sentence is repeated twice to make the body longer.
    body_text = "我在山东潍坊长大,小时候常和伙伴在河边玩。" * 2
    story = _story(id="1", summary="", canonical_markdown=body_text)
    settings = Settings()

    # Per-story metadata; the key matches the story id used above.
    meta_by_story_id = {"1": {"char_count": 80, "version_count": 1}}

    candidate_rows = build_route_candidate_rows(
        [story], meta_by_story_id, settings
    )
    first_row = candidate_rows[0]

    assert "opening_snippet" in first_row
    assert "潍坊" in first_row["opening_snippet"]
def test_json_includes_core_fields():
s = _story(
id="x1",