fix(memoir): 改善 story 合并决策，少生碎片篇

以前模型只看到很短的预览，还容易被引导成新建 story。现在优先用已有摘要、按需带正文片段，并区分「像续写同一主题」和「像换了一件事」；beliefs/summary 更鼓励接着写，career/童年等仍可按新事件新开。
2026-04-03 11:02:05 +08:00
parent 545d5a4ae0
commit b853b986dd
7 changed files with 715 additions and 49 deletions
--- a/api/tests/test_story_route_payload.py
+++ b/api/tests/test_story_route_payload.py
@@ -0,0 +1,146 @@
+"""Story 路由候选 JSON：排序、summary 优先、预算降级。"""
+
+from __future__ import annotations
+
+import json
+from datetime import datetime, timezone
+from types import SimpleNamespace
+
+from app.agents.memoir.story_route_payload import (
+    build_route_candidate_json,
+    build_route_candidate_rows,
+    sort_stories_for_route,
+    _truncate_body_for_route,
+)
+from app.core.config import Settings
+
+
+def _story(**kwargs):
+    """Build a stand-in story object; keyword arguments override the defaults."""
+    attrs = {
+        "id": "s-default",
+        "title": "T",
+        "summary": None,
+        "canonical_markdown": "",
+        "updated_at": None,
+        "chapter_links": [],
+    }
+    return SimpleNamespace(**{**attrs, **kwargs})
+
+
+def test_sort_has_summary_first_then_recency():
+    """An older story with a 40-char summary must sort ahead of a newer bare one."""
+    stale_ts = datetime(2020, 1, 1, tzinfo=timezone.utc)
+    fresh_ts = datetime(2025, 1, 1, tzinfo=timezone.utc)
+    aged = _story(id="old", summary="x" * 40, updated_at=stale_ts)
+    recent = _story(
+        id="new",
+        summary="",
+        canonical_markdown="body",
+        updated_at=fresh_ts,
+    )
+    stats = {
+        "old": {"char_count": 10, "version_count": 1},
+        "new": {"char_count": 20, "version_count": 2},
+    }
+    # The newer story is passed first, so the expected order needs a real reorder.
+    ranked = sort_stories_for_route([recent, aged], stats, summary_min_chars=30)
+    assert [story.id for story in ranked] == ["old", "new"]
+
+
+def test_sort_tiebreak_version_then_char_then_id():
+    """With equal timestamps and empty summaries, story "b" must come first."""
+    same_ts = datetime(2024, 6, 1, tzinfo=timezone.utc)
+    first = _story(id="a", summary="", canonical_markdown="a", updated_at=same_ts)
+    second = _story(id="b", summary="", canonical_markdown="bb", updated_at=same_ts)
+    # "a" has the larger char_count but the smaller version_count.
+    stats = {"a": {"char_count": 100, "version_count": 1}}
+    stats["b"] = {"char_count": 50, "version_count": 3}
+    ranked = sort_stories_for_route([first, second], stats, summary_min_chars=30)
+    assert [story.id for story in ranked] == ["b", "a"]
+
+
+def test_summary_sufficient_omits_body():
+    """With a 40-char summary, the row must carry it and omit body_for_route."""
+    story = _story(id="1", summary="信" * 40, canonical_markdown="正文" * 500)
+    stats = {"1": {"char_count": 10, "version_count": 1}}
+    settings = Settings()
+
+    # Thresholds come from a default-constructed Settings.
+    rows = build_route_candidate_rows([story], stats, settings)
+
+    first_row = rows[0]
+    assert "summary" in first_row
+    assert "body_for_route" not in first_row
+
+
+def test_short_summary_falls_back_to_body():
+    """With a one-char summary, the row must drop it and carry body_for_route."""
+    story = _story(id="1", summary="短", canonical_markdown="唯一的正文用于路由")
+    stats = {"1": {"char_count": 20, "version_count": 1}}
+    settings = Settings()
+
+    # Thresholds come from a default-constructed Settings.
+    rows = build_route_candidate_rows([story], stats, settings)
+
+    first_row = rows[0]
+    assert "summary" not in first_row
+    assert first_row.get("body_for_route")
+
+
+def test_long_body_uses_head_tail():
+    """Truncating a 3000-char body must add the omission marker and shorten it."""
+    body = "块" * 3000
+    # Head and tail sizes are far smaller than the body itself.
+    limits = {"body_max_chars": 1600, "head_chars": 100, "tail_chars": 100}
+
+    shortened = _truncate_body_for_route(body, **limits)
+
+    assert "中间省略" in shortened
+    assert len(shortened) < len(body)
+
+
+def test_total_budget_downgrades_tail_rows(monkeypatch):
+    """Under an 800-char total budget, some row must hold a preview but no body."""
+    settings = Settings()
+    overrides = {
+        "story_route_candidate_total_max_chars": 800,
+        "story_route_index_preview_chars": 40,
+    }
+    for attr, value in overrides.items():
+        monkeypatch.setattr(settings, attr, value)
+
+    # Two 400-character bodies without summaries, updated one day apart.
+    day_two = datetime(2025, 1, 2, tzinfo=timezone.utc)
+    day_one = datetime(2025, 1, 1, tzinfo=timezone.utc)
+    stories = [
+        _story(id="1", summary="", canonical_markdown="A" * 400, updated_at=day_two),
+        _story(id="2", summary="", canonical_markdown="B" * 400, updated_at=day_one),
+    ]
+    stats = {sid: {"char_count": 400, "version_count": 1} for sid in ("1", "2")}
+
+    rows = json.loads(build_route_candidate_json(stories, stats, settings))
+
+    def preview_only(row):
+        return "preview" in row and "body_for_route" not in row
+
+    assert any(preview_only(row) for row in rows)
+
+
+def test_json_includes_core_fields():
+    """The serialized row must expose id, title, both counters and updated_at."""
+    story = _story(
+        id="x1",
+        title="标题",
+        summary="y" * 40,
+        updated_at=datetime(2024, 1, 1, tzinfo=timezone.utc),
+    )
+    stats = {"x1": {"char_count": 5, "version_count": 2}}
+
+    payload = build_route_candidate_json([story], stats, Settings())
+    row = json.loads(payload)[0]
+
+    wanted = {"id": "x1", "title": "标题", "version_count": 2, "char_count": 5}
+    for field, value in wanted.items():
+        assert row[field] == value
+    assert "updated_at" in row