fix(memoir): 改善 story 合并决策,少生碎片篇

以前模型只看到很短预览,还容易被引导成新建 story。现在优先用已有摘要、
按需带正文片段,并区分「像续写同一主题」和「像换了一件事」;
beliefs/summary 更鼓励接着写, career/童年等仍可按新事件新开。
This commit is contained in:
Kevin
2026-04-03 11:02:05 +08:00
parent 545d5a4ae0
commit b853b986dd
7 changed files with 715 additions and 49 deletions

View File

@@ -0,0 +1,146 @@
"""Story 路由候选 JSON排序、summary 优先、预算降级。"""
from __future__ import annotations
import json
from datetime import datetime, timezone
from types import SimpleNamespace
from app.agents.memoir.story_route_payload import (
build_route_candidate_json,
build_route_candidate_rows,
sort_stories_for_route,
_truncate_body_for_route,
)
from app.core.config import Settings
def _story(**kwargs):
defaults = dict(
id="s-default",
title="T",
summary=None,
canonical_markdown="",
updated_at=None,
chapter_links=[],
)
defaults.update(kwargs)
return SimpleNamespace(**defaults)
def test_sort_has_summary_first_then_recency():
    """An older story with a 40-char summary is expected ahead of a newer one without."""
    meta = {
        "old": {"char_count": 10, "version_count": 1},
        "new": {"char_count": 20, "version_count": 2},
    }
    summarized_old = _story(
        id="old",
        summary="x" * 40,
        updated_at=datetime(2020, 1, 1, tzinfo=timezone.utc),
    )
    bare_new = _story(
        id="new",
        summary="",
        canonical_markdown="body",
        updated_at=datetime(2025, 1, 1, tzinfo=timezone.utc),
    )
    # Input order is deliberately the reverse of the expected result.
    ordered = sort_stories_for_route(
        [bare_new, summarized_old], meta, summary_min_chars=30
    )
    ordered_ids = [story.id for story in ordered]
    assert ordered_ids == ["old", "new"]
def test_sort_tiebreak_version_then_char_then_id():
    """With equal timestamps and no summaries, the higher version_count is expected first.

    Only the version_count tiebreak is exercised here: story ``a`` has the
    larger char_count but the smaller version_count, and must still come last.
    """
    shared_ts = datetime(2024, 6, 1, tzinfo=timezone.utc)
    meta = {
        "a": {"char_count": 100, "version_count": 1},
        "b": {"char_count": 50, "version_count": 3},
    }
    story_a = _story(id="a", summary="", canonical_markdown="a", updated_at=shared_ts)
    story_b = _story(id="b", summary="", canonical_markdown="bb", updated_at=shared_ts)
    ordered = sort_stories_for_route([story_a, story_b], meta, summary_min_chars=30)
    ordered_ids = [story.id for story in ordered]
    assert ordered_ids == ["b", "a"]
def test_summary_sufficient_omits_body():
    """A sufficiently long summary is emitted and the body excerpt is left out."""
    story = _story(
        id="1",
        # The repeated character must be non-empty: ``"" * 40`` is still "",
        # which is exactly the input of the short-summary fallback test and
        # therefore cannot satisfy the assertions below.
        # NOTE(review): assumes the default summary threshold in Settings is
        # at most 40 characters -- confirm against the config.
        summary="摘" * 40,
        canonical_markdown="正文" * 500,
    )
    settings = Settings()
    rows = build_route_candidate_rows(
        [story], {"1": {"char_count": 10, "version_count": 1}}, settings
    )
    first_row = rows[0]
    assert "summary" in first_row
    assert "body_for_route" not in first_row
def test_short_summary_falls_back_to_body():
    """With an empty summary the row is expected to carry a body excerpt instead."""
    meta = {"1": {"char_count": 20, "version_count": 1}}
    story = _story(
        id="1",
        summary="",
        canonical_markdown="唯一的正文用于路由",
    )
    rows = build_route_candidate_rows([story], meta, Settings())
    first_row = rows[0]
    assert "summary" not in first_row
    # Truthiness check: the key must exist and hold a non-empty value.
    assert first_row.get("body_for_route")
def test_long_body_uses_head_tail():
    """An over-long body is shortened and carries the elision marker."""
    # The repeated character must be non-empty: ``"" * 3000`` is the empty
    # string, which makes ``len(out) < len(md)`` impossible to satisfy.
    md = "字" * 3000
    out = _truncate_body_for_route(
        md,
        body_max_chars=1600,
        head_chars=100,
        tail_chars=100,
    )
    assert "中间省略" in out
    assert len(out) < len(md)
def test_total_budget_downgrades_tail_rows(monkeypatch):
    """Under a tight total budget, at least one row is expected to fall back to a preview.

    Two summary-less stories with 400-char bodies are rendered with the
    total budget patched to 800 and the preview length patched to 40.
    """
    settings = Settings()
    for attr_name, patched_value in (
        ("story_route_candidate_total_max_chars", 800),
        ("story_route_index_preview_chars", 40),
    ):
        monkeypatch.setattr(settings, attr_name, patched_value)

    # (story id, body filler character, day of January 2025)
    specs = (("1", "A", 2), ("2", "B", 1))
    stories = [
        _story(
            id=story_id,
            summary="",
            canonical_markdown=filler * 400,
            updated_at=datetime(2025, 1, day, tzinfo=timezone.utc),
        )
        for story_id, filler, day in specs
    ]
    meta = {
        story_id: {"char_count": 400, "version_count": 1}
        for story_id, _filler, _day in specs
    }

    rows = json.loads(build_route_candidate_json(stories, meta, settings))
    assert any("preview" in row and "body_for_route" not in row for row in rows)
def test_json_includes_core_fields():
    """The first JSON row carries id, title, both counters and an updated_at key."""
    meta = {"x1": {"char_count": 5, "version_count": 2}}
    story = _story(
        id="x1",
        title="标题",
        summary="y" * 40,
        updated_at=datetime(2024, 1, 1, tzinfo=timezone.utc),
    )
    payload = build_route_candidate_json([story], meta, Settings())
    row = json.loads(payload)[0]

    # Checked in the same order as the individual assertions they replace.
    expected_fields = {
        "id": "x1",
        "title": "标题",
        "version_count": 2,
        "char_count": 5,
    }
    for field_name, expected_value in expected_fields.items():
        assert row[field_name] == expected_value
    assert "updated_at" in row