feat(evaluation): memoir readiness, judge/replay updates, eval web playground

Add memoir_readiness_service and router tests; extend judge schemas/services, replay_service, and conversation rubric; align story route agent, payload, prompts, and story_pipeline_sync; update agent logging, config, and DI. Document internal-eval; add replayDraft util and PlaygroundPage changes in app-eval-web.
This commit is contained in:
Kevin
2026-04-08 09:38:07 +08:00
parent 99543d04c6
commit 6772e1269c
26 changed files with 1255 additions and 124 deletions

View File

@@ -425,8 +425,9 @@ def story_route_merge_hint_for_category(chapter_category: str) -> str:
if cc == "family":
return (
"### 本章类别路由倾向(家庭)\n"
"- 原则性反思、关系模式、相处之道的补充 → **倾向 append_story**。\n"
"- **明确的新事件链**(新场景、新时间线、不同人物组合的新经历)→ 可 new_story。"
"- **默认 append_story**:同一家庭成员、同一居住环境、婚姻育儿、节日团聚、童年与父母的回忆等,"
"只要仍围绕已出现的人物或关系网络补充细节,一律并入最匹配的候选,不要因为换了个场景就 new_story。\n"
"- 仅当口述出现**完全新的人物组合 + 可独立成篇的新事件链**(与所有候选正文都接不上)时,才 new_story。"
)
if cc in (
"childhood",
@@ -435,6 +436,13 @@ def story_route_merge_hint_for_category(chapter_category: str) -> str:
"career_achievement",
"career_challenge",
):
if cc in ("childhood", "education"):
return (
"### 本章类别路由倾向(童年 / 求学 — 少拆分)\n"
"- **默认 append_story**:同一成长阶段里,地点(老家、学校)、父母职业、玩伴、游戏影视、"
"怀旧细节等**主题延续**的补充,即使分段讲述,也应并入已有童年/求学故事,避免多篇开头重复交代背景。\n"
"- **仅当**口述出现**另一条清晰可辨的事件链**(时间/地点/人物线换了且与候选明显不是同一脉络)时,才 new_story。"
)
return (
"### 本章类别路由倾向(经历叙事)\n"
"- 以具体事件链为主:**不同事件 / 时期 / 地点** → 可 new_story。\n"
@@ -473,7 +481,7 @@ def get_story_route_prompt(
**路由边界(必须遵守)**:仅根据下方「本批口述合并文本」判断;不得将系统检索摘要、记忆摘录等当作本批口述内容来匹配候选。
**候选故事说明**:列表项可能含 `summary``body_for_route`(正文摘要);仅含 `preview` 者为索引项,信息不全。**append 时优先匹配带 summary body 的条目**;索引项仅作候选 id 备忘。
**候选故事说明**:列表项可能含 `summary``body_for_route`(正文摘要)或 `opening_snippet`(无 summary 时的纯文本开头提要);仅含 `preview` 者为索引项,信息不全。**append 时优先匹配带 summary / body / opening_snippet 的条目**;索引项仅作候选 id 备忘。
当前章节(写作容器):
- category: {chapter_category}
@@ -495,6 +503,7 @@ def get_story_route_prompt(
规则:
- **不要**只因「不太确定」就选 new_story在主题可并入某一候选时应 append_story。
- 仅当口述与**所有**候选在两层标准下都明显不兼容时,才选 new_story。
- 若已有候选故事(列表非空)且口述是对同一人生阶段的**补述**,却找不到精确 id仍应 **append_story** 到最相近的一条,而不是 new_story。
"""
@@ -525,7 +534,7 @@ def get_story_batch_plan_prompt(
- **append_story**:与某一候选在两层标准下可合并,且能对应到具体 candidate id
- **new_story**:该块与**所有**候选都明显不兼容,或确认为独立新经历
**候选故事说明**:条目可能含 `summary`/`body_for_route`;仅 `preview` 者为索引项。**优先用带摘要/正文的条目做 append 目标**。
**候选故事说明**:条目可能含 `summary` / `body_for_route` / `opening_snippet`;仅 `preview` 者为索引项。**优先用带摘要正文摘要或开头提要的条目做 append 目标**。
当前章节(写作容器):
- category: {chapter_category}
@@ -552,6 +561,8 @@ def get_story_batch_plan_prompt(
规则:
- `units` 中所有 `segment_ids` 拼接后,必须**不重不漏**地覆盖本批全部 id且顺序与【本批口述片段】数组一致
- **不要**仅因不确定就对整块选 new_story能并入候选时应 append_story
- **同一批里 new_story 单元至多 1 个**:除非口述中同时存在**至少两条**与所有候选都不兼容、且彼此也明显无关的独立长经历,否则禁止拆成多个 new_story连续多段若都在补充同一主题应合并为**一块 append_story**。
- 候选列表非空时,优先把本批当作「加厚已有篇章」,而不是再开新篇。
"""

View File

@@ -13,7 +13,10 @@ from app.agents.memoir.prompts import (
get_story_batch_plan_prompt,
get_story_route_prompt,
)
from app.agents.memoir.story_route_payload import build_route_candidate_json
from app.agents.memoir.story_route_payload import (
build_route_candidate_json,
sort_stories_for_route,
)
from app.core.config import settings
from app.core.llm_call import LLMCallError, llm_json_call
from app.core.logging import get_logger
@@ -25,6 +28,105 @@ logger = get_logger(__name__)
# Above this many segments, batch planning is skipped (single-shot routing is
# used instead) so the prompt does not grow too large.
PLAN_BATCH_MAX_SEGMENTS = 48
# Childhood / education / family: both the model prompts and the post-processing
# lean toward "split less, append first".
APPEND_FIRST_CHAPTER_CATEGORIES = frozenset({"childhood", "education", "family"})
def default_append_target_story_id(
    candidate_stories: list[Story],
    story_meta: dict[str, dict[str, int]] | None,
    settings: Any,
) -> str | None:
    """Pick the preferred story to append to when no explicit target is usable.

    The candidates are ranked with ``sort_stories_for_route`` and the id of the
    first ranked story is returned as a string. The original author's note says
    this ranking matches the order of the route-candidate JSON.

    Args:
        candidate_stories: Stories that could receive the appended content.
        story_meta: Optional per-story metadata forwarded to the sorter; an
            empty dict is used when it is ``None`` or empty.
        settings: Object exposing ``story_route_summary_min_chars``.

    Returns:
        The stringified id of the top-ranked candidate, or ``None`` when there
        are no candidates or the sorter yields nothing.
    """
    if candidate_stories:
        ranked = sort_stories_for_route(
            candidate_stories,
            story_meta or {},
            summary_min_chars=int(settings.story_route_summary_min_chars),
        )
        if ranked:
            return str(ranked[0].id)
    return None
def merge_consecutive_new_story_units(
    units: list[StoryBatchPlanUnit],
) -> list[StoryBatchPlanUnit]:
    """Collapse each run of adjacent ``new_story`` units into a single unit.

    Units whose decision is not ``new_story`` are passed through unchanged, and
    a ``new_story`` unit with no ``new_story`` neighbour is kept as the same
    object. A run of two or more is replaced by one fresh unit that carries
    the concatenated segment ids (in order) and the reason
    ``coalesced_consecutive_new_story``.

    Args:
        units: Plan units in batch order.

    Returns:
        The input itself when it is empty, otherwise a new list of units.
    """
    if not units:
        return units

    merged: list[StoryBatchPlanUnit] = []
    # Adjacent new_story units collected so far and not yet emitted.
    pending: list[StoryBatchPlanUnit] = []

    def _flush_pending() -> None:
        """Emit the collected run (as-is if single, coalesced if longer)."""
        if not pending:
            return
        if len(pending) == 1:
            merged.append(pending[0])
        else:
            merged.append(
                StoryBatchPlanUnit(
                    segment_ids=[
                        seg_id for unit in pending for seg_id in unit.segment_ids
                    ],
                    decision="new_story",
                    target_story_id=None,
                    reason="coalesced_consecutive_new_story",
                )
            )
        pending.clear()

    for unit in units:
        if unit.decision == "new_story":
            pending.append(unit)
            continue
        # A non-new_story unit ends the current run.
        _flush_pending()
        merged.append(unit)

    _flush_pending()
    return merged
def normalize_batch_plan_reduce_new_story_fragmentation(
    plan: StoryBatchPlan,
    ordered_segment_ids: list[str],
    *,
    chapter_category: str,
    candidate_stories: list[Story],
    valid_story_ids: set[str],
    story_meta: dict[str, dict[str, int]] | None,
    settings: Any,
) -> StoryBatchPlan:
    """Deterministically normalize a batch plan to reduce new-story fragments.

    Two steps are applied:

    1. Adjacent ``new_story`` units are merged into one.
    2. For an append-first chapter category with at least one candidate story,
       a plan that has collapsed to a single ``new_story`` unit is rewritten
       as one ``append_story`` unit covering the whole batch, provided a
       default append target exists and is among ``valid_story_ids``.

    The result is re-validated; if validation fails a warning is logged and
    the original ``plan`` is returned unchanged.

    Args:
        plan: The batch plan to normalize.
        ordered_segment_ids: All segment ids of the batch, in batch order.
        chapter_category: Category of the current chapter.
        candidate_stories: Existing stories that may be appended to.
        valid_story_ids: Ids accepted as append targets.
        story_meta: Optional per-story metadata used when ranking candidates.
        settings: Object exposing ``story_route_summary_min_chars``.

    Returns:
        The normalized plan, or ``plan`` itself when re-validation fails.
    """
    units = merge_consecutive_new_story_units(list(plan.units))

    prefers_append = chapter_category in APPEND_FIRST_CHAPTER_CATEGORIES
    if prefers_append and candidate_stories and len(units) == 1:
        sole_unit = units[0]
        if sole_unit.decision == "new_story":
            fallback_id = default_append_target_story_id(
                candidate_stories, story_meta, settings
            )
            if fallback_id and fallback_id in valid_story_ids:
                # The whole batch becomes one append onto the default target.
                units = [
                    StoryBatchPlanUnit(
                        segment_ids=list(ordered_segment_ids),
                        decision="append_story",
                        target_story_id=fallback_id,
                        reason="append_first_whole_batch_fallback",
                    )
                ]

    normalized = StoryBatchPlan(units=units)
    ok, err = validate_story_batch_plan(
        ordered_segment_ids, normalized, valid_story_ids
    )
    if ok:
        return normalized

    logger.warning(
        "batch_plan_normalize_revalidate_failed err={} keep_original",
        err,
    )
    return plan
class StoryBatchPlanUnit(BaseModel):
"""批量写入中的一个单元(连续 segment 块)。"""
@@ -120,6 +222,15 @@ class StoryRouteAgent:
story_meta: dict[str, dict[str, int]] | None = None,
) -> StoryRouteDecision:
if not llm:
fb = default_append_target_story_id(
candidate_stories, story_meta, settings
)
if fb and fb in valid_story_ids:
return StoryRouteDecision(
decision="append_story",
target_story_id=fb,
reason="no_llm_default_append",
)
return StoryRouteDecision(
decision="new_story",
new_story_title=None,
@@ -134,6 +245,15 @@ class StoryRouteAgent:
)
def _decide_fallback() -> StoryRouteDecision:
fb = default_append_target_story_id(
candidate_stories, story_meta, settings
)
if fb and fb in valid_story_ids:
return StoryRouteDecision(
decision="append_story",
target_story_id=fb,
reason="parse_error_default_append",
)
return StoryRouteDecision(
decision="new_story",
new_story_title=None,
@@ -152,8 +272,22 @@ class StoryRouteAgent:
if decision.decision == "append_story":
tid = decision.target_story_id
if not tid or tid not in valid_story_ids:
fb = default_append_target_story_id(
candidate_stories, story_meta, settings
)
if fb and fb in valid_story_ids:
logger.info(
"StoryRoute append 无效 target_story_id={},回退默认 append {}",
tid,
fb,
)
return StoryRouteDecision(
decision="append_story",
target_story_id=fb,
reason="invalid_target_default_append",
)
logger.warning(
"StoryRoute append 无效 target_story_id={},回退 new_story",
"StoryRoute append 无效 target_story_id={}且无可用默认目标,回退 new_story",
tid,
)
return StoryRouteDecision(
@@ -204,4 +338,12 @@ class StoryRouteAgent:
if not ok:
logger.warning("StoryRouteAgent.plan_batch 校验失败: {}", err)
return None
return plan
return normalize_batch_plan_reduce_new_story_fragmentation(
plan,
ordered,
chapter_category=chapter_category,
candidate_stories=candidate_stories,
valid_story_ids=valid_story_ids,
story_meta=story_meta,
settings=settings,
)

View File

@@ -7,6 +7,7 @@ Story 路由:候选故事 JSON 载荷summary 优先、预算裁剪、固定
from __future__ import annotations
import json
import re
from datetime import timezone
from typing import Any, TYPE_CHECKING
@@ -15,6 +16,23 @@ if TYPE_CHECKING:
from app.features.story.models import Story
# Markdown emphasis / heading / inline-code marker characters removed from snippets.
_PLAIN_SNIPPET_NOISE = re.compile(r"[`*_#]+")


def _plain_opening_snippet_from_markdown(md: str, *, max_chars: int) -> str:
    """Build a short plain-text opening excerpt from Markdown.

    Used to help routing tell stories apart when a story has no summary.
    Markdown noise is reduced: image tags are dropped, links keep only their
    label, ``asset://`` references are removed, marker characters
    (backtick, ``*``, ``_``, ``#``) are stripped, and whitespace is collapsed
    to single spaces.

    Args:
        md: Markdown source; ``None`` or empty is treated as no content.
        max_chars: Maximum length of the returned snippet, including the
            trailing ellipsis when the text is cut.

    Returns:
        The cleaned text if it fits within ``max_chars``; otherwise the first
        ``max_chars - 1`` characters followed by a single ellipsis character.
        An empty string is returned for empty input or a non-positive budget.
    """
    # A non-positive budget would make the slice below negative and return
    # almost the whole text instead of nothing.
    if max_chars <= 0:
        return ""
    text = (md or "").strip()
    if not text:
        return ""
    # Images must be removed before links, since image syntax embeds link syntax.
    text = re.sub(r"!\[[^\]]*\]\([^)]+\)", "", text)
    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
    text = re.sub(r"asset://\S+", "", text)
    text = _PLAIN_SNIPPET_NOISE.sub("", text)
    text = re.sub(r"\s+", " ", text).strip()
    if len(text) <= max_chars:
        return text
    # One character is reserved for the ellipsis; it is written as an escape
    # so it survives tooling that strips non-ASCII punctuation.
    return text[: max_chars - 1] + "\u2026"
def _linked_chapters(s: Story) -> list[str]:
links: list[str] = []
@@ -126,6 +144,9 @@ def _build_full_row(
)
if body:
row["body_for_route"] = body
osnip = _plain_opening_snippet_from_markdown(canon, max_chars=260)
if osnip and len(osnip) >= 40:
row["opening_snippet"] = osnip
return row