feat(evaluation): memoir readiness, judge/replay updates, eval web playground

Add memoir_readiness_service and router tests; extend judge schemas/services, replay_service, and conversation rubric; align story route agent, payload, prompts, and story_pipeline_sync; update agent logging, config, and DI. Document internal-eval; add replayDraft util and PlaygroundPage changes in app-eval-web.
This commit is contained in:
Kevin
2026-04-08 09:38:07 +08:00
parent 99543d04c6
commit 6772e1269c
26 changed files with 1255 additions and 124 deletions

View File

@@ -13,7 +13,10 @@ from app.agents.memoir.prompts import (
get_story_batch_plan_prompt,
get_story_route_prompt,
)
from app.agents.memoir.story_route_payload import build_route_candidate_json
from app.agents.memoir.story_route_payload import (
build_route_candidate_json,
sort_stories_for_route,
)
from app.core.config import settings
from app.core.llm_call import LLMCallError, llm_json_call
from app.core.logging import get_logger
@@ -25,6 +28,105 @@ logger = get_logger(__name__)
# Above this many segments, batch planning is skipped (single-pass routing)
# so that the prompt does not grow too large.
PLAN_BATCH_MAX_SEGMENTS = 48
# Childhood / education / family: both the model and the post-processing lean
# toward "split less, prefer appending to an existing story".
APPEND_FIRST_CHAPTER_CATEGORIES = frozenset({"childhood", "education", "family"})
def default_append_target_story_id(
    candidate_stories: list[Story],
    story_meta: dict[str, dict[str, int]] | None,
    settings: Any,
) -> str | None:
    """Pick the preferred story to append to, after route ordering.

    The candidates are ordered with ``sort_stories_for_route`` (the same
    helper imported next to ``build_route_candidate_json``; the original
    author notes the order is meant to match the routing candidate JSON).

    Args:
        candidate_stories: Stories that could receive the new content.
        story_meta: Optional per-story metadata passed through to the sort
            helper; ``None`` is treated as an empty mapping.
        settings: Object exposing ``story_route_summary_min_chars``; the
            value is coerced with ``int`` before use. Note that this
            parameter shadows the module-level ``settings`` import.

    Returns:
        ``str(id)`` of the first story in the sorted order, or ``None`` when
        there are no candidates or the sort yields nothing.
    """
    if candidate_stories:
        ranked = sort_stories_for_route(
            candidate_stories,
            story_meta or {},
            summary_min_chars=int(settings.story_route_summary_min_chars),
        )
        if ranked:
            return str(ranked[0].id)
    return None
def merge_consecutive_new_story_units(
    units: list[StoryBatchPlanUnit],
) -> list[StoryBatchPlanUnit]:
    """Coalesce runs of adjacent ``new_story`` units into a single unit.

    Units whose decision is not ``"new_story"`` are passed through untouched
    and in order. A ``new_story`` unit with no ``new_story`` neighbour is
    also passed through as the same object. A run of two or more adjacent
    ``new_story`` units is replaced by one freshly built unit that carries
    the concatenated ``segment_ids`` and the reason
    ``"coalesced_consecutive_new_story"``.

    NOTE(review): the annotations name ``StoryBatchPlanUnit``, which the
    module declares further down; this presumably relies on deferred
    annotation evaluation -- confirm the module enables it.

    Args:
        units: Plan units in batch order.

    Returns:
        The input list itself when it is empty, otherwise a new list.
    """
    if not units:
        return units

    merged: list[StoryBatchPlanUnit] = []
    pending_run: list[StoryBatchPlanUnit] = []

    def _flush_run() -> None:
        # Emit whatever new_story run has been buffered so far.
        if len(pending_run) == 1:
            # A lone new_story unit keeps its original object and reason.
            merged.append(pending_run[0])
        elif pending_run:
            combined_ids: list[str] = []
            for member in pending_run:
                combined_ids.extend(member.segment_ids)
            merged.append(
                StoryBatchPlanUnit(
                    segment_ids=combined_ids,
                    decision="new_story",
                    target_story_id=None,
                    reason="coalesced_consecutive_new_story",
                )
            )
        pending_run.clear()

    for unit in units:
        if unit.decision == "new_story":
            pending_run.append(unit)
            continue
        _flush_run()
        merged.append(unit)
    _flush_run()
    return merged
def normalize_batch_plan_reduce_new_story_fragmentation(
    plan: StoryBatchPlan,
    ordered_segment_ids: list[str],
    *,
    chapter_category: str,
    candidate_stories: list[Story],
    valid_story_ids: set[str],
    story_meta: dict[str, dict[str, int]] | None,
    settings: Any,
) -> StoryBatchPlan:
    """Deterministically normalise a batch plan to reduce new-story fragments.

    Two steps are applied:

    1. Adjacent ``new_story`` units are merged via
       ``merge_consecutive_new_story_units``.
    2. If ``chapter_category`` is in ``APPEND_FIRST_CHAPTER_CATEGORIES``,
       there are candidate stories, and the merged plan is a single
       ``new_story`` unit, that unit is replaced by one ``append_story``
       unit covering all of ``ordered_segment_ids`` -- provided
       ``default_append_target_story_id`` yields an id that is in
       ``valid_story_ids``.

    The result is re-checked with ``validate_story_batch_plan``; if that
    check fails a warning is logged and the original ``plan`` is returned
    unchanged.

    NOTE(review): the annotations name ``StoryBatchPlan``; if it is declared
    later in the module, as ``StoryBatchPlanUnit`` is, this relies on
    deferred annotation evaluation -- confirm.

    Args:
        plan: The plan to normalise (the original docstring says it has
            already passed validation).
        ordered_segment_ids: Segment ids of the batch, in order.
        chapter_category: Category key compared against the append-first set.
        candidate_stories: Stories available as append targets.
        valid_story_ids: Ids accepted as append targets.
        story_meta: Optional per-story metadata forwarded to the target picker.
        settings: Settings object forwarded to the target picker.

    Returns:
        The normalised plan, or ``plan`` itself when re-validation fails.
    """
    merged_units = merge_consecutive_new_story_units(list(plan.units))

    append_first = chapter_category in APPEND_FIRST_CHAPTER_CATEGORIES
    if (
        append_first
        and candidate_stories
        and len(merged_units) == 1
        and merged_units[0].decision == "new_story"
    ):
        fallback_id = default_append_target_story_id(
            candidate_stories, story_meta, settings
        )
        if fallback_id and fallback_id in valid_story_ids:
            # The whole batch becomes one append onto the preferred story.
            whole_batch_append = StoryBatchPlanUnit(
                segment_ids=list(ordered_segment_ids),
                decision="append_story",
                target_story_id=fallback_id,
                reason="append_first_whole_batch_fallback",
            )
            merged_units = [whole_batch_append]

    normalized = StoryBatchPlan(units=merged_units)
    is_valid, problem = validate_story_batch_plan(
        ordered_segment_ids, normalized, valid_story_ids
    )
    if is_valid:
        return normalized

    # Normalisation must never make things worse: keep the original plan.
    logger.warning(
        "batch_plan_normalize_revalidate_failed err={} keep_original",
        problem,
    )
    return plan
class StoryBatchPlanUnit(BaseModel):
"""批量写入中的一个单元(连续 segment 块)。"""
@@ -120,6 +222,15 @@ class StoryRouteAgent:
story_meta: dict[str, dict[str, int]] | None = None,
) -> StoryRouteDecision:
if not llm:
fb = default_append_target_story_id(
candidate_stories, story_meta, settings
)
if fb and fb in valid_story_ids:
return StoryRouteDecision(
decision="append_story",
target_story_id=fb,
reason="no_llm_default_append",
)
return StoryRouteDecision(
decision="new_story",
new_story_title=None,
@@ -134,6 +245,15 @@ class StoryRouteAgent:
)
def _decide_fallback() -> StoryRouteDecision:
fb = default_append_target_story_id(
candidate_stories, story_meta, settings
)
if fb and fb in valid_story_ids:
return StoryRouteDecision(
decision="append_story",
target_story_id=fb,
reason="parse_error_default_append",
)
return StoryRouteDecision(
decision="new_story",
new_story_title=None,
@@ -152,8 +272,22 @@ class StoryRouteAgent:
if decision.decision == "append_story":
tid = decision.target_story_id
if not tid or tid not in valid_story_ids:
fb = default_append_target_story_id(
candidate_stories, story_meta, settings
)
if fb and fb in valid_story_ids:
logger.info(
"StoryRoute append 无效 target_story_id={},回退默认 append {}",
tid,
fb,
)
return StoryRouteDecision(
decision="append_story",
target_story_id=fb,
reason="invalid_target_default_append",
)
logger.warning(
"StoryRoute append 无效 target_story_id={},回退 new_story",
"StoryRoute append 无效 target_story_id={}且无可用默认目标,回退 new_story",
tid,
)
return StoryRouteDecision(
@@ -204,4 +338,12 @@ class StoryRouteAgent:
if not ok:
logger.warning("StoryRouteAgent.plan_batch 校验失败: {}", err)
return None
return plan
return normalize_batch_plan_reduce_new_story_fragmentation(
plan,
ordered,
chapter_category=chapter_category,
candidate_stories=candidate_stories,
valid_story_ids=valid_story_ids,
story_meta=story_meta,
settings=settings,
)