feat(evaluation): memoir readiness, judge/replay updates, eval web playground

Add memoir_readiness_service and router tests; extend judge schemas/services, replay_service, and conversation rubric; align story route agent, payload, prompts, and story_pipeline_sync; update agent logging, config, and DI. Document internal-eval; add replayDraft util and update PlaygroundPage in app-eval-web.
This commit is contained in:
Kevin
2026-04-08 09:38:07 +08:00
parent 99543d04c6
commit 6772e1269c
26 changed files with 1255 additions and 124 deletions

View File

@@ -27,9 +27,11 @@ from app.agents.stage_constants import (
STAGE_TO_ORDER,
)
from app.agents.memoir.story_route_agent import (
APPEND_FIRST_CHAPTER_CATEGORIES,
PLAN_BATCH_MAX_SEGMENTS,
StoryBatchPlan,
StoryRouteAgent,
default_append_target_story_id,
)
from app.agents.state_schema import MemoirStateSchema
from app.core.config import settings
@@ -530,6 +532,8 @@ def _run_batch_plan_writes(
user_birth_year: int | None,
llm: Any,
narrative_agent: NarrativeAgent,
candidate_stories: list,
story_meta: dict[str, dict[str, int]],
background_voice: str = "default",
occupation: str = "",
memoir_correlation_id: str | None = None,
@@ -573,6 +577,33 @@ def _run_batch_plan_writes(
else:
target_story_id = st.id
existing_for_narrative = canon
elif (
unit.decision == "new_story"
and chapter_category in APPEND_FIRST_CHAPTER_CATEGORIES
and candidate_stories
and len(ut_norm)
<= int(settings.memoir_story_route_append_guardrail_oral_chars)
):
tid_g = default_append_target_story_id(
candidate_stories, story_meta, settings
)
if tid_g:
st = session.get(Story, tid_g)
if st and st.user_id == user_id:
canon = (st.canonical_markdown or "").strip()
vc = count_story_versions_sync(session, str(st.id))
if len(canon) <= max_chars and vc < max_ver:
target_story_id = st.id
existing_for_narrative = canon
decision_source = "append_guardrail_short_oral"
logger.info(
"event=story_route_append_guardrail memoir_correlation_id={} "
"chapter_category={} oral_len={} story_id={}",
memoir_correlation_id or "",
chapter_category,
len(ut_norm),
tid_g,
)
raw_gen = narrative_agent.generate_narrative(
stage=chapter_category,
@@ -663,12 +694,13 @@ def _run_batch_plan_writes(
elapsed = time.perf_counter() - t0
logger.info(
"event=story_generated memoir_correlation_id={} route_type=batch "
"decision_source={} route_decision={} "
"decision_source={} route_decision={} route_planned={} "
"unit_segments={} used_evidence={} narrative_json_valid={} fidelity_passed={} "
"fallback_type={} oral_len={} md_len={} chapter_category={} is_append={} "
"story_id={} seconds={:.3f} oral_normalize_changed={}",
memoir_correlation_id or "",
decision_source,
"append_story" if is_append else "new_story",
unit.decision,
len(unit.segment_ids),
bool(evidence_text.strip()),
@@ -839,6 +871,8 @@ def run_story_pipeline_for_category_batch(
user_birth_year=user_birth_year,
llm=llm,
narrative_agent=narrative_agent,
candidate_stories=candidates,
story_meta=story_meta,
background_voice=background_voice,
occupation=occupation,
memoir_correlation_id=memoir_correlation_id,
@@ -879,6 +913,31 @@ def run_story_pipeline_for_category_batch(
else:
target_story_id = st.id
existing_for_narrative = canon
elif (
route.decision == "new_story"
and chapter_category in APPEND_FIRST_CHAPTER_CATEGORIES
and candidates
and len(om_norm)
<= int(settings.memoir_story_route_append_guardrail_oral_chars)
):
tid_g = default_append_target_story_id(candidates, story_meta, settings)
if tid_g:
st = session.get(Story, tid_g)
if st and st.user_id == user_id:
canon = (st.canonical_markdown or "").strip()
vc = count_story_versions_sync(session, str(st.id))
if len(canon) <= max_chars and vc < max_ver:
target_story_id = st.id
existing_for_narrative = canon
decision_source = "append_guardrail_short_oral"
logger.info(
"event=story_route_append_guardrail memoir_correlation_id={} "
"chapter_category={} oral_len={} story_id={} route_type=single",
memoir_correlation_id or "",
chapter_category,
len(om_norm),
tid_g,
)
raw_gen = narrative_agent.generate_narrative(
stage=chapter_category,