feat(evaluation): memoir readiness, judge/replay updates, eval web playground

Add memoir_readiness_service and router tests; extend judge schemas/services, replay_service, and conversation rubric; align story route agent, payload, prompts, and story_pipeline_sync; update agent logging, config, and DI. Document internal-eval; add replayDraft util and PlaygroundPage changes in app-eval-web.
This commit is contained in:
Kevin
2026-04-08 09:38:07 +08:00
parent 99543d04c6
commit 6772e1269c
26 changed files with 1255 additions and 124 deletions

View File

@@ -98,7 +98,7 @@ class ReplayConversationService:
fixture_filename: str,
flush_memoir_after: bool,
skip_tts: bool,
) -> tuple[int, list[str]]:
) -> tuple[int, list[str], list[str]]:
try:
turns, _ = read_user_export_fixture(fixture_filename)
except ValueError as e:
@@ -108,13 +108,13 @@ class ReplayConversationService:
utterances = [u.strip() for u, _ in turns if (u or "").strip()]
if not utterances:
raise EvaluationBadRequestError("fixture produced no user utterances")
n = await self.replay_utterances(
n, segment_ids = await self.replay_utterances(
conversation_id=conversation_id,
utterances=utterances,
flush_memoir_after=flush_memoir_after,
skip_tts=skip_tts,
)
return n, utterances
return n, utterances, segment_ids
async def replay_utterances(
self,
@@ -123,7 +123,7 @@ class ReplayConversationService:
utterances: list[str],
flush_memoir_after: bool,
skip_tts: bool,
) -> int:
) -> tuple[int, list[str]]:
cid = (conversation_id or "").strip()
if not cid:
raise EvaluationBadRequestError("conversation_id is required")
@@ -136,11 +136,13 @@ class ReplayConversationService:
conv_service = ConversationService(self._db, self._quota)
count = 0
segment_ids: list[str] = []
for raw in utterances:
text = (raw or "").strip()
if not text:
continue
segment = await conv_service.create_user_segment(conv, conv.user_id, text)
segment_ids.append(segment.id)
ts = segment.created_at or conv.last_message_at
await background_runner.queue_message(
conv.user_id,
@@ -169,4 +171,4 @@ class ReplayConversationService:
flush_memoir_after,
skip_tts,
)
return count
return count, segment_ids