feat(evaluation): memoir readiness, judge/replay updates, eval web playground
Add memoir_readiness_service and router tests. Extend the judge schemas and services, replay_service, and the conversation rubric. Align the story route agent, payload, prompts, and story_pipeline_sync. Update agent logging, config, and DI. Document internal-eval. In app-eval-web, add the replayDraft util and update PlaygroundPage.
This commit is contained in:
@@ -98,7 +98,7 @@ class ReplayConversationService:
|
||||
fixture_filename: str,
|
||||
flush_memoir_after: bool,
|
||||
skip_tts: bool,
|
||||
) -> tuple[int, list[str]]:
|
||||
) -> tuple[int, list[str], list[str]]:
|
||||
try:
|
||||
turns, _ = read_user_export_fixture(fixture_filename)
|
||||
except ValueError as e:
|
||||
@@ -108,13 +108,13 @@ class ReplayConversationService:
|
||||
utterances = [u.strip() for u, _ in turns if (u or "").strip()]
|
||||
if not utterances:
|
||||
raise EvaluationBadRequestError("fixture produced no user utterances")
|
||||
n = await self.replay_utterances(
|
||||
n, segment_ids = await self.replay_utterances(
|
||||
conversation_id=conversation_id,
|
||||
utterances=utterances,
|
||||
flush_memoir_after=flush_memoir_after,
|
||||
skip_tts=skip_tts,
|
||||
)
|
||||
return n, utterances
|
||||
return n, utterances, segment_ids
|
||||
|
||||
async def replay_utterances(
|
||||
self,
|
||||
@@ -123,7 +123,7 @@ class ReplayConversationService:
|
||||
utterances: list[str],
|
||||
flush_memoir_after: bool,
|
||||
skip_tts: bool,
|
||||
) -> int:
|
||||
) -> tuple[int, list[str]]:
|
||||
cid = (conversation_id or "").strip()
|
||||
if not cid:
|
||||
raise EvaluationBadRequestError("conversation_id is required")
|
||||
@@ -136,11 +136,13 @@ class ReplayConversationService:
|
||||
|
||||
conv_service = ConversationService(self._db, self._quota)
|
||||
count = 0
|
||||
segment_ids: list[str] = []
|
||||
for raw in utterances:
|
||||
text = (raw or "").strip()
|
||||
if not text:
|
||||
continue
|
||||
segment = await conv_service.create_user_segment(conv, conv.user_id, text)
|
||||
segment_ids.append(segment.id)
|
||||
ts = segment.created_at or conv.last_message_at
|
||||
await background_runner.queue_message(
|
||||
conv.user_id,
|
||||
@@ -169,4 +171,4 @@ class ReplayConversationService:
|
||||
flush_memoir_after,
|
||||
skip_tts,
|
||||
)
|
||||
return count
|
||||
return count, segment_ids
|
||||
|
||||
Reference in New Issue
Block a user