# api/app/features/evaluation/replay_service.py

"""内部评测：按 App 一致路径回放用户轮次（segment + orchestrator + memoir 队列）。"""

from __future__ import annotations

import secrets
import time
import uuid
from dataclasses import dataclass
from datetime import datetime

from sqlalchemy.ext.asyncio import AsyncSession

from app.core.db import utc_now
from app.core.logging import get_logger
from app.core.security import hash_password
from app.features.auth import repo as auth_repo
from app.features.conversation.models import Conversation
from app.features.conversation.service import ConversationService
from app.features.conversation.ws.pipeline import (
    memoir_ingest_scheduler,
    process_user_message,
)
from app.features.evaluation.errors import (
    EvaluationBadRequestError,
    EvaluationNotFoundError,
)
from app.features.evaluation.user_export_fixtures import read_user_export_fixture
from app.features.quota.service import QuotaService
from app.features.user.models import User

logger = get_logger(__name__)


@dataclass(frozen=True)
class ReplayServerTiming:
    """Immutable server-side timing record for one replay run.

    Built at the end of ``ReplayConversationService.replay_utterances`` and
    returned to the caller together with the replay results.
    """

    # Value of utc_now() taken at the start of the replay, before input validation.
    started_at_utc: datetime
    # Value of utc_now() taken after the replay (and optional memoir flush) finished.
    finished_at_utc: datetime
    # Elapsed time in whole milliseconds, derived from time.perf_counter() and
    # clamped to be non-negative.
    elapsed_ms: int


class ReplayConversationService:
    """Replay user turns for internal evaluation through the conversation pipeline.

    Every replayed utterance is stored as a user segment, optionally queued
    for memoir ingestion, and then handed to ``process_user_message``.
    """

    def __init__(self, db: AsyncSession, quota_service: QuotaService) -> None:
        """Keep the database session and quota service used by all operations."""
        self._quota = quota_service
        self._db = db

    async def _start_conversation(self, owner_id: str) -> str:
        """Create a conversation with a fresh UUID for ``owner_id`` and return its id.

        Raises:
            EvaluationBadRequestError: if ``ensure_ws_connection`` reports an
                error or returns no conversation.
        """
        conversation_id = str(uuid.uuid4())
        service = ConversationService(self._db, self._quota)
        conversation, error = await service.ensure_ws_connection(
            conversation_id, owner_id
        )
        if error or not conversation:
            raise EvaluationBadRequestError(error or "failed to create conversation")
        return conversation_id

    async def create_eval_sandbox(self) -> tuple[str, str, str, str]:
        """Create a throwaway evaluation user (unique pseudo phone) plus a new conversation.

        Returns:
            ``(user_id, conversation_id, phone, nickname)``.

        Raises:
            EvaluationBadRequestError: if no unused pseudo phone number is found
                within 8 attempts, or the conversation cannot be created.
        """
        user_id = str(uuid.uuid4())

        # Draw random "eval_<hex>" phone values until one is not taken yet.
        phone: str | None = None
        for _attempt in range(8):
            candidate = f"eval_{secrets.token_hex(10)}"
            if await auth_repo.get_user_by_phone(candidate, self._db):
                continue
            phone = candidate
            break
        else:
            raise EvaluationBadRequestError("could not allocate eval phone")

        # The account gets a random password that is never returned to the caller.
        user = User(
            id=user_id,
            phone=phone,
            password_hash=hash_password(secrets.token_urlsafe(24)),
            nickname="评测临时用户",
            subscription_type="free",
            created_at=utc_now(),
        )
        await auth_repo.create_user(user, self._db)
        await self._db.commit()
        await self._db.refresh(user)

        conversation_id = await self._start_conversation(user_id)

        logger.info(
            "eval sandbox user_id={} phone={} conversation_id={}",
            user_id,
            phone,
            conversation_id,
        )
        return user_id, conversation_id, phone, user.nickname

    async def bootstrap_conversation(self, user_id: str) -> str:
        """Create a new conversation for an existing user and return its id.

        Raises:
            EvaluationBadRequestError: if ``user_id`` is blank, the user does
                not exist, or the conversation cannot be created.
        """
        uid = (user_id or "").strip()
        if not uid:
            raise EvaluationBadRequestError("user_id is required")

        owner = await self._db.get(User, uid)
        if not owner:
            raise EvaluationBadRequestError("user not found")

        conversation_id = await self._start_conversation(uid)
        logger.info(
            "eval replay bootstrap conversation_id={} user_id={}",
            conversation_id,
            uid,
        )
        return conversation_id

    async def replay_fixture(
        self,
        *,
        conversation_id: str,
        fixture_filename: str,
        flush_memoir_after: bool,
        skip_memoir: bool,
        skip_tts: bool,
    ) -> tuple[int, list[str], list[str], ReplayServerTiming]:
        """Replay the user utterances of a user-export fixture into a conversation.

        Returns:
            ``(turn_count, utterances, segment_ids, timing)`` where
            ``utterances`` are the stripped, non-empty user texts that were
            replayed.

        Raises:
            EvaluationBadRequestError: if the fixture reader raises
                ``ValueError`` or the fixture yields no user utterances.
            EvaluationNotFoundError: if the fixture file does not exist.
        """
        try:
            turns, _ = read_user_export_fixture(fixture_filename)
        except FileNotFoundError:
            raise EvaluationNotFoundError("fixture not found") from None
        except ValueError as exc:
            raise EvaluationBadRequestError(str(exc)) from exc

        # Only the first element of each turn is replayed; blank entries are dropped.
        utterances: list[str] = []
        for spoken, _rest in turns:
            if (spoken or "").strip():
                utterances.append(spoken.strip())
        if not utterances:
            raise EvaluationBadRequestError("fixture produced no user utterances")

        turn_count, segment_ids, timing = await self.replay_utterances(
            conversation_id=conversation_id,
            utterances=utterances,
            flush_memoir_after=flush_memoir_after,
            skip_memoir=skip_memoir,
            skip_tts=skip_tts,
        )
        return turn_count, utterances, segment_ids, timing

    async def replay_utterances(
        self,
        *,
        conversation_id: str,
        utterances: list[str],
        flush_memoir_after: bool,
        skip_memoir: bool,
        skip_tts: bool,
    ) -> tuple[int, list[str], ReplayServerTiming]:
        """Replay utterances one by one as user messages of a conversation.

        For every non-blank utterance a user segment is created, the segment is
        queued for memoir ingestion (unless ``skip_memoir``), and the message is
        processed via ``process_user_message``. When ``flush_memoir_after`` is
        set and memoir is not skipped, pending memoir work is flushed afterwards.

        Returns:
            ``(turn_count, segment_ids, timing)``.

        Raises:
            EvaluationBadRequestError: if ``conversation_id`` is blank or the
                conversation's user cannot be loaded.
            EvaluationNotFoundError: if the conversation is missing or deleted.
        """
        # The clocks start before validation, so the timing covers the whole call.
        wall_start = time.perf_counter()
        started_at = utc_now()

        cid = (conversation_id or "").strip()
        if not cid:
            raise EvaluationBadRequestError("conversation_id is required")

        conv = await self._db.get(Conversation, cid)
        if not (conv and conv.deleted_at is None):
            raise EvaluationNotFoundError("conversation not found")

        user = await self._db.get(User, conv.user_id)
        if not user:
            raise EvaluationBadRequestError("user not found for conversation")

        service = ConversationService(self._db, self._quota)
        segment_ids: list[str] = []
        for raw in utterances:
            text = (raw or "").strip()
            if not text:
                continue

            segment = await service.create_user_segment(conv, conv.user_id, text)
            segment_ids.append(segment.id)
            message_ts = segment.created_at or conv.last_message_at

            if not skip_memoir:
                await memoir_ingest_scheduler.queue_segment(
                    conv.user_id,
                    segment.id,
                    text_char_count=len(text),
                    trigger="evaluation_replay",
                )

            await process_user_message(
                conversation_id=cid,
                user_message=text,
                conversation=conv,
                segment=segment,
                db=self._db,
                user=user,
                user_message_timestamp=message_ts,
                force_skip_tts=skip_tts,
            )

        # Each fully processed turn contributed exactly one segment id.
        turn_count = len(segment_ids)

        if flush_memoir_after and conv.user_id:
            if not skip_memoir:
                await memoir_ingest_scheduler.flush_pending(
                    conv.user_id,
                    trigger="evaluation_replay",
                )

        logger.info(
            "eval replay done conversation_id={} turns={} flush={} skip_memoir={} skip_tts={}",
            cid,
            turn_count,
            flush_memoir_after,
            skip_memoir,
            skip_tts,
        )

        finished_at = utc_now()
        elapsed_ms = max(0, int((time.perf_counter() - wall_start) * 1000))
        return (
            turn_count,
            segment_ids,
            ReplayServerTiming(
                started_at_utc=started_at,
                finished_at_utc=finished_at,
                elapsed_ms=elapsed_ms,
            ),
        )
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								"""内部评测：按 App 一致路径回放用户轮次（segment + orchestrator + memoir 队列）。"""
 								from __future__ import annotations
 								import secrets
-												feat(eval): server-side replay/phase1 timing + memoir phase1 batch chunking

- Replay and memoir-submit responses include started/finished UTC and elapsed_ms;
  Phase1 poll exposes Redis-backed submit time and elapsed_ms_since_submit.
- Phase1 batch LLM splits segments by memoir_phase1_batch_llm_chunk_size with
  bisect fallback per chunk; Playground shows server timings.

Made-with: Cursor

											
										
										
											2026-04-09 13:38:53 +08:00
+								import time
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								import uuid
-												feat(eval): server-side replay/phase1 timing + memoir phase1 batch chunking

- Replay and memoir-submit responses include started/finished UTC and elapsed_ms;
  Phase1 poll exposes Redis-backed submit time and elapsed_ms_since_submit.
- Phase1 batch LLM splits segments by memoir_phase1_batch_llm_chunk_size with
  bisect fallback per chunk; Playground shows server timings.

Made-with: Cursor

											
										
										
											2026-04-09 13:38:53 +08:00
+								from dataclasses import dataclass
 								from datetime import datetime
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
 								from sqlalchemy.ext.asyncio import AsyncSession
 								from app.core.db import utc_now
 								from app.core.logging import get_logger
 								from app.core.security import hash_password
 								from app.features.auth import repo as auth_repo
 								from app.features.conversation.models import Conversation
 								from app.features.conversation.service import ConversationService
 								from app.features.conversation.ws.pipeline import (
-												feat(api): 收敛对话与记忆流程边界，引入 LLM 网关与专用服务

- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService；富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话；回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway（LlmUseCase），各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor

											
										
										
											2026-04-30 09:17:01 +08:00
+								    memoir_ingest_scheduler,
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								    process_user_message,
 								)
 								from app.features.evaluation.errors import (
 								    EvaluationBadRequestError,
 								    EvaluationNotFoundError,
 								)
 								from app.features.evaluation.user_export_fixtures import read_user_export_fixture
 								from app.features.quota.service import QuotaService
 								from app.features.user.models import User
 								logger = get_logger(__name__)
-												feat(eval): server-side replay/phase1 timing + memoir phase1 batch chunking

- Replay and memoir-submit responses include started/finished UTC and elapsed_ms;
  Phase1 poll exposes Redis-backed submit time and elapsed_ms_since_submit.
- Phase1 batch LLM splits segments by memoir_phase1_batch_llm_chunk_size with
  bisect fallback per chunk; Playground shows server timings.

Made-with: Cursor

											
										
										
											2026-04-09 13:38:53 +08:00
+								@dataclass(frozen=True)
 								class ReplayServerTiming:
 								    started_at_utc: datetime
 								    finished_at_utc: datetime
 								    elapsed_ms: int
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								class ReplayConversationService:
 								    def __init__(self, db: AsyncSession, quota_service: QuotaService) -> None:
 								        self._db = db
 								        self._quota = quota_service
 								    async def create_eval_sandbox(self) -> tuple[str, str, str, str]:
 								        """新建仅用于评测的临时用户（唯一伪手机号）+ 新会话。"""
 								        user_id = str(uuid.uuid4())
 								        phone: str | None = None
 								        for _ in range(8):
 								            candidate = f"eval_{secrets.token_hex(10)}"
 								            existing = await auth_repo.get_user_by_phone(candidate, self._db)
 								            if not existing:
 								                phone = candidate
 								                break
 								        if not phone:
 								            raise EvaluationBadRequestError("could not allocate eval phone")
 								        user = User(
 								            id=user_id,
 								            phone=phone,
 								            password_hash=hash_password(secrets.token_urlsafe(24)),
 								            nickname="评测临时用户",
 								            subscription_type="free",
 								            created_at=utc_now(),
 								        )
 								        await auth_repo.create_user(user, self._db)
 								        await self._db.commit()
 								        await self._db.refresh(user)
 								        conversation_id = str(uuid.uuid4())
 								        conv_service = ConversationService(self._db, self._quota)
 								        conv, err = await conv_service.ensure_ws_connection(conversation_id, user_id)
 								        if err or not conv:
 								            raise EvaluationBadRequestError(err or "failed to create conversation")
 								        logger.info(
 								            "eval sandbox user_id={} phone={} conversation_id={}",
 								            user_id,
 								            phone,
 								            conversation_id,
 								        )
 								        return user_id, conversation_id, phone, user.nickname
 								    async def bootstrap_conversation(self, user_id: str) -> str:
 								        uid = (user_id or "").strip()
 								        if not uid:
 								            raise EvaluationBadRequestError("user_id is required")
 								        user = await self._db.get(User, uid)
 								        if not user:
 								            raise EvaluationBadRequestError("user not found")
 								        conversation_id = str(uuid.uuid4())
 								        conv_service = ConversationService(self._db, self._quota)
 								        conv, err = await conv_service.ensure_ws_connection(conversation_id, uid)
 								        if err or not conv:
 								            raise EvaluationBadRequestError(err or "failed to create conversation")
 								        logger.info(
 								            "eval replay bootstrap conversation_id={} user_id={}",
 								            conversation_id,
 								            uid,
 								        )
 								        return conversation_id
 								    async def replay_fixture(
 								        self,
 								        *,
 								        conversation_id: str,
 								        fixture_filename: str,
 								        flush_memoir_after: bool,
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								        skip_memoir: bool,
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								        skip_tts: bool,
-												feat(eval): server-side replay/phase1 timing + memoir phase1 batch chunking

- Replay and memoir-submit responses include started/finished UTC and elapsed_ms;
  Phase1 poll exposes Redis-backed submit time and elapsed_ms_since_submit.
- Phase1 batch LLM splits segments by memoir_phase1_batch_llm_chunk_size with
  bisect fallback per chunk; Playground shows server timings.

Made-with: Cursor

											
										
										
											2026-04-09 13:38:53 +08:00
+								    ) -> tuple[int, list[str], list[str], ReplayServerTiming]:
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								        try:
 								            turns, _ = read_user_export_fixture(fixture_filename)
 								        except ValueError as e:
 								            raise EvaluationBadRequestError(str(e)) from e
 								        except FileNotFoundError:
 								            raise EvaluationNotFoundError("fixture not found") from None
 								        utterances = [u.strip() for u, _ in turns if (u or "").strip()]
 								        if not utterances:
 								            raise EvaluationBadRequestError("fixture produced no user utterances")
-												feat(eval): server-side replay/phase1 timing + memoir phase1 batch chunking

- Replay and memoir-submit responses include started/finished UTC and elapsed_ms;
  Phase1 poll exposes Redis-backed submit time and elapsed_ms_since_submit.
- Phase1 batch LLM splits segments by memoir_phase1_batch_llm_chunk_size with
  bisect fallback per chunk; Playground shows server timings.

Made-with: Cursor

											
										
										
											2026-04-09 13:38:53 +08:00
+								        n, segment_ids, timing = await self.replay_utterances(
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								            conversation_id=conversation_id,
 								            utterances=utterances,
 								            flush_memoir_after=flush_memoir_after,
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								            skip_memoir=skip_memoir,
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								            skip_tts=skip_tts,
 								        )
-												feat(eval): server-side replay/phase1 timing + memoir phase1 batch chunking

- Replay and memoir-submit responses include started/finished UTC and elapsed_ms;
  Phase1 poll exposes Redis-backed submit time and elapsed_ms_since_submit.
- Phase1 batch LLM splits segments by memoir_phase1_batch_llm_chunk_size with
  bisect fallback per chunk; Playground shows server timings.

Made-with: Cursor

											
										
										
											2026-04-09 13:38:53 +08:00
+								        return n, utterances, segment_ids, timing
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
 								    async def replay_utterances(
 								        self,
 								        *,
 								        conversation_id: str,
 								        utterances: list[str],
 								        flush_memoir_after: bool,
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								        skip_memoir: bool,
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								        skip_tts: bool,
-												feat(eval): server-side replay/phase1 timing + memoir phase1 batch chunking

- Replay and memoir-submit responses include started/finished UTC and elapsed_ms;
  Phase1 poll exposes Redis-backed submit time and elapsed_ms_since_submit.
- Phase1 batch LLM splits segments by memoir_phase1_batch_llm_chunk_size with
  bisect fallback per chunk; Playground shows server timings.

Made-with: Cursor

											
										
										
											2026-04-09 13:38:53 +08:00
+								    ) -> tuple[int, list[str], ReplayServerTiming]:
 								        t_wall0 = time.perf_counter()
 								        started_at_utc = utc_now()
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								        cid = (conversation_id or "").strip()
 								        if not cid:
 								            raise EvaluationBadRequestError("conversation_id is required")
 								        conv = await self._db.get(Conversation, cid)
 								        if not conv or conv.deleted_at is not None:
 								            raise EvaluationNotFoundError("conversation not found")
 								        user = await self._db.get(User, conv.user_id)
 								        if not user:
 								            raise EvaluationBadRequestError("user not found for conversation")
 								        conv_service = ConversationService(self._db, self._quota)
 								        count = 0
-												feat(evaluation): memoir readiness, judge/replay updates, eval web playground

Add memoir_readiness_service and router tests; extend judge schemas/services, replay_service, and conversation rubric; align story route agent, payload, prompts, and story_pipeline_sync; update agent logging, config, and DI. Document internal-eval; add replayDraft util and PlaygroundPage changes in app-eval-web.

											
										
										
											2026-04-08 09:38:07 +08:00
+								        segment_ids: list[str] = []
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								        for raw in utterances:
 								            text = (raw or "").strip()
 								            if not text:
 								                continue
 								            segment = await conv_service.create_user_segment(conv, conv.user_id, text)
-												feat(evaluation): memoir readiness, judge/replay updates, eval web playground

Add memoir_readiness_service and router tests; extend judge schemas/services, replay_service, and conversation rubric; align story route agent, payload, prompts, and story_pipeline_sync; update agent logging, config, and DI. Document internal-eval; add replayDraft util and PlaygroundPage changes in app-eval-web.

											
										
										
											2026-04-08 09:38:07 +08:00
+								            segment_ids.append(segment.id)
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								            ts = segment.created_at or conv.last_message_at
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								            if not skip_memoir:
-												feat(api): 收敛对话与记忆流程边界，引入 LLM 网关与专用服务

- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService；富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话；回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway（LlmUseCase），各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor

											
										
										
											2026-04-30 09:17:01 +08:00
+								                await memoir_ingest_scheduler.queue_segment(
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								                    conv.user_id,
 								                    segment.id,
 								                    text_char_count=len(text),
-												feat(api): 收敛对话与记忆流程边界，引入 LLM 网关与专用服务

- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService；富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话；回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway（LlmUseCase），各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor

											
										
										
											2026-04-30 09:17:01 +08:00
+								                    trigger="evaluation_replay",
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								                )
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								            await process_user_message(
 								                conversation_id=cid,
 								                user_message=text,
 								                conversation=conv,
 								                segment=segment,
 								                db=self._db,
 								                user=user,
 								                user_message_timestamp=ts,
 								                force_skip_tts=skip_tts,
 								            )
 								            count += 1
-												feat:
1. 建立问题库大纲，对应每个人生阶段槽位
2. 鼓励使用更生活化的交流语言共情与总结
3. 降低评审模型可能发生截断的概率
4. 成稿质量维度强化情感表达和上下文连贯性

											
										
										
											2026-04-09 15:32:35 +08:00
+								        if flush_memoir_after and conv.user_id and (not skip_memoir):
-												feat(api): 收敛对话与记忆流程边界，引入 LLM 网关与专用服务

- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService；富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话；回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway（LlmUseCase），各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor

											
										
										
											2026-04-30 09:17:01 +08:00
+								            await memoir_ingest_scheduler.flush_pending(
 								                conv.user_id,
 								                trigger="evaluation_replay",
 								            )
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
 								        logger.info(
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								            "eval replay done conversation_id={} turns={} flush={} skip_memoir={} skip_tts={}",
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								            cid,
 								            count,
 								            flush_memoir_after,
-												refactor(eval+memoir)：精简内部评测路由与服务，composite/对话摘要与 judge 能力补强

- 访谈：新增 interview_state_hints，联动 orchestrator 与提示词
- 回忆录：story_pipeline_sync/state/memory/post_commit 与 Celery 任务调整
- 基建：开发用 celery broker、compose/development 脚本、依赖注入
- eval-web：移除数据集/实验/版本等页面与流式轮询，突出 Playground
- 文档与单测同步

											
										
										
											2026-04-08 21:36:12 +08:00
+								            skip_memoir,
-												feat/ eval

											
										
										
											2026-04-06 23:19:20 +08:00
+								            skip_tts,
 								        )
-												feat(eval): server-side replay/phase1 timing + memoir phase1 batch chunking

- Replay and memoir-submit responses include started/finished UTC and elapsed_ms;
  Phase1 poll exposes Redis-backed submit time and elapsed_ms_since_submit.
- Phase1 batch LLM splits segments by memoir_phase1_batch_llm_chunk_size with
  bisect fallback per chunk; Playground shows server timings.

Made-with: Cursor

											
										
										
											2026-04-09 13:38:53 +08:00
+								        finished_at_utc = utc_now()
 								        elapsed_ms = max(0, int((time.perf_counter() - t_wall0) * 1000))
 								        timing = ReplayServerTiming(
 								            started_at_utc=started_at_utc,
 								            finished_at_utc=finished_at_utc,
 								            elapsed_ms=elapsed_ms,
 								        )
 								        return count, segment_ids, timing