2026-04-06 23:19:20 +08:00
|
|
|
|
"""内部评测:按 App 一致路径回放用户轮次(segment + orchestrator + memoir 队列)。"""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import secrets
|
2026-04-09 13:38:53 +08:00
|
|
|
|
import time
|
2026-04-06 23:19:20 +08:00
|
|
|
|
import uuid
|
2026-04-09 13:38:53 +08:00
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
from datetime import datetime
|
2026-04-06 23:19:20 +08:00
|
|
|
|
|
|
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
|
|
|
|
|
|
from app.core.db import utc_now
|
|
|
|
|
|
from app.core.logging import get_logger
|
|
|
|
|
|
from app.core.security import hash_password
|
|
|
|
|
|
from app.features.auth import repo as auth_repo
|
|
|
|
|
|
from app.features.conversation.models import Conversation
|
|
|
|
|
|
from app.features.conversation.service import ConversationService
|
|
|
|
|
|
from app.features.conversation.ws.pipeline import (
|
2026-04-30 09:17:01 +08:00
|
|
|
|
memoir_ingest_scheduler,
|
2026-04-06 23:19:20 +08:00
|
|
|
|
process_user_message,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.features.evaluation.errors import (
|
|
|
|
|
|
EvaluationBadRequestError,
|
|
|
|
|
|
EvaluationNotFoundError,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.features.evaluation.user_export_fixtures import read_user_export_fixture
|
|
|
|
|
|
from app.features.quota.service import QuotaService
|
|
|
|
|
|
from app.features.user.models import User
|
|
|
|
|
|
|
|
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-09 13:38:53 +08:00
|
|
|
|
@dataclass(frozen=True)
class ReplayServerTiming:
    """Immutable server-side timing record for one replay run.

    Instances are built at the end of
    ``ReplayConversationService.replay_utterances``.
    """

    # Value of ``utc_now()`` captured when the replay started.
    started_at_utc: datetime
    # Value of ``utc_now()`` captured after the replay finished.
    finished_at_utc: datetime
    # Elapsed time in whole milliseconds, measured with ``time.perf_counter``
    # and clamped so it is never negative.
    elapsed_ms: int
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-06 23:19:20 +08:00
|
|
|
|
class ReplayConversationService:
    """Replay user turns for internal evaluation runs.

    Every replayed utterance is stored with
    ``ConversationService.create_user_segment``, optionally queued on
    ``memoir_ingest_scheduler``, and then passed to ``process_user_message``.
    """

    def __init__(self, db: AsyncSession, quota_service: QuotaService) -> None:
        """Remember the DB session and quota service used by all operations."""
        self._quota = quota_service
        self._db = db

    async def _open_conversation(self, user_id: str) -> str:
        """Create a conversation with a fresh UUID for ``user_id``.

        Returns:
            The id of the new conversation.

        Raises:
            EvaluationBadRequestError: if ``ensure_ws_connection`` reports an
                error or yields no conversation.
        """
        new_id = str(uuid.uuid4())
        service = ConversationService(self._db, self._quota)
        conversation, error = await service.ensure_ws_connection(new_id, user_id)
        if error or not conversation:
            raise EvaluationBadRequestError(error or "failed to create conversation")
        return new_id

    async def create_eval_sandbox(self) -> tuple[str, str, str, str]:
        """Create an evaluation-only temporary user plus a new conversation.

        The user gets a unique pseudo phone number of the form
        ``eval_<hex>`` and a random password.

        Returns:
            ``(user_id, conversation_id, phone, nickname)``.

        Raises:
            EvaluationBadRequestError: if no unused phone is found within
                eight attempts, or the conversation cannot be created.
        """
        user_id = str(uuid.uuid4())

        # Try a handful of random candidates; the ``else`` branch runs only
        # when every candidate was already taken.
        for _attempt in range(8):
            candidate = f"eval_{secrets.token_hex(10)}"
            taken = await auth_repo.get_user_by_phone(candidate, self._db)
            if not taken:
                phone = candidate
                break
        else:
            raise EvaluationBadRequestError("could not allocate eval phone")

        password_hash = hash_password(secrets.token_urlsafe(24))
        user = User(
            id=user_id,
            phone=phone,
            password_hash=password_hash,
            nickname="评测临时用户",
            subscription_type="free",
            created_at=utc_now(),
        )
        await auth_repo.create_user(user, self._db)
        await self._db.commit()
        await self._db.refresh(user)

        conversation_id = await self._open_conversation(user_id)

        logger.info(
            "eval sandbox user_id={} phone={} conversation_id={}",
            user_id,
            phone,
            conversation_id,
        )
        return user_id, conversation_id, phone, user.nickname

    async def bootstrap_conversation(self, user_id: str) -> str:
        """Create a new conversation for an existing user.

        Args:
            user_id: Id of the user; surrounding whitespace is ignored.

        Returns:
            The id of the new conversation.

        Raises:
            EvaluationBadRequestError: if ``user_id`` is blank, the user does
                not exist, or the conversation cannot be created.
        """
        uid = (user_id or "").strip()
        if not uid:
            raise EvaluationBadRequestError("user_id is required")

        owner = await self._db.get(User, uid)
        if not owner:
            raise EvaluationBadRequestError("user not found")

        conversation_id = await self._open_conversation(uid)

        logger.info(
            "eval replay bootstrap conversation_id={} user_id={}",
            conversation_id,
            uid,
        )
        return conversation_id

    async def replay_fixture(
        self,
        *,
        conversation_id: str,
        fixture_filename: str,
        flush_memoir_after: bool,
        skip_memoir: bool,
        skip_tts: bool,
    ) -> tuple[int, list[str], list[str], ReplayServerTiming]:
        """Replay the user utterances of an export fixture.

        Args:
            conversation_id: Conversation to replay into.
            fixture_filename: Name passed to ``read_user_export_fixture``.
            flush_memoir_after: Forwarded to ``replay_utterances``.
            skip_memoir: Forwarded to ``replay_utterances``.
            skip_tts: Forwarded to ``replay_utterances``.

        Returns:
            ``(replayed_count, utterances, segment_ids, timing)``.

        Raises:
            EvaluationBadRequestError: if reading the fixture raises
                ``ValueError`` or it yields no non-blank user utterance.
            EvaluationNotFoundError: if the fixture file does not exist.
        """
        try:
            turns, _ = read_user_export_fixture(fixture_filename)
        except ValueError as exc:
            raise EvaluationBadRequestError(str(exc)) from exc
        except FileNotFoundError:
            raise EvaluationNotFoundError("fixture not found") from None

        # Keep only the first element of each turn, trimmed, dropping blanks.
        utterances: list[str] = []
        for user_text, _other in turns:
            if (user_text or "").strip():
                utterances.append(user_text.strip())
        if not utterances:
            raise EvaluationBadRequestError("fixture produced no user utterances")

        replayed, segment_ids, timing = await self.replay_utterances(
            conversation_id=conversation_id,
            utterances=utterances,
            flush_memoir_after=flush_memoir_after,
            skip_memoir=skip_memoir,
            skip_tts=skip_tts,
        )
        return replayed, utterances, segment_ids, timing

    async def replay_utterances(
        self,
        *,
        conversation_id: str,
        utterances: list[str],
        flush_memoir_after: bool,
        skip_memoir: bool,
        skip_tts: bool,
    ) -> tuple[int, list[str], ReplayServerTiming]:
        """Replay utterances one by one into an existing conversation.

        Args:
            conversation_id: Target conversation; whitespace is trimmed.
            utterances: Texts to replay; blank entries are skipped.
            flush_memoir_after: When true (and memoir is not skipped), call
                ``memoir_ingest_scheduler.flush_pending`` after the last turn.
            skip_memoir: When true, segments are not queued for memoir ingest.
            skip_tts: Passed to ``process_user_message`` as ``force_skip_tts``.

        Returns:
            ``(replayed_count, segment_ids, timing)``.

        Raises:
            EvaluationBadRequestError: if ``conversation_id`` is blank or the
                conversation's user cannot be loaded.
            EvaluationNotFoundError: if the conversation is missing or deleted.
        """
        # Timing starts before validation, as the elapsed value covers the
        # whole call.
        perf_start = time.perf_counter()
        started_at_utc = utc_now()

        cid = (conversation_id or "").strip()
        if not cid:
            raise EvaluationBadRequestError("conversation_id is required")

        conv = await self._db.get(Conversation, cid)
        if not conv or conv.deleted_at is not None:
            raise EvaluationNotFoundError("conversation not found")

        user = await self._db.get(User, conv.user_id)
        if not user:
            raise EvaluationBadRequestError("user not found for conversation")

        conv_service = ConversationService(self._db, self._quota)
        segment_ids: list[str] = []
        for raw in utterances:
            text = (raw or "").strip()
            if not text:
                continue

            segment = await conv_service.create_user_segment(conv, conv.user_id, text)
            segment_ids.append(segment.id)
            message_ts = segment.created_at or conv.last_message_at

            if not skip_memoir:
                await memoir_ingest_scheduler.queue_segment(
                    conv.user_id,
                    segment.id,
                    text_char_count=len(text),
                    trigger="evaluation_replay",
                )

            await process_user_message(
                conversation_id=cid,
                user_message=text,
                conversation=conv,
                segment=segment,
                db=self._db,
                user=user,
                user_message_timestamp=message_ts,
                force_skip_tts=skip_tts,
            )

        # One segment id is recorded per processed turn, so the list length
        # is the number of replayed turns once the loop completes.
        replayed = len(segment_ids)

        if flush_memoir_after and conv.user_id and (not skip_memoir):
            await memoir_ingest_scheduler.flush_pending(
                conv.user_id,
                trigger="evaluation_replay",
            )

        logger.info(
            "eval replay done conversation_id={} turns={} flush={} skip_memoir={} skip_tts={}",
            cid,
            replayed,
            flush_memoir_after,
            skip_memoir,
            skip_tts,
        )

        finished_at_utc = utc_now()
        elapsed_ms = max(0, int((time.perf_counter() - perf_start) * 1000))
        return (
            replayed,
            segment_ids,
            ReplayServerTiming(
                started_at_utc=started_at_utc,
                finished_at_utc=finished_at_utc,
                elapsed_ms=elapsed_ms,
            ),
        )
|