From a50b72e7b58c501b97866055aad05ecc9cfa1b7a Mon Sep 17 00:00:00 2001 From: Kevin Date: Tue, 7 Apr 2026 11:06:16 +0800 Subject: [PATCH] =?UTF-8?q?feat(app-eval-web):=20=E8=AF=84=E6=B5=8B?= =?UTF-8?q?=E5=8F=B0=20UI/UX=20=E9=87=8D=E6=9E=84=EF=BC=88=E4=BE=A7?= =?UTF-8?q?=E6=A0=8F=E5=AF=BC=E8=88=AA=E3=80=81=E5=88=86=E9=A1=B5=E3=80=81?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E9=9B=86=E4=B8=8E=E5=AE=9E=E9=AA=8C=E8=83=BD?= =?UTF-8?q?=E5=8A=9B=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 采用 hash 路由与会话式壳层(Playground / Datasets / Experiments / Versions / Memoir) - 抽取 api、types、hooks(轮询、通知、实验 SSE)与 NoticeContext - Playground:基线/实际生成双栏、重放、流式自动评分与 ScoreCard - Datasets:回归集与用例列表、Markdown/JSON 导入、会话快照 - Experiments:创建实验、提交运行、SSE 进度、DiffTable 与门禁展示 - 样式与无障碍:DM Sans + JetBrains Mono、侧栏响应式、? 快捷键帮助 --- app-eval-web/index.html | 8 +- app-eval-web/src/App.tsx | 1714 +---------------- app-eval-web/src/api.ts | 61 + app-eval-web/src/components/DiffTable.tsx | 148 ++ app-eval-web/src/components/EmptyState.tsx | 21 + app-eval-web/src/components/JsonPreview.tsx | 20 + app-eval-web/src/components/NoticeBar.tsx | 36 + app-eval-web/src/components/ScoreCard.tsx | 133 ++ app-eval-web/src/components/Sidebar.tsx | 58 + app-eval-web/src/components/StatusBadge.tsx | 12 + app-eval-web/src/config.ts | 22 + app-eval-web/src/context/NoticeContext.tsx | 30 + app-eval-web/src/eval.css | 1062 ++++++++++ app-eval-web/src/hooks/useExperimentStream.ts | 49 + app-eval-web/src/hooks/useHashRoute.ts | 36 + app-eval-web/src/hooks/useNotices.ts | 21 + app-eval-web/src/hooks/usePolling.ts | 20 + app-eval-web/src/index.css | 16 +- app-eval-web/src/pages/DatasetsPage.tsx | 301 +++ app-eval-web/src/pages/ExperimentsPage.tsx | 311 +++ app-eval-web/src/pages/MemoirPage.tsx | 228 +++ app-eval-web/src/pages/PlaygroundPage.tsx | 770 ++++++++ app-eval-web/src/pages/VersionsPage.tsx | 96 + app-eval-web/src/types.ts | 113 ++ app-eval-web/src/utils/formatTime.ts | 8 + app-eval-web/src/utils/noticeId.ts | 3 + app-eval-web/src/utils/utterances.ts | 8 + 27 files changed, 3662 insertions(+), 1643 deletions(-) create mode 100644 app-eval-web/src/api.ts create mode 100644 app-eval-web/src/components/DiffTable.tsx create mode 100644 app-eval-web/src/components/EmptyState.tsx create mode 100644 app-eval-web/src/components/JsonPreview.tsx create mode 100644 app-eval-web/src/components/NoticeBar.tsx create mode 100644 app-eval-web/src/components/ScoreCard.tsx create mode 100644 app-eval-web/src/components/Sidebar.tsx create mode 100644 app-eval-web/src/components/StatusBadge.tsx create mode 100644 app-eval-web/src/config.ts create mode 100644 app-eval-web/src/context/NoticeContext.tsx create mode 100644 app-eval-web/src/eval.css create mode 100644 app-eval-web/src/hooks/useExperimentStream.ts create mode 100644 app-eval-web/src/hooks/useHashRoute.ts create mode 100644 app-eval-web/src/hooks/useNotices.ts create mode 100644 app-eval-web/src/hooks/usePolling.ts create mode 100644 app-eval-web/src/pages/DatasetsPage.tsx create mode 100644 app-eval-web/src/pages/ExperimentsPage.tsx create mode 100644 app-eval-web/src/pages/MemoirPage.tsx create mode 100644 app-eval-web/src/pages/PlaygroundPage.tsx create mode 100644 app-eval-web/src/pages/VersionsPage.tsx create mode 100644 app-eval-web/src/types.ts create mode 100644 app-eval-web/src/utils/formatTime.ts create mode 100644 app-eval-web/src/utils/noticeId.ts create mode 100644 app-eval-web/src/utils/utterances.ts diff --git a/app-eval-web/index.html b/app-eval-web/index.html index d8a8666..09c7b2c 100644 --- a/app-eval-web/index.html +++ b/app-eval-web/index.html @@ -3,7 +3,13 @@ - Life Echo — 内部回归评测 + Life Echo · 内部评测台 + + +
diff --git a/app-eval-web/src/App.tsx b/app-eval-web/src/App.tsx index 2d3991e..978ee37 100644 --- a/app-eval-web/src/App.tsx +++ b/app-eval-web/src/App.tsx @@ -1,335 +1,49 @@ -import { useCallback, useEffect, useRef, useState } from "react"; +import { useEffect, useState } from "react"; +import { apiBase } from "./api"; +import { apiBaseHint } from "./config"; +import { NoticeProvider } from "./context/NoticeContext"; +import { NoticeBar } from "./components/NoticeBar"; +import { Sidebar } from "./components/Sidebar"; +import { useHashRoute } from "./hooks/useHashRoute"; +import { useNotices } from "./hooks/useNotices"; +import type { AppRoute } from "./types"; +import DatasetsPage from "./pages/DatasetsPage"; +import ExperimentsPage from "./pages/ExperimentsPage"; +import MemoirPage from "./pages/MemoirPage"; +import PlaygroundPage from "./pages/PlaygroundPage"; +import VersionsPage from "./pages/VersionsPage"; -const envApiBase = ( - import.meta.env.VITE_EVAL_API_BASE as string | undefined -)?.trim() ?? ""; -/** - * 开发 + 未设 VITE_EVAL_API_BASE:用相对路径走 Vite proxy → :8001(见 vite.config.ts)。 - * 生产构建未配 env 时仍回退直连 8001。 - */ -const apiBase = - envApiBase || (import.meta.env.DEV ? "" : "http://127.0.0.1:8001"); -const apiKey = - (import.meta.env.VITE_EVAL_API_KEY as string | undefined)?.trim() ?? ""; - -const apiBaseHint = - apiBase === "" - ? "(开发)请求经 Vite 代理到 http://127.0.0.1:8001" - : `直连 ${apiBase}`; - -/** 首页会话列表轮询 */ -const SESSION_LIST_POLL_MS = 4000; -/** 对比页左侧线上对话轮询 */ -const DIALOGUE_POLL_MS = 3500; -/** 高级页回归集 / 实验列表轮询 */ -const ADMIN_POLL_MS = 8000; - -/** 默认对照用导出快照(api/tests/user_exports/) */ -const DEFAULT_USER_EXPORT_FIXTURE = - "zuckxu_1ade609c-567a-450b-b8fb-776aaba3c2b3.md"; - -async function api( - path: string, - init?: RequestInit, -): Promise<{ ok: boolean; data?: T; error?: string; status: number }> { - const url = `${apiBase}${path.startsWith("/") ? path : `/${path}`}`; - try { - const r = await fetch(url, { - ...init, - headers: { - "X-Internal-Eval-Key": apiKey, - "Content-Type": "application/json", - ...(init?.headers ?? {}), - }, - signal: init?.signal, - }); - const text = await r.text(); - let data: T | undefined; - try { - data = text ? (JSON.parse(text) as T) : undefined; - } catch { - /* ignore */ - } - if (!r.ok) { - return { - ok: false, - status: r.status, - error: - typeof data === "object" && - data && - "detail" in (data as object) && - data !== null - ? String((data as unknown as { detail: unknown }).detail) - : text || r.statusText, - }; - } - return { ok: true, data, status: r.status }; - } catch (e: unknown) { - const name = e instanceof Error ? e.name : ""; - if (name === "AbortError") { - return { ok: false, status: 0, error: "aborted" }; - } - return { - ok: false, - status: 0, - error: e instanceof Error ? e.message : "network error", - }; +function RouteOutlet({ route }: { route: AppRoute }) { + switch (route) { + case "playground": + return ; + case "datasets": + return ; + case "experiments": + return ; + case "versions": + return ; + case "memoir": + return ; + default: + return ; } } -/** 与后端 replay 一致:strip 后非空的用户句。 */ -function utterancesForReplayFromTurns( - turns: { user: string; ai: string }[], -): string[] { - return turns - .map((t) => (t.user || "").trim()) - .filter((u) => u.length > 0 && u !== "(空)"); -} - -type SessionItem = { - id: string; - user_id: string; - user_phone: string | null; - started_at: string | null; - last_message_at: string | null; - conversation_stage: string | null; - current_topic: string | null; - status: string | null; -}; - -type DialogueMessage = { - role: string; - content: string; - created_at?: string | null; -}; - -function JsonPreview({ value }: { value: unknown }) { - if (value == null) return ; - return ( -
-      {JSON.stringify(value, null, 2)}
-    
- ); -} - -const shell: React.CSSProperties = { - minHeight: "100vh", - background: "#0f1419", - color: "#e6edf3", - fontFamily: - 'ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, sans-serif', -}; - -const btn: React.CSSProperties = { - padding: "8px 14px", - borderRadius: 8, - border: "1px solid #30363d", - background: "#21262d", - color: "#e6edf3", - cursor: "pointer", - fontSize: 14, -}; - -const btnPrimary: React.CSSProperties = { - ...btn, - background: "#238636", - borderColor: "#238636", -}; - -function formatTime(iso: string | null | undefined) { - if (!iso) return "—"; - try { - const d = new Date(iso); - return d.toLocaleString(); - } catch { - return iso; - } -} - -type FixtureDetailResponse = { - turns: { user: string; ai: string }[]; - source_user_id?: string | null; - memoir_sections?: { title: string; body: string }[]; -}; - export default function App() { - const [mainView, setMainView] = useState<"conv" | "memoir" | "admin">("conv"); - const [msg, setMsg] = useState(""); - const [sessions, setSessions] = useState([]); - - const [dialogue, setDialogue] = useState([]); - const [fallbackUserLines, setFallbackUserLines] = useState([]); - const [loadingLeft, setLoadingLeft] = useState(false); - - const [versions, setVersions] = useState<{ id: string; name: string }[]>([]); - - const [evalUserId, setEvalUserId] = useState(""); - const [replayConversationId, setReplayConversationId] = useState(""); - const [replayBusy, setReplayBusy] = useState(false); - const [replayProgress, setReplayProgress] = useState<{ - current: number; - total: number; - } | null>(null); - const replayAbortRef = useRef(null); - const [judgeConvBusy, setJudgeConvBusy] = useState(false); - const [convJudgeBaseline, setConvJudgeBaseline] = useState(null); - const [convJudgeReplay, setConvJudgeReplay] = useState(null); - const [convJudgeStreamText, setConvJudgeStreamText] = useState(""); - const [convJudgeErrors, setConvJudgeErrors] = useState([]); - const [convJudgePhase, setConvJudgePhase] = useState(""); - const [memoirSnapshot, setMemoirSnapshot] = useState(null); - const [memoirSnapBusy, setMemoirSnapBusy] = useState(false); - const [memoirJudgeBusy, setMemoirJudgeBusy] = useState(false); - const [manualMemoirJudge, setManualMemoirJudge] = useState(null); - const [showSessionPicker, setShowSessionPicker] = useState(false); - - const [adminTab, setAdminTab] = useState< - "sets" | "versions" | "experiments" - >("experiments"); - const [sets, setSets] = useState<{ id: string; name: string }[]>([]); - const [experiments, setExperiments] = useState< - { id: string; name: string; status: string }[] - >([]); - const [selSet, setSelSet] = useState(""); - const [newSetName, setNewSetName] = useState("默认回归集"); - const [newVerName, setNewVerName] = useState("candidate-v1"); - const [verConfig, setVerConfig] = useState("{}"); - const [selExp, setSelExp] = useState(null); - const [expDetail, setExpDetail] = useState(null); - const [enqueueingExpId, setEnqueueingExpId] = useState(null); + const [route, setRoute] = useHashRoute(); + const { notices, pushNotice, dismissNotice } = useNotices(); const [evalReachable, setEvalReachable] = useState< "unknown" | "ok" | "bad" >("unknown"); - const [sessionsUpdatedAt, setSessionsUpdatedAt] = useState(null); - const [dialogueUpdatedAt, setDialogueUpdatedAt] = useState(null); - - const [fixtureFiles, setFixtureFiles] = useState([]); - const [fixtureName, setFixtureName] = useState(""); - const [fixtureTurns, setFixtureTurns] = useState< - { user: string; ai: string }[] - >([]); - const [fixtureMemoirSections, setFixtureMemoirSections] = useState< - { title: string; body: string }[] - >([]); - - /** 近期全部:含已结束会话;仅进行中:status=active(多数字段在用户挂断后为 ended,列表会空) */ - const [sessionFilter, setSessionFilter] = useState<"recent" | "active">( - "recent", - ); - - const refreshSessionList = useCallback(async () => { - const path = - sessionFilter === "active" - ? "/internal/api/evaluation/sessions?status=active&limit=80" - : "/internal/api/evaluation/sessions?limit=80"; - const r = await api<{ items: SessionItem[]; total: number }>(path); - if (r.ok && r.data) { - setSessions(r.data.items); - setSessionsUpdatedAt(new Date()); - setMsg(""); - } else { - const hint = - r.status === 404 - ? `找不到接口 (404)。请在终端执行: curl -s http://127.0.0.1:8001/internal/api/evaluation/ping (应返回 {"ok":true,...})。若此处也 404,说明 8001 上不是 internal_main。${apiBaseHint};也可删掉 VITE_EVAL_API_BASE 仅用代理。` - : (r.error ?? "加载会话失败"); - setMsg(hint); - } - }, [sessionFilter]); - - const refreshVersions = useCallback(async () => { - const r = await api<{ id: string; name: string }[]>( - "/internal/api/evaluation/versions", - ); - if (r.ok && r.data) setVersions(r.data); - }, []); - - const pullDialogue = useCallback( - async (conversationId: string, signal?: AbortSignal) => { - const d = await api<{ messages: DialogueMessage[] }>( - `/internal/api/evaluation/sessions/${conversationId}/dialogue`, - { signal }, - ); - if (d.error === "aborted") return; - if (d.ok && d.data?.messages?.length) { - setDialogue(d.data.messages); - setFallbackUserLines([]); - } else { - const t = await api<{ - user_utterances_from_messages: string[]; - user_utterances_from_segments: string[]; - }>(`/internal/api/evaluation/sessions/${conversationId}/transcript`, { - signal, - }); - if (t.error === "aborted") return; - if (t.ok && t.data) { - const lines = - t.data.user_utterances_from_messages.length > 0 - ? t.data.user_utterances_from_messages - : t.data.user_utterances_from_segments; - setDialogue([]); - setFallbackUserLines(lines); - } - } - setDialogueUpdatedAt(new Date()); - }, - [], - ); - - const stopReplay = useCallback(() => { - replayAbortRef.current?.abort(); - }, []); - - useEffect(() => { - const ac = replayAbortRef; - const onPageHide = () => ac.current?.abort(); - window.addEventListener("pagehide", onPageHide); - return () => { - window.removeEventListener("pagehide", onPageHide); - ac.current?.abort(); - }; - }, []); - - const refreshAdminData = useCallback(async () => { - const rs = await api<{ id: string; name: string }[]>( - "/internal/api/evaluation/regression-sets", - ); - if (rs.ok && rs.data) { - const rows = rs.data; - setSets(rows); - setSelSet((cur) => { - if (cur) return cur; - return rows[0]?.id ?? ""; - }); - } - const ex = await api<{ id: string; name: string; status: string }[]>( - "/internal/api/evaluation/experiments", - ); - if (ex.ok && ex.data) setExperiments(ex.data); - const vr = await api<{ id: string; name: string }[]>( - "/internal/api/evaluation/versions", - ); - if (vr.ok && vr.data) setVersions(vr.data); - }, []); + const [helpOpen, setHelpOpen] = useState(false); useEffect(() => { void (async () => { try { const url = `${apiBase}/internal/api/evaluation/ping`; const r = await fetch(url); - const j = (await r.json()) as { ok?: boolean; service?: string }; + const j = (await r.json()) as { ok?: boolean }; setEvalReachable(r.ok && j.ok === true ? "ok" : "bad"); } catch { setEvalReachable("bad"); @@ -338,1324 +52,70 @@ export default function App() { }, []); useEffect(() => { - if (mainView !== "conv") return; - void refreshSessionList(); - const t = setInterval(() => void refreshSessionList(), SESSION_LIST_POLL_MS); - return () => clearInterval(t); - }, [mainView, refreshSessionList]); - - useEffect(() => { - void refreshVersions(); - }, [refreshVersions]); - - useEffect(() => { - if (mainView !== "conv" || !replayConversationId.trim()) return; - let cancelled = false; - setLoadingLeft(true); - void pullDialogue(replayConversationId).finally(() => { - if (!cancelled) setLoadingLeft(false); - }); - const t = setInterval(() => { - void pullDialogue(replayConversationId); - }, DIALOGUE_POLL_MS); - return () => { - cancelled = true; - clearInterval(t); + const onKey = (e: KeyboardEvent) => { + if (e.key === "?" && !e.ctrlKey && !e.metaKey) { + const t = e.target as HTMLElement; + if (t.closest("input, textarea, select, [contenteditable=true]")) { + return; + } + e.preventDefault(); + setHelpOpen((v) => !v); + } + if (e.key === "Escape") setHelpOpen(false); }; - }, [mainView, replayConversationId, pullDialogue]); - - useEffect(() => { - if (mainView !== "conv" && mainView !== "memoir") return; - void (async () => { - const r = await api<{ items: string[] }>( - "/internal/api/evaluation/fixtures/user-exports", - ); - if (!r.ok || !r.data?.items?.length) { - setFixtureFiles([]); - return; - } - const items = r.data.items; - setFixtureFiles(items); - setFixtureName((cur) => { - if (cur && items.includes(cur)) return cur; - if (items.includes(DEFAULT_USER_EXPORT_FIXTURE)) - return DEFAULT_USER_EXPORT_FIXTURE; - return items[0] ?? ""; - }); - })(); - }, [mainView]); - - useEffect(() => { - if ((mainView !== "conv" && mainView !== "memoir") || !fixtureName) { - setFixtureTurns([]); - setFixtureMemoirSections([]); - return; - } - void (async () => { - const r = await api( - `/internal/api/evaluation/fixtures/user-exports/${encodeURIComponent(fixtureName)}`, - ); - if (r.ok && r.data?.turns) { - setFixtureTurns(r.data.turns); - setFixtureMemoirSections(r.data.memoir_sections ?? []); - const sid = r.data.source_user_id ?? null; - if (sid && mainView === "memoir") - setEvalUserId((prev) => (prev.trim() ? prev : sid)); - } else { - setFixtureTurns([]); - setFixtureMemoirSections([]); - } - })(); - }, [mainView, fixtureName]); - - useEffect(() => { - if (mainView !== "admin") return; - void refreshAdminData(); - const t = setInterval(() => void refreshAdminData(), ADMIN_POLL_MS); - return () => clearInterval(t); - }, [mainView, refreshAdminData]); - - async function createEvalSandboxOnly() { - const r = await api<{ - user_id: string; - conversation_id: string; - phone: string; - nickname: string; - }>("/internal/api/evaluation/sessions/eval-sandbox", { - method: "POST", - body: "{}", - }); - if (r.ok && r.data) { - setEvalUserId(r.data.user_id); - setReplayConversationId(r.data.conversation_id); - setConvJudgeBaseline(null); - setConvJudgeReplay(null); - setConvJudgeStreamText(""); - setConvJudgeErrors([]); - setConvJudgePhase(""); - setDialogue([]); - setFallbackUserLines([]); - setMsg( - `评测沙箱就绪:临时手机号 ${r.data.phone},user_id / conversation_id 已填入(可随时「新沙箱」清空重来)。`, - ); - } else { - setMsg(r.error ?? "创建沙箱失败"); - } - } - - async function bootstrapReplaySession() { - const uid = evalUserId.trim(); - if (!uid) { - setMsg("高级选项:请先填写已有用户的 UUID"); - return; - } - const r = await api<{ conversation_id: string }>( - "/internal/api/evaluation/sessions/replay-bootstrap", - { method: "POST", body: JSON.stringify({ user_id: uid }) }, - ); - setMsg( - r.ok - ? `已在该用户下新建会话 ${r.data?.conversation_id ?? ""}` - : (r.error ?? "bootstrap 失败"), - ); - if (r.ok && r.data?.conversation_id) { - setReplayConversationId(r.data.conversation_id); - setConvJudgeBaseline(null); - setConvJudgeReplay(null); - setConvJudgeStreamText(""); - setConvJudgeErrors([]); - setConvJudgePhase(""); - setDialogue([]); - setFallbackUserLines([]); - } - } - - async function runReplay() { - if (!fixtureName) { - setMsg("请选择基准 MD"); - return; - } - const utts = utterancesForReplayFromTurns(fixtureTurns); - if (!utts.length) { - setMsg("当前基准 MD 没有可回放的用户句(请先加载轮次)"); - return; - } - - replayAbortRef.current?.abort(); - const ac = new AbortController(); - replayAbortRef.current = ac; - const { signal } = ac; - - setReplayBusy(true); - setReplayProgress(null); - try { - let cid = replayConversationId.trim(); - if (!cid) { - const sb = await api<{ - user_id: string; - conversation_id: string; - phone: string; - }>("/internal/api/evaluation/sessions/eval-sandbox", { - method: "POST", - body: "{}", - signal, - }); - if (sb.error === "aborted") { - setMsg("回放已中止(关闭/刷新页面或「停止回放」)"); - return; - } - if (!sb.ok || !sb.data) { - setMsg(sb.error ?? "自动创建沙箱失败"); - return; - } - setEvalUserId(sb.data.user_id); - setReplayConversationId(sb.data.conversation_id); - cid = sb.data.conversation_id; - setConvJudgeBaseline(null); - setConvJudgeReplay(null); - setConvJudgeStreamText(""); - setConvJudgeErrors([]); - setConvJudgePhase(""); - setDialogue([]); - setFallbackUserLines([]); - } - - let replayed = 0; - for (let i = 0; i < utts.length; i++) { - if (signal.aborted) { - setMsg("回放已中止(关闭/刷新页面或「停止回放」)"); - return; - } - setReplayProgress({ current: i + 1, total: utts.length }); - const last = i === utts.length - 1; - const r = await api<{ - turns_replayed: number; - utterances_echo: string[]; - }>("/internal/api/evaluation/replay/conversation", { - method: "POST", - signal, - body: JSON.stringify({ - conversation_id: cid, - user_utterances: [utts[i]], - flush_memoir_after: last, - skip_tts: true, - }), - }); - if (r.error === "aborted") { - setMsg("回放已中止(关闭/刷新页面或「停止回放」)"); - return; - } - if (!r.ok) { - setMsg(r.error ?? "回放失败"); - return; - } - replayed += r.data?.turns_replayed ?? 0; - await pullDialogue(cid, signal); - } - - setMsg( - `回放完成:${replayed} 轮(分轮请求,避免长阻塞;当前会话 ${cid.slice(0, 8)}…;最后一轮已 flush 回忆录队列,成稿仍依赖 Celery)`, - ); - } finally { - setReplayBusy(false); - setReplayProgress(null); - } - } - - async function runJudgeConversationStream() { - const cid = replayConversationId.trim(); - if (!cid) { - setMsg("请先有一次会话(执行回放、仅建沙箱或粘贴会话 ID)"); - return; - } - setJudgeConvBusy(true); - setConvJudgeBaseline(null); - setConvJudgeReplay(null); - setConvJudgeStreamText(""); - setConvJudgeErrors([]); - setConvJudgePhase("连接评审服务…"); - try { - const url = `${apiBase}/internal/api/evaluation/judge/conversation-stream`; - const res = await fetch(url, { - method: "POST", - headers: { - "X-Internal-Eval-Key": apiKey, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - conversation_id: cid, - fixture_filename: fixtureName.trim() || null, - }), - }); - if (!res.ok) { - const t = await res.text(); - setMsg(`评审流启动失败:HTTP ${res.status} ${t.slice(0, 240)}`); - setConvJudgePhase(""); - return; - } - const reader = res.body?.getReader(); - if (!reader) { - setMsg("当前环境无法读取响应流"); - setConvJudgePhase(""); - return; - } - const decoder = new TextDecoder(); - let buf = ""; - while (true) { - const { done, value } = await reader.read(); - if (done) break; - buf += decoder.decode(value, { stream: true }); - const chunks = buf.split("\n\n"); - buf = chunks.pop() ?? ""; - for (const block of chunks) { - const line = block.trim(); - if (!line.startsWith("data: ")) continue; - let evt: Record; - try { - evt = JSON.parse(line.slice(6)) as Record; - } catch { - continue; - } - const ev = evt.event as string | undefined; - if (ev === "meta") { - setConvJudgePhase("GLM:基准整体打分…"); - } else if (ev === "warning") { - setConvJudgeErrors((prev) => [ - ...prev, - String(evt.message ?? "warning"), - ]); - } else if (ev === "baseline_judge") { - setConvJudgeBaseline(evt.judge ?? null); - setConvJudgePhase("GLM:回放对话整体打分…"); - } else if (ev === "replay_judge") { - setConvJudgeReplay(evt.judge ?? null); - setConvJudgePhase("GLM:对比与建议(流式输出)…"); - } else if (ev === "compare_delta") { - const piece = String(evt.text ?? ""); - if (piece) - setConvJudgeStreamText((prev) => prev + piece); - } else if (ev === "error") { - setConvJudgeErrors((prev) => [ - ...prev, - `${String(evt.phase ?? "error")}: ${String(evt.message ?? "")}`, - ]); - } else if (ev === "done") { - setConvJudgePhase(""); - setMsg("GLM 对话评审流已结束"); - } - } - } - } catch (e) { - setConvJudgeErrors((prev) => [ - ...prev, - e instanceof Error ? e.message : "评审流异常", - ]); - setMsg(e instanceof Error ? e.message : "评审流异常"); - } finally { - setJudgeConvBusy(false); - setConvJudgePhase(""); - } - } - - async function runMemoirSnapshot() { - const uid = evalUserId.trim(); - if (!uid) { - setMsg("请填写用户 ID"); - return; - } - setMemoirSnapBusy(true); - try { - const r = await api( - `/internal/api/evaluation/users/${encodeURIComponent(uid)}/memoir-snapshot`, - ); - setMsg(r.ok ? "已刷新库中章节 / 故事列表" : (r.error ?? "加载失败")); - if (r.ok) setMemoirSnapshot(r.data); - } finally { - setMemoirSnapBusy(false); - } - } - - async function runJudgeMemoir() { - const uid = evalUserId.trim(); - if (!uid) { - setMsg("请填写用户 ID"); - return; - } - setMemoirJudgeBusy(true); - try { - const r = await api("/internal/api/evaluation/judge/memoir-chapters", { - method: "POST", - body: JSON.stringify({ - user_id: uid, - baseline_sections: fixtureMemoirSections.length - ? fixtureMemoirSections - : null, - }), - }); - setMsg(r.ok ? "GLM 章节评审完成" : (r.error ?? "评审失败")); - if (r.ok) setManualMemoirJudge(r.data); - } finally { - setMemoirJudgeBusy(false); - } - } - - function pickSessionAsReplayTarget(id: string) { - setReplayConversationId(id); - setShowSessionPicker(false); - const s = sessions.find((x) => x.id === id); - if (s?.user_id) setEvalUserId((prev) => prev.trim() || s.user_id); - setMsg(`已选用会话 ${id.slice(0, 8)}… 为回放目标(将向该会话追加消息)`); - } - - async function createSet() { - const r = await api<{ id: string }>( - "/internal/api/evaluation/regression-sets", - { method: "POST", body: JSON.stringify({ name: newSetName, description: "" }) }, - ); - setMsg(r.ok ? "回归集已创建" : r.error ?? "失败"); - if (r.ok) { - const rs = await api<{ id: string; name: string }[]>( - "/internal/api/evaluation/regression-sets", - ); - if (rs.ok && rs.data) setSets(rs.data); - } - } - - async function createVersion() { - let cfg: Record | null = null; - try { - cfg = JSON.parse(verConfig || "{}") as Record; - } catch { - setMsg("config_json 无效"); - return; - } - const r = await api<{ id: string }>("/internal/api/evaluation/versions", { - method: "POST", - body: JSON.stringify({ - name: newVerName, - runner_kind: "llm_chat_v1", - config_json: cfg, - }), - }); - setMsg(r.ok ? "版本已创建" : r.error ?? "失败"); - if (r.ok) void refreshVersions(); - } - - async function snapshotFromDetail() { - const cid = replayConversationId.trim(); - if (!cid || !selSet) { - setMsg("先在对话评测中填写 conversation_id 并在高级页选用回归集"); - return; - } - const r = await api( - `/internal/api/evaluation/regression-sets/${selSet}/snapshot-from-conversation/${cid}`, - { - method: "POST", - body: JSON.stringify({ - title: "", - use_messages: true, - is_protected: false, - }), - }, - ); - setMsg(r.ok ? "已快照到回归集" : r.error ?? "失败"); - } - - async function loadExp(eid: string) { - setSelExp(eid); - const r = await api( - `/internal/api/evaluation/experiments/${eid}`, - ); - if (r.ok) setExpDetail(r.data); - else setMsg(r.error ?? "fail"); - } - - async function enqueueExperimentRun(eid: string) { - setEnqueueingExpId(eid); - try { - const r = await api<{ status?: string }>( - `/internal/api/evaluation/experiments/${eid}/run`, - { method: "POST" }, - ); - setMsg( - r.ok - ? "已提交 Celery 执行:回放 + GLM 评审写入各 run(需 worker 与 LLM 就绪)" - : (r.error ?? "提交失败"), - ); - if (r.ok) void refreshAdminData(); - } finally { - setEnqueueingExpId(null); - } - } + window.addEventListener("keydown", onKey); + return () => window.removeEventListener("keydown", onKey); + }, []); return ( -
-
- 回归评测台 - - {apiBaseHint} - {evalReachable === "ok" ? ( - · /ping OK - ) : evalReachable === "bad" ? ( - - · 连不上 internal /ping,请起{" "} - uvicorn app.internal_main:internal_app --port 8001 或{" "} - internal-eval.sh - - ) : null} - - · 网页是 5174;8001 仅为 API - - - - - - -
- - {msg ? ( -
- {msg} + +
+ +
+ +
+ +
- ) : null} - {mainView === "conv" ? ( -
-

- 默认不填用户与会话:点「执行回放」会自动创建临时用户 + 新会话(伪手机号{" "} - eval_… - ),再按基准里的用户句逐轮请求后端(每轮一次 HTTP,界面可保持响应)。左侧列是导出 MD 里的用户 + 当时导出的 AI,仅作对照;中间「落库对话」里的 AI 是当前环境重新生成的,必然与左侧导出 AI 不同——这是预期。关闭或刷新本页会中止未完成的回放。 - 回忆录模块在「回忆录章节」页;若用沙箱用户看章节,请先在本页跑完回放(并开 Celery)。 -

+ {helpOpen ? (
setHelpOpen(false)} > - - - - -
-
- - 当前 user_id{" "} - - {evalUserId ? `${evalUserId.slice(0, 10)}…` : "—"} - {" "} - · conversation_id{" "} - - {replayConversationId - ? `${replayConversationId.slice(0, 10)}…` - : "—"} - - - - - - - 对话同步每 {DIALOGUE_POLL_MS / 1000}s - {dialogueUpdatedAt - ? ` · ${dialogueUpdatedAt.toLocaleTimeString()}` - : ""} - -
- -
- - 高级:指定已有用户或粘贴 conversation_id -
e.stopPropagation()} > - - - -
-
- - {showSessionPicker ? ( -
-
- - - - 列表每 {SESSION_LIST_POLL_MS / 1000}s 刷新 - {sessionsUpdatedAt - ? ` · ${sessionsUpdatedAt.toLocaleTimeString()}` - : ""} - -
-
    - {sessions.map((s) => ( -
  • - {" "} - {s.id.slice(0, 10)}…{" "} - {s.user_phone ?? s.user_id.slice(0, 8)} -
  • - ))} -
-
- ) : null} - -
-
-

- 基准(导出 MD:用户 + AI 对照) -

- {!fixtureName || fixtureTurns.length === 0 ? ( -

选择 MD 后加载轮次

- ) : ( -
- {fixtureTurns.map((row, i) => ( -
-
-
- 用户 · 轮次 {i + 1} -
- {row.user} -
-
-
- 导出中的 AI -
- {row.ai?.trim() ? row.ai : "(空)"} -
-
- ))} -
- )} -
-
-

- 落库对话(DB · 用户句应与基准一致,AI 为当前后端新生成) -

- {loadingLeft ? ( -

加载中…

- ) : !replayConversationId.trim() ? ( -

- 执行回放或「仅建沙箱」后将自动拉取本轮会话的落库消息 -

- ) : dialogue.length > 0 ? ( -
- {dialogue.map((m, i) => ( -
-
- {m.role === "human" ? "用户" : "AI"} -
- {m.content} -
- ))} -
- ) : fallbackUserLines.length > 0 ? ( -
-

仅 transcript(无 messages 表)

- {fallbackUserLines.map((line, i) => ( -
- {i + 1}. {line} -
- ))} -
- ) : ( -

暂无消息

- )} -
-
- -
-

- 手动 GLM · 对话评审(页面底部) -

-

- 流程:两次整体打分(导出基准全文 transcript 一次、当前落库回放 transcript - 一次),再流式输出中文对比与改进建议。请在上文选择与本会话一致的基准 MD; - 未配置服务端 eval_judge_api_key / zhipu_api_key 时会报错。若某一侧 GLM - JSON 解析失败,见服务端日志中的 conversation judge failed。 -

- {convJudgePhase ? ( -

{convJudgePhase}

- ) : null} - {convJudgeErrors.length > 0 ? ( -
    - {convJudgeErrors.map((e, i) => ( -
  • {e}
  • - ))} -
- ) : null} -
-
-
- 基准(导出 MD)整体分 -
- {convJudgeBaseline && - typeof convJudgeBaseline === "object" && - convJudgeBaseline !== null ? ( - <> -
- {typeof (convJudgeBaseline as { total_score?: number }).total_score === - "number" - ? (convJudgeBaseline as { total_score: number }).total_score.toFixed(1) - : "—"} -
- ) ?? {}} - /> - - ) : ( -

- {!fixtureName.trim() - ? "未选择基准 MD:服务端仅对回放 transcript 做整体分与单侧建议。" - : "等待基准整体分…(若失败见上方红色错误与服务端日志)"} -

- )} -
-
-
- 回放 / 新测(DB)整体分 -
- {convJudgeReplay && - typeof convJudgeReplay === "object" && - convJudgeReplay !== null ? ( - <> -
- {typeof (convJudgeReplay as { total_score?: number }).total_score === - "number" - ? (convJudgeReplay as { total_score: number }).total_score.toFixed(1) - : "—"} -
- ) ?? {}} /> - - ) : ( -

等待打分结果…

- )} -
-
-
-
- 对比与建议(流式) -
-
- {convJudgeStreamText || ( - - 点击工具栏「GLM 评审对话(流式)」后,此处逐字显示模型输出。 - - )} -
-
-
-
- ) : null} - - {mainView === "memoir" ? ( -
-

回忆录章节评测

-

- 基准正文来自同一套 MD 的「回忆录章节」段落;与库中 Chapter/Story 对照后由 GLM 按 rubric 打分。 -

-
- - - - -
-

- 基线条目:{fixtureMemoirSections.length} 段(自 MD 解析) -

-
-
-

导出基线(节选)

-
    - {fixtureMemoirSections.map((s, i) => ( -
  • - {s.title} -
    - {(s.body || "").slice(0, 400)} - {(s.body || "").length > 400 ? "…" : ""} -
    -
  • - ))} -
-
-
-

数据库快照

- {memoirSnapshot && - typeof memoirSnapshot === "object" && - memoirSnapshot !== null ? ( - - ) : ( -

点击「刷新库中章节/故事」

- )} -
-
-
-

手动 GLM · 章节/故事

- {manualMemoirJudge ? : ( -

点击「GLM 评审章节」

- )} -
-
- ) : null} - - {mainView === "admin" ? ( -
-

- 在此页停留时,回归集与实验列表每 {ADMIN_POLL_MS / 1000}{" "} - 秒自动刷新(版本列表在进页时拉取;新建版本后会更新)。 -

- - - {adminTab === "sets" ? ( -
- setNewSetName(e.target.value)} - style={{ - padding: 8, - borderRadius: 6, - border: "1px solid #30363d", - background: "#0d1117", - color: "#e6edf3", - marginRight: 8, - }} - /> - -
    - {sets.map((s) => ( -
  • - {s.id.slice(0, 10)} — {s.name}{" "} - -
  • - ))} -
-
- ) : null} - - {adminTab === "versions" ? ( -
- setNewVerName(e.target.value)} - placeholder="版本名" - style={{ - display: "block", - marginBottom: 8, - padding: 8, - width: "100%", - maxWidth: 400, - borderRadius: 6, - border: "1px solid #30363d", - background: "#0d1117", - color: "#e6edf3", - }} - /> -