diff --git a/api/app/features/conversation/ws/pipeline.py b/api/app/features/conversation/ws/pipeline.py
index 206c8a6..1c936da 100644
--- a/api/app/features/conversation/ws/pipeline.py
+++ b/api/app/features/conversation/ws/pipeline.py
@@ -629,6 +629,8 @@ async def process_user_message(
     db: AsyncSession,
     user: User = None,
     user_message_timestamp: Optional[datetime] = None,
+    *,
+    force_skip_tts: bool = False,
 ) -> None:
     """处理用户消息，生成 Agent 回应。由 ChatOrchestrator 路由到 ProfileAgent 或 InterviewAgent。"""
     store = ConversationHistoryStore(db)
@@ -671,7 +673,7 @@ async def process_user_message(
                 turn.skip_tts,
             )
         responses = turn.messages
-        skip_tts = turn.skip_tts
+        skip_tts = bool(turn.skip_tts or force_skip_tts)
 
         segment.agent_response = AI_RESPONSE_SEGMENT_JOIN.join(responses)
         _mark_conversation_active(conversation)
diff --git a/api/app/features/evaluation/deps.py b/api/app/features/evaluation/deps.py
index abaaaad..60e3cc4 100644
--- a/api/app/features/evaluation/deps.py
+++ b/api/app/features/evaluation/deps.py
@@ -7,9 +7,26 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.core.db import get_async_db
 from app.features.evaluation.admin_service import EvaluationAdminService
+from app.features.evaluation.judge_manual_service import EvalJudgeManualService
+from app.features.evaluation.replay_service import ReplayConversationService
+from app.features.quota.deps import get_quota_service
+from app.features.quota.service import QuotaService
 
 
 def get_evaluation_admin_service(
     db: Annotated[AsyncSession, Depends(get_async_db)],
 ) -> EvaluationAdminService:
     return EvaluationAdminService(db)
+
+
+def get_replay_conversation_service(  # dependency provider: wires the session and quota service together
+    db: Annotated[AsyncSession, Depends(get_async_db)],
+    quota: Annotated[QuotaService, Depends(get_quota_service)],
+) -> ReplayConversationService:
+    return ReplayConversationService(db, quota)  # a new service object is built on every call
+
+
+def get_eval_judge_manual_service(  # dependency provider for the manual judge service
+    db: Annotated[AsyncSession, Depends(get_async_db)],
+) -> EvalJudgeManualService:
+    return EvalJudgeManualService(db)  # a new service object is built on every call
diff --git a/api/app/features/evaluation/importers/user_export_markdown.py b/api/app/features/evaluation/importers/user_export_markdown.py
index 670590a..205d34b 100644
--- a/api/app/features/evaluation/importers/user_export_markdown.py
+++ b/api/app/features/evaluation/importers/user_export_markdown.py
@@ -49,3 +49,47 @@ def extract_dialogue_turns_from_export_md(text: str) -> list[tuple[str, str]]:
         raw_ai = ((ai_m.group(1) if ai_m else "") or "").strip()
         out.append((u, _normalize_export_ai_block(raw_ai)))
     return out
+
+
+_MEMOIR_SECTION_HEADER = re.compile(  # matches the whole ``##`` heading line that opens the memoir section
+    r"^##\s*回忆录章节（生成正文）\s*$",
+    re.MULTILINE | re.IGNORECASE,
+)
+
+_IMAGE_REF = re.compile(r"\{\{IMAGE:[^}]*\}\}\s*", re.DOTALL)  # ``{{IMAGE:...}}`` placeholder plus trailing whitespace
+
+
+def extract_source_user_id_from_export_md(text: str) -> str | None:
+    """Return the 36-char id from the export header ``**User ID:** `uuid` ``, or None."""
+    pattern = r"\*\*User ID:\*\*\s*`([0-9a-fA-F-]{36})`"
+    found = re.search(pattern, text)
+    # strip() cannot change a capture made of hex digits and dashes; kept for parity.
+    return found.group(1).strip() if found else None
+
+
+def extract_memoir_chapter_sections_from_export_md(text: str) -> list[tuple[str, str]]:
+    """Split the text after ``## 回忆录章节（生成正文）`` into ``(title, body)`` pairs.
+
+    Chunks start at lines beginning with ``## `` or ``### ``; ``{{IMAGE:...}}``
+    placeholders are removed and chunks lacking a title or a body are dropped.
+    """
+    header = _MEMOIR_SECTION_HEADER.search(text)
+    if header is None:
+        return []
+    remainder = text[header.end() :].strip()
+    if not remainder:
+        return []
+    sections: list[tuple[str, str]] = []
+    # Zero-width lookahead keeps the heading marker at the start of each chunk.
+    for chunk in re.split(r"\n(?=(?:###\s|##\s+))", remainder):
+        chunk = chunk.strip()
+        if not chunk.startswith("#"):
+            continue  # text that precedes the first heading
+        # partition() yields an empty remainder when the chunk is a lone heading.
+        heading, _, rest = chunk.partition("\n")
+        title = heading.lstrip("#").strip()
+        body = _IMAGE_REF.sub("", rest.strip())
+        body = re.sub(r"\n{3,}", "\n\n", body).strip()
+        if title and body:
+            sections.append((title, body))
+    return sections
diff --git a/api/app/features/evaluation/judge_manual_service.py b/api/app/features/evaluation/judge_manual_service.py
new file mode 100644
index 0000000..09ddedc
--- /dev/null
+++ b/api/app/features/evaluation/judge_manual_service.py
@@ -0,0 +1,372 @@
+"""手动触发 GLM 评审（不写 eval_runs）。"""
+
+from __future__ import annotations
+
+import re
+from collections.abc import AsyncIterator
+from typing import Any
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.core.dependencies import get_eval_judge_langchain_llm
+from app.core.logging import get_logger
+from app.features.evaluation.errors import (
+    EvaluationBadRequestError,
+    EvaluationNotFoundError,
+)
+from app.features.evaluation.execution_service import _assistant_text_for_eval_display
+from app.features.evaluation.judge_service import EvalJudgeService
+from app.features.evaluation.schemas import MemoirSectionBaselineOut
+from app.features.evaluation.session_catalog_service import SessionCatalogService
+from app.features.evaluation.user_export_fixtures import read_user_export_fixture
+from app.features.memoir.repo import get_chapters_for_memoir_list
+from app.features.story.repo import get_stories_for_user
+
+logger = get_logger(__name__)
+
+_MAX_JUDGE_MARKDOWN_CHARS = 20_000  # default clip length used by _clip_md_for_judge
+_MAX_EVAL_CHAPTERS = 30  # at most this many chapters are judged / snapshotted per call
+_MAX_EVAL_STORIES = 40  # at most this many stories are judged / snapshotted per call
+
+
+def _clip_md_for_judge(text: str, max_chars: int = _MAX_JUDGE_MARKDOWN_CHARS) -> str:
+    """Strip ``text``; past ``max_chars`` keep that prefix plus a truncation notice."""
+    stripped = (text or "").strip()
+    overflow = len(stripped) > max_chars
+    return f"{stripped[:max_chars]}\n\n…（已截断供评审）" if overflow else stripped
+
+
+def _transcript_from_export_turns(turns: list[tuple[str, str]]) -> str:
+    """Render export ``(user, ai)`` turns as one transcript; blank sides are omitted."""
+    lines: list[str] = []
+    for user_text, ai_text in turns:
+        user_text, ai_text = (user_text or "").strip(), (ai_text or "").strip()
+        if user_text:
+            lines.append(f"用户: {user_text}")
+        if ai_text:
+            lines.append(f"AI: {_assistant_text_for_eval_display(ai_text)}")
+    return "\n\n".join(lines)
+
+
+def _normalize_title_key(title: str) -> str:
+    """Lower-case key: trimmed, leading ``#`` marker dropped, whitespace runs -> one space."""
+    without_hashes = re.sub(r"^#+\s*", "", (title or "").strip().lower())
+    return re.sub(r"\s+", " ", without_hashes)
+
+
+def _baseline_for_chapter_title(
+    baselines: list[MemoirSectionBaselineOut],
+    chapter_title: str,
+    index: int,
+) -> MemoirSectionBaselineOut | None:
+    """Pick the baseline for a chapter: normalized-title match first, else by position."""
+    wanted = _normalize_title_key(chapter_title)
+    by_title = (b for b in baselines if _normalize_title_key(b.title) == wanted)
+    matched = next(by_title, None)
+    if matched is not None:
+        return matched
+    # No title match: fall back to the baseline at the same list position, if any.
+    return baselines[index] if 0 <= index < len(baselines) else None
+
+
+class EvalJudgeManualService:
+    def __init__(self, db: AsyncSession) -> None:
+        self._db = db  # async session handed to the catalog / memoir / story lookups below
+
+    async def judge_conversation(
+        self,
+        conversation_id: str,
+        fixture_filename: str | None,
+    ) -> dict[str, Any]:
+        """Judge a stored conversation and, when a fixture is named, its baseline too.
+
+        Bad input raises EvaluationBadRequestError / EvaluationNotFoundError; judge
+        failures are reported through the returned ``errors`` list instead.
+        """
+        cid = (conversation_id or "").strip()
+        if not cid:
+            raise EvaluationBadRequestError("conversation_id is required")
+        dialogue = await SessionCatalogService(self._db).get_session_dialogue(cid)
+        if not dialogue:
+            raise EvaluationNotFoundError("conversation not found")
+        rendered: list[str] = []
+        for msg in dialogue.messages:
+            content = msg.content or ""
+            if (msg.role or "").lower() == "human":
+                rendered.append(f"用户: {content}")
+            else:
+                rendered.append(f"AI: {_assistant_text_for_eval_display(content)}")
+        replay_transcript = "\n\n".join(rendered)
+        if not replay_transcript.strip():
+            raise EvaluationBadRequestError("no messages to judge")
+
+        fn = (fixture_filename or "").strip() or None
+        baseline_transcript = ""
+        if fn is not None:
+            try:
+                fixture_turns, _ = read_user_export_fixture(fn)
+                baseline_transcript = _transcript_from_export_turns(fixture_turns)
+            except ValueError as exc:
+                raise EvaluationBadRequestError(str(exc)) from exc
+            except FileNotFoundError as exc:
+                raise EvaluationNotFoundError("fixture not found") from exc
+
+        errors: list[str] = []
+        judge = EvalJudgeService(get_eval_judge_langchain_llm())
+        baseline_judge_dict: dict[str, Any] | None = None
+        if baseline_transcript.strip():
+            base = await judge.judge_conversation(full_transcript=baseline_transcript)
+            if base:
+                baseline_judge_dict = base.model_dump()
+            else:
+                errors.append("baseline_glm_failed")
+        elif fn is not None:
+            errors.append("baseline_transcript_empty")
+
+        # The replay side is always judged, with or without a baseline.
+        replay = await judge.judge_conversation(full_transcript=replay_transcript)
+        if not replay:
+            errors.append("replay_glm_failed")
+        return {
+            "conversation_id": cid,
+            "fixture_filename": fn,
+            "baseline_transcript": baseline_transcript,
+            "replay_transcript": replay_transcript,
+            "baseline_judge": baseline_judge_dict,
+            "replay_judge": replay.model_dump() if replay else None,
+            "errors": errors,
+        }
+
+    async def iter_conversation_judge_sse(
+        self,
+        conversation_id: str,
+        fixture_filename: str | None,
+    ) -> AsyncIterator[dict[str, Any]]:
+        """SSE event source: baseline judge result, then replay judge result, then streamed comparison text."""
+        cid = (conversation_id or "").strip()
+        if not cid:
+            yield {  # validation failures are emitted as "error" events rather than raised
+                "event": "error",
+                "phase": "validate",
+                "message": "conversation_id is required",
+            }
+            return
+
+        catalog = SessionCatalogService(self._db)
+        dialogue = await catalog.get_session_dialogue(cid)
+        if not dialogue:
+            yield {
+                "event": "error",
+                "phase": "load",
+                "message": "conversation not found",
+            }
+            return
+
+        parts: list[str] = []  # one "<label>: <text>" entry per stored message
+        for m in dialogue.messages:
+            r = (m.role or "").lower()
+            label = "用户" if r == "human" else "AI"
+            raw = m.content or ""
+            out = _assistant_text_for_eval_display(raw) if r != "human" else raw  # only non-human text goes through the display helper
+            parts.append(f"{label}: {out}")
+        replay_transcript = "\n\n".join(parts)
+        if not replay_transcript.strip():
+            yield {"event": "error", "phase": "load", "message": "no messages to judge"}
+            return
+
+        fn = (fixture_filename or "").strip() or None  # blank filename is treated as "no fixture"
+        baseline_transcript = ""
+        if fn:
+            try:
+                turns, _ = read_user_export_fixture(fn)
+                baseline_transcript = _transcript_from_export_turns(turns)
+            except ValueError as e:
+                yield {"event": "error", "phase": "fixture", "message": str(e)}
+                return
+            except FileNotFoundError:
+                yield {
+                    "event": "error",
+                    "phase": "fixture",
+                    "message": "fixture not found",
+                }
+                return
+
+        judge_llm = get_eval_judge_langchain_llm()
+        if not judge_llm:  # stop before emitting "meta" when no judge model is available
+            yield {
+                "event": "error",
+                "phase": "config",
+                "message": "评审 LLM 未配置（eval_judge_api_key / zhipu_api_key）",
+            }
+            return
+
+        judge = EvalJudgeService(judge_llm)
+        yield {"event": "meta", "conversation_id": cid, "fixture_filename": fn}
+
+        if not baseline_transcript.strip():  # no usable baseline: warn, then judge the replay side only
+            yield {
+                "event": "warning",
+                "message": "未提供基准 MD 或基准无文本：仅对回放对话打分并输出单侧改进建议",
+            }
+
+        baseline_judge = None
+        if baseline_transcript.strip():
+            baseline_judge = await judge.judge_conversation(
+                full_transcript=baseline_transcript
+            )
+            yield {
+                "event": "baseline_judge",
+                "ok": baseline_judge is not None,
+                "judge": baseline_judge.model_dump() if baseline_judge else None,
+            }
+            if not baseline_judge:  # not fatal: the replay side is still judged below
+                yield {
+                    "event": "error",
+                    "phase": "baseline_glm",
+                    "message": "基准整体打分失败（密钥、限流或 JSON 解析失败，见服务端日志）",
+                }
+        else:
+            yield {  # a "baseline_judge" event is still emitted, flagged as skipped
+                "event": "baseline_judge",
+                "ok": False,
+                "skipped": True,
+                "judge": None,
+            }
+
+        replay_judge = await judge.judge_conversation(full_transcript=replay_transcript)
+        yield {
+            "event": "replay_judge",
+            "ok": replay_judge is not None,
+            "judge": replay_judge.model_dump() if replay_judge else None,
+        }
+        if not replay_judge:  # a failed replay judge ends the stream without a comparison
+            yield {
+                "event": "error",
+                "phase": "replay_glm",
+                "message": "回放对话整体 GLM 打分失败（空密钥、限流或 JSON 解析失败，见服务端日志）",
+            }
+            yield {"event": "done"}
+            return
+
+        async for piece in judge.stream_conversation_compare(
+            baseline_transcript=baseline_transcript,
+            replay_transcript=replay_transcript,
+            baseline_judge=baseline_judge,
+            replay_judge=replay_judge,
+        ):
+            if piece:  # empty pieces are not forwarded
+                yield {"event": "compare_delta", "text": piece}
+
+        yield {"event": "done"}
+
+    async def judge_memoir_for_user(
+        self,
+        user_id: str,
+        baseline_sections: list[MemoirSectionBaselineOut] | None,
+    ) -> dict[str, Any]:
+        """Judge each non-empty chapter and story of the user, one judge call apiece.
+
+        Either phase is best-effort: an exception is logged and partial results kept.
+        """
+        uid = (user_id or "").strip()
+        if not uid:
+            raise EvaluationBadRequestError("user_id is required")
+        judge = EvalJudgeService(get_eval_judge_langchain_llm())
+        baselines = list(baseline_sections or [])
+
+        chapter_results: list[dict[str, Any]] = []
+        try:
+            chapters = await get_chapters_for_memoir_list(
+                uid, self._db, active_only=True, is_new_only=None
+            )
+            for idx, chap in enumerate(chapters[:_MAX_EVAL_CHAPTERS]):
+                current = (chap.canonical_markdown or "").strip()
+                if not current:
+                    continue
+                base = _baseline_for_chapter_title(baselines, chap.title or "", idx)
+                sections = [f"# 章节：{chap.title}\n\n"]
+                if base and (base.body or "").strip():
+                    excerpt = _clip_md_for_judge(base.body, max_chars=6000)
+                    sections.append(f"## 导出基线（节选）\n\n{excerpt}\n\n")
+                sections.append(f"## 当前成稿\n\n{_clip_md_for_judge(current)}")
+                verdict = await judge.judge_memoir(memoir_markdown="".join(sections))
+                chapter_results.append(
+                    {
+                        "id": chap.id,
+                        "title": chap.title,
+                        "order_index": chap.order_index,
+                        "baseline_title": base.title if base else None,
+                        "judge": verdict.model_dump() if verdict else None,
+                    }
+                )
+        except Exception as exc:
+            logger.warning("manual memoir chapter judges failed: {}", exc)
+
+        story_results: list[dict[str, Any]] = []
+        try:
+            stories = await get_stories_for_user(self._db, uid, status="active")
+            for story in stories[:_MAX_EVAL_STORIES]:
+                text = (story.canonical_markdown or "").strip()
+                if not text:
+                    continue
+                md = f"# 故事：{story.title}\n\n{_clip_md_for_judge(text)}"
+                verdict = await judge.judge_memoir(memoir_markdown=md)
+                story_results.append(
+                    {
+                        "id": story.id,
+                        "title": story.title,
+                        "stage": story.stage,
+                        "judge": verdict.model_dump() if verdict else None,
+                    }
+                )
+        except Exception as exc:
+            logger.warning("manual memoir story judges failed: {}", exc)
+
+        return {
+            "user_id": uid,
+            "chapter_results": chapter_results,
+            "story_results": story_results,
+        }
+
+    async def memoir_snapshot(self, user_id: str) -> dict[str, Any]:
+        """Return the user's active chapters and stories as plain dicts (best-effort)."""
+        uid = (user_id or "").strip()
+        if not uid:
+            raise EvaluationBadRequestError("user_id is required")
+
+        chapters_out: list[dict[str, Any]] = []
+        try:
+            chapters = await get_chapters_for_memoir_list(
+                uid, self._db, active_only=True, is_new_only=None
+            )
+            for chapter in chapters[:_MAX_EVAL_CHAPTERS]:
+                row = {
+                    "id": chapter.id,
+                    "title": chapter.title,
+                    "category": chapter.category,
+                    "order_index": chapter.order_index,
+                    "canonical_markdown": chapter.canonical_markdown,
+                }
+                chapters_out.append(row)
+        except Exception as exc:
+            # Best-effort: keep the rows gathered so far and move on to stories.
+            logger.warning("memoir snapshot chapters failed: {}", exc)
+        stories_out: list[dict[str, Any]] = []
+        try:
+            stories = await get_stories_for_user(self._db, uid, status="active")
+            for story in stories[:_MAX_EVAL_STORIES]:
+                row = {
+                    "id": story.id,
+                    "title": story.title,
+                    "stage": story.stage,
+                    "canonical_markdown": story.canonical_markdown,
+                }
+                stories_out.append(row)
+        except Exception as exc:
+            logger.warning("memoir snapshot stories failed: {}", exc)
+
+        return {
+            "user_id": uid,
+            "chapters": chapters_out,
+            "stories": stories_out,
+        }
diff --git a/api/app/features/evaluation/judge_service.py b/api/app/features/evaluation/judge_service.py
index 51569ac..b52e4f2 100644
--- a/api/app/features/evaluation/judge_service.py
+++ b/api/app/features/evaluation/judge_service.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+from collections.abc import AsyncIterator
 from typing import Any
 
 from app.core.llm_call import LLMCallError, allm_json_call
@@ -12,6 +13,7 @@ from app.features.evaluation.judge_schemas import (
     TurnJudgeOutput,
 )
 from app.features.evaluation.rubrics.conversation_v1 import (
+    COMPARE_CONV_STREAM_HINT,
     CONV_JUDGE_INSTRUCTIONS,
     TURN_JUDGE_INSTRUCTIONS,
 )
@@ -21,7 +23,9 @@ logger = get_logger(__name__)
 
 _TURN_MAX = 768
 _CONV_MAX = 8192
+_CONV_JUDGE_JSON_MAX = 2048  # max_tokens passed to the conversation-level judge JSON call
 _MEMOIR_MAX = 12000
+_COMPARE_STREAM_MAX = 6144  # max_tokens bound onto the model for the streamed comparison
 
 
 class EvalJudgeService:
@@ -75,13 +79,81 @@ class EvalJudgeService:
                 self._llm,
                 prompt,
                 ConversationJudgeOutput,
-                max_tokens=_TURN_MAX,
+                max_tokens=_CONV_JUDGE_JSON_MAX,
                 agent="EvalJudgeService.judge_conversation",
             )
         except LLMCallError as e:
             logger.warning("conversation judge failed: {}", e)
             return None
 
+    async def stream_conversation_compare(
+        self,
+        *,
+        baseline_transcript: str,
+        replay_transcript: str,
+        baseline_judge: ConversationJudgeOutput | None,
+        replay_judge: ConversationJudgeOutput | None,
+    ) -> AsyncIterator[str]:
+        """Stream a Chinese plain-text comparison and suggestions (not JSON).
+
+        Yields text pieces; problems are yielded as bracketed notices, not raised.
+        """
+        if not self._llm:
+            yield "[错误] 未配置评审模型 API Key（eval_judge_api_key / zhipu_api_key）"
+            return
+        b_tr = (baseline_transcript or "").strip()[:_CONV_MAX]
+        r_tr = (replay_transcript or "").strip()[:_CONV_MAX]
+        # No ensure_ascii keyword: Pydantic v2 already emits non-ASCII text unescaped,
+        # and older v2 releases reject that keyword with a TypeError.
+        b_json = baseline_judge.model_dump_json() if baseline_judge else "null"
+        r_json = replay_judge.model_dump_json() if replay_judge else "null"
+        if baseline_judge and replay_judge:
+            prompt = f"""你是访谈对话评测专家。下面给出两份完整对话 transcript 及各自的整体打分（JSON）。请用中文直接写正文（不要用 JSON、不要用 Markdown 代码块）：
+
+【A：导出基准对话】（历史快照：用户与当时导出的线上 AI，多轮合并为一篇）
+{b_tr}
+
+【B：本次回放/新测对话】（用户句与基准对齐，AI 为当前后端重新生成）
+{r_tr}
+
+【A 的整体评分 JSON】
+{b_json}
+
+【B 的整体评分 JSON】
+{r_json}
+
+请依次撰写：
+1) 两段对话在整体体验上的主要差异（共情、追问、重复感、自然度等）；
+2) B 相对 A 的优点与不足；
+3) 若 B 在关键维度明显弱于 A，给出可操作的改进方向（系统提示、访谈策略、模型或温度等）。
+
+笔调简洁、偏执行清单。"""
+        elif replay_judge:
+            prompt = f"""{COMPARE_CONV_STREAM_HINT}
+
+【回放/新测 transcript】
+{r_tr}
+
+【整体评分 JSON】
+{r_json}
+"""
+        else:
+            yield "[错误] 缺少回放对话评分，无法生成建议"
+            return
+
+        llm = self._llm
+        # Cap the streamed completion length when the model object supports bind().
+        if hasattr(llm, "bind"):
+            llm = llm.bind(max_tokens=_COMPARE_STREAM_MAX)
+        try:
+            async for chunk in llm.astream(prompt):
+                piece = getattr(chunk, "content", None)
+                if piece:
+                    yield piece
+        except Exception as e:
+            logger.warning("conversation compare stream failed: {}", e)
+            yield f"\n\n[流式输出中断：{e}]"
+
     async def judge_memoir(self, *, memoir_markdown: str) -> MemoirJudgeOutput | None:
         if not self._llm:
             return None
diff --git a/api/app/features/evaluation/replay_service.py b/api/app/features/evaluation/replay_service.py
new file mode 100644
index 0000000..27d89fe
--- /dev/null
+++ b/api/app/features/evaluation/replay_service.py
@@ -0,0 +1,172 @@
+"""内部评测：按 App 一致路径回放用户轮次（segment + orchestrator + memoir 队列）。"""
+
+from __future__ import annotations
+
+import secrets
+import uuid
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.core.db import utc_now
+from app.core.logging import get_logger
+from app.core.security import hash_password
+from app.features.auth import repo as auth_repo
+from app.features.conversation.models import Conversation
+from app.features.conversation.service import ConversationService
+from app.features.conversation.ws.pipeline import (
+    background_runner,
+    process_user_message,
+)
+from app.features.evaluation.errors import (
+    EvaluationBadRequestError,
+    EvaluationNotFoundError,
+)
+from app.features.evaluation.user_export_fixtures import read_user_export_fixture
+from app.features.quota.service import QuotaService
+from app.features.user.models import User
+
+logger = get_logger(__name__)
+
+
+class ReplayConversationService:
+    def __init__(self, db: AsyncSession, quota_service: QuotaService) -> None:
+        self._db = db  # async session used for user / conversation lookups and commits
+        self._quota = quota_service  # forwarded to every ConversationService built here
+
+    async def create_eval_sandbox(self) -> tuple[str, str, str, str]:
+        """Create a throwaway eval user (unique fake phone) plus a fresh conversation.
+
+        Returns ``(user_id, conversation_id, phone, nickname)``.
+        """
+        user_id = str(uuid.uuid4())
+        for _attempt in range(8):
+            phone = f"eval_{secrets.token_hex(10)}"
+            if not await auth_repo.get_user_by_phone(phone, self._db):
+                break
+        else:  # all 8 random candidates were already taken
+            raise EvaluationBadRequestError("could not allocate eval phone")
+
+        sandbox_user = User(
+            id=user_id,
+            phone=phone,
+            password_hash=hash_password(secrets.token_urlsafe(24)),
+            nickname="评测临时用户",
+            subscription_type="free",
+            created_at=utc_now(),
+        )
+        await auth_repo.create_user(sandbox_user, self._db)
+        await self._db.commit()
+        await self._db.refresh(sandbox_user)
+
+        conversation_id = str(uuid.uuid4())
+        service = ConversationService(self._db, self._quota)
+        conv, err = await service.ensure_ws_connection(conversation_id, user_id)
+        if err or not conv:
+            raise EvaluationBadRequestError(err or "failed to create conversation")
+
+        logger.info(
+            "eval sandbox user_id={} phone={} conversation_id={}",
+            user_id,
+            phone,
+            conversation_id,
+        )
+        return user_id, conversation_id, phone, sandbox_user.nickname
+
+    async def bootstrap_conversation(self, user_id: str) -> str:
+        """Ensure a conversation under a fresh id for an existing user; return that id."""
+        uid = (user_id or "").strip()
+        if not uid:
+            raise EvaluationBadRequestError("user_id is required")
+        if not await self._db.get(User, uid):
+            raise EvaluationBadRequestError("user not found")
+        new_id = str(uuid.uuid4())
+        service = ConversationService(self._db, self._quota)
+        conv, err = await service.ensure_ws_connection(new_id, uid)
+        if err or not conv:
+            raise EvaluationBadRequestError(err or "failed to create conversation")
+        logger.info(
+            "eval replay bootstrap conversation_id={} user_id={}",
+            new_id,
+            uid,
+        )
+        return new_id
+
+    async def replay_fixture(
+        self,
+        *,
+        conversation_id: str,
+        fixture_filename: str,
+        flush_memoir_after: bool,
+        skip_tts: bool,
+    ) -> tuple[int, list[str]]:  # (turns replayed, user utterances that were sent)
+        try:
+            fixture_turns, _ = read_user_export_fixture(fixture_filename)
+        except ValueError as exc:
+            raise EvaluationBadRequestError(str(exc)) from exc
+        except FileNotFoundError:
+            raise EvaluationNotFoundError("fixture not found") from None
+        utterances = [s for s in ((u or "").strip() for u, _ in fixture_turns) if s]
+        if not utterances:  # only the user side of each fixture turn is replayed
+            raise EvaluationBadRequestError("fixture produced no user utterances")
+        replayed = await self.replay_utterances(
+            conversation_id=conversation_id,
+            utterances=utterances,
+            flush_memoir_after=flush_memoir_after,
+            skip_tts=skip_tts,
+        )
+        return replayed, utterances
+
+    async def replay_utterances(
+        self,
+        *,
+        conversation_id: str,
+        utterances: list[str],
+        flush_memoir_after: bool,
+        skip_tts: bool,
+    ) -> int:
+        """Replay user utterances through ``process_user_message``, one turn at a time.
+
+        Returns the number of non-blank utterances processed.
+        """
+        cid = (conversation_id or "").strip()
+        if not cid:
+            raise EvaluationBadRequestError("conversation_id is required")
+        conv = await self._db.get(Conversation, cid)
+        if not conv or conv.deleted_at is not None:
+            raise EvaluationNotFoundError("conversation not found")
+        user = await self._db.get(User, conv.user_id)
+        if not user:
+            raise EvaluationBadRequestError("user not found for conversation")
+        service = ConversationService(self._db, self._quota)
+        count = 0
+        stripped = ((raw or "").strip() for raw in utterances)
+        for text in filter(None, stripped):
+            segment = await service.create_user_segment(conv, conv.user_id, text)
+            ts = segment.created_at or conv.last_message_at
+            # Enqueue the segment with the background runner before generating the reply.
+            await background_runner.queue_message(
+                conv.user_id, segment.id, text_char_count=len(text)
+            )
+            await process_user_message(
+                conversation_id=cid,
+                user_message=text,
+                conversation=conv,
+                segment=segment,
+                db=self._db,
+                user=user,
+                user_message_timestamp=ts,
+                force_skip_tts=skip_tts,
+            )
+            count += 1
+
+        if flush_memoir_after and conv.user_id:
+            await background_runner.flush_pending(conv.user_id)
+
+        logger.info(
+            "eval replay done conversation_id={} turns={} flush={} skip_tts={}",
+            cid,
+            count,
+            flush_memoir_after,
+            skip_tts,
+        )
+        return count
diff --git a/api/app/features/evaluation/router.py b/api/app/features/evaluation/router.py
index 2568480..1b4317c 100644
--- a/api/app/features/evaluation/router.py
+++ b/api/app/features/evaluation/router.py
@@ -2,32 +2,55 @@
 
 from __future__ import annotations
 
+import json
 from typing import Annotated
 
 from fastapi import APIRouter, Depends, HTTPException, Query
+from fastapi.responses import StreamingResponse
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.core.db import get_async_db
 from app.features.evaluation.admin_service import EvaluationAdminService
-from app.features.evaluation.deps import get_evaluation_admin_service
+from app.features.evaluation.deps import (
+    get_eval_judge_manual_service,
+    get_evaluation_admin_service,
+    get_replay_conversation_service,
+)
 from app.features.evaluation.errors import (
     EvaluationBadRequestError,
     EvaluationNotFoundError,
 )
+from app.features.evaluation.importers.user_export_markdown import (
+    extract_memoir_chapter_sections_from_export_md,
+    extract_source_user_id_from_export_md,
+)
 from app.features.evaluation.internal_auth import InternalEvalAuth
+from app.features.evaluation.judge_manual_service import EvalJudgeManualService
 from app.features.evaluation.presenters import case_out, run_out
+from app.features.evaluation.replay_service import ReplayConversationService
 from app.features.evaluation.schemas import (
     CaseCreate,
     CaseOut,
     EvalRunOut,
+    EvalSandboxOut,
     ExperimentCreate,
     ExperimentDetailOut,
     ExperimentOut,
     GateVerdictOut,
     ImportJsonCaseBody,
     ImportMarkdownBody,
+    ManualJudgeConversationBody,
+    ManualJudgeConversationOut,
+    ManualJudgeConversationStreamBody,
+    ManualJudgeMemoirBody,
+    ManualJudgeMemoirOut,
+    MemoirSectionBaselineOut,
     RegressionSetCreate,
     RegressionSetOut,
+    ReplayBootstrapBody,
+    ReplayBootstrapOut,
+    ReplayConversationBody,
+    ReplayConversationOut,
     SessionDialogueOut,
     SessionEvalRunsOut,
     SessionListItem,
@@ -37,10 +60,12 @@ from app.features.evaluation.schemas import (
     UserExportFixtureDetailOut,
     UserExportFixtureListOut,
     UserExportFixtureTurnOut,
+    UserMemoirSnapshotOut,
     VersionCreate,
     VersionOut,
 )
 from app.features.evaluation.session_catalog_service import SessionCatalogService
+from app.features.evaluation.user_export_fixtures import read_user_export_fixture
 
 router = APIRouter(tags=["internal-evaluation"])
 
@@ -209,6 +234,175 @@ async def list_session_evaluation_runs(
     return await svc.list_session_evaluation_runs(conversation_id)
 
 
+@router.post("/sessions/replay-bootstrap", response_model=ReplayBootstrapOut)
+async def replay_bootstrap(
+    body: ReplayBootstrapBody,
+    _auth: InternalEvalAuth,
+    replay: Annotated[
+        ReplayConversationService, Depends(get_replay_conversation_service)
+    ],
+):
+    try:
+        cid = await replay.bootstrap_conversation(body.user_id)
+    except EvaluationBadRequestError as e:
+        raise _eval_http_exc(e) from e
+    return ReplayBootstrapOut(conversation_id=cid)
+
+
+@router.post("/sessions/eval-sandbox", response_model=EvalSandboxOut)
+async def create_eval_sandbox(
+    _auth: InternalEvalAuth,
+    replay: Annotated[
+        ReplayConversationService, Depends(get_replay_conversation_service)
+    ],
+):
+    try:
+        uid, cid, phone, nick = await replay.create_eval_sandbox()
+    except EvaluationBadRequestError as e:
+        raise _eval_http_exc(e) from e
+    return EvalSandboxOut(
+        user_id=uid,
+        conversation_id=cid,
+        phone=phone,
+        nickname=nick,
+    )
+
+
+@router.post("/replay/conversation", response_model=ReplayConversationOut)
+async def replay_conversation(
+    body: ReplayConversationBody,
+    _auth: InternalEvalAuth,
+    replay: Annotated[
+        ReplayConversationService, Depends(get_replay_conversation_service)
+    ],
+):
+    if body.fixture_filename and body.user_utterances:
+        raise HTTPException(
+            status_code=400,
+            detail="provide only one of fixture_filename or user_utterances",
+        )
+    try:
+        if body.fixture_filename:
+            fn = body.fixture_filename.strip()
+            n, echo = await replay.replay_fixture(
+                conversation_id=body.conversation_id,
+                fixture_filename=fn,
+                flush_memoir_after=body.flush_memoir_after,
+                skip_tts=body.skip_tts,
+            )
+        elif body.user_utterances is not None:
+            utt = [str(u) for u in body.user_utterances if str(u).strip()]
+            if not utt:
+                raise EvaluationBadRequestError("user_utterances is empty")
+            n = await replay.replay_utterances(
+                conversation_id=body.conversation_id,
+                utterances=utt,
+                flush_memoir_after=body.flush_memoir_after,
+                skip_tts=body.skip_tts,
+            )
+            echo = utt
+        else:
+            raise EvaluationBadRequestError(
+                "fixture_filename or user_utterances required"
+            )
+    except EvaluationNotFoundError as e:
+        raise _eval_http_exc(e) from e
+    except EvaluationBadRequestError as e:
+        raise _eval_http_exc(e) from e
+    return ReplayConversationOut(
+        conversation_id=body.conversation_id,
+        turns_replayed=n,
+        utterances_echo=echo,
+    )
+
+
+@router.post("/judge/conversation", response_model=ManualJudgeConversationOut)
+async def judge_conversation_manual(
+    body: ManualJudgeConversationBody,
+    _auth: InternalEvalAuth,
+    judge_svc: Annotated[
+        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
+    ],
+):
+    try:
+        payload = await judge_svc.judge_conversation(
+            body.conversation_id,
+            body.fixture_filename,
+        )
+    except EvaluationNotFoundError as e:
+        raise _eval_http_exc(e) from e
+    except EvaluationBadRequestError as e:
+        raise _eval_http_exc(e) from e
+    return ManualJudgeConversationOut.model_validate(payload)
+
+
+@router.post("/judge/conversation-stream")
+async def judge_conversation_manual_stream(
+    body: ManualJudgeConversationStreamBody,
+    _auth: InternalEvalAuth,
+    judge_svc: Annotated[
+        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
+    ],
+):
+    async def event_iter():
+        try:
+            async for evt in judge_svc.iter_conversation_judge_sse(
+                body.conversation_id,
+                body.fixture_filename,
+            ):
+                yield f"data: {json.dumps(evt, ensure_ascii=False)}\n\n"
+        except Exception as e:
+            err = json.dumps(
+                {"event": "error", "phase": "server", "message": str(e)},
+                ensure_ascii=False,
+            )
+            yield f"data: {err}\n\n"
+            yield f"data: {json.dumps({'event': 'done'}, ensure_ascii=False)}\n\n"
+
+    return StreamingResponse(
+        event_iter(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no",
+        },
+    )
+
+
+@router.post("/judge/memoir-chapters", response_model=ManualJudgeMemoirOut)
+async def judge_memoir_chapters_manual(
+    body: ManualJudgeMemoirBody,
+    _auth: InternalEvalAuth,
+    judge_svc: Annotated[
+        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
+    ],
+):
+    try:
+        payload = await judge_svc.judge_memoir_for_user(
+            body.user_id,
+            body.baseline_sections,
+        )
+    except EvaluationBadRequestError as e:
+        raise _eval_http_exc(e) from e
+    return ManualJudgeMemoirOut.model_validate(payload)
+
+
+@router.get("/users/{user_id}/memoir-snapshot", response_model=UserMemoirSnapshotOut)
+async def get_user_memoir_snapshot(
+    user_id: str,
+    _auth: InternalEvalAuth,
+    judge_svc: Annotated[
+        EvalJudgeManualService, Depends(get_eval_judge_manual_service)
+    ],
+):
+    try:
+        payload = await judge_svc.memoir_snapshot(user_id)
+    except EvaluationBadRequestError as e:
+        raise _eval_http_exc(e) from e
+    return UserMemoirSnapshotOut.model_validate(payload)
+
+
 @router.get(
     "/fixtures/user-exports",
     response_model=UserExportFixtureListOut,
@@ -227,19 +421,23 @@ async def list_user_export_fixtures(
 async def get_user_export_fixture(
     filename: str,
     _auth: InternalEvalAuth,
-    svc: Annotated[EvaluationAdminService, Depends(get_evaluation_admin_service)],
 ):
     try:
-        turns = svc.load_user_export_fixture_turns(filename)
+        turns, raw_md = read_user_export_fixture(filename)
     except ValueError:
         raise HTTPException(
             status_code=400, detail="invalid fixture filename"
         ) from None
     except FileNotFoundError:
         raise HTTPException(status_code=404, detail="fixture not found") from None
+    memoir_tuples = extract_memoir_chapter_sections_from_export_md(raw_md)
     return UserExportFixtureDetailOut(
         filename=filename,
         turns=[UserExportFixtureTurnOut(user=u, ai=a) for u, a in turns],
+        source_user_id=extract_source_user_id_from_export_md(raw_md),
+        memoir_sections=[
+            MemoirSectionBaselineOut(title=t, body=b) for t, b in memoir_tuples
+        ],
     )
 
 
diff --git a/api/app/features/evaluation/rubrics/conversation_v1.py b/api/app/features/evaluation/rubrics/conversation_v1.py
index f819eaa..a052119 100644
--- a/api/app/features/evaluation/rubrics/conversation_v1.py
+++ b/api/app/features/evaluation/rubrics/conversation_v1.py
@@ -22,3 +22,10 @@ CONV_JUDGE_INSTRUCTIONS = """你是访谈整段对话评审。给定完整 trans
 dimension_scores 建议至少包含：emotion, information, structure, repetition, naturalness（各 0-100 相对分量即可），用于反映整段是否重复盘问、是否自然；另可有 rationale。
 
 只输出 JSON：total_score, dimension_scores, rationale。"""
+
+
+COMPARE_CONV_STREAM_HINT = """你是访谈对话评测专家。下面给出一份「回放/新测」完整对话 transcript 及其整体评分（JSON）。请用中文直接写正文（不要用 JSON）：
+1) 对这段对话的整体评价与风险点；
+2) 可操作的改进建议（提示词、流程、模型参数等）。
+
+笔调简洁、可执行。"""
diff --git a/api/app/features/evaluation/schemas.py b/api/app/features/evaluation/schemas.py
index 386f68a..27e227a 100644
--- a/api/app/features/evaluation/schemas.py
+++ b/api/app/features/evaluation/schemas.py
@@ -133,9 +133,100 @@ class UserExportFixtureListOut(BaseModel):
     items: list[str]
 
 
+class MemoirSectionBaselineOut(BaseModel):
+    title: str
+    body: str
+
+
 class UserExportFixtureDetailOut(BaseModel):
     filename: str
     turns: list[UserExportFixtureTurnOut]
+    source_user_id: str | None = None
+    memoir_sections: list[MemoirSectionBaselineOut] = Field(default_factory=list)
+
+
+class ReplayBootstrapBody(BaseModel):
+    user_id: str
+
+
+class ReplayBootstrapOut(BaseModel):
+    conversation_id: str
+
+
+class EvalSandboxOut(BaseModel):
+    """内部评测专用：一次性临时账号 + 空白会话，不落真实手机号业务。"""
+
+    user_id: str
+    conversation_id: str
+    phone: str
+    nickname: str
+
+
+class ReplayConversationBody(BaseModel):
+    conversation_id: str
+    fixture_filename: str | None = None
+    user_utterances: list[str] | None = None
+    flush_memoir_after: bool = True
+    skip_tts: bool = True
+
+
+class ReplayConversationOut(BaseModel):
+    conversation_id: str
+    turns_replayed: int
+    utterances_echo: list[str] = Field(default_factory=list)
+
+
+class ManualJudgeConversationBody(BaseModel):
+    conversation_id: str
+    # fixture_filename: the MD fixture currently selected in the eval console; used for the baseline transcript / overall scoring.
+    fixture_filename: str | None = None
+
+
+class ManualJudgeConversationStreamBody(BaseModel):
+    conversation_id: str
+    fixture_filename: str | None = None
+
+
+class ManualJudgeConversationOut(BaseModel):
+    conversation_id: str
+    fixture_filename: str | None = None
+    baseline_transcript: str = ""
+    replay_transcript: str
+    baseline_judge: dict[str, Any] | None = None
+    replay_judge: dict[str, Any] | None = None
+    errors: list[str] = Field(default_factory=list)
+
+
+class ManualJudgeMemoirBody(BaseModel):
+    user_id: str
+    baseline_sections: list[MemoirSectionBaselineOut] | None = None
+
+
+class ManualJudgeMemoirOut(BaseModel):
+    user_id: str
+    chapter_results: list[dict[str, Any]] = Field(default_factory=list)
+    story_results: list[dict[str, Any]] = Field(default_factory=list)
+
+
+class MemoirChapterSnapOut(BaseModel):
+    id: str
+    title: str
+    category: str | None = None
+    order_index: int | None = None
+    canonical_markdown: str | None = None
+
+
+class MemoirStorySnapOut(BaseModel):
+    id: str
+    title: str
+    stage: str | None = None
+    canonical_markdown: str | None = None
+
+
+class UserMemoirSnapshotOut(BaseModel):
+    user_id: str
+    chapters: list[MemoirChapterSnapOut]
+    stories: list[MemoirStorySnapOut]
 
 
 class SnapshotFromConversationBody(BaseModel):
diff --git a/api/docs/internal-eval.md b/api/docs/internal-eval.md
index 25b80b1..732d454 100644
--- a/api/docs/internal-eval.md
+++ b/api/docs/internal-eval.md
@@ -55,6 +55,39 @@ VITE_EVAL_API_BASE=http://127.0.0.1:8001 VITE_EVAL_API_KEY=与上同 npm run dev
 
 浏览器 `EventSource` 无法带自定义 Header，流式端点支持 **query** `?key=`，与 `X-Internal-Eval-Key` 等效。
 
+## 评测 Web：两大模块
+
+- **对话评测**：选 `api/tests/user_exports/*.md` 为基准 →「新建评测会话」或填写已有 `conversation_id` →「执行回放」→「GLM 评审对话」。
+- **回忆录章节**：同一套 fixture 会带上导出 MD 中的 `source_user_id` 与 `memoir_sections`；「刷新库中章节/故事」拉 DB 快照 →「GLM 评审章节」（基线节选与当前成稿一并送评）。
+
+## 真实链路透传回放（与 App 一致）
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| `POST` | `/internal/api/evaluation/sessions/eval-sandbox` | 无 body：新建**临时用户**（`eval_` 伪手机号）+ 空白 `conversation_id` |
+| `POST` | `/internal/api/evaluation/sessions/replay-bootstrap` | body：`{ "user_id" }`，在已有用户下返回新 `conversation_id` |
+| `POST` | `/internal/api/evaluation/replay/conversation` | body：`conversation_id`、`fixture_filename` **或** `user_utterances`；可选 `flush_memoir_after`（默认 true）、`skip_tts`（默认 true） |
+
+每轮等价于 WebSocket 文本路径：`create_user_segment` → `process_user_message`（内部可 `force_skip_tts`）→ `background_runner.queue_message`。
+
+- **TTS**：回放默认 `skip_tts: true`，不在评测台跑语音合成。
+- **Memory / 回忆录管线**：`queue_message` 与末尾 `flush_pending` 依赖 **Celery worker**（`process_memoir_phase1` 等）；仅起 internal API 未起 worker 时，对话会落库但章节异步不会推进。
+
+## 手动 GLM（不写 `eval_runs` 表）
+
+| 方法 | 路径 | 说明 |
+|------|------|------|
+| `POST` | `/internal/api/evaluation/judge/conversation` | body：`{ "conversation_id", "fixture_filename"? }`，返回轮次分 + 全文对话分；SSE 流式版本为 `POST /internal/api/evaluation/judge/conversation-stream`（body 相同） |
+| `POST` | `/internal/api/evaluation/judge/memoir-chapters` | body：`{ "user_id", "baseline_sections"? }`，Chapter/Story 分项 |
+| `GET` | `/internal/api/evaluation/users/{user_id}/memoir-snapshot` | 只读章节与故事正文快照 |
+
+## Fixture 详情扩展
+
+`GET /internal/api/evaluation/fixtures/user-exports/{filename}` 在原有 `turns` 外增加：
+
+- `source_user_id`：导出抬头中的 User ID
+- `memoir_sections`：`## 回忆录章节（生成正文）` 下按标题切分的基线正文（已去掉 `{{IMAGE:...}}` 占位）
+
 ## 门禁规则（v1）
 
 - 所有 case 的合成均分：候选须 **严格高于** 基线。
diff --git a/api/tests/evaluation/test_importers.py b/api/tests/evaluation/test_importers.py
index 329f4ee..b88c152 100644
--- a/api/tests/evaluation/test_importers.py
+++ b/api/tests/evaluation/test_importers.py
@@ -5,6 +5,8 @@ import pytest
 
 from app.features.evaluation.importers.user_export_markdown import (
     extract_dialogue_turns_from_export_md,
+    extract_memoir_chapter_sections_from_export_md,
+    extract_source_user_id_from_export_md,
     extract_user_utterances_from_export_md,
 )
 
@@ -72,3 +74,32 @@ def test_extract_dialogue_turns_from_repo_user_export() -> None:
     turns = extract_dialogue_turns_from_export_md(text)
     assert len(turns) >= 5
     assert "你好" in turns[0][0]
+
+
+def test_extract_source_user_id_from_export_md() -> None:
+    md = "- **User ID:** `e27fcd97-fefa-43b8-a7a3-3ecd49ebf5f0`\n"
+    assert (
+        extract_source_user_id_from_export_md(md)
+        == "e27fcd97-fefa-43b8-a7a3-3ecd49ebf5f0"
+    )
+
+
+def test_extract_memoir_chapter_sections_from_export_md() -> None:
+    md = """
+## 回忆录章节（生成正文）
+
+### First chapter
+
+Line a.
+{{IMAGE:foo}}
+
+### Second title
+
+Line b.
+"""
+    sections = extract_memoir_chapter_sections_from_export_md(md)
+    assert len(sections) == 2
+    assert sections[0][0] == "First chapter"
+    assert "Line a." in sections[0][1]
+    assert "{{IMAGE" not in sections[0][1]
+    assert sections[1][0] == "Second title"
diff --git a/api/tests/evaluation/test_replay_router.py b/api/tests/evaluation/test_replay_router.py
new file mode 100644
index 0000000..23724a7
--- /dev/null
+++ b/api/tests/evaluation/test_replay_router.py
@@ -0,0 +1,74 @@
+"""回放 / 评审路由参数校验（最小 HTTP）。"""
+
+import pytest
+from httpx import ASGITransport, AsyncClient
+
+from app.features.evaluation.internal_auth import get_internal_eval_principal
+
+
+@pytest.mark.asyncio
+async def test_replay_conversation_requires_fixture_or_utterances(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from fastapi import FastAPI
+
+    monkeypatch.setattr(
+        "app.core.config.settings.internal_eval_api_key",
+        "secret",
+        raising=False,
+    )
+    from app.features.evaluation.router import router
+
+    app = FastAPI()
+    app.include_router(router, prefix="/internal/api/evaluation")
+
+    async def _override_auth():
+        from app.features.evaluation.internal_auth import InternalEvalPrincipal
+
+        return InternalEvalPrincipal()
+
+    app.dependency_overrides[get_internal_eval_principal] = _override_auth
+    transport = ASGITransport(app=app)
+    async with AsyncClient(transport=transport, base_url="http://t") as client:
+        r = await client.post(
+            "/internal/api/evaluation/replay/conversation",
+            headers={"X-Internal-Eval-Key": "secret"},
+            json={"conversation_id": "00000000-0000-0000-0000-000000000001"},
+        )
+    assert r.status_code == 400
+
+
+@pytest.mark.asyncio
+async def test_replay_conversation_rejects_both_fixture_and_utterances(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from fastapi import FastAPI
+
+    monkeypatch.setattr(
+        "app.core.config.settings.internal_eval_api_key",
+        "secret",
+        raising=False,
+    )
+    from app.features.evaluation.router import router
+
+    app = FastAPI()
+    app.include_router(router, prefix="/internal/api/evaluation")
+
+    async def _override_auth():
+        from app.features.evaluation.internal_auth import InternalEvalPrincipal
+
+        return InternalEvalPrincipal()
+
+    app.dependency_overrides[get_internal_eval_principal] = _override_auth
+    transport = ASGITransport(app=app)
+    async with AsyncClient(transport=transport, base_url="http://t") as client:
+        r = await client.post(
+            "/internal/api/evaluation/replay/conversation",
+            headers={"X-Internal-Eval-Key": "secret"},
+            json={
+                "conversation_id": "00000000-0000-0000-0000-000000000001",
+                "fixture_filename": "x.md",
+                "user_utterances": ["a"],
+            },
+        )
+    assert r.status_code == 400
diff --git a/app-eval-web/src/App.tsx b/app-eval-web/src/App.tsx
index ee4ebbd..b24cf10 100644
--- a/app-eval-web/src/App.tsx
+++ b/app-eval-web/src/App.tsx
@@ -1,4 +1,4 @@
-import { useCallback, useEffect, useState } from "react";
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 
 const envApiBase = (
   import.meta.env.VITE_EVAL_API_BASE as string | undefined
@@ -21,8 +21,6 @@ const apiBaseHint =
 const SESSION_LIST_POLL_MS = 4000;
 /** 对比页左侧线上对话轮询 */
 const DIALOGUE_POLL_MS = 3500;
-/** 对比页右侧 GLM / 评测 run 轮询 */
-const SESSION_EVAL_POLL_MS = 8000;
 /** 高级页回归集 / 实验列表轮询 */
 const ADMIN_POLL_MS = 8000;
 
@@ -35,35 +33,94 @@ async function api<T>(
   init?: RequestInit,
 ): Promise<{ ok: boolean; data?: T; error?: string; status: number }> {
   const url = `${apiBase}${path.startsWith("/") ? path : `/${path}`}`;
-  const r = await fetch(url, {
-    ...init,
-    headers: {
-      "X-Internal-Eval-Key": apiKey,
-      "Content-Type": "application/json",
-      ...(init?.headers ?? {}),
-    },
-  });
-  const text = await r.text();
-  let data: T | undefined;
   try {
-    data = text ? (JSON.parse(text) as T) : undefined;
-  } catch {
-    /* ignore */
-  }
-  if (!r.ok) {
+    const r = await fetch(url, {
+      ...init,
+      headers: {
+        "X-Internal-Eval-Key": apiKey,
+        "Content-Type": "application/json",
+        ...(init?.headers ?? {}),
+      },
+      signal: init?.signal,
+    });
+    const text = await r.text();
+    let data: T | undefined;
+    try {
+      data = text ? (JSON.parse(text) as T) : undefined;
+    } catch {
+      /* ignore */
+    }
+    if (!r.ok) {
+      return {
+        ok: false,
+        status: r.status,
+        error:
+          typeof data === "object" &&
+          data &&
+          "detail" in (data as object) &&
+          data !== null
+            ? String((data as unknown as { detail: unknown }).detail)
+            : text || r.statusText,
+      };
+    }
+    return { ok: true, data, status: r.status };
+  } catch (e: unknown) {
+    const name = e instanceof Error ? e.name : "";
+    if (name === "AbortError") {
+      return { ok: false, status: 0, error: "aborted" };
+    }
     return {
       ok: false,
-      status: r.status,
-      error:
-        typeof data === "object" &&
-        data &&
-        "detail" in (data as object) &&
-        data !== null
-          ? String((data as unknown as { detail: unknown }).detail)
-          : text || r.statusText,
+      status: 0,
+      error: e instanceof Error ? e.message : "network error",
     };
   }
-  return { ok: true, data, status: r.status };
+}
+
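+/** Mirrors the backend replay filter: keep user utterances that are non-empty after trim; additionally drops the "（空）" placeholder. */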
+function utterancesForReplayFromTurns(
+  turns: { user: string; ai: string }[],
+): string[] {
+  return turns
+    .map((t) => (t.user || "").trim())
+    .filter((u) => u.length > 0 && u !== "（空）");
+}
+
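+/** Groups DB messages into turns of one user message plus its concatenated AI replies (multiple AGENT_RESPONSE segments); AI messages before the first human message are merged into the first turn. */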
+function pairDialogueTurns(
+  messages: DialogueMessage[],
+): { user: string; assistant: string }[] {
+  const out: { user: string; assistant: string }[] = [];
+  let currentUser: string | null = null;
+  const aiAccum: string[] = [];
+
+  const closeTurn = () => {
+    if (currentUser !== null) {
+      out.push({
+        user: currentUser,
+        assistant: aiAccum.join("\n\n").trim(),
+      });
+      currentUser = null;
+      aiAccum.length = 0;
+    }
+  };
+
+  for (const m of messages) {
+    const r = (m.role || "").toLowerCase();
+    if (r === "human") {
+      closeTurn();
+      currentUser = (m.content || "").trim();
+    } else {
+      const t = (m.content || "").trim();
+      if (t) aiAccum.push(t);
+    }
+  }
+  closeTurn();
+  return out;
+}
+
+function normTurnText(s: string): string {
+  return (s || "").replace(/\r\n/g, "\n").trim();
 }
 
 type SessionItem = {
@@ -83,41 +140,6 @@ type DialogueMessage = {
   created_at?: string | null;
 };
 
-type RunTurnOut = {
-  id: string;
-  turn_index: number;
-  user_utterance: string;
-  assistant_reply: string | null;
-  duration_ms: number | null;
-  judge_scores_json: Record<string, unknown> | null;
-  judge_rationale: string | null;
-};
-
-type EvalRunOut = {
-  id: string;
-  experiment_id: string;
-  case_id: string;
-  side: string;
-  status: string;
-  error_message: string | null;
-  memoir_markdown: string | null;
-  conversation_score_total: number | null;
-  memoir_score_total: number | null;
-  composite_score: number | null;
-  judge_bundle_json: Record<string, unknown> | null;
-  turns: RunTurnOut[];
-};
-
-type SessionEvalRunItem = {
-  experiment_name: string;
-  run: EvalRunOut;
-};
-
-function fmtScore(n: unknown): string {
-  if (typeof n === "number" && !Number.isNaN(n)) return n.toFixed(1);
-  return "—";
-}
-
 function JsonPreview({ value }: { value: unknown }) {
   if (value == null) return <span style={{ color: "#6e7681" }}>—</span>;
   return (
@@ -174,11 +196,16 @@ function formatTime(iso: string | null | undefined) {
   }
 }
 
+type FixtureDetailResponse = {
+  turns: { user: string; ai: string }[];
+  source_user_id?: string | null;
+  memoir_sections?: { title: string; body: string }[];
+};
+
 export default function App() {
-  const [view, setView] = useState<"home" | "session" | "admin">("home");
+  const [mainView, setMainView] = useState<"conv" | "memoir" | "admin">("conv");
   const [msg, setMsg] = useState("");
   const [sessions, setSessions] = useState<SessionItem[]>([]);
-  const [selectedId, setSelectedId] = useState<string | null>(null);
 
   const [dialogue, setDialogue] = useState<DialogueMessage[]>([]);
   const [fallbackUserLines, setFallbackUserLines] = useState<string[]>([]);
@@ -186,12 +213,25 @@ export default function App() {
 
   const [versions, setVersions] = useState<{ id: string; name: string }[]>([]);
 
-  const [sessionEvalItems, setSessionEvalItems] = useState<SessionEvalRunItem[]>(
-    [],
-  );
-  const [sessionEvalUpdatedAt, setSessionEvalUpdatedAt] = useState<Date | null>(
-    null,
-  );
+  const [evalUserId, setEvalUserId] = useState("");
+  const [replayConversationId, setReplayConversationId] = useState("");
+  const [replayBusy, setReplayBusy] = useState(false);
+  const [replayProgress, setReplayProgress] = useState<{
+    current: number;
+    total: number;
+  } | null>(null);
+  const replayAbortRef = useRef<AbortController | null>(null);
+  const [judgeConvBusy, setJudgeConvBusy] = useState(false);
+  const [convJudgeBaseline, setConvJudgeBaseline] = useState<unknown>(null);
+  const [convJudgeReplay, setConvJudgeReplay] = useState<unknown>(null);
+  const [convJudgeStreamText, setConvJudgeStreamText] = useState("");
+  const [convJudgeErrors, setConvJudgeErrors] = useState<string[]>([]);
+  const [convJudgePhase, setConvJudgePhase] = useState("");
+  const [memoirSnapshot, setMemoirSnapshot] = useState<unknown>(null);
+  const [memoirSnapBusy, setMemoirSnapBusy] = useState(false);
+  const [memoirJudgeBusy, setMemoirJudgeBusy] = useState(false);
+  const [manualMemoirJudge, setManualMemoirJudge] = useState<unknown>(null);
+  const [showSessionPicker, setShowSessionPicker] = useState(false);
 
   const [adminTab, setAdminTab] = useState<
     "sets" | "versions" | "experiments"
@@ -218,6 +258,32 @@ export default function App() {
   const [fixtureTurns, setFixtureTurns] = useState<
     { user: string; ai: string }[]
   >([]);
+  const [fixtureMemoirSections, setFixtureMemoirSections] = useState<
+    { title: string; body: string }[]
+  >([]);
+
+  const turnAlignment = useMemo(() => {
+    const base = utterancesForReplayFromTurns(fixtureTurns);
+    const pairs = pairDialogueTurns(dialogue);
+    const n = Math.max(base.length, pairs.length);
+    const rows: {
+      index: number;
+      baselineUser: string;
+      dbUser: string;
+      match: boolean;
+    }[] = [];
+    for (let i = 0; i < n; i++) {
+      const b = base[i] ?? "";
+      const p = pairs[i]?.user ?? "";
+      rows.push({
+        index: i + 1,
+        baselineUser: b,
+        dbUser: p,
+        match: normTurnText(b) === normTurnText(p),
+      });
+    }
+    return rows;
+  }, [fixtureTurns, dialogue]);
 
   /** 近期全部：含已结束会话；仅进行中：status=active（多数字段在用户挂断后为 ended，列表会空） */
   const [sessionFilter, setSessionFilter] = useState<"recent" | "active">(
@@ -250,50 +316,51 @@ export default function App() {
     if (r.ok && r.data) setVersions(r.data);
   }, []);
 
-  const pullSessionEvalRuns = useCallback(async (conversationId: string) => {
-    const r = await api<{ items: SessionEvalRunItem[] }>(
-      `/internal/api/evaluation/sessions/${conversationId}/evaluation-runs`,
-    );
-    if (r.ok && r.data?.items) setSessionEvalItems(r.data.items);
-    else setSessionEvalItems([]);
-    setSessionEvalUpdatedAt(new Date());
-  }, []);
-
-  const pullDialogue = useCallback(async (conversationId: string) => {
-    const d = await api<{ messages: DialogueMessage[] }>(
-      `/internal/api/evaluation/sessions/${conversationId}/dialogue`,
-    );
-    if (d.ok && d.data?.messages?.length) {
-      setDialogue(d.data.messages);
-      setFallbackUserLines([]);
-    } else {
-      const t = await api<{
-        user_utterances_from_messages: string[];
-        user_utterances_from_segments: string[];
-      }>(`/internal/api/evaluation/sessions/${conversationId}/transcript`);
-      if (t.ok && t.data) {
-        const lines =
-          t.data.user_utterances_from_messages.length > 0
-            ? t.data.user_utterances_from_messages
-            : t.data.user_utterances_from_segments;
-        setDialogue([]);
-        setFallbackUserLines(lines);
+  const pullDialogue = useCallback(
+    async (conversationId: string, signal?: AbortSignal) => {
+      const d = await api<{ messages: DialogueMessage[] }>(
+        `/internal/api/evaluation/sessions/${conversationId}/dialogue`,
+        { signal },
+      );
+      if (d.error === "aborted") return;
+      if (d.ok && d.data?.messages?.length) {
+        setDialogue(d.data.messages);
+        setFallbackUserLines([]);
+      } else {
+        const t = await api<{
+          user_utterances_from_messages: string[];
+          user_utterances_from_segments: string[];
+        }>(`/internal/api/evaluation/sessions/${conversationId}/transcript`, {
+          signal,
+        });
+        if (t.error === "aborted") return;
+        if (t.ok && t.data) {
+          const lines =
+            t.data.user_utterances_from_messages.length > 0
+              ? t.data.user_utterances_from_messages
+              : t.data.user_utterances_from_segments;
+          setDialogue([]);
+          setFallbackUserLines(lines);
+        }
       }
-    }
-    setDialogueUpdatedAt(new Date());
+      setDialogueUpdatedAt(new Date());
+    },
+    [],
+  );
+
+  const stopReplay = useCallback(() => {
+    replayAbortRef.current?.abort();
   }, []);
 
-  const loadSessionPageInitial = useCallback(
-    (conversationId: string) => {
-      setLoadingLeft(true);
-      setDialogue([]);
-      setFallbackUserLines([]);
-      setSessionEvalItems([]);
-      setSessionEvalUpdatedAt(null);
-      void pullDialogue(conversationId).finally(() => setLoadingLeft(false));
-    },
-    [pullDialogue],
-  );
+  useEffect(() => {
+    const ac = replayAbortRef;
+    const onPageHide = () => ac.current?.abort();
+    window.addEventListener("pagehide", onPageHide);
+    return () => {
+      window.removeEventListener("pagehide", onPageHide);
+      ac.current?.abort();
+    };
+  }, []);
 
   const refreshAdminData = useCallback(async () => {
     const rs = await api<{ id: string; name: string }[]>(
@@ -328,34 +395,37 @@ export default function App() {
         setEvalReachable("bad");
       }
     })();
+  }, []);
+
+  useEffect(() => {
+    if (mainView !== "conv") return;
     void refreshSessionList();
     const t = setInterval(() => void refreshSessionList(), SESSION_LIST_POLL_MS);
     return () => clearInterval(t);
-  }, [refreshSessionList]);
+  }, [mainView, refreshSessionList]);
 
   useEffect(() => {
     void refreshVersions();
   }, [refreshVersions]);
 
   useEffect(() => {
-    if (view !== "session" || !selectedId) return;
+    if (mainView !== "conv" || !replayConversationId.trim()) return;
+    let cancelled = false;
+    setLoadingLeft(true);
+    void pullDialogue(replayConversationId).finally(() => {
+      if (!cancelled) setLoadingLeft(false);
+    });
     const t = setInterval(() => {
-      void pullDialogue(selectedId);
+      void pullDialogue(replayConversationId);
     }, DIALOGUE_POLL_MS);
-    return () => clearInterval(t);
-  }, [view, selectedId, pullDialogue]);
+    return () => {
+      cancelled = true;
+      clearInterval(t);
+    };
+  }, [mainView, replayConversationId, pullDialogue]);
 
   useEffect(() => {
-    if (view !== "session" || !selectedId) return;
-    void pullSessionEvalRuns(selectedId);
-    const t = setInterval(() => {
-      void pullSessionEvalRuns(selectedId);
-    }, SESSION_EVAL_POLL_MS);
-    return () => clearInterval(t);
-  }, [view, selectedId, pullSessionEvalRuns]);
-
-  useEffect(() => {
-    if (view !== "session" || !selectedId) return;
+    if (mainView !== "conv" && mainView !== "memoir") return;
     void (async () => {
       const r = await api<{ items: string[] }>(
         "/internal/api/evaluation/fixtures/user-exports",
@@ -373,33 +443,328 @@ export default function App() {
         return items[0] ?? "";
       });
     })();
-  }, [view, selectedId]);
+  }, [mainView]);
 
   useEffect(() => {
-    if (view !== "session" || !fixtureName) {
+    if ((mainView !== "conv" && mainView !== "memoir") || !fixtureName) {
       setFixtureTurns([]);
+      setFixtureMemoirSections([]);
       return;
     }
     void (async () => {
-      const r = await api<{ turns: { user: string; ai: string }[] }>(
+      const r = await api<FixtureDetailResponse>(
         `/internal/api/evaluation/fixtures/user-exports/${encodeURIComponent(fixtureName)}`,
       );
-      if (r.ok && r.data?.turns) setFixtureTurns(r.data.turns);
-      else setFixtureTurns([]);
+      if (r.ok && r.data?.turns) {
+        setFixtureTurns(r.data.turns);
+        setFixtureMemoirSections(r.data.memoir_sections ?? []);
+        const sid = r.data.source_user_id ?? null;
+        if (sid && mainView === "memoir")
+          setEvalUserId((prev) => (prev.trim() ? prev : sid));
+      } else {
+        setFixtureTurns([]);
+        setFixtureMemoirSections([]);
+      }
     })();
-  }, [view, fixtureName]);
+  }, [mainView, fixtureName]);
 
   useEffect(() => {
-    if (view !== "admin") return;
+    if (mainView !== "admin") return;
     void refreshAdminData();
     const t = setInterval(() => void refreshAdminData(), ADMIN_POLL_MS);
     return () => clearInterval(t);
-  }, [view, refreshAdminData]);
+  }, [mainView, refreshAdminData]);
 
-  function openSession(id: string) {
-    setSelectedId(id);
-    setView("session");
-    loadSessionPageInitial(id);
+  async function createEvalSandboxOnly() { // Creates a sandbox via the eval-sandbox endpoint and adopts its ids, without replaying anything.
+    const r = await api<{
+      user_id: string;
+      conversation_id: string;
+      phone: string;
+      nickname: string;
+    }>("/internal/api/evaluation/sessions/eval-sandbox", {
+      method: "POST",
+      body: "{}",
+    });
+    if (r.ok && r.data) { // success: store the returned ids, then reset judge results and the displayed dialogue
+      setEvalUserId(r.data.user_id);
+      setReplayConversationId(r.data.conversation_id);
+      setConvJudgeBaseline(null);
+      setConvJudgeReplay(null);
+      setConvJudgeStreamText("");
+      setConvJudgeErrors([]);
+      setConvJudgePhase("");
+      setDialogue([]);
+      setFallbackUserLines([]);
+      setMsg(
+        `评测沙箱就绪：临时手机号 ${r.data.phone}，user_id / conversation_id 已填入（可随时「新沙箱」清空重来）。`,
+      );
+    } else { // failure: show the API error, or a generic fallback when none was returned
+      setMsg(r.error ?? "创建沙箱失败");
+    }
+  }
+
+  async function bootstrapReplaySession() { // Asks the replay-bootstrap endpoint for a new conversation under the user id held in evalUserId.
+    const uid = evalUserId.trim();
+    if (!uid) { // the request body needs a user id; bail out with a hint instead of calling the API
+      setMsg("高级选项：请先填写已有用户的 UUID");
+      return;
+    }
+    const r = await api<{ conversation_id: string }>(
+      "/internal/api/evaluation/sessions/replay-bootstrap",
+      { method: "POST", body: JSON.stringify({ user_id: uid }) },
+    );
+    setMsg( // the outcome is reported whether or not the call succeeded
+      r.ok
+        ? `已在该用户下新建会话 ${r.data?.conversation_id ?? ""}`
+        : (r.error ?? "bootstrap 失败"),
+    );
+    if (r.ok && r.data?.conversation_id) { // adopt the new conversation, then reset judge results and the displayed dialogue
+      setReplayConversationId(r.data.conversation_id);
+      setConvJudgeBaseline(null);
+      setConvJudgeReplay(null);
+      setConvJudgeStreamText("");
+      setConvJudgeErrors([]);
+      setConvJudgePhase("");
+      setDialogue([]);
+      setFallbackUserLines([]);
+    }
+  }
+
+  async function runReplay() { // Replays the selected fixture's user utterances into the target conversation, one HTTP request per turn.
+    if (!fixtureName) {
+      setMsg("请选择基准 MD");
+      return;
+    }
+    const utts = utterancesForReplayFromTurns(fixtureTurns);
+    if (!utts.length) {
+      setMsg("当前基准 MD 没有可回放的用户句（请先加载轮次）");
+      return;
+    }
+
+    replayAbortRef.current?.abort(); // abort whatever controller the ref still holds before installing a fresh one
+    const ac = new AbortController();
+    replayAbortRef.current = ac;
+    const { signal } = ac;
+
+    setReplayBusy(true);
+    setReplayProgress(null);
+    try {
+      let cid = replayConversationId.trim();
+      if (!cid) { // no target conversation yet: create a sandbox user + conversation first and replay into it
+        const sb = await api<{
+          user_id: string;
+          conversation_id: string;
+          phone: string;
+        }>("/internal/api/evaluation/sessions/eval-sandbox", {
+          method: "POST",
+          body: "{}",
+          signal,
+        });
+        if (sb.error === "aborted") { // presumably how api() reports an aborted signal — confirm against api()
+          setMsg("回放已中止（关闭/刷新页面或「停止回放」）");
+          return;
+        }
+        if (!sb.ok || !sb.data) {
+          setMsg(sb.error ?? "自动创建沙箱失败");
+          return;
+        }
+        setEvalUserId(sb.data.user_id);
+        setReplayConversationId(sb.data.conversation_id);
+        cid = sb.data.conversation_id; // use the local copy: the state update is not visible inside this call
+        setConvJudgeBaseline(null);
+        setConvJudgeReplay(null);
+        setConvJudgeStreamText("");
+        setConvJudgeErrors([]);
+        setConvJudgePhase("");
+        setDialogue([]);
+        setFallbackUserLines([]);
+      }
+
+      let replayed = 0; // sum of turns_replayed reported by the backend across all requests
+      for (let i = 0; i < utts.length; i++) {
+        if (signal.aborted) { // checked between turns so a stop request ends the loop before the next POST
+          setMsg("回放已中止（关闭/刷新页面或「停止回放」）");
+          return;
+        }
+        setReplayProgress({ current: i + 1, total: utts.length });
+        const last = i === utts.length - 1; // only the final request sets flush_memoir_after
+        const r = await api<{
+          turns_replayed: number;
+          utterances_echo: string[];
+        }>("/internal/api/evaluation/replay/conversation", {
+          method: "POST",
+          signal,
+          body: JSON.stringify({
+            conversation_id: cid,
+            user_utterances: [utts[i]], // exactly one utterance per request
+            flush_memoir_after: last,
+            skip_tts: true,
+          }),
+        });
+        if (r.error === "aborted") {
+          setMsg("回放已中止（关闭/刷新页面或「停止回放」）");
+          return;
+        }
+        if (!r.ok) { // stop at the first failed turn; earlier turns stay replayed
+          setMsg(r.error ?? "回放失败");
+          return;
+        }
+        replayed += r.data?.turns_replayed ?? 0;
+        await pullDialogue(cid, signal); // refresh the displayed dialogue after every turn
+      }
+
+      setMsg(
+        `回放完成：${replayed} 轮（分轮请求，避免长阻塞；当前会话 ${cid.slice(0, 8)}…；最后一轮已 flush 回忆录队列，成稿仍依赖 Celery）`,
+      );
+    } finally { // runs on every exit path above, so the busy/progress indicators are always cleared
+      setReplayBusy(false);
+      setReplayProgress(null);
+    }
+  }
+
+  async function runJudgeConversationStream() { // POSTs to the conversation-stream judge endpoint and applies its SSE events to state as they arrive.
+    const cid = replayConversationId.trim();
+    if (!cid) {
+      setMsg("请先有一次会话（执行回放、仅建沙箱或粘贴会话 ID）");
+      return;
+    }
+    setJudgeConvBusy(true);
+    setConvJudgeBaseline(null);
+    setConvJudgeReplay(null);
+    setConvJudgeStreamText("");
+    setConvJudgeErrors([]);
+    setConvJudgePhase("连接评审服务…");
+    try {
+      const url = `${apiBase}/internal/api/evaluation/judge/conversation-stream`;
+      const res = await fetch(url, {
+        method: "POST",
+        headers: {
+          "X-Internal-Eval-Key": apiKey,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          conversation_id: cid,
+          fixture_filename: fixtureName.trim() || null,
+        }),
+      });
+      if (!res.ok) { // non-2xx: show the status plus the first 240 chars of the body
+        const t = await res.text();
+        setMsg(`评审流启动失败：HTTP ${res.status} ${t.slice(0, 240)}`);
+        setConvJudgePhase("");
+        return;
+      }
+      const reader = res.body?.getReader();
+      if (!reader) {
+        setMsg("当前环境无法读取响应流");
+        setConvJudgePhase("");
+        return;
+      }
+      const decoder = new TextDecoder();
+      let buf = ""; // holds the trailing, not-yet-terminated event block between reads
+      for (let eof = false; !eof; ) {
+        const { done, value } = await reader.read();
+        eof = done; // at EOF: flush the decoder and terminate the last block so a final event is not dropped
+        buf += done ? `${decoder.decode()}\n\n` : decoder.decode(value, { stream: true });
+        const chunks = buf.split(/\r?\n\r?\n/); // blank-line separated blocks; tolerate CRLF as well as LF
+        buf = chunks.pop() ?? "";
+        for (const block of chunks) {
+          const line = block.trim();
+          if (!line.startsWith("data:")) continue; // the space after "data:" is optional in the SSE format
+          let evt: Record<string, unknown>;
+          try {
+            evt = JSON.parse(line.slice(5)) as Record<string, unknown>; // JSON.parse ignores the leading space
+          } catch {
+            continue;
+          }
+          const ev = evt.event as string | undefined;
+          if (ev === "meta") {
+            setConvJudgePhase("GLM：基准整体打分…");
+          } else if (ev === "warning") {
+            setConvJudgeErrors((prev) => [
+              ...prev,
+              String(evt.message ?? "warning"),
+            ]);
+          } else if (ev === "baseline_judge") {
+            setConvJudgeBaseline(evt.judge ?? null);
+            setConvJudgePhase("GLM：回放对话整体打分…");
+          } else if (ev === "replay_judge") {
+            setConvJudgeReplay(evt.judge ?? null);
+            setConvJudgePhase("GLM：对比与建议（流式输出）…");
+          } else if (ev === "compare_delta") {
+            const piece = String(evt.text ?? "");
+            if (piece)
+              setConvJudgeStreamText((prev) => prev + piece);
+          } else if (ev === "error") {
+            setConvJudgeErrors((prev) => [
+              ...prev,
+              `${String(evt.phase ?? "error")}: ${String(evt.message ?? "")}`,
+            ]);
+          } else if (ev === "done") {
+            setConvJudgePhase("");
+            setMsg("GLM 对话评审流已结束");
+          }
+        }
+      }
+    } catch (e) { // network or stream failure: record it in the error list and in the status message
+      setConvJudgeErrors((prev) => [
+        ...prev,
+        e instanceof Error ? e.message : "评审流异常",
+      ]);
+      setMsg(e instanceof Error ? e.message : "评审流异常");
+    } finally {
+      setJudgeConvBusy(false);
+      setConvJudgePhase("");
+    }
+  }
+
+  async function runMemoirSnapshot() { // Fetches the memoir-snapshot resource for the user id in evalUserId and stores it in state.
+    const uid = evalUserId.trim();
+    if (!uid) {
+      setMsg("请填写用户 ID");
+      return;
+    }
+    setMemoirSnapBusy(true);
+    try {
+      const r = await api<unknown>( // no method given — presumably api() defaults to GET; confirm against api()
+        `/internal/api/evaluation/users/${encodeURIComponent(uid)}/memoir-snapshot`,
+      );
+      setMsg(r.ok ? "已刷新库中章节 / 故事列表" : (r.error ?? "加载失败"));
+      if (r.ok) setMemoirSnapshot(r.data); // on failure the previously stored snapshot is left untouched
+    } finally { // clear the busy flag even if api() throws
+      setMemoirSnapBusy(false);
+    }
+  }
+
+  async function runJudgeMemoir() { // POSTs to the memoir-chapters judge endpoint for the user id in evalUserId and stores the result.
+    const uid = evalUserId.trim();
+    if (!uid) {
+      setMsg("请填写用户 ID");
+      return;
+    }
+    setMemoirJudgeBusy(true);
+    try {
+      const r = await api<unknown>("/internal/api/evaluation/judge/memoir-chapters", {
+        method: "POST",
+        body: JSON.stringify({
+          user_id: uid,
+          baseline_sections: fixtureMemoirSections.length // send null rather than an empty list when the fixture has no sections
+            ? fixtureMemoirSections
+            : null,
+        }),
+      });
+      setMsg(r.ok ? "GLM 章节评审完成" : (r.error ?? "评审失败"));
+      if (r.ok) setManualMemoirJudge(r.data); // on failure the previously stored judge result is left untouched
+    } finally { // clear the busy flag even if api() throws
+      setMemoirJudgeBusy(false);
+    }
+  }
+
+  function pickSessionAsReplayTarget(id: string) { // Makes the session with this id the replay target and closes the session picker.
+    setReplayConversationId(id);
+    setShowSessionPicker(false);
+    const s = sessions.find((x) => x.id === id); // look the session up in the loaded list to get its user_id
+    if (s?.user_id) setEvalUserId((prev) => prev.trim() || s.user_id); // a non-blank evalUserId is kept (stored trimmed), not overwritten
+    setMsg(`已选用会话 ${id.slice(0, 8)}… 为回放目标（将向该会话追加消息）`);
   }
 
   async function createSet() {
@@ -437,12 +802,13 @@ export default function App() {
   }
 
   async function snapshotFromDetail() {
-    if (!selectedId || !selSet) {
-      setMsg("在「高级配置」中选回归集 ID");
+    const cid = replayConversationId.trim();
+    if (!cid || !selSet) {
+      setMsg("先在对话评测中填写 conversation_id 并在高级页选用回归集");
       return;
     }
     const r = await api<unknown>(
-      `/internal/api/evaluation/regression-sets/${selSet}/snapshot-from-conversation/${selectedId}`,
+      `/internal/api/evaluation/regression-sets/${selSet}/snapshot-from-conversation/${cid}`,
       {
         method: "POST",
         body: JSON.stringify({
@@ -513,26 +879,22 @@ export default function App() {
         <span style={{ flex: 1 }} />
         <button
           type="button"
-          style={{ ...btn, fontWeight: view === "home" ? 700 : 400 }}
-          onClick={() => {
-            setView("home");
-            setSelectedId(null);
-          }}
+          style={{ ...btn, fontWeight: mainView === "conv" ? 700 : 400 }}
+          onClick={() => setMainView("conv")}
         >
-          活跃会话
+          对话评测
         </button>
         <button
           type="button"
-          style={{ ...btn, fontWeight: view === "session" ? 700 : 400 }}
-          onClick={() => selectedId && setView("session")}
-          disabled={!selectedId}
+          style={{ ...btn, fontWeight: mainView === "memoir" ? 700 : 400 }}
+          onClick={() => setMainView("memoir")}
         >
-          当前对比
+          回忆录章节
         </button>
         <button
           type="button"
-          style={{ ...btn, fontWeight: view === "admin" ? 700 : 400 }}
-          onClick={() => setView("admin")}
+          style={{ ...btn, fontWeight: mainView === "admin" ? 700 : 400 }}
+          onClick={() => setMainView("admin")}
         >
           高级（回归集 / 实验）
         </button>
@@ -551,135 +913,23 @@ export default function App() {
         </div>
       ) : null}
 
-      {view === "home" ? (
-        <main style={{ padding: 20, maxWidth: 960, margin: "0 auto" }}>
-          <div
-            style={{
-              display: "flex",
-              justifyContent: "space-between",
-              alignItems: "center",
-              marginBottom: 12,
-              flexWrap: "wrap",
-              gap: 10,
-            }}
-          >
-            <h1 style={{ margin: 0, fontSize: 22, fontWeight: 600 }}>
-              {sessionFilter === "active" ? "进行中的会话" : "近期会话"}
-            </h1>
-            <div style={{ display: "flex", gap: 8, flexWrap: "wrap" }}>
-              <button
-                type="button"
-                style={{
-                  ...btn,
-                  borderColor:
-                    sessionFilter === "recent" ? "#58a6ff" : "#30363d",
-                }}
-                onClick={() => setSessionFilter("recent")}
-              >
-                近期全部
-              </button>
-              <button
-                type="button"
-                style={{
-                  ...btn,
-                  borderColor:
-                    sessionFilter === "active" ? "#58a6ff" : "#30363d",
-                }}
-                onClick={() => setSessionFilter("active")}
-              >
-                仅进行中
-              </button>
-            </div>
-          </div>
-          <p style={{ color: "#6e7681", marginTop: 0, fontSize: 12 }}>
-            每 {SESSION_LIST_POLL_MS / 1000} 秒自动刷新列表
-            {sessionsUpdatedAt
-              ? ` · 上次更新 ${sessionsUpdatedAt.toLocaleTimeString()}`
-              : ""}
-          </p>
-          <p style={{ color: "#8b949e", marginTop: 8, fontSize: 14 }}>
-            点选一条进入对比：左侧为线上落库对话，右侧为候选链路透传回放（流式）。
-            {sessionFilter === "active"
-              ? " 若无数据，多半是会话已结束（status=ended），请切到「近期全部」。"
-              : ""}
-          </p>
-          {sessions.length === 0 ? (
-            <p style={{ color: "#8b949e" }}>
-              暂无会话。请确认数据库有对话记录，且评测 API 与 App 共用同一 DATABASE_URL。
-            </p>
-          ) : (
-            <ul style={{ listStyle: "none", padding: 0, margin: 0 }}>
-              {sessions.map((s) => (
-                <li
-                  key={s.id}
-                  style={{
-                    border: "1px solid #30363d",
-                    borderRadius: 10,
-                    marginBottom: 10,
-                    padding: "14px 16px",
-                    cursor: "pointer",
-                    background: "#161b22",
-                  }}
-                  onClick={() => openSession(s.id)}
-                  onKeyDown={(e) =>
-                    e.key === "Enter" ? openSession(s.id) : undefined
-                  }
-                  role="button"
-                  tabIndex={0}
-                >
-                  <div
-                    style={{
-                      display: "flex",
-                      justifyContent: "space-between",
-                      gap: 12,
-                      flexWrap: "wrap",
-                    }}
-                  >
-                    <span style={{ fontWeight: 600, fontSize: 16 }}>
-                      {s.user_phone ?? "无手机号"} 
-                      <span style={{ color: "#8b949e", fontWeight: 400, marginLeft: 8 }}>
-                        {s.current_topic ? `· ${s.current_topic}` : ""}
-                      </span>
-                    </span>
-                    <span style={{ color: "#8b949e", fontSize: 13 }}>
-                      最近消息 {formatTime(s.last_message_at)}
-                    </span>
-                  </div>
-                  <div style={{ fontSize: 12, color: "#6e7681", marginTop: 6 }}>
-                    <span
-                      style={{
-                        color:
-                          s.status === "active"
-                            ? "#3fb950"
-                            : "#8b949e",
-                      }}
-                    >
-                      {s.status ?? "—"}
-                    </span>
-                    {" · "}
-                    阶段 {s.conversation_stage ?? "—"} ·{" "}
-                    <code style={{ color: "#79c0ff" }}>
-                      {s.id.slice(0, 12)}…
-                    </code>
-                  </div>
-                </li>
-              ))}
-            </ul>
-          )}
-        </main>
-      ) : null}
-
-      {view === "session" && selectedId ? (
+      {mainView === "conv" ? (
         <main
           style={{
             padding: "12px 16px 24px",
             display: "flex",
             flexDirection: "column",
             gap: 12,
-            height: "calc(100vh - 56px)",
+            minHeight: "calc(100vh - 56px)",
             boxSizing: "border-box",
           }}
         >
+          <p style={{ color: "#8b949e", margin: 0, fontSize: 13, lineHeight: 1.5 }}>
+            默认<strong>不填</strong>用户与会话：点「执行回放」会自动创建<strong>临时用户 + 新会话</strong>（伪手机号{" "}
+            <code>eval_…</code>
+            ），再按基准里的<strong>用户句</strong>逐轮请求后端（每轮一次 HTTP，界面可保持响应）。左侧列是导出 MD 里的<strong>用户 + 当时导出的 AI</strong>，仅作对照；中间「落库对话」里的 AI 是当前环境<strong>重新生成</strong>的，必然与左侧导出 AI 不同——这是预期。下方「逐轮用户句对齐」表可核对：每一轮写入 DB 的用户话是否和基准用户句一致。关闭或刷新本页会中止未完成的回放。
+            回忆录模块在「回忆录章节」页；若用沙箱用户看章节，请先在本页跑完回放（并开 Celery）。
+          </p>
           <div
             style={{
               display: "flex",
@@ -688,19 +938,8 @@ export default function App() {
               alignItems: "center",
             }}
           >
-            <button
-              type="button"
-              style={btn}
-              onClick={() => {
-                setView("home");
-                setSelectedId(null);
-              }}
-            >
-              ← 返回列表
-            </button>
-            <code style={{ color: "#79c0ff" }}>{selectedId}</code>
             <label style={{ fontSize: 14, color: "#d29922" }}>
-              对照导出 MD{" "}
+              基准 MD{" "}
               <select
                 value={fixtureName}
                 onChange={(e) => setFixtureName(e.target.value)}
@@ -725,105 +964,229 @@ export default function App() {
                 )}
               </select>
             </label>
+            <button
+              type="button"
+              style={btnPrimary}
+              onClick={() => void createEvalSandboxOnly()}
+            >
+              仅建沙箱（临时用户+会话）
+            </button>
+            <button
+              type="button"
+              style={btn}
+              onClick={() => {
+                setReplayConversationId("");
+                setEvalUserId("");
+                setConvJudgeBaseline(null);
+                setConvJudgeReplay(null);
+                setConvJudgeStreamText("");
+                setConvJudgeErrors([]);
+                setConvJudgePhase("");
+                setDialogue([]);
+                setFallbackUserLines([]);
+                setMsg("已清空沙箱字段；下次「执行回放」会再建新的临时用户。");
+              }}
+            >
+              清空沙箱
+            </button>
+            <button
+              type="button"
+              style={btn}
+              onClick={() => setShowSessionPicker((v) => !v)}
+            >
+              {showSessionPicker ? "收起会话列表" : "从近期会话选取"}
+            </button>
+          </div>
+          <div style={{ display: "flex", flexWrap: "wrap", gap: 8, alignItems: "center" }}>
             <span style={{ fontSize: 12, color: "#6e7681" }}>
-              左①每 {DIALOGUE_POLL_MS / 1000}s 同步 · 右③每{" "}
-              {SESSION_EVAL_POLL_MS / 1000}s 拉取评测
+              当前 user_id{" "}
+              <code style={{ color: "#79c0ff" }}>
+                {evalUserId ? `${evalUserId.slice(0, 10)}…` : "—"}
+              </code>{" "}
+              · conversation_id{" "}
+              <code style={{ color: "#79c0ff" }}>
+                {replayConversationId
+                  ? `${replayConversationId.slice(0, 10)}…`
+                  : "—"}
+              </code>
+            </span>
+            <button
+              type="button"
+              style={btnPrimary}
+              disabled={replayBusy}
+              onClick={() => void runReplay()}
+            >
+              {replayBusy
+                ? replayProgress
+                  ? `回放中 ${replayProgress.current}/${replayProgress.total}…`
+                  : "回放中…"
+                : "执行回放"}
+            </button>
+            <button
+              type="button"
+              style={btn}
+              disabled={!replayBusy}
+              onClick={() => stopReplay()}
+            >
+              停止回放
+            </button>
+            <button
+              type="button"
+              style={btn}
+              disabled={judgeConvBusy}
+              onClick={() => void runJudgeConversationStream()}
+            >
+              {judgeConvBusy ? "GLM 流式评审中…" : "GLM 评审对话（流式）"}
+            </button>
+            <span style={{ fontSize: 12, color: "#6e7681" }}>
+              对话同步每 {DIALOGUE_POLL_MS / 1000}s
               {dialogueUpdatedAt
-                ? ` · 对话 ${dialogueUpdatedAt.toLocaleTimeString()}`
-                : ""}
-              {sessionEvalUpdatedAt
-                ? ` · 评审 ${sessionEvalUpdatedAt.toLocaleTimeString()}`
+                ? ` · ${dialogueUpdatedAt.toLocaleTimeString()}`
                 : ""}
             </span>
           </div>
 
+          <details
+            style={{
+              border: "1px solid #30363d",
+              borderRadius: 8,
+              padding: "8px 12px",
+              background: "#161b22",
+            }}
+          >
+            <summary
+              style={{
+                cursor: "pointer",
+                color: "#8b949e",
+                fontSize: 13,
+                userSelect: "none",
+              }}
+            >
+              高级：指定已有用户或粘贴 conversation_id
+            </summary>
+            <div
+              style={{
+                marginTop: 12,
+                display: "flex",
+                flexWrap: "wrap",
+                gap: 10,
+                alignItems: "center",
+              }}
+            >
+              <label style={{ fontSize: 13, color: "#8b949e" }}>
+                用户 ID
+                <input
+                  value={evalUserId}
+                  onChange={(e) => setEvalUserId(e.target.value)}
+                  placeholder="已有用户 UUID"
+                  style={{
+                    marginLeft: 8,
+                    minWidth: 260,
+                    padding: "6px 10px",
+                    borderRadius: 6,
+                    background: "#0d1117",
+                    color: "#e6edf3",
+                    border: "1px solid #30363d",
+                  }}
+                />
+              </label>
+              <button
+                type="button"
+                style={btn}
+                onClick={() => void bootstrapReplaySession()}
+              >
+                在该用户下新建空会话
+              </button>
+              <label style={{ fontSize: 13, color: "#8b949e" }}>
+                conversation_id
+                <input
+                  value={replayConversationId}
+                  onChange={(e) => setReplayConversationId(e.target.value)}
+                  placeholder="手动粘贴"
+                  style={{
+                    marginLeft: 8,
+                    minWidth: 300,
+                    padding: "6px 10px",
+                    borderRadius: 6,
+                    background: "#0d1117",
+                    color: "#79c0ff",
+                    border: "1px solid #30363d",
+                    fontFamily: "monospace",
+                    fontSize: 12,
+                  }}
+                />
+              </label>
+            </div>
+          </details>
+
+          {showSessionPicker ? (
+            <div
+              style={{
+                border: "1px solid #30363d",
+                borderRadius: 10,
+                padding: 12,
+                background: "#161b22",
+                maxHeight: 220,
+                overflow: "auto",
+              }}
+            >
+              <div style={{ display: "flex", gap: 8, marginBottom: 8, flexWrap: "wrap" }}>
+                <button
+                  type="button"
+                  style={{
+                    ...btn,
+                    borderColor:
+                      sessionFilter === "recent" ? "#58a6ff" : "#30363d",
+                  }}
+                  onClick={() => setSessionFilter("recent")}
+                >
+                  近期全部
+                </button>
+                <button
+                  type="button"
+                  style={{
+                    ...btn,
+                    borderColor:
+                      sessionFilter === "active" ? "#58a6ff" : "#30363d",
+                  }}
+                  onClick={() => setSessionFilter("active")}
+                >
+                  仅进行中
+                </button>
+                <span style={{ fontSize: 12, color: "#6e7681" }}>
+                  列表每 {SESSION_LIST_POLL_MS / 1000}s 刷新
+                  {sessionsUpdatedAt
+                    ? ` · ${sessionsUpdatedAt.toLocaleTimeString()}`
+                    : ""}
+                </span>
+              </div>
+              <ul style={{ listStyle: "none", padding: 0, margin: 0 }}>
+                {sessions.map((s) => (
+                  <li key={s.id} style={{ marginBottom: 6 }}>
+                    <button
+                      type="button"
+                      style={{ ...btn, fontSize: 12 }}
+                      onClick={() => pickSessionAsReplayTarget(s.id)}
+                    >
+                      选用
+                    </button>{" "}
+                    <code style={{ color: "#79c0ff" }}>{s.id.slice(0, 10)}…</code>{" "}
+                    <span style={{ color: "#8b949e" }}>{s.user_phone ?? s.user_id.slice(0, 8)}</span>
+                  </li>
+                ))}
+              </ul>
+            </div>
+          ) : null}
+
           <div
             style={{
               flex: 1,
-              minHeight: 0,
+              minHeight: 320,
               display: "grid",
-              gridTemplateColumns: "minmax(0, 1fr) minmax(0, 1fr) minmax(0, 1fr)",
+              gridTemplateColumns: "minmax(0, 1fr) minmax(0, 1fr)",
               gap: 12,
             }}
           >
-            <section
-              style={{
-                border: "1px solid #30363d",
-                borderRadius: 12,
-                overflow: "auto",
-                padding: 16,
-                background: "#161b22",
-              }}
-            >
-              <h2
-                style={{
-                  margin: "0 0 12px",
-                  fontSize: 15,
-                  color: "#8b949e",
-                  fontWeight: 600,
-                }}
-              >
-                ① 线上 / 历史（DB · 自动同步）
-              </h2>
-              {loadingLeft ? (
-                <p style={{ color: "#8b949e" }}>加载中…</p>
-              ) : dialogue.length > 0 ? (
-                <div style={{ display: "flex", flexDirection: "column", gap: 12 }}>
-                  {dialogue.map((m, i) => (
-                    <div
-                      key={`${i}-${m.created_at ?? i}`}
-                      style={{
-                        alignSelf: m.role === "human" ? "flex-end" : "flex-start",
-                        maxWidth: "92%",
-                        padding: "10px 12px",
-                        borderRadius: 10,
-                        background:
-                          m.role === "human" ? "#1f3a5f" : "#21262d",
-                        border: "1px solid #30363d",
-                        whiteSpace: "pre-wrap",
-                        fontSize: 14,
-                        lineHeight: 1.5,
-                      }}
-                    >
-                      <div
-                        style={{
-                          fontSize: 11,
-                          color: "#8b949e",
-                          marginBottom: 4,
-                        }}
-                      >
-                        {m.role === "human" ? "用户" : "AI"} ·{" "}
-                        {formatTime(m.created_at ?? null)}
-                      </div>
-                      {m.content}
-                    </div>
-                  ))}
-                </div>
-              ) : fallbackUserLines.length > 0 ? (
-                <div>
-                  <p style={{ color: "#d29922", fontSize: 13 }}>
-                    无线上消息表记录，仅展示抽取的用户轮次（transcript）：
-                  </p>
-                  {fallbackUserLines.map((line, i) => (
-                    <div
-                      key={i}
-                      style={{
-                        padding: "8px 0",
-                        borderBottom: "1px solid #30363d",
-                        fontSize: 14,
-                        whiteSpace: "pre-wrap",
-                      }}
-                    >
-                      <span style={{ color: "#8b949e" }}>{i + 1}. </span>
-                      {line}
-                    </div>
-                  ))}
-                </div>
-              ) : (
-                <p style={{ color: "#8b949e" }}>暂无左侧数据。</p>
-              )}
-            </section>
-
             <section
               style={{
                 border: "1px solid #9e6a03",
@@ -833,47 +1196,26 @@ export default function App() {
                 background: "#1c1608",
               }}
             >
-              <h2
-                style={{
-                  margin: "0 0 12px",
-                  fontSize: 15,
-                  color: "#d29922",
-                  fontWeight: 600,
-                }}
-              >
-                ② 导出快照（tests/user_exports · 只读对照）
+              <h2 style={{ margin: "0 0 12px", fontSize: 15, color: "#d29922" }}>
+                基准（导出 MD：用户 + AI 对照）
               </h2>
-              {!fixtureName ? (
-                <p style={{ color: "#8b949e" }}>
-                  仓库内未找到{" "}
-                  <code>api/tests/user_exports/*.md</code>，或 API 无法读取该目录。
-                </p>
-              ) : fixtureTurns.length === 0 ? (
-                <p style={{ color: "#8b949e" }}>正在加载 {fixtureName}…</p>
+              {!fixtureName || fixtureTurns.length === 0 ? (
+                <p style={{ color: "#8b949e" }}>选择 MD 后加载轮次</p>
               ) : (
                 <div style={{ display: "flex", flexDirection: "column", gap: 16 }}>
                   {fixtureTurns.map((row, i) => (
                     <div key={`${i}-${row.user.slice(0, 12)}`}>
                       <div
                         style={{
-                          alignSelf: "flex-end",
-                          maxWidth: "92%",
-                          marginLeft: "auto",
-                          padding: "10px 12px",
-                          borderRadius: 10,
+                          padding: "8px 10px",
+                          borderRadius: 8,
                           background: "#3d2914",
-                          border: "1px solid #6e4e12",
                           whiteSpace: "pre-wrap",
-                          fontSize: 14,
+                          fontSize: 13,
+                          border: "1px solid #6e4e12",
                         }}
                       >
-                        <div
-                          style={{
-                            fontSize: 11,
-                            color: "#d29922",
-                            marginBottom: 4,
-                          }}
-                        >
+                        <div style={{ fontSize: 11, color: "#d29922", marginBottom: 4 }}>
                           用户 · 轮次 {i + 1}
                         </div>
                         {row.user}
@@ -881,312 +1223,440 @@ export default function App() {
                       <div
                         style={{
                           marginTop: 8,
-                          maxWidth: "92%",
-                          padding: "10px 12px",
-                          borderRadius: 10,
+                          padding: "8px 10px",
+                          borderRadius: 8,
                           background: "#252017",
-                          border: "1px solid #6e4e12",
                           whiteSpace: "pre-wrap",
-                          fontSize: 14,
+                          fontSize: 13,
+                          border: "1px solid #6e4e12",
                         }}
                       >
-                        <div
-                          style={{
-                            fontSize: 11,
-                            color: "#8b949e",
-                            marginBottom: 4,
-                          }}
-                        >
+                        <div style={{ fontSize: 11, color: "#8b949e", marginBottom: 4 }}>
                           导出中的 AI
                         </div>
-                        {row.ai || "（空）"}
+                        {row.ai?.trim() ? row.ai : "（空）"}
                       </div>
                     </div>
                   ))}
                 </div>
               )}
             </section>
-
             <section
               style={{
-                border: "1px solid #238636",
+                border: "1px solid #30363d",
                 borderRadius: 12,
                 overflow: "auto",
                 padding: 16,
-                background: "#0d1f12",
+                background: "#161b22",
               }}
             >
-              <h2
-                style={{
-                  margin: "0 0 12px",
-                  fontSize: 15,
-                  color: "#3fb950",
-                  fontWeight: 600,
-                }}
-              >
-                ③ GLM 评审（回归实验）
+              <h2 style={{ margin: "0 0 12px", fontSize: 15, color: "#8b949e" }}>
+                落库对话（DB · 用户句应与基准一致，AI 为当前后端新生成）
               </h2>
-              {sessionEvalItems.length === 0 ? (
-                <p style={{ color: "#8b949e", fontSize: 13, lineHeight: 1.5 }}>
-                  尚无命中本会话的评测 run。请先将该会话快照进回归集（case 需带{" "}
-                  <code>source_conversation_id</code>
-                  ），在高级页 enqueue 实验；跑完后此处会显示对话分、访谈摘录稿分、以及该用户名下各{" "}
-                  <strong>Chapter</strong> / <strong>Story</strong>{" "}
-                  正文（<code>canonical_markdown</code>）的成稿分项分。baseline 与 candidate
-                  各一条 run，可对比综合分与 bundle。
+              {loadingLeft ? (
+                <p style={{ color: "#8b949e" }}>加载中…</p>
+              ) : !replayConversationId.trim() ? (
+                <p style={{ color: "#8b949e" }}>
+                  执行回放或「仅建沙箱」后将自动拉取本轮会话的落库消息
                 </p>
-              ) : (
-                <div
-                  style={{ display: "flex", flexDirection: "column", gap: 22 }}
-                >
-                  {sessionEvalItems.map(({ experiment_name, run }) => {
-                    const bundle = run.judge_bundle_json;
-                    const chapters =
-                      bundle &&
-                      typeof bundle === "object" &&
-                      Array.isArray(
-                        (bundle as Record<string, unknown>).chapters,
-                      )
-                        ? ((bundle as Record<string, unknown>).chapters as unknown[])
-                        : [];
-                    const stories =
-                      bundle &&
-                      typeof bundle === "object" &&
-                      Array.isArray((bundle as Record<string, unknown>).stories)
-                        ? ((bundle as Record<string, unknown>).stories as unknown[])
-                        : [];
-                    const convJ =
-                      bundle &&
-                      typeof bundle === "object" &&
-                      "conversation_judge" in bundle
-                        ? (bundle as Record<string, unknown>).conversation_judge
-                        : null;
-                    const memJ =
-                      bundle &&
-                      typeof bundle === "object" &&
-                      "memoir_judge" in bundle
-                        ? (bundle as Record<string, unknown>).memoir_judge
-                        : null;
-                    return (
-                      <div
-                        key={run.id}
-                        style={{
-                          paddingBottom: 18,
-                          borderBottom: "1px solid #238636",
-                        }}
-                      >
-                        <div style={{ fontWeight: 700, fontSize: 14 }}>
-                          {experiment_name}
-                        </div>
-                        <div
-                          style={{
-                            fontSize: 12,
-                            color: "#8b949e",
-                            marginTop: 4,
-                          }}
-                        >
-                          <code>{run.side}</code> · {run.status}
-                          {run.error_message ? ` · ${run.error_message}` : ""}
-                        </div>
-                        <div
-                          style={{
-                            marginTop: 10,
-                            fontSize: 13,
-                            display: "flex",
-                            flexWrap: "wrap",
-                            gap: 12,
-                          }}
-                        >
-                          <span>
-                            综合{" "}
-                            <strong style={{ color: "#3fb950" }}>
-                              {fmtScore(run.composite_score)}
-                            </strong>
-                          </span>
-                          <span>
-                            对话{" "}
-                            <strong>{fmtScore(run.conversation_score_total)}</strong>
-                          </span>
-                          <span>
-                            成稿均值{" "}
-                            <strong>{fmtScore(run.memoir_score_total)}</strong>
-                          </span>
-                          <span style={{ color: "#6e7681" }}>
-                            （含摘录稿 + Chapter + Story 分项之平均，见实验执行逻辑）
-                          </span>
-                        </div>
-
-                        <p
-                          style={{
-                            fontSize: 12,
-                            color: "#8b949e",
-                            margin: "12px 0 6px",
-                            fontWeight: 600,
-                          }}
-                        >
-                          整段对话评审
-                        </p>
-                        <JsonPreview value={convJ} />
-
-                        <p
-                          style={{
-                            fontSize: 12,
-                            color: "#8b949e",
-                            margin: "12px 0 6px",
-                            fontWeight: 600,
-                          }}
-                        >
-                          访谈摘录稿（候选回放拼接稿）
-                        </p>
-                        <JsonPreview value={memJ} />
-
-                        <p
-                          style={{
-                            fontSize: 12,
-                            color: "#8b949e",
-                            margin: "12px 0 6px",
-                            fontWeight: 600,
-                          }}
-                        >
-                          Chapter（DB · 每章 GLM）
-                        </p>
-                        {chapters.length === 0 ? (
-                          <p style={{ color: "#6e7681", fontSize: 12 }}>无或未跑分</p>
-                        ) : (
-                          <ul
-                            style={{
-                              margin: 0,
-                              paddingLeft: 18,
-                              fontSize: 13,
-                            }}
-                          >
-                            {chapters.map((row, idx) => {
-                              const r = row as Record<string, unknown>;
-                              const j = r.judge as Record<string, unknown> | undefined;
-                              return (
-                                <li key={String(r.id ?? idx)} style={{ marginBottom: 8 }}>
-                                  <span style={{ color: "#d29922" }}>
-                                    {String(r.title ?? "")}
-                                  </span>{" "}
-                                  · 总分{" "}
-                                  <strong>{fmtScore(j?.total_score)}</strong>
-                                  {typeof j?.rationale === "string" &&
-                                  j.rationale.trim() ? (
-                                    <div
-                                      style={{
-                                        fontSize: 11,
-                                        color: "#6e7681",
-                                        marginTop: 6,
-                                        whiteSpace: "pre-wrap",
-                                      }}
-                                    >
-                                      {j.rationale}
-                                    </div>
-                                  ) : null}
-                                </li>
-                              );
-                            })}
-                          </ul>
-                        )}
-
-                        <p
-                          style={{
-                            fontSize: 12,
-                            color: "#8b949e",
-                            margin: "12px 0 6px",
-                            fontWeight: 600,
-                          }}
-                        >
-                          Story（DB · 每篇 GLM）
-                        </p>
-                        {stories.length === 0 ? (
-                          <p style={{ color: "#6e7681", fontSize: 12 }}>无或未跑分</p>
-                        ) : (
-                          <ul
-                            style={{
-                              margin: 0,
-                              paddingLeft: 18,
-                              fontSize: 13,
-                            }}
-                          >
-                            {stories.map((row, idx) => {
-                              const r = row as Record<string, unknown>;
-                              const j = r.judge as Record<string, unknown> | undefined;
-                              return (
-                                <li key={String(r.id ?? idx)} style={{ marginBottom: 8 }}>
-                                  <span style={{ color: "#d29922" }}>
-                                    {String(r.title ?? "")}
-                                  </span>{" "}
-                                  · 总分{" "}
-                                  <strong>{fmtScore(j?.total_score)}</strong>
-                                  {typeof j?.rationale === "string" &&
-                                  j.rationale.trim() ? (
-                                    <div
-                                      style={{
-                                        fontSize: 11,
-                                        color: "#6e7681",
-                                        marginTop: 6,
-                                        whiteSpace: "pre-wrap",
-                                      }}
-                                    >
-                                      {j.rationale}
-                                    </div>
-                                  ) : null}
-                                </li>
-                              );
-                            })}
-                          </ul>
-                        )}
-
-                        <p
-                          style={{
-                            fontSize: 12,
-                            color: "#8b949e",
-                            margin: "12px 0 6px",
-                            fontWeight: 600,
-                          }}
-                        >
-                          各轮对话分（候选回放）
-                        </p>
-                        {run.turns.length === 0 ? (
-                          <p style={{ color: "#6e7681", fontSize: 12 }}>无</p>
-                        ) : (
-                          <ul
-                            style={{
-                              margin: 0,
-                              paddingLeft: 18,
-                              fontSize: 12,
-                            }}
-                          >
-                            {run.turns.map((t) => (
-                              <li key={t.id} style={{ marginBottom: 6 }}>
-                                轮 {t.turn_index + 1} ·{" "}
-                                {fmtScore(
-                                  t.judge_scores_json &&
-                                    typeof t.judge_scores_json === "object" &&
-                                    "total_score" in t.judge_scores_json
-                                    ? (t.judge_scores_json as Record<string, unknown>)
-                                        .total_score
-                                    : null,
-                                )}
-                                {t.judge_rationale
-                                  ? ` — ${t.judge_rationale.slice(0, 120)}${t.judge_rationale.length > 120 ? "…" : ""}`
-                                  : ""}
-                              </li>
-                            ))}
-                          </ul>
-                        )}
+              ) : dialogue.length > 0 ? (
+                <div style={{ display: "flex", flexDirection: "column", gap: 10 }}>
+                  {dialogue.map((m, i) => (
+                    <div
+                      key={`${i}-${m.created_at ?? i}`}
+                      style={{
+                        alignSelf: m.role === "human" ? "flex-end" : "flex-start",
+                        maxWidth: "92%",
+                        padding: "8px 10px",
+                        borderRadius: 8,
+                        background: m.role === "human" ? "#1f3a5f" : "#21262d",
+                        border: "1px solid #30363d",
+                        whiteSpace: "pre-wrap",
+                        fontSize: 13,
+                      }}
+                    >
+                      <div style={{ fontSize: 11, color: "#8b949e", marginBottom: 4 }}>
+                        {m.role === "human" ? "用户" : "AI"}
                       </div>
-                    );
-                  })}
+                      {m.content}
+                    </div>
+                  ))}
                 </div>
+              ) : fallbackUserLines.length > 0 ? (
+                <div>
+                  <p style={{ color: "#d29922", fontSize: 12 }}>仅 transcript（无 messages 表）</p>
+                  {fallbackUserLines.map((line, i) => (
+                    <div key={i} style={{ fontSize: 13, marginBottom: 6, whiteSpace: "pre-wrap" }}>
+                      {i + 1}. {line}
+                    </div>
+                  ))}
+                </div>
+              ) : (
+                <p style={{ color: "#8b949e" }}>暂无消息</p>
               )}
             </section>
           </div>
+
+          {turnAlignment.length > 0 ? (
+            <section
+              style={{
+                border: "1px solid #30363d",
+                borderRadius: 10,
+                padding: 12,
+                background: "#0d1117",
+              }}
+            >
+              <h3 style={{ margin: "0 0 8px", fontSize: 14, color: "#58a6ff" }}>
+                逐轮用户句对齐（基准 vs DB 合并后的「每轮一条用户」）
+              </h3>
+              <p style={{ margin: "0 0 10px", fontSize: 12, color: "#6e7681" }}>
+                绿色表示与基准用户句一致；若不一致，多为会话里混入手动输入、或未清空旧会话就再次回放。
+              </p>
+              <div style={{ overflow: "auto", maxHeight: 220 }}>
+                <table
+                  style={{
+                    width: "100%",
+                    borderCollapse: "collapse",
+                    fontSize: 12,
+                  }}
+                >
+                  <thead>
+                    <tr style={{ color: "#8b949e", textAlign: "left" }}>
+                      <th style={{ padding: "6px 8px", borderBottom: "1px solid #30363d" }}>
+                        轮次
+                      </th>
+                      <th style={{ padding: "6px 8px", borderBottom: "1px solid #30363d" }}>
+                        状态
+                      </th>
+                      <th style={{ padding: "6px 8px", borderBottom: "1px solid #30363d" }}>
+                        基准用户句（节选）
+                      </th>
+                      <th style={{ padding: "6px 8px", borderBottom: "1px solid #30363d" }}>
+                        DB 用户句（节选）
+                      </th>
+                    </tr>
+                  </thead>
+                  <tbody>
+                    {turnAlignment.map((row) => {
+                      const clip = (s: string, n: number) => {
+                        const t = (s || "").replace(/\s+/g, " ").trim();
+                        return t.length > n ? `${t.slice(0, n)}…` : t || "—";
+                      };
+                      return (
+                        <tr key={row.index}>
+                          <td
+                            style={{
+                              padding: "6px 8px",
+                              borderBottom: "1px solid #21262d",
+                              color: "#79c0ff",
+                              verticalAlign: "top",
+                            }}
+                          >
+                            {row.index}
+                          </td>
+                          <td
+                            style={{
+                              padding: "6px 8px",
+                              borderBottom: "1px solid #21262d",
+                              verticalAlign: "top",
+                              color: row.match ? "#3fb950" : "#f85149",
+                              whiteSpace: "nowrap",
+                            }}
+                          >
+                            {row.match ? "一致" : "不一致"}
+                          </td>
+                          <td
+                            style={{
+                              padding: "6px 8px",
+                              borderBottom: "1px solid #21262d",
+                              verticalAlign: "top",
+                              color: "#e6edf3",
+                            }}
+                          >
+                            {clip(row.baselineUser, 120)}
+                          </td>
+                          <td
+                            style={{
+                              padding: "6px 8px",
+                              borderBottom: "1px solid #21262d",
+                              verticalAlign: "top",
+                              color: "#e6edf3",
+                            }}
+                          >
+                            {clip(row.dbUser, 120)}
+                          </td>
+                        </tr>
+                      );
+                    })}
+                  </tbody>
+                </table>
+              </div>
+            </section>
+          ) : null}
+
+          <section
+            style={{
+              border: "1px solid #238636",
+              borderRadius: 12,
+              padding: 16,
+              background: "#0d1f12",
+              marginTop: 8,
+            }}
+          >
+            <h3 style={{ margin: "0 0 6px", color: "#3fb950", fontSize: 15 }}>
+              手动 GLM · 对话评审（页面底部）
+            </h3>
+            <p style={{ margin: "0 0 12px", color: "#6e7681", fontSize: 12, lineHeight: 1.5 }}>
+              流程：<strong>两次整体打分</strong>（导出基准全文 transcript 一次、当前落库回放 transcript
+              一次），再<strong>流式输出</strong>中文对比与改进建议。请在上文选择与本会话一致的<strong>基准 MD</strong>；
+              未配置服务端 <code>eval_judge_api_key</code> / <code>zhipu_api_key</code> 时会报错。若某一侧 GLM
+              JSON 解析失败，见服务端日志中的 <code>conversation judge failed</code>。
+            </p>
+            {convJudgePhase ? (
+              <p style={{ color: "#d29922", fontSize: 12, margin: "0 0 8px" }}>{convJudgePhase}</p>
+            ) : null}
+            {convJudgeErrors.length > 0 ? (
+              <ul style={{ color: "#f85149", fontSize: 12, margin: "0 0 12px" }}>
+                {convJudgeErrors.map((e, i) => (
+                  <li key={`${i}-${e.slice(0, 24)}`}>{e}</li>
+                ))}
+              </ul>
+            ) : null}
+            <div
+              style={{
+                display: "grid",
+                gridTemplateColumns: "minmax(0,1fr) minmax(0,1fr)",
+                gap: 12,
+                marginBottom: 12,
+              }}
+            >
+              <div
+                style={{
+                  border: "1px solid #9e6a03",
+                  borderRadius: 8,
+                  padding: 10,
+                  background: "#1c1608",
+                }}
+              >
+                <div style={{ fontSize: 12, color: "#d29922", marginBottom: 6 }}>
+                  基准（导出 MD）整体分
+                </div>
+                {convJudgeBaseline &&
+                typeof convJudgeBaseline === "object" &&
+                convJudgeBaseline !== null ? (
+                  <>
+                    <div style={{ fontSize: 20, color: "#ffa657", fontWeight: 600 }}>
+                      {typeof (convJudgeBaseline as { total_score?: number }).total_score ===
+                      "number"
+                        ? (convJudgeBaseline as { total_score: number }).total_score.toFixed(1)
+                        : "—"}
+                    </div>
+                    <JsonPreview
+                      value={(convJudgeBaseline as Record<string, unknown>) ?? {}}
+                    />
+                  </>
+                ) : (
+                  <p style={{ color: "#6e7681", fontSize: 12, margin: 0 }}>
+                    {!fixtureName.trim()
+                      ? "未选择基准 MD：服务端仅对回放 transcript 做整体分与单侧建议。"
+                      : "等待基准整体分…（若失败见上方红色错误与服务端日志）"}
+                  </p>
+                )}
+              </div>
+              <div
+                style={{
+                  border: "1px solid #30363d",
+                  borderRadius: 8,
+                  padding: 10,
+                  background: "#161b22",
+                }}
+              >
+                <div style={{ fontSize: 12, color: "#79c0ff", marginBottom: 6 }}>
+                  回放 / 新测（DB）整体分
+                </div>
+                {convJudgeReplay &&
+                typeof convJudgeReplay === "object" &&
+                convJudgeReplay !== null ? (
+                  <>
+                    <div style={{ fontSize: 20, color: "#58a6ff", fontWeight: 600 }}>
+                      {typeof (convJudgeReplay as { total_score?: number }).total_score ===
+                      "number"
+                        ? (convJudgeReplay as { total_score: number }).total_score.toFixed(1)
+                        : "—"}
+                    </div>
+                    <JsonPreview value={(convJudgeReplay as Record<string, unknown>) ?? {}} />
+                  </>
+                ) : (
+                  <p style={{ color: "#6e7681", fontSize: 12, margin: 0 }}>等待打分结果…</p>
+                )}
+              </div>
+            </div>
+            <div
+              style={{
+                border: "1px solid #30363d",
+                borderRadius: 8,
+                padding: 12,
+                background: "#0d1117",
+                minHeight: 120,
+                maxHeight: 360,
+                overflow: "auto",
+              }}
+            >
+              <div style={{ fontSize: 12, color: "#8b949e", marginBottom: 8 }}>
+                对比与建议（流式）
+              </div>
+              <div
+                style={{
+                  whiteSpace: "pre-wrap",
+                  fontSize: 13,
+                  color: "#e6edf3",
+                  lineHeight: 1.55,
+                }}
+              >
+                {convJudgeStreamText || (
+                  <span style={{ color: "#6e7681" }}>
+                    点击工具栏「GLM 评审对话（流式）」后，此处逐字显示模型输出。
+                  </span>
+                )}
+              </div>
+            </div>
+          </section>
         </main>
       ) : null}
 
-      {view === "admin" ? (
+      {mainView === "memoir" ? (
+        <main style={{ padding: 20, maxWidth: 1100, margin: "0 auto" }}>
+          <h1 style={{ margin: "0 0 12px", fontSize: 22 }}>回忆录章节评测</h1>
+          <p style={{ color: "#8b949e", fontSize: 13 }}>
+            基准正文来自同一套 MD 的「回忆录章节」段落；与库中 Chapter/Story 对照后由 GLM 按 rubric 打分。
+          </p>
+          <div style={{ display: "flex", flexWrap: "wrap", gap: 10, marginBottom: 16, alignItems: "center" }}>
+            <label style={{ color: "#d29922" }}>
+              基准 MD{" "}
+              <select
+                value={fixtureName}
+                onChange={(e) => setFixtureName(e.target.value)}
+                style={{
+                  marginLeft: 6,
+                  maxWidth: 280,
+                  padding: "6px 10px",
+                  borderRadius: 6,
+                  background: "#21262d",
+                  color: "#e6edf3",
+                  border: "1px solid #30363d",
+                }}
+              >
+                {fixtureFiles.map((f) => (
+                  <option key={f} value={f}>
+                    {f}
+                  </option>
+                ))}
+              </select>
+            </label>
+            <label style={{ fontSize: 13, color: "#8b949e" }}>
+              用户 ID
+              <input
+                value={evalUserId}
+                onChange={(e) => setEvalUserId(e.target.value)}
+                style={{
+                  marginLeft: 8,
+                  minWidth: 280,
+                  padding: "6px 10px",
+                  borderRadius: 6,
+                  background: "#0d1117",
+                  color: "#e6edf3",
+                  border: "1px solid #30363d",
+                }}
+              />
+            </label>
+            <button
+              type="button"
+              style={btn}
+              disabled={memoirSnapBusy}
+              onClick={() => void runMemoirSnapshot()}
+            >
+              {memoirSnapBusy ? "加载中…" : "刷新库中章节/故事"}
+            </button>
+            <button
+              type="button"
+              style={btnPrimary}
+              disabled={memoirJudgeBusy}
+              onClick={() => void runJudgeMemoir()}
+            >
+              {memoirJudgeBusy ? "评审中…" : "GLM 评审章节"}
+            </button>
+          </div>
+          <p style={{ fontSize: 12, color: "#6e7681" }}>
+            基线条目：{fixtureMemoirSections.length} 段（自 MD 解析）
+          </p>
+          <div
+            style={{
+              display: "grid",
+              gridTemplateColumns: "minmax(0,1fr) minmax(0,1fr)",
+              gap: 12,
+              marginBottom: 16,
+            }}
+          >
+            <section
+              style={{
+                border: "1px solid #9e6a03",
+                borderRadius: 10,
+                padding: 12,
+                background: "#1c1608",
+                maxHeight: 360,
+                overflow: "auto",
+              }}
+            >
+              <h3 style={{ color: "#d29922", fontSize: 14, marginTop: 0 }}>导出基线（节选）</h3>
+              <ul style={{ margin: 0, paddingLeft: 18, fontSize: 12, color: "#e6edf3" }}>
+                {fixtureMemoirSections.map((s, i) => (
+                  <li key={`${i}-${s.title}`} style={{ marginBottom: 8 }}>
+                    <strong>{s.title}</strong>
+                    <div style={{ color: "#8b949e", marginTop: 4, whiteSpace: "pre-wrap" }}>
+                      {(s.body || "").slice(0, 400)}
+                      {(s.body || "").length > 400 ? "…" : ""}
+                    </div>
+                  </li>
+                ))}
+              </ul>
+            </section>
+            <section
+              style={{
+                border: "1px solid #30363d",
+                borderRadius: 10,
+                padding: 12,
+                background: "#161b22",
+                maxHeight: 360,
+                overflow: "auto",
+              }}
+            >
+              <h3 style={{ color: "#8b949e", fontSize: 14, marginTop: 0 }}>数据库快照</h3>
+              {memoirSnapshot &&
+              typeof memoirSnapshot === "object" &&
+              memoirSnapshot !== null ? (
+                <JsonPreview value={memoirSnapshot} />
+              ) : (
+                <p style={{ color: "#6e7681", fontSize: 12 }}>点击「刷新库中章节/故事」</p>
+              )}
+            </section>
+          </div>
+          <section
+            style={{
+              border: "1px solid #238636",
+              borderRadius: 12,
+              padding: 16,
+              background: "#0d1f12",
+            }}
+          >
+            <h3 style={{ color: "#3fb950", fontSize: 15, marginTop: 0 }}>手动 GLM · 章节/故事</h3>
+            {manualMemoirJudge ? <JsonPreview value={manualMemoirJudge} /> : (
+              <p style={{ color: "#6e7681", fontSize: 12, margin: 0 }}>点击「GLM 评审章节」</p>
+            )}
+          </section>
+        </main>
+      ) : null}
+
+      {mainView === "admin" ? (
         <main style={{ padding: 20, maxWidth: 900, margin: "0 auto" }}>
           <p style={{ color: "#6e7681", fontSize: 12, margin: "0 0 12px" }}>
             在此页停留时，回归集与实验列表每 {ADMIN_POLL_MS / 1000}{" "}