2026-04-08 09:38:07 +08:00
|
|
|
|
"""手动触发 GLM-5 评审(不写 eval_runs)。"""
|
2026-04-06 23:19:20 +08:00
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
from collections.abc import AsyncIterator
|
|
|
|
|
|
from typing import Any
|
|
|
|
|
|
|
|
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
|
|
|
|
|
|
from app.core.dependencies import get_eval_judge_langchain_llm
|
|
|
|
|
|
from app.core.logging import get_logger
|
2026-04-07 10:34:59 +08:00
|
|
|
|
from app.features.conversation import repo as conversation_repo
|
2026-04-06 23:19:20 +08:00
|
|
|
|
from app.features.evaluation.errors import (
|
|
|
|
|
|
EvaluationBadRequestError,
|
|
|
|
|
|
EvaluationNotFoundError,
|
|
|
|
|
|
)
|
|
|
|
|
|
from app.features.evaluation.judge_service import EvalJudgeService
|
2026-04-07 17:15:01 +08:00
|
|
|
|
from app.features.evaluation.transcript_for_judge import (
|
|
|
|
|
|
assistant_text_for_eval_display,
|
|
|
|
|
|
format_eval_turn_block,
|
|
|
|
|
|
format_export_turns_with_labels,
|
|
|
|
|
|
format_session_messages_with_turn_labels,
|
|
|
|
|
|
pair_session_messages_to_turns,
|
|
|
|
|
|
)
|
2026-04-06 23:19:20 +08:00
|
|
|
|
from app.features.evaluation.schemas import MemoirSectionBaselineOut
|
|
|
|
|
|
from app.features.evaluation.session_catalog_service import SessionCatalogService
|
|
|
|
|
|
from app.features.evaluation.user_export_fixtures import read_user_export_fixture
|
|
|
|
|
|
from app.features.memoir.repo import get_chapters_for_memoir_list
|
|
|
|
|
|
from app.features.story.repo import get_stories_for_user
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level logger obtained from the project's `get_logger`, keyed by this module's name.
logger = get_logger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
# Default character cap applied by `_clip_md_for_judge` to markdown sent to the judge.
_MAX_JUDGE_MARKDOWN_CHARS = 20_000
|
|
|
|
|
|
# Upper bound on how many chapters are taken (leading list slice) when judging or snapshotting a memoir.
_MAX_EVAL_CHAPTERS = 30
|
|
|
|
|
|
# Upper bound on how many stories are taken (leading list slice) when judging or snapshotting a memoir.
_MAX_EVAL_STORIES = 40
|
2026-04-07 10:34:59 +08:00
|
|
|
|
# Number of conversations (leading slice of the repository result) merged into the evidence transcript.
_MAX_EVIDENCE_CONVERSATIONS = 8
|
|
|
|
|
|
# Default character cap applied by `_trim_evidence_text` to the merged evidence transcript.
_MAX_EVIDENCE_TRANSCRIPT_CHARS = 16_000
|
2026-04-07 17:15:01 +08:00
|
|
|
|
# Per-turn judging keeps only this many trailing characters of the accumulated prior transcript.
_PRIOR_TRANSCRIPT_MAX_CHARS = 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _iter_turn_judgments_for_turns(
    judge: EvalJudgeService,
    turns: list[tuple[str, str]],
    *,
    sse_event: str,
) -> AsyncIterator[dict[str, Any]]:
    """Judge every turn in order and yield one SSE payload per turn.

    The context handed to the judge for a turn is the formatted blocks of all
    earlier turns joined by blank lines, cut down to its trailing
    ``_PRIOR_TRANSCRIPT_MAX_CHARS`` characters. The original note on this
    function states that this mirrors the per-turn truncation and block
    accumulation used by ``execute_eval_run``.

    Args:
        judge: Service whose ``judge_turn`` coroutine scores a single turn.
        turns: ``(user_text, assistant_text)`` pairs in conversation order.
        sse_event: Value stored under the ``"event"`` key of each payload.

    Yields:
        A dict with the keys ``event``, ``turn_index`` (0-based), ``ok``
        (whether ``judge_turn`` returned a result) and ``judge`` (the result's
        ``model_dump()``, or ``None`` when there is no result).
    """
    prior_blocks: list[str] = []
    for idx, (u_raw, ai_raw) in enumerate(turns):
        user_text = (u_raw or "").strip()
        # A missing reply must become an empty string: str(None) would hand the
        # literal text "None" to the judge as if the assistant had said it.
        reply = assistant_text_for_eval_display("" if ai_raw is None else str(ai_raw))

        # Only the tail of the accumulated history is kept as context.
        prior = "\n\n".join(prior_blocks)
        if len(prior) > _PRIOR_TRANSCRIPT_MAX_CHARS:
            prior = prior[-_PRIOR_TRANSCRIPT_MAX_CHARS:]

        verdict = await judge.judge_turn(
            prior_transcript=prior,
            user_utterance=user_text,
            assistant_reply=reply,
            turn_index_0=idx,
        )
        succeeded = verdict is not None
        yield {
            "event": sse_event,
            "turn_index": idx,
            "ok": succeeded,
            "judge": verdict.model_dump() if succeeded else None,
        }
        # The current turn becomes history only after it has been judged.
        prior_blocks.append(format_eval_turn_block(idx, user_text, reply))
|
2026-04-06 23:19:20 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _clip_md_for_judge(text: str, max_chars: int = _MAX_JUDGE_MARKDOWN_CHARS) -> str:
    """Strip *text* and cap it at *max_chars* characters for the judge prompt.

    Falsy input is treated as an empty string. When the stripped text is longer
    than *max_chars*, its first *max_chars* characters are kept and a
    truncation notice is appended after a blank line.
    """
    stripped = text.strip() if text else ""
    if len(stripped) > max_chars:
        return stripped[:max_chars] + "\n\n…(已截断供评审)"
    return stripped
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-07 10:34:59 +08:00
|
|
|
|
def _trim_evidence_text(text: str, max_chars: int = _MAX_EVIDENCE_TRANSCRIPT_CHARS) -> str:
    """Strip *text* and cap it at *max_chars* characters of interview evidence.

    Falsy input is treated as an empty string. When the stripped text is longer
    than *max_chars*, its first *max_chars* characters are kept and a
    truncation notice is appended after a blank line.
    """
    stripped = text.strip() if text else ""
    if len(stripped) > max_chars:
        return stripped[:max_chars] + "\n\n…(访谈证据已截断)"
    return stripped
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _conversation_transcript_for_manual(
    db: AsyncSession, conversation_id: str
) -> str:
    """Fetch one conversation's messages and render them with turn labels.

    The rows come from ``conversation_repo.get_conversation_messages`` and are
    passed unchanged to ``format_session_messages_with_turn_labels``.
    """
    message_rows = await conversation_repo.get_conversation_messages(
        conversation_id, db
    )
    labelled = format_session_messages_with_turn_labels(message_rows)
    return labelled
|
2026-04-07 10:34:59 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _user_transcript_evidence(db: AsyncSession, user_id: str) -> str:
    """Merge a user's conversation transcripts into one evidence text.

    At most ``_MAX_EVIDENCE_CONVERSATIONS`` conversations are used: the leading
    slice of what the repository returns, visited in reverse order. Each
    non-empty transcript is placed under a heading that carries the
    conversation id. The sections are joined by blank lines and passed through
    ``_trim_evidence_text``. Returns ``""`` when the user has no conversations.
    """
    conversations = await conversation_repo.get_user_conversations(user_id, db)
    if not conversations:
        return ""

    sections: list[str] = []
    selected = conversations[:_MAX_EVIDENCE_CONVERSATIONS]
    for conversation in reversed(selected):
        conv_id = str(conversation.id)
        # Transcripts are loaded one at a time on the shared session.
        transcript = await _conversation_transcript_for_manual(db, conv_id)
        if not transcript:
            continue
        sections.append(f"## 会话 {conv_id}\n{transcript}")

    merged = "\n\n".join(sections)
    return _trim_evidence_text(merged)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-06 23:19:20 +08:00
|
|
|
|
def _normalize_title_key(title: str) -> str:
    """Turn a title into a key suitable for loose equality comparison.

    The title is trimmed and lower-cased, a leading run of ``#`` characters
    (plus any whitespace right after it) is removed, and every remaining run of
    whitespace is collapsed to a single space. Falsy input yields ``""``.
    """
    lowered = (title or "").strip().lower()
    without_heading_marks = re.sub(r"^#+\s*", "", lowered)
    collapsed = re.sub(r"\s+", " ", without_heading_marks)
    return collapsed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _baseline_for_chapter_title(
    baselines: list[MemoirSectionBaselineOut],
    chapter_title: str,
    index: int,
) -> MemoirSectionBaselineOut | None:
    """Pick the baseline section that corresponds to a chapter.

    The first baseline whose normalized title equals the chapter's normalized
    title wins. If none matches, the baseline at position *index* is used when
    that position exists.

    Returns:
        The chosen baseline, or ``None`` when *baselines* is empty or neither
        rule selects one.
    """
    if not baselines:
        return None

    wanted = _normalize_title_key(chapter_title)
    title_match = next(
        (
            candidate
            for candidate in baselines
            if _normalize_title_key(candidate.title) == wanted
        ),
        None,
    )
    if title_match is not None:
        return title_match

    # Positional fallback for chapters whose title has no counterpart.
    in_range = 0 <= index < len(baselines)
    return baselines[index] if in_range else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EvalJudgeManualService:
    """On-demand judge operations that share one database session.

    Offers a one-shot conversation judgment, a streaming (SSE-style) variant of
    it, a per-chapter / per-story memoir judgment, and a raw memoir snapshot.
    Per the module docstring, these manual runs do not write ``eval_runs``.
    """

    def __init__(self, db: AsyncSession) -> None:
        """Keep the session that every query issued by this service uses."""
        self._db = db

    async def judge_conversation(
        self,
        conversation_id: str,
        fixture_filename: str | None,
    ) -> dict[str, Any]:
        """Judge a stored conversation, optionally alongside a baseline fixture.

        Args:
            conversation_id: Id of the conversation to judge; surrounding
                whitespace is ignored.
            fixture_filename: Optional baseline fixture name; blank means no
                baseline.

        Returns:
            A dict with ``conversation_id``, ``fixture_filename``,
            ``baseline_transcript``, ``replay_transcript``, ``baseline_judge``,
            ``replay_judge`` (each judge entry is a ``model_dump()`` or
            ``None``) and ``errors`` (failure notes collected on the way).

        Raises:
            EvaluationBadRequestError: Blank id, nothing to judge, or the
                fixture reader raised ``ValueError``.
            EvaluationNotFoundError: Unknown conversation or missing fixture.
        """
        cid = (conversation_id or "").strip()
        if not cid:
            raise EvaluationBadRequestError("conversation_id is required")

        dialogue = await SessionCatalogService(self._db).get_session_dialogue(cid)
        if not dialogue:
            raise EvaluationNotFoundError("conversation not found")

        replay_transcript = format_session_messages_with_turn_labels(
            list(dialogue.messages)
        )
        if not replay_transcript.strip():
            raise EvaluationBadRequestError("no messages to judge")

        fixture_name = (fixture_filename or "").strip() or None
        baseline_transcript = ""
        if fixture_name:
            try:
                fixture_turns, _ = read_user_export_fixture(fixture_name)
                baseline_transcript = format_export_turns_with_labels(fixture_turns)
            except ValueError as exc:
                raise EvaluationBadRequestError(str(exc)) from exc
            except FileNotFoundError as exc:
                raise EvaluationNotFoundError("fixture not found") from exc

        errors: list[str] = []
        judge = EvalJudgeService(get_eval_judge_langchain_llm())

        baseline_judge_dict: dict[str, Any] | None = None
        if baseline_transcript.strip():
            baseline_result = await judge.judge_conversation_result(
                full_transcript=baseline_transcript
            )
            baseline_output = baseline_result.output
            if not baseline_output:
                errors.append(
                    f"baseline_glm5_failed: {baseline_result.error or 'unknown error'}"
                )
            else:
                baseline_judge_dict = baseline_output.model_dump()
        elif fixture_name:
            # A fixture was named but produced no text to judge.
            errors.append("baseline_transcript_empty")

        replay_result = await judge.judge_conversation_result(
            full_transcript=replay_transcript
        )
        replay_output = replay_result.output
        if replay_output:
            replay_judge_dict = replay_output.model_dump()
        else:
            replay_judge_dict = None
            errors.append(
                f"replay_glm5_failed: {replay_result.error or 'unknown error'}"
            )

        return {
            "conversation_id": cid,
            "fixture_filename": fixture_name,
            "baseline_transcript": baseline_transcript,
            "replay_transcript": replay_transcript,
            "baseline_judge": baseline_judge_dict,
            "replay_judge": replay_judge_dict,
            "errors": errors,
        }

    async def iter_conversation_judge_sse(
        self,
        conversation_id: str,
        fixture_filename: str | None,
        *,
        include_turn_judges: bool = False,
        include_baseline_turn_judges: bool = False,
    ) -> AsyncIterator[dict[str, Any]]:
        """Stream judge events for a conversation, for use behind SSE.

        Event order: the overall baseline score first, then the overall replay
        score, then optional per-turn scores, then the streamed comparison and
        suggestions, and finally ``done``. Validation, loading, fixture and
        configuration problems are reported as a single ``error`` event, after
        which the stream ends.

        Args:
            conversation_id: Id of the conversation to judge.
            fixture_filename: Optional baseline fixture name; blank means no
                baseline.
            include_turn_judges: Also emit per-turn judgments for the replay.
            include_baseline_turn_judges: Also emit per-turn judgments for the
                baseline when its overall judgment succeeded.

        Yields:
            Event dicts whose ``"event"`` key is one of ``error``, ``meta``,
            ``warning``, ``baseline_judge``, ``baseline_turn_judge``,
            ``replay_judge``, ``replay_turn_judge``, ``compare_delta`` or
            ``done``.
        """
        cid = (conversation_id or "").strip()
        if not cid:
            yield {
                "event": "error",
                "phase": "validate",
                "message": "conversation_id is required",
            }
            return

        dialogue = await SessionCatalogService(self._db).get_session_dialogue(cid)
        if not dialogue:
            yield {
                "event": "error",
                "phase": "load",
                "message": "conversation not found",
            }
            return

        replay_transcript = format_session_messages_with_turn_labels(
            list(dialogue.messages)
        )
        if not replay_transcript.strip():
            yield {"event": "error", "phase": "load", "message": "no messages to judge"}
            return

        fixture_name = (fixture_filename or "").strip() or None
        baseline_transcript = ""
        export_turns: list[tuple[str, str]] | None = None
        if fixture_name:
            fixture_problem: str | None = None
            try:
                raw_turns, _ = read_user_export_fixture(fixture_name)
                export_turns = list(raw_turns)
                baseline_transcript = format_export_turns_with_labels(raw_turns)
            except ValueError as exc:
                fixture_problem = str(exc)
            except FileNotFoundError:
                fixture_problem = "fixture not found"
            if fixture_problem is not None:
                yield {"event": "error", "phase": "fixture", "message": fixture_problem}
                return

        judge_llm = get_eval_judge_langchain_llm()
        if not judge_llm:
            yield {
                "event": "error",
                "phase": "config",
                "message": "评审 LLM 未配置(eval_judge_api_key / zhipu_api_key)",
            }
            return

        judge = EvalJudgeService(judge_llm)
        yield {"event": "meta", "conversation_id": cid, "fixture_filename": fixture_name}

        baseline_judge = None
        if baseline_transcript.strip():
            baseline_result = await judge.judge_conversation_result(
                full_transcript=baseline_transcript
            )
            baseline_judge = baseline_result.output
            yield {
                "event": "baseline_judge",
                "ok": baseline_judge is not None,
                "judge": baseline_judge.model_dump() if baseline_judge else None,
            }
            if not baseline_judge:
                if baseline_result.error:
                    baseline_failure = f"基准整体打分失败:{baseline_result.error}"
                else:
                    baseline_failure = (
                        "基准整体打分失败(密钥、限流或 JSON 解析失败,见服务端日志)"
                    )
                # A failed baseline is reported but does not stop the replay judgment.
                yield {
                    "event": "error",
                    "phase": "baseline_glm5",
                    "message": baseline_failure,
                }
            elif include_baseline_turn_judges and export_turns:
                yield {"event": "meta", "phase": "baseline_turn_judges_start"}
                async for turn_event in _iter_turn_judgments_for_turns(
                    judge,
                    export_turns,
                    sse_event="baseline_turn_judge",
                ):
                    yield turn_event
        else:
            # No baseline text: warn, then report the baseline judgment as skipped.
            yield {
                "event": "warning",
                "message": "未提供基准 MD 或基准无文本:仅对回放对话打分并输出单侧改进建议",
            }
            yield {
                "event": "baseline_judge",
                "ok": False,
                "skipped": True,
                "judge": None,
            }

        replay_result = await judge.judge_conversation_result(
            full_transcript=replay_transcript
        )
        replay_judge = replay_result.output
        yield {
            "event": "replay_judge",
            "ok": replay_judge is not None,
            "judge": replay_judge.model_dump() if replay_judge else None,
        }
        if not replay_judge:
            if replay_result.error:
                replay_failure = f"回放对话整体 GLM-5 打分失败:{replay_result.error}"
            else:
                replay_failure = (
                    "回放对话整体 GLM-5 打分失败(空密钥、限流或 JSON 解析失败,见服务端日志)"
                )
            yield {
                "event": "error",
                "phase": "replay_glm5",
                "message": replay_failure,
            }
            # Without a replay judgment there is nothing to compare.
            yield {"event": "done"}
            return

        if include_turn_judges:
            replay_pairs = pair_session_messages_to_turns(list(dialogue.messages))
            if replay_pairs:
                yield {"event": "meta", "phase": "replay_turn_judges_start"}
                async for turn_event in _iter_turn_judgments_for_turns(
                    judge,
                    replay_pairs,
                    sse_event="replay_turn_judge",
                ):
                    yield turn_event

        comparison = judge.stream_conversation_compare(
            baseline_transcript=baseline_transcript,
            replay_transcript=replay_transcript,
            baseline_judge=baseline_judge,
            replay_judge=replay_judge,
        )
        async for fragment in comparison:
            if not fragment:
                continue
            yield {"event": "compare_delta", "text": fragment}

        yield {"event": "done"}

    async def judge_memoir_for_user(
        self,
        user_id: str,
        baseline_sections: list[MemoirSectionBaselineOut] | None,
    ) -> dict[str, Any]:
        """Judge a user's active chapters and stories against interview evidence.

        Chapters and stories with an empty body are skipped. A chapter is paired
        with a baseline section when one can be selected, and the baseline
        excerpt is then included in the judged markdown. An exception in the
        chapter pass or in the story pass is logged as a warning and ends that
        pass; results gathered before it are still returned.

        Args:
            user_id: Id of the user; surrounding whitespace is ignored.
            baseline_sections: Optional baseline sections to pair with chapters.

        Returns:
            A dict with ``user_id``, ``chapter_results`` and ``story_results``.

        Raises:
            EvaluationBadRequestError: When *user_id* is blank.
        """
        uid = (user_id or "").strip()
        if not uid:
            raise EvaluationBadRequestError("user_id is required")

        judge = EvalJudgeService(get_eval_judge_langchain_llm())
        baselines = list(baseline_sections or [])
        evidence_transcript = await _user_transcript_evidence(self._db, uid)
        # The same instruction is sent with every chapter and story judgment.
        notes = "严格按文档打分;真实性、事实覆盖率、可追溯性必须优先对照该用户历史访谈证据。"

        chapter_results: list[dict[str, Any]] = []
        try:
            chapters = await get_chapters_for_memoir_list(
                uid, self._db, active_only=True, is_new_only=None
            )
            for position, chapter in enumerate(chapters[:_MAX_EVAL_CHAPTERS]):
                current_text = (chapter.canonical_markdown or "").strip()
                if not current_text:
                    continue

                baseline = _baseline_for_chapter_title(
                    baselines, str(chapter.title or ""), position
                )
                if baseline and (baseline.body or "").strip():
                    baseline_excerpt = _clip_md_for_judge(baseline.body, max_chars=6000)
                else:
                    baseline_excerpt = ""

                sections = [f"# 章节:{chapter.title}\n\n"]
                if baseline_excerpt:
                    sections.append(f"## 导出基线(节选)\n\n{baseline_excerpt}\n\n")
                sections.append(f"## 当前成稿\n\n{_clip_md_for_judge(current_text)}")

                chapter_verdict = await judge.judge_memoir(
                    memoir_markdown="".join(sections),
                    source_transcript=evidence_transcript,
                    reference_memoir_markdown=baseline_excerpt,
                    evidence_notes=notes,
                )
                chapter_results.append(
                    {
                        "id": chapter.id,
                        "title": chapter.title,
                        "order_index": chapter.order_index,
                        "baseline_title": baseline.title if baseline else None,
                        "judge": chapter_verdict.model_dump() if chapter_verdict else None,
                    }
                )
        except Exception as exc:
            logger.warning("manual memoir chapter judges failed: {}", exc)

        story_results: list[dict[str, Any]] = []
        try:
            stories = await get_stories_for_user(self._db, uid, status="active")
            for story in stories[:_MAX_EVAL_STORIES]:
                story_text = (story.canonical_markdown or "").strip()
                if not story_text:
                    continue

                story_markdown = f"# 故事:{story.title}\n\n{_clip_md_for_judge(story_text)}"
                story_verdict = await judge.judge_memoir(
                    memoir_markdown=story_markdown,
                    source_transcript=evidence_transcript,
                    evidence_notes=notes,
                )
                story_results.append(
                    {
                        "id": story.id,
                        "title": story.title,
                        "stage": story.stage,
                        "judge": story_verdict.model_dump() if story_verdict else None,
                    }
                )
        except Exception as exc:
            logger.warning("manual memoir story judges failed: {}", exc)

        return {
            "user_id": uid,
            "chapter_results": chapter_results,
            "story_results": story_results,
        }

    async def memoir_snapshot(self, user_id: str) -> dict[str, Any]:
        """Return the raw content of a user's active chapters and stories.

        No judging happens here. An exception while loading chapters or stories
        is logged as a warning, and whatever was collected for that list before
        the exception is still returned.

        Args:
            user_id: Id of the user; surrounding whitespace is ignored.

        Returns:
            A dict with ``user_id``, ``chapters`` and ``stories``.

        Raises:
            EvaluationBadRequestError: When *user_id* is blank.
        """
        uid = (user_id or "").strip()
        if not uid:
            raise EvaluationBadRequestError("user_id is required")

        chapters_out: list[dict[str, Any]] = []
        stories_out: list[dict[str, Any]] = []

        try:
            chapters = await get_chapters_for_memoir_list(
                uid, self._db, active_only=True, is_new_only=None
            )
            for chapter in chapters[:_MAX_EVAL_CHAPTERS]:
                chapter_row = {
                    "id": chapter.id,
                    "title": chapter.title,
                    "category": chapter.category,
                    "order_index": chapter.order_index,
                    "canonical_markdown": chapter.canonical_markdown,
                }
                chapters_out.append(chapter_row)
        except Exception as exc:
            logger.warning("memoir snapshot chapters failed: {}", exc)

        try:
            stories = await get_stories_for_user(self._db, uid, status="active")
            for story in stories[:_MAX_EVAL_STORIES]:
                story_row = {
                    "id": story.id,
                    "title": story.title,
                    "stage": story.stage,
                    "canonical_markdown": story.canonical_markdown,
                }
                stories_out.append(story_row)
        except Exception as exc:
            logger.warning("memoir snapshot stories failed: {}", exc)

        return {
            "user_id": uid,
            "chapters": chapters_out,
            "stories": stories_out,
        }
|