refactor(chat): AI-native prompts, remove interview heuristics

- Drop interview_reply_length and utterance_substance; always run the stage LLM and memory retrieval when enabled; trim Settings fields and .env.example.
- Replace guided/opening prompts with compact fact blocks plus unified behavior guidance; slim background_voice and persona down to tone hints.
- InterviewAgent uses fixed chat_interview max_tokens/chars/segments.

Also includes stacked work: profile followup/extract path, evaluation rubric and judge schema updates, transcript [SPLIT] handling in the evaluation execution service, user export markdown split tests, and a golden case fixture.
2026-04-06 22:22:50 +08:00
parent ca8bcc8489
commit 2fded6fbd9
27 changed files with 426 additions and 1349 deletions
--- a/api/app/features/evaluation/execution_service.py
+++ b/api/app/features/evaluation/execution_service.py
@@ -51,6 +51,11 @@ def _utterances_for_case(case: EvalCase) -> list[str]:
    return [str(u).strip() for u in raw if str(u).strip()]


+def _assistant_text_for_eval_display(raw: str) -> str:
+    """Return ``raw`` (or "" when falsy) with each literal ``[SPLIT]`` replaced by a newline, for judge and transcript display."""
+    return (raw or "").replace("[SPLIT]", "\n")
+
+
 async def execute_eval_run(
    db: AsyncSession,
    *,
@@ -127,12 +132,14 @@ async def execute_eval_run(
    for i, u in enumerate(utterances):
        if i >= len(replies):
            break
-        transcript_parts.append(f"用户: {u}\nAI: {replies[i]}")
+        transcript_parts.append(
+            f"用户: {u}\nAI: {_assistant_text_for_eval_display(replies[i])}"
+        )
    prior = ""
    for idx, u in enumerate(utterances):
        if idx >= len(replies):
            break
-        reply = replies[idx]
+        reply = _assistant_text_for_eval_display(replies[idx])
        lat = latencies[idx] if idx < len(latencies) else None
        tj = await judge.judge_turn(
            prior_transcript=prior,
@@ -146,7 +153,7 @@ async def execute_eval_run(
            run_id=run.id,
            turn_index=idx,
            user_utterance=u,
-            assistant_reply=reply,
+            assistant_reply=replies[idx],
            duration_ms=lat,
            judge_scores_json=scores,
            judge_rationale=rationale,