refactor(chat): AI-native prompts, remove interview heuristics

- Drop interview_reply_length and utterance_substance; always run stage LLM
  and memory retrieval when enabled; trim Settings fields and .env.example.
- Replace guided/opening prompts with compact fact blocks plus unified
  behavior guidance; slim background_voice and persona to tone hints.
- InterviewAgent uses fixed chat_interview max_tokens/chars/segments.

Also includes stacked work: profile followup/extract path, evaluation rubric
and judge schema updates, transcript SPLIT handling in execution service,
user export markdown split tests, and golden case fixture.
This commit is contained in:
Kevin
2026-04-06 22:22:50 +08:00
parent ca8bcc8489
commit 2fded6fbd9
27 changed files with 426 additions and 1349 deletions

View File

@@ -51,6 +51,11 @@ def _utterances_for_case(case: EvalCase) -> list[str]:
return [str(u).strip() for u in raw if str(u).strip()]
def _assistant_text_for_eval_display(raw: str) -> str:
"""评审与 transcript 展示:避免字面量 [SPLIT] 干扰 judge 阅读。"""
return (raw or "").replace("[SPLIT]", "\n")
async def execute_eval_run(
db: AsyncSession,
*,
@@ -127,12 +132,14 @@ async def execute_eval_run(
for i, u in enumerate(utterances):
if i >= len(replies):
break
transcript_parts.append(f"用户: {u}\nAI: {replies[i]}")
transcript_parts.append(
f"用户: {u}\nAI: {_assistant_text_for_eval_display(replies[i])}"
)
prior = ""
for idx, u in enumerate(utterances):
if idx >= len(replies):
break
reply = replies[idx]
reply = _assistant_text_for_eval_display(replies[idx])
lat = latencies[idx] if idx < len(latencies) else None
tj = await judge.judge_turn(
prior_transcript=prior,
@@ -146,7 +153,7 @@ async def execute_eval_run(
run_id=run.id,
turn_index=idx,
user_utterance=u,
assistant_reply=reply,
assistant_reply=replies[idx],
duration_ms=lat,
judge_scores_json=scores,
judge_rationale=rationale,