refactor(chat): AI-native prompts, remove interview heuristics

- Drop interview_reply_length and utterance_substance; always run the stage LLM and memory retrieval when enabled; trim Settings fields and .env.example.
- Replace guided/opening prompts with compact fact blocks plus unified behavior guidance; slim background_voice and persona down to tone hints.
- InterviewAgent now reads fixed reply limits from settings (chat_interview_max_tokens, chat_interview_max_chars_per_segment, chat_interview_max_segments).

Also includes stacked work: profile followup/extract path, evaluation rubric and judge schema updates, transcript SPLIT handling in execution service, user export markdown split tests, and golden case fixture.
2026-04-06 22:22:50 +08:00
parent ca8bcc8489
commit 2fded6fbd9
27 changed files with 426 additions and 1349 deletions
--- a/api/app/agents/chat/interview_agent.py
+++ b/api/app/agents/chat/interview_agent.py
@@ -13,7 +13,6 @@ from app.agents.chat.helpers import format_history_string, get_history_with_wind
 from app.agents.chat.personas import normalize_interview_persona
 from app.agents.chat.prompt_context import ChatPromptContext
 from app.agents.chat.stage_detection import keyword_fallback_primary_stage
-from app.agents.chat.interview_reply_length import compute_reply_plan
 from app.agents.chat.prompts_conversation import (
    SLOT_NAME_MAP,
    get_opening_prompt,
@@ -67,23 +66,6 @@ class InterviewAgent:
        """关键词回退：与 stage_detection 一致（多阶段打分）。"""
        return keyword_fallback_primary_stage(user_message)

-    def _estimate_same_topic_turns(
-        self, history_messages: List[Any], current_filled_slots: dict
-    ) -> int:
-        """估算同一话题的连续轮数（保守：宁可多陪聊几轮再换）。"""
-        n_pairs = len(history_messages) // 2
-        if n_pairs <= 1:
-            return n_pairs
-        recent_window = min(n_pairs, 5)
-        recent = history_messages[-(recent_window * 2) :]
-        nonempty_user_turns = 0
-        for i in range(0, len(recent), 2):
-            msg = recent[i]
-            text = msg.content if hasattr(msg, "content") else str(msg)
-            if len(text.strip()) > 5:
-                nonempty_user_turns += 1
-        return nonempty_user_turns
-
    def _resolve_text_for_model(
        self,
        user_message: str,
@@ -137,27 +119,21 @@ class InterviewAgent:
                max_chars=settings.chat_history_max_chars,
            )
            conversation_turn_total = hw.turn_total
-            same_topic_turns = self._estimate_same_topic_turns(hw.window, filled_slots)
            all_stages_coverage = memoir_state.all_stages_coverage()
            persona = normalize_interview_persona(settings.chat_interview_persona)
-            reply_plan = compute_reply_plan(
-                text_for_model,
-                background_voice=background_voice,
-                settings=settings,
-            )
+            max_segments = int(settings.chat_interview_max_segments)
+            max_tokens = int(settings.chat_interview_max_tokens)
+            max_chars = int(settings.chat_interview_max_chars_per_segment)
+
            ctx = ChatPromptContext(
                current_stage=memoir_state.current_stage,
                empty_slots=empty_slots,
                filled_slots=filled_slots,
-                user_message=text_for_model,
-                conversation_turn_total=conversation_turn_total,
-                same_topic_turns=same_topic_turns,
                all_stages_coverage=all_stages_coverage,
                detected_user_stage=du,
                user_profile_context=user_profile_context,
                persona=persona,
                memory_evidence_text=memory_evidence_text,
-                reply_length_mode=reply_plan.mode.value,
                background_voice=background_voice,
                occupation=occupation,
            )
@@ -181,7 +157,7 @@ class InterviewAgent:
                    omit_system_body=settings.agent_log_omit_system_message_body,
                ),
            )
-            chat_llm = self.llm.bind(max_tokens=reply_plan.max_tokens)
+            chat_llm = self.llm.bind(max_tokens=max_tokens)
            prompt_chars = _message_contents_char_count(messages)
            llm_t0 = time.perf_counter()
            with agent_span(
@@ -212,26 +188,25 @@ class InterviewAgent:
            )
            raw_list = segments_from_llm_response(
                response_text,
-                max_segments=reply_plan.max_segments,
+                max_segments=max_segments,
            )
            if not raw_list:
                raw_list = [response_text.strip()]
            out = truncate_chat_segments(
                raw_list,
-                max_segments=reply_plan.max_segments,
-                max_chars_per_segment=reply_plan.max_chars_per_segment,
+                max_segments=max_segments,
+                max_chars_per_segment=max_chars,
            )
            if not out:
-                out = [response_text.strip()[: reply_plan.max_chars_per_segment]]
+                out = [response_text.strip()[:max_chars]]
            out = nonempty_segments_or_fallback(out, fallback=_FALLBACK_REPLY)
            log_agent_summary(
                logger,
                "InterviewAgent.generate_response segments={} conversation_id={} "
-                "reply_length_mode={} max_tokens={}",
+                "max_tokens={}",
                len(out),
                conversation_id,
-                reply_plan.mode.value,
-                reply_plan.max_tokens,
+                max_tokens,
            )
            return AgentChatTurn(messages=out, skip_tts=False)
        except Exception as e:
@@ -314,15 +289,11 @@ class InterviewAgent:
            raw_list = segments_from_llm_response(response_text, max_segments=2)
            if not raw_list:
                raw_list = [response_text.strip()]
-            open_plan = compute_reply_plan(
-                "x" * 50,
-                background_voice=background_voice,
-                settings=settings,
-            )
+            max_chars = int(settings.chat_interview_max_chars_per_segment)
            out = truncate_chat_segments(
                raw_list,
                max_segments=2,
-                max_chars_per_segment=open_plan.max_chars_per_segment,
+                max_chars_per_segment=max_chars,
            )
            log_agent_summary(
                logger,
@@ -330,11 +301,7 @@ class InterviewAgent:
                len(out),
                conversation_id,
            )
-            segments = (
-                out
-                if out
-                else [response_text.strip()[: open_plan.max_chars_per_segment]]
-            )
+            segments = out if out else [response_text.strip()[:max_chars]]
            return nonempty_segments_or_fallback(
                segments,
                fallback="你好呀～ 又见面了，最近有没有什么事想跟我说说？",