""" InterviewAgent:正式访谈 Specialist 负责状态感知回复、开场白,不负责 Redis 持久化(由 Orchestrator 统一处理) """ import time from typing import Any, List, Optional from langchain_core.messages import HumanMessage, SystemMessage from app.agents.chat.agent_turn import AgentChatTurn from app.agents.chat.helpers import format_history_string, get_history_with_window from app.agents.chat.interview_state_hints import ( apply_autobiographical_boundary_guard, apply_duplicate_question_guard, extract_recent_questions, segments_are_only_duplicate_guard_fallback, update_recent_questions, ) from app.agents.chat.interview_turn_plan import plan_interview_turn from app.agents.chat.personas import normalize_interview_persona from app.agents.chat.prompt_context import ChatPromptContext from app.agents.chat.prompts_conversation import ( SLOT_NAME_MAP, get_opening_prompt, get_re_greeting_prompt, ) from app.agents.chat.reply_limits import ( nonempty_segments_or_fallback, segments_from_llm_response, truncate_chat_segments, ) from app.agents.chat.reply_planner import maybe_refine_turn_plan_with_llm from app.agents.chat.stage_detection import keyword_fallback_primary_stage from app.agents.state_schema import ( MemoirStateSchema, interview_control_state, narrative_coverage_state, ) from app.core.agent_logging import ( agent_span, log_agent_payload, log_agent_summary, ) from app.core.config import settings from app.core.llm_gateway import LlmGateway, LlmUseCase from app.core.logging import get_logger from app.features.conversation.input_normalize import normalize_chat_input_for_agent logger = get_logger(__name__) # LLM 不可用或调用失败时对用户展示(不暴露异常细节、不触发 TTS) _FALLBACK_REPLY = "刚才网络不太稳,没接上。你可以再说一遍,或稍后再试。" # 仅在「重复问句守卫」把正文削成单句兜底时追加二次 system,只多调一次模型。 _DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX = """## 二次生成(纠偏) 上一版模型输出因包含与「最近已问过的问题」或「已确认事实」重复的问句,已被系统弃用。请**重新写一整条回复**: - 仍须遵守上文全部主规则; - 先贴着用户本轮原话承接半句到一两句(可有画面感); - **禁止**再用与刚才同义、仅换说法的确认型问句; - 若要提问,须换**全新角度**,并锚在用户刚说的具体细节里;也可以本轮**完全不提问**,只并肩承接; - **禁止**整段只有「这一段我记住了」或同类无信息套话。""" def _finalize_chat_segments_after_llm( response_text: str, *, max_segments: int, max_chars: int, memoir_state: MemoirStateSchema, recent_questions: list[str], ) -> tuple[list[str], bool]: raw_list = segments_from_llm_response( response_text, max_segments=max_segments, ) if not raw_list: raw_list = [response_text.strip()] out = truncate_chat_segments( raw_list, max_segments=max_segments, max_chars_per_segment=max_chars, ) if not out: out = [response_text.strip()[:max_chars]] out = nonempty_segments_or_fallback(out, fallback=_FALLBACK_REPLY) out, deduped = apply_duplicate_question_guard( out, state=memoir_state, recent_questions=recent_questions, ) return out, deduped def _get_langchain_llm(): try: return LlmGateway().langchain_llm_for(LlmUseCase("chat.interview")) except Exception: return None def _message_contents_char_count(messages: List[Any]) -> int: n = 0 for m in messages: c = getattr(m, "content", None) if isinstance(c, str): n += len(c) return n class InterviewAgent: """正式访谈 Specialist Agent""" def __init__(self): self.llm = _get_langchain_llm() def _detect_user_stage(self, user_message: str) -> str: """关键词回退:与 stage_detection 一致(多阶段打分)。""" return keyword_fallback_primary_stage(user_message) def _resolve_text_for_model( self, user_message: str, normalized_user_message: Optional[str], ) -> str: """模型侧净稿:编排层已归一则直接用;否则在本层补一次(含可选 LLM)。""" if normalized_user_message is not None: return (normalized_user_message or "").strip() llm_n = None if settings.chat_input_normalize_enabled and ( (settings.chat_input_normalize_mode or "").strip().lower() == "llm" ): llm_n = self.llm return normalize_chat_input_for_agent(user_message or "", llm=llm_n) async def generate_response_with_state( self, conversation_id: str, user_message: str, memoir_state: MemoirStateSchema, user_profile_context: str = "", detected_user_stage: Optional[str] = None, memory_evidence_text: str = "", memory_anchor_source: str = "", memory_planner_text: str = "", background_voice: str = "default", normalized_user_message: Optional[str] = None, occupation: str = "", profile_birth_year: int | None = None, profile_era_place: str = "", stage_switched_this_turn: bool = False, scene_cues_for_planner: Optional[list[str]] = None, ) -> AgentChatTurn: """生成状态感知的访谈回复,不持久化(由 Orchestrator 负责)""" if not self.llm: logger.warning("InterviewAgent: LLM 未配置,返回兜底文案") return AgentChatTurn(messages=[_FALLBACK_REPLY], skip_tts=True) try: text_for_model = self._resolve_text_for_model( user_message, normalized_user_message ) narrative_state = narrative_coverage_state(memoir_state) control_state = interview_control_state(memoir_state) empty_slots = control_state.prompt_empty_slots_for_stage( narrative_state, memoir_state.current_stage ) filled_slots = narrative_state.filled_slots_for_stage( memoir_state.current_stage ) if detected_user_stage is not None: du = detected_user_stage else: du = self._detect_user_stage(text_for_model) hw = await get_history_with_window( conversation_id, max_pairs=settings.chat_history_max_pairs, max_chars=settings.chat_history_max_chars, ) recent_questions = extract_recent_questions(hw.window) conversation_turn_total = hw.turn_total all_stages_coverage = narrative_state.all_stages_coverage() persona = normalize_interview_persona(settings.chat_interview_persona) max_segments = int(settings.chat_interview_max_segments) max_tokens = int(settings.chat_interview_max_tokens) max_chars = int(settings.chat_interview_max_chars_per_segment) turn_plan = plan_interview_turn( current_stage=memoir_state.current_stage, empty_slots=empty_slots, normalized_user_message=text_for_model, memory_evidence_text=(memory_anchor_source or "").strip(), stage_switched_this_turn=stage_switched_this_turn, ) logger.info( "event=interview_turn_plan mode={} anchor_slot={} snippet_len={}", turn_plan.mode, turn_plan.anchor_slot_key or "-", len(turn_plan.anchor_snippet or ""), ) reply_planner_raw = "" baseline_mode = turn_plan.mode baseline_primary_focus = turn_plan.primary_focus if settings.chat_reply_planner_llm_enabled: rq_preview = ( "\n".join(recent_questions[-4:]) if recent_questions else "" ) turn_plan, reply_planner_raw = await maybe_refine_turn_plan_with_llm( self.llm, plan=turn_plan, text_for_model=text_for_model, memory_evidence_text=(memory_planner_text or memory_evidence_text) or "", max_tokens=int(settings.chat_reply_planner_max_tokens), temperature=float(settings.chat_reply_planner_temperature), scene_cues_for_planner=scene_cues_for_planner or [], recent_questions_preview=rq_preview, ) if reply_planner_raw: logger.info( "event=reply_planner_applied memory_usage={} reply_shape={} " "mode={} primary_focus={} focus_source={}", turn_plan.memory_usage, turn_plan.reply_shape, turn_plan.mode, turn_plan.primary_focus, turn_plan.focus_source, ) ctx = ChatPromptContext( current_stage=memoir_state.current_stage, empty_slots=empty_slots, filled_slots=filled_slots, all_stages_coverage=all_stages_coverage, detected_user_stage=du, user_profile_context=user_profile_context, persona=persona, memory_evidence_text=memory_evidence_text, background_voice=background_voice, occupation=occupation, profile_birth_year=profile_birth_year, profile_era_place=profile_era_place, known_facts=memoir_state.known_facts, persona_threads=memoir_state.persona_threads, recent_questions=recent_questions or memoir_state.recent_questions, turn_plan=turn_plan, ) system_prompt = ctx.guided_system_prompt() messages: List[Any] = [SystemMessage(content=system_prompt)] messages.extend(hw.window) messages.append(HumanMessage(content=text_for_model)) history_pairs_windowed = len(hw.window) // 2 window_chars = sum(len(getattr(m, "content", "") or "") for m in hw.window) logger.info( "event=history_window_applied total={} windowed={} chars={}", conversation_turn_total, history_pairs_windowed, window_chars, ) log_agent_payload( logger, "InterviewAgent.generate_response.prompt", format_history_string( messages, omit_system_body=settings.agent_log_omit_system_message_body, ), ) chat_llm = self.llm.bind( max_tokens=max_tokens, temperature=float(settings.chat_interview_temperature), ) prompt_chars = _message_contents_char_count(messages) llm_t0 = time.perf_counter() with agent_span( logger, "InterviewAgent.generate_response.llm", conversation_id=conversation_id, stage=memoir_state.current_stage, ): logger.info( "event=chat_prompt_built agent=InterviewAgent.generate_response_with_state " "prompt_chars={} history_pairs_total={} history_pairs_windowed={}", prompt_chars, conversation_turn_total, history_pairs_windowed, ) response = await chat_llm.ainvoke(messages) response_ms = (time.perf_counter() - llm_t0) * 1000 logger.info( "event=chat_llm_done agent=InterviewAgent.generate_response_with_state " "response_latency_ms={:.2f}", response_ms, ) response_text = ( response.content if hasattr(response, "content") else str(response) ) log_agent_payload( logger, "InterviewAgent.generate_response.raw_response", response_text ) rq_base = recent_questions or memoir_state.recent_questions out, deduped = _finalize_chat_segments_after_llm( response_text, max_segments=max_segments, max_chars=max_chars, memoir_state=memoir_state, recent_questions=rq_base, ) retry_used = False if deduped and segments_are_only_duplicate_guard_fallback(out): retry_system = ( f"{system_prompt}\n\n{_DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX}" ) retry_messages: List[Any] = [ SystemMessage(content=retry_system), *hw.window, HumanMessage(content=text_for_model), ] log_agent_payload( logger, "InterviewAgent.generate_response.retry_prompt", format_history_string( retry_messages, omit_system_body=settings.agent_log_omit_system_message_body, ), ) llm_t1 = time.perf_counter() with agent_span( logger, "InterviewAgent.generate_response.llm_retry", conversation_id=conversation_id, stage=memoir_state.current_stage, ): logger.info( "event=chat_prompt_built agent=InterviewAgent.duplicate_guard_retry " "prompt_chars={} conversation_id={}", _message_contents_char_count(retry_messages), conversation_id, ) response_retry = await chat_llm.ainvoke(retry_messages) logger.info( "event=chat_llm_done agent=InterviewAgent.duplicate_guard_retry " "response_latency_ms={:.2f}", (time.perf_counter() - llm_t1) * 1000, ) response_text_retry = ( response_retry.content if hasattr(response_retry, "content") else str(response_retry) ) log_agent_payload( logger, "InterviewAgent.generate_response.raw_response_retry", response_text_retry, ) out, deduped = _finalize_chat_segments_after_llm( response_text_retry, max_segments=max_segments, max_chars=max_chars, memoir_state=memoir_state, recent_questions=rq_base, ) retry_used = True out, auto_bio = apply_autobiographical_boundary_guard(out) updated_recent_questions = update_recent_questions(rq_base, out) log_agent_summary( logger, "InterviewAgent.generate_response segments={} conversation_id={} " "max_tokens={}", len(out), conversation_id, max_tokens, ) return AgentChatTurn( messages=out, skip_tts=False, interview_state_meta={ "recent_questions": updated_recent_questions, "duplicate_question_guard_triggered": deduped, "duplicate_question_guard_llm_retry": retry_used, "autobiographical_boundary_guard_triggered": auto_bio, "reply_planner_llm_used": bool( settings.chat_reply_planner_llm_enabled and (reply_planner_raw or "").strip() ), "reply_planner_raw_preview": (reply_planner_raw or "")[:800], "focus_planner_baseline_mode": baseline_mode, "focus_planner_baseline_primary_focus": baseline_primary_focus, "focus_planner_mode": turn_plan.mode, "focus_planner_primary_focus": turn_plan.primary_focus, "focus_planner_focus_source": turn_plan.focus_source, "focus_planner_focus_summary": (turn_plan.focus_summary or "")[:200], }, ) except Exception as e: logger.error("生成回应失败: {}", e, exc_info=True) return AgentChatTurn(messages=[_FALLBACK_REPLY], skip_tts=True) async def generate_opening_message( self, conversation_id: str, memoir_state: MemoirStateSchema, user_profile_context: str = "", background_voice: str = "default", occupation: str = "", profile_birth_year: Optional[int] = None, profile_era_place: str = "", ) -> List[str]: """生成空对话开场白,不持久化(由 Orchestrator 负责)""" if not self.llm: return ["你好呀~ 又见面了。今天想从人生里哪一小段回忆开始聊聊?"] try: narrative_state = narrative_coverage_state(memoir_state) control_state = interview_control_state(memoir_state) empty_slots = control_state.prompt_empty_slots_for_stage( narrative_state, memoir_state.current_stage ) empty_slots_readable = [SLOT_NAME_MAP.get(s, s) for s in empty_slots] persona = normalize_interview_persona(settings.chat_interview_persona) prompt = get_opening_prompt( current_stage=memoir_state.current_stage, empty_slots_readable=empty_slots_readable, user_profile_context=user_profile_context, persona=persona, background_voice=background_voice, occupation=occupation, profile_birth_year=profile_birth_year, profile_era_place=profile_era_place, ) hw = await get_history_with_window( conversation_id, max_pairs=settings.chat_history_max_pairs, max_chars=settings.chat_history_max_chars, ) messages: List[Any] = [SystemMessage(content=prompt)] messages.extend(hw.window) if not hw.window: messages.append( HumanMessage(content="(对话刚开始,请自然地说出你的开场白。)") ) else: messages.append( HumanMessage(content="(请根据上文,自然接续并说出你的开场白。)") ) log_agent_payload( logger, "InterviewAgent.opening.prompt", format_history_string( messages, omit_system_body=settings.agent_log_omit_system_message_body, ), ) opening_llm = self.llm.bind( max_tokens=settings.chat_opening_max_tokens, temperature=float(settings.chat_interview_temperature), ) prompt_chars = _message_contents_char_count(messages) llm_t0 = time.perf_counter() with agent_span( logger, "InterviewAgent.opening.llm", conversation_id=conversation_id, ): logger.info( "event=chat_prompt_built agent=InterviewAgent.generate_opening_message " "prompt_chars={} history_pairs_total={} history_pairs_windowed={}", prompt_chars, hw.turn_total, len(hw.window) // 2, ) response = await opening_llm.ainvoke(messages) logger.info( "event=chat_llm_done agent=InterviewAgent.generate_opening_message " "response_latency_ms={:.2f}", (time.perf_counter() - llm_t0) * 1000, ) response_text = ( response.content if hasattr(response, "content") else str(response) ) log_agent_payload( logger, "InterviewAgent.opening.raw_response", response_text ) raw_list = segments_from_llm_response(response_text, max_segments=2) if not raw_list: raw_list = [response_text.strip()] max_chars = int(settings.chat_interview_max_chars_per_segment) out = truncate_chat_segments( raw_list, max_segments=2, max_chars_per_segment=max_chars, ) log_agent_summary( logger, "InterviewAgent.opening segments={} conversation_id={}", len(out), conversation_id, ) segments = out if out else [response_text.strip()[:max_chars]] return nonempty_segments_or_fallback( segments, fallback="你好呀~ 又见面了。今天想从人生里哪一小段回忆开始聊聊?", ) except Exception as e: logger.error("生成开场白失败: {}", e, exc_info=True) return ["你好呀~ 又见面了。今天想从人生里哪一小段回忆开始聊聊?"] async def generate_re_greeting_message( self, conversation_id: str, memoir_state: MemoirStateSchema, idle_hours: float, user_profile_context: str = "", background_voice: str = "default", occupation: str = "", profile_birth_year: Optional[int] = None, profile_era_place: str = "", ) -> List[str]: """老对话回访问候:用户带着已有历史回到对话时,AI 主动做承接式开场。 与 generate_opening_message 的差异:prompt 明确告知有历史 + 距上次的时间感受, 要求轻轻引用历史里的具体细节,不能用首次见面式硬开场。 """ if not self.llm: return ["上次聊到的事我还记着,今天想继续往下讲讲吗?"] try: narrative_state = narrative_coverage_state(memoir_state) control_state = interview_control_state(memoir_state) empty_slots = control_state.prompt_empty_slots_for_stage( narrative_state, memoir_state.current_stage ) empty_slots_readable = [SLOT_NAME_MAP.get(s, s) for s in empty_slots] persona = normalize_interview_persona(settings.chat_interview_persona) prompt = get_re_greeting_prompt( current_stage=memoir_state.current_stage, empty_slots_readable=empty_slots_readable, user_profile_context=user_profile_context, persona=persona, background_voice=background_voice, occupation=occupation, profile_birth_year=profile_birth_year, profile_era_place=profile_era_place, idle_hours=idle_hours, ) hw = await get_history_with_window( conversation_id, max_pairs=settings.chat_history_max_pairs, max_chars=settings.chat_history_max_chars, ) messages: List[Any] = [SystemMessage(content=prompt)] messages.extend(hw.window) messages.append( HumanMessage( content=( "(用户回到这个已有历史的对话,还没说话。" "请基于上文做温和的承接式回访问候。)" ) ) ) log_agent_payload( logger, "InterviewAgent.re_greeting.prompt", format_history_string( messages, omit_system_body=settings.agent_log_omit_system_message_body, ), ) re_greet_llm = self.llm.bind( max_tokens=settings.chat_opening_max_tokens, temperature=float(settings.chat_interview_temperature), ) llm_t0 = time.perf_counter() with agent_span( logger, "InterviewAgent.re_greeting.llm", conversation_id=conversation_id, ): logger.info( "event=chat_prompt_built agent=InterviewAgent.generate_re_greeting_message " "prompt_chars={} history_pairs_total={} history_pairs_windowed={} idle_hours={:.2f}", _message_contents_char_count(messages), hw.turn_total, len(hw.window) // 2, idle_hours, ) response = await re_greet_llm.ainvoke(messages) logger.info( "event=chat_llm_done agent=InterviewAgent.generate_re_greeting_message " "response_latency_ms={:.2f}", (time.perf_counter() - llm_t0) * 1000, ) response_text = ( response.content if hasattr(response, "content") else str(response) ) log_agent_payload( logger, "InterviewAgent.re_greeting.raw_response", response_text ) raw_list = segments_from_llm_response(response_text, max_segments=2) if not raw_list: raw_list = [response_text.strip()] max_chars = int(settings.chat_interview_max_chars_per_segment) out = truncate_chat_segments( raw_list, max_segments=2, max_chars_per_segment=max_chars, ) log_agent_summary( logger, "InterviewAgent.re_greeting segments={} conversation_id={} idle_hours={:.2f}", len(out), conversation_id, idle_hours, ) segments = out if out else [response_text.strip()[:max_chars]] return nonempty_segments_or_fallback( segments, fallback="上次聊到的事我还记着,今天想继续往下讲讲吗?", ) except Exception as e: logger.error("生成回访问候失败: {}", e, exc_info=True) return ["上次聊到的事我还记着,今天想继续往下讲讲吗?"]