Files
life-echo/api/app/agents/chat/interview_agent.py
Kevin 71fbd39e32 feat(api)!: memory single chain — async MemoryService, strict eval closure
Route all memory ingest/retrieve/enrichment/compaction through async MemoryService.
Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and
memoir Phase2 call asyncio.run into MemoryService-backed helpers.

Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop chapters.
evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles;
raise EvidenceClosureMissing instead of partial/fallback lineage tiers.

Split memoir state into NarrativeCoverageState and InterviewControlState; delete the
_interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback
settings from config and evidence assembly.

Update judges, docs, tests, and PlaygroundPage alignment.

Made-with: Cursor
2026-04-30 14:11:50 +08:00

506 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
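InterviewAgent: the formal-interview Specialist.
Responsible for state-aware replies and opening messages; it does not handle Redis persistence (the Orchestrator takes care of that centrally).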
"""
import time
from typing import Any, List, Optional
from langchain_core.messages import HumanMessage, SystemMessage
from app.agents.chat.agent_turn import AgentChatTurn
from app.agents.chat.helpers import format_history_string, get_history_with_window
from app.agents.chat.interview_state_hints import (
apply_autobiographical_boundary_guard,
apply_duplicate_question_guard,
extract_recent_questions,
segments_are_only_duplicate_guard_fallback,
update_recent_questions,
)
from app.agents.chat.interview_turn_plan import plan_interview_turn
from app.agents.chat.personas import normalize_interview_persona
from app.agents.chat.prompt_context import ChatPromptContext
from app.agents.chat.prompts_conversation import (
SLOT_NAME_MAP,
get_opening_prompt,
)
from app.agents.chat.reply_limits import (
nonempty_segments_or_fallback,
segments_from_llm_response,
truncate_chat_segments,
)
from app.agents.chat.reply_planner import maybe_refine_turn_plan_with_llm
from app.agents.chat.stage_detection import keyword_fallback_primary_stage
from app.agents.state_schema import (
MemoirStateSchema,
interview_control_state,
narrative_coverage_state,
)
from app.core.agent_logging import (
agent_span,
log_agent_payload,
log_agent_summary,
)
from app.core.config import settings
from app.core.llm_gateway import LlmGateway, LlmUseCase
from app.core.logging import get_logger
from app.features.conversation.input_normalize import normalize_chat_input_for_agent
# Module-level logger; log calls in this file use "{}" placeholders.
logger = get_logger(__name__)

# Shown to the user when the LLM is unavailable or a call fails. It exposes no
# exception details, and the turns that carry it are returned with skip_tts=True.
_FALLBACK_REPLY = "刚才网络不太稳,没接上。你可以再说一遍,或稍后再试。"

# Extra section appended to the system prompt only when the duplicate-question
# guard has cut the reply down to its single-sentence fallback; it costs exactly
# one additional model call.
_DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX = """## 二次生成(纠偏)
上一版模型输出因包含与「最近已问过的问题」或「已确认事实」重复的问句,已被系统弃用。请**重新写一整条回复**
- 仍须遵守上文全部主规则;
- 先贴着用户本轮原话承接半句到一两句(可有画面感);
- **禁止**再用与刚才同义、仅换说法的确认型问句;
- 若要提问,须换**全新角度**,并锚在用户刚说的具体细节里;也可以本轮**完全不提问**,只并肩承接;
- **禁止**整段只有「这一段我记住了」或同类无信息套话。"""
def _finalize_chat_segments_after_llm(
    response_text: str,
    *,
    max_segments: int,
    max_chars: int,
    memoir_state: MemoirStateSchema,
    recent_questions: list[str],
) -> tuple[list[str], bool]:
    """Turn raw LLM text into the final chat segments for one turn.

    The text is split into segments, truncated to the configured limits,
    replaced by the generic fallback reply when nothing non-empty remains, and
    finally passed through the duplicate-question guard.

    Args:
        response_text: Raw text returned by the model.
        max_segments: Upper bound on the number of segments.
        max_chars: Upper bound on characters per segment.
        memoir_state: State handed to the duplicate-question guard.
        recent_questions: Recently asked questions handed to the guard.

    Returns:
        ``(segments, deduped)`` exactly as produced by
        ``apply_duplicate_question_guard``.
    """
    # Each stage falls back to the stripped raw text when it yields nothing;
    # ``or`` keeps the fallback expression lazy, as in an explicit ``if not``.
    parsed = segments_from_llm_response(
        response_text, max_segments=max_segments
    ) or [response_text.strip()]

    limited = truncate_chat_segments(
        parsed,
        max_segments=max_segments,
        max_chars_per_segment=max_chars,
    ) or [response_text.strip()[:max_chars]]

    safe_segments = nonempty_segments_or_fallback(limited, fallback=_FALLBACK_REPLY)

    guarded, was_deduped = apply_duplicate_question_guard(
        safe_segments,
        state=memoir_state,
        recent_questions=recent_questions,
    )
    return guarded, was_deduped
def _get_langchain_llm():
    """Return the LangChain LLM for the ``chat.interview`` use case, or ``None``.

    Any failure while resolving the model is treated as "LLM unavailable" so
    that callers can degrade to canned replies. The cause is logged rather than
    silently dropped, so a misconfiguration can be diagnosed.
    """
    try:
        return LlmGateway().langchain_llm_for(LlmUseCase("chat.interview"))
    except Exception as exc:
        # Best-effort by design: keep returning None, but record why.
        logger.warning("event=interview_llm_unavailable error={}", exc)
        return None
def _message_contents_char_count(messages: List[Any]) -> int:
    """Return the combined length of every string ``content`` in *messages*.

    Items that have no ``content`` attribute, or whose ``content`` is not a
    ``str``, contribute nothing to the total.
    """
    contents = (getattr(msg, "content", None) for msg in messages)
    return sum(len(text) for text in contents if isinstance(text, str))
class InterviewAgent:
    """Specialist agent for the formal interview.

    Builds state-aware interview replies and opening messages with the
    configured chat LLM. Nothing is persisted here; the method contracts leave
    persistence to the Orchestrator.
    """

    # Canned opening returned whenever the LLM is missing, the call fails, or
    # no usable segment comes back. Defined once so all fallback paths agree.
    _OPENING_FALLBACK = "你好呀~ 又见面了。今天想从人生里哪一小段回忆开始聊聊?"

    def __init__(self):
        # ``None`` when the gateway lookup failed; the public methods check this
        # and answer with canned text instead of calling a model.
        self.llm = _get_langchain_llm()

    @staticmethod
    def _response_text(response: Any) -> Any:
        """Extract the text carried by an LLM response.

        Objects without a ``content`` attribute are stringified. LangChain
        message ``content`` may also be a list of content blocks; in that case
        the plain strings and ``{"type": "text"}`` blocks are concatenated so
        that later ``str`` operations do not fail. Any other ``content`` value
        is returned unchanged.
        """
        if not hasattr(response, "content"):
            return str(response)
        content = response.content
        if isinstance(content, list):
            parts = []
            for block in content:
                if isinstance(block, str):
                    parts.append(block)
                elif (
                    isinstance(block, dict)
                    and block.get("type") == "text"
                    and isinstance(block.get("text"), str)
                ):
                    parts.append(block["text"])
            return "".join(parts)
        return content

    def _detect_user_stage(self, user_message: str) -> str:
        """Keyword fallback for stage detection; delegates to ``stage_detection``."""
        return keyword_fallback_primary_stage(user_message)

    def _resolve_text_for_model(
        self,
        user_message: str,
        normalized_user_message: Optional[str],
    ) -> str:
        """Return the cleaned user text that is sent to the model.

        If the caller already supplied a normalized message (anything other
        than ``None``), it is used as-is apart from stripping. Otherwise the
        raw message is normalized here, handing the LLM to the normalizer only
        when normalization is enabled and its mode is ``"llm"``.
        """
        if normalized_user_message is not None:
            return (normalized_user_message or "").strip()
        llm_n = None
        if settings.chat_input_normalize_enabled and (
            (settings.chat_input_normalize_mode or "").strip().lower() == "llm"
        ):
            llm_n = self.llm
        return normalize_chat_input_for_agent(user_message or "", llm=llm_n)

    async def generate_response_with_state(
        self,
        conversation_id: str,
        user_message: str,
        memoir_state: MemoirStateSchema,
        user_profile_context: str = "",
        detected_user_stage: Optional[str] = None,
        memory_evidence_text: str = "",
        memory_anchor_source: str = "",
        memory_planner_text: str = "",
        background_voice: str = "default",
        normalized_user_message: Optional[str] = None,
        occupation: str = "",
        profile_birth_year: int | None = None,
        profile_era_place: str = "",
        stage_switched_this_turn: bool = False,
        scene_cues_for_planner: Optional[list[str]] = None,
    ) -> AgentChatTurn:
        """Generate a state-aware interview reply without persisting anything.

        Args:
            conversation_id: Conversation whose history window is loaded.
            user_message: Raw user text for this turn.
            memoir_state: Current memoir state (stage, slots, known facts, ...).
            user_profile_context: Forwarded to the prompt context.
            detected_user_stage: Stage detected upstream; when ``None`` the
                keyword fallback is applied to the cleaned user text.
            memory_evidence_text: Memory evidence placed in the prompt context;
                also the planner's evidence when ``memory_planner_text`` is empty.
            memory_anchor_source: Stripped and given to the rule-based turn plan.
            memory_planner_text: Preferred evidence text for the LLM planner.
            background_voice: Forwarded to the prompt context.
            normalized_user_message: Pre-normalized user text, if available.
            occupation: Forwarded to the prompt context.
            profile_birth_year: Forwarded to the prompt context.
            profile_era_place: Forwarded to the prompt context.
            stage_switched_this_turn: Forwarded to the rule-based turn plan.
            scene_cues_for_planner: Scene cues for the LLM planner (``[]`` if unset).

        Returns:
            An ``AgentChatTurn`` with the reply segments and
            ``interview_state_meta``. When no LLM is configured, or anything
            raises, the generic fallback reply is returned with ``skip_tts=True``.
        """
        if not self.llm:
            logger.warning("InterviewAgent: LLM 未配置,返回兜底文案")
            return AgentChatTurn(messages=[_FALLBACK_REPLY], skip_tts=True)
        try:
            text_for_model = self._resolve_text_for_model(
                user_message, normalized_user_message
            )
            narrative_state = narrative_coverage_state(memoir_state)
            control_state = interview_control_state(memoir_state)
            empty_slots = control_state.prompt_empty_slots_for_stage(
                narrative_state, memoir_state.current_stage
            )
            filled_slots = narrative_state.filled_slots_for_stage(
                memoir_state.current_stage
            )
            if detected_user_stage is not None:
                du = detected_user_stage
            else:
                du = self._detect_user_stage(text_for_model)

            hw = await get_history_with_window(
                conversation_id,
                max_pairs=settings.chat_history_max_pairs,
                max_chars=settings.chat_history_max_chars,
            )
            recent_questions = extract_recent_questions(hw.window)
            conversation_turn_total = hw.turn_total
            all_stages_coverage = narrative_state.all_stages_coverage()
            persona = normalize_interview_persona(settings.chat_interview_persona)
            max_segments = int(settings.chat_interview_max_segments)
            max_tokens = int(settings.chat_interview_max_tokens)
            max_chars = int(settings.chat_interview_max_chars_per_segment)

            # Rule-based plan first; it may be refined by the LLM planner below.
            turn_plan = plan_interview_turn(
                current_stage=memoir_state.current_stage,
                empty_slots=empty_slots,
                normalized_user_message=text_for_model,
                memory_evidence_text=(memory_anchor_source or "").strip(),
                stage_switched_this_turn=stage_switched_this_turn,
            )
            logger.info(
                "event=interview_turn_plan mode={} anchor_slot={} snippet_len={}",
                turn_plan.mode,
                turn_plan.anchor_slot_key or "-",
                len(turn_plan.anchor_snippet or ""),
            )
            reply_planner_raw = ""
            # Remember the rule-based values so the meta can report both.
            baseline_mode = turn_plan.mode
            baseline_primary_focus = turn_plan.primary_focus
            if settings.chat_reply_planner_llm_enabled:
                rq_preview = (
                    "\n".join(recent_questions[-4:])
                    if recent_questions
                    else ""
                )
                turn_plan, reply_planner_raw = await maybe_refine_turn_plan_with_llm(
                    self.llm,
                    plan=turn_plan,
                    text_for_model=text_for_model,
                    memory_evidence_text=(memory_planner_text or memory_evidence_text)
                    or "",
                    max_tokens=int(settings.chat_reply_planner_max_tokens),
                    temperature=float(settings.chat_reply_planner_temperature),
                    scene_cues_for_planner=scene_cues_for_planner or [],
                    recent_questions_preview=rq_preview,
                )
                if reply_planner_raw:
                    logger.info(
                        "event=reply_planner_applied memory_usage={} reply_shape={} "
                        "mode={} primary_focus={} focus_source={}",
                        turn_plan.memory_usage,
                        turn_plan.reply_shape,
                        turn_plan.mode,
                        turn_plan.primary_focus,
                        turn_plan.focus_source,
                    )

            # Questions found in the history window win; otherwise use the ones
            # stored on the state. Used for the prompt, the guard and the meta.
            rq_base = recent_questions or memoir_state.recent_questions

            ctx = ChatPromptContext(
                current_stage=memoir_state.current_stage,
                empty_slots=empty_slots,
                filled_slots=filled_slots,
                all_stages_coverage=all_stages_coverage,
                detected_user_stage=du,
                user_profile_context=user_profile_context,
                persona=persona,
                memory_evidence_text=memory_evidence_text,
                background_voice=background_voice,
                occupation=occupation,
                profile_birth_year=profile_birth_year,
                profile_era_place=profile_era_place,
                known_facts=memoir_state.known_facts,
                persona_threads=memoir_state.persona_threads,
                recent_questions=rq_base,
                turn_plan=turn_plan,
            )
            system_prompt = ctx.guided_system_prompt()
            messages: List[Any] = [SystemMessage(content=system_prompt)]
            messages.extend(hw.window)
            messages.append(HumanMessage(content=text_for_model))

            history_pairs_windowed = len(hw.window) // 2
            window_chars = sum(len(getattr(m, "content", "") or "") for m in hw.window)
            logger.info(
                "event=history_window_applied total={} windowed={} chars={}",
                conversation_turn_total,
                history_pairs_windowed,
                window_chars,
            )
            log_agent_payload(
                logger,
                "InterviewAgent.generate_response.prompt",
                format_history_string(
                    messages,
                    omit_system_body=settings.agent_log_omit_system_message_body,
                ),
            )
            chat_llm = self.llm.bind(
                max_tokens=max_tokens,
                temperature=float(settings.chat_interview_temperature),
            )
            prompt_chars = _message_contents_char_count(messages)
            llm_t0 = time.perf_counter()
            with agent_span(
                logger,
                "InterviewAgent.generate_response.llm",
                conversation_id=conversation_id,
                stage=memoir_state.current_stage,
            ):
                logger.info(
                    "event=chat_prompt_built agent=InterviewAgent.generate_response_with_state "
                    "prompt_chars={} history_pairs_total={} history_pairs_windowed={}",
                    prompt_chars,
                    conversation_turn_total,
                    history_pairs_windowed,
                )
                response = await chat_llm.ainvoke(messages)
            response_ms = (time.perf_counter() - llm_t0) * 1000
            logger.info(
                "event=chat_llm_done agent=InterviewAgent.generate_response_with_state "
                "response_latency_ms={:.2f}",
                response_ms,
            )
            response_text = self._response_text(response)
            log_agent_payload(
                logger, "InterviewAgent.generate_response.raw_response", response_text
            )
            out, deduped = _finalize_chat_segments_after_llm(
                response_text,
                max_segments=max_segments,
                max_chars=max_chars,
                memoir_state=memoir_state,
                recent_questions=rq_base,
            )

            retry_used = False
            # Retry once, with a corrective system appendix, only when the guard
            # left nothing but its own fallback sentence.
            if deduped and segments_are_only_duplicate_guard_fallback(out):
                retry_system = (
                    f"{system_prompt}\n\n{_DUPLICATE_GUARD_LLM_RETRY_SYSTEM_APPENDIX}"
                )
                retry_messages: List[Any] = [
                    SystemMessage(content=retry_system),
                    *hw.window,
                    HumanMessage(content=text_for_model),
                ]
                log_agent_payload(
                    logger,
                    "InterviewAgent.generate_response.retry_prompt",
                    format_history_string(
                        retry_messages,
                        omit_system_body=settings.agent_log_omit_system_message_body,
                    ),
                )
                llm_t1 = time.perf_counter()
                with agent_span(
                    logger,
                    "InterviewAgent.generate_response.llm_retry",
                    conversation_id=conversation_id,
                    stage=memoir_state.current_stage,
                ):
                    logger.info(
                        "event=chat_prompt_built agent=InterviewAgent.duplicate_guard_retry "
                        "prompt_chars={} conversation_id={}",
                        _message_contents_char_count(retry_messages),
                        conversation_id,
                    )
                    response_retry = await chat_llm.ainvoke(retry_messages)
                logger.info(
                    "event=chat_llm_done agent=InterviewAgent.duplicate_guard_retry "
                    "response_latency_ms={:.2f}",
                    (time.perf_counter() - llm_t1) * 1000,
                )
                response_text_retry = self._response_text(response_retry)
                log_agent_payload(
                    logger,
                    "InterviewAgent.generate_response.raw_response_retry",
                    response_text_retry,
                )
                # The retry result replaces the first attempt, guard flag included.
                out, deduped = _finalize_chat_segments_after_llm(
                    response_text_retry,
                    max_segments=max_segments,
                    max_chars=max_chars,
                    memoir_state=memoir_state,
                    recent_questions=rq_base,
                )
                retry_used = True

            out, auto_bio = apply_autobiographical_boundary_guard(out)
            updated_recent_questions = update_recent_questions(rq_base, out)
            log_agent_summary(
                logger,
                "InterviewAgent.generate_response segments={} conversation_id={} "
                "max_tokens={}",
                len(out),
                conversation_id,
                max_tokens,
            )
            return AgentChatTurn(
                messages=out,
                skip_tts=False,
                interview_state_meta={
                    "recent_questions": updated_recent_questions,
                    "duplicate_question_guard_triggered": deduped,
                    "duplicate_question_guard_llm_retry": retry_used,
                    "autobiographical_boundary_guard_triggered": auto_bio,
                    "reply_planner_llm_used": bool(
                        settings.chat_reply_planner_llm_enabled
                        and (reply_planner_raw or "").strip()
                    ),
                    "reply_planner_raw_preview": (reply_planner_raw or "")[:800],
                    "focus_planner_baseline_mode": baseline_mode,
                    "focus_planner_baseline_primary_focus": baseline_primary_focus,
                    "focus_planner_mode": turn_plan.mode,
                    "focus_planner_primary_focus": turn_plan.primary_focus,
                    "focus_planner_focus_source": turn_plan.focus_source,
                    "focus_planner_focus_summary": (turn_plan.focus_summary or "")[:200],
                },
            )
        except Exception as e:
            # Boundary handler: never surface exception details to the user.
            logger.error("生成回应失败: {}", e, exc_info=True)
            return AgentChatTurn(messages=[_FALLBACK_REPLY], skip_tts=True)

    async def generate_opening_message(
        self,
        conversation_id: str,
        memoir_state: MemoirStateSchema,
        user_profile_context: str = "",
        background_voice: str = "default",
        occupation: str = "",
        profile_birth_year: Optional[int] = None,
        profile_era_place: str = "",
    ) -> List[str]:
        """Generate the opening message(s) without persisting anything.

        Args:
            conversation_id: Conversation whose history window is loaded.
            memoir_state: Current memoir state; supplies the stage and slots.
            user_profile_context: Forwarded to the opening prompt.
            background_voice: Forwarded to the opening prompt.
            occupation: Forwarded to the opening prompt.
            profile_birth_year: Forwarded to the opening prompt.
            profile_era_place: Forwarded to the opening prompt.

        Returns:
            At most two segments. The canned opening is returned when no LLM is
            configured, when anything raises, or when no non-empty segment
            remains.
        """
        if not self.llm:
            return [self._OPENING_FALLBACK]
        try:
            narrative_state = narrative_coverage_state(memoir_state)
            control_state = interview_control_state(memoir_state)
            empty_slots = control_state.prompt_empty_slots_for_stage(
                narrative_state, memoir_state.current_stage
            )
            # Unknown slot keys are shown as-is.
            empty_slots_readable = [SLOT_NAME_MAP.get(s, s) for s in empty_slots]
            persona = normalize_interview_persona(settings.chat_interview_persona)
            prompt = get_opening_prompt(
                current_stage=memoir_state.current_stage,
                empty_slots_readable=empty_slots_readable,
                user_profile_context=user_profile_context,
                persona=persona,
                background_voice=background_voice,
                occupation=occupation,
                profile_birth_year=profile_birth_year,
                profile_era_place=profile_era_place,
            )
            hw = await get_history_with_window(
                conversation_id,
                max_pairs=settings.chat_history_max_pairs,
                max_chars=settings.chat_history_max_chars,
            )
            messages: List[Any] = [SystemMessage(content=prompt)]
            messages.extend(hw.window)
            # The closing human message depends on whether any history exists.
            if not hw.window:
                messages.append(
                    HumanMessage(content="(对话刚开始,请自然地说出你的开场白。)")
                )
            else:
                messages.append(
                    HumanMessage(content="(请根据上文,自然接续并说出你的开场白。)")
                )
            log_agent_payload(
                logger,
                "InterviewAgent.opening.prompt",
                format_history_string(
                    messages,
                    omit_system_body=settings.agent_log_omit_system_message_body,
                ),
            )
            opening_llm = self.llm.bind(
                max_tokens=settings.chat_opening_max_tokens,
                temperature=float(settings.chat_interview_temperature),
            )
            prompt_chars = _message_contents_char_count(messages)
            llm_t0 = time.perf_counter()
            with agent_span(
                logger,
                "InterviewAgent.opening.llm",
                conversation_id=conversation_id,
            ):
                logger.info(
                    "event=chat_prompt_built agent=InterviewAgent.generate_opening_message "
                    "prompt_chars={} history_pairs_total={} history_pairs_windowed={}",
                    prompt_chars,
                    hw.turn_total,
                    len(hw.window) // 2,
                )
                response = await opening_llm.ainvoke(messages)
            logger.info(
                "event=chat_llm_done agent=InterviewAgent.generate_opening_message "
                "response_latency_ms={:.2f}",
                (time.perf_counter() - llm_t0) * 1000,
            )
            response_text = self._response_text(response)
            log_agent_payload(
                logger, "InterviewAgent.opening.raw_response", response_text
            )
            raw_list = segments_from_llm_response(response_text, max_segments=2)
            if not raw_list:
                raw_list = [response_text.strip()]
            max_chars = int(settings.chat_interview_max_chars_per_segment)
            out = truncate_chat_segments(
                raw_list,
                max_segments=2,
                max_chars_per_segment=max_chars,
            )
            log_agent_summary(
                logger,
                "InterviewAgent.opening segments={} conversation_id={}",
                len(out),
                conversation_id,
            )
            segments = out if out else [response_text.strip()[:max_chars]]
            return nonempty_segments_or_fallback(
                segments,
                fallback=self._OPENING_FALLBACK,
            )
        except Exception as e:
            # Boundary handler: an opening must always be produced.
            logger.error("生成开场白失败: {}", e, exc_info=True)
            return [self._OPENING_FALLBACK]