feat(api+app): 对话阶段化、回忆录流水线与客户端会话体验

- DB: segments 用户输入文本(Alembic 0002)
- Chat: 阶段检测/阶段提示/回复限制,编排与访谈/画像 prompts 调整
- Memoir: 忠实度检查 agent,叙事与分类等链路更新
- Core: agent 日志、Alembic 启动、LangChain/日志/配置等
- Story: time_hints;Memory 检索与相关测试
- Expo: 助手头像、会话页与消息拆分、实时会话与文案/i18n
- Docs/scripts/tests: 迁移脚本、LLM JSON/记忆检索文档、新增单测
This commit is contained in:
Kevin
2026-03-26 12:13:36 +08:00
parent 49b089354c
commit a3f61fcc0f
94 changed files with 3332 additions and 672 deletions

View File

@@ -3,9 +3,10 @@ InterviewAgent正式访谈 Specialist
负责状态感知回复、开场白,不负责 Redis 持久化(由 Orchestrator 统一处理)
"""
from typing import Any, List
from typing import Any, List, Optional
from app.agents.chat.agent_turn import AgentChatTurn
from app.agents.chat.stage_detection import keyword_fallback_primary_stage
from app.core.dependencies import get_llm_provider
from app.core.logging import get_logger
@@ -16,6 +17,13 @@ from app.agents.chat.prompts_conversation import (
get_opening_prompt,
)
from app.agents.state_schema import MemoirStateSchema
from app.agents.chat.reply_limits import truncate_chat_segments
from app.core.agent_logging import (
agent_span,
log_agent_payload,
log_agent_summary,
)
from app.core.config import settings
logger = get_logger(__name__)
@@ -38,89 +46,8 @@ class InterviewAgent:
self.llm = _get_langchain_llm()
def _detect_user_stage(self, user_message: str) -> str:
"""根据关键词检测用户正在谈论的人生阶段"""
message = user_message.lower()
stage_keywords = {
"childhood": [
"童年",
"小时候",
"出生",
"家乡",
"小镇",
"爸妈",
"父亲",
"母亲",
"爷爷",
"奶奶",
"外公",
"外婆",
"幼儿园",
],
"education": [
"上学",
"学校",
"老师",
"同学",
"教育",
"大学",
"高中",
"初中",
"小学",
"考试",
"毕业",
"读书",
"高考",
"课堂",
],
"career": [
"工作",
"职业",
"事业",
"公司",
"同事",
"创业",
"升职",
"跳槽",
"老板",
"行业",
"项目",
"加班",
"薪水",
"面试",
],
"family": [
"伴侣",
"孩子",
"家庭",
"家人",
"结婚",
"爱人",
"老婆",
"老公",
"丈夫",
"妻子",
"儿子",
"女儿",
"婚礼",
"恋爱",
],
"belief": [
"信念",
"价值观",
"座右铭",
"坚持",
"原则",
"信仰",
"意义",
"感悟",
"遗憾",
"骄傲",
],
}
for stage, keywords in stage_keywords.items():
if any(word in message for word in keywords):
return stage
return ""
"""关键词回退:与 stage_detection 一致(多阶段打分)。"""
return keyword_fallback_primary_stage(user_message)
def _estimate_same_topic_turns(
self, history_messages: List[Any], current_filled_slots: dict
@@ -153,6 +80,7 @@ class InterviewAgent:
user_message: str,
memoir_state: MemoirStateSchema,
user_profile_context: str = "",
detected_user_stage: Optional[str] = None,
) -> AgentChatTurn:
"""生成状态感知的访谈回复,不持久化(由 Orchestrator 负责)"""
if not self.llm:
@@ -167,7 +95,10 @@ class InterviewAgent:
).items()
if value.snippet
}
detected_user_stage = self._detect_user_stage(user_message)
if detected_user_stage is not None:
du = detected_user_stage
else:
du = self._detect_user_stage(user_message)
history_messages = await get_history_messages(conversation_id)
conversation_turn = len(history_messages) // 2
same_topic_turns = self._estimate_same_topic_turns(
@@ -182,22 +113,52 @@ class InterviewAgent:
conversation_turn=conversation_turn,
same_topic_turns=same_topic_turns,
all_stages_coverage=all_stages_coverage,
detected_user_stage=detected_user_stage,
detected_user_stage=du,
user_profile_context=user_profile_context,
)
history_string = format_history_string(history_messages)
full_prompt = f"{system_prompt}\n\n{history_string}\n\nHuman: {user_message}\n\nAssistant:"
response = await self.llm.ainvoke(full_prompt)
log_agent_payload(
logger, "InterviewAgent.generate_response.prompt", full_prompt
)
chat_llm = self.llm.bind(max_tokens=settings.chat_interview_max_tokens)
with agent_span(
logger,
"InterviewAgent.generate_response.llm",
conversation_id=conversation_id,
stage=memoir_state.current_stage,
):
response = await chat_llm.ainvoke(full_prompt)
response_text = (
response.content if hasattr(response, "content") else str(response)
)
log_agent_payload(
logger, "InterviewAgent.generate_response.raw_response", response_text
)
messages = [
msg.strip() for msg in response_text.split("[SPLIT]") if msg.strip()
]
out = messages[:3] if messages else [response_text]
raw_list = messages if messages else [response_text.strip()]
out = truncate_chat_segments(
raw_list,
max_segments=settings.chat_interview_max_segments,
max_chars_per_segment=settings.chat_interview_max_chars_per_segment,
)
if not out:
out = [
response_text.strip()[
: settings.chat_interview_max_chars_per_segment
]
]
log_agent_summary(
logger,
"InterviewAgent.generate_response segments={} conversation_id={}",
len(out),
conversation_id,
)
return AgentChatTurn(messages=out, skip_tts=False)
except Exception as e:
logger.error("生成回应失败: %s", e, exc_info=True)
logger.error("生成回应失败: {}", e, exc_info=True)
return AgentChatTurn(messages=[_FALLBACK_REPLY], skip_tts=True)
async def generate_opening_message(
@@ -218,14 +179,44 @@ class InterviewAgent:
user_profile_context=user_profile_context,
)
full_prompt = f"{prompt}\n\nAssistant:"
response = await self.llm.ainvoke(full_prompt)
log_agent_payload(logger, "InterviewAgent.opening.prompt", full_prompt)
opening_llm = self.llm.bind(max_tokens=settings.chat_opening_max_tokens)
with agent_span(
logger,
"InterviewAgent.opening.llm",
conversation_id=conversation_id,
):
response = await opening_llm.ainvoke(full_prompt)
response_text = (
response.content if hasattr(response, "content") else str(response)
)
log_agent_payload(
logger, "InterviewAgent.opening.raw_response", response_text
)
messages = [
msg.strip() for msg in response_text.split("[SPLIT]") if msg.strip()
]
return messages[:2] if messages else [response_text]
raw_list = messages if messages else [response_text.strip()]
out = truncate_chat_segments(
raw_list,
max_segments=2,
max_chars_per_segment=settings.chat_interview_max_chars_per_segment,
)
log_agent_summary(
logger,
"InterviewAgent.opening segments={} conversation_id={}",
len(out),
conversation_id,
)
return (
out
if out
else [
response_text.strip()[
: settings.chat_interview_max_chars_per_segment
]
]
)
except Exception as e:
logger.error("生成开场白失败: %s", e, exc_info=True)
logger.error("生成开场白失败: {}", e, exc_info=True)
return ["你好呀~ 又见面了,最近有没有什么事想跟我说说?"]