Files
life-echo/api/app/agents/chat/profile_agent.py
Kevin e4bf0710c7 feat(memory,conversation): 记忆富化/证据包、时间线幂等字段与对话分段全链路
数据库
- 新增迁移 0003:timeline_events.memory_source_id 外键 → memory_sources,便于按 ingest 源做时间线幂等

后端 - 记忆
- 新增 ingest 后 LLM 富化(摘要/事实/时间线),可配置开关与最大字符数
- 新增证据包组装:合并 chunk、摘要、事实、时间线、故事等检索结果;支持空 query 时是否仍带 rolling 等开关
- repo/retriever/service/router/schemas/summarizer/timeline/extractor 等扩展;文档 memory-retrieval.md 更新

后端 - 对话 WS
- 增加 PING/PONG;分段 ASR 日志与空音频处理;转写失败与「无助手回复」错误提示更明确
- 助手多段回复持久化使用统一分隔符,与分段逻辑一致

后端 - Agent
- reply_limits:按 [SPLIT] 与段落拆段,并保证非空 fallback,供 WS 与 TTS 多段下发

后端 - 回忆录任务
- transcript ingest 记录 source_id;任务成功结?
2026-03-27 16:24:43 +08:00

235 lines
9.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
ProfileAgent: 用户资料收集 Specialist
负责提取资料、资料追问、资料收集开场白,不负责 Redis 持久化(由 Orchestrator 统一处理)
"""
import json
from typing import Any, Dict, List, Optional
from langchain_core.messages import AIMessage, HumanMessage
from app.agents.chat.helpers import format_history_string, get_history_messages
from app.agents.chat.prompts_profile import (
get_profile_extraction_prompt,
get_profile_followup_prompt,
get_profile_greeting_prompt,
)
from app.core.dependencies import get_llm_provider
from app.core.langchain_llm import ainvoke_json_object
from app.core.agent_logging import agent_span, log_agent_payload, log_agent_summary
from app.core.config import settings
from app.core.logging import get_logger
from app.agents.chat.reply_limits import (
nonempty_segments_or_fallback,
segments_from_llm_response,
truncate_chat_segments,
)
from app.features.memoir.memoir_images.json_payload import extract_json_payload
# Module-level logger for this file, obtained from the project's get_logger
# helper and keyed by this module's import name.
logger = get_logger(__name__)
def _get_langchain_llm():
    """Return the provider's ``langchain_llm`` attribute, or ``None``.

    ``None`` is returned both when the provider has no ``langchain_llm``
    attribute and when obtaining the provider raises. Callers treat ``None``
    as "no LLM available" and fall back to canned replies, so this lookup
    stays best-effort and never raises.
    """
    try:
        provider = get_llm_provider()
    except Exception as e:
        # Still best-effort, but leave a trace: without it the agent would
        # silently run in fallback mode with no hint about the cause.
        logger.warning("ProfileAgent: LLM provider unavailable: {}", e)
        return None
    return getattr(provider, "langchain_llm", None)
class ProfileAgent:
    """Specialist agent that collects basic user-profile fields.

    It extracts profile fields from a user message and generates the
    follow-up and greeting replies used while collecting the profile. None of
    its methods persist anything; persistence is left to the Orchestrator.
    """

    # How many trailing history messages are shown to the extractor as context.
    _RECENT_HISTORY_MESSAGES = 4
    # Free-text fields copied through as strings whenever they are truthy.
    _TEXT_FIELDS = ("birth_place", "grew_up_place", "occupation")

    def __init__(self):
        # May be None when no LangChain LLM is available; every public method
        # then returns a canned value instead of calling the model.
        self.llm = _get_langchain_llm()

    @staticmethod
    def _normalize_birth_year(raw: Any) -> Optional[int]:
        """Coerce an LLM-provided birth year to an int in [1900, 2100].

        Accepts ints, integral floats (JSON may yield ``1985.0``) and decimal
        strings with optional surrounding whitespace. Two-digit years are
        expanded the same way for every input type: 50-99 -> 19xx and
        00-49 -> 20xx. Anything else yields ``None``.
        """
        if isinstance(raw, bool):
            # bool is an int subclass; True/False is never a year.
            return None
        if isinstance(raw, int):
            year = raw
        elif isinstance(raw, float):
            if not raw.is_integer():  # also rejects nan / inf
                return None
            year = int(raw)
        elif isinstance(raw, str):
            text = raw.strip()
            # isdecimal() (unlike isdigit()) only admits characters int() can
            # parse, so inputs such as "²" are rejected instead of raising.
            if not text.isdecimal():
                return None
            try:
                year = int(text)
            except ValueError:
                return None
        else:
            return None

        if 0 <= year < 100:
            year += 1900 if year >= 50 else 2000
        return year if 1900 <= year <= 2100 else None

    async def _recent_dialogue(self, conversation_id: Optional[str]) -> str:
        """Format the last few history messages as "用户/助手" lines.

        Returns an empty string when there is no conversation id, no matching
        messages, or the history cannot be loaded. The dialogue is optional
        context, so a failure here must not abort extraction.
        """
        if not conversation_id:
            return ""
        try:
            history_messages = await get_history_messages(conversation_id)
            lines = []
            for msg in history_messages[-self._RECENT_HISTORY_MESSAGES:]:
                if isinstance(msg, HumanMessage):
                    lines.append(f"用户: {msg.content}")
                elif isinstance(msg, AIMessage):
                    lines.append(f"助手: {msg.content}")
            return "\n".join(lines)
        except Exception as e:
            logger.warning("获取最近对话失败, 将不带上下文提取资料: {}", e)
            return ""

    async def extract_profile_from_message(
        self,
        user_message: str,
        missing_fields: List[str],
        conversation_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Extract profile fields from a user message without persisting them.

        Args:
            user_message: The message to extract from.
            missing_fields: Fields still missing; passed to the prompt builder.
            conversation_id: When given, recent dialogue from this
                conversation is added to the prompt as context.

        Returns:
            A dict containing any of ``birth_year`` (int), ``birth_place``,
            ``grew_up_place`` and ``occupation`` (str) found in the LLM
            output. Empty when there is no LLM, ``missing_fields`` is empty,
            or the LLM call / JSON parsing fails.
        """
        if not self.llm or not missing_fields:
            return {}

        recent_dialogue = await self._recent_dialogue(conversation_id)
        try:
            prompt = get_profile_extraction_prompt(
                user_message, missing_fields, recent_dialogue=recent_dialogue or None
            )
            content = await ainvoke_json_object(
                self.llm,
                prompt,
                max_tokens=512,
                agent="ProfileAgent.extract_profile_from_message",
            )
            parsed = json.loads(extract_json_payload(content))
            if not isinstance(parsed, dict):
                logger.error("提取资料信息失败: {}", "LLM 输出不是 JSON 对象")
                return {}

            result: Dict[str, Any] = {}
            birth_year = self._normalize_birth_year(parsed.get("birth_year"))
            if birth_year is not None:
                result["birth_year"] = birth_year
            for field in self._TEXT_FIELDS:
                value = parsed.get(field)
                if value:
                    result[field] = str(value)
            return result
        except Exception as e:
            logger.error("提取资料信息失败: {}", e)
            return {}

    async def _generate_segments(
        self,
        full_prompt: str,
        *,
        conversation_id: str,
        log_prefix: str,
        max_segments: int,
        fallback: str,
    ) -> List[str]:
        """Call the chat LLM and turn its reply into bounded chat segments.

        Shared by the follow-up and greeting generators. ``log_prefix`` names
        the payload/span/summary log entries, ``max_segments`` caps both the
        split and the truncation step, and ``fallback`` is handed to
        ``nonempty_segments_or_fallback``. Exceptions propagate to the caller.
        """
        log_agent_payload(logger, f"{log_prefix}.prompt", full_prompt)
        chat_llm = self.llm.bind(
            max_tokens=settings.chat_profile_followup_max_tokens
        )
        with agent_span(
            logger,
            f"{log_prefix}.llm",
            conversation_id=conversation_id,
        ):
            response = await chat_llm.ainvoke(full_prompt)
        response_text = (
            response.content if hasattr(response, "content") else str(response)
        )
        log_agent_payload(logger, f"{log_prefix}.raw_response", response_text)

        max_chars = settings.chat_interview_max_chars_per_segment
        raw_list = segments_from_llm_response(
            response_text, max_segments=max_segments
        )
        if not raw_list:
            raw_list = [response_text.strip()]
        out = truncate_chat_segments(
            raw_list,
            max_segments=max_segments,
            max_chars_per_segment=max_chars,
        )
        log_agent_summary(
            logger,
            log_prefix + " segments={} conversation_id={}",
            len(out),
            conversation_id,
        )
        # If truncation produced nothing, fall back to the clipped raw text.
        segments = out if out else [response_text.strip()[:max_chars]]
        return nonempty_segments_or_fallback(segments, fallback=fallback)

    async def generate_profile_followup(
        self,
        conversation_id: str,
        user_message: str,
        missing_fields: List[str],
        filled_fields: Dict[str, str],
        nickname: str = "",
        interview_stage_hint: str = "",
    ) -> List[str]:
        """Generate the profile follow-up reply without persisting it.

        Returns up to three reply segments. A canned single-segment reply is
        returned when no LLM is available or when generation fails.
        """
        if not self.llm:
            return ["谢谢!还能告诉我更多吗?"]

        fallback = "谢谢分享!能再告诉我一些吗?"
        try:
            prompt = get_profile_followup_prompt(
                missing_fields,
                filled_fields,
                user_message,
                nickname,
                interview_stage_hint=interview_stage_hint,
            )
            history_messages = await get_history_messages(conversation_id)
            history_string = format_history_string(history_messages)
            full_prompt = (
                f"{prompt}\n\n{history_string}\n\nHuman: {user_message}\n\nAssistant:"
            )
            return await self._generate_segments(
                full_prompt,
                conversation_id=conversation_id,
                log_prefix="ProfileAgent.followup",
                max_segments=3,
                fallback=fallback,
            )
        except Exception as e:
            logger.error("生成资料跟进回复失败: {}", e)
            return [fallback]

    async def generate_profile_greeting(
        self,
        conversation_id: str,
        missing_fields: List[str],
        nickname: str = "",
    ) -> List[str]:
        """Generate the profile-collection opening without persisting it.

        Returns up to two reply segments. A canned single-segment greeting is
        returned when no LLM is available or when generation fails.
        """
        if not self.llm:
            return ["你好!在开始之前,能告诉我你是哪一年出生的吗?"]
        try:
            prompt = get_profile_greeting_prompt(missing_fields, nickname)
            history_messages = await get_history_messages(conversation_id)
            history_string = format_history_string(history_messages)
            # Only append the history when there is any to show.
            full_prompt = f"{prompt}\n\n{history_string}" if history_string else prompt
            return await self._generate_segments(
                full_prompt,
                conversation_id=conversation_id,
                log_prefix="ProfileAgent.greeting",
                max_segments=2,
                fallback="你好!在开始之前,能告诉我你是哪一年出生的吗?",
            )
        except Exception as e:
            logger.error("生成资料收集开场白失败: {}", e)
            return [
                "你好!在我们开始聊人生故事之前,能先简单介绍一下你自己吗?比如你是哪一年出生的?"
            ]