Files
life-echo/api/app/agents/chat/profile_agent.py
Kevin 309a051038 feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI
数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组评审用的证据包。
内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/ judge 相关脚本与文档有配套调整。
app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。
工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了?
2026-04-08 15:37:09 +08:00

296 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
ProfileAgent:用户资料收集 Specialist
负责提取资料、资料追问、资料收集开场白,不负责 Redis 持久化(由 Orchestrator 统一处理)
"""
import time
from typing import Any, Dict, List, Optional
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from app.agents.chat.helpers import format_history_string, get_history_with_window
from app.agents.chat.prompts_profile import (
get_profile_extraction_prompt,
get_profile_followup_prompt,
get_profile_greeting_prompt,
)
from app.agents.chat.reply_limits import (
nonempty_segments_or_fallback,
segments_from_llm_response,
truncate_chat_segments,
)
from app.agents.chat.schemas import ProfileExtractionOutput
from app.core.agent_logging import agent_span, log_agent_payload, log_agent_summary
from app.core.config import settings
from app.core.dependencies import get_llm_provider
from app.core.llm_call import allm_json_call
from app.core.logging import get_logger
# Module-level logger shared by the helpers and the ProfileAgent class below.
logger = get_logger(__name__)
def _get_langchain_llm():
    """Return the provider's ``langchain_llm`` attribute, or ``None``.

    Best-effort lookup: ``None`` is returned both when the provider has no
    ``langchain_llm`` attribute and when obtaining the provider (or reading
    the attribute) raises any ``Exception``.
    """
    llm = None
    try:
        llm = getattr(get_llm_provider(), "langchain_llm", None)
    except Exception:
        # Deliberately swallowed: callers treat a missing LLM as "disabled".
        llm = None
    return llm
def _message_contents_char_count(messages: List[Any]) -> int:
    """Return the total length of all string ``content`` attributes.

    Items without a ``content`` attribute, or whose ``content`` is not a
    ``str`` (e.g. ``None`` or a list), contribute nothing to the total.
    """
    contents = (getattr(message, "content", None) for message in messages)
    return sum(len(text) for text in contents if isinstance(text, str))
class ProfileAgent:
    """Specialist agent that collects basic user-profile information.

    It extracts profile fields from user messages and generates the
    profile-collection greeting and follow-up replies. Nothing is persisted
    here; the methods only return values for the caller (the Orchestrator,
    per the method notes) to store.
    """

    def __init__(self):
        # May be None when no LangChain LLM is available; every public method
        # checks for that and returns an empty result / canned reply instead.
        self.llm = _get_langchain_llm()

    @staticmethod
    def _normalize_birth_year(raw: Any) -> Optional[int]:
        """Coerce a raw birth-year value into a four-digit year, or ``None``.

        Accepts an ``int`` or a string of decimal digits (surrounding
        whitespace is ignored). Values in 0..99 are read as two-digit years:
        50-99 become 1950-1999 and 0-49 become 2000-2049. The final value
        must lie in 1900..2100; anything else yields ``None``.
        """
        if isinstance(raw, bool):
            # bool is a subclass of int; True/False are never a year.
            return None
        if isinstance(raw, int):
            year = raw
        elif isinstance(raw, str):
            text = raw.strip()
            # isdecimal() only accepts characters int() can parse. isdigit()
            # also accepts e.g. "²" or "①", for which int() raises ValueError
            # and would abort the whole extraction.
            if not text.isdecimal():
                return None
            year = int(text)
        else:
            return None
        if 0 <= year < 100:
            year += 1900 if year >= 50 else 2000
        return year if 1900 <= year <= 2100 else None

    async def _invoke_chat(
        self,
        messages: List[Any],
        *,
        max_tokens: int,
        conversation_id: Optional[str],
        agent_name: str,
    ) -> str:
        """Call the chat LLM once and return the reply text.

        Args:
            messages: Message list passed to ``ainvoke``.
            max_tokens: Output-token cap bound onto the LLM for this call.
            conversation_id: Tags the logging span; ``None`` is passed as "".
            agent_name: Label used in the span name and the latency log line.

        Returns:
            ``response.content`` when the response has that attribute,
            otherwise ``str(response)``; a falsy value becomes "".
        """
        chat_llm = self.llm.bind(max_tokens=max_tokens)
        llm_t0 = time.perf_counter()
        with agent_span(
            logger, f"{agent_name}.llm", conversation_id=conversation_id or ""
        ):
            response = await chat_llm.ainvoke(messages)
        logger.info(
            "event=chat_llm_done agent={} response_latency_ms={:.2f}",
            agent_name,
            (time.perf_counter() - llm_t0) * 1000,
        )
        return (
            response.content if hasattr(response, "content") else str(response)
        ) or ""

    async def _segments_from_response(
        self,
        response_text: str,
        *,
        max_segments: int,
        max_chars_per_segment: int,
        fallback: str,
    ) -> List[str]:
        """Turn raw LLM text into the list of chat segments to send.

        The text is logged, split with ``segments_from_llm_response`` and
        limited with ``truncate_chat_segments``. Whenever a step yields
        nothing, the whole stripped response is used in its place. The result
        finally goes through ``nonempty_segments_or_fallback`` with
        ``fallback``.
        """
        log_agent_payload(
            logger,
            "ProfileAgent._segments_from_response.raw_response",
            response_text,
        )
        raw_list = segments_from_llm_response(response_text, max_segments=max_segments)
        if not raw_list:
            # Splitter produced nothing: treat the whole reply as one segment.
            raw_list = [response_text.strip()]
        out = truncate_chat_segments(
            raw_list,
            max_segments=max_segments,
            max_chars_per_segment=max_chars_per_segment,
        )
        segments = out if out else [response_text.strip()[:max_chars_per_segment]]
        return nonempty_segments_or_fallback(segments, fallback=fallback)

    async def extract_profile_from_message(
        self,
        user_message: str,
        missing_fields: List[str],
        conversation_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Extract profile fields from a user message without persisting them.

        Args:
            user_message: The latest user message to analyse.
            missing_fields: Profile fields still to be collected; passed to
                the extraction prompt.
            conversation_id: When given, up to the last four windowed history
                messages are added to the prompt as recent dialogue.

        Returns:
            A dict with any of ``birth_year`` (int), ``birth_place``,
            ``grew_up_place`` and ``occupation`` (str). Empty when there is
            no LLM, no missing field, or extraction raised.
        """
        if not self.llm or not missing_fields:
            return {}

        recent_dialogue = ""
        if conversation_id:
            hw = await get_history_with_window(
                conversation_id,
                max_pairs=settings.chat_history_max_pairs,
                max_chars=settings.chat_history_max_chars,
            )
            parts = []
            # A [-4:] slice already returns everything when there are fewer
            # than four messages, so no length check is needed.
            for msg in hw.window[-4:]:
                if isinstance(msg, HumanMessage):
                    parts.append(f"用户: {msg.content}")
                elif isinstance(msg, AIMessage):
                    parts.append(f"助手: {msg.content}")
            recent_dialogue = "\n".join(parts)

        try:
            prompt = get_profile_extraction_prompt(
                user_message, missing_fields, recent_dialogue=recent_dialogue or None
            )
            parsed = await allm_json_call(
                self.llm,
                prompt,
                ProfileExtractionOutput,
                max_tokens=settings.chat_profile_extract_max_tokens,
                agent="ProfileAgent.extract_profile_from_message",
                fallback_factory=lambda: ProfileExtractionOutput(),
            )

            result: Dict[str, Any] = {}

            if parsed.birth_year is not None:
                # An unusable year is skipped on its own so it cannot discard
                # the other fields extracted from the same message.
                year = self._normalize_birth_year(parsed.birth_year)
                if year is not None:
                    result["birth_year"] = year

            if parsed.birth_place:
                result["birth_place"] = str(parsed.birth_place)
            if parsed.grew_up_place:
                result["grew_up_place"] = str(parsed.grew_up_place)
            if parsed.occupation:
                result["occupation"] = str(parsed.occupation)

            # When only one of the two places was extracted, reuse it for the
            # other. NOTE(review): presumably assumes the user grew up where
            # they were born; this also fills a field that may not be listed
            # in missing_fields. Confirm against the caller.
            bp = result.get("birth_place")
            gp = result.get("grew_up_place")
            if bp and not gp:
                result["grew_up_place"] = bp
            elif gp and not bp:
                result["birth_place"] = gp

            return result
        except Exception as e:
            # Best-effort: a failed extraction is reported as "nothing found".
            logger.error("提取资料信息失败: {}", e)
            return {}

    async def generate_profile_followup(
        self,
        conversation_id: str,
        user_message: str,
        missing_fields: List[str],
        filled_fields: Dict[str, str],
        nickname: str = "",
        interview_stage_hint: str = "",
    ) -> List[str]:
        """Generate a profile follow-up reply; the caller persists it.

        Builds the message list as system prompt + windowed history + the
        current user message, calls the LLM, and splits the reply into at
        most three segments.

        Returns:
            The reply segments. A fixed canned reply is returned when no LLM
            is available, and another when any step raises.
        """
        if not self.llm:
            return ["谢谢!还能告诉我更多吗?"]
        try:
            prompt = get_profile_followup_prompt(
                missing_fields,
                filled_fields,
                nickname,
                interview_stage_hint=interview_stage_hint,
            )
            hw = await get_history_with_window(
                conversation_id,
                max_pairs=settings.chat_history_max_pairs,
                max_chars=settings.chat_history_max_chars,
            )
            messages: List[Any] = [SystemMessage(content=prompt)]
            messages.extend(hw.window)
            messages.append(HumanMessage(content=user_message))

            log_agent_payload(
                logger,
                "ProfileAgent.followup.prompt",
                format_history_string(
                    messages,
                    omit_system_body=settings.agent_log_omit_system_message_body,
                ),
            )
            prompt_chars = _message_contents_char_count(messages)
            logger.info(
                "event=chat_prompt_built agent=ProfileAgent.generate_profile_followup "
                "prompt_chars={} history_pairs_total={} history_pairs_windowed={}",
                prompt_chars,
                hw.turn_total,
                len(hw.window) // 2,
            )

            response_text = await self._invoke_chat(
                messages,
                max_tokens=settings.chat_profile_followup_max_tokens,
                conversation_id=conversation_id,
                agent_name="ProfileAgent.generate_profile_followup",
            )
            segments = await self._segments_from_response(
                response_text,
                max_segments=3,
                max_chars_per_segment=settings.chat_interview_max_chars_per_segment,
                fallback="谢谢分享!能再告诉我一些吗?",
            )
            log_agent_summary(
                logger,
                "ProfileAgent.followup segments={} conversation_id={}",
                len(segments),
                conversation_id,
            )
            return segments
        except Exception as e:
            # Never let a generation failure break the chat turn.
            logger.error("生成资料跟进回复失败: {}", e)
            return ["谢谢分享!能再告诉我一些吗?"]

    async def generate_profile_greeting(
        self,
        conversation_id: str,
        missing_fields: List[str],
        nickname: str = "",
    ) -> List[str]:
        """Generate the profile-collection opening line; the caller persists it.

        Builds the message list as system prompt + windowed history + a
        synthetic user instruction (worded differently depending on whether
        any history exists), calls the LLM with the follow-up token budget
        (``chat_profile_followup_max_tokens``), and splits the reply into at
        most two segments.

        Returns:
            The greeting segments. A fixed canned greeting is returned when
            no LLM is available, and another when any step raises.
        """
        if not self.llm:
            return ["你好!在开始之前,能告诉我你是哪一年出生的吗?"]
        try:
            prompt = get_profile_greeting_prompt(missing_fields, nickname)
            hw = await get_history_with_window(
                conversation_id,
                max_pairs=settings.chat_history_max_pairs,
                max_chars=settings.chat_history_max_chars,
            )
            messages: List[Any] = [SystemMessage(content=prompt)]
            messages.extend(hw.window)
            # There is no real user message for a greeting, so a synthetic
            # instruction is appended as the final human turn.
            if hw.window:
                messages.append(
                    HumanMessage(content="(请根据上文自然接话,继续资料收集开场。)")
                )
            else:
                messages.append(HumanMessage(content="(请说出资料收集开场白。)"))

            log_agent_payload(
                logger,
                "ProfileAgent.greeting.prompt",
                format_history_string(
                    messages,
                    omit_system_body=settings.agent_log_omit_system_message_body,
                ),
            )
            prompt_chars = _message_contents_char_count(messages)
            logger.info(
                "event=chat_prompt_built agent=ProfileAgent.generate_profile_greeting "
                "prompt_chars={} history_pairs_total={} history_pairs_windowed={}",
                prompt_chars,
                hw.turn_total,
                len(hw.window) // 2,
            )

            response_text = await self._invoke_chat(
                messages,
                max_tokens=settings.chat_profile_followup_max_tokens,
                conversation_id=conversation_id,
                agent_name="ProfileAgent.generate_profile_greeting",
            )
            segments = await self._segments_from_response(
                response_text,
                max_segments=2,
                max_chars_per_segment=settings.chat_interview_max_chars_per_segment,
                fallback="你好!在开始之前,能告诉我你是哪一年出生的吗?",
            )
            log_agent_summary(
                logger,
                "ProfileAgent.greeting segments={} conversation_id={}",
                len(segments),
                conversation_id,
            )
            return segments
        except Exception as e:
            # Never let a generation failure break the chat turn.
            logger.error("生成资料收集开场白失败: {}", e)
            return [
                "你好!在我们开始聊人生故事之前,能先简单介绍一下你自己吗?比如你是哪一年出生的?"
            ]