feat(api): 收敛对话与记忆流程边界,引入 LLM 网关与专用服务
- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService;富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话;回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway(LlmUseCase),各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor
This commit is contained in:
@@ -18,7 +18,6 @@ from app.agents.chat.interview_state_hints import (
|
||||
update_recent_questions,
|
||||
)
|
||||
from app.agents.chat.interview_turn_plan import plan_interview_turn
|
||||
from app.agents.chat.reply_planner import maybe_refine_turn_plan_with_llm
|
||||
from app.agents.chat.personas import normalize_interview_persona
|
||||
from app.agents.chat.prompt_context import ChatPromptContext
|
||||
from app.agents.chat.prompts_conversation import (
|
||||
@@ -30,6 +29,7 @@ from app.agents.chat.reply_limits import (
|
||||
segments_from_llm_response,
|
||||
truncate_chat_segments,
|
||||
)
|
||||
from app.agents.chat.reply_planner import maybe_refine_turn_plan_with_llm
|
||||
from app.agents.chat.stage_detection import keyword_fallback_primary_stage
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
from app.core.agent_logging import (
|
||||
@@ -38,7 +38,7 @@ from app.core.agent_logging import (
|
||||
log_agent_summary,
|
||||
)
|
||||
from app.core.config import settings
|
||||
from app.core.dependencies import get_llm_provider
|
||||
from app.core.llm_gateway import LlmGateway, LlmUseCase
|
||||
from app.core.logging import get_logger
|
||||
from app.features.conversation.input_normalize import normalize_chat_input_for_agent
|
||||
|
||||
@@ -89,8 +89,7 @@ def _finalize_chat_segments_after_llm(
|
||||
|
||||
def _get_langchain_llm():
|
||||
try:
|
||||
provider = get_llm_provider()
|
||||
return getattr(provider, "langchain_llm", None)
|
||||
return LlmGateway().langchain_llm_for(LlmUseCase("chat.interview"))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ ChatOrchestrator:AI 回复用户模块的编排层
|
||||
"""
|
||||
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, List, Optional
|
||||
|
||||
@@ -24,7 +25,8 @@ from app.agents.chat.stage_detection import (
|
||||
from app.agents.state_schema import MemoirStateSchema
|
||||
from app.core.agent_logging import agent_summary_enabled, log_agent_detail
|
||||
from app.core.config import settings
|
||||
from app.core.dependencies import get_llm_provider
|
||||
from app.core.dependencies import get_embedding_provider
|
||||
from app.core.llm_gateway import LlmGateway
|
||||
from app.core.logging import get_logger
|
||||
from app.features.conversation.input_normalize import normalize_chat_input_for_agent
|
||||
from app.features.memoir.state_service import (
|
||||
@@ -32,18 +34,20 @@ from app.features.memoir.state_service import (
|
||||
save_interview_state_meta,
|
||||
switch_stage,
|
||||
)
|
||||
from app.features.memory.prompt_adapter import MemoryPromptAdapter
|
||||
|
||||
|
||||
def _llm_for_chat_input_normalize():
|
||||
try:
|
||||
p = get_llm_provider()
|
||||
return getattr(p, "langchain_llm", None)
|
||||
return LlmGateway().langchain_llm_for()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.features.user.models import User
|
||||
from app.ports.embedding import EmbeddingProvider
|
||||
from app.ports.llm import LLMProvider
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -56,9 +60,10 @@ async def _fetch_interview_memory_bundle(
|
||||
db: AsyncSession,
|
||||
user_id: str,
|
||||
user_message: str,
|
||||
*,
|
||||
get_embedding_provider_fn: Callable[[], "EmbeddingProvider"],
|
||||
) -> tuple[dict | None, object | None]:
|
||||
"""检索记忆 bundle(原始结构);是否进主 prompt 由 `slice_interview_memory` 再筛。"""
|
||||
from app.core.dependencies import get_embedding_provider
|
||||
"""检索记忆 bundle(原始结构);是否进主 prompt 由 adapter 再筛。"""
|
||||
from app.features.memory.retrieval_trace import (
|
||||
chat_memory_retrieval_trace_from_bundle,
|
||||
)
|
||||
@@ -76,7 +81,7 @@ async def _fetch_interview_memory_bundle(
|
||||
)
|
||||
return None, None
|
||||
try:
|
||||
emb = get_embedding_provider()
|
||||
emb = get_embedding_provider_fn()
|
||||
ms = MemoryService(db, embedding_provider=emb)
|
||||
top_k = settings.chat_memory_top_k
|
||||
bundle = await ms.retrieve(user_id, msg, top_k=top_k)
|
||||
@@ -103,11 +108,22 @@ class ChatOrchestrator:
|
||||
"""
|
||||
聊天编排器:根据用户资料完成度路由到 ProfileAgent 或 InterviewAgent。
|
||||
不直接写入 Redis/DB;由 WS pipeline / ConversationHistoryStore 落库并同步缓存。
|
||||
|
||||
``get_embedding_provider_fn`` / ``llm_provider`` 供测试或脚本注入;默认使用全局依赖。
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.profile_agent = ProfileAgent()
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
get_embedding_provider_fn: Callable[[], "EmbeddingProvider"] | None = None,
|
||||
llm_provider: "LLMProvider | None" = None,
|
||||
):
|
||||
self._get_embedding_provider_fn = (
|
||||
get_embedding_provider_fn or get_embedding_provider
|
||||
)
|
||||
self.profile_agent = ProfileAgent(llm_provider=llm_provider)
|
||||
self.interview_agent = InterviewAgent()
|
||||
self.memory_prompt_adapter = MemoryPromptAdapter()
|
||||
|
||||
async def process_user_message(
|
||||
self,
|
||||
@@ -272,12 +288,16 @@ class ChatOrchestrator:
|
||||
background_voice = infer_background_voice(user.occupation)
|
||||
occupation = user.occupation or ""
|
||||
|
||||
from app.features.memory.chat_memory_injection import slice_interview_memory
|
||||
|
||||
memory_bundle, mem_trace = await _fetch_interview_memory_bundle(
|
||||
db, user_id, normalized_user_message
|
||||
db,
|
||||
user_id,
|
||||
normalized_user_message,
|
||||
get_embedding_provider_fn=self._get_embedding_provider_fn,
|
||||
)
|
||||
mem_slices = self.memory_prompt_adapter.slice_for_interview(
|
||||
memory_bundle,
|
||||
normalized_user_message,
|
||||
)
|
||||
mem_slices = slice_interview_memory(memory_bundle, normalized_user_message)
|
||||
# 场景关键词仅作为 focus planner 的辅助输入,不直接拼进记忆块,避免抢过用户明确的关系/身份线索
|
||||
scene_cues_for_planner = extract_scene_cues(normalized_user_message)
|
||||
|
||||
|
||||
@@ -24,19 +24,36 @@ from app.core.agent_logging import agent_span, log_agent_payload, log_agent_summ
|
||||
from app.core.config import settings
|
||||
from app.core.dependencies import get_llm_provider
|
||||
from app.core.llm_call import allm_json_call
|
||||
from app.core.llm_gateway import LlmGateway, LlmUseCase
|
||||
from app.core.logging import get_logger
|
||||
from app.ports.llm import LLMProvider
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _get_langchain_llm():
|
||||
try:
|
||||
provider = get_llm_provider()
|
||||
return getattr(provider, "langchain_llm", None)
|
||||
return LlmGateway().langchain_llm_for(LlmUseCase("chat.profile"))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _langchain_messages_to_port(messages: List[Any]) -> list[dict]:
    """Convert LangChain messages into ``role``/``content`` dicts for ``LLMProvider.complete``.

    ``SystemMessage`` maps to ``"system"``, ``HumanMessage`` to ``"user"`` and
    ``AIMessage`` to ``"assistant"``; their content is passed through ``str()``.
    Any other object is emitted as a ``"user"`` message, using its ``content``
    attribute when present and an empty string when it is missing or ``None``.
    """
    role_by_type = (
        (SystemMessage, "system"),
        (HumanMessage, "user"),
        (AIMessage, "assistant"),
    )
    converted: list[dict] = []
    for message in messages:
        for message_type, role in role_by_type:
            if isinstance(message, message_type):
                converted.append({"role": role, "content": str(message.content)})
                break
        else:
            # Unrecognised message kinds fall back to a user turn.
            raw_content = getattr(message, "content", None)
            converted.append(
                {
                    "role": "user",
                    "content": "" if raw_content is None else str(raw_content),
                }
            )
    return converted
|
||||
|
||||
|
||||
def _message_contents_char_count(messages: List[Any]) -> int:
|
||||
n = 0
|
||||
for m in messages:
|
||||
@@ -49,9 +66,15 @@ def _message_contents_char_count(messages: List[Any]) -> int:
|
||||
class ProfileAgent:
|
||||
"""用户资料收集 Specialist Agent"""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, llm_provider: LLMProvider | None = None):
|
||||
self._llm_provider = llm_provider
|
||||
self.llm = _get_langchain_llm()
|
||||
|
||||
def _provider(self) -> LLMProvider:
|
||||
if self._llm_provider is not None:
|
||||
return self._llm_provider
|
||||
return get_llm_provider()
|
||||
|
||||
async def _invoke_chat(
|
||||
self,
|
||||
messages: List[Any],
|
||||
@@ -60,20 +83,21 @@ class ProfileAgent:
|
||||
conversation_id: Optional[str],
|
||||
agent_name: str,
|
||||
) -> str:
|
||||
chat_llm = self.llm.bind(max_tokens=max_tokens)
|
||||
port_messages = _langchain_messages_to_port(messages)
|
||||
llm_t0 = time.perf_counter()
|
||||
with agent_span(
|
||||
logger, f"{agent_name}.llm", conversation_id=conversation_id or ""
|
||||
):
|
||||
response = await chat_llm.ainvoke(messages)
|
||||
response_text = await self._provider().complete(
|
||||
port_messages,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
logger.info(
|
||||
"event=chat_llm_done agent={} response_latency_ms={:.2f}",
|
||||
agent_name,
|
||||
(time.perf_counter() - llm_t0) * 1000,
|
||||
)
|
||||
return (
|
||||
response.content if hasattr(response, "content") else str(response)
|
||||
) or ""
|
||||
return response_text or ""
|
||||
|
||||
async def _segments_from_response(
|
||||
self,
|
||||
|
||||
@@ -25,7 +25,6 @@ from app.agents.chat.background_voice import (
|
||||
get_background_voice_tone_hint,
|
||||
)
|
||||
from app.agents.chat.occupation_context import get_occupation_chat_hint
|
||||
from app.agents.chat.output_rules import chat_output_rules
|
||||
from app.agents.chat.personas import (
|
||||
get_interview_persona_tone_hint,
|
||||
normalize_interview_persona,
|
||||
@@ -35,7 +34,6 @@ from app.agents.stage_constants import CHAT_STAGES, STAGE_DISPLAY_ZH
|
||||
from app.agents.state_schema import KnownFact, PersonaThread
|
||||
from app.agents.style_profiles import ChatStyleProfile
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Context 层:状态与素材(纯数据视图,不立行为规则)
|
||||
# =============================================================================
|
||||
@@ -213,7 +211,7 @@ def build_behavior_policy_block() -> str:
|
||||
"- 若用户直接追问**你的**身世、籍贯、童年、感情或家庭,必须守住这条边界:明确你没有这些真实经历,再把话题轻轻带回用户;**绝不能**把「用户信息」「已确认事实」「人物主线」或「记忆线索」里的内容拿来冒充助手自己的资料(例如不能把用户的成长地答成「我是上海人」)。但这些上下文仍可继续用来服务回答,只能以**明确归因**方式转回用户(如「你刚提到上海」「你之前说过那段童年」)。\n"
|
||||
"\n## 身份与语气\n"
|
||||
"- 你们是**平等聊天**:底色暖、有安全感;**不是**冷冰冰盘问或庭审式追问。仍须避免**晚会串联腔、播报腔**(如「那么接下来」「让我们回到」)——好的主持人**自然勾回话题**,不靠节目硬切。\n"
|
||||
"- **主持人职责(与温情并存)**:你心里守着**回忆口述这条主线**。用户若只给寒暄、天气、泛泛忙累、纯近况而**几乎没有人生叙事实质**:最多**一两句**并肩承接,随后**必须**用**一条**带锚的开放式问题,把话头带回「当前阶段 / 还可聊的方向 / 已确认事实或人物主线 /(若有)一条极短记忆线索」之一;像朋友**绕着弯把话头勾回来**,**禁止**长时间停在纯日常闲聊里空转。**不要把「今天过得怎样」「最近好吗」当默认整轮主线**。\n"
|
||||
"- **主持人职责(与温情并存)**:你心里守着**回忆口述这条主线**。用户若只给寒暄、天气、泛泛忙累、纯近况而**几乎没有人生叙事实质**:通常最多**一两句**并肩承接,并参考顶部「本轮编排指令」决定是否用带锚的开放式问题,把话头带回「当前阶段 / 还可聊的方向 / 已确认事实或人物主线 /(若有)一条极短记忆线索」之一;像朋友**绕着弯把话头勾回来**,避免长时间停在纯日常闲聊里空转。**不要把「今天过得怎样」「最近好吗」当默认整轮主线**。\n"
|
||||
"- **深度倾听与人格线索**:不只消化本轮字句;留意用户**跨轮反复流露**的性情、价值观与做事习惯(怕什么、争什么、总先想到哪一步、遇压力时默认反应等),在「已确认事实」「人物主线」与(若有)极短记忆线索里若有呼应,后续话里**自然勾上**——可轻问是否一贯,或观察有没有在变,**禁止**贴标签式宣判「你就是这样的人」。\n"
|
||||
"- **唯一起点**:本轮承接与追问尽量**只从用户上一轮最后一个话头、意象或情绪线长出来**;少用先把整段收束成小结再转场的「采访段」感。\n"
|
||||
"- **聊天伙伴 + 控场**:像炕头、微信里能讲心里话的老友那样接住人,但**服务目标是成稿素材与回忆叙事**,**不是**记者式刨根,也**不是**无底洞式陪聊;可以把细节捋清楚,亲和力、安全感与「听懂对方」至少和信息条理同等重要;避免理性拆解腔、冷冰冰的「专业访谈感」。\n"
|
||||
@@ -246,8 +244,8 @@ def build_reply_strategy_block() -> str:
|
||||
"- **先抓重点**:承接与追问优先对齐顶部「本轮承接重点」与**用户原词**(人名、关系、面子、身份、场景);若二者冲突,以顶部为准。\n"
|
||||
"- **追问与承接**:每轮由**你自己判断**该先接住、轻声并肩,还是带着锚往下挖;按情绪与画面自然取舍。\n"
|
||||
"- **情绪与大纲**:外显情绪很重或用户在溃堤式宣泄时,多承接、少搜集;**不要**把「写得长」或「带点感慨」误当成必须整轮不问。\n"
|
||||
"- **追问义务回正**:若你方已连续两轮**完全无问句**(无句末问号也无隐性探询),而用户仍在展开叙事,**短承接后须带回一条**带锚的开放式问;本条与「情绪优先」冲突时,**以顶部指令为准**。\n"
|
||||
"- **纯跑题**:若用户几乎只有寒暄/天气而无人生实质,短承接后仍须**勾回回忆叙事**(见「身份与语气」里的主持人职责)。\n"
|
||||
"- **追问节奏校准**:若你方已连续两轮**完全无问句**(无句末问号也无隐性探询),而用户仍在展开叙事,把它视为需要校准节奏的信号;具体是否追问、问几问,仍以顶部「本轮编排指令」为准。\n"
|
||||
"- **纯跑题**:若用户几乎只有寒暄/天气而无人生实质,把它视为需要回到回忆叙事主线的信号;具体回法见顶部「本轮编排指令」与「身份与语气」里的主持人职责。\n"
|
||||
"- **大纲**:每次只撬一个叙述槽;从大纲借问题时,把抽象词换成对方嘴里出现过的具体词。\n"
|
||||
"- **跟随—沉浸**:长段后可极短并肩画面或体感,须贴着对方物象;共情用泛指,**禁止**助手自传式亲历。\n"
|
||||
"- **承接**:钉住对方上一句里的名词、动词或比喻;少用「听起来你…」式判语。\n"
|
||||
|
||||
Reference in New Issue
Block a user