feat(api): 收敛对话与记忆流程边界,引入 LLM 网关与专用服务

- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService;富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话;回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway(LlmUseCase),各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor
This commit is contained in:
Kevin
2026-04-30 09:17:01 +08:00
parent eddb2c3078
commit ac436b87a2
37 changed files with 1400 additions and 199 deletions

View File

@@ -0,0 +1,70 @@
from __future__ import annotations
from datetime import datetime, timezone
import pytest
from app.agents.chat.agent_turn import AgentChatTurn
from app.features.conversation.chat_turn import (
ChatTurnContext,
ChatTurnInput,
ChatTurnService,
)
class _FakeOrchestrator:
    """Test double for the chat orchestrator.

    Records the keyword arguments of every ``process_user_message`` call and
    always answers with the same canned ``AgentChatTurn``.
    """

    def __init__(self) -> None:
        # One dict of keyword arguments per received call, in call order.
        self.calls: list[dict] = []

    async def process_user_message(self, **kwargs):
        """Remember ``kwargs`` and return a fixed two-message turn."""
        self.calls.append(kwargs)
        canned_turn = AgentChatTurn(
            messages=["第一泡", "第二泡"],
            skip_tts=False,
            memory_retrieval_trace={"chunks": 1},
            interview_state_meta={"recent_questions": ["你当时在哪里?"]},
        )
        return canned_turn
@pytest.mark.asyncio
async def test_chat_turn_service_exposes_one_turn_contract() -> None:
    """One ``process_turn`` call maps to exactly one orchestrator call.

    The fake orchestrator answers with ``skip_tts=False``; the input sets
    ``force_skip_tts=True``, and the result is expected to report
    ``skip_tts is True``.
    """
    fake_orchestrator = _FakeOrchestrator()
    turn_service = ChatTurnService(orchestrator=fake_orchestrator)
    sent_at = datetime(2026, 4, 29, tzinfo=timezone.utc)

    turn_input = ChatTurnInput(
        conversation_id="conv-1",
        user_message="我小时候住在河边。",
        is_from_voice=True,
        voice_session_id="voice-1",
        user_message_timestamp=sent_at,
        audio_duration_seconds=12,
        force_skip_tts=True,
    )
    # Opaque placeholders and no-op callbacks: the fake orchestrator never
    # touches them.
    turn_context = ChatTurnContext(
        db=object(),
        user=object(),
        conversation=object(),
        apply_extracted_profile_fn=lambda *args, **kwargs: None,
        get_missing_profile_fields_fn=lambda user: [],
        get_filled_profile_fields_fn=lambda user: {},
    )

    outcome = await turn_service.process_turn(turn_input, turn_context)

    # Result side of the contract.
    assert outcome.messages == ["第一泡", "第二泡"]
    assert outcome.skip_tts is True
    assert outcome.memory_retrieval_trace == {"chunks": 1}
    assert outcome.interview_state_meta == {
        "recent_questions": ["你当时在哪里?"]
    }
    assert outcome.decision.force_skip_tts is True

    # Orchestrator side of the contract: a single call carrying the input fields.
    recorded_calls = fake_orchestrator.calls
    assert len(recorded_calls) == 1
    forwarded = recorded_calls[0]
    assert forwarded["conversation_id"] == "conv-1"
    assert forwarded["user_message"] == "我小时候住在河边。"
    assert forwarded["is_from_voice"] is True
    assert forwarded["voice_session_id"] == "voice-1"
    # Identity, not equality: the very same datetime object must be passed on.
    assert forwarded["user_message_timestamp"] is sent_at
    assert forwarded["audio_duration_seconds"] == 12

View File

@@ -2,6 +2,7 @@
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from app.agents.chat.helpers import format_history_string
from app.agents.chat.interview_state_hints import (
AUTOBIOGRAPHICAL_BOUNDARY_FALLBACK_ZH,
DUPLICATE_QUESTION_GUARD_FALLBACK_ZH,
@@ -10,20 +11,19 @@ from app.agents.chat.interview_state_hints import (
extract_scene_cues,
segments_are_only_duplicate_guard_fallback,
)
from app.agents.chat.output_rules import chat_output_rules
from app.agents.chat.personas import normalize_interview_persona
from app.agents.chat.prompts_conversation import (
get_guided_conversation_prompt,
get_opening_prompt,
)
from app.agents.chat.slot_question_bank import SLOT_QUESTION_OUTLINES
from app.agents.state_schema import (
KnownFact,
MemoirStateSchema,
PersonaThread,
default_slots,
)
from app.agents.chat.helpers import format_history_string
from app.agents.chat.personas import normalize_interview_persona
from app.agents.chat.output_rules import chat_output_rules
from app.agents.chat.prompts_conversation import (
get_guided_conversation_prompt,
get_opening_prompt,
)
from app.agents.chat.slot_question_bank import SLOT_QUESTION_OUTLINES
def test_guided_prompt_does_not_embed_raw_user_message_in_system_text():
@@ -132,6 +132,21 @@ def test_guided_prompt_host_tone_and_context_forward():
assert "行为" in p and "影响" in p
def test_guided_prompt_leaves_turn_level_question_contract_to_turn_plan() -> None:
    """The guided prompt omits per-turn question rules and points elsewhere.

    Three specific rule phrasings must be absent from the prompt text, and
    the pointer sentence must be present.
    """
    prompt_text = get_guided_conversation_prompt(
        current_stage="career",
        empty_slots=["job"],
        filled_slots={},
        detected_user_stage="career",
        user_profile_context="",
        persona="default",
    )

    absent_phrases = (
        "随后**必须**用**一条**",
        "短承接后须带回一条",
        "仍须**勾回回忆叙事**",
    )
    for phrase in absent_phrases:
        assert phrase not in prompt_text

    assert "具体问几问、是否必须追问,见顶部" in prompt_text
def test_education_and_family_change_outlines_differ():
edu = SLOT_QUESTION_OUTLINES[("education", "change")]
fam = SLOT_QUESTION_OUTLINES[("family", "change")]

View File

@@ -0,0 +1,62 @@
from __future__ import annotations
import pytest
from app.core.llm_gateway import LlmGateway, LlmUseCase
class _FakeProvider:
    """Minimal LLM provider stand-in that logs ``complete`` calls.

    Attributes are derived from ``name`` so tests can tell providers apart.
    """

    def __init__(self, name: str) -> None:
        self.name = name
        self.langchain_llm = f"lc-{name}"
        # One dict per complete() call: the messages plus any keyword options.
        self.complete_calls: list[dict] = []

    async def complete(self, messages, **kwargs) -> str:
        """Log the call and return ``"ok-<name>"``."""
        call_record = {"messages": messages}
        call_record.update(kwargs)
        self.complete_calls.append(call_record)
        return f"ok-{self.name}"
def test_llm_gateway_selects_default_or_fast_provider(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``langchain_llm_for`` uses the fast provider only for a ``fast=True`` use case."""
    from app.core import llm_gateway as gateway_mod

    default_provider = _FakeProvider("default")
    fast_provider = _FakeProvider("fast")
    # Replace both provider factories in the gateway module with the fakes.
    monkeypatch.setattr(gateway_mod, "get_llm_provider", lambda: default_provider)
    monkeypatch.setattr(gateway_mod, "get_llm_provider_fast", lambda: fast_provider)

    gateway = LlmGateway()
    fast_use_case = LlmUseCase("memory", fast=True)

    # No use case given -> default provider's LangChain handle.
    assert gateway.langchain_llm_for() == "lc-default"
    assert gateway.langchain_llm_for(fast_use_case) == "lc-fast"
@pytest.mark.asyncio
async def test_llm_gateway_chat_text_applies_use_case_defaults(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``chat_text`` passes the use case's options through to ``complete``."""
    from app.core import llm_gateway as gateway_mod

    fake_provider = _FakeProvider("default")
    monkeypatch.setattr(gateway_mod, "get_llm_provider", lambda: fake_provider)

    chat_use_case = LlmUseCase(
        "chat",
        max_tokens=99,
        temperature=0.2,
        model="model-a",
    )
    reply = await LlmGateway().chat_text(
        [{"role": "user", "content": "hi"}],
        use_case=chat_use_case,
    )

    assert reply == "ok-default"

    # Exactly one provider call, carrying the messages and all three options.
    expected_call = {
        "messages": [{"role": "user", "content": "hi"}],
        "temperature": 0.2,
        "model": "model-a",
        "max_tokens": 99,
    }
    assert fake_provider.complete_calls == [expected_call]

View File

@@ -0,0 +1,65 @@
from __future__ import annotations
import pytest
from app.features.memoir.ingest_scheduler import MemoirIngestScheduler
class _FakeRunner:
def __init__(self) -> None:
self.queued: list[tuple[str, str, int]] = []
self.flushed: list[tuple[str, list[str]]] = []
async def queue_message(
self,
user_id: str,
segment_id: str,
*,
text_char_count: int = 0,
) -> None:
self.queued.append((user_id, segment_id, text_char_count))
async def flush_pending(
self,
user_id: str,
*,
extra_segment_ids: list[str] | None = None,
) -> str:
self.flushed.append((user_id, list(extra_segment_ids or [])))
return "task-1"
@pytest.mark.asyncio
async def test_queue_segment_returns_visible_phase_plan() -> None:
    """``queue_segment`` enqueues on the runner and returns a matching plan."""
    fake_runner = _FakeRunner()
    ingest_scheduler = MemoirIngestScheduler(runner=fake_runner)

    phase_plan = await ingest_scheduler.queue_segment(
        "user-1",
        "seg-1",
        text_char_count=42,
        trigger="evaluation_replay",
    )

    # The runner saw exactly one queued segment with the given character count.
    assert fake_runner.queued == [("user-1", "seg-1", 42)]
    # The returned plan mirrors the request.
    assert phase_plan.user_id == "user-1"
    assert phase_plan.segment_ids == ("seg-1",)
    assert phase_plan.trigger == "evaluation_replay"
@pytest.mark.asyncio
async def test_flush_pending_returns_plan_and_task_id() -> None:
    """``flush_pending`` returns a ``(plan, task_id)`` pair and calls the runner once."""
    fake_runner = _FakeRunner()
    ingest_scheduler = MemoirIngestScheduler(runner=fake_runner)

    flush_result = await ingest_scheduler.flush_pending(
        "user-1",
        extra_segment_ids=["seg-1", "seg-2"],
        trigger="conversation_end",
    )
    phase_plan, task_id = flush_result

    assert fake_runner.flushed == [("user-1", ["seg-1", "seg-2"])]
    # "task-1" is the id the fake runner always hands back.
    assert task_id == "task-1"
    assert phase_plan.user_id == "user-1"
    assert phase_plan.segment_ids == ("seg-1", "seg-2")
    assert phase_plan.trigger == "conversation_end"

View File

@@ -0,0 +1,143 @@
from __future__ import annotations
from dataclasses import dataclass
import pytest
from app.features.memory.prompt_adapter import MemoryPromptAdapter
from app.features.memory.runtime_types import MemoryEvidenceBundle
def test_memory_evidence_bundle_and_prompt_adapter_contract() -> None:
    """A bundle built from a mapping can be sliced into interview prompt text.

    The bundle holds a single chunk; the expected slice reports that retrieval
    happened and that both the excerpt and the anchor quote the user
    ("用户曾说").
    """
    raw_evidence = {
        "relevant_chunks": [
            {"id": "c1", "content": "我小时候在河边长大,夏天常去玩水。"},
        ],
        "relevant_summaries": [],
        "relevant_facts": [],
        "timeline_hints": [],
        "relevant_stories": [],
    }
    bundle = MemoryEvidenceBundle.from_mapping(raw_evidence)

    adapter = MemoryPromptAdapter()
    interview_slices = adapter.slice_for_interview(
        bundle,
        "那条河一到夏天就特别热闹,我现在都记得。",
    )

    assert bundle.has_any is True
    assert interview_slices.had_retrieval is True
    assert "用户曾说" in interview_slices.prompt_excerpt
    assert interview_slices.anchor_source.startswith("用户曾说")
@pytest.mark.asyncio
async def test_memory_retrieval_service_delegates_to_retriever(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``MemoryRetrievalService.retrieve`` builds a retriever and calls it once.

    ``HybridRetriever`` in the service module is swapped for a fake that logs
    its constructor and ``retrieve`` arguments.
    """
    from app.features.memory import retrieval_service as retrieval_mod
    from app.features.memory.retrieval_service import MemoryRetrievalService

    # Constructor arguments first, then retrieve() arguments, in call order.
    recorded: list[dict] = []

    class FakeRetriever:
        def __init__(self, db, *, embedding_provider=None) -> None:
            recorded.append({"db": db, "embedding_provider": embedding_provider})

        async def retrieve(self, *, user_id: str, query: str, top_k: int) -> dict:
            recorded.append({"user_id": user_id, "query": query, "top_k": top_k})
            return {
                "relevant_chunks": [{"id": "c1", "content": "chunk"}],
                "relevant_summaries": [],
                "relevant_facts": [],
                "timeline_hints": [],
                "relevant_stories": [],
            }

    class FakeEmbedding:
        def is_available(self) -> bool:
            return True

    fake_db = object()
    fake_embedding = FakeEmbedding()
    monkeypatch.setattr(retrieval_mod, "HybridRetriever", FakeRetriever)

    retrieval_service = MemoryRetrievalService(
        fake_db,
        embedding_provider=fake_embedding,
    )
    bundle = await retrieval_service.retrieve("user-1", "hello", top_k=3)

    expected_calls = [
        {"db": fake_db, "embedding_provider": fake_embedding},
        {"user_id": "user-1", "query": "hello", "top_k": 3},
    ]
    assert recorded == expected_calls
    assert bundle.relevant_chunks == [{"id": "c1", "content": "chunk"}]
@pytest.mark.asyncio
async def test_memory_ingest_service_commits_before_enrichment(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Ingest commits to the database before scheduling enrichment.

    Every collaborator is replaced by a fake that appends a tuple to a shared
    event log; the ordering assertion is made on that log.
    """
    from app.features.memory import ingest_service as ingest_mod
    from app.features.memory.ingest_service import MemoryIngestService

    # Shared, ordered log of everything the fakes were asked to do.
    event_log: list[tuple] = []

    @dataclass
    class FakeRow:
        id: str

    class FakeDb:
        async def flush(self) -> None:
            event_log.append(("flush",))

        async def commit(self) -> None:
            event_log.append(("commit",))

    class FakeEmbedding:
        async def embed_texts(self, texts: list[str]) -> list[list[float]]:
            event_log.append(("embed_texts", tuple(texts)))
            # One vector per chunk produced by the patched chunk_transcript.
            return [[1.0], [2.0]]

        def is_available(self) -> bool:
            return True

    class FakeScheduler:
        def schedule(self, request) -> str:
            event_log.append(("schedule", request.user_id, request.source_id))
            return "enrich-1"

    async def fake_create_source(db, **kwargs):
        event_log.append(
            ("create_source", kwargs["user_id"], kwargs["conversation_id"])
        )
        return FakeRow("source-1")

    async def fake_create_chunk(db, **kwargs):
        event_log.append(("create_chunk", kwargs["chunk_index"], kwargs["content"]))
        return FakeRow(f"chunk-{kwargs['chunk_index']}")

    async def fake_update_chunk_embedding(db, chunk_id, emb):
        event_log.append(("update_embedding", chunk_id, tuple(emb)))

    # Patch the module-level helpers the ingest service looks up.
    monkeypatch.setattr(ingest_mod, "chunk_transcript", lambda text: ["a", "b"])
    monkeypatch.setattr(ingest_mod, "create_source", fake_create_source)
    monkeypatch.setattr(ingest_mod, "create_chunk", fake_create_chunk)
    monkeypatch.setattr(
        ingest_mod,
        "update_chunk_embedding",
        fake_update_chunk_embedding,
    )

    ingest_service = MemoryIngestService(
        FakeDb(),
        embedding_provider=FakeEmbedding(),
        enrichment_scheduler=FakeScheduler(),
    )
    source_id = await ingest_service.ingest_transcript("user-1", "conv-1", "hello")

    assert source_id == "source-1"

    # Ordering: the first commit precedes the enrichment schedule call.
    commit_position = event_log.index(("commit",))
    schedule_position = event_log.index(("schedule", "user-1", "source-1"))
    assert commit_position < schedule_position

    # Both chunks were embedded in one batch and each got its own vector.
    assert ("embed_texts", ("a", "b")) in event_log
    assert ("update_embedding", "chunk-0", (1.0,)) in event_log
    assert ("update_embedding", "chunk-1", (2.0,)) in event_log

View File

@@ -2,6 +2,7 @@
import pytest
from app.core.config import settings
from app.features.memory import evidence as evidence_mod
from app.features.memory.evidence_format import format_evidence_chunks_for_chat_prompt
from app.features.memory.evidence import (
@@ -9,6 +10,7 @@ from app.features.memory.evidence import (
_facts_to_dicts,
_stories_to_dicts,
_timeline_to_dicts,
retrieve_evidence_bundle_async,
retrieve_evidence_bundle_sync,
)
from app.features.memory.schemas import EvidenceBundle
@@ -190,3 +192,69 @@ def test_slice_interview_memory_suppresses_long_new_topic():
s = slice_interview_memory(evidence, long_msg)
assert s.prompt_excerpt == ""
assert s.anchor_source == ""
# Explicit marker so this coroutine test is run by pytest-asyncio whether or
# not the project enables auto mode.
@pytest.mark.asyncio
async def test_retrieve_evidence_bundle_async_non_empty_merges_precomputed_chunks(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Non-empty query: chunks come from ``merged_chunk_dicts``, the rest from metadata.

    ``fetch_evidence_metadata_async`` is patched with a fake that returns a
    fixed metadata dict. The expected bundle is that metadata plus the
    precomputed chunks under ``relevant_chunks``.
    """
    meta = {
        "relevant_facts": [
            {
                "id": "f1",
                "fact_type": "bio",
                "subject": "s",
                "predicate": "p",
                "object_json": {},
            }
        ],
        "timeline_hints": [],
        "relevant_summaries": [
            {
                "id": "s1",
                "summary_type": "session",
                "content": "sum",
                "source_chunk_ids": [],
            }
        ],
        "relevant_stories": [],
    }

    async def fake_fetch_meta(db, user_id, q, top_k):
        assert user_id == "u1"
        # The query is passed in below with surrounding spaces; it must
        # arrive here without them.
        assert q == "hello"
        assert top_k == 7
        return meta

    monkeypatch.setattr(evidence_mod, "fetch_evidence_metadata_async", fake_fetch_meta)

    merged = [{"id": "c1", "content": "chunk body", "chunk_index": 0}]
    out = await retrieve_evidence_bundle_async(
        object(),
        "u1",
        " hello ",
        top_k=7,
        merged_chunk_dicts=merged,
    )

    assert out == {"relevant_chunks": merged, **meta}
# Explicit marker so this coroutine test is run by pytest-asyncio whether or
# not the project enables auto mode.
@pytest.mark.asyncio
async def test_empty_query_evidence_bundle_async_and_sync_aligned_when_rolling_off(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With rolling evidence disabled, a blank query gives the empty bundle on both paths.

    The async path is given a whitespace-only query and the sync path an
    empty string; both are expected to equal ``dict(EMPTY_EVIDENCE_BUNDLE)``.
    """
    monkeypatch.setattr(settings, "memory_evidence_empty_query_include_rolling", False)

    out_a = await retrieve_evidence_bundle_async(
        object(),
        "u1",
        " ",
        top_k=10,
        merged_chunk_dicts=[],
    )
    assert out_a == dict(EMPTY_EVIDENCE_BUNDLE)

    out_s = retrieve_evidence_bundle_sync(
        session=object(),
        user_id="u1",
        query="",
        top_k=10,
        embedding_provider=None,
    )
    assert out_s == dict(EMPTY_EVIDENCE_BUNDLE)