2026-03-27 16:01:28 +08:00
|
|
|
|
"""Memory evidence 组装与检索契约(纯函数 / 无 DB)。"""
|
|
|
|
|
|
|
2026-04-03 11:43:16 +08:00
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
|
|
from app.features.memory import evidence as evidence_mod
|
2026-04-22 16:56:28 +08:00
|
|
|
|
from app.features.memory.evidence_format import format_evidence_chunks_for_chat_prompt
|
2026-03-27 16:01:28 +08:00
|
|
|
|
from app.features.memory.evidence import (
|
|
|
|
|
|
EMPTY_EVIDENCE_BUNDLE,
|
|
|
|
|
|
_facts_to_dicts,
|
|
|
|
|
|
_stories_to_dicts,
|
|
|
|
|
|
_timeline_to_dicts,
|
2026-04-03 11:43:16 +08:00
|
|
|
|
retrieve_evidence_bundle_sync,
|
2026-03-27 16:01:28 +08:00
|
|
|
|
)
|
|
|
|
|
|
from app.features.memory.schemas import EvidenceBundle
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-03 11:43:16 +08:00
|
|
|
|
class _FakeEmbedding:
    """Embedding-provider stub: always available, always the same vector."""

    # Fixed vector handed back for every input text.
    _VECTOR = (0.25, 0.5, 0.75)

    def is_available(self) -> bool:
        """Report the stub as available."""
        return True

    def embed_text_sync(self, text: str) -> list[float]:
        """Return a fresh list holding the fixed vector; ``text`` is ignored."""
        return list(self._VECTOR)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_retrieve_evidence_bundle_sync_uses_vector_search(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check the arguments passed to vector search and the returned chunks.

    ``search_chunks_vector_sync`` and ``fetch_evidence_metadata_parallel_sync``
    are replaced on the evidence module with local fakes. The test asserts the
    fake search is called exactly once with the user id, the vector produced
    by ``_FakeEmbedding`` and the requested ``top_k``, and that
    ``relevant_chunks`` holds the hit without its ``distance`` key.
    """
    vector_calls: list[tuple] = []
    meta_keys = (
        "relevant_facts",
        "timeline_hints",
        "relevant_summaries",
        "relevant_stories",
    )

    def fake_search(session, user_id, emb, top_k):
        # Record everything except the session so the call can be inspected.
        vector_calls.append((user_id, emb, top_k))
        return [
            {"id": "c1", "content": "chunk body", "chunk_index": 0, "distance": 0.1},
        ]

    def fake_meta(user_id, q, top_k):
        # Fresh empty list per key on every call.
        return {key: [] for key in meta_keys}

    for attr_name, replacement in (
        ("search_chunks_vector_sync", fake_search),
        ("fetch_evidence_metadata_parallel_sync", fake_meta),
    ):
        monkeypatch.setattr(evidence_mod, attr_name, replacement)

    bundle = retrieve_evidence_bundle_sync(
        session=object(),
        user_id="u1",
        query=" hello ",
        top_k=7,
        embedding_provider=_FakeEmbedding(),
    )

    # One recorded call carrying (user_id, embedding, top_k).
    assert vector_calls == [("u1", [0.25, 0.5, 0.75], 7)]
    expected_chunks = [{"id": "c1", "content": "chunk body", "chunk_index": 0}]
    assert bundle["relevant_chunks"] == expected_chunks
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-27 16:01:28 +08:00
|
|
|
|
def test_empty_evidence_bundle_keys() -> None:
    """Pin the exact key set of ``EMPTY_EVIDENCE_BUNDLE``."""
    expected_keys = {
        "relevant_chunks",
        "relevant_summaries",
        "relevant_facts",
        "timeline_hints",
        "relevant_stories",
    }
    actual_keys = set(EMPTY_EVIDENCE_BUNDLE.keys())
    assert actual_keys == expected_keys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_evidence_bundle_model_accepts_dict() -> None:
    """``EvidenceBundle.model_validate`` accepts ``EMPTY_EVIDENCE_BUNDLE``."""
    bundle = EvidenceBundle.model_validate(EMPTY_EVIDENCE_BUNDLE)
    chunks = bundle.relevant_chunks
    assert chunks == []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_format_helpers_empty() -> None:
    """Each ``*_to_dicts`` helper turns an empty list into an empty list."""
    for to_dicts in (_facts_to_dicts, _timeline_to_dicts, _stories_to_dicts):
        assert to_dicts([]) == []
|
2026-04-22 16:56:28 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_format_evidence_chunks_for_chat_prompt_reframes_and_labels() -> None:
    """Check the fragments expected in the chat-prompt rendering of one chunk.

    The rendered text must contain the two header fragments, the ``[M1]``
    label, the attribution prefix and the start of the chunk content.
    """
    chunk = {"id": "chunk-1", "content": "我小时候在河边长大,夏天常去玩水。"}
    evidence = {
        "relevant_chunks": [chunk],
        "relevant_summaries": [],
        "relevant_facts": [],
        "timeline_hints": [],
        "relevant_stories": [],
    }
    rendered = format_evidence_chunks_for_chat_prompt(evidence)
    expected_fragments = (
        "聊天专用",
        "归因",
        "[M1]",
        "用户曾说",
        "我小时候在河边长大",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_slice_interview_memory_empty_bundle() -> None:
    """With ``None`` as evidence, all text fields are empty and nothing was retrieved.

    Asserts ``prompt_excerpt``, ``anchor_source`` and ``planner_preview`` are
    empty strings and ``had_retrieval`` is ``False``.
    """
    # Function-local import kept exactly as in the original test.
    from app.features.memory.chat_memory_injection import slice_interview_memory

    s = slice_interview_memory(None, "你好")
    assert s.prompt_excerpt == ""
    assert s.anchor_source == ""
    assert s.planner_preview == ""
    assert s.had_retrieval is False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_slice_interview_memory_retrieval_not_equal_inject_dismissive() -> None:
    """A retrieval preview exists, but after gating nothing enters the main prompt / anchor.

    Asserts ``prompt_excerpt`` and ``anchor_source`` are empty while
    ``planner_preview`` is non-blank and ``had_retrieval`` is ``True``.
    """
    # Function-local import kept exactly as in the original test.
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = {
        "relevant_chunks": [
            {"id": "c1", "content": "很久以前在校园礼堂排练到很晚。"},
        ],
        "relevant_summaries": [],
        "relevant_facts": [],
        "timeline_hints": [],
        "relevant_stories": [],
    }
    s = slice_interview_memory(evidence, "哈哈,早就不会了")
    # Nothing is injected into the prompt or the anchor...
    assert s.prompt_excerpt == ""
    assert s.anchor_source == ""
    # ...yet the retrieval itself is still reported.
    assert s.planner_preview.strip() != ""
    assert s.had_retrieval is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_slice_interview_memory_minimal_inject_when_aligned() -> None:
    """For this chunk/message pair, the memory is injected into the prompt excerpt.

    Asserts the excerpt carries the "记忆线索" label plus a fragment of the
    chunk content, that ``anchor_source`` is non-empty, and that
    ``had_retrieval`` is ``True``.
    """
    # Function-local import kept exactly as in the original test.
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = {
        "relevant_chunks": [
            {"id": "c1", "content": "你在校园演出里饰演罗密欧。"},
        ],
        "relevant_summaries": [],
        "relevant_facts": [],
        "timeline_hints": [],
        "relevant_stories": [],
    }
    s = slice_interview_memory(evidence, "那次排练其实挺紧张的,灯光一打我就忘词。")
    assert "记忆线索" in s.prompt_excerpt
    # Either fragment of the chunk is accepted in the excerpt.
    assert "校园演出" in s.prompt_excerpt or "罗密欧" in s.prompt_excerpt
    assert s.anchor_source
    assert s.had_retrieval is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_slice_interview_memory_keeps_first_person_but_marks_ownership() -> None:
    """First-person chunk text is kept verbatim but prefixed with an attribution.

    Asserts the excerpt contains both the "用户曾说" marker and the original
    first-person wording, and that ``anchor_source`` starts with that marker.
    """
    # Function-local import kept exactly as in the original test.
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = {
        "relevant_chunks": [
            {"id": "c1", "content": "我小时候在河边长大,夏天常去玩水。"},
        ],
        "relevant_summaries": [],
        "relevant_facts": [],
        "timeline_hints": [],
        "relevant_stories": [],
    }
    s = slice_interview_memory(evidence, "那条河一到夏天就特别热闹,我现在都记得。")
    assert "用户曾说" in s.prompt_excerpt
    assert "我小时候在河边长大" in s.prompt_excerpt
    assert s.anchor_source.startswith("用户曾说")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_slice_interview_memory_suppresses_long_new_topic() -> None:
    """A long message paired with this chunk yields no prompt excerpt and no anchor.

    Asserts ``prompt_excerpt`` and ``anchor_source`` are both empty strings.
    """
    # Function-local import kept exactly as in the original test.
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = {
        "relevant_chunks": [
            {"id": "c1", "content": "旧记忆关于河边。"},
        ],
        "relevant_summaries": [],
        "relevant_facts": [],
        "timeline_hints": [],
        "relevant_stories": [],
    }
    long_msg = "我今天想随便聊聊工作里的事,项目压力很大。" * 6
    # Precondition on the fixture itself. NOTE(review): 72 is presumably the
    # length threshold used by the gating logic -- confirm against
    # chat_memory_injection.
    assert len(long_msg) > 72
    s = slice_interview_memory(evidence, long_msg)
    assert s.prompt_excerpt == ""
    assert s.anchor_source == ""
|