"""Contract tests for memory evidence assembly and retrieval (pure functions / no DB)."""

import pytest

from app.features.memory import evidence as evidence_mod
from app.features.memory.evidence_format import format_evidence_chunks_for_chat_prompt
from app.features.memory.evidence import (
    EMPTY_EVIDENCE_BUNDLE,
    _facts_to_dicts,
    _stories_to_dicts,
    _timeline_to_dicts,
    retrieve_evidence_bundle_sync,
)
from app.features.memory.schemas import EvidenceBundle


def _single_chunk_evidence(
    chunk_id: str, content: str
) -> dict[str, list[dict[str, str]]]:
    """Return a fresh evidence dict with one chunk and four empty sections.

    A new dict is built on every call so tests never share mutable state.
    """
    return {
        "relevant_chunks": [
            {"id": chunk_id, "content": content},
        ],
        "relevant_summaries": [],
        "relevant_facts": [],
        "timeline_hints": [],
        "relevant_stories": [],
    }


class _FakeEmbedding:
    """Embedding provider stub: always available, always the same vector."""

    def is_available(self) -> bool:
        return True

    def embed_text_sync(self, text: str) -> list[float]:
        return [0.25, 0.5, 0.75]


def test_retrieve_evidence_bundle_sync_uses_vector_search(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The sync retriever calls the vector search once and drops ``distance``.

    Both collaborators on the evidence module are patched, so no DB is touched.
    """
    searched: list[tuple[str, list[float], int]] = []

    def fake_search(session, user_id, emb, top_k):
        # Record the arguments so the call can be asserted on afterwards.
        searched.append((user_id, emb, top_k))
        return [
            {
                "id": "c1",
                "content": "chunk body",
                "chunk_index": 0,
                "distance": 0.1,
            }
        ]

    def fake_meta(user_id, q, top_k):
        return {
            "relevant_facts": [],
            "timeline_hints": [],
            "relevant_summaries": [],
            "relevant_stories": [],
        }

    monkeypatch.setattr(evidence_mod, "search_chunks_vector_sync", fake_search)
    monkeypatch.setattr(
        evidence_mod, "fetch_evidence_metadata_parallel_sync", fake_meta
    )

    bundle = retrieve_evidence_bundle_sync(
        session=object(),
        user_id="u1",
        query=" hello ",
        top_k=7,
        embedding_provider=_FakeEmbedding(),
    )

    # Exactly one search call, carrying the user id, the stub vector and top_k.
    assert searched == [("u1", [0.25, 0.5, 0.75], 7)]
    # The "distance" key returned by the search must not appear in the bundle.
    assert bundle["relevant_chunks"] == [
        {"id": "c1", "content": "chunk body", "chunk_index": 0},
    ]


def test_empty_evidence_bundle_keys() -> None:
    """The empty bundle constant exposes exactly the five evidence sections."""
    assert set(EMPTY_EVIDENCE_BUNDLE.keys()) == {
        "relevant_chunks",
        "relevant_summaries",
        "relevant_facts",
        "timeline_hints",
        "relevant_stories",
    }


def test_evidence_bundle_model_accepts_dict() -> None:
    """``EvidenceBundle`` validates the empty bundle dict."""
    bundle = EvidenceBundle.model_validate(EMPTY_EVIDENCE_BUNDLE)
    assert bundle.relevant_chunks == []


def test_format_helpers_empty() -> None:
    """Each ``*_to_dicts`` helper maps an empty list to an empty list."""
    assert _facts_to_dicts([]) == []
    assert _timeline_to_dicts([]) == []
    assert _stories_to_dicts([]) == []


def test_format_evidence_chunks_for_chat_prompt_reframes_and_labels() -> None:
    """The chat formatter output contains the header, label and original text."""
    evidence = _single_chunk_evidence("chunk-1", "我小时候在河边长大,夏天常去玩水。")

    rendered = format_evidence_chunks_for_chat_prompt(evidence)

    assert "聊天专用" in rendered
    assert "归因" in rendered
    assert "[M1]" in rendered
    assert "用户曾说" in rendered
    assert "我小时候在河边长大" in rendered


def test_slice_interview_memory_empty_bundle() -> None:
    """With ``None`` as evidence every slice field is empty and no retrieval is flagged."""
    # NOTE(review): the import is kept function-local as in the original;
    # presumably this isolates chat_memory_injection import problems to these
    # tests -- confirm before hoisting it to module level.
    from app.features.memory.chat_memory_injection import slice_interview_memory

    sliced = slice_interview_memory(None, "你好")

    assert sliced.prompt_excerpt == ""
    assert sliced.anchor_source == ""
    assert sliced.planner_preview == ""
    assert sliced.had_retrieval is False


def test_slice_interview_memory_retrieval_not_equal_inject_dismissive() -> None:
    """Retrieval produced a preview, but after gating nothing reaches the main prompt / anchor."""
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = _single_chunk_evidence("c1", "很久以前在校园礼堂排练到很晚。")

    sliced = slice_interview_memory(evidence, "哈哈,早就不会了")

    assert sliced.prompt_excerpt == ""
    assert sliced.anchor_source == ""
    assert sliced.planner_preview.strip() != ""
    assert sliced.had_retrieval is True


def test_slice_interview_memory_minimal_inject_when_aligned() -> None:
    """For this message the excerpt carries the memory-cue label and chunk content."""
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = _single_chunk_evidence("c1", "你在校园演出里饰演罗密欧。")

    sliced = slice_interview_memory(evidence, "那次排练其实挺紧张的,灯光一打我就忘词。")

    assert "记忆线索" in sliced.prompt_excerpt
    assert "校园演出" in sliced.prompt_excerpt or "罗密欧" in sliced.prompt_excerpt
    assert sliced.anchor_source
    assert sliced.had_retrieval is True


def test_slice_interview_memory_keeps_first_person_but_marks_ownership() -> None:
    """First-person chunk text is kept verbatim but prefixed with the attribution marker."""
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = _single_chunk_evidence("c1", "我小时候在河边长大,夏天常去玩水。")

    sliced = slice_interview_memory(evidence, "那条河一到夏天就特别热闹,我现在都记得。")

    assert "用户曾说" in sliced.prompt_excerpt
    assert "我小时候在河边长大" in sliced.prompt_excerpt
    assert sliced.anchor_source.startswith("用户曾说")


def test_slice_interview_memory_suppresses_long_new_topic() -> None:
    """For this long user message, neither excerpt nor anchor is produced."""
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = _single_chunk_evidence("c1", "旧记忆关于河边。")
    long_msg = "我今天想随便聊聊工作里的事,项目压力很大。" * 6
    # Guard the fixture itself: the message must exceed 72 characters
    # (presumably the length threshold used by the gating logic -- confirm).
    assert len(long_msg) > 72

    sliced = slice_interview_memory(evidence, long_msg)

    assert sliced.prompt_excerpt == ""
    assert sliced.anchor_source == ""