"""Memory evidence assembly and retrieval contract (pure functions / no DB)."""

import pytest

from app.features.memory import evidence as evidence_mod
from app.features.memory.evidence import (
    EMPTY_EVIDENCE_BUNDLE,
    _facts_to_dicts,
    _stories_to_dicts,
    retrieve_evidence_bundle_async,
)
from app.features.memory.evidence_format import format_evidence_chunks_for_chat_prompt
from app.features.memory.schemas import EvidenceBundle


def _single_chunk_evidence(chunk_id: str, content: str) -> dict:
    """Build an evidence dict holding exactly one chunk and no other evidence."""
    return {
        "relevant_chunks": [
            {"id": chunk_id, "content": content},
        ],
        "relevant_summaries": [],
        "relevant_facts": [],
        "relevant_stories": [],
    }


def test_empty_evidence_bundle_keys() -> None:
    """The empty bundle exposes exactly the four evidence categories."""
    expected_keys = {
        "relevant_chunks",
        "relevant_summaries",
        "relevant_facts",
        "relevant_stories",
    }
    assert set(EMPTY_EVIDENCE_BUNDLE) == expected_keys


def test_evidence_bundle_model_accepts_dict() -> None:
    """EvidenceBundle validates the empty bundle and yields an empty chunk list."""
    bundle = EvidenceBundle.model_validate(EMPTY_EVIDENCE_BUNDLE)
    assert bundle.relevant_chunks == []


def test_format_helpers_empty() -> None:
    """Both dict-conversion helpers map an empty list to an empty list."""
    assert _facts_to_dicts([]) == []
    assert _stories_to_dicts([]) == []


def test_format_evidence_chunks_for_chat_prompt_reframes_and_labels() -> None:
    """The chat-prompt formatter labels the chunk and attributes it to the user."""
    evidence = _single_chunk_evidence("chunk-1", "我小时候在河边长大,夏天常去玩水。")

    rendered = format_evidence_chunks_for_chat_prompt(evidence)

    for fragment in ("聊天专用", "归因", "[M1]", "用户曾说", "我小时候在河边长大"):
        assert fragment in rendered


def test_slice_interview_memory_empty_bundle() -> None:
    """With no evidence at all, every field of the slice is empty / False."""
    from app.features.memory.chat_memory_injection import slice_interview_memory

    sliced = slice_interview_memory(None, "你好")

    assert sliced.prompt_excerpt == ""
    assert sliced.anchor_source == ""
    assert sliced.planner_preview == ""
    assert sliced.had_retrieval is False


def test_slice_interview_memory_retrieval_not_equal_inject_dismissive() -> None:
    """A retrieval preview exists, but after gating nothing enters the main prompt / anchor."""
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = _single_chunk_evidence("c1", "很久以前在校园礼堂排练到很晚。")

    sliced = slice_interview_memory(evidence, "哈哈,早就不会了")

    # Nothing is injected...
    assert sliced.prompt_excerpt == ""
    assert sliced.anchor_source == ""
    # ...yet the retrieval itself is still reported.
    assert sliced.planner_preview.strip() != ""
    assert sliced.had_retrieval is True


def test_slice_interview_memory_minimal_inject_when_aligned() -> None:
    """When the user message aligns with the memory, a minimal excerpt is injected."""
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = _single_chunk_evidence("c1", "你在校园演出里饰演罗密欧。")

    sliced = slice_interview_memory(evidence, "那次排练其实挺紧张的,灯光一打我就忘词。")
    excerpt = sliced.prompt_excerpt

    assert "记忆线索" in excerpt
    assert any(keyword in excerpt for keyword in ("校园演出", "罗密欧"))
    assert sliced.anchor_source
    assert sliced.had_retrieval is True


def test_slice_interview_memory_keeps_first_person_but_marks_ownership() -> None:
    """First-person memory text is kept verbatim but prefixed with a user attribution."""
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = _single_chunk_evidence("c1", "我小时候在河边长大,夏天常去玩水。")

    sliced = slice_interview_memory(evidence, "那条河一到夏天就特别热闹,我现在都记得。")

    assert "用户曾说" in sliced.prompt_excerpt
    assert "我小时候在河边长大" in sliced.prompt_excerpt
    assert sliced.anchor_source.startswith("用户曾说")


def test_slice_interview_memory_suppresses_long_new_topic() -> None:
    """A long message on an unrelated topic suppresses memory injection."""
    from app.features.memory.chat_memory_injection import slice_interview_memory

    evidence = _single_chunk_evidence("c1", "旧记忆关于河边。")
    long_msg = "我今天想随便聊聊工作里的事,项目压力很大。" * 6
    # Sanity check on the fixture: the message must be longer than 72 characters.
    assert len(long_msg) > 72

    sliced = slice_interview_memory(evidence, long_msg)

    assert sliced.prompt_excerpt == ""
    assert sliced.anchor_source == ""


# NOTE(review): the async tests below carry no explicit asyncio marker —
# presumably the project runs an async pytest plugin in auto mode; confirm.
async def test_retrieve_evidence_bundle_async_non_empty_merges_precomputed_chunks(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Non-empty query: the async path takes its chunks from merged_chunk_dicts,
    and its metadata from fetch_evidence_metadata_async.
    """
    meta = {
        "relevant_facts": [
            {
                "id": "f1",
                "fact_type": "bio",
                "subject": "s",
                "predicate": "p",
                "object_json": {},
            }
        ],
        "relevant_summaries": [
            {
                "id": "s1",
                "summary_type": "session",
                "content": "sum",
                "source_chunk_ids": [],
            }
        ],
        "relevant_stories": [],
    }

    async def fake_fetch_meta(db, user_id, q, top_k):
        # The stub also verifies what it receives: the query arrives without
        # its surrounding whitespace and top_k is forwarded unchanged.
        assert user_id == "u1"
        assert q == "hello"
        assert top_k == 7
        return meta

    monkeypatch.setattr(evidence_mod, "fetch_evidence_metadata_async", fake_fetch_meta)

    merged = [{"id": "c1", "content": "chunk body", "chunk_index": 0}]
    result = await retrieve_evidence_bundle_async(
        object(),
        "u1",
        " hello ",
        top_k=7,
        merged_chunk_dicts=merged,
    )

    expected = {"relevant_chunks": merged, **meta}
    assert result == expected


async def test_empty_query_evidence_bundle_async_returns_empty() -> None:
    """A whitespace-only query yields a bundle equal to the empty bundle."""
    result = await retrieve_evidence_bundle_async(
        object(),
        "u1",
        " ",
        top_k=10,
        merged_chunk_dicts=[],
    )
    assert result == dict(EMPTY_EVIDENCE_BUNDLE)