Files
life-echo/api/tests/test_memory_boundaries.py
Kevin 71fbd39e32 feat(api)!: memory single chain — async MemoryService, strict eval closure
Route all memory ingest/retrieve/enrichment/compaction through async MemoryService.
Remove legacy sync memory implementations (ingest/retrieve/compaction); Celery and
memoir Phase2 call asyncio.run into MemoryService-backed helpers.

Memoir Phase1 batch ingest uses MemoryService.ingest_transcripts_batch; drop the
chapters.evidence_bundle_json mirror (Alembic 0015). Evaluation uses snapshot/link-only bundles;
raise EvidenceClosureMissing instead of partial/fallback lineage tiers.

Split memoir state into NarrativeCoverageState and InterviewControlState; delete the
_interview_meta_store adapter layer. Remove rolling-query and recent-fact fallback
settings from config and evidence assembly.

Update judges, docs, tests, and PlaygroundPage alignment.

Made-with: Cursor
2026-04-30 14:11:50 +08:00

184 lines
6.0 KiB
Python

from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import pytest
from app.features.memory.prompt_adapter import MemoryPromptAdapter
from app.features.memory.runtime_types import MemoryEvidenceBundle
def test_memory_evidence_bundle_and_prompt_adapter_contract() -> None:
    """Build a one-chunk evidence bundle and check the interview slice made from it."""
    # Only the chunk list is populated; every other evidence category is empty.
    raw_evidence = {
        "relevant_chunks": [
            {"id": "c1", "content": "我小时候在河边长大,夏天常去玩水。"},
        ],
        "relevant_summaries": [],
        "relevant_facts": [],
        "timeline_hints": [],
        "relevant_stories": [],
    }
    user_utterance = "那条河一到夏天就特别热闹,我现在都记得。"

    evidence = MemoryEvidenceBundle.from_mapping(raw_evidence)
    adapter = MemoryPromptAdapter()
    slices = adapter.slice_for_interview(evidence, user_utterance)

    # Bundle side of the contract.
    assert evidence.has_any is True
    # Adapter side of the contract.
    assert slices.had_retrieval is True
    assert "用户曾说" in slices.prompt_excerpt
    assert slices.anchor_source.startswith("用户曾说")
@pytest.mark.asyncio
async def test_memory_retrieval_service_delegates_to_retriever(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``MemoryRetrievalService.retrieve`` goes through ``HybridRetriever``.

    The ``HybridRetriever`` name in the retrieval module is replaced by a
    recording stub. The test then compares the constructor and ``retrieve``
    arguments the stub received, and the chunks exposed by the returned bundle.
    """
    from app.features.memory import retrieval_service as retrieval_mod
    from app.features.memory.retrieval_service import MemoryRetrievalService

    recorded: list[dict] = []

    class RecordingRetriever:
        # Logs construction and query arguments, then returns a canned payload.
        def __init__(self, db, *, embedding_provider=None) -> None:
            recorded.append(dict(db=db, embedding_provider=embedding_provider))

        async def retrieve(self, *, user_id: str, query: str, top_k: int) -> dict:
            recorded.append(dict(user_id=user_id, query=query, top_k=top_k))
            payload = {
                "relevant_chunks": [{"id": "c1", "content": "chunk"}],
                "relevant_summaries": [],
                "relevant_facts": [],
                "timeline_hints": [],
                "relevant_stories": [],
            }
            return payload

    class AvailableEmbedding:
        def is_available(self) -> bool:
            return True

    fake_db = object()
    fake_embedding = AvailableEmbedding()
    monkeypatch.setattr(retrieval_mod, "HybridRetriever", RecordingRetriever)

    service = MemoryRetrievalService(fake_db, embedding_provider=fake_embedding)
    bundle = await service.retrieve("user-1", "hello", top_k=3)

    # First entry: constructor call; second entry: the forwarded query.
    expected_calls = [
        {"db": fake_db, "embedding_provider": fake_embedding},
        {"user_id": "user-1", "query": "hello", "top_k": 3},
    ]
    assert recorded == expected_calls
    assert bundle.relevant_chunks == [{"id": "c1", "content": "chunk"}]
@pytest.mark.asyncio
async def test_memory_ingest_service_commits_before_enrichment(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``ingest_transcript`` commits before enrichment is scheduled.

    Every collaborator is replaced by a fake that appends a tuple to one
    shared event log. The test then asserts that the commit event precedes
    the schedule event and that both chunk embeddings were written.
    """
    from app.features.memory import ingest_service as ingest_mod
    from app.features.memory.ingest_service import MemoryIngestService

    timeline: list[tuple] = []
    record = timeline.append

    @dataclass
    class FakeRow:
        id: str

    class RecordingDb:
        async def flush(self) -> None:
            record(("flush",))

        async def commit(self) -> None:
            record(("commit",))

    class RecordingEmbedding:
        async def embed_texts(self, texts: list[str]) -> list[list[float]]:
            record(("embed_texts", tuple(texts)))
            # One single-component vector per chunk from the patched chunker.
            return [[1.0], [2.0]]

        def is_available(self) -> bool:
            return True

    class RecordingScheduler:
        def schedule(self, request) -> str:
            record(("schedule", request.user_id, request.source_id))
            return "enrich-1"

    def fake_chunk_transcript(text):
        return ["a", "b"]

    async def fake_create_source(db, **kwargs):
        record(("create_source", kwargs["user_id"], kwargs["conversation_id"]))
        return FakeRow("source-1")

    async def fake_create_chunk(db, **kwargs):
        index = kwargs["chunk_index"]
        record(("create_chunk", index, kwargs["content"]))
        return FakeRow(f"chunk-{index}")

    async def fake_update_chunk_embedding(db, chunk_id, emb):
        record(("update_embedding", chunk_id, tuple(emb)))

    # Patch the module-level helpers the ingest module looks up by name.
    replacements = (
        ("chunk_transcript", fake_chunk_transcript),
        ("create_source", fake_create_source),
        ("create_chunk", fake_create_chunk),
        ("update_chunk_embedding", fake_update_chunk_embedding),
    )
    for attr_name, replacement in replacements:
        monkeypatch.setattr(ingest_mod, attr_name, replacement)

    service = MemoryIngestService(
        RecordingDb(),
        embedding_provider=RecordingEmbedding(),
        enrichment_scheduler=RecordingScheduler(),
    )
    source_id = await service.ingest_transcript("user-1", "conv-1", "hello")

    assert source_id == "source-1"

    # Ordering guarantee: first commit comes before the enrichment schedule.
    commit_at = timeline.index(("commit",))
    schedule_at = timeline.index(("schedule", "user-1", "source-1"))
    assert commit_at < schedule_at

    assert ("embed_texts", ("a", "b")) in timeline
    assert ("update_embedding", "chunk-0", (1.0,)) in timeline
    assert ("update_embedding", "chunk-1", (2.0,)) in timeline
def test_memory_single_chain_architecture_guard() -> None:
    """Keep removed memory compatibility paths from creeping back in.

    Scans every ``.py``/``.md``/``.txt`` file under ``api/app``, ``api/tests``
    and ``api/docs``, plus ``api/.env*`` files, for a fixed list of banned
    substrings. Fails with one ``relative/path: needle`` line per occurrence.

    Raises:
        AssertionError: if any scanned file contains a banned substring.
    """
    # The repository root is two directories above this file's directory.
    repo_root = Path(__file__).resolve().parents[2]
    api_dir = repo_root / "api"

    # Every needle is assembled from fragments so that this source file,
    # which itself sits under a scanned root, never spells one out verbatim.
    banned = (
        "retrieve_evidence" + "_sync",
        "retrieve_evidence_bundle" + "_sync",
        "ingest_transcript" + "_sync",
        "ingest_transcripts_batch" + "_sync",
        "lineage" + "_tier=" + '"fallback"',
        "lineage" + "_tier=" + "'fallback'",
        "evidence_bundle" + "_json",
        "memory_fact_search_use_recent" + "_fallback",
        "memory_evidence_empty_query_include" + "_rolling",
        "_interview_meta" + "_store",
    )
    scanned_suffixes = {".py", ".md", ".txt"}

    files: list[Path] = [
        candidate
        for subdir in ("app", "tests", "docs")
        for candidate in (api_dir / subdir).rglob("*")
        if candidate.is_file() and candidate.suffix in scanned_suffixes
    ]
    files.extend(p for p in api_dir.glob(".env*") if p.is_file())

    hits: list[str] = []
    for path in files:
        # All needles are ASCII, so replacing undecodable bytes cannot hide a
        # match. Without it, one stray non-UTF-8 byte would abort the whole
        # scan with UnicodeDecodeError instead of reporting real hits.
        text = path.read_text(encoding="utf-8", errors="replace")
        hits.extend(
            f"{path.relative_to(repo_root)}: {needle}"
            for needle in banned
            if needle in text
        )

    # Directory traversal order is filesystem-dependent; sort for a stable report.
    hits.sort()
    assert hits == [], "removed memory compatibility paths found:\n" + "\n".join(hits)