"""Memory compaction 回归测试。""" from __future__ import annotations from contextlib import contextmanager from datetime import datetime, timezone from types import SimpleNamespace import pytest from app.core.config import settings from app.core import memory_compaction_schedule as schedule from app.features.memory.compaction_service import ( canonical_score, count_duplicate_layers, embedding_layer_match, metadata_layer_match, run_memory_compaction_sync, text_layer_match, ) from app.features.memory.service import ingest_transcript_sync from app.tasks.memory_compaction_tasks import memory_compaction_run class FakeRedis: def __init__(self) -> None: self._store: dict[str, str] = {} def get(self, key: str) -> str | None: return self._store.get(key) def set( self, key: str, value: str, *, nx: bool = False, ex: int | None = None, ) -> bool: if nx and key in self._store: return False self._store[key] = value return True def delete(self, key: str) -> int: existed = key in self._store self._store.pop(key, None) return int(existed) def test_embedding_layer_uses_cosine_distance() -> None: assert embedding_layer_match(0.05, similarity_threshold=0.92) is True assert embedding_layer_match(0.2, similarity_threshold=0.92) is False def test_text_layer_jaccard() -> None: a = "我在乡下度过童年 夏天很热" b = "我在乡下度过童年 夏天很热" assert text_layer_match(a, b, jaccard_min=0.55) is True def test_metadata_layer_same_source() -> None: from unittest.mock import MagicMock c = MagicMock() c.source_id = "s1" c.event_year = None assert ( metadata_layer_match( c, {"source_id": "s1", "event_year": None}, event_year_window=1 ) is True ) assert ( metadata_layer_match( c, {"source_id": "s2", "event_year": None}, event_year_window=1 ) is False ) def test_count_duplicate_layers_requires_min() -> None: from unittest.mock import MagicMock c = MagicMock() c.source_id = "a" c.event_year = 1990 c.content = "hello world test duplicate" nb = { "content": "hello world test duplicate", "source_id": "b", "event_year": 1991, } layers = count_duplicate_layers( chunk=c, neighbor=nb, distance=0.02, similarity_threshold=0.9, jaccard_min=0.55, event_year_window=1, ) assert layers >= 2 def test_canonical_score_prefers_longer_and_draft() -> None: s1 = canonical_score(content="short", metadata_json={}, source_type="transcript") s2 = canonical_score(content="short", metadata_json={}, source_type="draft") assert s2 > s1 def test_schedule_merges_subsequent_triggers(monkeypatch) -> None: fake_redis = FakeRedis() calls: list[tuple[str, dict, int]] = [] monkeypatch.setattr(settings, "memory_compaction_enabled", True) monkeypatch.setattr(settings, "memory_compaction_debounce_seconds", 30) monkeypatch.setattr(schedule, "_get_redis", lambda: fake_redis) monkeypatch.setattr(schedule.time, "time", lambda: 100.0) monkeypatch.setattr( schedule, "_enqueue_memory_compaction_task", lambda user_id, context, *, countdown: calls.append( (user_id, context or {}, countdown) ), ) schedule.schedule_memory_compaction_run("u1", {"trigger_source": "memoir_segments"}) schedule.schedule_memory_compaction_run( "u1", {"trigger_source": "chapter_recompose"} ) assert len(calls) == 1 assert fake_redis.get(schedule.scheduler_key("u1")) == "1" assert fake_redis.get(schedule.debounce_key("u1")) == "130.0" def test_finalize_reschedules_when_deadline_extended(monkeypatch) -> None: fake_redis = FakeRedis() calls: list[tuple[str, dict, int]] = [] monkeypatch.setattr(settings, "memory_compaction_debounce_seconds", 30) monkeypatch.setattr(schedule, "_get_redis", lambda: fake_redis) monkeypatch.setattr(schedule.time, "time", lambda: 140.0) monkeypatch.setattr( schedule, "_enqueue_memory_compaction_task", lambda user_id, context, *, countdown: calls.append( (user_id, context or {}, countdown) ), ) fake_redis.set(schedule.debounce_key("u1"), "175.0") fake_redis.set(schedule.scheduler_key("u1"), "1") schedule.finalize_memory_compaction_run( "u1", observed_deadline_ts=130.0, context={"trigger_source": "memoir_segments"}, ) assert len(calls) == 1 assert calls[0][2] == 35 assert fake_redis.get(schedule.scheduler_key("u1")) == "1" assert fake_redis.get(schedule.debounce_key("u1")) == "175.0" def test_finalize_clears_stale_deadline_when_not_extended(monkeypatch) -> None: fake_redis = FakeRedis() monkeypatch.setattr(settings, "memory_compaction_debounce_seconds", 30) monkeypatch.setattr(schedule, "_get_redis", lambda: fake_redis) fake_redis.set(schedule.debounce_key("u1"), "130.0") fake_redis.set(schedule.scheduler_key("u1"), "1") schedule.finalize_memory_compaction_run( "u1", observed_deadline_ts=130.0, context={"trigger_source": "memoir_segments"}, ) assert fake_redis.get(schedule.scheduler_key("u1")) is None assert fake_redis.get(schedule.debounce_key("u1")) is None def test_ingest_transcript_sync_populates_embeddings(monkeypatch) -> None: class FakeSession: def __init__(self) -> None: self.flush_calls = 0 self.commit_calls = 0 def flush(self) -> None: self.flush_calls += 1 def commit(self) -> None: self.commit_calls += 1 class FakeEmbeddingProvider: async def embed_texts(self, texts: list[str]) -> list[list[float]]: return [[float(i)] for i, _ in enumerate(texts, start=1)] fake_session = FakeSession() embedded: list[tuple[str, list[float]]] = [] fts_updated: list[str] = [] monkeypatch.setattr(settings, "memory_enrichment_enabled", False) monkeypatch.setattr( "app.core.dependencies.get_embedding_provider", lambda: FakeEmbeddingProvider(), ) monkeypatch.setattr( "app.features.memory.chunker.chunk_transcript", lambda transcript: ["第一段", "第二段"], ) monkeypatch.setattr( "app.features.memory.repo.create_source_sync", lambda *args, **kwargs: SimpleNamespace(id="src-1"), ) monkeypatch.setattr( "app.features.memory.repo.create_chunk_sync", lambda *args, **kwargs: SimpleNamespace(id=f"chunk-{kwargs['chunk_index']}"), ) monkeypatch.setattr( "app.features.memory.repo.update_chunk_fts_sync", lambda session, chunk_id: fts_updated.append(chunk_id), ) monkeypatch.setattr( "app.features.memory.repo.update_chunk_embedding_sync", lambda session, chunk_id, embedding: embedded.append((chunk_id, embedding)), ) source_id = ingest_transcript_sync( fake_session, user_id="u1", conversation_id="conv-1", transcript="hello", ) assert source_id == "src-1" assert [chunk_id for chunk_id, _ in embedded] == ["chunk-0", "chunk-1"] assert fts_updated == ["chunk-0", "chunk-1"] assert fake_session.commit_calls == 1 def test_run_memory_compaction_stops_before_missing_embedding(monkeypatch) -> None: chunk = SimpleNamespace( id="chunk-1", created_at=datetime(2024, 1, 1, tzinfo=timezone.utc), ) row = SimpleNamespace( id="chunk-1", created_at=datetime(2024, 1, 1, tzinfo=timezone.utc), is_excluded=False, embedding=None, ) monkeypatch.setattr( "app.features.memory.compaction_service.list_incremental_chunks_for_compaction_sync", lambda *args, **kwargs: [chunk], ) monkeypatch.setattr( "app.features.memory.compaction_service.get_memory_chunk_sync", lambda *args, **kwargs: row, ) out = run_memory_compaction_sync( session=object(), user_id="u1", context={ "_cursor_pair_override": ( datetime(1970, 1, 1, tzinfo=timezone.utc), "00000000-0000-0000-0000-000000000000", ) }, ) assert out["skipped_reason"] == "awaiting_embeddings" assert out["pending_chunk_id"] == "chunk-1" assert out["new_cursor_id"] is None def test_curation_action_details_include_trigger_context(monkeypatch) -> None: now = datetime(2024, 1, 1, tzinfo=timezone.utc) chunk = SimpleNamespace( id="chunk-1", created_at=now, content="hello world duplicate", metadata_json={"a": 1}, source_id="s1", event_year=1990, embedding=[0.1, 0.2], is_excluded=False, ) loser = SimpleNamespace(id="chunk-2", is_excluded=False) captured: list[dict] = [] monkeypatch.setattr( "app.features.memory.compaction_service.list_incremental_chunks_for_compaction_sync", lambda *args, **kwargs: [chunk], ) def fake_get_memory_chunk_sync(_session, chunk_id: str, _user_id: str): if chunk_id == "chunk-1": return chunk if chunk_id == "chunk-2": return loser return None monkeypatch.setattr( "app.features.memory.compaction_service.get_memory_chunk_sync", fake_get_memory_chunk_sync, ) monkeypatch.setattr( "app.features.memory.compaction_service._source_type_for_chunk", lambda *args, **kwargs: "draft", ) monkeypatch.setattr( "app.features.memory.compaction_service.search_nearest_chunks_for_compaction_sync", lambda *args, **kwargs: [ { "id": "chunk-2", "content": "hello world duplicate", "source_id": "s1", "event_year": 1990, "metadata_json": {"b": 2}, "source_type": "transcript", "created_at": now, "distance": 0.01, } ], ) monkeypatch.setattr( "app.features.memory.compaction_service.set_chunk_excluded_sync", lambda *args, **kwargs: True, ) monkeypatch.setattr( "app.features.memory.compaction_service.create_curation_action_sync", lambda _session, **kwargs: captured.append(kwargs["details"]), ) out = run_memory_compaction_sync( session=object(), user_id="u1", context={ "_cursor_pair_override": ( datetime(1970, 1, 1, tzinfo=timezone.utc), "00000000-0000-0000-0000-000000000000", ), "trigger_source": "memoir_segments", "trigger_time": "2026-03-30T00:00:00+00:00", "pipeline_run_id": "run-1", "request_id": "req-1", "story_dispatch_ids": ["story-1"], "candidate_source_ids": ["s1"], "chapters_to_enqueue": ["chapter-1"], }, ) assert out["chunks_excluded"] == 1 assert captured[0]["trigger_time"] == "2026-03-30T00:00:00+00:00" assert captured[0]["candidate_source_ids"] == ["s1"] assert captured[0]["chapters_to_enqueue"] == ["chapter-1"] @pytest.mark.parametrize("failure_stage", ["run", "commit"]) def test_memory_compaction_run_releases_gate_and_retries_on_failure( monkeypatch, failure_stage: str ) -> None: events: list[str] = [] class RetryTriggered(RuntimeError): pass class FakeSession: def commit(self) -> None: events.append("commit") if failure_stage == "commit": raise RuntimeError("db hiccup") @contextmanager def fake_get_sync_db(): yield FakeSession() def fake_run_memory_compaction_sync(session, user_id: str, context: dict | None): events.append("run") if failure_stage == "run": raise RuntimeError("compaction failed") return { "chunks_scanned": 0, "chunks_excluded": 0, "candidates_considered": 0, "new_cursor_ts": None, "new_cursor_id": None, "duration_ms": 0.0, "skipped_reason": None, } def fake_retry(*, exc): events.append(f"retry:{type(exc).__name__}") raise RetryTriggered("retried") monkeypatch.setattr(settings, "memory_compaction_enabled", True) monkeypatch.setattr( "app.tasks.memory_compaction_tasks.read_debounce_deadline_ts", lambda user_id: 100.0, ) monkeypatch.setattr( "app.tasks.memory_compaction_tasks.acquire_redis_lock", lambda *args, **kwargs: SimpleNamespace(key="lock", token=b"t"), ) monkeypatch.setattr( "app.tasks.memory_compaction_tasks.release_redis_lock", lambda lock: events.append("release_lock"), ) monkeypatch.setattr( "app.tasks.memory_compaction_tasks.release_scheduler_gate", lambda user_id: events.append("release_gate"), ) monkeypatch.setattr( "app.tasks.memory_compaction_tasks.get_sync_db", fake_get_sync_db, ) monkeypatch.setattr( "app.tasks.memory_compaction_tasks.run_memory_compaction_sync", fake_run_memory_compaction_sync, ) monkeypatch.setattr( memory_compaction_run, "retry", fake_retry, ) with pytest.raises(RetryTriggered): memory_compaction_run.run("u1", {"trigger_source": "memoir_segments"}) assert "release_gate" in events assert "retry:RuntimeError" in events assert "release_lock" in events assert events.index("release_gate") < events.index("retry:RuntimeError")