"""Memory compaction 回归测试。""" from __future__ import annotations from datetime import datetime, timezone from types import SimpleNamespace import pytest from app.core import memory_compaction_schedule as schedule from app.core.config import settings from app.features.memory.compaction_service import ( canonical_score, count_duplicate_layers, embedding_layer_match, metadata_layer_match, run_memory_compaction, text_layer_match, ) from app.tasks.memory_compaction_tasks import ( memory_compaction_run, memory_compaction_sweep, ) class FakeRedis: def __init__(self) -> None: self._store: dict[str, str] = {} def get(self, key: str) -> str | None: return self._store.get(key) def set( self, key: str, value: str, *, nx: bool = False, ex: int | None = None, ) -> bool: if nx and key in self._store: return False self._store[key] = value return True def delete(self, key: str) -> int: existed = key in self._store self._store.pop(key, None) return int(existed) def test_embedding_layer_uses_cosine_distance() -> None: assert embedding_layer_match(0.05, similarity_threshold=0.92) is True assert embedding_layer_match(0.2, similarity_threshold=0.92) is False def test_text_layer_jaccard() -> None: a = "我在乡下度过童年 夏天很热" b = "我在乡下度过童年 夏天很热" assert text_layer_match(a, b, jaccard_min=0.55) is True def test_metadata_layer_same_source() -> None: from unittest.mock import MagicMock c = MagicMock() c.source_id = "s1" c.event_year = None assert ( metadata_layer_match( c, {"source_id": "s1", "event_year": None}, event_year_window=1 ) is True ) assert ( metadata_layer_match( c, {"source_id": "s2", "event_year": None}, event_year_window=1 ) is False ) def test_count_duplicate_layers_requires_min() -> None: from unittest.mock import MagicMock c = MagicMock() c.source_id = "a" c.event_year = 1990 c.content = "hello world test duplicate" nb = { "content": "hello world test duplicate", "source_id": "b", "event_year": 1991, } layers = count_duplicate_layers( chunk=c, neighbor=nb, distance=0.02, similarity_threshold=0.9, jaccard_min=0.55, event_year_window=1, ) assert layers >= 2 def test_canonical_score_prefers_longer_and_draft() -> None: s1 = canonical_score(content="short", metadata_json={}, source_type="transcript") s2 = canonical_score(content="short", metadata_json={}, source_type="draft") assert s2 > s1 def test_schedule_merges_subsequent_triggers(monkeypatch) -> None: fake_redis = FakeRedis() calls: list[tuple[str, dict, int]] = [] monkeypatch.setattr(settings, "memory_compaction_enabled", True) monkeypatch.setattr(settings, "memory_compaction_debounce_seconds", 30) monkeypatch.setattr(schedule, "_get_redis", lambda: fake_redis) monkeypatch.setattr(schedule.time, "time", lambda: 100.0) monkeypatch.setattr( schedule, "_enqueue_memory_compaction_task", lambda user_id, context, *, countdown: calls.append( (user_id, context or {}, countdown) ), ) schedule.schedule_memory_compaction_run("u1", {"trigger_source": "memoir_segments"}) schedule.schedule_memory_compaction_run( "u1", {"trigger_source": "chapter_recompose"} ) assert len(calls) == 1 assert fake_redis.get(schedule.scheduler_key("u1")) == "1" assert fake_redis.get(schedule.debounce_key("u1")) == "130.0" def test_finalize_reschedules_when_deadline_extended(monkeypatch) -> None: fake_redis = FakeRedis() calls: list[tuple[str, dict, int]] = [] monkeypatch.setattr(settings, "memory_compaction_debounce_seconds", 30) monkeypatch.setattr(schedule, "_get_redis", lambda: fake_redis) monkeypatch.setattr(schedule.time, "time", lambda: 140.0) monkeypatch.setattr( schedule, "_enqueue_memory_compaction_task", lambda user_id, context, *, countdown: calls.append( (user_id, context or {}, countdown) ), ) fake_redis.set(schedule.debounce_key("u1"), "175.0") fake_redis.set(schedule.scheduler_key("u1"), "1") schedule.finalize_memory_compaction_run( "u1", observed_deadline_ts=130.0, context={"trigger_source": "memoir_segments"}, ) assert len(calls) == 1 assert calls[0][2] == 35 assert fake_redis.get(schedule.scheduler_key("u1")) == "1" assert fake_redis.get(schedule.debounce_key("u1")) == "175.0" def test_finalize_clears_stale_deadline_when_not_extended(monkeypatch) -> None: fake_redis = FakeRedis() monkeypatch.setattr(settings, "memory_compaction_debounce_seconds", 30) monkeypatch.setattr(schedule, "_get_redis", lambda: fake_redis) fake_redis.set(schedule.debounce_key("u1"), "130.0") fake_redis.set(schedule.scheduler_key("u1"), "1") schedule.finalize_memory_compaction_run( "u1", observed_deadline_ts=130.0, context={"trigger_source": "memoir_segments"}, ) assert fake_redis.get(schedule.scheduler_key("u1")) is None assert fake_redis.get(schedule.debounce_key("u1")) is None @pytest.mark.asyncio async def test_run_memory_compaction_stops_before_missing_embedding(monkeypatch) -> None: chunk = SimpleNamespace( id="chunk-1", created_at=datetime(2024, 1, 1, tzinfo=timezone.utc), ) row = SimpleNamespace( id="chunk-1", created_at=datetime(2024, 1, 1, tzinfo=timezone.utc), is_excluded=False, embedding=None, ) async def fake_list(*args, **kwargs): return [chunk] async def fake_get(*args, **kwargs): return row monkeypatch.setattr( "app.features.memory.compaction_service.list_incremental_chunks_for_compaction", fake_list, ) monkeypatch.setattr( "app.features.memory.compaction_service.get_memory_chunk_for_user", fake_get, ) out = await run_memory_compaction( db=object(), user_id="u1", context={ "_cursor_pair_override": ( datetime(1970, 1, 1, tzinfo=timezone.utc), "00000000-0000-0000-0000-000000000000", ) }, ) assert out["skipped_reason"] == "awaiting_embeddings" assert out["pending_chunk_id"] == "chunk-1" assert out["new_cursor_id"] is None @pytest.mark.asyncio async def test_curation_action_details_include_trigger_context(monkeypatch) -> None: now = datetime(2024, 1, 1, tzinfo=timezone.utc) chunk = SimpleNamespace( id="chunk-1", created_at=now, content="hello world duplicate", metadata_json={"a": 1}, source_id="s1", event_year=1990, embedding=[0.1, 0.2], is_excluded=False, ) loser = SimpleNamespace(id="chunk-2", is_excluded=False) captured: list[dict] = [] async def fake_list(*args, **kwargs): return [chunk] async def fake_get_memory_chunk(_db, chunk_id: str, _user_id: str): if chunk_id == "chunk-1": return chunk if chunk_id == "chunk-2": return loser return None monkeypatch.setattr( "app.features.memory.compaction_service.list_incremental_chunks_for_compaction", fake_list, ) monkeypatch.setattr( "app.features.memory.compaction_service.get_memory_chunk_for_user", fake_get_memory_chunk, ) async def fake_source_type(*args, **kwargs): return "draft" monkeypatch.setattr( "app.features.memory.compaction_service._source_type_for_chunk", fake_source_type, ) async def fake_search(*args, **kwargs): return [ { "id": "chunk-2", "content": "hello world duplicate", "source_id": "s1", "event_year": 1990, "metadata_json": {"b": 2}, "source_type": "transcript", "created_at": now, "distance": 0.01, } ] monkeypatch.setattr( "app.features.memory.compaction_service.search_nearest_chunks_for_compaction", fake_search, ) async def fake_set(*args, **kwargs): return True async def fake_create(_db, **kwargs): captured.append(kwargs["details"]) async def fake_stale(*_a, **_k): return 0 monkeypatch.setattr( "app.features.memory.compaction_service.set_chunk_excluded", fake_set, ) monkeypatch.setattr( "app.features.memory.compaction_service.create_curation_action", fake_create, ) monkeypatch.setattr( "app.features.memory.compaction_service.mark_facts_stale_for_excluded_chunk", fake_stale, ) out = await run_memory_compaction( db=object(), user_id="u1", context={ "_cursor_pair_override": ( datetime(1970, 1, 1, tzinfo=timezone.utc), "00000000-0000-0000-0000-000000000000", ), "trigger_source": "memoir_segments", "trigger_time": "2026-03-30T00:00:00+00:00", "pipeline_run_id": "run-1", "request_id": "req-1", "story_dispatch_ids": ["story-1"], "candidate_source_ids": ["s1"], "chapters_to_enqueue": ["chapter-1"], }, ) assert out["chunks_excluded"] == 1 assert captured[0]["trigger_time"] == "2026-03-30T00:00:00+00:00" assert captured[0]["candidate_source_ids"] == ["s1"] assert captured[0]["chapters_to_enqueue"] == ["chapter-1"] def test_memory_compaction_run_releases_gate_and_retries_on_failure( monkeypatch, ) -> None: events: list[str] = [] class RetryTriggered(RuntimeError): pass async def fake_run_memory_compaction(user_id: str, context: dict | None): events.append("run") raise RuntimeError("compaction failed") def fake_retry(*, exc): events.append(f"retry:{type(exc).__name__}") raise RetryTriggered("retried") monkeypatch.setattr(settings, "memory_compaction_enabled", True) monkeypatch.setattr( "app.tasks.memory_compaction_tasks.read_debounce_deadline_ts", lambda user_id: 100.0, ) monkeypatch.setattr( "app.tasks.memory_compaction_tasks.acquire_redis_lock", lambda *args, **kwargs: SimpleNamespace(key="lock", token=b"t"), ) monkeypatch.setattr( "app.tasks.memory_compaction_tasks.release_redis_lock", lambda lock: events.append("release_lock"), ) monkeypatch.setattr( "app.tasks.memory_compaction_tasks.release_scheduler_gate", lambda user_id: events.append("release_gate"), ) monkeypatch.setattr( "app.tasks.memory_compaction_tasks._run_memory_compaction_async", fake_run_memory_compaction, ) monkeypatch.setattr( memory_compaction_run, "retry", fake_retry, ) with pytest.raises(RetryTriggered): memory_compaction_run.run("u1", {"trigger_source": "memoir_segments"}) assert "release_gate" in events assert "retry:RuntimeError" in events assert "release_lock" in events assert events.index("release_gate") < events.index("retry:RuntimeError") def test_memory_compaction_sweep_skipped_when_disabled( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setattr(settings, "memory_compaction_enabled", False) out = memory_compaction_sweep() assert out == {"skipped": True, "reason": "disabled"} def test_memory_compaction_sweep_schedules_recent_users( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setattr(settings, "memory_compaction_enabled", True) monkeypatch.setattr(settings, "memory_compaction_sweep_recent_hours", 24) scheduled: list[tuple[str, dict]] = [] async def fake_list(hours: int): assert hours == 24 return ["user-a", "user-b"] monkeypatch.setattr( "app.tasks.memory_compaction_tasks._list_users_with_recent_chunks_async", fake_list, ) monkeypatch.setattr( "app.tasks.memory_compaction_tasks.schedule_memory_compaction_run", lambda uid, ctx: scheduled.append((uid, dict(ctx))), ) out = memory_compaction_sweep() assert out["scheduled"] == 2 assert set(out["user_ids"]) == {"user-a", "user-b"} assert {u for u, _ in scheduled} == {"user-a", "user-b"} for _, ctx in scheduled: assert ctx.get("trigger_source") == "beat" assert ctx.get("sweep_hours") == 24