配置 SSOT(TOML + .env) 统一错误契约 Auth 与事务边界 Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client 可观测性(OpenTelemetry + LGTM)
474 lines
14 KiB
Python
474 lines
14 KiB
Python
"""Memory compaction 回归测试。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime, timezone
|
|
from types import SimpleNamespace
|
|
|
|
import pytest
|
|
|
|
from app.core import memory_compaction_schedule as schedule
|
|
from app.core.config import settings
|
|
from app.features.memory.compaction_service import (
|
|
canonical_score,
|
|
count_duplicate_layers,
|
|
embedding_layer_match,
|
|
metadata_layer_match,
|
|
run_memory_compaction,
|
|
text_layer_match,
|
|
)
|
|
from app.features.memory.constants import memory
|
|
from app.tasks.memory_compaction_tasks import (
|
|
memory_compaction_run,
|
|
memory_compaction_sweep,
|
|
)
|
|
|
|
|
|
class FakeRedis:
    """Minimal in-memory stand-in for a Redis client, backed by a dict.

    Only ``get``, ``set`` (honouring ``nx``) and ``delete`` are provided.
    Key expiry is not simulated: ``ex`` is accepted for signature
    compatibility and otherwise ignored.
    """

    # Sentinel that lets ``delete`` tell "key absent" apart from any stored
    # value with a single dict operation.
    _MISSING = object()

    def __init__(self) -> None:
        self._store: dict[str, str] = {}

    def get(self, key: str) -> str | None:
        """Return the value stored under *key*, or ``None`` when absent."""
        return self._store.get(key)

    def set(
        self,
        key: str,
        value: str,
        *,
        nx: bool = False,
        ex: int | None = None,
    ) -> bool:
        """Store *value* under *key*.

        Args:
            key: Key to write.
            value: Value to store.
            nx: When true, refuse to overwrite an existing key.
            ex: Accepted but ignored; this fake never expires keys.

        Returns:
            ``False`` when ``nx`` is set and the key already exists,
            ``True`` otherwise (the value was written).
        """
        if nx and key in self._store:
            return False
        self._store[key] = value
        return True

    def delete(self, key: str) -> int:
        """Remove *key* and return 1 if it existed, else 0."""
        removed = self._store.pop(key, self._MISSING)
        return int(removed is not self._MISSING)
|
|
|
|
|
|
def test_embedding_layer_uses_cosine_distance() -> None:
    """Check ``embedding_layer_match`` at two distances with threshold 0.92.

    A distance of 0.05 must be reported as a match and 0.2 must not.
    """
    # (distance, expected match result); evaluated in this order.
    cases = (
        (0.05, True),
        (0.2, False),
    )
    for distance, expected in cases:
        assert embedding_layer_match(distance, similarity_threshold=0.92) is expected
|
|
|
|
|
|
def test_text_layer_jaccard() -> None:
    """Two identical texts must match at ``jaccard_min=0.55``."""
    left = "我在乡下度过童年 夏天很热"
    right = "我在乡下度过童年 夏天很热"
    matched = text_layer_match(left, right, jaccard_min=0.55)
    assert matched is True
|
|
|
|
|
|
def test_metadata_layer_same_source() -> None:
    """``metadata_layer_match`` is true for an equal ``source_id`` only.

    The chunk has ``source_id="s1"`` and no event year; a neighbor with the
    same source id must match and one with ``"s2"`` must not.
    """
    from unittest.mock import MagicMock

    chunk = MagicMock(source_id="s1", event_year=None)

    # (neighbor source id, expected result); the same chunk mock is reused.
    for neighbor_source, expected in (("s1", True), ("s2", False)):
        neighbor = {"source_id": neighbor_source, "event_year": None}
        outcome = metadata_layer_match(chunk, neighbor, event_year_window=1)
        assert outcome is expected
|
|
|
|
|
|
def test_count_duplicate_layers_requires_min() -> None:
    """At least two duplicate layers are counted for a near-identical pair.

    Chunk and neighbor share the same content, differ in ``source_id`` and
    have event years one apart (window 1); the distance passed in is 0.02.
    """
    from unittest.mock import MagicMock

    shared_text = "hello world test duplicate"
    chunk = MagicMock(source_id="a", event_year=1990, content=shared_text)
    neighbor = dict(content=shared_text, source_id="b", event_year=1991)

    matched_layers = count_duplicate_layers(
        chunk=chunk,
        neighbor=neighbor,
        distance=0.02,
        similarity_threshold=0.9,
        jaccard_min=0.55,
        event_year_window=1,
    )

    assert matched_layers >= 2
|
|
|
|
|
|
def test_canonical_score_prefers_longer_and_draft() -> None:
    """A ``draft`` source must outscore a ``transcript`` with equal content."""
    # NOTE(review): the test name also mentions "longer", but only the
    # source-type preference is asserted here; content is "short" for both.
    scores = {
        source_type: canonical_score(
            content="short", metadata_json={}, source_type=source_type
        )
        for source_type in ("transcript", "draft")
    }
    assert scores["draft"] > scores["transcript"]
|
|
|
|
|
|
def test_schedule_merges_subsequent_triggers(monkeypatch) -> None:
    """Two triggers for one user result in a single enqueued task.

    With the clock frozen at 100.0 and a 30-second debounce, scheduling twice
    for ``"u1"`` must enqueue exactly once, leave the scheduler key at ``"1"``
    and store ``"130.0"`` under the debounce key.
    """
    redis_stub = FakeRedis()
    enqueued: list[tuple[str, dict, int]] = []

    def provide_redis():
        return redis_stub

    def frozen_clock():
        return 100.0

    def record_enqueue(user_id, context, *, countdown):
        # Replaces the real enqueue helper; only remembers the call.
        enqueued.append((user_id, context or {}, countdown))

    monkeypatch.setattr(memory, "compaction_enabled", True)
    monkeypatch.setattr(memory, "compaction_debounce_seconds", 30)
    monkeypatch.setattr(schedule, "_get_redis", provide_redis)
    monkeypatch.setattr(schedule.time, "time", frozen_clock)
    monkeypatch.setattr(schedule, "_enqueue_memory_compaction_task", record_enqueue)

    for trigger in ("memoir_segments", "chapter_recompose"):
        schedule.schedule_memory_compaction_run("u1", {"trigger_source": trigger})

    scheduler_value = redis_stub.get(schedule.scheduler_key("u1"))
    debounce_value = redis_stub.get(schedule.debounce_key("u1"))

    assert len(enqueued) == 1
    assert scheduler_value == "1"
    assert debounce_value == "130.0"
|
|
|
|
|
|
def test_finalize_reschedules_when_deadline_extended(monkeypatch) -> None:
    """Finalize re-enqueues when the stored deadline moved past the observed one.

    Redis holds a debounce deadline of ``"175.0"`` while the run observed
    130.0; with the clock frozen at 140.0 exactly one task must be enqueued
    with countdown 35, and both Redis keys must be left as they were.
    """
    redis_stub = FakeRedis()
    enqueued: list[tuple[str, dict, int]] = []

    def provide_redis():
        return redis_stub

    def frozen_clock():
        return 140.0

    def record_enqueue(user_id, context, *, countdown):
        # Replaces the real enqueue helper; only remembers the call.
        enqueued.append((user_id, context or {}, countdown))

    monkeypatch.setattr(memory, "compaction_debounce_seconds", 30)
    monkeypatch.setattr(schedule, "_get_redis", provide_redis)
    monkeypatch.setattr(schedule.time, "time", frozen_clock)
    monkeypatch.setattr(schedule, "_enqueue_memory_compaction_task", record_enqueue)

    # Pre-existing state: deadline already extended, scheduler gate held.
    redis_stub.set(schedule.debounce_key("u1"), "175.0")
    redis_stub.set(schedule.scheduler_key("u1"), "1")

    schedule.finalize_memory_compaction_run(
        "u1",
        observed_deadline_ts=130.0,
        context={"trigger_source": "memoir_segments"},
    )

    assert len(enqueued) == 1
    _, _, countdown = enqueued[0]
    assert countdown == 35
    assert redis_stub.get(schedule.scheduler_key("u1")) == "1"
    assert redis_stub.get(schedule.debounce_key("u1")) == "175.0"
|
|
|
|
|
|
def test_finalize_clears_stale_deadline_when_not_extended(monkeypatch) -> None:
    """Finalize removes both Redis keys when the deadline was not extended.

    The stored debounce deadline (``"130.0"``) equals the observed deadline,
    so after finalize neither the scheduler key nor the debounce key remains.
    """
    redis_stub = FakeRedis()

    def provide_redis():
        return redis_stub

    monkeypatch.setattr(memory, "compaction_debounce_seconds", 30)
    monkeypatch.setattr(schedule, "_get_redis", provide_redis)

    # Pre-existing state: deadline unchanged since the run started.
    redis_stub.set(schedule.debounce_key("u1"), "130.0")
    redis_stub.set(schedule.scheduler_key("u1"), "1")

    schedule.finalize_memory_compaction_run(
        "u1",
        observed_deadline_ts=130.0,
        context={"trigger_source": "memoir_segments"},
    )

    remaining_scheduler = redis_stub.get(schedule.scheduler_key("u1"))
    remaining_debounce = redis_stub.get(schedule.debounce_key("u1"))
    assert remaining_scheduler is None
    assert remaining_debounce is None
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_memory_compaction_stops_before_missing_embedding(monkeypatch) -> None:
    """Compaction reports ``awaiting_embeddings`` for a chunk without embedding.

    The listing stub yields one chunk whose stored row has ``embedding=None``;
    the result must name that chunk as pending and leave ``new_cursor_id``
    as ``None``.
    """
    created = datetime(2024, 1, 1, tzinfo=timezone.utc)
    listed_chunk = SimpleNamespace(id="chunk-1", created_at=created)
    stored_row = SimpleNamespace(
        id="chunk-1",
        created_at=created,
        is_excluded=False,
        embedding=None,
    )

    async def fake_list(*args, **kwargs):
        return [listed_chunk]

    async def fake_get(*args, **kwargs):
        return stored_row

    replacements = (
        (
            "app.features.memory.compaction_service.list_incremental_chunks_for_compaction",
            fake_list,
        ),
        (
            "app.features.memory.compaction_service.get_memory_chunk_for_user",
            fake_get,
        ),
    )
    for target, stub in replacements:
        monkeypatch.setattr(target, stub)

    # Cursor override handed to the service through the context dict.
    cursor_override = (
        datetime(1970, 1, 1, tzinfo=timezone.utc),
        "00000000-0000-0000-0000-000000000000",
    )
    out = await run_memory_compaction(
        db=object(),
        user_id="u1",
        context={"_cursor_pair_override": cursor_override},
    )

    assert out["skipped_reason"] == "awaiting_embeddings"
    assert out["pending_chunk_id"] == "chunk-1"
    assert out["new_cursor_id"] is None
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_curation_action_details_include_trigger_context(monkeypatch) -> None:
    """Curation action details carry fields from the trigger context.

    One chunk and one near-duplicate neighbor are fed through stubbed
    repository helpers. The run must exclude one chunk, and the ``details``
    passed to ``create_curation_action`` must contain the ``trigger_time``,
    ``candidate_source_ids`` and ``chapters_to_enqueue`` from the context.
    """
    now = datetime(2024, 1, 1, tzinfo=timezone.utc)
    primary = SimpleNamespace(
        id="chunk-1",
        created_at=now,
        content="hello world duplicate",
        metadata_json={"a": 1},
        source_id="s1",
        event_year=1990,
        embedding=[0.1, 0.2],
        is_excluded=False,
    )
    duplicate = SimpleNamespace(id="chunk-2", is_excluded=False)
    captured: list[dict] = []

    # --- stubs for the service's collaborators -------------------------
    async def fake_list(*args, **kwargs):
        return [primary]

    async def fake_get_memory_chunk(_db, chunk_id: str, _user_id: str):
        known = {"chunk-1": primary, "chunk-2": duplicate}
        return known.get(chunk_id)

    async def fake_source_type(*args, **kwargs):
        return "draft"

    async def fake_search(*args, **kwargs):
        # A fresh neighbor record is built on every call.
        neighbor = {
            "id": "chunk-2",
            "content": "hello world duplicate",
            "source_id": "s1",
            "event_year": 1990,
            "metadata_json": {"b": 2},
            "source_type": "transcript",
            "created_at": now,
            "distance": 0.01,
        }
        return [neighbor]

    async def fake_set(*args, **kwargs):
        return True

    async def fake_create(_db, **kwargs):
        # Keep the details payload so the test can inspect it afterwards.
        captured.append(kwargs["details"])

    async def fake_stale(*_a, **_k):
        return 0

    replacements = (
        (
            "app.features.memory.compaction_service.list_incremental_chunks_for_compaction",
            fake_list,
        ),
        (
            "app.features.memory.compaction_service.get_memory_chunk_for_user",
            fake_get_memory_chunk,
        ),
        (
            "app.features.memory.compaction_service._source_type_for_chunk",
            fake_source_type,
        ),
        (
            "app.features.memory.compaction_service.search_nearest_chunks_for_compaction",
            fake_search,
        ),
        (
            "app.features.memory.compaction_service.set_chunk_excluded",
            fake_set,
        ),
        (
            "app.features.memory.compaction_service.create_curation_action",
            fake_create,
        ),
        (
            "app.features.memory.compaction_service.mark_facts_stale_for_excluded_chunk",
            fake_stale,
        ),
    )
    for target, stub in replacements:
        monkeypatch.setattr(target, stub)

    trigger_context = {
        "_cursor_pair_override": (
            datetime(1970, 1, 1, tzinfo=timezone.utc),
            "00000000-0000-0000-0000-000000000000",
        ),
        "trigger_source": "memoir_segments",
        "trigger_time": "2026-03-30T00:00:00+00:00",
        "pipeline_run_id": "run-1",
        "request_id": "req-1",
        "story_dispatch_ids": ["story-1"],
        "candidate_source_ids": ["s1"],
        "chapters_to_enqueue": ["chapter-1"],
    }
    out = await run_memory_compaction(
        db=object(),
        user_id="u1",
        context=trigger_context,
    )

    assert out["chunks_excluded"] == 1
    assert captured[0]["trigger_time"] == "2026-03-30T00:00:00+00:00"
    assert captured[0]["candidate_source_ids"] == ["s1"]
    assert captured[0]["chapters_to_enqueue"] == ["chapter-1"]
|
|
|
|
|
|
def test_memory_compaction_run_releases_gate_and_retries_on_failure(
    monkeypatch,
) -> None:
    """A failing compaction run releases gate and lock and asks for a retry.

    The async runner is stubbed to raise ``RuntimeError``. The task must call
    ``retry`` with that exception, release the Redis lock, and release the
    scheduler gate before the retry is requested.
    """
    events: list[str] = []

    class RetryTriggered(RuntimeError):
        """Raised by the ``retry`` stub so the test can detect the call."""

    async def fake_run_memory_compaction(user_id: str, context: dict | None):
        events.append("run")
        raise RuntimeError("compaction failed")

    def fake_retry(*, exc):
        events.append(f"retry:{type(exc).__name__}")
        raise RetryTriggered("retried")

    def fake_read_deadline(user_id):
        return 100.0

    def fake_acquire_lock(*args, **kwargs):
        return SimpleNamespace(key="lock", token=b"t")

    def fake_release_lock(lock):
        events.append("release_lock")

    def fake_release_gate(user_id):
        events.append("release_gate")

    monkeypatch.setattr(memory, "compaction_enabled", True)
    task_level_patches = (
        (
            "app.tasks.memory_compaction_tasks.read_debounce_deadline_ts",
            fake_read_deadline,
        ),
        (
            "app.tasks.memory_compaction_tasks.acquire_redis_lock",
            fake_acquire_lock,
        ),
        (
            "app.tasks.memory_compaction_tasks.release_redis_lock",
            fake_release_lock,
        ),
        (
            "app.tasks.memory_compaction_tasks.release_scheduler_gate",
            fake_release_gate,
        ),
        (
            "app.tasks.memory_compaction_tasks._run_memory_compaction_async",
            fake_run_memory_compaction,
        ),
    )
    for target, stub in task_level_patches:
        monkeypatch.setattr(target, stub)
    monkeypatch.setattr(memory_compaction_run, "retry", fake_retry)

    with pytest.raises(RetryTriggered):
        memory_compaction_run.run("u1", {"trigger_source": "memoir_segments"})

    for expected_event in ("release_gate", "retry:RuntimeError", "release_lock"):
        assert expected_event in events
    # The gate must already be released when the retry is requested.
    gate_position = events.index("release_gate")
    retry_position = events.index("retry:RuntimeError")
    assert gate_position < retry_position
|
|
|
|
|
|
def test_memory_compaction_sweep_skipped_when_disabled(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With compaction disabled the sweep returns the ``disabled`` skip marker."""
    monkeypatch.setattr(memory, "compaction_enabled", False)

    expected = {"skipped": True, "reason": "disabled"}
    result = memory_compaction_sweep()

    assert result == expected
|
|
|
|
|
|
def test_memory_compaction_sweep_schedules_recent_users(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The sweep schedules a run for every user returned by the listing stub.

    With a 24-hour window and two users listed, the result must report two
    scheduled, zero failed and ``hours == 24``; every scheduled context must
    carry ``trigger_source="beat"`` and ``sweep_hours=24``.
    """
    monkeypatch.setattr(memory, "compaction_enabled", True)
    monkeypatch.setattr(memory, "compaction_sweep_recent_hours", 24)
    scheduled: list[tuple[str, dict]] = []

    async def fake_list(hours: int):
        # The sweep must forward the configured window unchanged.
        assert hours == 24
        return ["user-a", "user-b"]

    def record_schedule(uid, ctx):
        # Copy the context so later changes by the caller are not observed.
        scheduled.append((uid, dict(ctx)))

    monkeypatch.setattr(
        "app.tasks.memory_compaction_tasks._list_users_with_recent_chunks_async",
        fake_list,
    )
    monkeypatch.setattr(
        "app.tasks.memory_compaction_tasks.schedule_memory_compaction_run",
        record_schedule,
    )

    out = memory_compaction_sweep()

    assert out["scheduled"] == 2
    assert out["failed"] == 0
    assert out["hours"] == 24

    scheduled_users = {user for user, _ in scheduled}
    assert scheduled_users == {"user-a", "user-b"}
    for _, ctx in scheduled:
        assert ctx.get("trigger_source") == "beat"
        assert ctx.get("sweep_hours") == 24
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_memory_compaction_async_wraps_transactional(monkeypatch) -> None:
    """``_run_memory_compaction_async`` commits once around ``compact_user``.

    Session factory and ``MemoryService`` are replaced with fakes; the helper
    must return the service result, forward user id and context unchanged,
    and record exactly one ``commit`` and no ``rollback`` on the session.
    """
    session_events: list[str] = []
    compact_calls: list[tuple[str, dict | None]] = []

    class FakeSession:
        """Async-context-manager session that logs commit/rollback calls."""

        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb) -> None:
            return None

        async def commit(self) -> None:
            session_events.append("commit")

        async def rollback(self) -> None:
            session_events.append("rollback")

    class FakeMemoryService:
        """Service fake that records ``compact_user`` calls."""

        def __init__(self, db) -> None:
            self._db = db

        async def compact_user(self, user_id: str, context: dict | None):
            compact_calls.append((user_id, context))
            return {"chunks_excluded": 1}

    def open_session():
        return FakeSession()

    monkeypatch.setattr(
        "app.tasks.memory_compaction_tasks.AsyncSessionLocal",
        open_session,
    )
    monkeypatch.setattr(
        "app.tasks.memory_compaction_tasks.MemoryService",
        FakeMemoryService,
    )

    from app.tasks.memory_compaction_tasks import _run_memory_compaction_async

    result = await _run_memory_compaction_async("u1", {"trigger_source": "test"})

    assert result == {"chunks_excluded": 1}
    assert compact_calls == [("u1", {"trigger_source": "test"})]
    assert session_events == ["commit"]
|