聊天和回忆录证据检索都走 pgvector,去掉 Postgres FTS/content_tsv,新迁移删掉 content_tsv 列(部署要先 alembic upgrade)。

Embedding 端口增加 is_available(),聊天和回忆录日志用统一方式表示向量是否真能调用。

记忆整理(compaction)支持由 Beat 定期扫描最近有新 chunk 的用户,并为这些用户调度整理任务。

优化事实抽取提示,并对 subject 做归一化,减少同一个人出现多种称呼的情况。
This commit is contained in:
Kevin
2026-04-03 11:43:16 +08:00
parent b853b986dd
commit 41518bda11
26 changed files with 543 additions and 222 deletions

View File

@@ -19,7 +19,10 @@ from app.features.memory.compaction_service import (
text_layer_match,
)
from app.features.memory.service import ingest_transcript_sync
from app.tasks.memory_compaction_tasks import memory_compaction_run
from app.tasks.memory_compaction_tasks import (
memory_compaction_run,
memory_compaction_sweep,
)
class FakeRedis:
@@ -197,12 +200,17 @@ def test_ingest_transcript_sync_populates_embeddings(monkeypatch) -> None:
self.commit_calls += 1
class FakeEmbeddingProvider:
    """Embedding test double that is always available.

    Both embedding methods return one single-element vector per input text,
    where the element is the text's 1-based position as a float.
    """

    def is_available(self) -> bool:
        """Report the provider as usable."""
        return True

    async def embed_texts(self, texts: list[str]) -> list[list[float]]:
        """Asynchronously return one positional vector per text."""
        vectors: list[list[float]] = []
        for position, _text in enumerate(texts, start=1):
            vectors.append([float(position)])
        return vectors

    def embed_texts_sync(self, texts: list[str]) -> list[list[float]]:
        """Synchronously return one positional vector per text."""
        vectors: list[list[float]] = []
        for position, _text in enumerate(texts, start=1):
            vectors.append([float(position)])
        return vectors
fake_session = FakeSession()
embedded: list[tuple[str, list[float]]] = []
fts_updated: list[str] = []
monkeypatch.setattr(settings, "memory_enrichment_enabled", False)
monkeypatch.setattr(
@@ -221,10 +229,6 @@ def test_ingest_transcript_sync_populates_embeddings(monkeypatch) -> None:
"app.features.memory.repo.create_chunk_sync",
lambda *args, **kwargs: SimpleNamespace(id=f"chunk-{kwargs['chunk_index']}"),
)
monkeypatch.setattr(
"app.features.memory.repo.update_chunk_fts_sync",
lambda session, chunk_id: fts_updated.append(chunk_id),
)
monkeypatch.setattr(
"app.features.memory.repo.update_chunk_embedding_sync",
lambda session, chunk_id, embedding: embedded.append((chunk_id, embedding)),
@@ -239,7 +243,6 @@ def test_ingest_transcript_sync_populates_embeddings(monkeypatch) -> None:
assert source_id == "src-1"
assert [chunk_id for chunk_id, _ in embedded] == ["chunk-0", "chunk-1"]
assert fts_updated == ["chunk-0", "chunk-1"]
assert fake_session.commit_calls == 1
@@ -442,3 +445,53 @@ def test_memory_compaction_run_releases_gate_and_retries_on_failure(
assert "retry:RuntimeError" in events
assert "release_lock" in events
assert events.index("release_gate") < events.index("retry:RuntimeError")
def test_memory_compaction_sweep_skipped_when_disabled(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The sweep returns a "disabled" skip marker when compaction is off."""
    # Turn the feature flag off for the duration of this test only.
    monkeypatch.setattr(settings, "memory_compaction_enabled", False)

    result = memory_compaction_sweep()

    assert result == {"skipped": True, "reason": "disabled"}
def test_memory_compaction_sweep_schedules_recent_users(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The sweep schedules one compaction run per user returned by the lookup.

    The DB context, the recent-user lookup, and the scheduler are replaced
    with stubs, so the test only observes what the sweep passes to them and
    what it returns.
    """
    expected_users = {"user-a", "user-b"}
    dispatched: list[tuple[str, dict]] = []

    class _DbCtx:
        """Context manager stub that yields a throwaway object as the session."""

        def __enter__(self):
            return object()

        def __exit__(self, *args):
            return None

    def fake_list(session, *, hours):
        # The sweep must forward the configured look-back window unchanged.
        assert hours == 24
        return ["user-a", "user-b"]

    def record_schedule(uid, ctx):
        # Store a copy of the context alongside the user id for later checks.
        dispatched.append((uid, dict(ctx)))

    monkeypatch.setattr(settings, "memory_compaction_enabled", True)
    monkeypatch.setattr(settings, "memory_compaction_sweep_recent_hours", 24)
    monkeypatch.setattr(
        "app.tasks.memory_compaction_tasks.get_sync_db",
        lambda: _DbCtx(),
    )
    monkeypatch.setattr(
        "app.tasks.memory_compaction_tasks.list_users_with_recent_chunks_sync",
        fake_list,
    )
    monkeypatch.setattr(
        "app.tasks.memory_compaction_tasks.schedule_memory_compaction_run",
        record_schedule,
    )

    result = memory_compaction_sweep()

    assert result["scheduled"] == 2
    assert set(result["user_ids"]) == expected_users
    assert {user_id for user_id, _ in dispatched} == expected_users
    for _, run_context in dispatched:
        assert run_context.get("trigger_source") == "beat"
        assert run_context.get("sweep_hours") == 24