聊天和回忆录的证据检索统一改走 pgvector，移除 Postgres FTS/content_tsv；新增的迁移会删除 content_tsv 列（部署前需先执行 alembic upgrade）。

Embedding 端口增加 is_available()，聊天和回忆录的日志用统一的方式表示向量能力是否真正可调用。记忆整理（compaction）支持由 Beat 定期扫描用户；改进事实抽取提示并对 subject 做归一化，减少同一人出现多种称呼的情况。
2026-04-03 11:43:16 +08:00
parent b853b986dd
commit 41518bda11
26 changed files with 543 additions and 222 deletions
--- a/api/app/features/memory/service.py
+++ b/api/app/features/memory/service.py
@@ -1,16 +1,14 @@
 """
 MemoryService — conversation / memoir 的统一门面。

- ingest_transcript: transcript -> memory_sources, chunks, embedding, FTS
+- ingest_transcript: transcript -> memory_sources, chunks, embedding
 - ingest 后可选：LLM 富化（session/rolling 摘要、事实、时间线）
- retrieve: 委托 HybridRetriever 返回 evidence bundle（FTS + 可选向量 RRF）
+- retrieve: 委托 HybridRetriever 返回 evidence bundle（向量 chunks）

-Celery 侧使用 `ingest_transcript_sync` + `retrieve_evidence_sync`，与异步路径差异见
+Celery 侧使用 `ingest_transcript_sync` + `retrieve_evidence_sync`，与异步路径对齐见
 `api/docs/memory-retrieval.md`。
 """

-import asyncio
-
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.core.logging import get_logger
@@ -23,7 +21,6 @@ from app.features.memory.repo import (
    set_chunk_excluded,
    set_memory_fact_status,
    update_chunk_embedding,
-    update_chunk_fts,
 )
 from app.ports.embedding import EmbeddingProvider

@@ -45,7 +42,7 @@ class MemoryService:
    ) -> str:
        """
        Ingest conversation transcript into memory.
-        Creates MemorySource, chunks, populates embedding + FTS.
+        Creates MemorySource, chunks, populates embedding.
        Returns source_id.
        """
        if not transcript or not transcript.strip():
@@ -73,10 +70,6 @@ class MemoryService:

        await self._db.flush()

-        # FTS: populate content_tsv
-        for chunk_id, _ in chunk_records:
-            await update_chunk_fts(self._db, chunk_id)
-
        # Embedding: 若有 provider 则写入
        if self._embedding and chunk_records:
            texts = [c for _, c in chunk_records]
@@ -186,7 +179,7 @@ def ingest_transcript_sync(
 ) -> str:
    """
    Sync transcript ingest for Celery tasks.
-    Creates source + chunks + FTS, and best-effort populates embeddings.
+    Creates source + chunks, and best-effort populates embeddings.
    Returns source_id.
    """
    from app.core.dependencies import get_embedding_provider
@@ -195,7 +188,6 @@ def ingest_transcript_sync(
        create_chunk_sync,
        create_source_sync,
        update_chunk_embedding_sync,
-        update_chunk_fts_sync,
    )

    if not transcript or not transcript.strip():
@@ -222,13 +214,12 @@ def ingest_transcript_sync(
        )
        session.flush()
        chunk_records.append((chunk.id, content))
-        update_chunk_fts_sync(session, chunk.id)

    try:
        embedding_provider = get_embedding_provider()
        if chunk_records and embedding_provider is not None:
            texts = [content for _, content in chunk_records]
-            embeddings = asyncio.run(embedding_provider.embed_texts(texts))
+            embeddings = embedding_provider.embed_texts_sync(texts)
            for (chunk_id, _), emb in zip(chunk_records, embeddings):
                if emb:
                    update_chunk_embedding_sync(session, chunk_id, emb)