feat(api): 收敛对话与记忆流程边界,引入 LLM 网关与专用服务
- MemoryService 异步路径委托 MemoryIngestService / MemoryRetrievalService;富化派发经 MemoryEnrichmentScheduler
- WebSocket pipeline 经 ChatTurnService 与显式 DTO 编排单轮对话;回忆录片段入队由 MemoirIngestScheduler 封装
- 新增 LlmGateway(LlmUseCase),各 agent、任务与适配器对齐 ports
- 补充 memory 提示适配、runtime 类型、memory-retrieval 文档、ai-touchpoints 说明与扫描脚本及配套测试

Made-with: Cursor
This commit is contained in:
@@ -14,6 +14,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.langchain_llm import ainvoke_json_object, invoke_json_object
|
||||
from app.core.llm_gateway import LlmGateway, LlmUseCase
|
||||
from app.core.logging import get_logger
|
||||
from app.features.memory.enrichment_pipeline import (
|
||||
dedupe_key,
|
||||
@@ -45,9 +46,9 @@ def _lineage_snapshot_from_source(source: MemorySource | None) -> dict | None:
|
||||
|
||||
def _resolve_llm_sync() -> Any | None:
    """Resolve the LLM used by synchronous memory enrichment.

    The model is requested from ``LlmGateway`` under the
    ``memory.enrichment_sync`` use case with ``fast=True``.

    Returns:
        The object returned by ``LlmGateway.langchain_llm_for``, or ``None``
        when resolving it raises for any reason.
    """
    try:
        return LlmGateway().langchain_llm_for(
            LlmUseCase("memory.enrichment_sync", fast=True)
        )
    except Exception as e:
        # Best-effort lookup: report the failure and let the caller decide
        # how to proceed without a model.
        logger.warning("memory enrichment 无法获取 LLM: {}", e)
        return None
|
||||
@@ -150,7 +151,8 @@ def enrich_memory_after_ingest_sync(
|
||||
chunk_ids = [c.id for c in chunks]
|
||||
chunk_texts = [c.content for c in chunks]
|
||||
numbered = "\n\n".join(
|
||||
f"[chunk_id={cid}]\n{txt}" for cid, txt in zip(chunk_ids, chunk_texts)
|
||||
f"[chunk_id={cid}]\n{txt}"
|
||||
for cid, txt in zip(chunk_ids, chunk_texts, strict=False)
|
||||
)
|
||||
narrator_label = (narrator_name or "").strip() or "叙述者"
|
||||
|
||||
@@ -224,7 +226,8 @@ async def enrich_memory_after_ingest_async(
|
||||
chunk_ids = [c.id for c in chunks]
|
||||
chunk_texts = [c.content for c in chunks]
|
||||
numbered = "\n\n".join(
|
||||
f"[chunk_id={cid}]\n{txt}" for cid, txt in zip(chunk_ids, chunk_texts)
|
||||
f"[chunk_id={cid}]\n{txt}"
|
||||
for cid, txt in zip(chunk_ids, chunk_texts, strict=False)
|
||||
)
|
||||
narrator_label = (narrator_name or "").strip() or "叙述者"
|
||||
|
||||
|
||||
50
api/app/features/memory/enrichment_scheduler.py
Normal file
50
api/app/features/memory/enrichment_scheduler.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""Memory enrichment scheduling boundary."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MemoryEnrichmentRequest:
|
||||
user_id: str
|
||||
source_id: str
|
||||
memoir_correlation_id: str | None = None
|
||||
|
||||
|
||||
class MemoryEnrichmentScheduler:
    """Adapter around the Celery enrichment task name and queue policy."""

    def schedule(self, request: MemoryEnrichmentRequest) -> str | None:
        """Schedule enrichment for a single request.

        Forwards the request fields to ``schedule_memory_enrichment`` and
        returns whatever that call returns.
        """
        # Imported lazily, inside the call, exactly as the module is written.
        from app.tasks.memory_enrichment_tasks import schedule_memory_enrichment

        return schedule_memory_enrichment(
            request.user_id,
            request.source_id,
            memoir_correlation_id=request.memoir_correlation_id,
        )

    def schedule_many(
        self,
        user_id: str,
        source_ids: list[str],
        *,
        memoir_correlation_id: str | None = None,
    ) -> list[str]:
        """Schedule enrichment for every non-empty id in ``source_ids``.

        Falsy source ids are skipped. Only truthy task ids returned by
        ``schedule`` are collected, in input order.
        """
        # Lazy generator: each request is built and scheduled one at a time,
        # so the call order matches a plain loop.
        pending = (
            MemoryEnrichmentRequest(
                user_id=user_id,
                source_id=sid,
                memoir_correlation_id=memoir_correlation_id,
            )
            for sid in source_ids
            if sid
        )
        return [tid for req in pending if (tid := self.schedule(req))]
|
||||
|
||||
|
||||
__all__ = ["MemoryEnrichmentRequest", "MemoryEnrichmentScheduler"]
|
||||
@@ -5,6 +5,7 @@ from __future__ import annotations
|
||||
from typing import Any
|
||||
|
||||
from app.core.langchain_llm import ainvoke_json_object, invoke_json_object
|
||||
from app.core.llm_gateway import LlmGateway, LlmUseCase
|
||||
from app.core.logging import get_logger
|
||||
from app.features.memory.llm_schemas import (
|
||||
FactsExtractionPayload,
|
||||
@@ -101,10 +102,11 @@ async def extract_facts_from_transcript_async(
|
||||
async def extract_facts(chunk_text: str, *, user_id: str) -> list[dict]:
|
||||
"""兼容旧接口:单块文本(无 chunk id 时传空 source_chunk_id)。"""
|
||||
from app.core.db import AsyncSessionLocal
|
||||
from app.core.dependencies import get_llm_provider_fast
|
||||
from app.features.user.models import User
|
||||
|
||||
llm = get_llm_provider_fast().langchain_llm
|
||||
llm = LlmGateway().langchain_llm_for(
|
||||
LlmUseCase("memory.extract_facts.compat", fast=True)
|
||||
)
|
||||
narrator_name: str | None = None
|
||||
try:
|
||||
async with AsyncSessionLocal() as db:
|
||||
|
||||
110
api/app/features/memory/ingest_service.py
Normal file
110
api/app/features/memory/ingest_service.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""Memory ingest service boundary."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.logging import get_logger
|
||||
from app.features.conversation.lineage_schemas import (
|
||||
primary_user_message_id_from_lineage,
|
||||
)
|
||||
from app.features.memory.chunker import chunk_transcript
|
||||
from app.features.memory.enrichment_scheduler import (
|
||||
MemoryEnrichmentRequest,
|
||||
MemoryEnrichmentScheduler,
|
||||
)
|
||||
from app.features.memory.repo import (
|
||||
create_chunk,
|
||||
create_source,
|
||||
update_chunk_embedding,
|
||||
)
|
||||
from app.ports.embedding import EmbeddingProvider
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class MemoryIngestService:
    """Creates memory sources/chunks and schedules post-commit enrichment."""

    def __init__(
        self,
        db: AsyncSession,
        *,
        embedding_provider: EmbeddingProvider | None = None,
        enrichment_scheduler: MemoryEnrichmentScheduler | None = None,
    ) -> None:
        """Store collaborators; a default scheduler is created when none is given."""
        self._db = db
        self._embedding = embedding_provider
        self._enrichment_scheduler = enrichment_scheduler or MemoryEnrichmentScheduler()

    async def ingest_transcript(
        self,
        user_id: str,
        conversation_id: str,
        transcript: str,
        *,
        lineage_json: dict | None = None,
    ) -> str:
        """Persist a transcript as a source plus chunks and return the source id.

        Steps: create the source, create one chunk per piece produced by
        ``chunk_transcript``, flush, write embeddings when a provider is
        configured, commit, then dispatch enrichment.

        Raises:
            ValueError: if ``transcript`` is empty or whitespace-only.
        """
        if not transcript or not transcript.strip():
            raise ValueError("transcript cannot be empty")

        text = transcript.strip()
        primary_mid = (
            primary_user_message_id_from_lineage(lineage_json) if lineage_json else None
        )
        source = await create_source(
            self._db,
            user_id=user_id,
            source_type="transcript",
            raw_text=text,
            conversation_id=conversation_id,
            lineage_json=lineage_json,
            primary_user_message_id=primary_mid,
        )

        # (chunk id, chunk text) pairs, in chunk order.
        chunk_records: list[tuple[str, str]] = []
        for i, content in enumerate(chunk_transcript(text)):
            chunk = await create_chunk(
                self._db,
                source_id=source.id,
                user_id=user_id,
                content=content,
                chunk_index=i,
            )
            chunk_records.append((chunk.id, content))

        await self._db.flush()

        vectors_written = 0
        if self._embedding and chunk_records:
            texts = [content for _, content in chunk_records]
            embeddings = await self._embedding.embed_texts(texts)
            # strict=False: a provider returning fewer vectors leaves the
            # trailing chunks without an embedding instead of raising.
            for (chunk_id, _), emb in zip(chunk_records, embeddings, strict=False):
                if emb:
                    vectors_written += 1
                    await update_chunk_embedding(self._db, chunk_id, emb)

        await self._db.commit()
        emb_ok = self._embedding.is_available() if self._embedding else False

        # Dispatch is best-effort: the data is already committed, so a
        # scheduling failure must not turn a successful ingest into an error.
        enrichment_task_id: str | None = None
        try:
            enrichment_task_id = self._enrichment_scheduler.schedule(
                MemoryEnrichmentRequest(user_id=user_id, source_id=source.id)
            )
        except Exception as e:
            logger.warning(
                "memory enrichment 派发跳过: {} exc_type={}", e, type(e).__name__
            )

        logger.info(
            "event=memory_ingest_done user_id={} conversation_id={} source_id={} "
            "chunks={} vectors_written={} embedding_available={} enrichment_enabled={} enrichment_task_id={}",
            user_id,
            conversation_id,
            source.id,
            len(chunk_records),
            vectors_written,
            emb_ok,
            settings.memory_enrichment_enabled,
            enrichment_task_id,
        )
        return source.id
|
||||
|
||||
|
||||
__all__ = ["MemoryIngestService"]
|
||||
26
api/app/features/memory/prompt_adapter.py
Normal file
26
api/app/features/memory/prompt_adapter.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Memory-to-prompt adapter boundary."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Mapping
|
||||
|
||||
from app.features.memory.chat_memory_injection import (
|
||||
InterviewMemorySlices,
|
||||
slice_interview_memory,
|
||||
)
|
||||
from app.features.memory.runtime_types import MemoryEvidenceBundle
|
||||
|
||||
|
||||
class MemoryPromptAdapter:
    """Converts retrieved evidence into prompt-specific slices."""

    def slice_for_interview(
        self,
        evidence: MemoryEvidenceBundle | Mapping[str, Any] | None,
        user_message: str,
    ) -> InterviewMemorySlices:
        """Slice evidence for the interview prompt.

        Accepts a ``MemoryEvidenceBundle``, a plain mapping, or ``None``.
        The payload is copied into a fresh ``dict`` before it is handed to
        ``slice_interview_memory`` together with ``user_message``.
        """
        if isinstance(evidence, MemoryEvidenceBundle):
            payload = evidence.raw
        else:
            payload = evidence
        # ``None`` and empty payloads both become an empty dict.
        return slice_interview_memory(dict(payload or {}), user_message)
|
||||
|
||||
|
||||
__all__ = ["MemoryPromptAdapter"]
|
||||
55
api/app/features/memory/retrieval_service.py
Normal file
55
api/app/features/memory/retrieval_service.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""Memory retrieval service boundary."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.features.memory.retriever import HybridRetriever
|
||||
from app.features.memory.schemas import EvidenceBundle
|
||||
from app.ports.embedding import EmbeddingProvider
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class MemoryRetrievalService:
    """Retrieves typed evidence bundles for downstream consumers."""

    def __init__(
        self,
        db: AsyncSession,
        *,
        embedding_provider: EmbeddingProvider | None = None,
    ) -> None:
        """Keep the session and the optional embedding provider for later calls."""
        self._db = db
        self._embedding = embedding_provider

    async def retrieve(
        self,
        user_id: str,
        query: str,
        *,
        top_k: int = 10,
    ) -> EvidenceBundle:
        """Run ``HybridRetriever`` and return its result as an ``EvidenceBundle``.

        The retriever output is validated with ``EvidenceBundle.model_validate``.
        Per-section item counts are logged before the bundle is returned.
        """
        hybrid = HybridRetriever(self._db, embedding_provider=self._embedding)
        payload = await hybrid.retrieve(user_id=user_id, query=query, top_k=top_k)
        bundle = EvidenceBundle.model_validate(payload)

        # The dump is used only to count items per section for the log line.
        dumped = bundle.model_dump()
        vec_ok = self._embedding.is_available() if self._embedding else False
        section_sizes = [
            len(dumped.get(section) or [])
            for section in (
                "relevant_chunks",
                "relevant_facts",
                "relevant_summaries",
                "timeline_hints",
                "relevant_stories",
            )
        ]
        logger.info(
            "event=memory_retrieve_done user_id={} query_len={} top_k={} "
            "chunks={} facts={} summaries={} timeline={} stories={} vector_ok={}",
            user_id,
            len((query or "").strip()),
            top_k,
            *section_sizes,
            vec_ok,
        )
        return bundle
|
||||
|
||||
|
||||
__all__ = ["MemoryRetrievalService"]
|
||||
24
api/app/features/memory/runtime_types.py
Normal file
24
api/app/features/memory/runtime_types.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""Runtime DTOs for memory consumers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Mapping
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MemoryEvidenceBundle:
|
||||
"""Transport-neutral memory evidence payload used by chat and memoir adapters."""
|
||||
|
||||
raw: dict[str, Any]
|
||||
|
||||
@classmethod
|
||||
def from_mapping(cls, value: Mapping[str, Any] | None) -> "MemoryEvidenceBundle":
|
||||
return cls(raw=dict(value or {}))
|
||||
|
||||
@property
|
||||
def has_any(self) -> bool:
|
||||
return any(bool(self.raw.get(key)) for key in self.raw.keys())
|
||||
|
||||
|
||||
__all__ = ["MemoryEvidenceBundle"]
|
||||
@@ -15,18 +15,14 @@ from app.core.logging import get_logger
|
||||
from app.features.conversation.lineage_schemas import (
|
||||
primary_user_message_id_from_lineage,
|
||||
)
|
||||
from app.features.memory.chunker import chunk_transcript
|
||||
from app.features.memory.enrichment_scheduler import MemoryEnrichmentScheduler
|
||||
from app.features.memory.ingest_service import MemoryIngestService
|
||||
from app.features.memory.repo import (
|
||||
create_chunk,
|
||||
create_curation_action,
|
||||
create_source,
|
||||
set_chunk_excluded,
|
||||
set_memory_fact_status,
|
||||
update_chunk_embedding,
|
||||
)
|
||||
from app.features.conversation.lineage_schemas import (
|
||||
primary_user_message_id_from_lineage,
|
||||
)
|
||||
from app.features.memory.retrieval_service import MemoryRetrievalService
|
||||
from app.features.memory.schemas import EvidenceBundle
|
||||
from app.ports.embedding import EmbeddingProvider
|
||||
|
||||
@@ -56,101 +52,20 @@ class MemoryService:
|
||||
Creates MemorySource, chunks, populates embedding.
|
||||
Returns source_id.
|
||||
"""
|
||||
if not transcript or not transcript.strip():
|
||||
raise ValueError("transcript cannot be empty")
|
||||
|
||||
primary_mid = (
|
||||
primary_user_message_id_from_lineage(lineage_json) if lineage_json else None
|
||||
)
|
||||
source = await create_source(
|
||||
self._db,
|
||||
user_id=user_id,
|
||||
source_type="transcript",
|
||||
raw_text=transcript.strip(),
|
||||
conversation_id=conversation_id,
|
||||
lineage_json=lineage_json,
|
||||
primary_user_message_id=primary_mid,
|
||||
)
|
||||
|
||||
chunks_text = chunk_transcript(transcript.strip())
|
||||
chunk_records = []
|
||||
for i, content in enumerate(chunks_text):
|
||||
chunk = await create_chunk(
|
||||
self._db,
|
||||
source_id=source.id,
|
||||
user_id=user_id,
|
||||
content=content,
|
||||
chunk_index=i,
|
||||
)
|
||||
chunk_records.append((chunk.id, content))
|
||||
|
||||
await self._db.flush()
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
vectors_written = 0
|
||||
# Embedding: 若有 provider 则写入
|
||||
if self._embedding and chunk_records:
|
||||
texts = [c for _, c in chunk_records]
|
||||
embeddings = await self._embedding.embed_texts(texts)
|
||||
for (chunk_id, _), emb in zip(chunk_records, embeddings):
|
||||
if emb:
|
||||
vectors_written += 1
|
||||
await update_chunk_embedding(self._db, chunk_id, emb)
|
||||
|
||||
await self._db.commit()
|
||||
emb_ok = self._embedding.is_available() if self._embedding else False
|
||||
enrichment_task_id: str | None = None
|
||||
try:
|
||||
from app.tasks.memory_enrichment_tasks import schedule_memory_enrichment
|
||||
|
||||
enrichment_task_id = schedule_memory_enrichment(
|
||||
user_id, source.id, memoir_correlation_id=None
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"memory enrichment 派发跳过: {} exc_type={}", e, type(e).__name__
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"event=memory_ingest_done user_id={} conversation_id={} source_id={} "
|
||||
"chunks={} vectors_written={} embedding_available={} enrichment_enabled={} enrichment_task_id={}",
|
||||
service = MemoryIngestService(self._db, embedding_provider=self._embedding)
|
||||
return await service.ingest_transcript(
|
||||
user_id,
|
||||
conversation_id,
|
||||
source.id,
|
||||
len(chunk_records),
|
||||
vectors_written,
|
||||
emb_ok,
|
||||
settings.memory_enrichment_enabled,
|
||||
enrichment_task_id,
|
||||
transcript,
|
||||
lineage_json=lineage_json,
|
||||
)
|
||||
return source.id
|
||||
|
||||
async def retrieve(
    self, user_id: str, query: str, *, top_k: int = 10
) -> EvidenceBundle:
    """Retrieve relevant evidence by delegating to ``MemoryRetrievalService``.

    The service is built from this instance's session and embedding
    provider; its result is returned unchanged.
    """
    service = MemoryRetrievalService(self._db, embedding_provider=self._embedding)
    return await service.retrieve(user_id, query, top_k=top_k)
|
||||
|
||||
async def exclude_chunk(
|
||||
self, user_id: str, chunk_id: str, *, reason: str = ""
|
||||
@@ -292,7 +207,9 @@ def ingest_transcript_sync(
|
||||
if chunk_records and embedding_provider is not None:
|
||||
texts = [content for _, content in chunk_records]
|
||||
embeddings = embedding_provider.embed_texts_sync(texts)
|
||||
for (chunk_id, _), emb in zip(chunk_records, embeddings):
|
||||
for (chunk_id, _), emb in zip(
|
||||
chunk_records, embeddings, strict=False
|
||||
):
|
||||
if emb:
|
||||
vectors_written += 1
|
||||
update_chunk_embedding_sync(session, chunk_id, emb)
|
||||
@@ -405,7 +322,9 @@ def ingest_transcripts_batch_sync(
|
||||
if all_chunk_records and embedding_provider is not None:
|
||||
texts = [content for _, content in all_chunk_records]
|
||||
embeddings = embedding_provider.embed_texts_sync(texts)
|
||||
for (chunk_id, _), emb in zip(all_chunk_records, embeddings):
|
||||
for (chunk_id, _), emb in zip(
|
||||
all_chunk_records, embeddings, strict=False
|
||||
):
|
||||
if emb:
|
||||
vectors_written += 1
|
||||
update_chunk_embedding_sync(session, chunk_id, emb)
|
||||
@@ -438,10 +357,8 @@ def schedule_enrichment_for_sources(
|
||||
memoir_correlation_id: str | None = None,
|
||||
) -> None:
|
||||
"""After successful ingest commit, enqueue LLM enrichment for each source (memory_idle queue)."""
|
||||
from app.tasks.memory_enrichment_tasks import schedule_memory_enrichment
|
||||
|
||||
for sid in source_ids:
|
||||
if sid:
|
||||
schedule_memory_enrichment(
|
||||
user_id, sid, memoir_correlation_id=memoir_correlation_id
|
||||
)
|
||||
MemoryEnrichmentScheduler().schedule_many(
|
||||
user_id,
|
||||
source_ids,
|
||||
memoir_correlation_id=memoir_correlation_id,
|
||||
)
|
||||
|
||||
@@ -6,6 +6,7 @@ import json
|
||||
from typing import Any
|
||||
|
||||
from app.core.langchain_llm import ainvoke_json_object, invoke_json_object
|
||||
from app.core.llm_gateway import LlmGateway, LlmUseCase
|
||||
from app.core.logging import get_logger
|
||||
from app.features.memory.llm_schemas import (
|
||||
TimelineEventsPayload,
|
||||
@@ -70,7 +71,7 @@ async def build_timeline_events_from_facts_async(
|
||||
|
||||
async def build_timeline_events(facts: list[dict]) -> list[dict]:
    """Legacy-compatible entry point for building timeline events from facts.

    Requests an LLM from ``LlmGateway`` under the
    ``memory.timeline_events.compat`` use case (``fast=True``) and passes it
    with ``facts`` to ``build_timeline_events_from_facts_async``.
    """
    llm = LlmGateway().langchain_llm_for(
        LlmUseCase("memory.timeline_events.compat", fast=True)
    )
    return await build_timeline_events_from_facts_async(llm, facts)
|
||||
|
||||
Reference in New Issue
Block a user