配置 SSOT(TOML + .env) 统一错误契约 Auth 与事务边界 Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client 可观测性(OpenTelemetry + LGTM)
158 lines
4.7 KiB
Python
158 lines
4.7 KiB
Python
"""MemoryIngestService 将 lineage_json 传入 create_source。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
|
|
from app.features.memory.ingest_service import MemoryIngestService
|
|
from app.features.memory.constants import memory
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_ingest_batch_passes_segment_id(monkeypatch) -> None:
    """Batch ingestion must hand segment_id and lineage_json to create_source.

    Every collaborator that the test replaces is patched by dotted path on
    ``app.features.memory.ingest_service``; the keyword arguments that reach
    the ``create_source`` stand-in are recorded and checked at the end.
    """
    seen_kwargs: dict = {}

    class FakeSession:
        """Session stand-in whose ``commit`` and ``flush`` do nothing."""

        async def commit(self) -> None:
            return None

        async def flush(self) -> None:
            return None

    class FakeEmbeddingService:
        """Embedding service stand-in that always reports one written vector."""

        def __init__(self, *_args, **_kwargs) -> None:
            return None

        async def embed_source(self, user_id: str, source_id: str) -> dict:
            return {"status": "success", "vectors_written": 1}

    async def _lookup_miss(*_args, **_kwargs):
        # Segment lookup stand-in: always answers None.
        return None

    async def _record_source(session, **kwargs):
        # Remember what the service passed so the assertions can inspect it.
        seen_kwargs.update(kwargs)
        return SimpleNamespace(id="src-1")

    async def _stub_chunk(*_args, **_kwargs):
        return SimpleNamespace(id="ch-0")

    # Applied in insertion order, matching one setattr call per entry.
    replacements = {
        "app.features.memory.ingest_service.get_transcript_source_by_segment_id": (
            _lookup_miss
        ),
        "app.features.memory.ingest_service.create_source": _record_source,
        "app.features.memory.ingest_service.create_chunk": _stub_chunk,
        "app.features.memory.ingest_service.MemoryEmbeddingService": (
            FakeEmbeddingService
        ),
        "app.features.memory.constants.memory.enrichment_enabled": False,
    }
    for target, stand_in in replacements.items():
        monkeypatch.setattr(target, stand_in)

    lineage_payload = {
        "schema_version": 1,
        "conversation_id": "c9",
        "turns": [{"user_message_id": "um-1", "assistant_message_id": "as-1"}],
        "primary_user_message_id": "um-1",
    }
    batch_scheduler = MagicMock(schedule_many=MagicMock(return_value=[]))
    ingest_service = MemoryIngestService(
        FakeSession(),  # type: ignore[arg-type]
        embedding_provider=None,
        enrichment_scheduler=batch_scheduler,
    )

    batch_items = [("c9", "hello there", lineage_payload, "seg-9")]
    source_ids = await ingest_service.ingest_transcripts_batch("u1", batch_items)

    assert source_ids == ["src-1"]
    assert seen_kwargs.get("segment_id") == "seg-9"
    assert seen_kwargs.get("lineage_json") == lineage_payload
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_memory_ingest_passes_lineage(monkeypatch) -> None:
    """Single-transcript ingestion must forward lineage to create_source.

    The ``create_source`` stand-in records the keyword arguments it receives.
    The test then checks the returned source id, the forwarded
    ``lineage_json`` and ``primary_user_message_id``, and that the enrichment
    scheduler was handed a request whose ``source_id`` is the new source.
    """
    # One shared record: create_source kwargs plus whatever the fakes observe.
    observed: dict = {}

    class FakeSession:
        """Session stand-in that counts commits; ``flush`` does nothing."""

        commit_calls = 0

        async def commit(self) -> None:
            self.commit_calls += 1

        async def flush(self) -> None:
            return None

    class FakeScheduler:
        """Enrichment scheduler stand-in that records the request it gets."""

        def schedule(self, request):
            observed["scheduled"] = request
            return "task-1"

    class FakeEmbeddingScheduler:
        """Embedding scheduler stand-in that records the request it gets."""

        def schedule(self, request):
            observed["embedding_scheduled"] = request
            return "embedding-task-1"

    class FakeEmbeddingService:
        """Embedding service stand-in that records the ids it was called with."""

        def __init__(self, *_args, **_kwargs) -> None:
            return None

        async def embed_source(self, user_id: str, source_id: str) -> dict:
            observed["embedded"] = (user_id, source_id)
            return {"status": "success", "vectors_written": 1}

    async def _record_source(session, **kwargs):
        observed.update(kwargs)
        return SimpleNamespace(id="src-1")

    async def _stub_chunk(*_args, **kwargs):
        chunk_id = f"ch-{kwargs.get('chunk_index')}"
        return SimpleNamespace(id=chunk_id)

    # Applied in insertion order, matching one setattr call per entry.
    replacements = {
        "app.features.memory.ingest_service.create_source": _record_source,
        "app.features.memory.ingest_service.create_chunk": _stub_chunk,
        "app.features.memory.ingest_service.MemoryEmbeddingService": (
            FakeEmbeddingService
        ),
        # NOTE(review): enrichment is switched off here, yet the final
        # assertion still expects FakeScheduler.schedule to have been called;
        # confirm against the service that this is the intended contract.
        "app.features.memory.constants.memory.enrichment_enabled": False,
    }
    for target, stand_in in replacements.items():
        monkeypatch.setattr(target, stand_in)

    lineage_payload = {
        "schema_version": 1,
        "conversation_id": "c9",
        "turns": [
            {"user_message_id": "um-1", "assistant_message_id": "as-1"},
        ],
        "primary_user_message_id": "um-1",
    }

    fake_session = FakeSession()
    ingest_service = MemoryIngestService(
        fake_session,  # type: ignore[arg-type]
        embedding_provider=None,
        embedding_scheduler=FakeEmbeddingScheduler(),  # type: ignore[arg-type]
        enrichment_scheduler=FakeScheduler(),  # type: ignore[arg-type]
    )

    source_id = await ingest_service.ingest_transcript(
        "u1",
        "c9",
        "hello there",
        lineage_json=lineage_payload,
    )

    # "embedding_scheduled", "embedded" and commit_calls are recorded by the
    # fakes but are not asserted by this test.
    assert source_id == "src-1"
    assert observed.get("lineage_json") == lineage_payload
    assert observed.get("primary_user_message_id") == "um-1"
    assert observed["scheduled"].source_id == "src-1"
|