refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)
配置 SSOT(TOML + .env) · 统一错误契约 · Auth 与事务边界 · Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client · 可观测性(OpenTelemetry + LGTM)
This commit is contained in:
@@ -5,6 +5,8 @@ from __future__ import annotations
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.db import transactional
|
||||
from app.core.errors import BadRequestError
|
||||
from app.core.logging import get_logger
|
||||
from app.features.conversation.lineage_schemas import (
|
||||
primary_user_message_id_from_lineage,
|
||||
@@ -22,8 +24,10 @@ from app.features.memory.enrichment_scheduler import (
|
||||
from app.features.memory.repo import (
|
||||
create_chunk,
|
||||
create_source,
|
||||
get_transcript_source_by_segment_id,
|
||||
)
|
||||
from app.ports.embedding import EmbeddingProvider
|
||||
from app.features.memory.constants import memory
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -53,34 +57,32 @@ class MemoryIngestService:
|
||||
lineage_json: dict | None = None,
|
||||
) -> str:
|
||||
if not transcript or not transcript.strip():
|
||||
raise ValueError("transcript cannot be empty")
|
||||
raise BadRequestError("transcript cannot be empty")
|
||||
|
||||
primary_mid = (
|
||||
primary_user_message_id_from_lineage(lineage_json) if lineage_json else None
|
||||
)
|
||||
source = await create_source(
|
||||
self._db,
|
||||
user_id=user_id,
|
||||
source_type="transcript",
|
||||
raw_text=transcript.strip(),
|
||||
conversation_id=conversation_id,
|
||||
lineage_json=lineage_json,
|
||||
primary_user_message_id=primary_mid,
|
||||
)
|
||||
|
||||
chunk_records: list[tuple[str, str]] = []
|
||||
for i, content in enumerate(chunk_transcript(transcript.strip())):
|
||||
chunk = await create_chunk(
|
||||
async with transactional(self._db):
|
||||
source = await create_source(
|
||||
self._db,
|
||||
source_id=source.id,
|
||||
user_id=user_id,
|
||||
content=content,
|
||||
chunk_index=i,
|
||||
source_type="transcript",
|
||||
raw_text=transcript.strip(),
|
||||
conversation_id=conversation_id,
|
||||
lineage_json=lineage_json,
|
||||
primary_user_message_id=primary_mid,
|
||||
)
|
||||
chunk_records.append((chunk.id, content))
|
||||
|
||||
await self._db.flush()
|
||||
await self._db.commit()
|
||||
chunk_records: list[tuple[str, str]] = []
|
||||
for i, content in enumerate(chunk_transcript(transcript.strip())):
|
||||
chunk = await create_chunk(
|
||||
self._db,
|
||||
source_id=source.id,
|
||||
user_id=user_id,
|
||||
content=content,
|
||||
chunk_index=i,
|
||||
)
|
||||
chunk_records.append((chunk.id, content))
|
||||
|
||||
embedding_result = await MemoryEmbeddingService(
|
||||
self._db,
|
||||
@@ -108,7 +110,7 @@ class MemoryIngestService:
|
||||
embedding_result.get("status"),
|
||||
emb_ok,
|
||||
embedding_task_id,
|
||||
settings.memory_enrichment_enabled,
|
||||
memory.enrichment_enabled,
|
||||
enrichment_task_id,
|
||||
)
|
||||
return source.id
|
||||
@@ -116,50 +118,63 @@ class MemoryIngestService:
|
||||
async def ingest_transcripts_batch(
|
||||
self,
|
||||
user_id: str,
|
||||
items: list[tuple[str, str, dict | None]],
|
||||
items: list[tuple[str, str, dict | None, str | None]],
|
||||
*,
|
||||
memoir_correlation_id: str | None = None,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Batch ingest transcript items through the async memory path.
|
||||
|
||||
items: (conversation_id, transcript, lineage_json). Empty transcripts are skipped.
|
||||
items: (conversation_id, transcript, lineage_json, segment_id).
|
||||
Empty transcripts are skipped. When segment_id is set and a transcript
|
||||
source already exists for the user, returns the existing source id.
|
||||
"""
|
||||
source_ids: list[str] = []
|
||||
chunk_records: list[tuple[str, str]] = []
|
||||
new_source_ids: list[str] = []
|
||||
|
||||
for conversation_id, transcript, lineage_json in items:
|
||||
text = (transcript or "").strip()
|
||||
if not text:
|
||||
continue
|
||||
primary_mid = (
|
||||
primary_user_message_id_from_lineage(lineage_json)
|
||||
if lineage_json
|
||||
else None
|
||||
)
|
||||
source = await create_source(
|
||||
self._db,
|
||||
user_id=user_id,
|
||||
source_type="transcript",
|
||||
raw_text=text,
|
||||
conversation_id=conversation_id or None,
|
||||
lineage_json=lineage_json,
|
||||
primary_user_message_id=primary_mid,
|
||||
)
|
||||
source_ids.append(source.id)
|
||||
|
||||
for i, content in enumerate(chunk_transcript(text)):
|
||||
chunk = await create_chunk(
|
||||
self._db,
|
||||
source_id=source.id,
|
||||
user_id=user_id,
|
||||
content=content,
|
||||
chunk_index=i,
|
||||
async with transactional(self._db):
|
||||
for conversation_id, transcript, lineage_json, segment_id in items:
|
||||
text = (transcript or "").strip()
|
||||
if not text:
|
||||
continue
|
||||
sid = (segment_id or "").strip() or None
|
||||
if sid:
|
||||
existing = await get_transcript_source_by_segment_id(
|
||||
self._db,
|
||||
user_id=user_id,
|
||||
segment_id=sid,
|
||||
)
|
||||
if existing is not None:
|
||||
source_ids.append(existing.id)
|
||||
continue
|
||||
primary_mid = (
|
||||
primary_user_message_id_from_lineage(lineage_json)
|
||||
if lineage_json
|
||||
else None
|
||||
)
|
||||
chunk_records.append((chunk.id, content))
|
||||
source = await create_source(
|
||||
self._db,
|
||||
user_id=user_id,
|
||||
source_type="transcript",
|
||||
raw_text=text,
|
||||
conversation_id=conversation_id or None,
|
||||
segment_id=sid,
|
||||
lineage_json=lineage_json,
|
||||
primary_user_message_id=primary_mid,
|
||||
)
|
||||
source_ids.append(source.id)
|
||||
new_source_ids.append(source.id)
|
||||
|
||||
await self._db.flush()
|
||||
await self._db.commit()
|
||||
for i, content in enumerate(chunk_transcript(text)):
|
||||
chunk = await create_chunk(
|
||||
self._db,
|
||||
source_id=source.id,
|
||||
user_id=user_id,
|
||||
content=content,
|
||||
chunk_index=i,
|
||||
)
|
||||
chunk_records.append((chunk.id, content))
|
||||
|
||||
vectors_written = 0
|
||||
embedding_retry_task_ids: list[str] = []
|
||||
@@ -168,7 +183,7 @@ class MemoryIngestService:
|
||||
self._db,
|
||||
embedding_provider=self._embedding,
|
||||
)
|
||||
for source_id in source_ids:
|
||||
for source_id in new_source_ids:
|
||||
result = await embedding_service.embed_source(user_id, source_id)
|
||||
vectors_written += int(result.get("vectors_written") or 0)
|
||||
status = str(result.get("status") or "unknown")
|
||||
@@ -185,7 +200,7 @@ class MemoryIngestService:
|
||||
emb_ok = self._embedding.is_available() if self._embedding else False
|
||||
task_ids = self._enrichment_scheduler.schedule_many(
|
||||
user_id,
|
||||
source_ids,
|
||||
new_source_ids,
|
||||
memoir_correlation_id=memoir_correlation_id,
|
||||
)
|
||||
|
||||
@@ -200,7 +215,7 @@ class MemoryIngestService:
|
||||
emb_ok,
|
||||
embedding_statuses,
|
||||
len(embedding_retry_task_ids),
|
||||
settings.memory_enrichment_enabled,
|
||||
memory.enrichment_enabled,
|
||||
len(task_ids),
|
||||
)
|
||||
return source_ids
|
||||
|
||||
Reference in New Issue
Block a user