refactor(api): TOML 配置 SSOT、统一错误契约、Auth/事务加固与可观测性 (#33)

配置 SSOT(TOML + .env)
统一错误契约
Auth 与事务边界
Redis / Celery 可靠性:业务 Redis(DB/0)与 Celery broker/backend(DB/1)显式拆分;连接池、sync client
可观测性(OpenTelemetry + LGTM)
This commit is contained in:
Sully
2026-05-22 13:44:50 +08:00
committed by GitHub
parent f09ae248f9
commit 53e0065e3e
298 changed files with 15247 additions and 4344 deletions

View File

@@ -10,8 +10,7 @@ from typing import Any
from celery import shared_task
from app.core.business_telemetry import business_span
from app.core.config import settings
from app.core.db import AsyncSessionLocal
from app.core.db import AsyncSessionLocal, transactional
from app.core.logging import get_logger
from app.core.memory_compaction_schedule import (
finalize_memory_compaction_run,
@@ -23,6 +22,7 @@ from app.core.memory_compaction_schedule import (
from app.core.redis_lock import acquire_redis_lock, release_redis_lock
from app.features.memory.repo import list_users_with_recent_chunks
from app.features.memory.service import MemoryService
from app.features.memory.constants import memory
logger = get_logger(__name__)
@@ -37,41 +37,53 @@ async def _run_memory_compaction_async(
context: dict[str, Any] | None,
) -> dict[str, Any]:
async with AsyncSessionLocal() as db:
service = MemoryService(db)
out = await service.compact_user(user_id, context)
await db.commit()
return out
async with transactional(db):
service = MemoryService(db)
return await service.compact_user(user_id, context)
@shared_task
def memory_compaction_sweep() -> dict[str, Any]:
@shared_task(bind=True, ignore_result=True)
def memory_compaction_sweep(self) -> dict[str, Any]:
"""Beat:为近期有记忆写入的用户调度 compaction(debounce 仍由 schedule 合并)。"""
t0 = time.perf_counter()
if not settings.memory_compaction_enabled:
if not memory.compaction_enabled:
return {"skipped": True, "reason": "disabled"}
hours = int(settings.memory_compaction_sweep_recent_hours)
hours = int(memory.compaction_sweep_recent_hours)
with business_span("memory.compaction.sweep", hours=hours):
user_ids = asyncio.run(_list_users_with_recent_chunks_async(hours))
ctx_base: dict[str, Any] = {"trigger_source": "beat", "sweep_hours": hours}
scheduled = 0
failed = 0
for uid in user_ids:
schedule_memory_compaction_run(uid, dict(ctx_base))
try:
schedule_memory_compaction_run(uid, dict(ctx_base))
scheduled += 1
except Exception as exc:
failed += 1
logger.warning(
"event=memory_compaction_sweep_schedule_failed user_id={} exc={} "
"msg=单用户 compaction 调度失败,继续扫描",
uid,
exc,
)
ms = (time.perf_counter() - t0) * 1000
logger.info(
"event=memory_compaction_sweep_done hours={} scheduled_users={} duration_ms={:.1f} "
"msg=记忆压缩定时扫描已调度",
"event=memory_compaction_sweep_done hours={} scheduled_users={} failed_users={} "
"duration_ms={:.1f} msg=记忆压缩定时扫描已调度",
hours,
len(user_ids),
scheduled,
failed,
ms,
)
return {"scheduled": len(user_ids), "user_ids": user_ids}
return {"scheduled": scheduled, "failed": failed, "hours": hours}
@shared_task(bind=True, max_retries=12, default_retry_delay=20)
@shared_task(bind=True, max_retries=12, default_retry_delay=20, ignore_result=True)
def memory_compaction_run(
self, user_id: str, context: dict[str, Any] | None = None
) -> dict[str, Any]:
run_t0 = time.perf_counter()
if not settings.memory_compaction_enabled:
if not memory.compaction_enabled:
return {"skipped": True, "reason": "disabled"}
ctx = dict(context or {})
@@ -83,7 +95,7 @@ def memory_compaction_run(
lock = acquire_redis_lock(
f"lock:memory_compaction:{user_id}",
ttl_seconds=settings.memory_compaction_lock_ttl_seconds,
ttl_seconds=memory.compaction_lock_ttl_seconds,
)
if lock is None:
ms = (time.perf_counter() - run_t0) * 1000