Memory compaction(近重复 chunk 软排除) - 新增 compaction 调度:Redis debounce、scheduler gate、增量游标;任务结束时 finalize,避免 gate 长期占用并处理运行期新 trigger。 - Celery memory_compaction_run:debounce 未到点则 retry;用户级 Redis 锁;成功路径更新游标并 finalize;异常时释放 scheduler gate 并 self.retry,避免静默卡死调度与瞬时失败不重试。 - compaction_service:多层判定 + canonical 打分;无 embedding 时停止前移游标(awaiting_embeddings);curation details 补全 trigger 等上下文。 - ingest_transcript_sync:同步路径尽力写入 embedding,与异步 ingest 行为对齐,避免 compaction 永远扫不到无向量 chunk。 - repo:新增 update_chunk_embedding_sync。 测试 - 扩展 test_memory_compaction:调度合并、finalize、ingest embedding、无向量游标、异常路径 gate+retry 等回归用例。
80 lines
2.5 KiB
Python
80 lines
2.5 KiB
Python
"""Celery:memory compaction(近重复 chunk 软排除)。"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import time
|
||
from datetime import datetime
|
||
from typing import Any
|
||
|
||
from celery import shared_task
|
||
|
||
from app.core.config import settings
|
||
from app.core.db import get_sync_db
|
||
from app.core.logging import get_logger
|
||
from app.core.memory_compaction_schedule import (
|
||
finalize_memory_compaction_run,
|
||
read_debounce_deadline_ts,
|
||
release_scheduler_gate,
|
||
set_incremental_cursor_pair,
|
||
)
|
||
from app.core.redis_lock import acquire_redis_lock, release_redis_lock
|
||
from app.features.memory.compaction_service import run_memory_compaction_sync
|
||
|
||
logger = get_logger(__name__)
|
||
|
||
|
||
@shared_task(bind=True, max_retries=12, default_retry_delay=20)
def memory_compaction_run(
    self, user_id: str, context: dict[str, Any] | None = None
) -> dict[str, Any]:
    """Run one memory-compaction pass for ``user_id`` as a Celery task.

    Steps, in order:

    1. Return a "disabled" result when ``settings.memory_compaction_enabled``
       is falsy.
    2. Read the debounce deadline; if it is still in the future, re-queue
       the task via ``self.retry`` so it runs once the deadline has passed.
    3. Try to take the per-user Redis lock. If it cannot be acquired, log,
       finalize the run and return a "lock_not_acquired" result.
    4. Run ``run_memory_compaction_sync`` inside a sync DB session and
       commit. When the result carries both ``new_cursor_ts`` and
       ``new_cursor_id``, store them as the incremental cursor. Finalize
       the run and return the service result.
    5. On any exception in step 4, release the scheduler gate and retry
       the task. The Redis lock is released in every case.

    Args:
        user_id: Identifier of the user whose memory is compacted; also
            used to build the Redis lock key.
        context: Optional trigger context. It is shallow-copied and passed
            to the compaction service and to the finalize call.

    Returns:
        ``{"skipped": True, "reason": "disabled"}`` or
        ``{"skipped": True, "reason": "lock_not_acquired"}`` when the run
        is skipped, otherwise the dict returned by
        ``run_memory_compaction_sync``.

    Raises:
        celery.exceptions.Retry: raised through ``self.retry`` while the
            debounce deadline is pending or after a failed run.
    """
    # Function-scope import: only the debounce branch needs it.
    import math

    if not settings.memory_compaction_enabled:
        return {"skipped": True, "reason": "disabled"}

    ctx = dict(context or {})
    deadline = read_debounce_deadline_ts(user_id)
    now = time.time()
    if deadline is not None and now < deadline:
        delay = max(1.0, deadline - now)
        # Round the countdown UP. Truncating (e.g. 5.7s -> 5s) wakes the
        # task before the deadline, which forces a second retry and burns
        # an extra attempt out of the limited max_retries budget.
        raise self.retry(countdown=math.ceil(delay))

    lock = acquire_redis_lock(
        f"lock:memory_compaction:{user_id}",
        ttl_seconds=settings.memory_compaction_lock_ttl_seconds,
    )
    if lock is None:
        logger.info(
            "memory_compaction_skipped user_id={} skipped_reason=lock_not_acquired",
            user_id,
        )
        out = {"skipped": True, "reason": "lock_not_acquired"}
        # Finalize even though nothing ran here, so the scheduling state
        # for this trigger is still closed out.
        finalize_memory_compaction_run(
            user_id,
            observed_deadline_ts=deadline,
            context=ctx,
        )
        return out

    try:
        with get_sync_db() as session:
            out = run_memory_compaction_sync(session, user_id, ctx)
            session.commit()

        # Persist the cursor only when the service reported a complete
        # (timestamp, id) pair.
        if out.get("new_cursor_ts") and out.get("new_cursor_id") is not None:
            set_incremental_cursor_pair(
                user_id,
                datetime.fromisoformat(out["new_cursor_ts"]),
                str(out["new_cursor_id"]),
            )
        finalize_memory_compaction_run(
            user_id,
            observed_deadline_ts=deadline,
            context=ctx,
        )
        return out
    except Exception as exc:
        logger.warning("memory_compaction_run failed user_id={} err={}", user_id, exc)
        # Free the scheduler gate before retrying so a failed run does not
        # leave it held.
        release_scheduler_gate(user_id)
        raise self.retry(exc=exc)
    finally:
        release_redis_lock(lock)
|