Chat 访谈 - 新增 persona 系统(default / warm_listener / curious_guide)与 background_voice 语气层 - 回复长度由 compute_reply_plan 统一决策(brief / standard / expanded),融合信息密度启发式 - 输入净稿(input_normalize):编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索 - 记忆证据注入:按用户话检索 memory evidence 并注入 prompt Memoir 回忆录 - 口述归一(oral_normalize):segment 原文保留,story 管线取派生净稿作叙事输入 - segment 入队批次门闸:累计字数 + 最长等待秒数,减少零碎提交 - fidelity_check / prompts / narrative_agent 微调 - Alembic 0005:清理跨章节 story 外键 Infra - Dockerfile 加入 ffmpeg - pyproject.toml 新增依赖并同步 uv.lock - .env.example / .env.production 补全新配置项 Tests - 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions - 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant Made-with: Cursor
160 lines
5.5 KiB
Python
160 lines
5.5 KiB
Python
"""回忆录后台任务聚合:debounce 后派发 process_memoir_segments(feature 层)。"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import time
|
||
from dataclasses import dataclass, field
|
||
from typing import Dict, List
|
||
|
||
from app.core.config import settings
|
||
from app.core.logging import get_logger
|
||
from app.core.task_tracker import task_tracker
|
||
|
||
logger = get_logger(__name__)
|
||
|
||
|
||
def _batch_ready_for_submit(
|
||
*,
|
||
min_chars: int,
|
||
max_wait_seconds: float,
|
||
total_text_chars: int,
|
||
elapsed_seconds: float,
|
||
) -> bool:
|
||
"""字数门闸开启时,静默结束后是否应提交(不含 min_chars==0 的早退,由调用方处理)。"""
|
||
if min_chars <= 0:
|
||
return True
|
||
if total_text_chars >= min_chars:
|
||
return True
|
||
if max_wait_seconds <= 0:
|
||
return True
|
||
return elapsed_seconds >= max_wait_seconds
|
||
|
||
|
||
def _next_retry_sleep_seconds(
|
||
debounce_seconds: float,
|
||
max_wait_seconds: float,
|
||
elapsed_seconds: float,
|
||
) -> float:
|
||
"""未达字数且未超时:下次再 sleep 的秒数。"""
|
||
return min(debounce_seconds, max(0.0, max_wait_seconds - elapsed_seconds))
|
||
|
||
|
||
@dataclass
|
||
class _MemoirBatchState:
|
||
segment_ids: list[str] = field(default_factory=list)
|
||
total_text_chars: int = 0
|
||
first_queued_monotonic: float | None = None
|
||
|
||
|
||
class BackgroundTaskRunner:
|
||
def __init__(self, debounce_seconds: int = 5) -> None:
|
||
self.debounce_seconds = debounce_seconds
|
||
self._batch: Dict[str, _MemoirBatchState] = {}
|
||
self._timers: Dict[str, asyncio.Task[None]] = {}
|
||
|
||
def _pop_batch(self, user_id: str) -> list[str]:
|
||
st = self._batch.pop(user_id, None)
|
||
if not st or not st.segment_ids:
|
||
return []
|
||
ids = st.segment_ids
|
||
return ids
|
||
|
||
async def _submit_task(self, user_id: str, segment_ids: List[str]) -> str | None:
|
||
try:
|
||
from app.tasks.memoir_tasks import process_memoir_segments
|
||
|
||
result = process_memoir_segments.delay(user_id, segment_ids)
|
||
task_id = result.id
|
||
await task_tracker.add_task(user_id, task_id, "memoir")
|
||
logger.info(
|
||
"已提交 Celery 任务: user_id={}, task_id={}, segments={}",
|
||
user_id,
|
||
task_id,
|
||
len(segment_ids),
|
||
)
|
||
return task_id
|
||
except Exception as e:
|
||
logger.error("提交 Celery 任务失败: {}", e)
|
||
return None
|
||
|
||
async def queue_message(
|
||
self, user_id: str, segment_id: str, *, text_char_count: int = 0
|
||
) -> None:
|
||
st = self._batch.setdefault(user_id, _MemoirBatchState())
|
||
if not st.segment_ids:
|
||
st.first_queued_monotonic = time.monotonic()
|
||
st.segment_ids.append(segment_id)
|
||
st.total_text_chars += max(0, text_char_count)
|
||
|
||
if user_id in self._timers:
|
||
self._timers[user_id].cancel()
|
||
|
||
async def delayed_submit() -> None:
|
||
try:
|
||
await asyncio.sleep(self.debounce_seconds)
|
||
while True:
|
||
if user_id not in self._batch:
|
||
return
|
||
batch = self._batch.get(user_id)
|
||
if not batch or not batch.segment_ids:
|
||
return
|
||
|
||
min_c = int(settings.memoir_segment_batch_min_chars)
|
||
max_w = float(settings.memoir_segment_batch_max_wait_seconds)
|
||
|
||
if min_c <= 0:
|
||
segment_ids = self._pop_batch(user_id)
|
||
if segment_ids:
|
||
await self._submit_task(user_id, segment_ids)
|
||
return
|
||
|
||
first = batch.first_queued_monotonic
|
||
if first is None:
|
||
segment_ids = self._pop_batch(user_id)
|
||
if segment_ids:
|
||
await self._submit_task(user_id, segment_ids)
|
||
return
|
||
|
||
now = time.monotonic()
|
||
elapsed = now - first
|
||
total = batch.total_text_chars
|
||
|
||
if _batch_ready_for_submit(
|
||
min_chars=min_c,
|
||
max_wait_seconds=max_w,
|
||
total_text_chars=total,
|
||
elapsed_seconds=elapsed,
|
||
):
|
||
segment_ids = self._pop_batch(user_id)
|
||
if segment_ids:
|
||
await self._submit_task(user_id, segment_ids)
|
||
return
|
||
|
||
sleep_more = _next_retry_sleep_seconds(
|
||
float(self.debounce_seconds),
|
||
max_w,
|
||
elapsed,
|
||
)
|
||
if sleep_more <= 0:
|
||
segment_ids = self._pop_batch(user_id)
|
||
if segment_ids:
|
||
await self._submit_task(user_id, segment_ids)
|
||
return
|
||
await asyncio.sleep(sleep_more)
|
||
except asyncio.CancelledError:
|
||
pass
|
||
except Exception as e:
|
||
logger.error("延迟提交任务失败: {}", e)
|
||
|
||
self._timers[user_id] = asyncio.create_task(delayed_submit())
|
||
|
||
async def flush_pending(self, user_id: str) -> str | None:
|
||
if user_id in self._timers:
|
||
self._timers[user_id].cancel()
|
||
del self._timers[user_id]
|
||
segment_ids = self._pop_batch(user_id)
|
||
if segment_ids:
|
||
return await self._submit_task(user_id, segment_ids)
|
||
return None
|