Files
life-echo/api/app/features/memoir/background_runner.py
Kevin 69a673e6c6 feat(api): 访谈人格/回复长度策略、口述归一、背景语气与输入净稿全链路
Chat 访谈
- 新增 persona 系统(default / warm_listener / curious_guide)与 background_voice 语气层
- 回复长度由 compute_reply_plan 统一决策(brief / standard / expanded),融合信息密度启发式
- 输入净稿(input_normalize):编排层可选 rules/llm 归一用户口语后再喂模型与记忆检索
- 记忆证据注入:按用户话检索 memory evidence 并注入 prompt

Memoir 回忆录
- 口述归一(oral_normalize):segment 原文保留,story 管线取派生净稿作叙事输入
- segment 入队批次门闸:累计字数 + 最长等待秒数,减少零碎提交
- fidelity_check / prompts / narrative_agent 微调
- Alembic 0005:清理跨章节 story 外键

Infra
- Dockerfile 加入 ffmpeg
- pyproject.toml 新增依赖并同步 uv.lock
- .env.example / .env.production 补全新配置项

Tests
- 新增 test_background_voice、test_chat_input_normalize、test_experience_regressions
- 扩展 test_interview_prompts、test_interview_reply_length、test_story_route_oral_invariant

Made-with: Cursor
2026-03-31 23:55:26 +08:00

160 lines
5.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Memoir background tasks: batch segments (debounce), then dispatch process_memoir_segments (feature layer)."""
from __future__ import annotations
import asyncio
import time
from dataclasses import dataclass, field
from typing import Dict, List
from app.core.config import settings
from app.core.logging import get_logger
from app.core.task_tracker import task_tracker
logger = get_logger(__name__)
def _batch_ready_for_submit(
    *,
    min_chars: int,
    max_wait_seconds: float,
    total_text_chars: int,
    elapsed_seconds: float,
) -> bool:
    """Return whether a batch should be submitted once the debounce has fired.

    The batch is ready as soon as any one of these holds, checked in order:

    * the character gate is disabled (``min_chars <= 0``);
    * the accumulated text reached ``min_chars``;
    * no maximum wait is configured (``max_wait_seconds <= 0``);
    * at least ``max_wait_seconds`` have elapsed.

    Args:
        min_chars: Minimum accumulated characters required by the gate.
        max_wait_seconds: Upper bound on how long a batch may be held back.
        total_text_chars: Characters accumulated in the batch so far.
        elapsed_seconds: Seconds the batch has been waiting.
    """
    return (
        min_chars <= 0
        or total_text_chars >= min_chars
        or max_wait_seconds <= 0
        or elapsed_seconds >= max_wait_seconds
    )
def _next_retry_sleep_seconds(
    debounce_seconds: float,
    max_wait_seconds: float,
    elapsed_seconds: float,
) -> float:
    """Return how long to sleep before re-checking a batch that is not ready.

    The result is the time left until ``max_wait_seconds`` is reached
    (never negative), capped at one debounce interval.
    """
    remaining = max(0.0, max_wait_seconds - elapsed_seconds)
    return min(debounce_seconds, remaining)
@dataclass
class _MemoirBatchState:
    """Per-user accumulator for segments waiting to be submitted."""

    # Ids of the queued segments, in arrival order.
    segment_ids: list[str] = field(default_factory=list)
    # Running sum of the character counts reported for the queued segments.
    total_text_chars: int = 0
    # time.monotonic() reading taken when the first segment of the batch was
    # queued; None until then.
    first_queued_monotonic: float | None = None
class BackgroundTaskRunner:
    """Debounce per-user memoir segments and dispatch them as one Celery task.

    Segments queued through :meth:`queue_message` accumulate per user. Every
    new segment restarts that user's debounce timer. When the timer fires the
    batch is submitted if the character gate
    (``settings.memoir_segment_batch_min_chars``) is disabled or satisfied, or
    once ``settings.memoir_segment_batch_max_wait_seconds`` have passed since
    the first segment of the batch was queued; otherwise the timer keeps
    re-checking.
    """

    def __init__(self, debounce_seconds: int = 5) -> None:
        """Create a runner whose timers first sleep ``debounce_seconds``."""
        self.debounce_seconds = debounce_seconds
        # Pending, not-yet-submitted batch per user id.
        self._batch: Dict[str, _MemoirBatchState] = {}
        # Live debounce timer per user id; replaced on every queued segment.
        self._timers: Dict[str, asyncio.Task[None]] = {}
        # Timers that already popped their batch and are submitting it. They
        # are kept here (and out of ``_timers``) so that they can no longer be
        # cancelled, while still being strongly referenced: the event loop
        # itself only holds weak references to tasks.
        self._dispatching: set[asyncio.Task[None]] = set()

    def _pop_batch(self, user_id: str) -> list[str]:
        """Remove the user's pending batch and return its segment ids.

        Returns an empty list when nothing is pending.
        """
        state = self._batch.pop(user_id, None)
        return state.segment_ids if state is not None else []

    async def _submit_task(self, user_id: str, segment_ids: List[str]) -> str | None:
        """Dispatch ``process_memoir_segments`` and register it with the tracker.

        Returns:
            The Celery task id, or ``None`` if dispatching or tracking raised.
            Failures are logged and never propagated; the segment ids are not
            re-queued in that case.
        """
        try:
            # Imported at call time, as in the original code (presumably to
            # avoid an import cycle with the tasks module - TODO confirm).
            from app.tasks.memoir_tasks import process_memoir_segments

            result = process_memoir_segments.delay(user_id, segment_ids)
            task_id = result.id
            await task_tracker.add_task(user_id, task_id, "memoir")
            logger.info(
                "已提交 Celery 任务: user_id={}, task_id={}, segments={}",
                user_id,
                task_id,
                len(segment_ids),
            )
            return task_id
        except Exception as e:
            logger.error("提交 Celery 任务失败: {}", e)
            return None

    async def queue_message(
        self, user_id: str, segment_id: str, *, text_char_count: int = 0
    ) -> None:
        """Add a segment to the user's batch and restart the debounce timer.

        Args:
            user_id: Owner of the segment.
            segment_id: Id of the segment to queue.
            text_char_count: Character count of the segment; negative values
                are counted as zero.
        """
        state = self._batch.setdefault(user_id, _MemoirBatchState())
        if not state.segment_ids:
            # First segment of a fresh batch: start the max-wait clock.
            state.first_queued_monotonic = time.monotonic()
        state.segment_ids.append(segment_id)
        state.total_text_chars += max(0, text_char_count)

        # Only a timer that is still waiting lives in ``_timers``; one that is
        # already submitting has moved to ``_dispatching`` and is left alone.
        previous = self._timers.get(user_id)
        if previous is not None:
            previous.cancel()

        timer = asyncio.create_task(self._delayed_submit(user_id))
        self._timers[user_id] = timer
        # Drop the bookkeeping entry once the timer ends, however it ends, so
        # finished tasks do not pile up in ``_timers``.
        timer.add_done_callback(
            lambda finished: self._on_timer_done(user_id, finished)
        )

    async def _delayed_submit(self, user_id: str) -> None:
        """Timer body: wait out the debounce, then submit once the batch is ready."""
        try:
            await asyncio.sleep(self.debounce_seconds)
            if not await self._wait_until_ready(user_id):
                return
            # No suspension point between here and the pop below, so the batch
            # cannot change underneath us.
            self._mark_dispatching(user_id)
            segment_ids = self._pop_batch(user_id)
            if segment_ids:
                await self._submit_task(user_id, segment_ids)
        except asyncio.CancelledError:
            # Superseded by a newer segment or by flush_pending.
            pass
        except Exception as e:
            logger.error("延迟提交任务失败: {}", e)

    async def _wait_until_ready(self, user_id: str) -> bool:
        """Sleep until the user's batch should be submitted.

        Returns:
            ``True`` when the batch should be submitted now, ``False`` when
            there is no batch left to submit.
        """
        while True:
            batch = self._batch.get(user_id)
            if batch is None or not batch.segment_ids:
                return False

            min_chars = int(settings.memoir_segment_batch_min_chars)
            max_wait = float(settings.memoir_segment_batch_max_wait_seconds)
            first_queued = batch.first_queued_monotonic
            # Gate disabled, or no start time to measure against: submit now.
            if min_chars <= 0 or first_queued is None:
                return True

            elapsed = time.monotonic() - first_queued
            if _batch_ready_for_submit(
                min_chars=min_chars,
                max_wait_seconds=max_wait,
                total_text_chars=batch.total_text_chars,
                elapsed_seconds=elapsed,
            ):
                return True

            sleep_more = _next_retry_sleep_seconds(
                float(self.debounce_seconds),
                max_wait,
                elapsed,
            )
            if sleep_more <= 0:
                return True
            await asyncio.sleep(sleep_more)

    def _mark_dispatching(self, user_id: str) -> None:
        """Move the running timer from ``_timers`` to ``_dispatching``."""
        current = asyncio.current_task()
        if current is not None and self._timers.get(user_id) is current:
            del self._timers[user_id]
            self._dispatching.add(current)

    def _on_timer_done(self, user_id: str, timer: asyncio.Task[None]) -> None:
        """Forget a finished timer unless a newer one already replaced it."""
        self._dispatching.discard(timer)
        if self._timers.get(user_id) is timer:
            del self._timers[user_id]

    async def flush_pending(self, user_id: str) -> str | None:
        """Cancel the user's debounce timer and submit the pending batch now.

        Returns:
            The Celery task id, or ``None`` when nothing was pending or the
            submission failed.
        """
        timer = self._timers.pop(user_id, None)
        if timer is not None:
            timer.cancel()
        segment_ids = self._pop_batch(user_id)
        if not segment_ids:
            return None
        return await self._submit_task(user_id, segment_ids)