Files
life-echo/api/app/tasks/memoir_tasks.py
Kevin bb16d3a5c9 refactor(agents): 抽取阶段常量与对话上下文;快档 LLM;图片 prompt 可禁止回退
访谈与阶段
- 新增 app/agents/stage_constants.py:集中 CHAT_STAGES、章节分类/顺序、阶段到默认 memoir 类别等,与 MemoirState 默认槽位顺序对齐;减少散落在 prompts 内的重复常量。
- 新增 app/agents/chat/prompt_context.py:以 ChatPromptContext 汇总 guided 系统提示所需字段(阶段、槽位、轮次、人设、记忆证据、回复长度模式、背景声线、职业等),统一走 get_guided_conversation_prompt。
- 大幅收敛 app/agents/chat/prompts_conversation.py;调整 prompts.py、stage_prompts.py、stage_detection.py;同步 interview_agent、profile_agent、helpers 与 state_schema,使对话侧构造提示的方式一致、可测。

回忆录流水线
- memoir/prompts.py 删除已迁至 stage_constants / 独立模板的大段常量与图片占位相关逻辑;classification / extraction / fidelity / narrative agents 与 orchest(全量历史仍可用于计数,注入模型时按轮次与字符上限截断)、image_prompt_fallback_disabled。
- dependencies 增加 get_llm_provider_fast(LRU 缓存,可与默认共用密钥与 base_url)。

任务与编排
- memoir_tasks:prepare_batches 注入 llm_fast;开启独立快档模型时打结构化日志。
- chapter_cover_tasks、story_image_tasks:与图片 prompt / JSON 工具路径或策略变更对齐(import 与行为一致)。
- story_pipeline_sync 等小处同步。

其它核心
- langchain_llm、text_normalize 随上述调用链微调。

开发者体验
- .cursor/settings.json:启用 redis-development、postman 插件。

测试
- 新增 test_image_prompt_policy:覆盖「禁止回退」等图片 prompt 策略。
- 更新 test_interview_prompts、test_interview_reply_length、test_experience_regressions、test_json_and_memory_utils,匹配新常量位置、json_utils 与对话/长度行为。
2026-04-02 12:00:00 +08:00

605 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
回忆录处理 Celery 任务
"""
import json
import uuid
from datetime import datetime, timezone
from typing import Dict, List, Set
import redis
from celery import shared_task
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.agents.chat.background_voice import infer_background_voice
from app.agents.chat.prompts_profile import format_user_profile_context
from app.agents.memoir import MemoirOrchestrator
from app.agents.state_schema import MemoirStateSchema, SlotData, default_state
from app.core.chapter_pipeline_lock import (
acquire_chapter_pipeline_lock as _acquire_chapter_lock,
)
from app.core.chapter_pipeline_lock import (
release_chapter_pipeline_lock as _release_chapter_lock,
)
from app.core.config import settings
from app.core.db import get_sync_db
from app.core.dependencies import get_llm_provider, get_llm_provider_fast
from app.core.logging import get_logger
from app.features.conversation.models import Segment
from app.features.memoir.cover_eligibility import (
chapter_needs_cover_enqueue,
)
from app.features.memoir.memoir_images.parser import (
build_initial_image_assets,
)
from app.features.memoir.memoir_images.schema import (
IMAGE_STATUS_COMPLETED,
IMAGE_STATUS_FAILED,
IMAGE_STATUS_PENDING,
normalize_image_assets,
)
from app.features.memoir.memoir_images.serializers import (
image_dict_to_row_kwargs,
)
from app.features.memoir.memoir_images.settings import MemoirImageSettings
from app.features.memoir.models import (
Book,
MemoirImage,
MemoirState,
)
from app.features.memoir.story_pipeline_sync import (
run_story_pipeline_for_category_batch,
)
from app.features.user.models import User
logger = get_logger(__name__)
_REDIS_CLIENTS: dict[bool, redis.Redis] = {}
def _get_llm():
"""Celery 任务内获取 LangChain LLM通过 port"""
try:
return getattr(get_llm_provider(), "langchain_llm", None)
except Exception:
return None
def _get_llm_fast():
"""分类 / 抽取等快档位任务(与叙事、路由默认模型可分离)。"""
try:
return getattr(get_llm_provider_fast(), "langchain_llm", None)
except Exception:
return None
def _get_redis_client(*, decode_responses: bool = False) -> redis.Redis:
from app.core.config import settings
client = _REDIS_CLIENTS.get(decode_responses)
if client is None:
client = redis.from_url(
settings.redis_url,
decode_responses=decode_responses,
)
_REDIS_CLIENTS[decode_responses] = client
return client
def _chapter_lock_ttl() -> int:
return int(settings.chapter_pipeline_lock_ttl_seconds)
def _update_task_status_sync(
user_id: str, task_id: str, status: str, result: Dict = None
):
"""同步更新任务状态(在 Celery 任务中使用)"""
try:
r = _get_redis_client(decode_responses=True)
key = f"task:user:{user_id}:tasks"
# 获取现有任务信息
data = r.hget(key, task_id)
if data:
task_info = json.loads(data)
else:
task_info = {"task_id": task_id}
task_info["status"] = status
task_info["updated_at"] = datetime.now(timezone.utc).isoformat()
if result is not None:
task_info["result"] = result
r.hset(key, task_id, json.dumps(task_info))
r.expire(key, 3600) # 1小时过期
logger.debug("任务状态已更新: task_id={} status={}", task_id, status)
except Exception as e:
logger.error(f"更新任务状态失败: {e}")
def _merge_chapter_image_assets(
existing_images: list[dict] | None,
placeholders: list[dict],
provider: str,
style: str,
size: str,
now_iso: str,
) -> list[dict]:
normalized_existing_images = normalize_image_assets(existing_images)
existing_by_placeholder = {
item.get("placeholder"): dict(item)
for item in normalized_existing_images
if item.get("placeholder")
}
merged_assets: list[dict] = []
for item in placeholders:
existing = existing_by_placeholder.get(item["placeholder"])
if existing:
merged_item = dict(existing)
merged_item["index"] = item["index"]
merged_item["placeholder"] = item["placeholder"]
merged_item["description"] = item["description"]
merged_item["provider"] = merged_item.get("provider") or provider
merged_item["style"] = merged_item.get("style") or style
merged_item["size"] = merged_item.get("size") or size
merged_item["created_at"] = merged_item.get("created_at") or now_iso
merged_item["updated_at"] = merged_item.get("updated_at") or now_iso
if merged_item.get("status") == IMAGE_STATUS_COMPLETED and not (
merged_item.get("storage_key") or merged_item.get("url")
):
merged_item["status"] = IMAGE_STATUS_FAILED
merged_item["error"] = merged_item.get("error") or "missing image url"
else:
merged_item = build_initial_image_assets(
placeholders=[item],
provider=provider,
style=style,
size=size,
now_iso=now_iso,
)[0]
merged_assets.append(merged_item)
return merged_assets
def chapter_has_images_to_generate(images: list[dict] | None) -> bool:
return any(
item.get("status") in {IMAGE_STATUS_PENDING, IMAGE_STATUS_FAILED}
for item in normalize_image_assets(images)
)
def _memoir_image_from_asset(
chapter_id: str,
order_index: int,
image_asset: dict,
) -> MemoirImage:
"""从单条图片 dict 构建 MemoirImage 行(用于写入 memoir_images 表)。"""
kwargs = image_dict_to_row_kwargs(image_asset)
return MemoirImage(
id=str(uuid.uuid4()).replace("-", "")[:32],
chapter_id=chapter_id,
order_index=order_index,
**kwargs,
)
def _coerce_state(model: MemoirState) -> MemoirStateSchema:
"""将数据库模型转换为 Schema"""
return MemoirStateSchema.model_validate(
{
"stage_order": model.stage_order or default_state().stage_order,
"current_stage": model.current_stage,
"covered_stages": model.covered_stages or [],
"slots": model.slots
if isinstance(model.slots, dict)
else default_state().slots,
}
)
def _get_or_create_state_sync(user_id: str, db: Session) -> MemoirStateSchema:
"""同步获取或创建状态"""
stmt = select(MemoirState).where(MemoirState.user_id == user_id)
result = db.execute(stmt)
state = result.scalar_one_or_none()
if state:
return _coerce_state(state)
default = default_state()
state = MemoirState(
id=str(uuid.uuid4()),
user_id=user_id,
stage_order=default.stage_order,
current_stage=default.current_stage,
covered_stages=default.covered_stages,
slots={
k: {sk: sv.model_dump() for sk, sv in v.items()}
for k, v in default.slots.items()
},
)
db.add(state)
db.commit()
db.refresh(state)
return _coerce_state(state)
def _update_slot_sync(
user_id: str,
stage: str,
slot_name: str,
snippet: str,
segment_ids: List[str],
db: Session,
) -> MemoirStateSchema:
"""同步更新 slot"""
stmt = select(MemoirState).where(MemoirState.user_id == user_id)
result = db.execute(stmt)
state = result.scalar_one_or_none()
if not state:
_get_or_create_state_sync(user_id, db)
result = db.execute(stmt)
state = result.scalar_one()
slots: Dict[str, Dict] = state.slots or {}
stage_slots = slots.get(stage, {})
existing = stage_slots.get(slot_name, {})
merged_segment_ids = list({*(existing.get("segment_ids") or []), *segment_ids})
stage_slots[slot_name] = SlotData(
snippet=snippet, segment_ids=merged_segment_ids
).model_dump()
slots[stage] = stage_slots
state.slots = slots
state.current_stage = stage
db.commit()
db.refresh(state)
return _coerce_state(state)
@shared_task(bind=True, max_retries=3, default_retry_delay=60)
def process_memoir_segments(self, user_id: str, segment_ids: List[str]):
"""
处理回忆录段落的 Celery 任务
Args:
user_id: 用户 ID
segment_ids: 段落 ID 列表
"""
task_id = self.request.id
logger.info(
f"开始处理回忆录段落: user_id={user_id}, task_id={task_id}, segments={len(segment_ids)}"
)
# 更新任务状态为 running
_update_task_status_sync(user_id, task_id, "running")
try:
with get_sync_db() as db:
# 获取段落
stmt = select(Segment).where(Segment.id.in_(segment_ids))
result = db.execute(stmt)
segments = result.scalars().all()
if not segments:
logger.warning(f"未找到段落: {segment_ids}")
return {"status": "no_segments"}
# Memory ingest 先于回忆录流水线 commit保证后续 retrieve_evidence_sync 可见本批 chunk
# (见 api/docs/memory-retrieval.md
conv_id = getattr(segments[0], "conversation_id", None) or ""
transcript = "\n\n".join(seg.user_input_text or "" for seg in segments)
if transcript.strip():
try:
from app.features.memory.service import ingest_transcript_sync
source_id = ingest_transcript_sync(db, user_id, conv_id, transcript)
logger.info(
"event=memory_transcript_ingested user_id={} task_id={} "
"source_id={} conversation_id={} transcript_chars={} "
"segment_count={}",
user_id,
task_id,
source_id,
conv_id,
len(transcript),
len(segments),
)
except Exception as e:
logger.warning(
"Memory ingest 跳过: {} exc_type={}",
e,
type(e).__name__,
)
llm = _get_llm()
llm_fast = _get_llm_fast()
if (settings.llm_fast_model or "").strip():
logger.info(
"event=llm_fast_tier_used pipeline=memoir_prepare_batches model={}",
settings.llm_fast_model,
)
image_settings = MemoirImageSettings.from_env()
user_obj = db.get(User, user_id)
user_profile = ""
user_birth_year = None
background_voice = "default"
user_occupation = ""
if user_obj:
user_birth_year = user_obj.birth_year
user_profile = format_user_profile_context(
birth_year=user_obj.birth_year,
birth_place=user_obj.birth_place,
grew_up_place=user_obj.grew_up_place,
occupation=user_obj.occupation,
)
background_voice = infer_background_voice(user_obj.occupation)
user_occupation = user_obj.occupation or ""
story_dispatch_ids: Set[str] = set()
memoir_orchestrator = MemoirOrchestrator()
prepared = memoir_orchestrator.prepare_batches(
segments=list(segments),
llm=llm,
llm_fast=llm_fast,
get_or_create_state=lambda: _get_or_create_state_sync(user_id, db),
update_slot=lambda stage, slot_name, snippet, seg_ids: (
_update_slot_sync(user_id, stage, slot_name, snippet, seg_ids, db)
),
)
chapters_to_enqueue: Set[str] = set()
affected_chapter_ids: Set[str] = set()
for (
chapter_category,
category_segments,
) in prepared.category_to_segments.items():
lock_handle = _acquire_chapter_lock(
user_id, chapter_category, ttl_seconds=_chapter_lock_ttl()
)
if lock_handle is None:
logger.warning(
"章节锁竞争: category={}, 延迟重试",
chapter_category,
)
raise self.retry(countdown=10)
try:
batch_ids = {str(s.id) for s in category_segments}
skip_ids = prepared.segment_skip_story_ids
in_skip = batch_ids & skip_ids
if in_skip:
logger.info(
"event=memoir_skip_story_signal chapter_category={} "
"segment_ids_in_skip_set={}",
chapter_category,
sorted(in_skip),
)
if batch_ids and batch_ids <= skip_ids:
logger.info(
"event=story_pipeline_skipped reason=no_substantive_after_none "
"chapter_category={} segment_ids={}",
chapter_category,
sorted(batch_ids),
)
continue
chapter, needs_cover, disp = run_story_pipeline_for_category_batch(
db,
user_id=user_id,
chapter_category=chapter_category,
category_segments=category_segments,
state=prepared.state,
user_profile=user_profile,
user_birth_year=user_birth_year,
llm=llm,
background_voice=background_voice,
occupation=user_occupation,
)
story_dispatch_ids |= disp
db.flush()
db.refresh(chapter)
affected_chapter_ids.add(chapter.id)
needs_cover_enqueue = (
image_settings.enabled and chapter_needs_cover_enqueue(chapter)
)
stmt_book = (
select(Book)
.where(Book.user_id == user_id)
.order_by(Book.updated_at.desc())
)
result_book = db.execute(stmt_book)
book = result_book.scalar_one_or_none()
if not book:
book = Book(
id=str(uuid.uuid4()),
user_id=user_id,
title="我的回忆录",
total_pages=0,
total_words=0,
cover_image_url=None,
)
db.add(book)
book.has_update = True
book.last_update_chapter_id = chapter.id
if chapter and needs_cover_enqueue:
chapters_to_enqueue.add(chapter.id)
finally:
_release_chapter_lock(lock_handle)
# 标记段落为已处理
for seg in segments:
seg.processed = True
db.commit()
from app.features.story.post_commit import enqueue_story_post_commit_effects
pc = enqueue_story_post_commit_effects(
user_id=user_id,
story_ids=set(story_dispatch_ids),
chapter_ids=affected_chapter_ids,
trigger_source="pipeline",
need_compaction=True,
compaction_extra={
"pipeline_run_id": str(task_id),
"story_dispatch_ids": sorted(story_dispatch_ids),
"chapters_to_enqueue": sorted(chapters_to_enqueue),
},
)
logger.info(
"event=story_post_commit user_id={} trigger=pipeline "
"enqueued_story_image_count={} enqueued_chapter_recompose_count={} "
"compaction_scheduled={} errors={}",
user_id,
pc.enqueued_story_image_count,
pc.enqueued_chapter_recompose_count,
pc.compaction_scheduled,
pc.errors,
)
from app.tasks.chapter_cover_enqueue import (
try_enqueue_generate_chapter_cover,
)
for chapter_id in sorted(chapters_to_enqueue):
if try_enqueue_generate_chapter_cover(chapter_id, source="pipeline"):
logger.info(f"派发章节封面任务: chapter={chapter_id}")
categories_processed = sorted(prepared.category_to_segments.keys())
logger.info(
"回忆录处理完成: user_id={} task_id={} segment_count={} "
"categories_processed={}",
user_id,
task_id,
len(segments),
categories_processed,
)
# 更新任务状态为成功
_update_task_status_sync(
user_id,
task_id,
"success",
{
"processed": len(segments),
"categories_processed": categories_processed,
},
)
return {
"status": "success",
"processed": len(segments),
"categories_processed": categories_processed,
}
except Exception as e:
logger.error(f"回忆录处理失败: {e}")
# 更新任务状态为失败
_update_task_status_sync(user_id, task_id, "failure", {"error": str(e)})
# 重试
raise self.retry(exc=e) from e
@shared_task(bind=True, max_retries=3, default_retry_delay=30)
def generate_chapter_content(self, user_id: str, stage: str, new_content: str):
"""
单独生成章节内容的任务(用于实时更新)
Args:
user_id: 用户 ID
stage: 阶段
new_content: 新内容
"""
logger.info(f"生成章节内容: user_id={user_id}, stage={stage}")
try:
with get_sync_db() as db:
llm = _get_llm()
user_obj = db.get(User, user_id)
user_profile = ""
user_birth_year = None
background_voice = "default"
user_occupation = ""
if user_obj:
user_birth_year = user_obj.birth_year
user_profile = format_user_profile_context(
birth_year=user_obj.birth_year,
birth_place=user_obj.birth_place,
grew_up_place=user_obj.grew_up_place,
occupation=user_obj.occupation,
)
background_voice = infer_background_voice(user_obj.occupation)
user_occupation = user_obj.occupation or ""
class _Seg:
def __init__(self, text: str):
self.id = str(uuid.uuid4())
self.user_input_text = text
state = _get_or_create_state_sync(user_id, db)
chapter, _, dispatch_ids = run_story_pipeline_for_category_batch(
db,
user_id=user_id,
chapter_category=stage,
category_segments=[_Seg(new_content)],
state=state,
user_profile=user_profile,
user_birth_year=user_birth_year,
llm=llm,
background_voice=background_voice,
occupation=user_occupation,
)
db.commit()
db.refresh(chapter)
from app.features.story.post_commit import enqueue_story_post_commit_effects
ch_ids: set[str] = set()
if chapter is not None:
ch_ids.add(str(chapter.id))
pc = enqueue_story_post_commit_effects(
user_id=user_id,
story_ids=set(dispatch_ids),
chapter_ids=ch_ids,
trigger_source="pipeline",
need_compaction=False,
)
logger.info(
"event=story_post_commit user_id={} trigger=pipeline_generate_chapter "
"enqueued_story_image_count={} enqueued_chapter_recompose_count={} "
"compaction_scheduled={} errors={}",
user_id,
pc.enqueued_story_image_count,
pc.enqueued_chapter_recompose_count,
pc.compaction_scheduled,
pc.errors,
)
image_settings = MemoirImageSettings.from_env()
if (
image_settings.enabled
and chapter
and chapter_needs_cover_enqueue(chapter)
):
from app.tasks.chapter_cover_enqueue import (
try_enqueue_generate_chapter_cover,
)
try_enqueue_generate_chapter_cover(chapter.id, source="pipeline")
return {"status": "success"}
except Exception as e:
logger.error(f"章节生成失败: {e}")
raise self.retry(exc=e) from e