Files
life-echo/api/app/features/memoir/repo.py
Kevin aac484463d feat(api): 拆分章节物化与 Story 后处理,并加固 Redis 锁与腾讯 ASR
回忆录 Story 流水线(同步)
- 同步路径仅写入 Story 与章节关联,改为 mark_chapter_dirty_sync,不再内联 compose
- 物化由 Celery recompose_chapter 异步完成;compose 不变量与异常时保留 dirty 的语义在 repo 中补充说明
- Evidence:大批次时降低 top_k;路由候选 story 携带 char_count/version_count;append 超长/版本过多时强制新开 story
- 叙事 prompt:relevant_chunks 去重,减少重复证据噪声
- 叙事回退与忠实度 gate:返回 fallback 类型并记录结构化日志(含耗时、JSON 有效性等)

Post-commit 与任务编排
- 新增 post_commit.enqueue_story_post_commit_effects:统一派发 generate_story_image(Redis 去重)、延迟 recompose_chapter、可选 memory compaction
- memoir_tasks / story_service / story_image_tasks 改为调用 post-commit 入口;主图回填后按关联章节重算并调度物化与 compacs(锁委托、Redis 单例、ASR to_thread)
- 更新 test_narrative_pipeline 以适配 _apply_narrative_fallbacks 返回值
2026-03-30 11:53:04 +08:00

345 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Memoir repository — Book, Chapter, MemoirState data access."""
import uuid
from sqlalchemy import delete, func, select, update
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session, joinedload
from app.core.db import utc_now
from app.features.asset.models import Asset
from app.features.memoir.asset_resolver import collect_asset_ids_for_chapter
from app.features.memoir.chapter_markdown_compose import (
materialize_chapter_markdown_from_loaded_chapter,
)
from app.features.memoir.models import (
Book,
Chapter,
ChapterCoverIntent,
ChapterStoryLink,
ChapterVersion,
MemoirState,
)
from app.features.memoir.reading_segment_materialize import (
build_reading_segments_snapshot,
)
from app.features.story.models import Story
from app.features.story.time_hints import life_sort_key_parts
async def get_current_book(user_id: str, db: AsyncSession) -> Book | None:
    """Return the most recently updated Book owned by ``user_id``.

    "Current" is decided purely by ``Book.updated_at`` in descending order;
    only the first row is fetched.

    Args:
        user_id: Owner whose books are searched.
        db: Async session used to run the query.

    Returns:
        The newest Book by ``updated_at``, or ``None`` if the user has none.
    """
    newest_first = (
        select(Book)
        .where(Book.user_id == user_id)
        .order_by(Book.updated_at.desc())
    )
    rows = await db.execute(newest_first.limit(1))
    return rows.scalar_one_or_none()
async def get_chapters_for_memoir_list(
    user_id: str,
    db: AsyncSession,
    *,
    active_only: bool = True,
    is_new_only: bool | None = None,
) -> list[Chapter]:
    """Load a user's chapters for list/detail views (stories-first).

    Eager-loads ``Chapter.images`` and the chain
    ``story_links -> story -> image_intents`` so the relationships are
    available without further queries.

    Args:
        user_id: Owner of the chapters.
        db: Async session used to run the query.
        active_only: When true, keep only chapters with ``is_active`` set.
        is_new_only: Only the exact value ``True`` adds an ``is_new`` filter;
            ``False`` and ``None`` both leave the result unfiltered.

    Returns:
        Chapters ordered by ``order_index``.
    """
    filters = [Chapter.user_id == user_id]
    if active_only:
        filters.append(Chapter.is_active == True)  # noqa: E712
    if is_new_only is True:
        filters.append(Chapter.is_new == True)  # noqa: E712

    story_chain = (
        joinedload(Chapter.story_links)
        .joinedload(ChapterStoryLink.story)
        .joinedload(Story.image_intents)
    )
    query = (
        select(Chapter)
        .where(*filters)
        .options(joinedload(Chapter.images), story_chain)
        .order_by(Chapter.order_index)
    )
    rows = await db.execute(query)
    # unique() is required: joined eager loads of collections repeat the
    # parent row once per child row.
    return list(rows.unique().scalars().all())
async def get_chapter_by_id(chapter_id: str, db: AsyncSession) -> Chapter | None:
    """Fetch one chapter by primary key with images and story links loaded.

    Eager-loads ``Chapter.images`` and the chain
    ``story_links -> story -> image_intents``. No ownership check is done
    here; the lookup is by ``chapter_id`` only.

    Args:
        chapter_id: Primary key of the chapter.
        db: Async session used to run the query.

    Returns:
        The matching Chapter, or ``None`` when no row has that id.
    """
    story_chain = (
        joinedload(Chapter.story_links)
        .joinedload(ChapterStoryLink.story)
        .joinedload(Story.image_intents)
    )
    query = (
        select(Chapter)
        .where(Chapter.id == chapter_id)
        .options(joinedload(Chapter.images), story_chain)
    )
    rows = await db.execute(query)
    # Collapse the duplicated parent rows produced by the collection joins.
    return rows.unique().scalar_one_or_none()
async def get_memoir_state(user_id: str, db: AsyncSession) -> MemoirState | None:
    """Return the MemoirState row belonging to ``user_id``, if any.

    Args:
        user_id: Owner of the memoir state.
        db: Async session used to run the query.

    Returns:
        The MemoirState, or ``None`` when the user has no row. More than one
        matching row makes ``scalar_one_or_none`` raise.
    """
    rows = await db.execute(
        select(MemoirState).where(MemoirState.user_id == user_id)
    )
    return rows.scalar_one_or_none()
async def get_chapter_ids_linked_to_story(db: AsyncSession, story_id: str) -> list[str]:
    """List the ids of chapters that link to ``story_id``.

    Args:
        db: Async session used to run the query.
        story_id: Story whose chapter links are looked up.

    Returns:
        Chapter ids with duplicates removed, in first-seen order.
    """
    query = select(ChapterStoryLink.chapter_id).where(
        ChapterStoryLink.story_id == story_id
    )
    rows = await db.execute(query)
    # A dict keeps insertion order, giving an order-preserving de-duplication.
    first_seen: dict[str, None] = {}
    for linked_chapter_id in rows.scalars().all():
        first_seen.setdefault(linked_chapter_id, None)
    return list(first_seen)
async def mark_chapters_dirty_for_story(db: AsyncSession, story_id: str) -> None:
    """Set ``markdown_compose_dirty`` on every chapter linked to a story.

    Issues one bulk UPDATE; nothing is executed when the story has no
    chapter links. The session is not committed here.

    Args:
        db: Async session used to run the statements.
        story_id: Story whose linked chapters are marked dirty.
    """
    chapter_ids = await get_chapter_ids_linked_to_story(db, story_id)
    if chapter_ids:
        flag_dirty = (
            update(Chapter)
            .where(Chapter.id.in_(chapter_ids))
            .values(markdown_compose_dirty=True)
        )
        await db.execute(flag_dirty)
def mark_chapters_dirty_for_story_sync(session: Session, story_id: str) -> None:
    """Synchronous variant: flag every chapter linked to a story as dirty.

    Issues one bulk UPDATE setting ``markdown_compose_dirty=True``; nothing
    is executed when the story has no chapter links. The session is not
    committed here.

    Args:
        session: Sync session used to run the statements.
        story_id: Story whose linked chapters are marked dirty.
    """
    # Same lookup the async variant delegates to: de-duplicated chapter ids.
    chapter_ids = get_chapter_ids_linked_to_story_sync(session, story_id)
    if chapter_ids:
        flag_dirty = (
            update(Chapter)
            .where(Chapter.id.in_(chapter_ids))
            .values(markdown_compose_dirty=True)
        )
        session.execute(flag_dirty)
def get_chapter_ids_linked_to_story_sync(session: Session, story_id: str) -> list[str]:
    """Synchronous lookup of the chapter ids that link to ``story_id``.

    Args:
        session: Sync session used to run the query.
        story_id: Story whose chapter links are looked up.

    Returns:
        Chapter ids with duplicates removed, in first-seen order.
    """
    query = select(ChapterStoryLink.chapter_id).where(
        ChapterStoryLink.story_id == story_id
    )
    # A dict keeps insertion order, giving an order-preserving de-duplication.
    first_seen: dict[str, None] = {}
    for linked_chapter_id in session.scalars(query).all():
        first_seen.setdefault(linked_chapter_id, None)
    return list(first_seen)
def mark_chapter_dirty_sync(session: Session, chapter_id: str) -> None:
    """Flag one chapter as needing its markdown re-materialized.

    Idempotent: the flag is simply set to ``True``, so repeated calls are
    safe. An unknown ``chapter_id`` is silently ignored.

    Args:
        session: Sync session; flushed (not committed) after the change.
        chapter_id: Primary key of the chapter to mark.
    """
    chapter = session.get(Chapter, chapter_id)
    if not chapter:
        return
    chapter.markdown_compose_dirty = True
    session.flush()
async def get_chapter_with_story_links_for_compose(
    chapter_id: str, db: AsyncSession
) -> Chapter | None:
    """Load a chapter with the relationships needed to compose its markdown.

    Eager-loads ``story_links -> story -> image_intents``; unlike the
    list/detail loaders in this module, ``Chapter.images`` is not loaded.

    Args:
        chapter_id: Primary key of the chapter.
        db: Async session used to run the query.

    Returns:
        The Chapter, or ``None`` when no row has that id.
    """
    story_chain = (
        joinedload(Chapter.story_links)
        .joinedload(ChapterStoryLink.story)
        .joinedload(Story.image_intents)
    )
    query = select(Chapter).where(Chapter.id == chapter_id).options(story_chain)
    rows = await db.execute(query)
    # Collapse the duplicated parent rows produced by the collection joins.
    return rows.unique().scalar_one_or_none()
async def append_chapter_compose_version_async(
    db: AsyncSession,
    chapter: Chapter,
    markdown: str,
) -> None:
    """Append a system-composed ChapterVersion and point the chapter at it.

    A new ChapterVersion (``actor_type="system"``,
    ``source_type="compose_from_stories"``) holding ``markdown`` is added and
    flushed. The chapter is then updated in memory: canonical markdown,
    current version id, dirty flag cleared, compose timestamp, and the
    reading-segments snapshot. Nothing is committed here.

    The next ``version_no`` is ``MAX(version_no) + 1`` for the chapter rather
    than ``COUNT(*) + 1``: a count reuses an existing number as soon as any
    version row of the chapter has been deleted, while the maximum always
    yields a number above every stored one. When the stored numbers are
    contiguous from 1 the two formulas agree.

    Args:
        db: Async session the version is added to.
        chapter: Loaded chapter to update.
        markdown: Newly composed markdown for the chapter.
    """
    latest_stmt = select(func.max(ChapterVersion.version_no)).where(
        ChapterVersion.chapter_id == chapter.id
    )
    # MAX over zero rows is NULL -> None, so the first version gets number 1.
    latest_no = (await db.execute(latest_stmt)).scalar() or 0

    version_id = str(uuid.uuid4())
    db.add(
        ChapterVersion(
            id=version_id,
            chapter_id=chapter.id,
            version_no=latest_no + 1,
            markdown_snapshot=markdown,
            actor_type="system",
            source_type="compose_from_stories",
        )
    )
    # Flush the version row before the chapter starts referencing its id.
    await db.flush()

    chapter.canonical_markdown = markdown
    chapter.current_version_id = version_id
    chapter.markdown_compose_dirty = False
    chapter.markdown_composed_at = utc_now()
    # Built last so the snapshot sees the chapter fields assigned above.
    chapter.reading_segments_json = build_reading_segments_snapshot(chapter)
def append_chapter_compose_version_sync(
    session: Session,
    chapter: Chapter,
    markdown: str,
) -> None:
    """Synchronously append a system-composed ChapterVersion to a chapter.

    A new ChapterVersion (``actor_type="system"``,
    ``source_type="compose_from_stories"``) holding ``markdown`` is added and
    flushed. The chapter is then updated in memory: canonical markdown,
    current version id, dirty flag cleared, compose timestamp, and the
    reading-segments snapshot. Nothing is committed here.

    The next ``version_no`` is ``MAX(version_no) + 1`` for the chapter rather
    than ``COUNT(*) + 1``: a count reuses an existing number as soon as any
    version row of the chapter has been deleted, while the maximum always
    yields a number above every stored one. When the stored numbers are
    contiguous from 1 the two formulas agree.

    Args:
        session: Sync session the version is added to.
        chapter: Loaded chapter to update.
        markdown: Newly composed markdown for the chapter.
    """
    latest_stmt = select(func.max(ChapterVersion.version_no)).where(
        ChapterVersion.chapter_id == chapter.id
    )
    # MAX over zero rows is NULL -> None, so the first version gets number 1.
    latest_no = session.execute(latest_stmt).scalar() or 0

    version_id = str(uuid.uuid4())
    session.add(
        ChapterVersion(
            id=version_id,
            chapter_id=chapter.id,
            version_no=latest_no + 1,
            markdown_snapshot=markdown,
            actor_type="system",
            source_type="compose_from_stories",
        )
    )
    # Flush the version row before the chapter starts referencing its id.
    session.flush()

    chapter.canonical_markdown = markdown
    chapter.current_version_id = version_id
    chapter.markdown_compose_dirty = False
    chapter.markdown_composed_at = utc_now()
    # Built last so the snapshot sees the chapter fields assigned above.
    chapter.reading_segments_json = build_reading_segments_snapshot(chapter)
def reorder_chapter_story_links_by_life_order_sync(
    session: Session, chapter_id: str
) -> None:
    """Renumber a chapter's story links in life-chronology order.

    Links are sorted by the key from ``life_sort_key_parts`` (fed with the
    story's ``time_start``, ``title``, ``created_at`` and ``id``) and their
    ``order_index`` is rewritten to 0..n-1. Per the original author's note
    the intended order is: time_start / year in the title / creation time.

    No-op when the chapter does not exist or has fewer than two links.

    Args:
        session: Sync session; flushed (not committed) after renumbering.
        chapter_id: Primary key of the chapter whose links are reordered.
    """
    query = (
        select(Chapter)
        .where(Chapter.id == chapter_id)
        .options(joinedload(Chapter.story_links).joinedload(ChapterStoryLink.story))
    )
    chapter = session.execute(query).unique().scalar_one_or_none()
    if not chapter:
        return

    story_links = list(chapter.story_links or [])
    if len(story_links) < 2:
        return

    def _life_order(link: ChapterStoryLink) -> tuple[int, int, str]:
        story = link.story
        if story is not None:
            return life_sort_key_parts(
                time_start=getattr(story, "time_start", None),
                title=getattr(story, "title", None),
                created_at=getattr(story, "created_at", None),
                story_id=story.id,
            )
        # Link without a loaded story: fixed fallback key
        # (presumably meant to sort after dated stories -- TODO confirm).
        return (9999, 0, link.story_id or "")

    for position, link in enumerate(sorted(story_links, key=_life_order)):
        # Only touch rows whose index changes, so unchanged links stay clean.
        if link.order_index != position:
            link.order_index = position
    session.flush()
def compose_chapter_from_story_links_sync(session: Session, chapter_id: str) -> bool:
    """Rebuild a chapter's canonical markdown from its story links.

    With at least one story link, the markdown is materialized from the
    loaded chapter and appended to the version chain. With no links, the
    dirty flag is cleared, ``reading_segments_json`` is reset to an empty
    list, and the session is flushed.

    Invariant: both the successful-compose path and the empty-links path
    leave ``markdown_compose_dirty`` False. This function never clears the
    flag before materialization succeeds, so an exception leaves it as is.

    Args:
        session: Sync session used for the query and writes (not committed).
        chapter_id: Primary key of the chapter to compose.

    Returns:
        ``True`` when a new version was written; ``False`` when the chapter
        does not exist or has no story links.
    """
    story_chain = (
        joinedload(Chapter.story_links)
        .joinedload(ChapterStoryLink.story)
        .joinedload(Story.image_intents)
    )
    query = select(Chapter).where(Chapter.id == chapter_id).options(story_chain)
    chapter = session.execute(query).unique().scalar_one_or_none()
    if not chapter:
        return False

    if chapter.story_links:
        composed = materialize_chapter_markdown_from_loaded_chapter(chapter)
        append_chapter_compose_version_sync(session, chapter, composed)
        return True

    # Nothing to compose: clear the flag so the chapter is not retried forever.
    chapter.markdown_compose_dirty = False
    chapter.reading_segments_json = []
    session.flush()
    return False
async def replace_chapter_story_links_async(
    db: AsyncSession,
    *,
    chapter_id: str,
    user_id: str,
    story_ids: list[str],
) -> None:
    """Replace all story links of a chapter with ``story_ids``, in order.

    All validation happens before any write. Existing links are then deleted
    and one ChapterStoryLink per story id is inserted, with ``order_index``
    equal to its position in ``story_ids``. An empty list clears the links.
    The session is flushed but not committed.

    Args:
        db: Async session used for the reads and writes.
        chapter_id: Chapter whose links are replaced.
        user_id: Must own both the chapter and every story.
        story_ids: New ordered story ids; must not contain duplicates.

    Raises:
        ValueError: If the chapter is missing or owned by another user, if
            ``story_ids`` has duplicates, or if any story is missing or not
            owned by ``user_id``.
    """
    chapter = await db.get(Chapter, chapter_id)
    if not (chapter and chapter.user_id == user_id):
        raise ValueError("Chapter not found or access denied")

    requested = set(story_ids)
    if len(requested) != len(story_ids):
        raise ValueError("Duplicate story_id in story_ids")

    if story_ids:
        owned_query = select(Story.id).where(
            Story.id.in_(story_ids),
            Story.user_id == user_id,
        )
        owned = set((await db.execute(owned_query)).scalars().all())
        missing = requested - owned
        if missing:
            raise ValueError(f"Stories not found or not owned: {sorted(missing)}")

    # Drop the old links first and flush before the new rows are added.
    await db.execute(
        delete(ChapterStoryLink).where(ChapterStoryLink.chapter_id == chapter_id)
    )
    await db.flush()

    db.add_all(
        [
            ChapterStoryLink(
                id=str(uuid.uuid4()),
                chapter_id=chapter_id,
                story_id=story_id,
                order_index=position,
            )
            for position, story_id in enumerate(story_ids)
        ]
    )
    await db.flush()
async def collect_cos_storage_keys_for_chapter(
    db: AsyncSession, chapter: Chapter
) -> list[str]:
    """Gather every storage key associated with a chapter.

    Sources, merged into one de-duplicated set:
      * ``storage_key`` of each item in ``chapter.images``;
      * ``Asset.storage_key`` for the asset ids returned by
        ``collect_asset_ids_for_chapter`` (per the original author's note:
        ``asset://`` references in the body and the cover asset);
      * ``Asset.storage_key`` for assets bound to the chapter's
        ChapterCoverIntent rows.

    Per the original author's note, the result is used to reclaim COS
    storage after a chapter is soft-deleted.

    Args:
        db: Async session used for the lookups.
        chapter: Chapter to inspect; ``images`` is read via ``getattr``, so
            a missing or empty collection is tolerated.

    Returns:
        Sorted list of distinct, non-empty storage keys.
    """
    chapter_images = getattr(chapter, "images", None) or []
    image_keys = (getattr(image, "storage_key", None) for image in chapter_images)
    storage_keys: set[str] = {key for key in image_keys if key}

    referenced_ids = set(collect_asset_ids_for_chapter(chapter))
    intent_query = select(ChapterCoverIntent.asset_id).where(
        ChapterCoverIntent.chapter_id == chapter.id,
        ChapterCoverIntent.asset_id.isnot(None),
    )
    intent_asset_ids = (await db.execute(intent_query)).scalars().all()
    # Stringified before merging with the ids from the chapter helper.
    referenced_ids.update(str(asset_id) for asset_id in intent_asset_ids if asset_id)

    if not referenced_ids:
        return sorted(storage_keys)

    asset_query = select(Asset.storage_key).where(
        Asset.id.in_(referenced_ids),
        Asset.storage_key.isnot(None),
    )
    asset_keys = (await db.execute(asset_query)).scalars().all()
    storage_keys.update(key for key in asset_keys if key)
    return sorted(storage_keys)