Files
life-echo/api/app/features/memoir/repo.py
Kevin 309a051038 feat: 回忆录证据血缘与内部评测可追溯,顺带对齐本地评测台与 CI
数据库与模型:新增多版迁移(章节证据快照、对话血缘、记忆事实/时间线 lineage 等),把「成稿 ↔ 对话/记忆」的溯源信息落到表结构里。
业务链路:会话与 WS、回忆录/故事流水线、记忆写入与 enrichment 等跟着接上线索与快照;新增章节证据快照与评测侧 EvalTraceService 等模块,方便组评审用的证据包。
内部评测:自动化 run 与手工 memoir 评审共用可追溯证据;rubric/ judge 相关脚本与文档有配套调整。
app-eval-web:Memoir/实验详情里能展开看证据摘要与 evidence_trace(含对话轮次 id);Vite 代理与 development.sh 注入的 API 端口与当前默认内部评测端口一致,避免改端口后页面连错服务。
工程杂项:GitHub Actions / 仓库说明有更新;各适配器与支付/配额/plan 等多处为小改动或跟随主改动的收尾;新增/扩充了?
2026-04-08 15:37:09 +08:00

366 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Memoir repository — Book, Chapter, MemoirState data access."""
import uuid
from sqlalchemy import delete, func, select, update
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session, joinedload
from app.core.db import utc_now
from app.core.logging import get_logger
from app.features.asset.models import Asset
from app.features.memoir.asset_resolver import collect_asset_ids_for_chapter
from app.features.memoir.chapter_evidence_snapshot import (
refresh_chapter_evidence_snapshot_with_retry_sync,
)
from app.features.memoir.chapter_markdown_compose import (
materialize_chapter_markdown_from_loaded_chapter,
)
from app.features.memoir.models import (
Book,
Chapter,
ChapterCoverIntent,
ChapterStoryLink,
ChapterVersion,
MemoirState,
)
from app.features.memoir.reading_segment_materialize import (
build_reading_segments_snapshot,
)
from app.features.story.models import Story
from app.features.story.time_hints import life_sort_key_parts
logger = get_logger(__name__)
async def get_current_book(user_id: str, db: AsyncSession) -> Book | None:
    """Return the most recently updated Book owned by ``user_id``.

    Books are ordered by ``updated_at`` descending and only the first row
    is fetched; ``None`` is returned when the user has no book.
    """
    newest_first = Book.updated_at.desc()
    query = select(Book).where(Book.user_id == user_id).order_by(newest_first).limit(1)
    rows = await db.execute(query)
    return rows.scalar_one_or_none()
async def get_chapters_for_memoir_list(
    user_id: str,
    db: AsyncSession,
    *,
    active_only: bool = True,
    is_new_only: bool | None = None,
) -> list[Chapter]:
    """Load a user's chapters for the list/detail (stories-first) views.

    Each chapter is joined-eager-loaded with its images, its current
    evidence snapshot, and its story links (each link's story plus that
    story's image intents). Results are ordered by ``Chapter.order_index``.

    Args:
        user_id: Owner whose chapters are selected.
        db: Async session used to run the query.
        active_only: When truthy, keep only chapters with ``is_active`` true.
        is_new_only: Only the literal ``True`` adds an ``is_new`` filter;
            ``False`` and ``None`` leave the result unfiltered.
    """
    conditions = [Chapter.user_id == user_id]
    if active_only:
        conditions.append(Chapter.is_active == True)  # noqa: E712
    if is_new_only is True:
        conditions.append(Chapter.is_new == True)  # noqa: E712

    eager_options = (
        joinedload(Chapter.images),
        joinedload(Chapter.current_evidence_snapshot),
        joinedload(Chapter.story_links)
        .joinedload(ChapterStoryLink.story)
        .joinedload(Story.image_intents),
    )
    query = (
        select(Chapter)
        .where(*conditions)
        .options(*eager_options)
        .order_by(Chapter.order_index)
    )
    rows = await db.execute(query)
    # unique() collapses the repeated parent rows that joined eager loading
    # of collections produces.
    return list(rows.unique().scalars().all())
async def get_chapter_by_id(chapter_id: str, db: AsyncSession) -> Chapter | None:
    """Fetch one chapter by primary key with its display relations loaded.

    Joined-eager-loads images, the current evidence snapshot, and story
    links down to each story's image intents. Returns ``None`` when no
    chapter has this id. No ownership check is performed here.
    """
    story_chain = (
        joinedload(Chapter.story_links)
        .joinedload(ChapterStoryLink.story)
        .joinedload(Story.image_intents)
    )
    query = select(Chapter).where(Chapter.id == chapter_id).options(
        joinedload(Chapter.images),
        joinedload(Chapter.current_evidence_snapshot),
        story_chain,
    )
    rows = await db.execute(query)
    return rows.unique().scalars().one_or_none()
async def get_memoir_state(user_id: str, db: AsyncSession) -> MemoirState | None:
    """Return the MemoirState row for ``user_id``, or ``None`` if absent.

    ``scalar_one_or_none`` raises if more than one row matches.
    """
    query = select(MemoirState).where(MemoirState.user_id == user_id)
    return (await db.execute(query)).scalar_one_or_none()
async def get_chapter_ids_linked_to_story(db: AsyncSession, story_id: str) -> list[str]:
    """Return the distinct chapter ids linked to ``story_id``.

    Duplicates are removed while keeping the order in which the database
    returned the rows.
    """
    query = select(ChapterStoryLink.chapter_id).where(
        ChapterStoryLink.story_id == story_id
    )
    rows = await db.execute(query)
    # dict.fromkeys de-duplicates and preserves first-seen order.
    distinct_ids = dict.fromkeys(rows.scalars().all())
    return list(distinct_ids)
async def mark_chapters_dirty_for_story(db: AsyncSession, story_id: str) -> None:
    """Flag every chapter linked to ``story_id`` as needing markdown recompose.

    Issues a single bulk UPDATE setting ``markdown_compose_dirty`` to true;
    does nothing when the story is not linked to any chapter.
    """
    chapter_ids = await get_chapter_ids_linked_to_story(db, story_id)
    if chapter_ids:
        flag_dirty = (
            update(Chapter)
            .where(Chapter.id.in_(chapter_ids))
            .values(markdown_compose_dirty=True)
        )
        await db.execute(flag_dirty)
def mark_chapters_dirty_for_story_sync(session: Session, story_id: str) -> None:
    """Flag every chapter linked to ``story_id`` as needing markdown recompose.

    Synchronous counterpart of ``mark_chapters_dirty_for_story``. Issues a
    single bulk UPDATE setting ``markdown_compose_dirty`` to true; does
    nothing when the story is not linked to any chapter.

    Args:
        session: Synchronous SQLAlchemy session to run the statements on.
        story_id: Story whose linked chapters should be marked dirty.
    """
    # Delegate the link lookup to the shared sync helper instead of
    # repeating its query, mirroring how the async variant is structured.
    chapter_ids = get_chapter_ids_linked_to_story_sync(session, story_id)
    if not chapter_ids:
        return
    session.execute(
        update(Chapter)
        .where(Chapter.id.in_(chapter_ids))
        .values(markdown_compose_dirty=True)
    )
def get_chapter_ids_linked_to_story_sync(session: Session, story_id: str) -> list[str]:
    """Return the distinct chapter ids linked to ``story_id`` (sync session).

    Duplicates are removed while keeping the order in which the database
    returned the rows.
    """
    query = select(ChapterStoryLink.chapter_id).where(
        ChapterStoryLink.story_id == story_id
    )
    linked = session.scalars(query).all()
    # dict.fromkeys de-duplicates and preserves first-seen order.
    return list(dict.fromkeys(linked))
def mark_chapter_dirty_sync(session: Session, chapter_id: str) -> None:
    """Mark one chapter as needing its markdown materialized.

    Idempotent: calling it repeatedly is safe. A missing chapter is
    silently ignored.
    """
    chapter = session.get(Chapter, chapter_id)
    if not chapter:
        return
    chapter.markdown_compose_dirty = True
    session.flush()
async def get_chapter_with_story_links_for_compose(
    chapter_id: str, db: AsyncSession
) -> Chapter | None:
    """Fetch a chapter with the relations loaded by the compose path.

    Joined-eager-loads story links, each link's story, and each story's
    image intents. Returns ``None`` when the chapter does not exist.
    """
    compose_relations = (
        joinedload(Chapter.story_links)
        .joinedload(ChapterStoryLink.story)
        .joinedload(Story.image_intents)
    )
    query = select(Chapter).where(Chapter.id == chapter_id).options(compose_relations)
    rows = await db.execute(query)
    return rows.unique().scalar_one_or_none()
async def append_chapter_compose_version_async(
    db: AsyncSession,
    chapter: Chapter,
    markdown: str,
) -> None:
    """Append a system "compose_from_stories" version and make it current.

    Inserts a new ``ChapterVersion`` holding ``markdown``, flushes it, then
    updates the chapter: ``canonical_markdown``, ``current_version_id``,
    ``markdown_compose_dirty`` (cleared), ``markdown_composed_at`` and
    ``reading_segments_json``. Finally the chapter evidence snapshot is
    refreshed on the underlying sync session; that refresh is best-effort
    and a failure is only logged as a warning.

    Args:
        db: Async session; the caller owns commit/rollback.
        chapter: Loaded chapter that receives the new version.
        markdown: Composed markdown to snapshot and set as canonical.
    """
    # Number from the highest existing version_no rather than the row count:
    # a count-based number would repeat an existing one as soon as any
    # version row of this chapter has been removed.
    latest_stmt = select(func.max(ChapterVersion.version_no)).where(
        ChapterVersion.chapter_id == chapter.id
    )
    latest_no = (await db.execute(latest_stmt)).scalar() or 0
    version_id = str(uuid.uuid4())
    db.add(
        ChapterVersion(
            id=version_id,
            chapter_id=chapter.id,
            version_no=latest_no + 1,
            markdown_snapshot=markdown,
            actor_type="system",
            source_type="compose_from_stories",
        )
    )
    # Flush the version row before the chapter is pointed at it.
    await db.flush()

    chapter.canonical_markdown = markdown
    chapter.current_version_id = version_id
    chapter.markdown_compose_dirty = False
    chapter.markdown_composed_at = utc_now()
    chapter.reading_segments_json = build_reading_segments_snapshot(chapter)

    def _refresh_snapshot(sync_session: Session) -> None:
        refresh_chapter_evidence_snapshot_with_retry_sync(sync_session, str(chapter.id))

    try:
        await db.run_sync(_refresh_snapshot)
    except Exception as e:
        # Deliberate best-effort: a snapshot failure must not abort compose.
        logger.warning(
            "evidence_snapshot_refresh_failed async compose path chapter_id={}: {}",
            chapter.id,
            e,
        )
def append_chapter_compose_version_sync(
    session: Session,
    chapter: Chapter,
    markdown: str,
) -> None:
    """Append a system "compose_from_stories" version and make it current.

    Inserts a new ``ChapterVersion`` holding ``markdown``, flushes it, then
    updates the chapter: ``canonical_markdown``, ``current_version_id``,
    ``markdown_compose_dirty`` (cleared), ``markdown_composed_at`` and
    ``reading_segments_json``. This function does not refresh the chapter
    evidence snapshot itself.

    Args:
        session: Sync session; the caller owns commit/rollback.
        chapter: Loaded chapter that receives the new version.
        markdown: Composed markdown to snapshot and set as canonical.
    """
    # Number from the highest existing version_no rather than the row count:
    # a count-based number would repeat an existing one as soon as any
    # version row of this chapter has been removed.
    latest_stmt = select(func.max(ChapterVersion.version_no)).where(
        ChapterVersion.chapter_id == chapter.id
    )
    latest_no = session.execute(latest_stmt).scalar() or 0
    version_id = str(uuid.uuid4())
    session.add(
        ChapterVersion(
            id=version_id,
            chapter_id=chapter.id,
            version_no=latest_no + 1,
            markdown_snapshot=markdown,
            actor_type="system",
            source_type="compose_from_stories",
        )
    )
    # Flush the version row before the chapter is pointed at it.
    session.flush()

    chapter.canonical_markdown = markdown
    chapter.current_version_id = version_id
    chapter.markdown_compose_dirty = False
    chapter.markdown_composed_at = utc_now()
    chapter.reading_segments_json = build_reading_segments_snapshot(chapter)
def reorder_chapter_story_links_by_life_order_sync(
    session: Session, chapter_id: str
) -> None:
    """Renumber a chapter's story links in life-event order.

    ``chapter_story_links.order_index`` is rewritten following the key from
    ``life_sort_key_parts`` (fed with the story's time_start, title,
    created_at and id). No-op when the chapter is missing or has fewer
    than two links.
    """
    query = select(Chapter).where(Chapter.id == chapter_id).options(
        joinedload(Chapter.story_links).joinedload(ChapterStoryLink.story),
    )
    chapter = session.execute(query).unique().scalar_one_or_none()
    links = list(chapter.story_links or []) if chapter else []
    if len(links) < 2:
        return

    def _life_key(link: ChapterStoryLink) -> tuple[int, int, str]:
        story = link.story
        if story is not None:
            return life_sort_key_parts(
                time_start=getattr(story, "time_start", None),
                title=getattr(story, "title", None),
                created_at=getattr(story, "created_at", None),
                story_id=story.id,
            )
        # Links whose story is missing get a fixed high leading key.
        return (9999, 0, link.story_id or "")

    links.sort(key=_life_key)
    for position, link in enumerate(links):
        # Assign only on change so untouched rows are not marked modified.
        if link.order_index != position:
            link.order_index = position
    session.flush()
def compose_chapter_from_story_links_sync(session: Session, chapter_id: str) -> bool:
    """Recompose ``canonical_markdown`` from story links and append a version.

    Returns ``True`` when markdown was materialized and versioned, ``False``
    when the chapter does not exist or has no story links. In the
    empty-links case the dirty flag is cleared and ``reading_segments_json``
    is reset to an empty list.

    Invariant: both the successful path and the empty-links path end with
    ``markdown_compose_dirty`` false. NOTE(review): keeping the chapter
    dirty after an exception presumably relies on the caller rolling back
    the session — confirm.
    """
    query = select(Chapter).where(Chapter.id == chapter_id).options(
        joinedload(Chapter.story_links)
        .joinedload(ChapterStoryLink.story)
        .joinedload(Story.image_intents),
    )
    chapter = session.execute(query).unique().scalar_one_or_none()
    if not chapter:
        return False

    if list(chapter.story_links or []):
        composed = materialize_chapter_markdown_from_loaded_chapter(chapter)
        append_chapter_compose_version_sync(session, chapter, composed)
        refresh_chapter_evidence_snapshot_with_retry_sync(session, str(chapter.id))
        return True

    # Nothing to compose: clear the flag so the chapter is not retried.
    chapter.markdown_compose_dirty = False
    chapter.reading_segments_json = []
    session.flush()
    return False
async def replace_chapter_story_links_async(
    db: AsyncSession,
    *,
    chapter_id: str,
    user_id: str,
    story_ids: list[str],
) -> None:
    """Replace all story links of a chapter with ``story_ids``, in order.

    Existing links are deleted and new ones are inserted with
    ``order_index`` equal to each id's position in ``story_ids``. An empty
    list simply removes every link.

    Raises:
        ValueError: If the chapter is missing or not owned by ``user_id``,
            if ``story_ids`` contains duplicates, or if any story id does
            not exist or is not owned by ``user_id``.
    """
    chapter = await db.get(Chapter, chapter_id)
    if not chapter or chapter.user_id != user_id:
        raise ValueError("Chapter not found or access denied")

    requested = set(story_ids)
    if len(requested) != len(story_ids):
        raise ValueError("Duplicate story_id in story_ids")

    if story_ids:
        owned_stmt = select(Story.id).where(
            Story.id.in_(story_ids),
            Story.user_id == user_id,
        )
        owned = set((await db.execute(owned_stmt)).scalars().all())
        missing = requested - owned
        if missing:
            raise ValueError(f"Stories not found or not owned: {sorted(missing)}")

    # Delete and flush first so the old rows are gone before re-inserting.
    remove_old = delete(ChapterStoryLink).where(
        ChapterStoryLink.chapter_id == chapter_id
    )
    await db.execute(remove_old)
    await db.flush()

    for position, story_id in enumerate(story_ids):
        new_link = ChapterStoryLink(
            id=str(uuid.uuid4()),
            chapter_id=chapter_id,
            story_id=story_id,
            order_index=position,
        )
        db.add(new_link)
    await db.flush()
async def collect_cos_storage_keys_for_chapter(
    db: AsyncSession, chapter: Chapter
) -> list[str]:
    """Collect the storage keys of every asset tied to a chapter.

    Gathers ``storage_key`` values from the chapter's images, from the
    assets returned by ``collect_asset_ids_for_chapter``, and from assets
    bound to the chapter's cover intents. Intended for reclaiming COS
    space after a chapter is soft-deleted.

    Returns:
        The distinct, non-empty storage keys in sorted order.
    """
    images = getattr(chapter, "images", None) or []
    keys: set[str] = {
        key for image in images if (key := getattr(image, "storage_key", None))
    }

    asset_ids = set(collect_asset_ids_for_chapter(chapter))
    intent_stmt = select(ChapterCoverIntent.asset_id).where(
        ChapterCoverIntent.chapter_id == chapter.id,
        ChapterCoverIntent.asset_id.isnot(None),
    )
    intent_rows = await db.execute(intent_stmt)
    asset_ids.update(str(aid) for aid in intent_rows.scalars().all() if aid)

    if asset_ids:
        key_stmt = select(Asset.storage_key).where(
            Asset.id.in_(asset_ids),
            Asset.storage_key.isnot(None),
        )
        key_rows = await db.execute(key_stmt)
        for storage_key in key_rows.scalars().all():
            if storage_key:
                keys.add(storage_key)
    return sorted(keys)