Files
life-echo/api/app/features/memoir/reading_segment_materialize.py
Kevin 1374f6e8f5 feat(memoir+conversation): 章节/故事最小可读字数;会话 hasUserMessage 与 UI 优化
- 后端:300 字门槛统一物化、hydrate、列表/PDF/详情;过短章节对读者隐藏
- 对话:首包前打字动画、大字模式排版、朗读/TTS 交互与布局稳定
- 首页:复用无用户消息会话;空列表「继续对话」与文案 i18n
- 章节阅读:标题进正文、封面与去重标题;阅读 Markdown 字号上调
2026-03-26 16:28:33 +08:00

209 lines
7.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
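"""Chapter reading-segment materialization and the "readable character count" threshold (single source of truth).

**Character threshold**: ``MIN_STORY_CHARS_IN_CHAPTER`` (currently 300). After image/link noise is removed
from the Markdown, ``story_plain_text_char_count`` estimates the character count, which is used for:
- **Single story**: whether it is written to ``reading_segments_json`` and whether it takes part in
  ``chapter_markdown_compose`` concatenation;
- **Chapter**: ``chapter_meets_minimum_display`` / ``chapter_body_meets_minimum_for_display`` decide whether
  it is shown to users (list/detail/PDF, see ``MemoirService``).
**Materialization**: ``build_reading_segments_snapshot`` writes ``reading_segments_json`` along the same path
as canonical (no signed URLs).
**API**: ``hydrate_reading_segments_from_snapshot`` resolves the snapshot (with signed URLs); old snapshots
are also filtered by the current threshold.
``resolve_reading_segments_for_chapter_detail`` only reads the already-materialized snapshot.
Other references: ``repo.append_chapter_compose``, ``helpers.chapter_to_dict`` (via ``resolve_reading_segments…``),
``chapter_markdown_compose`` (story concatenation).
"""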
from __future__ import annotations
import re
from typing import Any
from app.features.memoir.asset_resolver import (
collect_asset_ids_from_markdown,
resolve_asset_refs_in_markdown,
strip_asset_image_refs_from_markdown,
strip_image_placeholders,
)
from app.features.memoir.markdown_sanitize import sanitize_story_for_chapter_compose
from app.features.memoir.models import Chapter
# Shared minimum readable-character count: a story must reach it to be included
# in a chapter, and a chapter must reach it to be shown to readers. The count is
# the one produced by story_plain_text_char_count.
MIN_STORY_CHARS_IN_CHAPTER = 300
# Matches any run of whitespace; story_plain_text_char_count substitutes it with
# the empty string so whitespace never counts as readable text.
_WS_COLLAPSE = re.compile(r"\s+")
def story_plain_text_char_count(markdown: str) -> int:
    """Estimate the number of readable characters in a Markdown body.

    Every remaining character counts as one (CJK and Latin alike). The result
    feeds the story/chapter minimum-length threshold.

    Image placeholders, asset image references and Markdown images are removed
    entirely (including their alt text); links are reduced to their label;
    inline-code backticks, heading markers, emphasis markers and all
    whitespace are dropped before counting.

    Args:
        markdown: Markdown source; falsy or whitespace-only input counts as 0.

    Returns:
        The number of characters left after the noise above is stripped.
    """
    if not markdown or not str(markdown).strip():
        return 0
    t = strip_image_placeholders(markdown)
    t = strip_asset_image_refs_from_markdown(t)
    # Images must be removed BEFORE links are unwrapped: the link pattern also
    # matches the "[alt](url)" tail of "![alt](url)", which would turn the
    # image into "!alt" and make its alt text count as readable characters.
    t = re.sub(r"!\[([^\]]*)\]\([^)]+\)", "", t)
    t = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", t)
    t = re.sub(r"`+([^`]+)`+", r"\1", t)
    t = re.sub(r"^#{1,6}\s+", "", t, flags=re.MULTILINE)
    # Leftover emphasis/heading/code markers do not count as readable text.
    t = re.sub(r"[*_#`]", "", t)
    t = _WS_COLLAPSE.sub("", t)
    return len(t)
def story_meets_minimum_chapter_length(markdown: str) -> bool:
    """Tell whether a single story body reaches the chapter-inclusion threshold.

    Used both when the snapshot is materialized and when it is hydrated.
    """
    readable_chars = story_plain_text_char_count(markdown)
    return readable_chars >= MIN_STORY_CHARS_IN_CHAPTER
def chapter_body_meets_minimum_for_display(canonical_markdown: str) -> bool:
    """Tell whether a chapter's canonical Markdown is long enough to show readers.

    The threshold is the same one applied to single stories; a falsy body is
    measured as the empty string.
    """
    text = canonical_markdown if canonical_markdown else ""
    readable_chars = story_plain_text_char_count(text)
    return readable_chars >= MIN_STORY_CHARS_IN_CHAPTER
def chapter_meets_minimum_display(ch: Any) -> bool:
    """Decide from the chapter's current ``canonical_markdown`` whether it may be shown.

    A missing or falsy ``canonical_markdown`` attribute is treated as an empty body.
    """
    canonical = getattr(ch, "canonical_markdown", None)
    if not canonical:
        canonical = ""
    return chapter_body_meets_minimum_for_display(str(canonical))
def _primary_story_intent_asset_id(story: Any) -> str | None:
    """Return the asset id (as ``str``) of the story's first primary image intent.

    Only the first intent whose ``intent_role`` equals ``"primary"`` is
    inspected. None is returned when there is no such intent or when that
    intent has no truthy ``asset_id``.
    """
    intents = getattr(story, "image_intents", None) or []
    first_primary = next(
        (
            intent
            for intent in intents
            if getattr(intent, "intent_role", None) == "primary"
        ),
        None,
    )
    if first_primary is None:
        return None
    asset_id = getattr(first_primary, "asset_id", None)
    if not asset_id:
        return None
    return str(asset_id)
def _cover_intent_snapshot_from_story(story: Any) -> dict | None:
    """Build the JSON-persistable metadata of the story's primary image intent.

    The result carries no URL. None is returned when the story has no primary
    intent or when that intent has no truthy ``asset_id``.
    """
    candidates = getattr(story, "image_intents", None) or []
    primary = next(
        (c for c in candidates if getattr(c, "intent_role", None) == "primary"),
        None,
    )
    if not primary:
        return None

    asset_id = getattr(primary, "asset_id", None)
    if not asset_id:
        return None

    snapshot = {
        "asset_id": str(asset_id),
        "status": getattr(primary, "status", None) or "pending",
        "description": getattr(primary, "caption", None) or "故事配图",
        "prompt": getattr(primary, "prompt_brief", None),
        "style": getattr(primary, "style_profile", None),
        "error": getattr(primary, "error", None),
    }
    # Timestamps are serialized to ISO-8601 strings; a falsy timestamp becomes None.
    for field in ("created_at", "updated_at"):
        stamp = getattr(primary, field)
        snapshot[field] = stamp.isoformat() if stamp else None
    return snapshot
def _cover_dict_from_snapshot_row(
    snap: dict[str, Any], asset_url_map: dict[str, str]
) -> dict:
    """Expand a persisted cover snapshot into a raw image dict.

    The URL is looked up in ``asset_url_map`` under ``str(asset_id)``; it is
    None when the snapshot has no truthy ``asset_id`` or the map has no entry.
    Fields the snapshot does not carry are filled with fixed defaults.
    """
    asset_id = snap.get("asset_id")
    signed_url = None
    if asset_id:
        signed_url = asset_url_map.get(str(asset_id))

    description = snap.get("description") or "故事配图"
    status = snap.get("status") or "pending"

    return {
        "placeholder": "",
        "description": description,
        "index": 0,
        "status": status,
        "prompt": snap.get("prompt"),
        "url": signed_url,
        "storage_key": None,
        "provider": None,
        "style": snap.get("style"),
        "size": None,
        "error": snap.get("error"),
        "retryable": None,
        "created_at": snap.get("created_at"),
        "updated_at": snap.get("updated_at"),
    }
def build_reading_segments_snapshot(ch: Chapter) -> list[dict[str, Any]]:
    """Materialize the reading-segment snapshot for a chapter.

    Story links are visited in ``order_index`` order. Each segment stores the
    sanitized story body as-is (no URL resolution happens here) and, as
    ``cover_asset``, only the primary intent's metadata; the cover is omitted
    when the body already references the same asset inline. Stories with an
    empty sanitized body or below the minimum readable length are skipped.

    Per the original note, this is written along the same path as
    ``append_chapter_compose_version``.
    """
    ordered_links = list(getattr(ch, "story_links", None) or [])
    ordered_links.sort(key=lambda link: getattr(link, "order_index", 0))

    segments: list[dict[str, Any]] = []
    for link in ordered_links:
        story = getattr(link, "story", None)
        if story is None:
            continue

        story_title = (getattr(story, "title", None) or "").strip()
        story_markdown = (getattr(story, "canonical_markdown", None) or "").strip()
        body = sanitize_story_for_chapter_compose(story_markdown, story_title)
        if not body or not story_meets_minimum_chapter_length(body):
            continue

        primary_id = _primary_story_intent_asset_id(story)
        inline_ids = set(collect_asset_ids_from_markdown(body))
        # A cover is only kept when the body does not already show that asset.
        needs_cover = bool(primary_id) and primary_id not in inline_ids
        cover = _cover_intent_snapshot_from_story(story) if needs_cover else None

        segment = {
            "story_id": story.id,
            "body_markdown": body,
            "cover_asset": cover,
        }
        segments.append(segment)
    return segments
def hydrate_reading_segments_from_snapshot(
    ch: Chapter,
    asset_url_map: dict[str, str] | None = None,
) -> list[dict[str, Any]]:
    """Turn the persisted snapshot into its API shape (with signed URLs).

    Rows whose stored body is below the current minimum readable length are
    dropped, so snapshots written under an older threshold are filtered too.
    Asset references in each body are resolved through ``asset_url_map`` and
    the cover snapshot, if any, is normalized for the API.
    """
    from app.features.memoir import helpers as h

    asset_url_map = asset_url_map or {}

    def resolve(aid):
        return asset_url_map.get(aid)

    hydrated: list[dict[str, Any]] = []
    for row in getattr(ch, "reading_segments_json", None) or []:
        stored_body = row.get("body_markdown") or ""
        # Same check as at materialization time; also filters legacy snapshots.
        if not story_meets_minimum_chapter_length(stored_body):
            continue

        resolved_body = resolve_asset_refs_in_markdown(stored_body, resolve)

        cover_snapshot = row.get("cover_asset")
        cover = (
            h.first_normalized_image_for_api(
                _cover_dict_from_snapshot_row(cover_snapshot, asset_url_map)
            )
            if cover_snapshot
            else None
        )

        hydrated.append(
            {
                "story_id": row["story_id"],
                "body_markdown": resolved_body,
                "cover_asset": cover,
            }
        )
    return hydrated
def resolve_reading_segments_for_chapter_detail(
    ch: Chapter,
    asset_url_map: dict[str, str] | None = None,
) -> list[dict[str, Any]]:
    """Chapter detail: read segments from the already-materialized snapshot only.

    Returns an empty list when the chapter has no ``reading_segments_json``
    (attribute missing or None); otherwise the hydrated snapshot.
    """
    url_map = asset_url_map if asset_url_map else {}
    has_snapshot = getattr(ch, "reading_segments_json", None) is not None
    if has_snapshot:
        return hydrate_reading_segments_from_snapshot(ch, asset_url_map=url_map)
    return []