- 后端:300 字门槛统一物化、hydrate、列表/PDF/详情;过短章节对读者隐藏 - 对话:首包前打字动画、大字模式排版、朗读/TTS 交互与布局稳定 - 首页:复用无用户消息会话;空列表「继续对话」与文案 i18n - 章节阅读:标题进正文、封面与去重标题;阅读 Markdown 字号上调
209 lines
7.8 KiB
Python
209 lines
7.8 KiB
Python
"""章节阅读片段物化与「可读字数」门槛(单一事实源)。

**字数阈值** ``MIN_STORY_CHARS_IN_CHAPTER``(当前 300):对 Markdown 去图片/链接噪声后
用 ``story_plain_text_char_count`` 估算字符数,用于:

- **单篇故事**:是否写入 ``reading_segments_json``、是否参与 ``chapter_markdown_compose`` 拼接;
- **章节**:``chapter_meets_minimum_display`` / ``chapter_body_meets_minimum_for_display`` 是否对
  用户展示(列表/详情/PDF 见 ``MemoirService``)。

**物化**:``build_reading_segments_snapshot`` 与 canonical 同路径写入 ``reading_segments_json``(无签名 URL)。

**API**:``hydrate_reading_segments_from_snapshot`` 解析快照(含签名 URL);旧快照亦按当前阈值过滤。
``resolve_reading_segments_for_chapter_detail`` 仅读已物化快照。

其它引用:``repo.append_chapter_compose``、``helpers.chapter_to_dict``(经
``resolve_reading_segments_for_chapter_detail``)、``chapter_markdown_compose``(故事拼接)。
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import re
|
||
from typing import Any
|
||
|
||
from app.features.memoir.asset_resolver import (
|
||
collect_asset_ids_from_markdown,
|
||
resolve_asset_refs_in_markdown,
|
||
strip_asset_image_refs_from_markdown,
|
||
strip_image_placeholders,
|
||
)
|
||
from app.features.memoir.markdown_sanitize import sanitize_story_for_chapter_compose
|
||
from app.features.memoir.models import Chapter
|
||
|
||
# Shared minimum readable-character count: a story must reach it to be
# included in a chapter, and a chapter must reach it to be shown to readers.
# Counts are the ones produced by story_plain_text_char_count.
MIN_STORY_CHARS_IN_CHAPTER: int = 300

# Matches any run of whitespace; used to drop whitespace before counting.
_WS_COLLAPSE: re.Pattern[str] = re.compile(r"\s+")
|
||
|
||
|
||
def story_plain_text_char_count(markdown: str) -> int:
    """Estimate the number of readable characters in a Markdown body.

    The result is compared against ``MIN_STORY_CHARS_IN_CHAPTER`` to decide
    whether a story or chapter is long enough. Every remaining
    non-whitespace character counts as one, so CJK and Latin characters are
    weighted equally.

    Args:
        markdown: Markdown source of a story or chapter body.

    Returns:
        ``0`` for empty or whitespace-only input; otherwise the length of the
        text left after image placeholders, ``asset`` image references,
        Markdown images, link targets, code fences/backticks, heading
        markers, emphasis markers and all whitespace have been removed.
    """
    if not markdown or not str(markdown).strip():
        return 0
    # Project helpers; their exact behaviour is not visible in this module.
    text = strip_image_placeholders(markdown)
    text = strip_asset_image_refs_from_markdown(text)
    # Images must be removed BEFORE links. The link pattern also matches the
    # "[alt](url)" tail of "![alt](url)", which previously turned an image
    # into "!alt" and made its alt text count toward the threshold.
    text = re.sub(r"!\[([^\]]*)\]\([^)]+\)", "", text)
    # Keep only the visible link text. Empty text is accepted so that a linked
    # image "[![alt](img)](href)" (now "[](href)") disappears entirely.
    text = re.sub(r"\[([^\]]*)\]\([^)]+\)", r"\1", text)
    # Inline code: keep the content, drop the backticks.
    text = re.sub(r"`+([^`]+)`+", r"\1", text)
    # ATX heading markers at the start of a line.
    text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
    # Leftover emphasis / heading / backtick characters are not "readable".
    text = re.sub(r"[*_#`]", "", text)
    # Whitespace never counts.
    text = _WS_COLLAPSE.sub("", text)
    return len(text)
|
||
|
||
|
||
def story_meets_minimum_chapter_length(markdown: str) -> bool:
    """Tell whether a single story body reaches the chapter-inclusion threshold.

    Used both when materializing the reading-segment snapshot and when
    filtering segments while hydrating it.

    Args:
        markdown: Markdown body of one story.

    Returns:
        ``True`` when the readable character count is at least
        ``MIN_STORY_CHARS_IN_CHAPTER``.
    """
    readable_chars = story_plain_text_char_count(markdown)
    return MIN_STORY_CHARS_IN_CHAPTER <= readable_chars
|
||
|
||
|
||
def chapter_body_meets_minimum_for_display(canonical_markdown: str) -> bool:
    """Tell whether a chapter's canonical Markdown is long enough to show readers.

    The threshold is the same one applied to individual stories.

    Args:
        canonical_markdown: Chapter body; a falsy value is treated as ``""``.

    Returns:
        ``True`` when the readable character count is at least
        ``MIN_STORY_CHARS_IN_CHAPTER``.
    """
    body = canonical_markdown if canonical_markdown else ""
    return MIN_STORY_CHARS_IN_CHAPTER <= story_plain_text_char_count(body)
|
||
|
||
|
||
def chapter_meets_minimum_display(ch: Any) -> bool:
    """Decide from a chapter object whether it may be shown to readers.

    Args:
        ch: Any object; its ``canonical_markdown`` attribute is read if
            present, and a missing or falsy value is treated as ``""``.

    Returns:
        The result of ``chapter_body_meets_minimum_for_display`` on the
        stringified canonical Markdown.
    """
    canonical = getattr(ch, "canonical_markdown", None)
    if not canonical:
        canonical = ""
    return chapter_body_meets_minimum_for_display(str(canonical))
|
||
|
||
|
||
def _primary_story_intent_asset_id(story: Any) -> str | None:
    """Return the asset id of the story's first ``primary`` image intent.

    Args:
        story: Object whose ``image_intents`` attribute (if any) is iterated.

    Returns:
        The asset id as a string, or ``None`` when there is no intent with
        ``intent_role == "primary"`` or when the first such intent has no
        truthy ``asset_id``. Later primary intents are not consulted.
    """
    intents = getattr(story, "image_intents", None) or []
    first_primary = next(
        (
            intent
            for intent in intents
            if getattr(intent, "intent_role", None) == "primary"
        ),
        None,
    )
    if first_primary is None:
        return None
    asset_id = getattr(first_primary, "asset_id", None)
    if not asset_id:
        return None
    return str(asset_id)
|
||
|
||
|
||
def _cover_intent_snapshot_from_story(story: Any) -> dict | None:
    """Build the JSON-persistable metadata of the story's primary image intent.

    The snapshot carries no URL; only the intent's own fields are copied.

    Args:
        story: Object whose ``image_intents`` attribute (if any) is searched
            for the first entry with ``intent_role == "primary"``.

    Returns:
        ``None`` when there is no primary intent or it has no truthy
        ``asset_id``; otherwise a dict with the keys ``asset_id``, ``status``,
        ``description``, ``prompt``, ``style``, ``error``, ``created_at`` and
        ``updated_at`` (timestamps as ISO-8601 strings or ``None``).
    """
    candidates = getattr(story, "image_intents", None) or []
    chosen = next(
        (c for c in candidates if getattr(c, "intent_role", None) == "primary"),
        None,
    )
    if not chosen:
        return None

    asset_id = getattr(chosen, "asset_id", None)
    if not asset_id:
        return None

    def _iso_or_none(moment: Any) -> Any:
        # Timestamps are stored as ISO strings; missing ones stay None.
        return moment.isoformat() if moment else None

    snapshot: dict = {"asset_id": str(asset_id)}
    # Fallbacks: an unset status is "pending", an unset caption gets the
    # default description text.
    snapshot["status"] = getattr(chosen, "status", None) or "pending"
    snapshot["description"] = getattr(chosen, "caption", None) or "故事配图"
    snapshot["prompt"] = getattr(chosen, "prompt_brief", None)
    snapshot["style"] = getattr(chosen, "style_profile", None)
    snapshot["error"] = getattr(chosen, "error", None)
    snapshot["created_at"] = _iso_or_none(chosen.created_at)
    snapshot["updated_at"] = _iso_or_none(chosen.updated_at)
    return snapshot
|
||
|
||
|
||
def _cover_dict_from_snapshot_row(
    snap: dict[str, Any], asset_url_map: dict[str, str]
) -> dict:
    """Expand a persisted cover snapshot into the full image dict shape.

    Args:
        snap: Cover metadata as stored in the snapshot (``asset_id``,
            ``status``, ``description``, ``prompt``, ``style``, ``error``,
            ``created_at``, ``updated_at``; all optional).
        asset_url_map: Mapping from asset id (string) to URL.

    Returns:
        A dict with every image field present. ``url`` is looked up by the
        stringified asset id (``None`` when the id is missing or unmapped);
        fields the snapshot does not store are filled with fixed defaults.
    """
    asset_id = snap.get("asset_id")
    resolved_url = None
    if asset_id:
        resolved_url = asset_url_map.get(str(asset_id))

    image: dict = {}
    image["placeholder"] = ""
    image["description"] = snap.get("description") or "故事配图"
    image["index"] = 0
    image["status"] = snap.get("status") or "pending"
    image["prompt"] = snap.get("prompt")
    image["url"] = resolved_url
    # Not stored in the snapshot, so always empty here.
    image["storage_key"] = None
    image["provider"] = None
    image["style"] = snap.get("style")
    image["size"] = None
    image["error"] = snap.get("error")
    image["retryable"] = None
    image["created_at"] = snap.get("created_at")
    image["updated_at"] = snap.get("updated_at")
    return image
|
||
|
||
|
||
def build_reading_segments_snapshot(ch: Chapter) -> list[dict[str, Any]]:
    """Materialize the reading-segment snapshot for a chapter.

    Stories are visited in ascending ``order_index`` of the chapter's
    ``story_links``. A story contributes one segment when its sanitized body
    is non-empty and reaches the minimum story length. Bodies are stored as
    produced by ``sanitize_story_for_chapter_compose`` (asset references are
    not resolved here). Cover metadata is stored only when the story's primary
    asset is not already referenced inside the body.

    Args:
        ch: Chapter whose ``story_links`` (each exposing ``story`` and
            ``order_index``) are read.

    Returns:
        A list of dicts with the keys ``story_id``, ``body_markdown`` and
        ``cover_asset`` (the latter may be ``None``).
    """
    ordered_links = sorted(
        getattr(ch, "story_links", None) or [],
        key=lambda link: getattr(link, "order_index", 0),
    )

    segments: list[dict[str, Any]] = []
    for link in ordered_links:
        story = getattr(link, "story", None)
        if story is None:
            continue

        story_title = (getattr(story, "title", None) or "").strip()
        source_md = (getattr(story, "canonical_markdown", None) or "").strip()
        body = sanitize_story_for_chapter_compose(source_md, story_title)
        # Skip stories that are empty or too short to be included.
        if not body or not story_meets_minimum_chapter_length(body):
            continue

        primary_id = _primary_story_intent_asset_id(story)
        ids_in_body = set(collect_asset_ids_from_markdown(body))
        # A cover is redundant when the same asset already appears inline.
        needs_cover = bool(primary_id) and primary_id not in ids_in_body
        cover: dict | None = (
            _cover_intent_snapshot_from_story(story) if needs_cover else None
        )

        segments.append(
            {
                "story_id": story.id,
                "body_markdown": body,
                "cover_asset": cover,
            }
        )
    return segments
|
||
|
||
|
||
def hydrate_reading_segments_from_snapshot(
    ch: Chapter,
    asset_url_map: dict[str, str] | None = None,
) -> list[dict[str, Any]]:
    """Turn the persisted reading-segment snapshot into its API form.

    Each stored row is re-checked against the current minimum story length,
    so rows written under an older threshold are filtered as well. Asset
    references in the body are resolved through ``asset_url_map`` and the
    cover metadata, when present, is expanded and normalized.

    Args:
        ch: Chapter whose ``reading_segments_json`` rows are read.
        asset_url_map: Mapping from asset id to URL; ``None`` means empty.

    Returns:
        A list of dicts with the keys ``story_id``, ``body_markdown`` and
        ``cover_asset`` (``None`` when the row has no cover).
    """
    # Function-scope import kept as in the original (presumably to avoid an
    # import cycle with the helpers module; not confirmed from this file).
    from app.features.memoir import helpers as h

    url_by_asset = asset_url_map or {}

    def _lookup(aid):
        return url_by_asset.get(aid)

    hydrated: list[dict[str, Any]] = []
    for row in getattr(ch, "reading_segments_json", None) or []:
        stored_body = row.get("body_markdown") or ""
        # Same gate as at materialization time.
        if not story_meets_minimum_chapter_length(stored_body):
            continue

        resolved_body = resolve_asset_refs_in_markdown(stored_body, _lookup)

        cover_meta = row.get("cover_asset")
        cover = None
        if cover_meta:
            expanded = _cover_dict_from_snapshot_row(cover_meta, url_by_asset)
            cover = h.first_normalized_image_for_api(expanded)

        hydrated.append(
            {
                "story_id": row["story_id"],
                "body_markdown": resolved_body,
                "cover_asset": cover,
            }
        )
    return hydrated
|
||
|
||
|
||
def resolve_reading_segments_for_chapter_detail(
    ch: Chapter,
    asset_url_map: dict[str, str] | None = None,
) -> list[dict[str, Any]]:
    """Return the reading segments for the chapter-detail view.

    Only an already materialized snapshot is used; nothing is built here.

    Args:
        ch: Chapter whose ``reading_segments_json`` attribute is inspected.
        asset_url_map: Mapping from asset id to URL; ``None`` means empty.

    Returns:
        ``[]`` when the chapter has no snapshot (attribute missing or
        ``None``); otherwise the hydrated segments.
    """
    snapshot = getattr(ch, "reading_segments_json", None)
    if snapshot is None:
        return []
    return hydrate_reading_segments_from_snapshot(
        ch, asset_url_map=asset_url_map or {}
    )
|