Files
life-echo/api/app/features/story/image_intent_extractor.py
Kevin 8af37e5e8e 修复:CI 部署环境与 ref 错配、迁移碎片化、图片意图 source_span、章节物化脏版式、会话历史与本地语音不一致
新增:TTS 上传 COS 与分片、章节 reading_segments 物化与快照、markdown 清洗、会话消息 repository、语音 store 重构与相关测试
2026-03-20 16:43:02 +08:00

105 lines
3.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
StoryImageIntentExtractor — 从 story markdown 提取唯一主图意图。
每个 story 必须且仅有一张主插图。提取策略:
1. 最具画面感的场景段落
2. 具有人物 + 动作 + 场景 + 时代细节的段落
3. 故事转折点或记忆锚点段落
4. 若 story 过于抽象,则退化为 story title/stage/time_refs/place_refs/people_refs/summary
"""
from __future__ import annotations
import re
from dataclasses import dataclass
@dataclass
class StoryImageIntentResult:
    """Primary-image intent extracted for a story."""

    # Short caption text for the illustration.
    caption: str
    # Longer descriptive text for the picture
    # (presumably the input for image generation -- confirm with callers).
    prompt_brief: str
    # Optional style identifier; None when the caller did not specify one.
    style_profile: str | None
# Vocabulary that hints at visual content; used by the simple heuristic
# paragraph scorer. Grouped by rough category for readability only -- the
# resulting set is flat.
_SCENE_WORDS = frozenset(
    " ".join(
        (
            "坐 站 走 跑 看 望 笑 哭 说 听 拿 放 穿 戴 吃 喝",  # actions
            "院子 路 巷 房 屋 树 花 山 水 河 桥 街 镇 村 城",  # places / scenery
            "夏天 冬天 春天 秋天 早晨 傍晚 夜晚",  # seasons / time of day
            "童年 少年 青年 中年 老人",  # life stages
            "奶奶 爷爷 父亲 母亲 孩子 朋友 老师 同学",  # people
        )
    ).split()
)
def _score_paragraph(text: str) -> float:
    """Give a paragraph a rough "how visual is it" score.

    Empty input, or input shorter than 20 characters once surrounding
    whitespace is removed, scores 0.0. Otherwise the score is the sum of:

    * a length component: stripped length / 100, capped at 2.0;
    * a scene-word component: 0.3 for every ``_SCENE_WORDS`` entry that
      occurs as a substring of the paragraph, capped at 2.0.
    """
    stripped = text.strip() if text else ""
    if len(stripped) < 20:
        return 0.0

    length_points = min(len(stripped) / 100.0, 2.0)
    # Each vocabulary entry counts at most once, however often it appears.
    hits = sum(word in stripped for word in _SCENE_WORDS)
    scene_points = min(hits * 0.3, 2.0)
    return length_points + scene_points
def extract_primary_image_intent(
    markdown: str,
    *,
    title: str = "",
    stage: str | None = None,
    summary: str | None = None,
    people_refs: list[str] | None = None,
    place_refs: list[str] | None = None,
    time_start: str | None = None,
    time_end: str | None = None,
    style_profile: str | None = None,
) -> StoryImageIntentResult:
    """Extract the single primary-image intent from a story's markdown.

    The body is split into paragraphs on blank lines and every paragraph is
    rated with ``_score_paragraph``. The highest-scoring paragraph (the first
    one wins on ties) is used when its score is at least 0.5. Otherwise the
    intent is built from the story metadata.

    Args:
        markdown: Story body; may be empty.
        title: Story title, used only by the fallback.
        stage: Story stage label, used only by the fallback.
        summary: Story summary; its first 200 characters are used by the
            fallback.
        people_refs: People mentioned; the first three are used by the
            fallback.
        place_refs: Places mentioned; the first three are used by the
            fallback.
        time_start: Start of the story's time range, used by the fallback.
        time_end: End of the story's time range, used by the fallback.
        style_profile: Passed through unchanged to the result.

    Returns:
        A ``StoryImageIntentResult`` whose ``caption`` is at most 80
        characters. ``prompt_brief`` is at most 500 characters when taken
        from a paragraph; in the fallback it is the full joined metadata
        text, or "人生故事" when no metadata is available.
    """
    # A paragraph break is a blank line. ``\s*`` between the two newlines also
    # accepts CRLF line endings and "blank" lines that only hold spaces/tabs,
    # which would otherwise glue the whole story into a single paragraph.
    paragraphs = [
        chunk.strip()
        for chunk in re.split(r"\n\s*\n", markdown or "")
        if chunk.strip()
    ]

    # Keep the first paragraph with the strictly highest score.
    best_text = ""
    best_score = 0.0
    for paragraph in paragraphs:
        score = _score_paragraph(paragraph)
        if score > best_score:
            best_text, best_score = paragraph, score

    if best_score >= 0.5:
        return StoryImageIntentResult(
            caption=best_text[:80],
            prompt_brief=best_text[:500].strip(),
            style_profile=style_profile,
        )

    # Fallback: title, stage, time range, places, people, summary -- in that
    # order.
    fallback_parts: list[str] = [title or "", stage or ""]
    if time_start or time_end:
        # With only one end known, the dangling "-" is stripped.
        fallback_parts.append(f"{time_start or ''}-{time_end or ''}".strip("-"))
    fallback_parts.extend((place_refs or [])[:3])
    fallback_parts.extend((people_refs or [])[:3])
    fallback_parts.append((summary or "")[:200])

    # Join with an explicit separator so adjacent fields stay distinguishable
    # (e.g. the time range is not fused with the first place name); empty and
    # whitespace-only parts are dropped.
    cleaned_parts = [part.strip() for part in fallback_parts if part and part.strip()]
    fallback_text = ", ".join(cleaned_parts) or "人生故事"

    return StoryImageIntentResult(
        caption=fallback_text[:80],
        prompt_brief=fallback_text,
        style_profile=style_profile,
    )