# api/app/features/story/image_intent_extractor.py

"""
StoryImageIntentExtractor — 从 story markdown 提取唯一主图意图。

每个 story 必须且仅有一张主插图。提取策略：
1. 最具画面感的场景段落
2. 具有人物 + 动作 + 场景 + 时代细节的段落
3. 故事转折点或记忆锚点段落
4. 若 story 过于抽象，则退化为 story title/stage/time_refs/place_refs/people_refs/summary
"""

from __future__ import annotations

import re
from dataclasses import dataclass


@dataclass
class StoryImageIntentResult:
    """Primary-image intent extracted for one story."""

    # Short text for the illustration (the extractor caps it at roughly 80 characters).
    caption: str
    # Longer text handed on as the image prompt brief.
    prompt_brief: str
    # Optional style profile; the extractor passes the caller's value through unchanged.
    style_profile: str | None


# Scene-related vocabulary used by the simple heuristic paragraph scorer.
# The space-separated literal lists actions, places/scenery, seasons and
# times of day, life stages, and people. `_score_paragraph` tests each entry
# with a substring check (`word in text`), so single-character entries match
# anywhere inside longer words.
_SCENE_WORDS = frozenset(
    "坐 站 走 跑 看 望 笑 哭 说 听 拿 放 穿 戴 吃 喝 院子 路 巷 房 屋 树 花 山 水 河 桥 街 镇 村 城 夏天 冬天 春天 秋天 早晨 傍晚 夜晚 童年 少年 青年 中年 老人 奶奶 爷爷 父亲 母亲 孩子 朋友 老师 同学".split()
)


def _score_paragraph(text: str) -> float:
    """Return a rough "how picturable is this paragraph" score.

    Empty input, or text shorter than 20 characters after stripping, scores
    0.0. Otherwise the score is the sum of two capped parts:

    * length: one point per 100 characters, capped at 2.0;
    * scene words: 0.3 per distinct ``_SCENE_WORDS`` entry found as a
      substring of the paragraph, capped at 2.0.
    """
    body = text.strip() if text else ""
    if len(body) < 20:
        return 0.0

    length_points = min(len(body) / 100.0, 2.0)
    hits = sum(1 for word in _SCENE_WORDS if word in body)
    scene_points = min(hits * 0.3, 2.0)
    return length_points + scene_points


def extract_primary_image_intent(
    markdown: str,
    *,
    title: str = "",
    stage: str | None = None,
    summary: str | None = None,
    people_refs: list[str] | None = None,
    place_refs: list[str] | None = None,
    time_start: str | None = None,
    time_end: str | None = None,
    style_profile: str | None = None,
) -> StoryImageIntentResult:
    """Extract the single primary-image intent from a story's markdown.

    The body is split into paragraphs on blank lines and each paragraph is
    scored with ``_score_paragraph``. The highest-scoring paragraph (the
    earliest one on ties) is used when its score reaches 0.5; otherwise the
    result is assembled from the story metadata.

    Args:
        markdown: Story body. Empty or whitespace-only input goes straight to
            the metadata fallback.
        title: Story title, used only by the fallback.
        stage: Story stage label, used only by the fallback.
        summary: Story summary; the fallback uses its first 200 characters.
        people_refs: People mentioned; the fallback uses the first three.
        place_refs: Places mentioned; the fallback uses the first three.
        time_start: Start of the time range, used only by the fallback.
        time_end: End of the time range, used only by the fallback.
        style_profile: Passed through unchanged to the result.

    Returns:
        A ``StoryImageIntentResult``. On the paragraph path, ``caption`` is
        the paragraph cut to 80 characters plus "…" when longer, and
        ``prompt_brief`` is its first 500 characters. On the fallback path,
        ``prompt_brief`` is the metadata joined with "，" (or "人生故事" when
        no metadata is given) and ``caption`` is its first 80 characters.
    """
    # A "blank line" between paragraphs may hold spaces/tabs or a carriage
    # return (CRLF input), so split on newline + optional whitespace + newline
    # instead of requiring two adjacent "\n" characters.
    stripped_chunks = (
        chunk.strip() for chunk in re.split(r"\n\s*\n", markdown or "")
    )
    paragraphs = [chunk for chunk in stripped_chunks if chunk]

    # Keep the first paragraph with the strictly highest score.
    best_text = ""
    best_score = 0.0
    for text in paragraphs:
        score = _score_paragraph(text)
        if score > best_score:
            best_text = text
            best_score = score

    if best_score >= 0.5:
        caption = best_text if len(best_text) <= 80 else best_text[:80] + "…"
        return StoryImageIntentResult(
            caption=caption,
            prompt_brief=best_text[:500].strip(),
            style_profile=style_profile,
        )

    # Fallback: the body is too short or too abstract, so describe the story
    # through title, stage, time range, places, people and summary.
    fallback_parts: list[str] = []
    if title:
        fallback_parts.append(title)
    if stage:
        fallback_parts.append(stage)
    if time_start or time_end:
        # strip("-") drops the dangling dash when only one bound is present.
        fallback_parts.append(f"{time_start or ''}-{time_end or ''}".strip("-"))
    if place_refs:
        fallback_parts.extend(place_refs[:3])
    if people_refs:
        fallback_parts.extend(people_refs[:3])
    if summary:
        fallback_parts.append(summary[:200])
    fallback_text = "，".join(p for p in fallback_parts if p) or "人生故事"
    return StoryImageIntentResult(
        caption=fallback_text[:80],
        prompt_brief=fallback_text,
        style_profile=style_profile,
    )