"""章节物化前对 story 正文的受限清洗：禁止表格、可选剥离与标题元数据重复的首行 heading。"""

from __future__ import annotations

import re


def _is_table_row(line: str) -> bool:
    s = line.strip()
    if not s.startswith("|"):
        return False
    return s.count("|") >= 2


def strip_markdown_tables(text: str) -> str:
    """Remove GFM pipe-table blocks from *text*.

    Every line that ``_is_table_row`` recognises is dropped; the remaining
    lines are re-joined with ``"\\n"`` and the result is stripped of leading
    and trailing whitespace. Falsy or whitespace-only input yields ``""``.
    """
    source = str(text) if text else ""
    if not source.strip():
        return ""
    # Dropping each table row individually also removes whole runs of them.
    kept = [row for row in source.splitlines() if not _is_table_row(row)]
    return "\n".join(kept).strip()


# Matches a single ATX heading line: one to six ``#`` characters, at least one
# whitespace character, then the heading text (group 1) with trailing
# whitespace excluded. A closing ``#`` run, if present, is not separated out
# and stays inside group 1.
_HEADING_LINE_RE = re.compile(r"^#{1,6}\s+(.+?)\s*$")


def _normalize_title_key(s: str) -> str:
    return "".join((s or "").split()).casefold()


def strip_leading_heading_if_matches_title(body: str, story_title: str) -> str:
    """Drop the leading markdown heading when it repeats the story title.

    The first non-blank line of *body* is inspected. If it is an ATX heading
    (``#`` to ``######``) whose text equals *story_title* after normalization
    (whitespace removed, case-folded), that line and any blank lines before
    it are removed. An optional ATX closing sequence (``## Title ##``) is
    ignored for the comparison.

    Args:
        body: Markdown text of the story.
        story_title: Title to compare the heading against.

    Returns:
        The text after the heading, with its lines re-joined by ``"\\n"`` and
        leading newlines removed, when the heading matches. Otherwise *body*
        is returned unchanged. A falsy *body* yields ``""``.
    """
    if not body or not str(body).strip():
        return body or ""
    title_key = _normalize_title_key(story_title or "")
    if not title_key:
        return body

    lines = str(body).splitlines()
    # Inspect the first line that has content, so blank lines placed ahead of
    # the heading do not hide it.
    first_idx = next((i for i, ln in enumerate(lines) if ln.strip()), None)
    if first_idx is None:
        return body
    found = _HEADING_LINE_RE.match(lines[first_idx].strip())
    if found is None:
        return body

    heading_text = found.group(1)
    candidate_keys = {_normalize_title_key(heading_text)}
    # An ATX heading may end with a closing run of '#' that is preceded by
    # whitespace ("# Title ##"). Compare without it as well, while still
    # accepting the raw text so a title that really ends in '#' keeps matching.
    without_hashes = heading_text.rstrip("#")
    if without_hashes != heading_text and without_hashes[-1:].isspace():
        candidate_keys.add(_normalize_title_key(without_hashes))
    if title_key not in candidate_keys:
        return body

    remainder = "\n".join(lines[first_idx + 1:])
    return remainder.lstrip("\n")


def sanitize_story_for_chapter_compose(body: str, story_title: str) -> str:
    """Clean a story body before it is materialized into a chapter.

    Pipe tables are removed first, then a leading heading that duplicates
    *story_title* is dropped; the final text is stripped of surrounding
    whitespace.
    """
    without_tables = strip_markdown_tables(body or "")
    without_heading = strip_leading_heading_if_matches_title(without_tables, story_title)
    if not without_heading:
        return ""
    return without_heading.strip()