FishServer/fish_api/app/services/sonar_video.py

"""声呐视频：后台处理 ``BIOMASS_SONAR_VIDEO_DIR`` 中的**当前 mtime 最新**视频文件。

支持 MP4、MKV、MOV。MP4/MOV 在录制中缺少 ``moov`` atom，须等录完才能处理；
MKV 的元数据写在文件头，录制中即可读取，无需等待。

切片策略由 ``Settings.biomass_sonar_slice_order`` 决定：

* **sequential**（默认）：从 t=0 起按 ``BIOMASS_SONAR_VIDEO_SLICE_SEC`` 顺序切**完整**
  块（ffprobe 可得 duration 且剩余不足一块则不发布）；增长中的文件随 duration 增大继续切。
* **tail**：当原文件 **(path, size)** 变化时，用 ``-sseof`` 取**最后 N 秒**再处理。

之后对切片做光流 overlay（可选）→ H.264 转码并发布。``GET .../sonar/video/`` 返回
最近一次成功发布的 URL。
"""

from __future__ import annotations

import asyncio
import datetime
import math
import subprocess
import time
from pathlib import Path
from typing import Optional, Tuple

from loguru import logger

from app.compat import to_thread
from app.logging_config import new_run_id, stage
from app.services.action_watch import iter_mp4
from app.services.measure import _ffprobe_video_codec_name, transcode_src_to_h264_dst
from app.services.sonar_optical_flow import run_sonar_optical_flow_overlay
from app.services.video_slice import _get_ffmpeg_path, _get_ffprobe_path
from app.settings import Settings

# Default value of the ``_client_id`` parameter of ``get_sonar_video_public_url``.
DEFAULT_CLIENT_ID = "default"

# --- published state (read by GET endpoint) ---
# Public URL of the most recently published clip; stays "" until the watch loop
# seeds it from an existing file or publishes a new one.
_published_url: str = ""
# Guards every read/write of ``_published_url``.
# NOTE(review): the lock is created at import time; on Python < 3.10 asyncio
# primitives bind to the event loop that is current at construction — confirm
# the minimum supported interpreter.
_published_lock = asyncio.Lock()

# Resolved path of the source file for which the "not ready yet" INFO line was
# last emitted, so the same file is not reported at INFO level on every poll.
_last_sonar_skip_logged_path: Optional[str] = None


def _public_media_url(settings: Settings, basename: str) -> str:
    """Return the public ``/media/`` URL for *basename*.

    Trailing slashes on ``settings.public_base_url`` are removed first so the
    result never contains a doubled ``/`` before ``media``.
    """
    root = settings.public_base_url.rstrip("/")
    return "{}/media/{}".format(root, basename)


def _safe_sonar_media_basename(raw: str) -> str:
    """Reduce a configured media name to a bare file name.

    Surrounding whitespace and any directory components are dropped. When
    nothing usable remains (empty/``None`` input, or a path with an empty final
    component) the fallback ``"biomass_sonar.mp4"`` is returned.
    """
    fallback = "biomass_sonar.mp4"
    cleaned = raw.strip() if raw else ""
    leaf = Path(cleaned).name if cleaned else ""
    return leaf if leaf else fallback


# Lower-case container suffixes treated as "streaming": their metadata is stored
# at the start of the file, so they are readable while still being recorded.
# Files with any other suffix are probed for readability before processing.
_STREAMING_SUFFIXES = frozenset({".mkv", ".ts", ".webm"})


# ---------------------------------------------------------------------------
# Probe: is the video file ready to process?
# ---------------------------------------------------------------------------

def _is_ready_to_process(path: Path) -> bool:
    """Tell whether *path* can be handed to the publish pipeline right now.

    Streaming containers (suffix in ``_STREAMING_SUFFIXES``) only need to exist
    and be non-empty; every other file must pass ``_probe_moov_readable``.
    """
    is_streaming = path.suffix.lower() in _STREAMING_SUFFIXES
    if not is_streaming:
        return _probe_moov_readable(path)
    return path.is_file() and path.stat().st_size > 0


def _probe_moov_readable(path: Path) -> bool:
    """Check whether *path* can be opened as a video (ffprobe first, cv2 second).

    ffprobe is asked for ``format=duration`` with a 5 second timeout. If it runs,
    its verdict is final: exit code 0 with non-empty output means readable.
    Only when ffprobe itself cannot be run (missing binary, timeout, any other
    exception) is OpenCV tried, which requires the capture to open and to
    report a positive frame count.
    """
    log = logger.bind(pipeline="sonar_watch", source=path.name)
    ffprobe = _get_ffprobe_path()
    probe_cmd = [
        ffprobe,
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        str(path),
    ]
    try:
        probe = subprocess.run(
            probe_cmd, capture_output=True, text=True, timeout=5,
        )
        has_duration = probe.returncode == 0 and bool(probe.stdout.strip())
        if not has_duration:
            log.debug(
                "[声呐监控] ffprobe 无有效 duration（可能缺 moov）| ffprobe={} | {}",
                ffprobe,
                path.name,
            )
        return has_duration
    except FileNotFoundError:
        # ffprobe binary is missing: report loudly, then try cv2 below.
        log.warning(
            "[声呐监控] 未找到 ffprobe（与 FFMPEG_PATH 同目录或 PATH）| 配置路径={}",
            ffprobe,
        )
    except Exception as exc:
        log.debug(
            "[声呐监控] ffprobe 失败（{}），改用 cv2 探测 | ffprobe={}",
            exc,
            ffprobe,
        )

    # Fallback path, reached only when ffprobe could not be executed.
    try:
        import cv2
        capture = cv2.VideoCapture(str(path))
        readable = capture.isOpened() and capture.get(cv2.CAP_PROP_FRAME_COUNT) > 0
        capture.release()
        if not readable:
            log.debug("[声呐监控] cv2：无法读取 {}", path.name)
        return readable
    except Exception:
        return False


def _probe_media_duration_sec(path: Path) -> Optional[float]:
    """Return the container duration of *path* in seconds, or ``None``.

    Runs ffprobe for ``format=duration`` (30 second timeout). ``None`` is
    returned when ffprobe exits non-zero, prints nothing / ``N/A`` / ``nan``,
    prints something that is not a number, yields a non-finite or non-positive
    value, times out, or cannot be started.
    """
    probe_cmd = [
        _get_ffprobe_path(),
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        str(path),
    ]
    try:
        proc = subprocess.run(
            probe_cmd, capture_output=True, text=True, timeout=30,
        )
        if proc.returncode != 0:
            return None
        text = (proc.stdout or "").strip()
        if text.lower() in ("", "n/a", "nan"):
            return None
        seconds = float(text)
    except (ValueError, subprocess.TimeoutExpired, OSError):
        return None
    return seconds if math.isfinite(seconds) and seconds > 0 else None


def _extract_range_slice(
    src: Path,
    slice_out: Path,
    start_sec: float,
    duration_sec: float,
) -> bool:
    """Stream-copy ``[start_sec, start_sec + duration_sec)`` of *src* to *slice_out*.

    ``-ss``/``-t`` are passed as input options (before ``-i``). The output's
    parent directory is created and any stale output removed up front. Returns
    ``True`` only when ffmpeg exits 0 and leaves a non-empty file; on every
    failure path after ffmpeg was started the output file is removed.
    Returns ``False`` without running ffmpeg when *src* is missing/empty or the
    requested range is not finite, starts below 0, or has no positive length.
    """
    slice_out.parent.mkdir(parents=True, exist_ok=True)
    slice_out.unlink(missing_ok=True)
    if not (src.is_file() and src.stat().st_size > 0):
        return False

    begin = float(start_sec)
    length = float(duration_sec)
    begin_valid = math.isfinite(begin) and begin >= 0
    length_valid = math.isfinite(length) and length > 0
    if not (begin_valid and length_valid):
        return False

    ffmpeg = _get_ffmpeg_path()
    log = logger.bind(pipeline="sonar_watch", source=src.name)
    log.info(
        "[声呐监控] 顺序切片提取 | ffmpeg={} | {:.3f}s–{:.3f}s（{:.0f}s）| {} -> {}",
        ffmpeg,
        begin,
        begin + length,
        length,
        src.name,
        slice_out.name,
    )
    argv = [
        ffmpeg, "-y", "-hide_banner",
        "-loglevel", "error",
        "-ss", str(begin),
        "-t", str(length),
        "-i", str(src),
        "-c", "copy",
        "-avoid_negative_ts", "make_zero",
        str(slice_out),
    ]
    try:
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=600)
        produced = (
            proc.returncode == 0
            and slice_out.is_file()
            and slice_out.stat().st_size > 0
        )
        if produced:
            log.info(
                "[声呐监控] 顺序切片成功 | {} -> {}（{} 字节）",
                src.name,
                slice_out.name,
                slice_out.stat().st_size,
            )
            return True
    except Exception as exc:
        log.debug("[声呐监控] 顺序切片异常：{} | {}", src.name, exc)
        slice_out.unlink(missing_ok=True)
        return False

    # ffmpeg ran but did not produce a usable file: report the end of stderr.
    log.warning(
        "[声呐监控] 顺序切片失败 | {} | stderr尾={}",
        src.name,
        (proc.stderr or "")[-500:],
    )
    slice_out.unlink(missing_ok=True)
    return False


def _extract_tail_slice(src: Path, slice_out: Path, duration_sec: float) -> bool:
    """Extract the last ``duration_sec`` seconds of *src* with stream copy.

    Strategy:

    1. ``ffmpeg -sseof -N`` + ``-c copy``. This is tried first because a
       growing MKV often has no known duration, so computing a start offset
       from ffprobe is not reliable.
    2. If that does not yield a non-empty file, fall back to a plain
       ``-c copy`` of the whole source (covers sources shorter than the
       window, or ``-sseof`` not being usable).

    Args:
        src: Source video file.
        slice_out: Destination file; its parent directory is created and any
            stale file at this path is removed before extraction.
        duration_sec: Length of the tail window in seconds.

    Returns:
        ``True`` when one of the two attempts exits 0 and leaves a non-empty
        ``slice_out``. ``False`` when *src* is missing or empty, when
        ``duration_sec`` is not a finite positive number (same validation as
        ``_extract_range_slice``; ffmpeg is not started), or when both attempts
        fail — in which case ``slice_out`` is removed.
    """
    slice_out.parent.mkdir(parents=True, exist_ok=True)
    slice_out.unlink(missing_ok=True)
    if not src.is_file() or src.stat().st_size <= 0:
        return False

    sec = float(duration_sec)
    # A zero/negative/NaN window would build a nonsensical ``-sseof`` argument
    # and then silently degrade to copying the entire file; refuse instead.
    if not math.isfinite(sec) or sec <= 0:
        return False

    ffmpeg = _get_ffmpeg_path()
    log = logger.bind(pipeline="sonar_watch", source=src.name)
    log.info(
        "[声呐监控] 尾段提取开始 | ffmpeg={} | 最后 {:.0f}s | {} -> {}",
        ffmpeg,
        sec,
        src.name,
        slice_out.name,
    )

    # stderr of the most recent ffmpeg run that actually completed. Stays ""
    # when every attempt raised before producing a result (e.g. ffmpeg binary
    # missing or timeout), so the final log line never touches an unbound name.
    last_stderr = ""

    # --- primary: -sseof (works on growing MKV even when duration is unknown) ---
    sseof_cmd = [
        ffmpeg, "-y", "-hide_banner", "-loglevel", "error",
        "-sseof", f"-{sec}",
        "-i", str(src),
        "-t", str(sec),
        "-c", "copy",
        "-avoid_negative_ts", "make_zero",
        str(slice_out),
    ]
    try:
        r = subprocess.run(sseof_cmd, capture_output=True, text=True, timeout=600)
        if r.returncode == 0 and slice_out.is_file() and slice_out.stat().st_size > 0:
            log.info(
                "[声呐监控] 尾段提取成功（-sseof {:.0f}s copy）| ffmpeg={} | {} -> {}（{} 字节）",
                sec,
                ffmpeg,
                src.name,
                slice_out.name,
                slice_out.stat().st_size,
            )
            return True
        last_stderr = r.stderr or ""
    except Exception as e:
        log.debug("[声呐监控] -sseof 提取异常：{}（{}）", src.name, e)

    # --- fallback: copy entire file (source shorter than window or -sseof unsupported) ---
    slice_out.unlink(missing_ok=True)
    copy_cmd = [
        ffmpeg, "-y", "-hide_banner", "-loglevel", "error",
        "-i", str(src),
        "-c", "copy",
        str(slice_out),
    ]
    try:
        r = subprocess.run(copy_cmd, capture_output=True, text=True, timeout=600)
        if r.returncode == 0 and slice_out.is_file() and slice_out.stat().st_size > 0:
            log.info(
                "[声呐监控] 尾段提取成功（整段 -c copy 回退）| ffmpeg={} | {} -> {}（{} 字节）",
                ffmpeg,
                src.name,
                slice_out.name,
                slice_out.stat().st_size,
            )
            return True
        last_stderr = r.stderr or ""
    except Exception as e:
        log.warning("[声呐监控] 尾段提取回退异常：{}（{}）", src.name, e)

    log.warning(
        "[声呐监控] 尾段提取失败 | {} | stderr={}",
        src.name, last_stderr[:500],
    )
    slice_out.unlink(missing_ok=True)
    return False


# ---------------------------------------------------------------------------
# Publish pipeline: slice (tail or sequential) → optical flow (optional) → H.264
# ---------------------------------------------------------------------------

async def _publish_video(
    src: Path,
    media_root: Path,
    dst_stem: str,
    settings: Settings,
    *,
    range_start_sec: Optional[float] = None,
) -> Optional[Path]:
    """Extract a slice → optional optical-flow → H.264 transcode → verify → save.

    * ``tail`` order: ignores ``range_start_sec``; uses ``-sseof`` last N seconds.
    * ``sequential`` order: requires ``range_start_sec``; uses ``-ss`` / ``-t`` copy.

    Args:
        src: Source recording to slice.
        media_root: Directory that receives the temporary and published files.
        dst_stem: File-name prefix for everything written into ``media_root``.
        settings: Supplies ``biomass_sonar_slice_order``,
            ``biomass_sonar_video_slice_sec``, ``biomass_sonar_optical_flow`` and
            ``biomass_sonar_optical_flow_resize``.
        range_start_sec: Start offset of the block to cut (sequential order).

    Returns the path of the published file (``{dst_stem}_{timestamp}.mp4``), or
    ``None`` on failure. Exceptions raised inside the pipeline are logged and
    turned into ``None``; the temporary slice file is always removed.
    """
    order = settings.biomass_sonar_slice_order
    src_size_mb = src.stat().st_size / (1024 * 1024) if src.is_file() else 0
    slice_sec = float(settings.biomass_sonar_video_slice_sec)
    # One run id per publish attempt so all stage/log records can be correlated.
    rid = new_run_id("sonar_watch")
    log = logger.bind(pipeline="sonar_watch", run_id=rid, source=src.name)

    # NOTE(review): this branch treats every value other than "sequential" as
    # tail for logging, while the extraction below treats every value other
    # than "tail" as sequential — the two only agree for those two values.
    if order == "sequential":
        if range_start_sec is None:
            log.error("[声呐监控] sequential 模式缺少 range_start_sec：{}", src.name)
            return None
        log.info(
            "[声呐监控] 开始处理：{} ({:.1f} MB) | 顺序切片 t={:.3f}s | 块长={:.0f}s | 光流={}",
            src.name,
            src_size_mb,
            range_start_sec,
            slice_sec,
            settings.biomass_sonar_optical_flow,
        )
    else:
        log.info(
            "[声呐监控] 开始处理：{} ({:.1f} MB) | 尾段={:.0f}s | 光流={}",
            src.name,
            src_size_mb,
            slice_sec,
            settings.biomass_sonar_optical_flow,
        )

    t0 = time.monotonic()
    # Local-time timestamp with one-second resolution; it becomes part of the
    # published (and optical-flow) file names.
    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    # The intermediate slice uses this name for both slice orders.
    slice_tmp = media_root / f"{dst_stem}_tail_slice.mkv"
    slice_tmp.unlink(missing_ok=True)
    # Transcode target; only renamed to its final name after verification.
    tmp = media_root / f"{dst_stem}_tmp.mp4"
    tmp.unlink(missing_ok=True)

    extract_metrics = {
        "slice_sec": slice_sec,
        "src_mb": round(src_size_mb, 2),
        "slice_order": order,
    }
    if order == "sequential" and range_start_sec is not None:
        extract_metrics["range_start_sec"] = round(range_start_sec, 4)

    try:
        # --- step 1: cut the slice (blocking ffmpeg call runs in a worker thread) ---
        if order == "tail":
            stage_title = f"声呐尾段提取（{src.name}）"
            extract_step = "extract_tail"
            with stage(
                stage_title,
                pipeline="sonar_watch",
                step=extract_step,
                run_id=rid,
                source=src.name,
                metrics=extract_metrics,
                raise_on_error=False,
            ):
                slice_ok = await to_thread(
                    _extract_tail_slice, src, slice_tmp, slice_sec,
                )
        else:
            assert range_start_sec is not None
            stage_title = (
                f"声呐顺序切片 {range_start_sec:.1f}s–{range_start_sec + slice_sec:.1f}s"
                f"（{src.name}）"
            )
            with stage(
                stage_title,
                pipeline="sonar_watch",
                step="extract_sequential",
                run_id=rid,
                source=src.name,
                metrics=extract_metrics,
                raise_on_error=False,
            ):
                slice_ok = await to_thread(
                    _extract_range_slice,
                    src,
                    slice_tmp,
                    range_start_sec,
                    slice_sec,
                )

        if not slice_ok:
            log.warning("[声呐监控] 切片提取失败，跳过发布：{}", src.name)
            return None

        if slice_tmp.is_file():
            log.info(
                "[声呐监控] 切片已就绪 | {}（{:.2f} MB）→ 后续光流/H.264",
                slice_tmp.name,
                slice_tmp.stat().st_size / (1024 * 1024),
            )

        # --- step 2 (optional): optical-flow overlay ---
        # On success the overlay file is kept under a timestamped name and
        # becomes the transcode input; on failure the raw slice is transcoded.
        transcode_src = slice_tmp
        if settings.biomass_sonar_optical_flow:
            flow_dst = media_root / f"{dst_stem}_optical_flow_{ts}.mp4"
            flow_tmp = media_root / f"{dst_stem}_flow_tmp.mp4"
            flow_tmp.unlink(missing_ok=True)
            with stage(
                f"声呐光流叠加（{src.name}）",
                pipeline="sonar_watch",
                step="optical_flow",
                run_id=rid,
                source=src.name,
                metrics={"resize": settings.biomass_sonar_optical_flow_resize},
                raise_on_error=False,
            ):
                flow_ok = await to_thread(
                    run_sonar_optical_flow_overlay, slice_tmp, flow_tmp, settings,
                )
            if flow_ok and flow_tmp.is_file() and flow_tmp.stat().st_size > 0:
                flow_tmp.replace(flow_dst)
                transcode_src = flow_dst
                log.info(
                    "[声呐监控] 光流文件已保存：{} ({:.1f} MB)",
                    flow_dst.name,
                    flow_dst.stat().st_size / (1024 * 1024),
                )
            else:
                flow_tmp.unlink(missing_ok=True)
                log.warning("[声呐监控] 光流叠加失败，对原切片直接转码：{}", src.name)

        # --- step 3: H.264 transcode into the temporary target ---
        in_codec = _ffprobe_video_codec_name(transcode_src) or "未知"
        log.info(
            "[声呐监控] H.264 发布转码（FishMeasure 同款管线）| 输入编码={} | {} -> {}",
            in_codec,
            transcode_src.name,
            tmp.name,
        )

        with stage(
            f"声呐 H.264 转码（{src.name}）",
            pipeline="sonar_watch",
            step="transcode_h264",
            run_id=rid,
            source=src.name,
            raise_on_error=False,
        ):
            ok = await to_thread(transcode_src_to_h264_dst, transcode_src, tmp)
        if not (ok and tmp.is_file() and tmp.stat().st_size > 0):
            tmp.unlink(missing_ok=True)
            log.warning("[声呐监控] 转码失败：{}", src.name)
            return None

        # --- step 4: verify the transcoded file before exposing it ---
        if not _probe_moov_readable(tmp):
            log.warning("[声呐监控] 转码产物不可播放，已丢弃：{}", src.name)
            tmp.unlink(missing_ok=True)
            return None

        out_codec = _ffprobe_video_codec_name(tmp) or "未知"
        log.info(
            "[声呐监控] H.264 转码产物已验证 | codec_name={} | {}（{:.2f} MB）",
            out_codec,
            tmp.name,
            tmp.stat().st_size / (1024 * 1024),
        )

        # --- step 5: rename the verified file to its final, timestamped name ---
        dst = media_root / f"{dst_stem}_{ts}.mp4"
        tmp.replace(dst)
        elapsed = time.monotonic() - t0
        dst_mb = dst.stat().st_size / (1024 * 1024)

        pub_metrics = {
            "src_mb": round(src_size_mb, 2),
            "dst_mb": round(dst_mb, 2),
            "dst_name": dst.name,
            "slice_order": order,
        }
        if order == "sequential" and range_start_sec is not None:
            pub_metrics["range_start_sec"] = round(range_start_sec, 4)

        # Structured "publish_total" event covering the whole attempt.
        logger.bind(
            event=True,
            pipeline="sonar_watch",
            step="publish_total",
            run_id=rid,
            source=src.name,
            status="success",
            duration_ms=round(elapsed * 1000.0, 3),
            metrics=pub_metrics,
        ).info(
            "[声呐监控] 发布完成 | 源 {} ({:.1f} MB) → {} ({:.1f} MB) | 耗时={:.1f}s",
            src.name,
            src_size_mb,
            dst.name,
            dst_mb,
            elapsed,
        )
        return dst
    except Exception:
        # Any unexpected failure is logged with traceback and reported as None.
        log.exception("[声呐监控] 发布异常 | {}", src.name)
        tmp.unlink(missing_ok=True)
        return None
    finally:
        # The intermediate slice is never kept, whatever the outcome.
        slice_tmp.unlink(missing_ok=True)


# ---------------------------------------------------------------------------
# Background watcher loop  (started from lifespan, like action_watch)
# ---------------------------------------------------------------------------

async def run_sonar_video_watch_loop(settings: Settings) -> None:
    """Poll ``BIOMASS_SONAR_VIDEO_DIR`` and publish from the **latest** file by mtime.

    Returns immediately when ``settings.biomass_sonar_video_dir`` is ``None``;
    otherwise loops until cancelled.

    * ``tail``: when ``(resolved_path, size)`` differs from the last published
      key and the file is ready, run one tail-``N``-second publish.
    * any other order (``sequential``): keep a cursor starting at t=0 for the
      tracked file; each poll publishes full ``N``-second blocks while the
      probed duration allows, at most ``max_chunks_per_poll`` of them when that
      setting is positive.

    On start-up the newest previously published ``{stem}_*.mp4`` in
    ``media_root`` (intermediate files excluded) seeds the published URL.

    Every poll — including the ones that end early because nothing changed, the
    file is empty, or it could not be stat'ed — is followed by
    ``asyncio.sleep(poll)``. The early exits must not bypass the sleep:
    without it the unchanged-file case spins without ever yielding to the
    event loop.

    Raises:
        asyncio.CancelledError: re-raised so the task can be cancelled; every
            other exception raised by a poll is logged and the loop continues.
    """
    global _published_url

    watch_log = logger.bind(pipeline="sonar_watch")

    d = settings.biomass_sonar_video_dir
    if d is None:
        watch_log.info(
            "[声呐监控] BIOMASS_SONAR_VIDEO_DIR 未设置，声呐监控未启用"
        )
        return

    poll = max(1.0, settings.biomass_sonar_video_poll_interval)
    slice_order = settings.biomass_sonar_slice_order
    max_chunks = settings.biomass_sonar_max_chunks_per_poll

    basename = _safe_sonar_media_basename(settings.biomass_sonar_video_media_name)
    dst_stem = Path(basename).stem
    media_root = settings.media_root
    media_root.mkdir(parents=True, exist_ok=True)

    # Seed the published URL from the newest final output of an earlier run.
    # Intermediate/auxiliary files are dropped before sorting so they are
    # never stat'ed.
    intermediate_tags = ("_tmp", "_flow_tmp", "_optical_flow_", "_tail_slice")
    existing = sorted(
        (
            p for p in media_root.glob(f"{dst_stem}_*.mp4")
            if not any(tag in p.stem for tag in intermediate_tags)
        ),
        key=lambda p: p.stat().st_mtime,
    )
    if existing:
        seed = existing[-1]
        async with _published_lock:
            _published_url = _public_media_url(settings, seed.name)
        watch_log.info(
            "[声呐监控] 已用历史发布文件回种：{}", _published_url
        )

    # Loop state shared with the nested helpers below.
    last_published_key: Optional[Tuple[str, int]] = None  # tail: (path, size)
    seq_cursor_rp: Optional[str] = None  # sequential: file the cursor belongs to
    seq_next_start: float = 0.0  # sequential: start offset of the next block

    watch_log.info(
        "[声呐监控] 监控目录已启动 | dir={} | poll={:.0f}s | recursive={} | "
        "slice_order={} | max_chunks_per_poll={}",
        d,
        poll,
        settings.biomass_sonar_video_recursive,
        slice_order,
        max_chunks,
    )

    def _note_not_ready(cycle, rp: str, name: str) -> None:
        """Log "not ready" at INFO once per file, then at DEBUG on later polls."""
        global _last_sonar_skip_logged_path
        if _last_sonar_skip_logged_path != rp:
            _last_sonar_skip_logged_path = rp
            cycle.info(
                "[声呐监控] 最新文件尚未可处理（录制中或 MP4/MOV 缺 moov），"
                "等待：{}",
                name,
            )
        else:
            cycle.debug(
                "[声呐监控] 仍在等待就绪：{}",
                name,
            )

    async def _poll_once() -> None:
        """Scan the directory once and publish whatever is due.

        Early exits use ``return`` so the caller's sleep always runs.
        """
        global _published_url, _last_sonar_skip_logged_path
        nonlocal last_published_key, seq_cursor_rp, seq_next_start

        if not d.is_dir():
            return

        all_videos = iter_mp4(d, settings.biomass_sonar_video_recursive)
        if not all_videos:
            watch_log.debug(
                "[声呐监控] 目录中暂无 .mp4/.mkv/.mov：{}",
                d,
            )
            return

        latest = max(all_videos, key=lambda p: p.stat().st_mtime)
        try:
            rp = str(latest.resolve())
            sz = latest.stat().st_size
        except OSError:
            return
        if sz <= 0:
            return

        cycle = logger.bind(pipeline="sonar_watch", source=latest.name)

        # ------------------------------ tail ------------------------------
        if slice_order == "tail":
            if last_published_key == (rp, sz):
                return  # nothing changed since the last successful publish

            ready = await to_thread(_is_ready_to_process, latest)
            if not ready:
                _note_not_ready(cycle, rp, latest.name)
                return

            cycle.info(
                "[声呐监控] 源文件已就绪，进入发布管线（尾段）| {:.2f} MB | {}",
                sz / (1024 * 1024),
                rp,
            )
            published = await _publish_video(
                latest, media_root, dst_stem, settings,
            )
            if published is None:
                cycle.warning(
                    "[声呐监控] 发布失败（切片/光流/转码见上方）| {}",
                    latest.name,
                )
                return

            async with _published_lock:
                _published_url = _public_media_url(settings, published.name)
            last_published_key = (rp, sz)
            _last_sonar_skip_logged_path = None
            return

        # ---------------------------- sequential ----------------------------
        if rp != seq_cursor_rp:
            # A different file became the newest one: restart from t=0.
            seq_cursor_rp = rp
            seq_next_start = 0.0
            cycle.info(
                "[声呐监控] 顺序模式：跟踪新文件，游标归零 | {}",
                latest.name,
            )

        ready = await to_thread(_is_ready_to_process, latest)
        if not ready:
            _note_not_ready(cycle, rp, latest.name)
            return

        duration = await to_thread(_probe_media_duration_sec, latest)
        slice_sec = float(settings.biomass_sonar_video_slice_sec)
        if duration is None:
            cycle.warning(
                "[声呐监控] 顺序模式：无法读取 duration（ffprobe），"
                "跳过本周期 | {}",
                latest.name,
            )
            return
        # 1e-3 tolerance: a block that ends within a millisecond of the probed
        # duration still counts as complete.
        if seq_next_start + slice_sec > duration + 1e-3:
            cycle.debug(
                "[声呐监控] 顺序模式：无完整 {:.0f}s 块可切 "
                "（cursor={:.3f}s duration={:.3f}s）| {}",
                slice_sec,
                seq_next_start,
                duration,
                latest.name,
            )
            return

        _last_sonar_skip_logged_path = None
        n_done = 0
        while seq_next_start + slice_sec <= duration + 1e-3:
            # A non-positive max_chunks means "no per-poll limit".
            if max_chunks > 0 and n_done >= max_chunks:
                break
            cycle.info(
                "[声呐监控] 顺序发布 | t={:.3f}s | "
                "duration={:.3f}s | {}",
                seq_next_start,
                duration,
                latest.name,
            )
            published = await _publish_video(
                latest,
                media_root,
                dst_stem,
                settings,
                range_start_sec=seq_next_start,
            )
            if published is None:
                # Leave the cursor in place so the same block is retried on
                # the next poll.
                cycle.warning(
                    "[声呐监控] 顺序发布失败 @ t={:.3f}s | {}",
                    seq_next_start,
                    latest.name,
                )
                break
            seq_next_start += slice_sec
            n_done += 1
            async with _published_lock:
                _published_url = _public_media_url(settings, published.name)
            # Yield to the event loop between consecutive chunks.
            await asyncio.sleep(0)

    while True:
        try:
            await _poll_once()
        except asyncio.CancelledError:
            raise
        except Exception:
            watch_log.exception(
                "[声呐监控] 监控循环异常"
            )

        await asyncio.sleep(poll)


# ---------------------------------------------------------------------------
# GET endpoint helper  (called from biomass router, no change to API contract)
# ---------------------------------------------------------------------------

async def get_sonar_video_public_url(
    settings: Settings,
    _client_id: str = DEFAULT_CLIENT_ID,
) -> str:
    """Return the URL of the most recent successful publish (``""`` if none yet).

    Both parameters are accepted for the caller's signature and are not read;
    the value comes from the module-level published state, read under its lock.
    """
    async with _published_lock:
        current_url = _published_url
    return current_url