whole process

2026-04-14 22:05:52 +08:00
parent af67f61b63
commit 940d426a37
7 changed files with 161 additions and 123 deletions
--- a/FishMeasure/generate_video_with_labels.py
+++ b/FishMeasure/generate_video_with_labels.py
@@ -1,8 +1,10 @@
 #!/usr/bin/env python3
 """Generate labeled preview video from SVO + weight prediction JSON.

-Reads the SVO with YOLO tracking, overlays DGCNN weight/length on each
-detection box, and writes ``<svo_name>_preview.mp4`` into ``--save-output``.
+Per-frame labeling: each detection box shows the weight/length predicted
+for that specific frame's PLY (from ``per_cloud`` / ``per_file`` in the
+DGCNN JSON).  Frames without a corresponding PLY carry forward the last
+known value.  The final aggregated result is shown at top-right.

 Called by ``predict_weigth_from_svo2.py`` after DGCNN completes.
 Replaces any existing preview video so the final published file has labels.
@@ -13,9 +15,10 @@ from __future__ import annotations
 import argparse
 import json
 import math
+import re
 import sys
 from pathlib import Path
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple

 import cv2
 import numpy as np
@@ -27,45 +30,74 @@ except ImportError:
    ZED_AVAILABLE = False


-def _extract_weight_length(weight_json: Path) -> Tuple[Optional[float], Optional[float]]:
-    """Return (weight_g, length_mm) from a weight prediction JSON."""
+def _parse_weight_json(weight_json: Path) -> Tuple[
+    Dict[int, Tuple[float, float]],
+    Optional[float],
+    Optional[float],
+    bool,
+]:
+    """Parse weight JSON → per-frame map + summary + confidence.
+
+    Returns:
+        per_frame: {frame_number: (weight_g, length_mm)} from per_cloud/per_file
+        summary_weight_g: final aggregated weight
+        summary_length_mm: final aggregated length
+        is_confident: True when ``*`` should be shown (avg > 440g OR length band fraction >= 25%)
+    """
    data = json.loads(weight_json.read_text(encoding="utf-8"))
    summary = data.get("dgcnn_summary") or data.get("weight_summary") or data.get("summary") or {}

-    w_candidates = [
+    def _first_finite(*candidates):
+        for c in candidates:
+            if c is not None:
+                try:
+                    v = float(c)
+                    if math.isfinite(v):
+                        return v
+                except (TypeError, ValueError):
+                    pass
+        return None
+
+    summary_wg = _first_finite(
        summary.get("pred_weight_g"),
        summary.get("avg_predicted_weight_g"),
        data.get("pred_weight_g"),
        data.get("avg_predicted_weight_g"),
-    ]
-    weight_g = None
-    for c in w_candidates:
-        if c is not None:
-            try:
-                v = float(c)
-                if math.isfinite(v):
-                    weight_g = v
-                    break
-            except (TypeError, ValueError):
-                continue
-
-    l_candidates = [
+    )
+    summary_lmm = _first_finite(
        summary.get("avg_length_input_topk"),
        summary.get("avg_length_input"),
        data.get("avg_length_input"),
-    ]
-    length_mm = None
-    for c in l_candidates:
-        if c is not None:
-            try:
-                v = float(c)
-                if math.isfinite(v):
-                    length_mm = v
-                    break
-            except (TypeError, ValueError):
-                continue
+    )

-    return weight_g, length_mm
+    CONFIDENT_AVG_G = 440.0
+    MIN_FRAC_LARGEST_LENGTH_GROUP = 0.25
+
+    mean_g = _first_finite(
+        summary.get("mean_all_pred_g_after_filters"),
+        summary.get("avg_predicted_weight_g"),
+    )
+    frac = _first_finite(summary.get("fraction_in_near_max_length_band"))
+
+    is_confident = False
+    if mean_g is not None and mean_g > CONFIDENT_AVG_G:
+        is_confident = True
+    elif frac is not None and frac >= MIN_FRAC_LARGEST_LENGTH_GROUP:
+        is_confident = True
+
+    per_frame: Dict[int, Tuple[float, float]] = {}
+    for item in data.get("per_cloud") or data.get("per_file") or []:
+        ply = item.get("ply", "")
+        m = re.search(r"frame_(\d+)", Path(str(ply)).stem)
+        if not m:
+            continue
+        fnum = int(m.group(1))
+        wg = _first_finite(item.get("predicted_weight_g"))
+        lmm = _first_finite(item.get("length_input"))
+        if wg is not None:
+            per_frame[fnum] = (wg, lmm if lmm is not None else float("nan"))
+
+    return per_frame, summary_wg, summary_lmm, is_confident


 def _draw_label_on_box(
@@ -79,8 +111,8 @@ def _draw_label_on_box(
    x1, y1, x2, y2 = map(int, box)
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

-    w_str = f"{weight_g:.0f}g" if weight_g is not None else "--g"
-    l_str = f"{length_mm:.0f}mm" if length_mm is not None else "--mm"
+    w_str = f"{weight_g:.0f}g" if weight_g is not None and math.isfinite(weight_g) else "--g"
+    l_str = f"{length_mm:.0f}mm" if length_mm is not None and math.isfinite(length_mm) else "--mm"
    label = f"ID:{tid} {class_name} weight: {w_str} len: {l_str}"

    (tw, th), bl = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.55, 1)
@@ -93,13 +125,15 @@ def _draw_large_summary(
    image: np.ndarray,
    weight_g: Optional[float],
    length_mm: Optional[float],
+    is_confident: bool = False,
 ) -> None:
+    star = " *" if is_confident else ""
    lines = []
-    lines.append(f"Weight: {weight_g:.0f}g" if weight_g is not None else "Weight: --g")
+    lines.append(f"Final: {weight_g:.0f}g{star}" if weight_g is not None else f"Final: --g")
    lines.append(f"Length: {length_mm:.0f}mm" if length_mm is not None else "Length: --mm")

    font = cv2.FONT_HERSHEY_SIMPLEX
-    scale = 2.75
+    scale = 2.0
    thickness = 2
    pad = 10
    h, w = image.shape[:2]
@@ -138,11 +172,13 @@ def generate_video(
        print("ERROR: pyzed not available, cannot generate labeled video")
        return None

-    weight_g, length_mm = _extract_weight_length(weight_json)
-    print(f"  Labeling with weight={weight_g}g, length={length_mm}mm from {weight_json.name}")
+    per_frame, summary_wg, summary_lmm, is_confident = _parse_weight_json(weight_json)
+    star_s = " *" if is_confident else ""
+    print(f"  Per-frame predictions: {len(per_frame)} PLYs mapped")
+    print(f"  Summary: weight={summary_wg}g, length={summary_lmm}mm{star_s}")

-    if weight_g is None and length_mm is None:
-        print("  WARNING: No valid weight/length in JSON, video will show '--'")
+    if not per_frame and summary_wg is None:
+        print("  WARNING: No weight data in JSON, video will show '--'")

    from ultralytics import YOLO
    yolo = YOLO(yolo_model_path)
@@ -162,8 +198,10 @@ def generate_video(
    images_dir = output_dir / "images"
    images_dir.mkdir(parents=True, exist_ok=True)

-    frames = []
+    frames: List[np.ndarray] = []
    idx = 0
+    last_wg: Optional[float] = None
+    last_lmm: Optional[float] = None

    try:
        while True:
@@ -179,6 +217,15 @@ def generate_video(
                idx += 1
                continue

+            frame_number = idx + 1
+            if frame_number in per_frame:
+                cur_wg, cur_lmm = per_frame[frame_number]
+                last_wg = cur_wg
+                last_lmm = cur_lmm if math.isfinite(cur_lmm) else last_lmm
+            else:
+                cur_wg = last_wg
+                cur_lmm = last_lmm
+
            results = yolo.track(img, conf=conf, imgsz=imgsz, verbose=False, persist=True)[0]
            num_dets = len(results.boxes) if results.boxes is not None else 0

@@ -196,13 +243,13 @@ def generate_video(
                    tid = int(tids[i]) if i < len(tids) else 0
                    cid = int(cls_ids[i]) if i < len(cls_ids) else 0
                    cname = class_names.get(cid, "fish")
-                    _draw_label_on_box(left_disp, box, tid, cname, weight_g, length_mm)
+                    _draw_label_on_box(left_disp, box, tid, cname, cur_wg, cur_lmm)

-                if show_large:
-                    _draw_large_summary(left_disp, weight_g, length_mm)
+            if show_large or summary_wg is not None:
+                _draw_large_summary(left_disp, summary_wg, summary_lmm, is_confident)

-            frame_name = f"frame_{idx + 1:06d}"
-            info = f"[{idx + 1}] {frame_name} | Detections: {num_dets}"
+            frame_name = f"frame_{frame_number:06d}"
+            info = f"[{frame_number}] {frame_name} | Detections: {num_dets}"
            cv2.putText(left_disp, info, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.putText(left_disp, "Detection", (10, left_disp.shape[0] - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)
@@ -216,7 +263,9 @@ def generate_video(
                frames.append(combined)

            if idx % 30 == 0:
-                print(f"  [{idx + 1}] {frame_name} dets={num_dets} frames_collected={len(frames)}")
+                w_s = f"{cur_wg:.0f}g" if cur_wg is not None else "--"
+                l_s = f"{cur_lmm:.0f}mm" if cur_lmm is not None else "--"
+                print(f"  [{frame_number}] {frame_name} dets={num_dets} w={w_s} l={l_s} collected={len(frames)}")

            idx += 1
    finally:
@@ -232,7 +281,7 @@ def generate_video(
    for f in frames:
        writer.write(f)
    writer.release()
-    print(f"  ✓ Labeled video: {video_path.name} ({len(frames)} frames, weight={weight_g}g len={length_mm}mm)")
+    print(f"  ✓ Labeled video: {video_path.name} ({len(frames)} frames, {len(per_frame)} PLY labels)")
    return video_path


--- a/FishMeasure/run_predict_from_svo2_fish9.sh
+++ b/FishMeasure/run_predict_from_svo2_fish9.sh
@@ -10,7 +10,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$SCRIPT_DIR"

 SESSION_ROOT="/home/ubuntu/data/fish/2016-1-22-last"
-FISH_NAME="fish9"
+FISH_NAME="fish1"
 fish_dir="${SESSION_ROOT}/${FISH_NAME}/"
 OUT_PARENT="output_weight_estimator"
 save_out="${OUT_PARENT}/${FISH_NAME}"
--- a/README.md
+++ b/README.md
@@ -1,68 +1,7 @@
-# FishServer 核心仓库（已瘦身）
+# How To Run

-本目录面向 **FishMeasure（SVO2 称重/点云）**、**FishAction（MP4 行为 X3D）** 与 **fish_api（FastAPI 网关）** 的部署与运行，已去掉训练数据、历史推理产物、旧版 Django 前端和 SlowFast 训练栈等大体积非运行时内容。
+From the repo root, run `./scripts/start_fresh.sh` to start the server.

-## 目录结构
+# About Configs

-| 路径 | 说明 |
-|------|------|
-| `fish_api/` | FastAPI：`uv sync` 后 `uv run uvicorn app.main:app`，见其中 `README.md` |
-| `FishMeasure/` | 双目链路：`predict_weigth_from_svo2.py`、`fish_video_weight_evaluation.py`、`weight_estimator/`、`pointcloud_classifier/` 等 |
-| `FishAction/` | 行为推断：`predict_video_x3d_3class.py`、`train_pytorchvideo_x3d.py`、`checkpoints/`（X3D） |
-| `packaging/` | **单一 Conda 环境**：网关 + 两条算法依赖定义，见 [`packaging/README.md`](packaging/README.md) |
-| `scripts/start_fresh.sh` | 在已激活的 `fishserver` 环境中清空缓存后启动 uvicorn |
-| `scripts/start_no_fresh.sh` | 保留 SQLite 与推理缓存启动 uvicorn |
-
-### 一键打包成「单环境」运行（推荐服务器）
-
-```bash
-bash packaging/bootstrap_fishserver.sh
-conda activate fishserver
-bash packaging/patch_cuda_torch.sh    # Linux + NVIDIA 时建议
-# 再按 packaging/README.md 安装 ZED SDK 与 pyzed
-PORT=8001 bash scripts/start_fresh.sh
-```
-
-多 Conda 环境、分别设置 `PYTHON_FISH_MEASURE` / `PYTHON_FISH_ACTION` 的方式仍支持，见 `fish_api/README.md`。
-
-## 已删除内容（需训练/旧功能时可从备份找回）
-
- `FishMeasure/output_weight_estimator/`、`output-yolo-sam/`：推理输出  
- `FishMeasure/datasets/`：训练集  
- `FishMeasure/project_jiuzhou01/`：九州 Django + 前端工程  
- `FishMeasure/measure/`、`detect_refbox/dataset`、`detect_refbox/runs`：独立实验数据与跑次  
- `FishMeasure/runs/predict`、`runs/segment`：旧预测/分割输出  
- `FishMeasure/utils/data/`：工具附带大数据  
- `FishMeasure/weight_estimator/runs/` 中除 `dgcnn_20260312_171043/` 外的历史训练目录  
- `FishAction/slowfast/`：SlowFast 训练代码（当前网关仅走 PyTorchVideo X3D）  
- 根目录重复 `yolo*.pt`、`fish_video_weight_evaluation__v1.py` 等  
-
-## 仍占空间的大文件（运行时一般需要）
-
- **`FishMeasure/sam_vit_h_4b8939.pth`**（约 2.4GB）：SAM `vit_h`。若未放置，可从 [Segment Anything 官方权重](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth) 下载到 `FishMeasure/` 下同名文件。  
- **`FishMeasure/runs/train/fish_detection_20251127_104658/weights/best.pt`**：YOLO 检测，与 `fish_api` 默认环境变量一致。  
- **`FishMeasure/weight_estimator/runs/dgcnn_20260312_171043/best.pt`**：DGCNN 体重估计。  
- **`FishAction/checkpoints/ptv_x3d_m/checkpoint_best.pt`**：行为分类（已删除其它实验版 checkpoint 目录以省空间；若需恢复请从备份拿回）。  
-
-## Git 与体积
-
-`FishMeasure/`、`FishAction/` 内嵌各自 `.git`，其中 FishMeasure 历史对象可能很大（含 LFS）。**仅部署运行时**可用 rsync 排除版本库：
-
-```bash
-rsync -avz --exclude '.git' --exclude 'fish_api/.venv' ...
-```
-
-## 接口说明
-
- 业务 API 契约可参考仓库根目录 `接口文档.docx`（若仍保留）。  
- HTTP 网关路径以 `fish_api/app/routers/` 为准。
-
-## 同步到服务器示例
-
-```bash
-rsync -avz --delete \
-  --exclude '.venv' --exclude 'fish_api/.venv' --exclude '__pycache__' \
-  ./ ubuntu@192.168.10.93:/home/ubuntu/projects/FishServer/
-```
-
-（按实际 IP、用户与路径修改；`--delete` 慎用，会删远端多余文件。）
+You can set input file location in .env and other settings.
--- a/fish_api/README.md
+++ b/fish_api/README.md
@@ -25,7 +25,7 @@ FastAPI 网关：分块接收 **SVO2**（FishMeasure）与 **MP4**（FishAction
 | `MEDIA_ROOT` | 对外托管每次测量生成的 `*_left.mp4` / `*_right.mp4` | `<repo>/fish_api/.data/media` |
 | `FISH_MEASURE_ROOT` | `FishMeasure` 根目录 | 自动相对仓库 |
 | `FISH_ACTION_ROOT` | `FishAction` 根目录 | 自动相对仓库 |
-| `MEASURE_OUTPUT_ROOT` | 传给 `--save-output` 的目录 | `FishMeasure/output_weight_estimator` |
+| `MEASURE_OUTPUT_ROOT` | 传给 `--save-output` 的目录 | `<repo>/fish_api/.data/measure_output` |
 | `YOLO_MODEL` / `WEIGHT_CHECKPOINT` / `ACTION_CHECKPOINT` | 模型路径 | 与仓库内脚本默认一致 |
 | `SAM_DEVICE` | `cuda` 或 `cpu` | `cuda` |
 可在 `fish_api/.env` 中填写上述变量（`pydantic-settings` 会读取）。
@@ -37,7 +37,8 @@ cd fish_api
 uv sync
 # 可选：包含 httpx，便于本地用 FastAPI TestClient 做冒烟测试
 # uv sync --group dev
-bash start_fresh.sh    # 清空 SQLite / 缓存后启动；保留缓存用 start_no_fresh.sh
+bash start_fresh.sh    # 默认仅重置 client_id 投递进度，保留 SQLite 历史与快照
+# CLEAR_SQLITE_DATABASE=1 bash start_fresh.sh  # 需要时才彻底清 SQLite
 # 或：uv run uvicorn app.main:app --host 0.0.0.0 --port 8000（需自行 prestart）
 ```

@@ -85,6 +86,17 @@ MP4 将 `svo` 换成 `mp4`，本地文件换成 `clip.mp4`，轮询 `GET /api/v1

 FishMeasure 跑完后在输出目录查找 `*preview*.mp4`，复制到 `MEDIA_ROOT/`，文件名为 `{UTC时间戳}_{svo_stem}_left.mp4` / `_right.mp4`（每次测量不覆盖；仅一个预览文件时可能左右 URL 指向同一逻辑源经 SBS 拆分）。确保 `PUBLIC_BASE_URL` 与前端/文档中的域名端口一致。

+## Weight Rule (Current)
+
+最终体重 `pred_weight_g` 由以下规则链决定（按优先级从高到低）：
+
+1. **440g 全池均值保护**（规则 B）：若 `avg_g_filtered`（所有 candidates 均值）> `--mean-pool-fallback-max-if-over-g`（默认 440g），则 `pred_weight_g = max_predicted_weight_g_after_filter`，`pred_weight_rule = "max_after_filter_high_mean_pool_over_g"`。
+2. **400g mean-all fallback**（规则 A，仅 `--average-all-after-filter` 开启时）：若全池 mean > `--average-all-fallback-max-if-mean-over-g`（默认 400g），则 `pred_weight_g = max_predicted_weight_g_after_filter`，`pred_weight_rule = "max_after_filter_high_mean_all"`。
+3. **`--average-all-after-filter`**（默认关）：全部 candidates 均值作为最终值，`pred_weight_rule = "mean_all_filtered"`。
+4. **Top-K 聚合**（默认路径）：按 `--top-by-length`（默认开）选 top-K 帧，candidates < 5 用 max 否则用 mean，`pred_weight_rule = "top_k_aggregate"`。
+
+DGCNN 明细中同时输出 `mean_all_pred_g_after_filters`、`avg_topk_mean_pred_g` 等供对比参考。
+
 ## 演进建议

 - RTSP：用 `ffmpeg` 切段写入 MP4 后调用现有 `finalize` 逻辑
--- a/fish_api/app/db.py
+++ b/fish_api/app/db.py
@@ -750,6 +750,28 @@ def remove_sqlite_database_files(settings: Settings) -> None:
            pass


+def reset_delivery_client_progress(settings: Settings) -> None:
+    """仅重置客户端投递游标（保留历史快照与 watch 缓存）。"""
+    init_db(settings)
+    conn = _connect(settings.sqlite_path)
+    try:
+        # 清空所有客户端游标，避免沿用旧 client_id 的消费进度。
+        conn.execute("UPDATE delivery_client_cursor SET last_delivered_id = 0")
+        # 确保默认客户端行存在（历史库升级场景）。
+        for kind in ("measure", "health"):
+            conn.execute(
+                """
+                INSERT INTO delivery_client_cursor (client_id, kind, last_delivered_id)
+                VALUES (?, ?, 0)
+                ON CONFLICT(client_id, kind) DO NOTHING
+                """,
+                (DEFAULT_CLIENT_ID, kind),
+            )
+        conn.commit()
+    finally:
+        conn.close()
+
+
 def clear_watch_cache_and_snapshots(settings: Settings) -> None:
    """清空 watch 已处理路径与对应快照，便于重新跑推理（与 measure/action_watch 的 use_state_file 开关一致）。"""
    init_db(settings)
--- a/fish_api/app/prestart_fresh.py
+++ b/fish_api/app/prestart_fresh.py
@@ -1,6 +1,8 @@
-"""启动前清空状态：SQLite（客户端数据）、watch 旧 JSON。
+"""启动前清空状态：默认仅重置客户端游标，保留 SQLite 历史快照。

 由 start_fresh.sh 在 uvicorn 之前调用。
+- 默认保留 SQLite 历史数据，仅重置 client_id 投递游标（fresh 语义）
+- 设置 CLEAR_SQLITE_DATABASE=1 可强制清空 SQLite（主库 + wal/shm）
 - 默认保留 measure_output 以复用中间步骤（点云等）
 - 设置 CLEAR_MEASURE_OUTPUT=1 清空测量输出目录
 - 设置 CLEAR_ACTION_OUTPUT=1 清空行为输出目录
@@ -11,7 +13,7 @@ from __future__ import annotations
 import os
 from pathlib import Path

-from app.db import _safe_rm_tree, remove_sqlite_database_files
+from app.db import _safe_rm_tree, remove_sqlite_database_files, reset_delivery_client_progress
 from app.settings import get_settings


@@ -29,12 +31,23 @@ def _rm_legacy_json(path: Path | None) -> None:
 def run_prestart_fresh() -> None:
    s = get_settings()

-    # 始终清空 SQLite（客户端数据）
-    remove_sqlite_database_files(s)
-    print(
-        f"[prestart-fresh] removed SQLite at {s.sqlite_path} (and -wal/-shm if present).",
-        flush=True,
+    clear_sqlite_database = os.environ.get("CLEAR_SQLITE_DATABASE", "").strip() in (
+        "1",
+        "true",
+        "yes",
    )
+    if clear_sqlite_database:
+        remove_sqlite_database_files(s)
+        print(
+            f"[prestart-fresh] removed SQLite at {s.sqlite_path} (and -wal/-shm if present).",
+            flush=True,
+        )
+    else:
+        reset_delivery_client_progress(s)
+        print(
+            f"[prestart-fresh] kept SQLite history, reset delivery client progress in {s.sqlite_path}.",
+            flush=True,
+        )

    # 检查是否清空中间输出目录（默认保留以复用点云等中间步骤）
    clear_measure_output = os.environ.get("CLEAR_MEASURE_OUTPUT", "").strip() in ("1", "true", "yes")
--- a/fish_api/start_fresh.sh
+++ b/fish_api/start_fresh.sh
@@ -1,10 +1,13 @@
 #!/usr/bin/env bash
-# 清空 SQLite（客户端数据）后启动 Fish API（uvicorn）。
+# 默认重置 client_id 投递游标后启动 Fish API（uvicorn），保留 SQLite 历史快照。
 # 默认保留 measure_output 中间步骤（点云等）以加速重新处理。
 #
 #   bash fish_api/start_fresh.sh
 #   PORT=8001 HOST=0.0.0.0 bash fish_api/start_fresh.sh
 #
+# 强制清空 SQLite（谨慎）：
+#   CLEAR_SQLITE_DATABASE=1 bash fish_api/start_fresh.sh
+#
 # 强制清空中间输出目录（重新生成点云等）：
 #   CLEAR_MEASURE_OUTPUT=1 bash fish_api/start_fresh.sh
 #   CLEAR_MEASURE_OUTPUT=1 CLEAR_ACTION_OUTPUT=1 bash fish_api/start_fresh.sh