1. CLI to control ZED camera start and stop. 2. Measure now uses every svo2 file for one fish, giving intermediate results and a final result with confidence level (*).

2026-04-16 11:38:30 +08:00
parent 9dce487c79
commit cc6cef0f73
57 changed files with 1877 additions and 386 deletions
--- a/FishMeasure/generate_video_with_labels.py
+++ b/FishMeasure/generate_video_with_labels.py
@@ -167,22 +167,41 @@ def generate_video(
    imgsz: int = 640,
    frame_stride: int = 1,
    show_large: bool = False,
+    summary_weight_g: Optional[float] = None,
+    summary_length_mm: Optional[float] = None,
+    summary_star: bool = False,
+    output_video_name: Optional[str] = None,
+    sam_device: str = "cuda",
 ) -> Optional[Path]:
    if not ZED_AVAILABLE:
        print("ERROR: pyzed not available, cannot generate labeled video")
        return None

-    per_frame, summary_wg, summary_lmm, is_confident = _parse_weight_json(weight_json)
-    star_s = " *" if is_confident else ""
+    per_frame, parsed_summary_wg, parsed_summary_lmm, raw_confident = _parse_weight_json(weight_json)
+    if summary_weight_g is None:
+        summary_weight_g = parsed_summary_wg
+    if summary_length_mm is None:
+        summary_length_mm = parsed_summary_lmm
+    star_s = " *" if summary_star else ""
    print(f"  Per-frame predictions: {len(per_frame)} PLYs mapped")
-    print(f"  Summary: weight={summary_wg}g, length={summary_lmm}mm{star_s}")
+    print(
+        f"  Summary: weight={summary_weight_g}g, length={summary_length_mm}mm{star_s} "
+        f"(raw_confident={raw_confident})"
+    )

-    if not per_frame and summary_wg is None:
+    if not per_frame and summary_weight_g is None:
        print("  WARNING: No weight data in JSON, video will show '--'")

    from ultralytics import YOLO
    yolo = YOLO(yolo_model_path)
    class_names = yolo.names if hasattr(yolo, "names") else {}
+    from fish_video_weight_evaluation import (
+        create_segmentation_overlay,
+        load_sam_predictor_with_fallback,
+        segment_with_sam,
+    )
+    sam_predictor, eff_sam_device = load_sam_predictor_with_fallback(sam_device)
+    sam_torch_device = eff_sam_device

    from dataset.zed_reader import ZEDReader
    zed_reader = ZEDReader(svo_path=str(svo_path), camera_mode=False, use_yolo_detector=False)
@@ -218,6 +237,7 @@ def generate_video(
                continue

            frame_number = idx + 1
+            frame_name = f"frame_{frame_number:06d}"
            if frame_number in per_frame:
                cur_wg, cur_lmm = per_frame[frame_number]
                last_wg = cur_wg
@@ -230,6 +250,7 @@ def generate_video(
            num_dets = len(results.boxes) if results.boxes is not None else 0

            left_disp = img.copy()
+            right_disp = img.copy()
            if num_dets > 0:
                boxes = results.boxes.xyxy.cpu().numpy()
                tids = (results.boxes.id.cpu().numpy().astype(int)
@@ -245,19 +266,31 @@ def generate_video(
                    cname = class_names.get(cid, "fish")
                    _draw_label_on_box(left_disp, box, tid, cname, cur_wg, cur_lmm)

-            if show_large or summary_wg is not None:
-                _draw_large_summary(left_disp, summary_wg, summary_lmm, is_confident)
+                try:
+                    masks = segment_with_sam(sam_predictor, img, boxes, sam_torch_device)
+                except Exception as e:
+                    print(f"  WARNING: SAM segmentation failed on {frame_name}: {e}")
+                    masks = []
+
+                if masks:
+                    right_disp = create_segmentation_overlay(img.copy(), masks)
+                    cv2.putText(right_disp, "Segmentation", (10, right_disp.shape[0] - 20),
+                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)
+                else:
+                    cv2.putText(right_disp, "Segmentation (failed)", (10, right_disp.shape[0] - 20),
+                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, cv2.LINE_AA)
+            else:
+                cv2.putText(right_disp, "No detections", (10, right_disp.shape[0] - 20),
+                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (128, 128, 128), 2, cv2.LINE_AA)
+
+            if show_large or summary_weight_g is not None:
+                _draw_large_summary(left_disp, summary_weight_g, summary_length_mm, summary_star)

-            frame_name = f"frame_{frame_number:06d}"
            info = f"[{frame_number}] {frame_name} | Detections: {num_dets}"
            cv2.putText(left_disp, info, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.putText(left_disp, "Detection", (10, left_disp.shape[0] - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)

-            right_disp = img.copy()
-            cv2.putText(right_disp, "Original", (10, right_disp.shape[0] - 20),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)
-
            combined = np.hstack([left_disp, right_disp])
            if num_dets > 0:
                frames.append(combined)
@@ -275,7 +308,7 @@ def generate_video(
        print(f"  WARNING: No detection frames collected from {svo_name}")
        return None

-    video_path = images_dir / f"{svo_name}_preview.mp4"
+    video_path = images_dir / (output_video_name or f"{svo_name}_preview.mp4")
    h, w = frames[0].shape[:2]
    writer = cv2.VideoWriter(str(video_path), cv2.VideoWriter_fourcc(*"mp4v"), 10.0, (w, h))
    for f in frames:
@@ -295,7 +328,17 @@ def main():
    parser.add_argument("--conf", type=float, default=0.25)
    parser.add_argument("--imgsz", type=int, default=640)
    parser.add_argument("--frame-stride", type=int, default=1)
+    parser.add_argument("--sam-device", type=str, default="cuda")
    parser.add_argument("--show-large-labels-at-top-right", action="store_true")
+    parser.add_argument(
+        "--summary-star",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help="Whether to draw * on the Final summary line; caller/DB is the source of truth.",
+    )
+    parser.add_argument("--summary-weight-g", type=float, default=None)
+    parser.add_argument("--summary-length-mm", type=float, default=None)
+    parser.add_argument("--output-video-name", type=str, default=None)
    args = parser.parse_args()

    svo = Path(args.svo).expanduser().resolve()
@@ -315,7 +358,12 @@ def main():
        conf=args.conf,
        imgsz=args.imgsz,
        frame_stride=args.frame_stride,
+        sam_device=args.sam_device,
        show_large=args.show_large_labels_at_top_right,
+        summary_weight_g=args.summary_weight_g,
+        summary_length_mm=args.summary_length_mm,
+        summary_star=bool(args.summary_star),
+        output_video_name=args.output_video_name,
    )