fix video label

This commit is contained in:
zaiun xu
2026-04-10 10:30:01 +08:00
parent e1b514836e
commit 09736f9e15
20 changed files with 518 additions and 268 deletions

View File

@@ -8,6 +8,7 @@ weight_prediction.json、临时 pred.json 等),由 fish_api 在子进程结
from __future__ import annotations
import json
import math
import shutil
import sqlite3
from datetime import datetime, timezone
@@ -222,6 +223,86 @@ def get_latest_health(settings: Settings) -> HealthSnapshot:
conn.close()
def _coerce_finite_number(v: Any) -> Optional[float]:
if v is None:
return None
if isinstance(v, bool):
return None
if isinstance(v, (int, float)):
x = float(v)
return x if math.isfinite(x) else None
if isinstance(v, str):
s = v.strip()
if not s:
return None
try:
x = float(s)
return x if math.isfinite(x) else None
except ValueError:
return None
return None
def _coerce_track_id(v: Any) -> Optional[int]:
# bool is a subclass of int in Python
if isinstance(v, bool):
return None
if isinstance(v, int):
return v if v >= 0 else None
if isinstance(v, str):
try:
i = int(v.strip(), 10)
return i if i >= 0 else None
except ValueError:
return None
return None
def measure_result_deliverable(result: Any, error: Optional[str]) -> bool:
    """Tell whether a measure result is worth delivering.

    True when there is no error and at least one record carries a valid
    track id together with finite numeric weight (g) and length (mm).
    """
    if error or not isinstance(result, list) or not result:
        return False

    def _row_ok(row: Any) -> bool:
        # Non-dict entries are skipped rather than treated as failures.
        if not isinstance(row, dict):
            return False
        fields = (
            _coerce_track_id(row.get("id")),
            _coerce_finite_number(row.get("weight")),
            _coerce_finite_number(row.get("length")),
        )
        return all(value is not None for value in fields)

    return any(_row_ok(row) for row in result)
def measure_snapshot_deliverable(snap: MeasureSnapshot) -> bool:
    """Apply ``measure_result_deliverable`` to a snapshot's result and error."""
    rows, failure = snap.result, snap.error
    return measure_result_deliverable(rows, failure)
def health_snapshot_deliverable(snap: HealthSnapshot) -> bool:
    """Tell whether a health snapshot is worth delivering.

    A snapshot with an error is never deliverable. Otherwise it is
    deliverable when at least one of ``behavior_result``, ``health_result``
    or ``raw_class_en`` is non-blank after stripping whitespace.
    """
    if snap.error:
        return False
    # Missing (None/empty) fields count as blank text.
    texts = [
        (value or "").strip()
        for value in (snap.behavior_result, snap.health_result, snap.raw_class_en)
    ]
    return any(texts)
def _health_row_deliverable(
    behavior_result: str,
    health_result: str,
    raw_class_en: str,
    error: Optional[str],
) -> bool:
    """Check deliverability of raw health column values.

    The values are wrapped in a ``HealthSnapshot`` (falsy text fields become
    empty strings) and judged by ``health_snapshot_deliverable``.
    """
    fields = {
        "behavior_result": behavior_result or "",
        "health_result": health_result or "",
        "raw_class_en": raw_class_en or "",
        "error": error,
    }
    return health_snapshot_deliverable(HealthSnapshot(**fields))
def _last_delivered_id(
conn: sqlite3.Connection, kind: str, snapshots_table: str
) -> int:
@@ -243,48 +324,55 @@ def _last_delivered_id(
def pop_next_measure(
settings: Settings,
) -> Tuple[MeasureSnapshot, bool, Optional[int]]:
"""取队首未投递的 measure 快照并推进游标;无未投递时 has_new=False"""
"""取队首未投递且可交付的 measure 快照并推进游标;跳过不可交付行仅推进游标"""
init_db(settings)
conn = _connect(settings.sqlite_path)
try:
conn.execute("BEGIN IMMEDIATE")
last_id = _last_delivered_id(conn, "measure", "measure_snapshots")
next_row = conn.execute(
"""
SELECT id, created_at, result_json, video_left, video_right,
error, raw_prediction_path
FROM measure_snapshots
WHERE id > ?
ORDER BY id ASC
LIMIT 1
""",
(last_id,),
).fetchone()
while True:
next_row = conn.execute(
"""
SELECT id, created_at, result_json, video_left, video_right,
error, raw_prediction_path
FROM measure_snapshots
WHERE id > ?
ORDER BY id ASC
LIMIT 1
""",
(last_id,),
).fetchone()
if next_row is None:
conn.commit()
return MeasureSnapshot(result=[], video_left="", video_right=""), False, None
nid = int(next_row["id"])
err: Optional[str] = next_row["error"]
data: Any = json.loads(next_row["result_json"])
if not isinstance(data, list):
data = []
conn.execute(
"UPDATE delivery_cursor SET last_delivered_id = ? WHERE kind = ?",
(nid, "measure"),
)
if not measure_result_deliverable(data, err):
last_id = nid
continue
if next_row is None:
conn.commit()
return MeasureSnapshot(result=[], video_left="", video_right=""), False, None
nid = int(next_row["id"])
conn.execute(
"UPDATE delivery_cursor SET last_delivered_id = ? WHERE kind = ?",
(nid, "measure"),
)
conn.commit()
data: Any = json.loads(next_row["result_json"])
if not isinstance(data, list):
data = []
snap = MeasureSnapshot(
result=data,
video_left=next_row["video_left"] or "",
video_right=next_row["video_right"] or "",
updated_at=_parse_dt(next_row["created_at"]),
error=next_row["error"],
raw_prediction_path=next_row["raw_prediction_path"],
)
return snap, True, nid
snap = MeasureSnapshot(
result=data,
video_left=next_row["video_left"] or "",
video_right=next_row["video_right"] or "",
updated_at=_parse_dt(next_row["created_at"]),
error=err,
raw_prediction_path=next_row["raw_prediction_path"],
)
return snap, True, nid
except Exception:
conn.rollback()
raise
@@ -293,44 +381,54 @@ def pop_next_measure(
def pop_next_health(settings: Settings) -> Tuple[HealthSnapshot, bool, Optional[int]]:
"""取队首未投递的 health 快照并推进游标;无未投递时 has_new=False"""
"""取队首未投递且可交付的 health 快照并推进游标;跳过不可交付行仅推进游标"""
init_db(settings)
conn = _connect(settings.sqlite_path)
try:
conn.execute("BEGIN IMMEDIATE")
last_id = _last_delivered_id(conn, "health", "health_snapshots")
next_row = conn.execute(
"""
SELECT id, created_at, behavior_result, health_result,
raw_class_en, error
FROM health_snapshots
WHERE id > ?
ORDER BY id ASC
LIMIT 1
""",
(last_id,),
).fetchone()
while True:
next_row = conn.execute(
"""
SELECT id, created_at, behavior_result, health_result,
raw_class_en, error
FROM health_snapshots
WHERE id > ?
ORDER BY id ASC
LIMIT 1
""",
(last_id,),
).fetchone()
if next_row is None:
conn.commit()
return HealthSnapshot(behavior_result="", health_result=""), False, None
nid = int(next_row["id"])
beh = next_row["behavior_result"] or ""
hlth = next_row["health_result"] or ""
raw_en = next_row["raw_class_en"] or ""
err: Optional[str] = next_row["error"]
conn.execute(
"UPDATE delivery_cursor SET last_delivered_id = ? WHERE kind = ?",
(nid, "health"),
)
if not _health_row_deliverable(beh, hlth, raw_en, err):
last_id = nid
continue
if next_row is None:
conn.commit()
return HealthSnapshot(behavior_result="", health_result=""), False, None
nid = int(next_row["id"])
conn.execute(
"UPDATE delivery_cursor SET last_delivered_id = ? WHERE kind = ?",
(nid, "health"),
)
conn.commit()
snap = HealthSnapshot(
behavior_result=next_row["behavior_result"] or "",
health_result=next_row["health_result"] or "",
updated_at=_parse_dt(next_row["created_at"]),
error=next_row["error"],
raw_class_en=next_row["raw_class_en"] or "",
)
return snap, True, nid
snap = HealthSnapshot(
behavior_result=beh,
health_result=hlth,
updated_at=_parse_dt(next_row["created_at"]),
error=err,
raw_class_en=raw_en,
)
return snap, True, nid
except Exception:
conn.rollback()
raise

View File

@@ -1,6 +1,6 @@
"""启动前清空状态SQLite、watch 旧 JSON、测量/行为运行时目录。
start.sh / start_fresh.sh 在 uvicorn 之前调用,使 FishMeasure 与 FishAction 均在无缓存下重新推理。
由 start_fresh.sh 在 uvicorn 之前调用,使 FishMeasure 与 FishAction 均在无缓存下重新推理。
"""
from __future__ import annotations

View File

@@ -5,7 +5,12 @@ from pathlib import Path
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, Response
from app.db import save_health_snapshot, save_measure_snapshot
from app.db import (
health_snapshot_deliverable,
measure_snapshot_deliverable,
save_health_snapshot,
save_measure_snapshot,
)
from app.deps import require_ingest_auth
from app.services import action as action_svc
from app.services import measure as measure_svc
@@ -16,7 +21,7 @@ from app.services.sessions import (
write_chunk,
)
from app.settings import Settings, get_settings
from app.state import HealthSnapshot, MeasureSnapshot, app_state
from app.state import app_state
router = APIRouter(prefix="/api/v1/ingest", tags=["ingest"])
@@ -28,19 +33,12 @@ async def _measure_job_serial(svo_path: Path, settings: Settings) -> None:
snap = await asyncio.to_thread(
measure_svc.run_full_measure, svo_path, settings
)
save_measure_snapshot(settings, snap, source_path=str(svo_path.resolve()))
if measure_snapshot_deliverable(snap):
save_measure_snapshot(
settings, snap, source_path=str(svo_path.resolve())
)
app_state.measure_status = "idle"
except Exception as e:
save_measure_snapshot(
settings,
MeasureSnapshot(
result=[],
video_left="",
video_right="",
error=str(e),
),
source_path=str(svo_path.resolve()),
)
except Exception:
app_state.measure_status = "error"
@@ -51,18 +49,12 @@ async def _action_job_serial(mp4_path: Path, settings: Settings) -> None:
snap = await asyncio.to_thread(
action_svc.run_full_action, mp4_path, settings
)
save_health_snapshot(settings, snap, source_path=str(mp4_path.resolve()))
if health_snapshot_deliverable(snap):
save_health_snapshot(
settings, snap, source_path=str(mp4_path.resolve())
)
app_state.action_status = "idle"
except Exception as e:
save_health_snapshot(
settings,
HealthSnapshot(
behavior_result="",
health_result="",
error=str(e),
),
source_path=str(mp4_path.resolve()),
)
except Exception:
app_state.action_status = "error"

View File

@@ -6,10 +6,15 @@ from typing import Dict, Set
from loguru import logger
from app.db import add_watch_processed, load_watch_processed, save_health_snapshot
from app.db import (
add_watch_processed,
health_snapshot_deliverable,
load_watch_processed,
save_health_snapshot,
)
from app.services import action as action_svc
from app.settings import Settings
from app.state import HealthSnapshot, app_state
from app.state import app_state
from app.watch_idle import IdleWatchWarnState, idle_warn_interval_sec, maybe_warn_idle_watch
_ACTION_IDLE_WARN_INTERVAL_SEC = idle_warn_interval_sec(
@@ -52,7 +57,13 @@ async def _run_inference_and_state(
app_state.action_status = "running"
try:
snap = await asyncio.to_thread(action_svc.run_full_action, mp4, settings)
save_health_snapshot(settings, snap, source_path=key)
if health_snapshot_deliverable(snap):
save_health_snapshot(settings, snap, source_path=key)
else:
logger.warning(
"[action-watch] no deliverable health snapshot for {}, skip SQLite",
mp4.name,
)
app_state.action_status = "idle"
processed.add(key)
if settings.action_watch_use_state_file:
@@ -60,18 +71,11 @@ async def _run_inference_and_state(
pred = (snap.raw_class_en or "").strip()
logger.info("[action-watch] done: {} -> {}", mp4.name, pred)
except Exception as e:
save_health_snapshot(
settings,
HealthSnapshot(
behavior_result="",
health_result="",
error=str(e),
),
source_path=key,
)
app_state.action_status = "error"
logger.exception("[action-watch] error on {}: {}", mp4, e)
raise
app_state.action_status = "idle"
processed.add(key)
if settings.action_watch_use_state_file:
add_watch_processed(settings, key, "action")
async def watch_tick(
@@ -106,12 +110,9 @@ async def watch_tick(
stability[key] = (size, cnt + 1)
_, cnt = stability[key]
if cnt >= settings.action_watch_stable_polls:
try:
await _run_inference_and_state(mp4, settings, processed, state_file)
stability.pop(key, None)
did = True
except Exception:
stability[key] = (size, 1)
await _run_inference_and_state(mp4, settings, processed, state_file)
stability.pop(key, None)
did = True
for k in list(stability.keys()):
if k not in seen_keys:
del stability[k]

View File

@@ -1,11 +1,13 @@
from __future__ import annotations
import json
import math
import os
import re
import shutil
import subprocess
import sys
from datetime import date, datetime, timezone
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
@@ -59,6 +61,8 @@ def _predict_weigth_from_svo2_extra_args(settings: Settings) -> List[str]:
str(settings.predict_minute_interval_sec),
]
)
if not settings.measure_reuse_existing_clouds:
out.append("--no-reuse-existing-clouds")
return out
@@ -106,13 +110,116 @@ def run_measure_subprocess(svo_path: Path, settings: Settings) -> None:
)
def _load_weight_json(svo_path: Path, settings: Settings) -> Dict[str, Any]:
def _summary_entry_matches_svo(item: Dict[str, Any], svo_path: Path) -> bool:
stem = svo_path.stem
candidate = settings.measure_output_root / stem / "weight_prediction.json"
if not candidate.is_file():
raise FileNotFoundError(f"Expected output missing: {candidate}")
with open(candidate, encoding="utf-8") as f:
return json.load(f)
resolved = str(svo_path.resolve())
svo_key = item.get("svo")
if svo_key:
try:
if Path(str(svo_key)).resolve() == svo_path.resolve():
return True
except OSError:
pass
if str(svo_key) == resolved:
return True
if item.get("svo_name") == stem:
return True
return False
def _load_weight_json(svo_path: Path, settings: Settings) -> Dict[str, Any]:
"""读取 FishMeasure 合并结果。优先 per-SVO 的 weight_prediction.json否则从 weight_predictions_summary.json 取匹配项predict 脚本在权重步失败时仍 exit 0 只写 summary"""
stem = svo_path.stem
root = settings.measure_output_root
candidate = root / stem / "weight_prediction.json"
if candidate.is_file():
with open(candidate, encoding="utf-8") as f:
return json.load(f)
summary_path = root / "weight_predictions_summary.json"
if summary_path.is_file():
with open(summary_path, encoding="utf-8") as f:
summary_list: Any = json.load(f)
if isinstance(summary_list, list):
for item in reversed(summary_list):
if not isinstance(item, dict):
continue
if not _summary_entry_matches_svo(item, svo_path):
continue
err = item.get("error")
if err:
raise RuntimeError(
f"FishMeasure 权重步骤失败({svo_path.name}: {err}"
)
if item.get("per_cloud") or item.get("per_file") or item.get(
"dgcnn_summary"
):
return item
break
combined_path = root / "weight_prediction.json"
if combined_path.is_file():
with open(combined_path, encoding="utf-8") as f:
combined: Any = json.load(f)
if isinstance(combined, dict) and combined.get("combined"):
names = combined.get("svo_names") or []
if stem in names:
return combined
raise FileNotFoundError(
f"未找到测量结果 JSON{candidate}(且 summary 中无本条 SVO 的成功记录)"
)
_TID_RE = re.compile(r"_tid(\d+)")
def _parse_tid_from_ply_name(name: str) -> Optional[int]:
"""与 FishMeasure/fish_video_weight_evaluation._parse_tid_from_ply_name 一致。"""
m = _TID_RE.search(name)
return int(m.group(1)) if m else None
def _safe_media_prefix(stem: str) -> str:
s = re.sub(r"[^\w.\-]+", "_", stem, flags=re.UNICODE).strip("._") or "svo"
return s[:120]
def _result_from_weight_prediction(data: Dict[str, Any]) -> List[Dict[str, Any]]:
"""按 track_id 聚合:体重取 max(predicted_weight_g),体长取达到 max 的那条 PLY 的 length_input (mm)。"""
items = data.get("per_cloud") or data.get("per_file") or []
if not isinstance(items, list):
return []
# tid -> (max_weight_g, length_mm at max weight)
best: Dict[int, Tuple[float, float]] = {}
for it in items:
if not isinstance(it, dict):
continue
ply = it.get("ply")
if not ply:
continue
tid = _parse_tid_from_ply_name(Path(str(ply)).name)
if tid is None:
continue
try:
wg = float(it.get("predicted_weight_g", float("nan")))
except (TypeError, ValueError):
continue
if not math.isfinite(wg):
continue
try:
ln = float(it.get("length_input", float("nan")))
except (TypeError, ValueError):
ln = float("nan")
if tid not in best or wg > best[tid][0]:
best[tid] = (wg, ln)
out: List[Dict[str, Any]] = []
for tid in sorted(best.keys()):
wg, ln = best[tid]
if not math.isfinite(ln):
continue
out.append({"id": tid, "weight": wg, "length": ln})
return out
def _find_preview_videos(output_dir: Path) -> Tuple[Optional[Path], Optional[Path]]:
@@ -173,10 +280,12 @@ def _publish_media(
left: Optional[Path],
right: Optional[Path],
settings: Settings,
file_prefix: str,
) -> Tuple[str, str]:
settings.media_root.mkdir(parents=True, exist_ok=True)
left_dst = settings.media_root / "latest_left.mp4"
right_dst = settings.media_root / "latest_right.mp4"
safe_p = _safe_media_prefix(file_prefix)
left_dst = settings.media_root / f"{safe_p}_left.mp4"
right_dst = settings.media_root / f"{safe_p}_right.mp4"
base = settings.public_base_url.rstrip("/")
if left is not None and left == right and left.is_file():
@@ -200,37 +309,39 @@ def _publish_media(
def build_measure_snapshot(svo_path: Path, settings: Settings) -> MeasureSnapshot:
data = _load_weight_json(svo_path, settings)
summary = data.get("dgcnn_summary") or data.get("weight_summary") or {}
length_mm = summary.get("avg_length_input")
weight_g = summary.get("avg_predicted_weight_g")
if length_mm is None:
length_mm = data.get("avg_length_input")
length_mm = summary.get("avg_length_input_topk")
if weight_g is None:
weight_g = data.get("avg_predicted_weight_g")
if length_mm is None:
length_mm = summary.get("avg_length_input") or data.get("avg_length_input")
today = date.today().isoformat()
result_item = {
"id": 1,
"type": settings.default_fish_species,
"length": "" if length_mm is None else str(int(round(float(length_mm)))),
"weight": "" if weight_g is None else str(int(round(float(weight_g)))),
"date": today,
}
result: List[Dict[str, Any]] = []
if weight_g is not None and length_mm is not None:
try:
w = float(weight_g)
l = float(length_mm)
if math.isfinite(w) and math.isfinite(l):
result = [{"id": 1, "weight": w, "length": l}]
except (TypeError, ValueError):
pass
logger.info(
"[FishMeasure] parsed {}\navg_length_mm={} avg_weight_g={}\nweight_summary:\n{}",
"[FishMeasure] parsed {}\navg_weight_g(top5)={} avg_length_mm(top5)={}\nresult:\n{}\ndgcnn_summary:\n{}",
svo_path.name,
length_mm,
weight_g,
length_mm,
format_json_pretty(result),
format_json_pretty(summary if summary else {}),
)
logger.info(
"[FishMeasure] API result_item:\n{}",
format_json_pretty(result_item),
)
out_dir = Path(data.get("output_dir", settings.measure_output_root / svo_path.stem))
lv, rv = _find_preview_videos(out_dir)
v_left, v_right = _publish_media(lv, rv, settings)
prefix = (
f"{datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%S')}_{svo_path.stem}"
)
v_left, v_right = _publish_media(lv, rv, settings, prefix)
logger.info(
"[FishMeasure] media preview_paths={} {} | published_left={} published_right={}",
lv,
@@ -240,7 +351,7 @@ def build_measure_snapshot(svo_path: Path, settings: Settings) -> MeasureSnapsho
)
return MeasureSnapshot(
result=[result_item],
result=result,
video_left=v_left,
video_right=v_right,
updated_at=datetime.now(timezone.utc),

View File

@@ -8,10 +8,15 @@ from typing import Dict, Set
from loguru import logger
from app.db import add_watch_processed, load_watch_processed, save_measure_snapshot
from app.db import (
add_watch_processed,
load_watch_processed,
measure_snapshot_deliverable,
save_measure_snapshot,
)
from app.services import measure as measure_svc
from app.settings import Settings
from app.state import MeasureSnapshot, app_state
from app.state import app_state
from app.watch_idle import IdleWatchWarnState, idle_warn_interval_sec, maybe_warn_idle_watch
_MEASURE_IDLE_WARN_INTERVAL_SEC = idle_warn_interval_sec(
@@ -54,7 +59,13 @@ async def _run_measure_and_state(
app_state.measure_status = "running"
try:
snap = await asyncio.to_thread(measure_svc.run_full_measure, svo, settings)
save_measure_snapshot(settings, snap, source_path=key)
if measure_snapshot_deliverable(snap):
save_measure_snapshot(settings, snap, source_path=key)
else:
logger.warning(
"[measure-watch] no deliverable measure rows for {}, skip SQLite",
svo.name,
)
app_state.measure_status = "idle"
processed.add(key)
if settings.measure_watch_use_state_file:
@@ -62,20 +73,19 @@ async def _run_measure_and_state(
r0 = snap.result[0] if snap.result else {}
w = r0.get("weight", "")
logger.info("[measure-watch] done: {} weight={!r}", svo.name, w)
except (RuntimeError, FileNotFoundError) as e:
# FishMeasure 常见失败:无点云、缺 JSON 等,避免整段 traceback 刷屏
logger.warning("[measure-watch] measure failed for {}: {}", svo.name, e)
app_state.measure_status = "idle"
processed.add(key)
if settings.measure_watch_use_state_file:
add_watch_processed(settings, key, "measure")
except Exception as e:
save_measure_snapshot(
settings,
MeasureSnapshot(
result=[],
video_left="",
video_right="",
error=str(e),
),
source_path=key,
)
app_state.measure_status = "error"
logger.exception("[measure-watch] error on {}: {}", svo, e)
raise
app_state.measure_status = "idle"
processed.add(key)
if settings.measure_watch_use_state_file:
add_watch_processed(settings, key, "measure")
async def watch_tick(
@@ -109,12 +119,9 @@ async def watch_tick(
stability[key] = (size, cnt + 1)
_, cnt = stability[key]
if cnt >= settings.measure_watch_stable_polls:
try:
await _run_measure_and_state(svo, settings, processed, state_file)
stability.pop(key, None)
did = True
except Exception:
stability[key] = (size, 1)
await _run_measure_and_state(svo, settings, processed, state_file)
stability.pop(key, None)
did = True
for k in list(stability.keys()):
if k not in seen_keys:
del stability[k]

View File

@@ -81,6 +81,8 @@ class Settings(BaseSettings):
predict_flatness_threshold: float = 55.0
measure_weight_top_k: int = 5
measure_weight_top_by_length: bool = True
#: 为 False 时向 predict 传 --no-reuse-existing-clouds每次强制跑 fish_video避免误用空/陈旧 cloud可设 True 加速重复跑同一 SVO
measure_reuse_existing_clouds: bool = False
#: 为 True 时 fish_video 内联 DGCNN + 预览叠加(更重;需 fish_video 已支持)
predict_fish_video_weight_overlay: bool = False
predict_minute_interval_sec: float = 60.0