121 lines
3.7 KiB
Python
121 lines
3.7 KiB
Python
|
|
"""Prepare input artifacts expected by algorithm_subprocesses/5.15 main.py."""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import copy
|
||
|
|
import json
|
||
|
|
from dataclasses import dataclass
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
import yaml
|
||
|
|
|
||
|
|
from app.algo_host.bundle import load_reference_default_config, resolve_reference_bundle_dir
|
||
|
|
from app.algo_host.transcode import ensure_batch_pipeline_input_video
|
||
|
|
from app.consumable_catalog import build_name_mapping
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass(frozen=True)
class BatchJobFiles:
    """Immutable record of the filesystem paths that make up one batch job.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # YAML job configuration file.
    config_path: Path
    # Excel catalog of the candidate consumables.
    excel_path: Path
    # JSON whitelist of the allowed consumable names.
    whitelist_path: Path
    # Location configured as the job's output file.
    output_path: Path
    # Directory configured as the job's working directory.
    work_dir: Path
    # Video file handed to the job as its input.
    input_video_path: Path
|
||
|
|
|
||
|
|
|
||
|
|
def write_reference_catalog_excel(
    path: Path,
    *,
    candidate_consumables: list[str],
) -> None:
    """Write the candidate consumables to *path* as a three-column Excel sheet.

    Each candidate becomes one row holding a 1-based sequence number, a
    product code and the candidate name itself. The code is looked up in the
    mapping returned by ``build_name_mapping``; a name with no entry in that
    mapping is used as its own code. Missing parent directories of *path*
    are created before the sheet is written.

    Args:
        path: Destination of the Excel file.
        candidate_consumables: Consumable names, one sheet row per entry, in
            the given order.
    """
    # Function-scope import: pandas is only loaded when this function runs.
    import pandas as pd

    code_by_name = build_name_mapping(candidate_consumables)
    header = ["序号", "产品编码", "商品名称"]

    records = []
    for position, consumable in enumerate(candidate_consumables, start=1):
        records.append(
            {
                "序号": position,
                # Fall back to the name itself when no code is mapped.
                "产品编码": code_by_name.get(consumable, consumable),
                "商品名称": consumable,
            }
        )

    path.parent.mkdir(parents=True, exist_ok=True)
    # Passing the header explicitly fixes the column order and keeps the
    # header row present even when there are no candidates.
    sheet = pd.DataFrame(records, columns=header)
    sheet.to_excel(path, index=False)
|
||
|
|
|
||
|
|
|
||
|
|
def write_reference_whitelist_json(path: Path, *, candidate_consumables: list[str]) -> None:
    """Write the candidate names to *path* as a JSON whitelist document.

    The file holds a single object of the form
    ``{"allowed_names": [...]}``, encoded as UTF-8 with non-ASCII characters
    written literally and a two-space indent. Missing parent directories of
    *path* are created.

    Args:
        path: Destination of the JSON file.
        candidate_consumables: Names stored under ``allowed_names``, in the
            given order.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    payload = {"allowed_names": candidate_consumables}
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    path.write_text(serialized, encoding="utf-8")
|
||
|
|
|
||
|
|
|
||
|
|
def build_job_config(
    *,
    bundle_dir: Path,
    video_path: Path,
    output_path: Path,
    work_dir: Path,
    excel_path: Path,
    whitelist_path: Path,
) -> dict:
    """Return the bundle's default config with job-specific paths filled in.

    The default config loaded for *bundle_dir* is deep-copied, so the object
    returned by ``load_reference_default_config`` is never mutated. Every
    path is resolved and stored as a string. The default config must already
    contain ``io`` and ``runtime`` sections; a missing section raises
    ``KeyError``.

    Args:
        bundle_dir: Bundle directory whose default config is the template.
        video_path: Stored under ``io.video``.
        output_path: Stored under ``io.out``.
        work_dir: Stored under ``runtime.work_dir``.
        excel_path: Stored under ``io.excel``.
        whitelist_path: Stored under ``io.whitelist_json``.

    Returns:
        The customised configuration mapping.
    """
    cfg = copy.deepcopy(load_reference_default_config(bundle_dir))

    # Ordered so that keys missing from the defaults are appended in the
    # sequence video, excel, out, whitelist_json.
    io_locations = {
        "video": video_path,
        "excel": excel_path,
        "out": output_path,
        "whitelist_json": whitelist_path,
    }
    for key, location in io_locations.items():
        cfg["io"][key] = str(location.resolve())

    resolved_work_dir = str(work_dir.resolve())
    cfg["runtime"]["work_dir"] = resolved_work_dir
    # NOTE(review): presumably tells the pipeline to discard its working
    # directory afterwards; confirm against the bundle's config handling.
    cfg["runtime"]["keep_work_dir"] = False
    return cfg
|
||
|
|
|
||
|
|
|
||
|
|
def prepare_batch_job(
    *,
    bundle_dir: Path | None,
    cache_dir: Path,
    uploaded_video_path: Path,
    candidate_consumables: list[str],
) -> BatchJobFiles:
    """Lay out every input artifact for one batch job under *cache_dir*.

    Steps, in order:

    1. Resolve the bundle directory via ``resolve_reference_bundle_dir``.
    2. Create the ``input``, ``output``, ``work`` and ``config``
       sub-directories of *cache_dir*.
    3. Hand the uploaded video to ``ensure_batch_pipeline_input_video`` with
       ``input/input.mp4`` as its destination.
    4. Write the Excel catalog and the JSON whitelist into ``config``.
    5. Build the job config and dump it as YAML to ``config/config.yaml``.

    Args:
        bundle_dir: Bundle directory, or ``None``; passed unchanged to
            ``resolve_reference_bundle_dir``.
        cache_dir: Root directory under which all job files are placed.
        uploaded_video_path: Source video supplied for the job.
        candidate_consumables: Consumable names written to both the catalog
            and the whitelist.

    Returns:
        The paths of the prepared files. These are built from *cache_dir* as
        given; only the copies written into the config are resolved.
    """
    resolved_bundle = resolve_reference_bundle_dir(bundle_dir)

    input_dir, output_dir, work_dir, config_dir = (
        cache_dir / leaf for leaf in ("input", "output", "work", "config")
    )
    for folder in (input_dir, output_dir, work_dir, config_dir):
        folder.mkdir(parents=True, exist_ok=True)

    staged_video = input_dir / "input.mp4"
    # NOTE(review): presumably copies or transcodes the upload to the
    # destination; confirm in app.algo_host.transcode.
    ensure_batch_pipeline_input_video(
        source_path=uploaded_video_path,
        dest_path=staged_video,
    )

    job_files = BatchJobFiles(
        config_path=config_dir / "config.yaml",
        excel_path=config_dir / "商品信息表.xlsx",
        whitelist_path=config_dir / "whitelist.json",
        output_path=output_dir / "result.tsv",
        work_dir=work_dir,
        input_video_path=staged_video,
    )

    write_reference_catalog_excel(
        job_files.excel_path,
        candidate_consumables=candidate_consumables,
    )
    write_reference_whitelist_json(
        job_files.whitelist_path,
        candidate_consumables=candidate_consumables,
    )

    job_config = build_job_config(
        bundle_dir=resolved_bundle,
        video_path=job_files.input_video_path.resolve(),
        output_path=job_files.output_path.resolve(),
        work_dir=job_files.work_dir.resolve(),
        excel_path=job_files.excel_path.resolve(),
        whitelist_path=job_files.whitelist_path.resolve(),
    )
    # allow_unicode keeps non-ASCII text readable in the YAML file;
    # sort_keys=False preserves the key order of the config mapping.
    rendered = yaml.safe_dump(job_config, allow_unicode=True, sort_keys=False)
    job_files.config_path.write_text(rendered, encoding="utf-8")

    return job_files
|