"""Prepare input artifacts expected by algorithm_subprocesses/5.15 main.py.""" from __future__ import annotations import copy import json from dataclasses import dataclass from pathlib import Path import yaml from app.algo_host.bundle import load_reference_default_config, resolve_reference_bundle_dir from app.consumable_catalog import build_name_mapping @dataclass(frozen=True) class BatchJobFiles: config_path: Path excel_path: Path whitelist_path: Path output_path: Path work_dir: Path input_video_path: Path def write_reference_catalog_excel( path: Path, *, candidate_consumables: list[str], ) -> None: import pandas as pd name_to_code = build_name_mapping(candidate_consumables) rows = [ { "序号": idx, "产品编码": name_to_code.get(name, name), "商品名称": name, } for idx, name in enumerate(candidate_consumables, start=1) ] path.parent.mkdir(parents=True, exist_ok=True) pd.DataFrame(rows, columns=["序号", "产品编码", "商品名称"]).to_excel(path, index=False) def write_reference_whitelist_json(path: Path, *, candidate_consumables: list[str]) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text( json.dumps({"allowed_names": candidate_consumables}, ensure_ascii=False, indent=2), encoding="utf-8", ) def build_job_config( *, bundle_dir: Path, video_path: Path, output_path: Path, work_dir: Path, excel_path: Path, whitelist_path: Path, ) -> dict: cfg = copy.deepcopy(load_reference_default_config(bundle_dir)) cfg["io"]["video"] = str(video_path.resolve()) cfg["io"]["excel"] = str(excel_path.resolve()) cfg["io"]["out"] = str(output_path.resolve()) cfg["io"]["whitelist_json"] = str(whitelist_path.resolve()) cfg["runtime"]["work_dir"] = str(work_dir.resolve()) cfg["runtime"]["keep_work_dir"] = False return cfg def prepare_batch_job( *, bundle_dir: Path | None, cache_dir: Path, pipeline_video_path: Path, candidate_consumables: list[str], ) -> BatchJobFiles: root = resolve_reference_bundle_dir(bundle_dir) cache_output_dir = cache_dir / "output" cache_work_dir = cache_dir / "work" cache_config_dir = cache_dir / "config" for d in (cache_output_dir, cache_work_dir, cache_config_dir): d.mkdir(parents=True, exist_ok=True) pipeline_video = pipeline_video_path.resolve() output_path = cache_output_dir / "result.tsv" excel_path = cache_config_dir / "商品信息表.xlsx" whitelist_path = cache_config_dir / "whitelist.json" config_path = cache_config_dir / "config.yaml" write_reference_catalog_excel(excel_path, candidate_consumables=candidate_consumables) write_reference_whitelist_json(whitelist_path, candidate_consumables=candidate_consumables) config = build_job_config( bundle_dir=root, video_path=pipeline_video, output_path=output_path.resolve(), work_dir=cache_work_dir.resolve(), excel_path=excel_path.resolve(), whitelist_path=whitelist_path.resolve(), ) config_path.write_text( yaml.safe_dump(config, allow_unicode=True, sort_keys=False), encoding="utf-8", ) return BatchJobFiles( config_path=config_path, excel_path=excel_path, whitelist_path=whitelist_path, output_path=output_path, work_dir=cache_work_dir, input_video_path=pipeline_video, )