def run_remote_worker(worker_id, worker_path, id, num_workers):
    """Run worker on remote node.

    For every index in ``range(num_workers)`` the matching config is loaded
    with ``_load_config``, the environment variables recorded in
    ``config["env"]`` are restored, the vega backend is selected from the
    ``BACKEND_TYPE`` / ``DEVICE_CATEGORY`` environment variables and the
    worker is launched through ``call_in_gpu`` or ``call_in_npu``. The
    process ids returned by the launcher are then terminated with
    ``kill_proc_tree``.

    :param worker_id: worker identifier; used in the log file name and
        forwarded to the config loader and the launcher
    :param worker_path: directory used for the log file; forwarded to the
        config loader and the launcher
    :param id: identifier forwarded to the config loader and the launcher
        (shadows the builtin, kept because it is part of the signature)
    :param num_workers: number of worker indices to run
    :return: always 0
    :raises RuntimeError: if the selected device is neither GPU nor NPU
    """
    from vega.common.utils import init_log

    init_log(level="info",
             log_file=".temp_{}.log".format(worker_id),
             log_path=worker_path)
    for index in range(num_workers):
        config = _load_config(worker_id, worker_path, id, index)
        env = config["env"]
        if "LD_LIBRARY_PATH" in env and env["LD_LIBRARY_PATH"] is not None:
            os.environ["LD_LIBRARY_PATH"] = env["LD_LIBRARY_PATH"]
        os.environ["PWD"] = env["PWD"]
        os.chdir(os.environ["PWD"])
        vega.set_backend(os.environ['BACKEND_TYPE'].lower(), os.environ["DEVICE_CATEGORY"])

        if vega.is_gpu_device():
            sub_pid_list = call_in_gpu(config, id, worker_id, worker_path, index)
        elif vega.is_npu_device():
            os.environ["PYTHONPATH"] = env["PYTHONPATH"]
            os.environ["PATH"] = env["PATH"]
            os.environ["ASCEND_OPP_PATH"] = env["ASCEND_OPP_PATH"]
            sub_pid_list = call_in_npu(config, id, worker_id, worker_path, index)
        else:
            # Previously this path fell through with ``sub_pid_list`` unbound
            # (or stale from an earlier iteration) and failed with an
            # accidental UnboundLocalError in the cleanup loop below.
            raise RuntimeError(
                "Unsupported device category: {}".format(os.environ["DEVICE_CATEGORY"]))
        logging.info("DistributedWorker finished!")
        for sub_pid in sub_pid_list:
            kill_proc_tree(pid=sub_pid)
        logging.info("DistributedWorker subprocess cleaned!")
    return 0
def load_config(config_file):
    """Load config from file.

    Unpickles ``config_file``, exports every non-``None`` entry of its
    ``"env"`` mapping to ``os.environ``, selects the vega backend from the
    ``BACKEND_TYPE`` / ``DEVICE_CATEGORY`` environment variables and then
    restores the class registry and the config classes from the remaining
    sections of the loaded dict.

    :param config_file: path of the pickled config file
    """
    import os
    import pickle
    import vega

    # NOTE(review): pickle.load can execute arbitrary code; presumably this
    # only ever reads files written by vega itself -- confirm.
    with open(config_file, 'rb') as stream:
        settings = pickle.load(stream)

    for name, setting in settings["env"].items():
        if setting is None:
            continue
        os.environ[name] = setting

    backend_type = os.environ['BACKEND_TYPE'].lower()
    vega.set_backend(backend_type, os.environ["DEVICE_CATEGORY"])

    # These imports intentionally stay below set_backend(), as in the
    # original statement order.
    from vega.common.class_factory import ClassFactory
    from vega.common.general import General
    from vega.datasets.conf.dataset import DatasetConfig
    from vega.networks.model_config import ModelConfig
    from vega.trainer.conf import TrainerConfig
    from vega.evaluator.conf import EvaluatorConfig
    from vega.core.pipeline.conf import PipeStepConfig

    ClassFactory.__registry__ = settings["class_factory"]

    # Section keys are looked up one at a time so a missing section fails at
    # the same point as a sequence of individual from_dict() calls would.
    restore_order = (
        (General, "general"),
        (DatasetConfig, "dataset"),
        (ModelConfig, "model"),
        (TrainerConfig, "trainer"),
        (EvaluatorConfig, "evaluator"),
        (PipeStepConfig, "pipe_step"),
    )
    for conf_cls, section in restore_order:
        conf_cls.from_dict(settings[section])
def __init__(self):
    """Load the config, select the backend, set up logging and reporting.

    The log file is named ``<General.step_name>_worker_<worker_id>.log`` and
    is placed under ``TaskOps().local_log_path``.
    """
    self._load_config()
    vega.set_backend(General.backend, General.device_category)

    # NOTE(review): self.worker_id is presumably populated by
    # _load_config() -- confirm against that method.
    log_level = General.logger.level
    log_name = f"{General.step_name}_worker_{self.worker_id}.log"
    log_dir = TaskOps().local_log_path
    init_log(level=log_level, log_file=log_name, log_path=log_dir)

    self.report_client = ReportClient()
def _init_env(cfg_path):
    """Init config and env parameters.

    Loads the user config, applies its ``general`` section, configures
    logging to ``pipeline.log``, fills ``General.env``, publishes the
    pipeline steps on ``PipelineConfig`` and finally selects the backend.

    :param cfg_path: config file path
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)

    UserConfig().load(cfg_path)

    # load general
    general_section = UserConfig().data.get("general")
    General.from_dict(general_section, skip_check=False)

    init_log(
        level=General.logger.level,
        log_file="pipeline.log",
        log_path=TaskOps().local_log_path,
    )

    # Fall back to init_cluster_args() when env_args() yields a falsy value.
    General.env = env_args()
    if not General.env:
        General.env = init_cluster_args()

    PipelineConfig.steps = UserConfig().data.pipeline
    set_backend(General.backend, General.device_category)
def _set_backend(args):
    """Select the vega backend named by ``args.backend``.

    Accepts the full names ``pytorch`` / ``tensorflow`` / ``mindspore`` and
    their one-letter aliases ``p`` / ``t`` / ``m``. Any other value leaves
    the backend untouched.

    :param args: object providing ``backend`` and ``device`` attributes
    """
    aliases = {
        "pytorch": "pytorch", "p": "pytorch",
        "tensorflow": "tensorflow", "t": "tensorflow",
        "mindspore": "mindspore", "m": "mindspore",
    }
    backend = aliases.get(args.backend)
    if backend is None:
        return
    if backend == "tensorflow":
        # Set before the backend is selected, as in the original ordering.
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = "1"
    vega.set_backend(backend, args.device)
def _set_backend(args):
    """Resolve backend and device, store them on ``General`` and apply them.

    A backend given on ``args`` wins (one-letter aliases ``p`` / ``t`` / ``m``
    are expanded, other values are passed through unchanged); otherwise
    ``general.backend`` from the config file is used when present. The device
    is taken from ``args.device`` or, failing that, from
    ``general.device_category`` in the config file. Values that end up falsy
    leave the corresponding ``General`` attribute as it was.

    :param args: object providing ``backend``, ``device`` and ``config_file``
    """

    def _general_option(key):
        """Return ``general[key]`` from the config file, or None if absent."""
        config = Config(args.config_file)
        if "general" in config and key in config["general"]:
            return config["general"][key]
        return None

    aliases = {
        "pytorch": "pytorch", "p": "pytorch",
        "tensorflow": "tensorflow", "t": "tensorflow",
        "mindspore": "mindspore", "m": "mindspore",
    }

    requested = args.backend
    device = args.device

    if requested:
        backend = aliases.get(requested, requested)
    else:
        backend = _general_option("backend")

    if not device:
        device = _general_option("device_category")

    if backend:
        General.backend = backend
    if device:
        General.device_category = device
    vega.set_backend(General.backend, General.device_category)
# This program is free software; you can redistribute it and/or modify
# it under the terms of the MIT License.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# MIT License for more details.

"""The example of training model."""

import logging

import vega

# Step 1: obtain the trial, which supplies worker_id and hps further down.
logging.info("load trial")
trial = vega.TrialAgent()

# Step 2: select the backend, then build the network on the GPU.
logging.info("create model")
vega.set_backend("pytorch", "GPU")
model = vega.network("ResNet", depth=18).cuda()

# Step 3: build the train/test loaders; both share everything but the mode.
logging.info("load dataset")
_cifar10_options = {"data_path": "/cache/datasets/cifar10", "batch_size": 256}
train_loader = vega.dataset("Cifar10", mode="train", **_cifar10_options).loader
test_loader = vega.dataset("Cifar10", mode="test", **_cifar10_options).loader

# Step 4: create the trainer for this trial and enable mixup.
logging.info("create trainer")
trainer = vega.trainer(model=model, id=trial.worker_id, hps=trial.hps)
trainer.config.mixup = True
if __name__ == '__main__':
    # Validate the command line before sys.argv[1] is used anywhere else.
    # The previous check combined the two conditions with ``and``, indexed
    # sys.argv[1] even when no argument was given (IndexError) and carried on
    # running after printing the usage text.
    if len(sys.argv) not in (2, 3) or not sys.argv[1].endswith((".yml", ".yaml")):
        print("Usage:")
        print(
            " python3 ./run_benchmark.py <algorithm's config file> [pytorch(default)]|[p]|[tensorflow]|[t]"
        )
        print("for example:")
        print(" python3 ./run_benchmark.py ./nas/cars/cars.yml")
        print(" python3 ./run_benchmark.py ./nas/cars/cars.yml pytorch")
        print(" python3 ./run_benchmark.py ./nas/cars/cars.yml tensorflow")
        sys.exit(1)

    # set backend (an unrecognised second argument is ignored, as before)
    if len(sys.argv) == 3:
        if sys.argv[2] in ("pytorch", "p"):
            vega.set_backend("pytorch")
        elif sys.argv[2] in ("tensorflow", "t"):
            vega.set_backend("tensorflow")

    # import class lazily; the imported names are not referenced below, so
    # the imports are presumably needed for their side effects -- confirm.
    cfg_file = sys.argv[1]
    if cfg_file.endswith("fmd.yml"):
        _append_env()
        from algs.fully_train.fmd import FmdNetwork  # noqa: F401
    elif cfg_file.endswith("spnas.yml"):
        _append_env()
        import vega.algorithms.nas.sp_nas  # noqa: F401

    # run vega
    vega.run(cfg_file)
"--output_image_file", required=True, type=str, help="Output image file.") parser.add_argument("-d", "--model_desc_file", required=True, type=str, help="Model description file.") parser.add_argument("-w", "--model_weights_file", required=True, type=str, help="Model weights file(.pth).") args = parser.parse_args() return args if __name__ == "__main__": vega.set_backend("pytorch") args = _parse_args() print("model description file: {}".format(args.model_desc_file)) print("model weights file: {}".format(args.model_weights_file)) print("input image: {}".format(args.input_image_file)) print("output image: {}".format(args.output_image_file)) try: _cam(args) print("OK.") except Exception as e: raise e
import pickle
import vega


def parse_args_parser():
    """Parse parameters.

    :return: the ``argparse`` namespace with ``model_desc``, ``model``,
        ``data_type``, ``data_path``, ``backend``, ``device_category`` and
        ``result_path``
    """
    parser = argparse.ArgumentParser(description='Vega Inference.')
    parser.add_argument("--model_desc", default=None, type=str)
    parser.add_argument("--model", default=None, type=str)
    parser.add_argument("--data_type", default=None, type=str)
    parser.add_argument("--data_path", default=None, type=str)
    parser.add_argument("--backend", default='pytorch', type=str)
    parser.add_argument("--device_category", default='GPU', type=str)
    parser.add_argument("--result_path", default='./result.pkl', type=str)
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args_parser()
    vega.set_backend(args.backend, args.device_category)
    # Imported only after the backend has been selected, as in the original.
    from vega.model_zoo import ModelZoo
    from vega.datasets.pytorch.common.dataset import Dataset
    dataset = Dataset(type=args.data_type, mode='test', data_path=args.data_path)
    valid_dataloader = dataset.dataloader
    model = ModelZoo.get_model(args.model_desc, args.model)
    result = ModelZoo.infer(model, valid_dataloader)
    # Context manager closes the result file even if pickle.dump raises.
    with open(args.result_path, 'wb') as output:
        pickle.dump(result, output)