Exemplo n.º 1
0
def run_remote_worker(worker_id, worker_path, id, num_workers):
    """Run worker on remote node.

    For every worker index the matching config is loaded, the environment it
    carries is applied to this process, the vega backend is selected from the
    ``BACKEND_TYPE`` / ``DEVICE_CATEGORY`` environment variables, and the
    worker is launched on the GPU or NPU. The ids returned by the launcher
    are then passed to ``kill_proc_tree``.

    :param worker_id: worker identifier, also used in the temp log file name
    :param worker_path: path used as log path and forwarded to the config
        loader and the launchers
    :param id: identifier forwarded to ``_load_config`` and the launchers
    :param num_workers: number of worker indexes to process sequentially
    :return: always 0
    """
    from vega.common.utils import init_log
    init_log(level="info",
             log_file=".temp_{}.log".format(worker_id),
             log_path=worker_path)
    for index in range(num_workers):
        config = _load_config(worker_id, worker_path, id, index)
        env = config["env"]
        if "LD_LIBRARY_PATH" in env and env["LD_LIBRARY_PATH"] is not None:
            os.environ["LD_LIBRARY_PATH"] = env["LD_LIBRARY_PATH"]
        os.environ["PWD"] = env["PWD"]
        os.chdir(os.environ["PWD"])
        vega.set_backend(os.environ['BACKEND_TYPE'].lower(),
                         os.environ["DEVICE_CATEGORY"])

        # Bind the list up front: without this, a device that is neither GPU
        # nor NPU left the name undefined and the cleanup loop below raised
        # NameError.
        sub_pid_list = []
        if vega.is_gpu_device():
            sub_pid_list = call_in_gpu(config, id, worker_id, worker_path,
                                       index)
        elif vega.is_npu_device():
            os.environ["PYTHONPATH"] = env["PYTHONPATH"]
            os.environ["PATH"] = env["PATH"]
            os.environ["ASCEND_OPP_PATH"] = env["ASCEND_OPP_PATH"]
            sub_pid_list = call_in_npu(config, id, worker_id, worker_path,
                                       index)
        else:
            logging.warning(
                "Unsupported device category, worker %s was not launched.",
                index)
        logging.info("DistributedWorker finished!")
        for sub_pid in sub_pid_list:
            kill_proc_tree(pid=sub_pid)
        logging.info("DistributedWorker subprocess cleaned!")
    return 0
Exemplo n.º 2
0
def load_config(config_file):
    """Restore a pickled configuration and apply it to this process.

    The file is expected to hold a mapping with the keys ``env``,
    ``class_factory``, ``general``, ``dataset``, ``model``, ``trainer``,
    ``evaluator`` and ``pipe_step``. Non-``None`` entries of ``env`` are
    exported as environment variables, the vega backend is selected from
    ``BACKEND_TYPE`` / ``DEVICE_CATEGORY``, and the remaining sections are
    pushed into the corresponding vega configuration classes.

    :param config_file: path of the pickled config file
    """
    import os
    import pickle
    import vega

    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    with open(config_file, 'rb') as stream:
        payload = pickle.load(stream)

    for name, setting in payload["env"].items():
        if setting is None:
            continue
        os.environ[name] = setting

    backend_type = os.environ['BACKEND_TYPE'].lower()
    vega.set_backend(backend_type, os.environ["DEVICE_CATEGORY"])

    # These imports are kept after set_backend, as in the original ordering.
    from vega.common.class_factory import ClassFactory
    from vega.common.general import General
    from vega.datasets.conf.dataset import DatasetConfig
    from vega.networks.model_config import ModelConfig
    from vega.trainer.conf import TrainerConfig
    from vega.evaluator.conf import EvaluatorConfig
    from vega.core.pipeline.conf import PipeStepConfig

    ClassFactory.__registry__ = payload["class_factory"]
    section_targets = (
        (General, "general"),
        (DatasetConfig, "dataset"),
        (ModelConfig, "model"),
        (TrainerConfig, "trainer"),
        (EvaluatorConfig, "evaluator"),
        (PipeStepConfig, "pipe_step"),
    )
    for target, section in section_targets:
        target.from_dict(payload[section])
Exemplo n.º 3
0
 def __init__(self):
     """Load the config, select the backend, set up logging and reporting.

     The log file name is built from ``General.step_name`` and
     ``self.worker_id``; ``worker_id`` is presumably made available by
     ``_load_config`` or the class itself (not visible here).
     """
     self._load_config()
     vega.set_backend(General.backend, General.device_category)
     log_level = General.logger.level
     log_name = f"{General.step_name}_worker_{self.worker_id}.log"
     log_dir = TaskOps().local_log_path
     init_log(level=log_level, log_file=log_name, log_path=log_dir)
     self.report_client = ReportClient()
Exemplo n.º 4
0
def _init_env(cfg_path):
    """Initialize config and environment parameters.

    Loads the user config, fills ``General`` from its ``general`` section,
    sets up the pipeline log, resolves ``General.env`` (falling back to
    ``init_cluster_args()`` when ``env_args()`` yields a falsy value),
    publishes the pipeline steps and finally selects the backend.

    :param cfg_path: config file path
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    UserConfig().load(cfg_path)

    # Fill General from the "general" section of the user config.
    general_section = UserConfig().data.get("general")
    General.from_dict(general_section, skip_check=False)
    init_log(
        level=General.logger.level,
        log_file="pipeline.log",
        log_path=TaskOps().local_log_path,
    )

    General.env = env_args()
    if not General.env:
        General.env = init_cluster_args()

    PipelineConfig.steps = UserConfig().data.pipeline
    set_backend(General.backend, General.device_category)
Exemplo n.º 5
0
def _set_backend(args):
    if args.backend in ["pytorch", "p"]:
        vega.set_backend("pytorch", args.device)
    elif args.backend in ["tensorflow", "t"]:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = "1"
        vega.set_backend("tensorflow", args.device)
    elif args.backend in ["mindspore", "m"]:
        vega.set_backend("mindspore", args.device)
Exemplo n.º 6
0
def _set_backend(args):
    """Resolve backend and device category, then activate them in vega.

    Command-line values take precedence: a short or long backend alias is
    expanded to its full name (an unrecognised non-empty value is kept as
    is). When the backend or the device is not given, the value is read
    from the ``general`` section of the config file, if present. Resolved
    values overwrite ``General.backend`` / ``General.device_category``
    before ``vega.set_backend`` is called.

    :param args: object exposing ``backend``, ``device`` and ``config_file``
    """
    selected_backend = args.backend
    selected_device = args.device

    if not selected_backend:
        file_cfg = Config(args.config_file)
        if "general" in file_cfg and "backend" in file_cfg["general"]:
            selected_backend = file_cfg["general"]["backend"]
    else:
        alias_groups = (
            ("pytorch", ["pytorch", "p"]),
            ("tensorflow", ["tensorflow", "t"]),
            ("mindspore", ["mindspore", "m"]),
        )
        for canonical, accepted in alias_groups:
            if args.backend in accepted:
                selected_backend = canonical
                break

    if not selected_device:
        file_cfg = Config(args.config_file)
        if "general" in file_cfg and "device_category" in file_cfg["general"]:
            selected_device = file_cfg["general"]["device_category"]

    if selected_backend:
        General.backend = selected_backend
    if selected_device:
        General.device_category = selected_device
    vega.set_backend(General.backend, General.device_category)
Exemplo n.º 7
0
# This program is free software; you can redistribute it and/or modify
# it under the terms of the MIT License.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# MIT License for more details.
"""The example of training model."""

import logging
import vega

# Create the trial agent; its worker_id and hps are handed to the trainer below.
logging.info("load trial")
trial = vega.TrialAgent()

# Select the PyTorch backend on GPU, then build a ResNet with depth=18 and
# move it to CUDA.
logging.info("create model")
vega.set_backend("pytorch", "GPU")
model = vega.network("ResNet", depth=18).cuda()

# Build the Cifar10 train and test loaders from the same data path,
# both with a batch size of 256.
logging.info("load dataset")
train_loader = vega.dataset("Cifar10",
                            data_path="/cache/datasets/cifar10",
                            mode="train",
                            batch_size=256).loader
test_loader = vega.dataset("Cifar10",
                           data_path="/cache/datasets/cifar10",
                           mode="test",
                           batch_size=256).loader

# Create the trainer for this trial and turn on the mixup option in its config.
logging.info("create trainer")
trainer = vega.trainer(model=model, id=trial.worker_id, hps=trial.hps)
trainer.config.mixup = True
Exemplo n.º 8
0

if __name__ == '__main__':
    # Validate the command line before touching sys.argv[1]: the original
    # check indexed sys.argv[1] even when no argument was given (IndexError)
    # and kept running after printing the usage text.
    argc = len(sys.argv)
    if argc < 2 or (argc > 3 and not sys.argv[1].endswith(".yml")):
        print("Usage:")
        print(
            "    python3 ./run_benchmark.py <algorithm's config file> [pytorch(default)]|[p]|[tensorflow]|[t]"
        )
        print("for example:")
        print("    python3 ./run_benchmark.py ./nas/cars/cars.yml")
        print("    python3 ./run_benchmark.py ./nas/cars/cars.yml pytorch")
        print("    python3 ./run_benchmark.py ./nas/cars/cars.yml tensorflow")
        sys.exit(1)
    # set backend (only when a recognised name is given as second argument)
    if argc == 3 and sys.argv[2] in ["pytorch", "p", "tensorflow", "t"]:
        if sys.argv[2] in ["pytorch", "p"]:
            vega.set_backend("pytorch")
        else:
            vega.set_backend("tensorflow")
    # import class lazily; the imports below are done for their side effects
    cfg_file = sys.argv[1]
    if cfg_file.endswith("fmd.yml"):
        _append_env()
        from algs.fully_train.fmd import FmdNetwork
    elif cfg_file.endswith("spnas.yml"):
        _append_env()
        import vega.algorithms.nas.sp_nas
    # run vega
    vega.run(cfg_file)
Exemplo n.º 9
0
                        "--output_image_file",
                        required=True,
                        type=str,
                        help="Output image file.")
    parser.add_argument("-d",
                        "--model_desc_file",
                        required=True,
                        type=str,
                        help="Model description file.")
    parser.add_argument("-w",
                        "--model_weights_file",
                        required=True,
                        type=str,
                        help="Model weights file(.pth).")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    # The backend is selected before the arguments are parsed, as before.
    vega.set_backend("pytorch")
    args = _parse_args()
    print("model description file: {}".format(args.model_desc_file))
    print("model weights file: {}".format(args.model_weights_file))
    print("input image: {}".format(args.input_image_file))
    print("output image: {}".format(args.output_image_file))
    # Any exception from _cam propagates unchanged; the former
    # ``except Exception as e: raise e`` wrapper added nothing.
    _cam(args)
    print("OK.")
Exemplo n.º 10
0
import pickle
import vega


def parse_args_parser():
    """Parse the command-line options of the inference script.

    All options are optional strings. ``--backend`` defaults to 'pytorch',
    ``--device_category`` to 'GPU' and ``--result_path`` to './result.pkl';
    the remaining options default to ``None``.

    :return: the parsed argument namespace
    """
    parser = argparse.ArgumentParser(description='Vega Inference.')
    option_defaults = (
        ("--model_desc", None),
        ("--model", None),
        ("--data_type", None),
        ("--data_path", None),
        ("--backend", 'pytorch'),
        ("--device_category", 'GPU'),
        ("--result_path", './result.pkl'),
    )
    for flag, fallback in option_defaults:
        parser.add_argument(flag, default=fallback, type=str)
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args_parser()
    vega.set_backend(args.backend, args.device_category)
    # Imported after set_backend, as in the original ordering.
    from vega.model_zoo import ModelZoo
    from vega.datasets.pytorch.common.dataset import Dataset
    dataset = Dataset(type=args.data_type, mode='test', data_path=args.data_path)
    valid_dataloader = dataset.dataloader
    model = ModelZoo.get_model(args.model_desc, args.model)
    result = ModelZoo.infer(model, valid_dataloader)
    # Use a context manager so the result file is closed even if pickling
    # the inference result fails.
    with open(args.result_path, 'wb') as output:
        pickle.dump(result, output)