Example No. 1
def _init_device_info():
    """
    INTERNAL USE ONLY!
    The rank_id needs to be passed into the deeper layers for numa and device_queue.
    One process works with only one rank_id. In the standalone scenario,
    rank_id may come from the env 'CUDA_VISIBLE_DEVICES'; in the distributed
    scenario, rank_id comes from _get_global_rank().
    """
    from mindspore import context
    from mindspore.parallel._auto_parallel_context import auto_parallel_context
    from mindspore.parallel._utils import _get_global_rank, _get_device_num
    if context.get_context("device_target") == "GPU":
        rank_id = _get_global_rank()
        parallel_mode = auto_parallel_context().get_parallel_mode()
        if parallel_mode == "stand_alone":
            cuda_device_info = os.getenv("CUDA_VISIBLE_DEVICES")
            if cuda_device_info:
                cuda_id = int(cuda_device_info.split(",")[0].strip())
                if cuda_id != rank_id:
                    rank_id = cuda_id
        _config.set_rank_id(rank_id)
    elif context.get_context("device_target") == "Ascend":
        rank_id = _get_global_rank()
        device_num = _get_device_num()
        # Ascend only supports the multi-process scenario
        if device_num > 1:
            _config.set_rank_id(rank_id)
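
In the standalone GPU branch above, the rank id is taken from the first entry of CUDA_VISIBLE_DEVICES. A minimal, self-contained sketch of just that parsing (the helper name and the fallback of 0 are illustrative, not part of MindSpore):

import os

def _rank_id_from_cuda_env(default_rank_id=0):
    # Mirrors the standalone GPU branch above: take the first visible device as the rank id.
    cuda_device_info = os.getenv("CUDA_VISIBLE_DEVICES")
    if cuda_device_info:
        return int(cuda_device_info.split(",")[0].strip())
    return default_rank_id

os.environ["CUDA_VISIBLE_DEVICES"] = "3,4"
print(_rank_id_from_cuda_env())  # 3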
Example No. 2
def _init_device_info():
    """
    INTERNAL USE ONLY!
    The rank_id needs to be passed into the deeper layers for numa and device_queue.
    One process works with only one rank_id. In the standalone scenario,
    rank_id may come from the env 'CUDA_VISIBLE_DEVICES'; in the distributed
    scenario, rank_id comes from _get_global_rank().
    """
    from mindspore import context
    from mindspore.parallel._auto_parallel_context import auto_parallel_context
    from mindspore.parallel._utils import _get_global_rank
    if context.get_context("device_target") == "GPU":
        rank_id = _get_global_rank()
        parallel_mode = auto_parallel_context().get_parallel_mode()
        if parallel_mode == "stand_alone":
            cuda_device_info = os.getenv("CUDA_VISIBLE_DEVICES")
            if cuda_device_info:
                cuda_id = int(cuda_device_info.split(",")[0].strip())
                if cuda_id != rank_id:
                    rank_id = cuda_id
        _config.set_rank_id(rank_id)
    elif context.get_context("device_target") == "Ascend":
        # Ascend is a special scenario; it is better to get rank info from env
        env_rank_size = os.getenv("RANK_SIZE", None)
        env_rank_id = os.getenv("RANK_ID", None)
        if env_rank_size and env_rank_id:
            # Ascend only supports the multi-process scenario
            rank_size = int(env_rank_size.strip())
            rank_id = int(env_rank_id.strip())
            if rank_size > 1:
                _config.set_rank_id(rank_id)
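
In the Ascend branch, rank information comes from the RANK_SIZE and RANK_ID environment variables, and the rank id is only recorded in the multi-process case. A self-contained sketch of that guard (the helper name and the None fallback are assumptions for illustration):

import os

def _rank_id_from_ascend_env():
    # Mirrors the Ascend branch above: a rank id is only reported when RANK_SIZE
    # indicates more than one process; otherwise None is returned.
    env_rank_size = os.getenv("RANK_SIZE", None)
    env_rank_id = os.getenv("RANK_ID", None)
    if env_rank_size and env_rank_id:
        if int(env_rank_size.strip()) > 1:
            return int(env_rank_id.strip())
    return None

os.environ["RANK_SIZE"] = "8"
os.environ["RANK_ID"] = "5"
print(_rank_id_from_ascend_env())  # 5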
Example No. 3
    def __init__(self,
                 network,
                 loss_fn=None,
                 optimizer=None,
                 metrics=None,
                 eval_network=None,
                 eval_indexes=None,
                 amp_level="O0",
                 frequency=278,
                 stop_epoch=100,
                 **kwargs):
        self._network = network
        self._loss_fn = loss_fn
        self._optimizer = optimizer
        self._loss_scale_manager = None
        self._loss_scale_manager_set = False
        self._keep_bn_fp32 = True
        self._check_kwargs(kwargs)
        self._amp_level = amp_level
        self._process_amp_args(kwargs)
        self._parallel_mode = _get_parallel_mode()
        self._device_number = _get_device_num()
        self._global_rank = _get_global_rank()
        self._parameter_broadcast = _get_parameter_broadcast()
        self._frequency = frequency
        self._stop_epoch = stop_epoch

        self._train_network = self._build_train_network()
        self._build_eval_network(metrics, eval_network, eval_indexes)
        self._build_predict_network()
Example No. 4
def _init_device_info():
    """
    INTERNAL USE ONLY!
    The rank_id needs to be passed into the deeper layers for numa and device_queue.
    One process works with only one rank_id. In the standalone scenario,
    rank_id may come from the env 'CUDA_VISIBLE_DEVICES'; in the distributed
    scenario, rank_id comes from _get_global_rank().
    """
    from mindspore import context
    from mindspore.parallel._auto_parallel_context import auto_parallel_context
    from mindspore.parallel._utils import _get_global_rank
    if context.get_context("device_target") == "GPU":
        rank_id = _get_global_rank()
        parallel_mode = auto_parallel_context().get_parallel_mode()
        if parallel_mode == "stand_alone":
            rank_id = context.get_context("device_id")
        _config.set_rank_id(rank_id)
    elif context.get_context("device_target") == "Ascend":
        # Ascend is a special scenario; it is better to get rank info from env
        env_rank_size = os.getenv("RANK_SIZE", None)
        env_rank_id = os.getenv("RANK_ID", None)
        if env_rank_size and env_rank_id:
            # Ascend only supports the multi-process scenario
            rank_size = int(env_rank_size.strip())
            rank_id = int(env_rank_id.strip())
            if rank_size > 1:
                _config.set_rank_id(rank_id)
            # Currently, a single process under Ascend mode doesn't support numa bind, for performance reasons.
            if _config.get_numa_enable() is True and rank_size == 1:
                raise ValueError(
                    "single process under Ascend mode doesn't support numa bind for "
                    "performance consideration.")
Example No. 5
def _init_device_info():
    """
    INTERNAL USE ONLY!
    The rank_id needs to be passed into the deeper layers for numa and device_queue.
    One process works with only one rank_id. In the standalone scenario,
    rank_id may come from the env 'CUDA_VISIBLE_DEVICES'; in the distributed
    scenario, rank_id comes from _get_global_rank().
    """
    from mindspore import context
    from mindspore.parallel._auto_parallel_context import auto_parallel_context
    from mindspore.parallel._utils import _get_global_rank
    numa_enable = False
    numa_enable_env = os.getenv("DATASET_ENABLE_NUMA", None)
    if numa_enable_env and numa_enable_env.strip() == 'True':
        numa_enable = True
    if context.get_context("device_target") == "GPU":
        rank_id = _get_global_rank()
        parallel_mode = auto_parallel_context().get_parallel_mode()
        if parallel_mode == "stand_alone":
            rank_id = context.get_context("device_id")
        if numa_enable:
            _config.set_numa_enable(True)
        _config.set_rank_id(rank_id)
    elif context.get_context("device_target") == "Ascend":
        # Ascend is a special scenario; it is better to get rank info from env
        env_rank_size = os.getenv("RANK_SIZE", None)
        env_rank_id = os.getenv("RANK_ID", None)
        if env_rank_size and env_rank_id:
            # Ascend only supports the multi-process scenario
            rank_size = int(env_rank_size.strip())
            rank_id = int(env_rank_id.strip())
            if rank_size > 1:
                if numa_enable:
                    _config.set_numa_enable(True)
                _config.set_rank_id(rank_id)
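
Here the numa switch is read from the DATASET_ENABLE_NUMA environment variable, and only the literal string 'True' enables it. A small sketch of that parsing (the helper name is illustrative):

import os

def _numa_enabled_from_env():
    # Only the exact string 'True' (after stripping whitespace) enables numa,
    # mirroring the check in the snippet above.
    numa_enable_env = os.getenv("DATASET_ENABLE_NUMA", None)
    return bool(numa_enable_env and numa_enable_env.strip() == 'True')

os.environ["DATASET_ENABLE_NUMA"] = "True"
print(_numa_enabled_from_env())  # True
os.environ["DATASET_ENABLE_NUMA"] = "true"
print(_numa_enabled_from_env())  # False: the comparison is case-sensitive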
Example No. 6
    def _use_parallel_optimizer(self):
        """Indicates whether to use automatic parallelism."""
        if context.get_auto_parallel_context("enable_parallel_optimizer"):
            if _get_parallel_mode() == ParallelMode.DATA_PARALLEL and context.get_context("device_target") == "Ascend":
                self.use_parallel = True
            elif _get_parallel_mode() == ParallelMode.DATA_PARALLEL \
                    and context.get_context("device_target") != "Ascend":
                raise RuntimeError("Parallel optimizer only supports Ascend in data parallel mode.")
            elif _get_parallel_mode() in (ParallelMode.STAND_ALONE, ParallelMode.HYBRID_PARALLEL):
                raise RuntimeError("Parallel optimizer is not supported in {}.".format(_get_parallel_mode()))
            else:
                self.use_parallel = False
        else:
            self.use_parallel = False
        if self.use_parallel:
            if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
                raise RuntimeError("Parallel optimizer does not support optimizer {}".format(self.cls_name))
            self.dev_num = _get_device_num()
            if self.dev_num > self.param_length:
                raise RuntimeError("Parallel optimizer cannot be applied when the number of parameters {} is"
                                   " less than the number of devices {}".format(self.param_length, self.dev_num))
            self.param_rank = self._get_parameter_group_id()
            self.optim_filter = tuple(map(lambda x: x == _get_global_rank(), self.param_rank))
            self.param_names = []
            for param in self.parameters:
                self.param_names.append(param.name)
        else:
            self.optim_filter = (True,) * self.param_length
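
The optim_filter built above is a tuple of booleans, one per parameter, that is True only where the parameter's assigned rank equals this device's global rank. A small sketch of that filter, with a hypothetical round-robin assignment standing in for _get_parameter_group_id():

def _build_optim_filter(param_rank, global_rank):
    # True where this device owns the parameter; mirrors the tuple(map(...)) above.
    return tuple(r == global_rank for r in param_rank)

# Hypothetical round-robin assignment of 6 parameters across 4 devices.
param_rank = tuple(i % 4 for i in range(6))            # (0, 1, 2, 3, 0, 1)
print(_build_optim_filter(param_rank, global_rank=1))  # (False, True, False, False, False, True)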
Example No. 7
    def __init__(self, dataset, first_order_order):
        self.dataset = dataset
        self.device_num = _get_device_num()
        self.global_rank = _get_global_rank()
        self.repeat_count = dataset.get_repeat_count()
        self.repeat_ind = 0
        self.loop_count = dataset.get_dataset_size()
        self.ind = 0

        parallel_mode = context.get_auto_parallel_context("parallel_mode")
        self.need_to_full = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL,
                                              ParallelMode.AUTO_PARALLEL)
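
In this dataset helper, need_to_full is set only for the semi-auto and auto parallel modes. A minimal sketch of that decision, with plain strings standing in for the ParallelMode enum values:

def _need_to_full(parallel_mode):
    # Plain-string stand-ins for ParallelMode.SEMI_AUTO_PARALLEL / AUTO_PARALLEL.
    return parallel_mode in ("semi_auto_parallel", "auto_parallel")

print(_need_to_full("semi_auto_parallel"))  # True
print(_need_to_full("data_parallel"))       # False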
Example No. 8
    def __init__(self,
                 learning_rate,
                 parameters,
                 weight_decay=0.0,
                 loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters is not None and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError(
                "Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float],
                                   self.cls_name)
        validator.check_positive_float(loss_scale, "loss_scale", self.cls_name)
        self.loss_scale = loss_scale

        weight_decay = self._preprocess_weight_decay(weight_decay)

        self._unique = True
        self._target = context.get_context("device_target")
        self.dynamic_lr = False
        self.assignadd = None
        self.global_step = None
        self.is_group = False
        self.is_group_lr = False
        self.is_group_params_ordered = False
        learning_rate = self._preprocess_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.group_params = []
            self.group_lr = []
            self.group_weight_decay = []
            self._init_group_params(parameters, learning_rate, weight_decay)

        # The final value of dynamic_lr can be determined after the process of parse_single_lr and init_group_params
        if self.dynamic_lr:
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32),
                                         name='global_step')

        if self.is_group_lr:
            if self.dynamic_lr:
                self.learning_rate = CellList(self.group_lr)
            else:
                self.learning_rate = ParameterTuple(self.group_lr)
        else:
            self.learning_rate = self._build_single_lr(learning_rate,
                                                       'learning_rate')
        if self.is_group:
            self.parameters = ParameterTuple(self.group_params)
            self.weight_decay = tuple(self.group_weight_decay)
            self.weight_decay_tensor_tuple = tuple(
                Tensor(x, mstype.float32) for x in self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(
                decay_filter(x) for x in self.weight_decay)
            self.exec_weight_decay = any(self.decay_flags)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            self.weight_decay_tensor = Tensor(self.weight_decay,
                                              mstype.float32)
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
            self.exec_weight_decay = self.weight_decay > 0
        # When a parameter has already been uniqued, there is no need to do another unique in the optimizer.
        for param in self.parameters:
            if param.unique:
                self._unique = False
                break
        ps_filter = lambda x: x.is_param_ps
        self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
        ps_cache_filter = lambda x: x.cache_enable
        self.cache_enable = tuple(ps_cache_filter(x) for x in self.parameters)
        self.reciprocal_scale = Tensor(1.0 / loss_scale, mstype.float32)
        self.need_scale = loss_scale != 1.0
        self.global_step_increase_tensor = Tensor(1, mstype.int32)
        self.param_length = len(self.parameters)
        self.map_ = C.Map()
        if context.get_auto_parallel_context("enable_parallel_optimizer"):
            if _get_parallel_mode() == ParallelMode.DATA_PARALLEL and \
                    context.get_context("device_target") == "Ascend":
                self.use_parallel = True
            elif _get_parallel_mode() == ParallelMode.DATA_PARALLEL \
                    and context.get_context("device_target") != "Ascend":
                raise RuntimeError(
                    "Parallel optimizer only supports Ascend in data parallel mode."
                )
            elif _get_parallel_mode() in (ParallelMode.STAND_ALONE,
                                          ParallelMode.HYBRID_PARALLEL):
                raise RuntimeError(
                    "Parallel optimizer is not supported in {}.".format(
                        _get_parallel_mode()))
            else:
                self.use_parallel = False
        else:
            self.use_parallel = False
        if self.use_parallel:
            if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
                raise RuntimeError(
                    "Parallel optimizer does not support optimizer {}".format(
                        self.cls_name))
            self.dev_num = _get_device_num()
            if self.dev_num > self.param_length:
                raise RuntimeError(
                    "Parallel optimizer can not be applied when the number of parameters {} is"
                    " less than the number of devices {}".format(
                        self.param_length, self.dev_num))
            self.param_rank = self._get_parameter_group_id()
            self.optim_filter = tuple(
                map(lambda x: x == _get_global_rank(), self.param_rank))
            self.param_names = []
            for param in self.parameters:
                self.param_names.append(param.name)

        else:
            self.optim_filter = (True, ) * self.param_length
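
In the non-group branch above, weight decay is skipped for any parameter whose name contains 'beta' or 'gamma' (typically normalization parameters). A small sketch of that name-based filter, using hypothetical parameter names:

def _decay_flags(param_names):
    # Mirrors the decay_filter lambda: decay everything except 'beta'/'gamma' parameters.
    return tuple('beta' not in name and 'gamma' not in name for name in param_names)

names = ["conv1.weight", "bn1.gamma", "bn1.beta", "fc.bias"]
print(_decay_flags(names))  # (True, False, False, True)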
Example No. 9
    def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0):
        super(Optimizer, self).__init__(auto_prefix=False)
        if parameters is not None and not isinstance(parameters, list):
            parameters = list(parameters)

        if not parameters:
            raise ValueError("Optimizer got an empty parameter list.")

        if not isinstance(parameters[0], (dict, Parameter)):
            raise TypeError("Only a list of Parameter or dict can be supported.")

        if isinstance(loss_scale, int):
            loss_scale = float(loss_scale)
        validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name)
        validator.check_number_range("loss_scale", loss_scale, 0.0, float("inf"), Rel.INC_NEITHER, self.cls_name)
        self.loss_scale = loss_scale

        weight_decay = self._preprocess_weight_decay(weight_decay)

        self.dynamic_lr = False
        self.assignadd = None
        self.global_step = None
        self.is_group = False
        self.is_group_lr = False
        self.is_group_params_ordered = False
        learning_rate = self._preprocess_single_lr(learning_rate)
        if isinstance(parameters[0], dict):
            self.is_group = True
            self.group_params = []
            self.group_lr = []
            self.group_weight_decay = []
            self._init_group_params(parameters, learning_rate, weight_decay)

        # The final value of dynamic_lr can be determined after the process of parse_single_lr and init_group_params
        if self.dynamic_lr:
            self.assignadd = P.AssignAdd()
            self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')

        if self.is_group_lr:
            if self.dynamic_lr:
                self.learning_rate = CellList(self.group_lr)
            else:
                self.learning_rate = ParameterTuple(self.group_lr)
        else:
            self.learning_rate = self._build_single_lr(learning_rate, 'learning_rate')
        if self.is_group:
            self.parameters = ParameterTuple(self.group_params)
            self.weight_decay = tuple(self.group_weight_decay)
            decay_filter = lambda x: x > 0
            self.decay_flags = tuple(decay_filter(x) for x in self.weight_decay)
            self.exec_weight_decay = any(self.decay_flags)
        else:
            self.parameters = ParameterTuple(parameters)
            self.weight_decay = weight_decay * loss_scale
            decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name
            self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
            self.exec_weight_decay = self.weight_decay > 0
        ps_filter = lambda x: x.is_param_ps
        self.ps_parameters = tuple(ps_filter(x) for x in self.parameters)
        self.reciprocal_scale = 1.0 / loss_scale
        self.param_length = len(self.parameters)
        self.map_ = C.Map()

        use_parallel = context.get_auto_parallel_context("enable_parallel_optimizer")
        self.use_parallel = use_parallel
        if use_parallel:
            if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
                raise RuntimeError("Optimizer segmentation does not support optimizer {}".format(self.cls_name))
            if _get_parallel_mode() != ParallelMode.DATA_PARALLEL:
                raise RuntimeError("Optimizer segmentation does not support parallel mode {}".format
                                   (_get_parallel_mode()))
            self.dev_num = _get_device_num()
            if self.dev_num > self.param_length:
                raise RuntimeError("Optimizer segmentation can not be applied when the number of parameters {} is"
                                   " less than the number of devices {}".format(self.param_length, self.dev_num))
            self.param_rank = self._get_parameter_group_id()
            self.optim_filter = tuple(map(lambda x: x == _get_global_rank(), self.param_rank))
            self.param_names = []
            for param in self.parameters:
                self.param_names.append(param.name)

        else:
            self.optim_filter = (True,) * self.param_length
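
Both constructors validate loss_scale as a strictly positive float and precompute its reciprocal so gradients can be unscaled by a multiply. A plain-Python sketch of that check (the helper name is illustrative; the real code uses the validator utilities shown above):

def _check_loss_scale(loss_scale):
    # Accept an int by converting it, then require a strictly positive, finite float,
    # roughly mirroring the validator calls in the snippets above.
    if isinstance(loss_scale, int):
        loss_scale = float(loss_scale)
    if not isinstance(loss_scale, float) or not 0.0 < loss_scale < float("inf"):
        raise ValueError("loss_scale must be a positive float, got {!r}".format(loss_scale))
    return loss_scale

loss_scale = _check_loss_scale(1024)
reciprocal_scale = 1.0 / loss_scale
print(reciprocal_scale)  # 0.0009765625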