def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             pad_mode='same',
             padding=0,
             dilation=1,
             group=1,
             has_bias=False,
             weight_init='normal',
             bias_init='zeros'):
    """Set up the transposed 2D convolution cell.

    Args:
        in_channels: Forwarded to the parent constructor and used as the
            ``out_channel`` of the underlying ``P.Conv2DBackpropInput``.
        out_channels: Forwarded to the parent constructor; also the length
            of the bias parameter created when ``has_bias`` is enabled.
        kernel_size: Passed through ``twice`` before use.
        stride: Passed through ``twice`` before use. Default: 1.
        pad_mode: Must be 'valid', 'same' or 'pad'. Default: 'same'.
        padding: An int or a tuple; a tuple is checked to have length 4 and
            is unpacked as (top, bottom, left, right). Default: 0.
        dilation: Passed through ``twice`` before use. Default: 1.
        group: Forwarded unchanged to the parent and the primitive. Default: 1.
        has_bias: Checked with ``Validator.check_bool``. Default: False.
        weight_init: Forwarded to the parent constructor. Default: 'normal'.
        bias_init: Initializer used for the bias parameter. Default: 'zeros'.

    Raises:
        ValueError: If ``pad_mode`` is not one of 'valid', 'same', 'pad'.
    """
    kernel_size = twice(kernel_size)
    stride = twice(stride)
    dilation = twice(dilation)

    Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
    if isinstance(padding, tuple):
        Validator.check_equal_int(len(padding), 4, 'padding size', self.cls_name)

    # out_channels and in_channels swap:
    # Conv2DBackpropInput's out_channel refers to Conv2D's out_channel, so
    # Conv2dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
    super(Conv2dTranspose, self).__init__(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        pad_mode,
        padding,
        dilation,
        group,
        has_bias,
        weight_init,
        bias_init,
        transposed=True)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.shape = P.Shape()

    if pad_mode not in ('valid', 'same', 'pad'):
        raise ValueError("Attr 'pad_mode' of 'Conv2dTranspose' Op passed " + str(pad_mode) +
                         ", should be one of values in 'valid', 'same', 'pad'.")
    self.is_valid = self.pad_mode == 'valid'
    self.is_same = self.pad_mode == 'same'
    self.is_pad = self.pad_mode == 'pad'

    if Validator.check_bool(has_bias):
        self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')

    # Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
    # hence in_channels is handed over here.
    self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
                                                  kernel_size=kernel_size,
                                                  mode=1,
                                                  pad_mode=pad_mode,
                                                  pad=padding,
                                                  stride=stride,
                                                  dilation=dilation,
                                                  group=group)
    self.bias_add = P.BiasAdd()

    # A scalar padding applies to all four sides; otherwise the stored value
    # is unpacked as (top, bottom, left, right).
    if isinstance(self.padding, int):
        pads = (self.padding,) * 4
    else:
        pads = self.padding
    self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = pads
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             pad_mode='same',
             padding=0,
             dilation=1,
             group=1,
             has_bias=False,
             weight_init='normal',
             bias_init='zeros'):
    """Set up the transposed 1D convolution cell on top of a 2D primitive.

    The scalar ``kernel_size``, ``stride`` and ``dilation`` are widened to
    ``(1, value)`` pairs and ``padding`` to ``(0, 0, padding, padding)`` so
    that ``P.Conv2DBackpropInput`` can be used.

    Args:
        in_channels: Forwarded to the parent constructor and used as the
            ``out_channel`` of ``P.Conv2DBackpropInput``.
        out_channels: Forwarded to the parent constructor; also the length
            of the bias parameter created when ``has_bias`` is enabled.
        kernel_size: Int, checked against 1 with ``Rel.GE``.
        stride: Int, checked against 1 with ``Rel.GE``. Default: 1.
        pad_mode: Must be 'valid', 'same' or 'pad'. Default: 'same'.
        padding: Int, checked with ``check_non_negative_int``. Default: 0.
        dilation: Int, checked against 1 with ``Rel.GE``. Default: 1.
        group: Forwarded unchanged. Default: 1.
        has_bias: Checked with ``Validator.check_bool``. Default: False.
        weight_init: Forwarded to the parent; a ``Tensor`` is checked to be
            3-dimensional and gets a new axis inserted at position 2.
            Default: 'normal'.
        bias_init: Initializer used for the bias parameter. Default: 'zeros'.

    Raises:
        ValueError: If ``pad_mode`` is not one of 'valid', 'same', 'pad'.
    """
    owner = self.cls_name
    Validator.check_value_type("kernel_size", kernel_size, [int], owner)
    Validator.check_value_type("stride", stride, [int], owner)
    Validator.check_value_type("padding", padding, [int], owner)
    Validator.check_value_type("dilation", dilation, [int], owner)
    Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', owner)
    Validator.check_int(stride, 1, Rel.GE, 'stride', owner)
    Validator.check_non_negative_int(padding, 'padding', owner)
    Validator.check_int(dilation, 1, Rel.GE, 'dilation', owner)

    # Widen the 1D hyper-parameters to 2D with a leading dimension of 1.
    kernel_size = (1, kernel_size)
    stride = (1, stride)
    dilation = (1, dilation)

    shape_of = P.Shape()
    dtype_of = P.DType()
    if isinstance(weight_init, Tensor):
        init_shape = shape_of(weight_init)
        Validator.check_equal_int(len(init_shape), 3, 'weight_init_shape', owner)
        init_dtype = dtype_of(weight_init)
        # Insert a new axis at position 2 so the 3D weight becomes 4D.
        expanded = np.expand_dims(weight_init.asnumpy(), 2)
        weight_init = Tensor(expanded, init_dtype)

    # out_channels and in_channels swap:
    # Conv2DBackpropInput's out_channel refers to Conv2D's out_channel, so
    # Conv1dTranspose's out_channel refers to Conv2DBackpropInput's in_channel.
    super(Conv1dTranspose, self).__init__(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        pad_mode,
        padding,
        dilation,
        group,
        has_bias,
        weight_init,
        bias_init,
        transposed=True)
    self.padding = (0, 0, padding, padding)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.shape = P.Shape()

    if pad_mode not in ('valid', 'same', 'pad'):
        raise ValueError("Attr 'pad_mode' of 'Conv1dTranspose' Op passed " + str(pad_mode) +
                         ", should be one of values in 'valid', 'same', 'pad'.")
    self.is_valid = self.pad_mode == 'valid'
    self.is_same = self.pad_mode == 'same'
    self.is_pad = self.pad_mode == 'pad'

    if Validator.check_bool(has_bias):
        self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias')

    # Conv2DBackpropInput's out_channel refers to Conv2D's out_channel,
    # hence in_channels is handed over here.
    self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels,
                                                  kernel_size=kernel_size,
                                                  mode=1,
                                                  pad_mode=pad_mode,
                                                  pad=self.padding,
                                                  stride=stride,
                                                  dilation=dilation,
                                                  group=group)
    self.bias_add = P.BiasAdd()
    self.expand_dims = P.ExpandDims()
    self.squeeze = P.Squeeze(2)
def _conv2d_int_pair(value, arg_name):
    """Normalize a ``conv2d`` stride/dilation argument to a ``(h, w)`` pair.

    An int is duplicated, a 4-tuple contributes its last two entries, and a
    2-tuple is used as is. Both entries must be ints that are at least 1.
    """
    validator.check_value_type(arg_name, value, (int, tuple))
    if isinstance(value, int):
        value = (value, value)
    elif len(value) == 4:
        value = (value[2], value[3])
    if len(value) != 2 or (not isinstance(value[0], int)) or \
            (not isinstance(value[1], int)) or \
            value[0] < 1 or value[1] < 1:
        raise ValueError(
            f"The \'{arg_name}\' of \'conv2d\' should be an positive int number or "
            f"a tuple of two positive int numbers, but got {value}")
    return value


def conv2d(x, weight, bias=None, stride=1, pad=0,
           dilation=1, groups=1, padding_mode='zeros'):
    """Convolution 2D.

    Args:
        x: Input whose ``shape`` unpacks into four values, read here as
            (batch, channels, height, width).
        weight: Filter whose ``shape`` unpacks into four values, read here as
            (filter_num, channels, filter_h, filter_w).
        bias: Optional value added to the output. A 1-D bias whose length
            equals ``filter_num`` is applied per output channel; any other
            shape is added using plain broadcasting. Default: None.
        stride: Int, 2-tuple or 4-tuple (last two entries used). Default: 1.
        pad: Int, or a 4-tuple (top, bottom, left, right). Default: 0.
        dilation: Int, 2-tuple or 4-tuple (last two entries used). Default: 1.
        groups: Unused by this implementation. Default: 1.
        padding_mode: Unused by this implementation. Default: 'zeros'.

    Returns:
        The convolution result, reshaped and transposed to
        (batch, filter_num, out_h, out_w).

    Raises:
        ValueError: If ``stride``, ``dilation`` or ``pad`` has an unsupported
            form or value.
    """
    # pylint: disable=unused-argument
    stride = _conv2d_int_pair(stride, 'stride')
    stride_h, stride_w = stride

    dilation = _conv2d_int_pair(dilation, 'dilation')
    dilation_h, dilation_w = dilation

    if isinstance(pad, int):
        pad_top = pad_bottom = pad_left = pad_right = pad
    elif isinstance(pad, tuple) and len(pad) == 4:
        pad_top, pad_bottom, pad_left, pad_right = pad
    else:
        # Only an int or a 4-tuple is accepted above, so the message must not
        # advertise a 2-tuple form.
        raise ValueError(f"The \'pad\' should be an int number or "
                         f"a tuple of four int numbers, but got {pad}")

    batch_num, _, x_h, x_w = x.shape
    filter_num, _, filter_h, filter_w = weight.shape
    out_h = 1 + int((x_h + pad_top + pad_bottom - filter_h -
                     (filter_h - 1) * (dilation_h - 1)) / stride_h)
    out_w = 1 + int((x_w + pad_left + pad_right - filter_w -
                     (filter_w - 1) * (dilation_w - 1)) / stride_w)

    col = im2col(x, filter_h, filter_w, stride, pad, dilation)
    col_w = np.reshape(weight, (filter_num, -1)).T
    out = np.dot(col, col_w)
    out = out.reshape(batch_num, out_h, out_w, -1).transpose(0, 3, 1, 2)

    if bias is not None:
        # The output is (batch, channel, h, w). A flat per-channel bias would
        # otherwise broadcast along the trailing width axis, so align it with
        # the channel axis explicitly.
        if np.ndim(bias) == 1 and np.shape(bias)[0] == filter_num:
            bias = np.reshape(bias, (1, -1, 1, 1))
        out += bias
    return out
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             pad_mode='same',
             padding=0,
             dilation=1,
             group=1,
             has_bias=False,
             weight_init='normal',
             bias_init='zeros'):
    """Set up the 1D convolution cell on top of the 2D ``P.Conv2D`` primitive.

    The scalar ``kernel_size``, ``stride`` and ``dilation`` are widened to
    ``(1, value)`` pairs and ``padding`` to ``(0, 0, padding, padding)``.

    Args:
        in_channels: Forwarded to the parent constructor.
        out_channels: Forwarded to the parent constructor.
        kernel_size: Int, checked against 1 with ``Rel.GE``.
        stride: Int, checked against 1 with ``Rel.GE``. Default: 1.
        pad_mode: Must be 'valid', 'same' or 'pad'. Default: 'same'.
        padding: Int, checked with ``check_non_negative_int``. Default: 0.
        dilation: Int, checked against 1 with ``Rel.GE``. Default: 1.
        group: Forwarded to the parent constructor. Default: 1.
        has_bias: Forwarded to the parent constructor. Default: False.
        weight_init: Forwarded to the parent; a ``Tensor`` is checked to be
            3-dimensional and gets a new axis inserted at position 2.
            Default: 'normal'.
        bias_init: Forwarded to the parent constructor. Default: 'zeros'.

    Raises:
        ValueError: If ``pad_mode`` is not one of 'valid', 'same', 'pad'.
    """
    owner = self.cls_name
    Validator.check_value_type("kernel_size", kernel_size, [int], owner)
    Validator.check_value_type("stride", stride, [int], owner)
    Validator.check_value_type("padding", padding, [int], owner)
    Validator.check_value_type("dilation", dilation, [int], owner)
    Validator.check_int(kernel_size, 1, Rel.GE, 'kernel_size', owner)
    Validator.check_int(stride, 1, Rel.GE, 'stride', owner)
    Validator.check_non_negative_int(padding, 'padding', owner)
    Validator.check_int(dilation, 1, Rel.GE, 'dilation', owner)

    # Widen the 1D hyper-parameters to 2D with a leading dimension of 1.
    kernel_size = (1, kernel_size)
    stride = (1, stride)
    dilation = (1, dilation)

    shape_of = P.Shape()
    dtype_of = P.DType()
    if isinstance(weight_init, Tensor):
        init_shape = shape_of(weight_init)
        Validator.check_equal_int(len(init_shape), 3, 'weight_init_shape', owner)
        init_dtype = dtype_of(weight_init)
        # Insert a new axis at position 2 so the 3D weight becomes 4D.
        expanded = np.expand_dims(weight_init.asnumpy(), 2)
        weight_init = Tensor(expanded, init_dtype)

    super(Conv1d, self).__init__(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        pad_mode,
        padding,
        dilation,
        group,
        has_bias,
        weight_init,
        bias_init)
    self.padding = (0, 0, padding, padding)
    self.conv2d = P.Conv2D(out_channel=self.out_channels,
                           kernel_size=self.kernel_size,
                           mode=1,
                           pad_mode=self.pad_mode,
                           pad=self.padding,
                           stride=self.stride,
                           dilation=self.dilation,
                           group=self.group)
    self.bias_add = P.BiasAdd()

    if pad_mode not in ('valid', 'same', 'pad'):
        raise ValueError("Attr 'pad_mode' of 'Conv1d' Op passed " + str(pad_mode) +
                         ", should be one of values in 'valid', 'same', 'pad'.")

    self.expand_dims = P.ExpandDims()
    self.squeeze = P.Squeeze(2)
    self.shape = P.Shape()
def __init__(self, alpha=0.2):
    """Create the LeakyReLU cell.

    Args:
        alpha: Checked to be a float or an int and then stored as
            ``self.alpha``. Default: 0.2.
    """
    super(LeakyReLU, self).__init__()
    validator.check_value_type('alpha', alpha, [float, int], self.cls_name)
    self.alpha = alpha

    # Primitives kept on the instance for later use.
    self.greater_equal = P.GreaterEqual()
    self.mul = P.Mul()
def __init__(self, max_val=1.0):
    """Create the PSNR cell.

    Args:
        max_val: An int or a float, checked against 0.0 with ``Rel.GT`` and
            then stored as ``self.max_val``. Default: 1.0.
    """
    super(PSNR, self).__init__()
    owner = self.cls_name
    validator.check_value_type('max_val', max_val, [int, float], owner)
    validator.check_number('max_val', max_val, 0.0, Rel.GT, owner)
    self.max_val = max_val
def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0):
    """Initialize the state shared by all optimizers.

    Args:
        learning_rate: Pre-processed by ``_preprocess_single_lr`` and, unless
            group learning rates are in effect, built into
            ``self.learning_rate`` by ``_build_single_lr``.
        parameters: Iterable of ``Parameter`` objects, or of dicts describing
            parameter groups. Anything that is not already a list is
            converted with ``list``.
        weight_decay: Pre-processed by ``_preprocess_weight_decay``.
            Default: 0.0.
        loss_scale: An int is converted to float; the value is then checked
            to lie in the range 0.0 .. inf with ``Rel.INC_NEITHER``.
            Default: 1.0.

    Raises:
        ValueError: If ``parameters`` is empty.
        TypeError: If the first element of ``parameters`` is neither a dict
            nor a ``Parameter``.
        RuntimeError: If the parallel optimizer is enabled but the optimizer
            class, the parallel mode or the parameter count is unsupported.
    """
    super(Optimizer, self).__init__(auto_prefix=False)

    if parameters is not None and not isinstance(parameters, list):
        parameters = list(parameters)
    if not parameters:
        raise ValueError("Optimizer got an empty parameter list.")
    if not isinstance(parameters[0], (dict, Parameter)):
        raise TypeError("Only a list of Parameter or dict can be supported.")

    if isinstance(loss_scale, int):
        loss_scale = float(loss_scale)
    validator.check_value_type("loss_scale", loss_scale, [float], self.cls_name)
    validator.check_number_range("loss_scale", loss_scale, 0.0, float("inf"), Rel.INC_NEITHER, self.cls_name)
    self.loss_scale = loss_scale

    weight_decay = self._preprocess_weight_decay(weight_decay)

    # Defaults; the pre-processing helpers called below may update them.
    self.dynamic_lr = False
    self.assignadd = None
    self.global_step = None
    self.is_group = False
    self.is_group_lr = False
    self.is_group_params_ordered = False

    learning_rate = self._preprocess_single_lr(learning_rate)
    if isinstance(parameters[0], dict):
        self.is_group = True
        self.group_params = []
        self.group_lr = []
        self.group_weight_decay = []
        self._init_group_params(parameters, learning_rate, weight_decay)

    # The final value of dynamic_lr can be determined after the process of
    # parse_single_lr and init_group_params.
    if self.dynamic_lr:
        self.assignadd = P.AssignAdd()
        self.global_step = Parameter(initializer(0, [1], mindspore.int32), name='global_step')

    if not self.is_group_lr:
        self.learning_rate = self._build_single_lr(learning_rate, 'learning_rate')
    elif self.dynamic_lr:
        self.learning_rate = CellList(self.group_lr)
    else:
        self.learning_rate = ParameterTuple(self.group_lr)

    if self.is_group:
        self.parameters = ParameterTuple(self.group_params)
        self.weight_decay = tuple(self.group_weight_decay)
        # A group takes part in weight decay when its decay value is positive.
        self.decay_flags = tuple(decay > 0 for decay in self.weight_decay)
        self.exec_weight_decay = any(self.decay_flags)
    else:
        self.parameters = ParameterTuple(parameters)
        self.weight_decay = weight_decay * loss_scale
        # Parameters whose name contains 'beta' or 'gamma' are not flagged.
        self.decay_flags = tuple('beta' not in param.name and 'gamma' not in param.name
                                 for param in self.parameters)
        self.exec_weight_decay = self.weight_decay > 0

    self.ps_parameters = tuple(param.is_param_ps for param in self.parameters)
    self.reciprocal_scale = 1.0 / loss_scale
    self.param_length = len(self.parameters)
    self.map_ = C.Map()

    use_parallel = context.get_auto_parallel_context("enable_parallel_optimizer")
    self.use_parallel = use_parallel
    if not use_parallel:
        self.optim_filter = (True,) * self.param_length
    else:
        if self.cls_name not in ["Lamb", "AdamWeightDecay"]:
            raise RuntimeError("Optimizer segmentation does not support optimizer {}".format(self.cls_name))
        if _get_parallel_mode() != ParallelMode.DATA_PARALLEL:
            raise RuntimeError("Optimizer segmentation does not support parallel mode {}".format
                               (_get_parallel_mode()))
        self.dev_num = _get_device_num()
        if self.dev_num > self.param_length:
            raise RuntimeError("Optimizer segmentation can not be applied when the number of parameters {} is"
                               " less than the number of devices {}".format(self.param_length, self.dev_num))
        self.param_rank = self._get_parameter_group_id()
        # Flag the parameters whose assigned rank equals the global rank.
        self.optim_filter = tuple(rank == _get_global_rank() for rank in self.param_rank)
        self.param_names = []
        self.param_names.extend(param.name for param in self.parameters)
def __init__(self,
             params,
             learning_rate,
             momentum,
             matrix_A,
             matrix_G,
             A_inv_max,
             G_inv_max,
             weight_decay=0.0,
             loss_scale=1.0,
             use_nesterov=False,
             decay_filter=lambda x: x.name not in []):
    """Initialize the THOR optimizer state for GPU.

    Args:
        params: Parameters handed to the base optimizer.
        learning_rate: Learning rate handed to the base optimizer.
        momentum: A float that must not be negative; wrapped into a float32
            ``Parameter`` named "momentum".
        matrix_A: Wrapped into a ``ParameterTuple``.
        matrix_G: Wrapped into a ``ParameterTuple``.
        A_inv_max: Wrapped into a ``ParameterTuple``.
        G_inv_max: Wrapped into a ``ParameterTuple``.
        weight_decay: Handed to the base optimizer and afterwards stored
            as-is on ``self.weight_decay``. Default: 0.0.
        loss_scale: Handed to the base optimizer. Default: 1.0.
        use_nesterov: Checked with ``Validator.check_bool``. Default: False.
        decay_filter: Callable applied to every parameter to build
            ``self.decay_flags``. The default tests the parameter name
            against an empty list and is therefore true for every parameter.

    Raises:
        ValueError: If ``momentum`` is a negative float.
    """
    super(THOR_GPU, self).__init__(learning_rate, params, weight_decay, loss_scale)

    Validator.check_value_type("momentum", momentum, [float], self.cls_name)
    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
    self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")

    self.params = self.parameters
    self.use_nesterov = Validator.check_bool(use_nesterov)
    self.moments = self.params.clone(prefix="moments", init='zeros')
    self.hyper_map = C.HyperMap()
    self.opt = P.ApplyMomentum(use_nesterov=self.use_nesterov)

    # Hard-coded scaling factors, 54 entries in total.
    # NOTE(review): presumably one reciprocal feature-map size per layer of a
    # fixed network - confirm against the model this optimizer is used with.
    self.feature_map = (
        [1.0 / 12544]
        + [1.0 / 3136] * 11
        + [1.0 / 784] * 13
        + [1.0 / 196] * 19
        + [1.0 / 49] * 9
        + [1.0]
    )
    self.feature_map_new = [factor ** 0.5 for factor in self.feature_map]

    self.transpose = P.Transpose()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.matmul = P.MatMul()

    self.matrix_A = ParameterTuple(matrix_A)
    self.matrix_G = ParameterTuple(matrix_G)
    self.A_inv_max = ParameterTuple(A_inv_max)
    self.G_inv_max = ParameterTuple(G_inv_max)

    self.assign = P.Assign()
    self.mul = P.Mul()

    mean = _get_gradients_mean()
    degree = _get_device_num()
    parameter_length = len(self.feature_map)
    self.grad_reducer_thorA = DistributedGradReducerThor(
        parameter_length, ((parameter_length,), 0), mean, degree)
    self.grad_reducer_thorG = DistributedGradReducerThor(
        parameter_length, ((parameter_length,), 0), mean, degree)

    # Replace the values the base class derived with the raw weight decay
    # and the caller-supplied filter.
    self.weight_decay = weight_decay
    self.decay_flags = tuple(decay_filter(param) for param in self.parameters)
    self.update_gradient = P.UpdateThorGradient(split_dim=128)
def _check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, loss_scale=1.0, weight_decay=0.0,
                 prim_name=None):
    """Check param.

    Every numeric argument is first checked to be a float and then compared
    with a bound; ``use_locking`` is only checked to be a bool. The checks
    run in the order of the arguments.

    Args:
        initial_accum: Float, checked against 0.0 with ``Rel.GE``.
        learning_rate: Float, checked against 0.0 with ``Rel.GT``.
        lr_power: Float, checked against 0.0 with ``Rel.LE``.
        l1: Float, checked against 0.0 with ``Rel.GE``.
        l2: Float, checked against 0.0 with ``Rel.GE``.
        use_locking: Bool.
        loss_scale: Float, checked against 1.0 with ``Rel.GE``. Default: 1.0.
        weight_decay: Float, checked against 0.0 with ``Rel.GE``. Default: 0.0.
        prim_name: Name handed to every validator call. Default: None.
    """
    def _check_float(arg_name, arg_value, bound, rel):
        # Type check first, then the range check, both reported under prim_name.
        validator.check_value_type(arg_name, arg_value, [float], prim_name)
        validator.check_number(arg_name, arg_value, bound, rel, prim_name)

    _check_float("initial_accum", initial_accum, 0.0, Rel.GE)
    _check_float("learning_rate", learning_rate, 0.0, Rel.GT)
    _check_float("lr_power", lr_power, 0.0, Rel.LE)
    _check_float("l1", l1, 0.0, Rel.GE)
    _check_float("l2", l2, 0.0, Rel.GE)

    validator.check_value_type("use_locking", use_locking, [bool], prim_name)

    _check_float("loss_scale", loss_scale, 1.0, Rel.GE)
    _check_float("weight_decay", weight_decay, 0.0, Rel.GE)
def __init__(self, smooth=1e-5, threshold=0.5):
    """Create the DiceLoss cell.

    Args:
        smooth: Checked with ``validator.check_positive_float``; the value
            returned by the check is stored as ``self.smooth``.
            Default: 1e-5.
        threshold: Checked to be a float; the value returned by the check is
            stored as ``self.threshold``. Default: 0.5.
    """
    super(DiceLoss, self).__init__()

    checked_smooth = validator.check_positive_float(smooth, "smooth")
    self.smooth = checked_smooth

    checked_threshold = validator.check_value_type("threshold", threshold, [float])
    self.threshold = checked_threshold

    self.reshape = P.Reshape()
def _check_validate_keepdims(keep_dims, name):
    """Check that `keep_dims` is a bool and return the validator's result.

    Args:
        keep_dims: Value to check.
        name: Name handed to the validator call.
    """
    return validator.check_value_type('keep_dims', keep_dims, [bool], name)
def _check_validate_axis(axis, name):
    """Check `axis` and return the validator's result.

    When `axis` is a tuple or a list, every element is checked to be an int
    first. Afterwards `axis` itself is checked to be an int, a tuple or a
    list.

    Args:
        axis: Value to check.
        name: Name handed to the validator calls.
    """
    if isinstance(axis, (tuple, list)):
        for position, element in enumerate(axis):
            validator.check_value_type("axis[%d]" % position, element, [int], name)
    return validator.check_value_type('axis', axis, [int, tuple, list], name)
def polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power,
                        update_decay_epoch=False):
    r"""
    Calculate learning rate based on polynomial decay function.

    For the i-th step, the formula of computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = (learning\_rate - end\_learning\_rate) *
        (1 - tmp\_epoch / tmp\_decay\_epoch)^{power} + end\_learning\_rate

    Where:

    .. math::
        tmp\_epoch = min(current\_epoch, decay\_epoch)

    .. math::
        current\_epoch=floor(\frac{i}{step\_per\_epoch})

    .. math::
        tmp\_decay\_epoch = decay\_epoch

    If `update_decay_epoch` is true, `tmp_epoch` is the current epoch itself and
    the value of `tmp_decay_epoch` is updated every epoch. The formula is:

    .. math::
        tmp\_decay\_epoch = decay\_epoch * max(ceil(current\_epoch / decay\_epoch), 1)

    Args:
        learning_rate (float): The initial value of learning rate.
        end_learning_rate (float): The end value of learning rate.
        total_step (int): The total number of steps.
        step_per_epoch (int): The number of steps in per epoch.
        decay_epoch (int): A value used to calculate decayed learning rate.
        power (float): A value used to calculate decayed learning rate. This parameter must be greater than 0.
        update_decay_epoch (bool): If true, update `decay_epoch`. Default: False.

    Returns:
        list[float]. The size of list is `total_step`.

    Raises:
        TypeError: If `end_learning_rate` is not a float.

    Examples:
        >>> learning_rate = 0.1
        >>> end_learning_rate = 0.01
        >>> total_step = 6
        >>> step_per_epoch = 2
        >>> decay_epoch = 2
        >>> power = 0.5
        >>> polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power)
        [0.1, 0.1, 0.07363961030678928, 0.07363961030678928, 0.01, 0.01]
    """
    validator.check_float_positive('learning_rate', learning_rate, None)
    validator.check_float_legal_value('learning_rate', learning_rate, None)
    if not isinstance(end_learning_rate, float):
        raise TypeError("end_learning_rate must be float.")
    validator.check_number_range("end_learning_rate", end_learning_rate, 0.0, float("inf"), Rel.INC_LEFT, None)
    validator.check_float_positive('power', power, None)
    validator.check_float_legal_value('power', power, None)
    validator.check_positive_int(total_step, 'total_step')
    validator.check_positive_int(step_per_epoch, 'step_per_epoch')
    validator.check_positive_int(decay_epoch, 'decay_epoch')
    validator.check_value_type('update_decay_epoch', update_decay_epoch, [bool], None)

    lr = []
    delta = learning_rate - end_learning_rate
    active_decay_epoch = decay_epoch
    for step in range(total_step):
        current_epoch = step // step_per_epoch
        if update_decay_epoch:
            # Stretch the decay horizon to the next multiple of decay_epoch
            # that is not smaller than the current epoch (at least one multiple).
            active_decay_epoch = decay_epoch * max(math.ceil(current_epoch / decay_epoch), 1)
            tmp_epoch = current_epoch
        else:
            # Fixed horizon: the epoch counter saturates at decay_epoch.
            tmp_epoch = min(active_decay_epoch, current_epoch)
        lr.append(delta * (1 - tmp_epoch / active_decay_epoch) ** power + end_learning_rate)
    return lr