def __init__(self, seed, dtype, name, param): """ Constructor of distribution class. """ super(Distribution, self).__init__() validator.check_value_type('name', name, [str], 'distribution_name') validator.check_integer('seed', seed, 0, Rel.GE, name) self._name = name self._seed = seed self._dtype = dtype self._parameters = {} # parsing parameters for k in param.keys(): if not (k == 'self' or k.startswith('_')): self._parameters[k] = param[k] # some attributes self._broadcast_shape = calc_broadcast_shape_from_param( self.parameters) self._is_scalar_batch = check_scalar_from_param(self.parameters) # set the function to call according to the derived class's attributes self._set_prob() self._set_log_prob() self._set_sd() self._set_var() self._set_cdf() self._set_survival() self._set_log_cdf() self._set_log_survival() self._set_cross_entropy()
def __init__(self, input_size, hidden_size, num_layers=1, has_bias=True, batch_first=False, dropout=0, bidirectional=False): super(LSTM, self).__init__() validator.check_value_type("batch_first", batch_first, [bool], self.cls_name) validator.check_integer("hidden_size", hidden_size, 0, Rel.GT, self.cls_name) validator.check_integer("num_layers", num_layers, 0, Rel.GT, self.cls_name) self.batch_first = batch_first self.transpose = P.Transpose() self.lstm = P.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, has_bias=has_bias, bidirectional=bidirectional, dropout=float(dropout)) weight_size = 0 gate_size = 4 * hidden_size num_directions = 2 if bidirectional else 1 for layer in range(num_layers): input_layer_size = input_size if layer == 0 else hidden_size * num_directions increment_size = gate_size * input_layer_size increment_size += gate_size * hidden_size if has_bias: increment_size += 2 * gate_size weight_size += increment_size * num_directions stdv = 1 / math.sqrt(hidden_size) w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32) self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight')
def __init__(self, is_grad=True, sparse=False, reduction=None, smooth_factor=0, num_classes=2): super(SoftmaxCrossEntropyWithLogits, self).__init__(reduction) self.is_grad = is_grad self.sparse = sparse validator.check_integer("num_classes", num_classes, 1, Rel.GT, self.cls_name) validator.check_number_range("smooth_factor", smooth_factor, 0, 1, Rel.INC_BOTH, self.cls_name) self.smooth_factor = smooth_factor self.num_classes = num_classes self.softmax_cross_entropy = P.SoftmaxCrossEntropyWithLogits() self.one_hot = P.OneHot() self.on_value = Tensor(1.0 - self.smooth_factor, mstype.float32) self.off_value = Tensor( 1.0 * self.smooth_factor / (self.num_classes - 1), mstype.float32) self.is_cpugpu = context.get_context('device_target') in ["CPU", "GPU"] if self.is_cpugpu: self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits( is_grad=self.is_grad)
def avg_pooling(x, pool_h, pool_w, stride): """ Applies average pooling over an input array. Args: x (numpy.ndarray): The input array to be average pooled. pool_h (int): Height of the pooling window. pool_w (int): Width of the pooling window. stride (int): The stride of the sliding window. Returns: numpy.ndarray, an output array after applying average pooling on input array. """ validator.check_integer("stride", stride, 0, Rel.GT, None) num, channel, height, width = x.shape out_h = (height - pool_h) // stride + 1 out_w = (width - pool_w) // stride + 1 col = im2col(x, pool_h, pool_w, stride) col = col.reshape(-1, pool_h * pool_w) out = np.mean(col, axis=1) out = out.reshape((num, out_h, out_w, channel)).transpose(0, 3, 1, 2) return out
def _check_param_value(decay_steps, warmup_steps, start_learning_rate, end_learning_rate, power, beta1, beta2, eps, weight_decay, prim_name): """Check the type of inputs.""" _ = warmup_steps validator.check_float_positive('start_learning_rate', start_learning_rate, prim_name) validator.check_float_legal_value('start_learning_rate', start_learning_rate, prim_name) validator.check_float_positive('end_learning_rate', end_learning_rate, prim_name) validator.check_float_legal_value('end_learning_rate', end_learning_rate, prim_name) validator.check_float_positive('power', power, prim_name) validator.check_float_legal_value('power', power, prim_name) validator.check_integer('decay_steps', decay_steps, 0, Rel.GT, prim_name) validator.check_integer('warmup_steps', decay_steps, 0, Rel.GT, prim_name) validator.check_value_type("beta1", beta1, [float], prim_name) validator.check_value_type("beta2", beta2, [float], prim_name) validator.check_value_type("eps", eps, [float], prim_name) validator.check_value_type("weight_dacay", weight_decay, [float], prim_name) validator.check_number_range("beta1", beta1, 0.0, 1.0, Rel.INC_NEITHER, prim_name) validator.check_number_range("beta2", beta2, 0.0, 1.0, Rel.INC_NEITHER, prim_name) validator.check_number_range("eps", eps, 0.0, float("inf"), Rel.INC_NEITHER, prim_name) validator.check_number_range("weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, prim_name)
def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same', padding=0, dilation=1, group=1, has_bias=False, weight_init='normal', bias_init='zeros'): Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name) Validator.check_value_type("stride", stride, [int], self.cls_name) Validator.check_value_type("padding", padding, [int], self.cls_name) Validator.check_value_type("dilation", dilation, [int], self.cls_name) Validator.check_integer('kernel_size', kernel_size, 1, Rel.GE, self.cls_name) Validator.check_integer('stride', stride, 1, Rel.GE, self.cls_name) Validator.check_integer('padding', padding, 0, Rel.GE, self.cls_name) Validator.check_integer('dilation', dilation, 1, Rel.GE, self.cls_name) kernel_size = (1, kernel_size) stride = (1, stride) dilation = (1, dilation) get_shape = P.Shape() get_dtype = P.DType() if isinstance(weight_init, Tensor): weight_init_shape = get_shape(weight_init) Validator.check_integer('weight_init_shape', len(weight_init_shape), 3, Rel.EQ, self.cls_name) weight_init_dtype = get_dtype(weight_init) weight_init_value = weight_init.asnumpy() weight_init_value = np.expand_dims(weight_init_value, 2) weight_init = Tensor(weight_init_value, weight_init_dtype) super(Conv1d, self).__init__(in_channels, out_channels, kernel_size, stride, pad_mode, padding, dilation, group, has_bias, weight_init, bias_init) self.padding = (0, 0, padding, padding) self.conv2d = P.Conv2D(out_channel=self.out_channels, kernel_size=self.kernel_size, mode=1, pad_mode=self.pad_mode, pad=self.padding, stride=self.stride, dilation=self.dilation, group=self.group) self.bias_add = P.BiasAdd() if pad_mode not in ('valid', 'same', 'pad'): raise ValueError( 'Attr \'pad_mode\' of \'Conv1d\' Op passed ' + str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.') self.expand_dims = P.ExpandDims() self.squeeze = P.Squeeze(2) self.shape = P.Shape()
def __init__(self, seed, dtype, name, param): """ Constructor of distribution class. """ super(Distribution, self).__init__() if seed is None: seed = get_seed() if seed is None: seed = 0 validator.check_value_type('name', name, [str], type(self).__name__) validator.check_integer('seed', seed, 0, Rel.GE, name) self._name = name self._seed = seed self._dtype = cast_type_for_device(dtype) self._parameters = {} # parsing parameters for k in param.keys(): if not (k == 'self' or k.startswith('_')): self._parameters[k] = param[k] # some attributes if 'distribution' in self.parameters.keys(): self.parameter_type = self.parameters[ 'distribution'].parameter_type else: self.parameter_type = set_param_type(self.parameters['param_dict'], dtype) self._broadcast_shape = self._calc_broadcast_shape() self._is_scalar_batch = self._check_is_scalar_batch() # set the function to call according to the derived class's attributes self._set_prob() self._set_log_prob() self._set_sd() self._set_var() self._set_cdf() self._set_survival() self._set_log_cdf() self._set_log_survival() self._set_cross_entropy() self.context_mode = context.get_context('mode') self.checktuple = CheckTuple() self.checktensor = CheckTensor() self.broadcast = broadcast_to # ops needed for the base class self.cast_base = P.Cast() self.dtype_base = P.DType() self.exp_base = exp_generic self.fill_base = P.Fill() self.log_base = log_generic self.sametypeshape_base = P.SameTypeShape() self.sq_base = P.Square() self.sqrt_base = P.Sqrt() self.shape_base = P.Shape()
def _check_learning_rate_value(learning_rate, end_learning_rate, decay_steps, power, prim_name): """Check the type of inputs.""" validator.check_value_type("learning_rate", learning_rate, [float], prim_name) validator.check_number_range("learning_rate", learning_rate, 0.0, float("inf"), Rel.INC_LEFT, prim_name) validator.check_value_type("end_learning_rate", end_learning_rate, [float], prim_name) validator.check_number_range("end_learning_rate", end_learning_rate, 0.0, float("inf"), Rel.INC_LEFT, prim_name) validator.check_float_positive('power', power, prim_name) validator.check_float_legal_value('power', power, prim_name) validator.check_integer('decay_steps', decay_steps, 0, Rel.GT, prim_name)
def _check_learning_rate_value(learning_rate, end_learning_rate, decay_steps, power, prim_name): """Check the type of inputs.""" validator.check_float_positive('learning_rate', learning_rate, prim_name) validator.check_float_legal_value('learning_rate', learning_rate, prim_name) validator.check_float_positive('end_learning_rate', end_learning_rate, prim_name) validator.check_float_legal_value('end_learning_rate', end_learning_rate, prim_name) validator.check_float_positive('power', power, prim_name) validator.check_float_legal_value('power', power, prim_name) validator.check_integer('decay_steps', decay_steps, 0, Rel.GT, prim_name)
def __init__(self, vocab_size, embedding_size, param_init='normal', target='CPU', slice_mode='batch_slice', manual_shapes=None): super(EmbeddingLookup, self).__init__() self.target = target if target not in ('CPU', 'DEVICE'): raise ValueError( 'Attr \'target\' of \'EmbeddingLookup\' Op passed ' + str(target) + ', should be one of values in \'CPU\', \'DEVICE\'.') self.gatherv2 = P.GatherV2() self.embeddinglookup = P.EmbeddingLookup().add_prim_attr( 'primitive_target', 'CPU') self.embedding_table = Parameter(initializer( param_init, [vocab_size, embedding_size]), name='embedding_table') parallel_mode = _get_parallel_mode() is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL) if slice_mode == EmbeddingLookUpSplitMode.FIELD_SLICE and is_auto_parallel: if not manual_shapes: raise ValueError( "in slice field mode, the manual_shapes should not be none" ) if not isinstance(manual_shapes, tuple): raise TypeError( "manual_shapes type must be tuple(int) cannot be {}!". format(type(manual_shapes))) for dim in manual_shapes: Validator.check_integer('manul shape dim', dim, 0, Rel.GT, self.cls_name) self.gatherv2.add_prim_attr("manual_split", manual_shapes) self.embeddinglookup.add_prim_attr("manual_split", manual_shapes) self.gatherv2.set_strategy( ((get_group_size(), 1), (1, get_group_size()))) self.embeddinglookup.set_strategy( ((get_group_size(), 1), (1, get_group_size()))) elif slice_mode == EmbeddingLookUpSplitMode.TABLE_ROW_SLICE and is_auto_parallel: self.gatherv2.set_strategy(((get_group_size(), 1), (1, 1))) self.embeddinglookup.set_strategy(((get_group_size(), 1), (1, 1))) elif slice_mode == EmbeddingLookUpSplitMode.TABLE_COLUMN_SLICE and is_auto_parallel: self.gatherv2.set_strategy(((1, get_group_size()), (1, 1))) self.embeddinglookup.set_strategy(((1, get_group_size()), (1, 1))) elif slice_mode == EmbeddingLookUpSplitMode.BATCH_SLICE and is_auto_parallel: self.gatherv2.set_strategy(((1, 1), (get_group_size(), 1))) self.embeddinglookup.set_strategy(((1, 1), (get_group_size(), 1))) else: if is_auto_parallel: raise ValueError( "slice_mode should support mode in nn.EmbeddingLookUpSplitMode, but get " + str(slice_mode))
def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same', padding=0, dilation=1, group=1, has_bias=False, weight_init='normal', bias_init='zeros'): super(DepthwiseConv2d, self).__init__() self.kernel_size = twice(kernel_size) self.stride = twice(stride) self.dilation = twice(dilation) self.in_channels = check_int_positive(in_channels) self.out_channels = check_int_positive(out_channels) validator.check_integer('group', group, in_channels, Rel.EQ) validator.check_integer('group', group, out_channels, Rel.EQ) validator.check_integer('group', group, 1, Rel.GE) self.pad_mode = pad_mode self.dilation = dilation self.group = group self.has_bias = has_bias self.weight_init = weight_init self.bias_init = bias_init Validator.check_value_type('padding', padding, (int, tuple), self.cls_name) if isinstance(padding, tuple): Validator.check_integer('padding size', len(padding), 4, Rel.EQ, self.cls_name) self.padding = padding self.conv = P.DepthwiseConv2dNative(channel_multiplier=1, kernel_size=self.kernel_size, pad_mode=self.pad_mode, pad=self.padding, stride=self.stride, dilation=self.dilation) self.bias_add = P.BiasAdd() weight_shape = [1, in_channels, *self.kernel_size] self.weight = Parameter(initializer(weight_init, weight_shape), name='weight') if check_bool(has_bias): self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') else: if bias_init != 'zeros': logger.warning( "value of `has_bias` is False, value of `bias_init` will be ignore." ) self.bias = None
def __init__(self, params, decay_steps, warmup_steps=0, learning_rate=0.001, end_learning_rate=0.0001, power=10.0, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0, decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): super(AdamWeightDecayDynamicLR, self).__init__(0.0, params) if self.is_group: raise RuntimeError( f"The {self.cls_name} optimizer cannot support group setting.") _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name) _check_learning_rate_value(learning_rate, end_learning_rate, decay_steps, power, self.cls_name) validator.check_integer('warmup_steps', warmup_steps, 0, Rel.GE, self.cls_name) # turn them to scalar when me support scalar/tensor mix operations self.global_step = Parameter(initializer(0, [1]), name="global_step") self.warmup_steps = Tensor(np.array([warmup_steps]).astype(np.float32)) self.warmup_flag = False if warmup_steps > 0: self.warmup_flag = True self.decay_steps = Tensor(np.array([decay_steps]).astype(np.float32)) self.end_learning_rate = Tensor( np.array([end_learning_rate]).astype(np.float32)) self.diff_learning_rate = Tensor( np.array([learning_rate - end_learning_rate]).astype(np.float32)) self.power = power self.beta1 = Tensor(np.array([beta1]).astype(np.float32)) self.beta2 = Tensor(np.array([beta2]).astype(np.float32)) self.eps = Tensor(np.array([eps]).astype(np.float32)) self.weight_decay_tensor = Tensor( np.array([weight_decay]).astype(np.float32)) self.params = self.parameters self.moments1 = self.params.clone(prefix="adam_m", init='zeros') self.moments2 = self.params.clone(prefix="adam_v", init='zeros') self.decay_flag = tuple(decay_filter(x) for x in self.params) self.hyper_map = C.HyperMap() self.min = P.Minimum() self.pow = P.Pow() self.greater = P.Greater() self.one = Tensor(np.array([1.0]).astype(np.float32)) self.cast = P.Cast() self.start_learning_rate = Tensor( np.array([learning_rate]).astype(np.float32))
def max_pool_with_argmax(x, pool_h, pool_w, stride): """Max pooling with argmax.""" validator.check_integer("stride", stride, 0, Rel.GT, None) num, channel, height, width = x.shape out_h = (height - pool_h) // stride + 1 out_w = (width - pool_w) // stride + 1 col = im2col(x, pool_h, pool_w, stride) col = col.reshape(-1, pool_h * pool_w) out = np.max(col, axis=1) out_argmax = np.argmax(col, axis=1) out = out.reshape((num, out_h, out_w, channel)).transpose(0, 3, 1, 2) out_argmax = out_argmax.reshape( (num, out_h, out_w, channel)).transpose(0, 3, 1, 2) return out, out_argmax
def __init__(self, max_val=1.0, power_factors=(0.0448, 0.2856, 0.3001, 0.2363, 0.1333), filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03): super(MSSSIM, self).__init__() validator.check_value_type('max_val', max_val, [int, float], self.cls_name) validator.check_number('max_val', max_val, 0.0, Rel.GT, self.cls_name) self.max_val = max_val validator.check_value_type('power_factors', power_factors, [tuple, list], self.cls_name) self.filter_size = validator.check_integer('filter_size', filter_size, 1, Rel.GE, self.cls_name) self.filter_sigma = validator.check_float_positive( 'filter_sigma', filter_sigma, self.cls_name) self.k1 = validator.check_value_type('k1', k1, [float], self.cls_name) self.k2 = validator.check_value_type('k2', k2, [float], self.cls_name) window = _create_window(filter_size, filter_sigma) self.level = len(power_factors) self.conv = [] for i in range(self.level): self.conv.append(_conv2d(1, 1, filter_size, Tensor(window))) self.conv[i].weight.requires_grad = False self.multi_convs_list = CellList(self.conv) self.weight_tensor = Tensor(power_factors, mstype.float32) self.avg_pool = AvgPool2d(kernel_size=2, stride=2, pad_mode='valid') self.relu = ReLU() self.reduce_mean = P.ReduceMean() self.prod = P.ReduceProd() self.pow = P.Pow() self.pack = P.Pack(axis=-1) self.concat = P.Concat(axis=1)
def piecewise_constant_lr(milestone, learning_rates): r""" Get piecewise constant learning rate. Calculate learning rate by given `milestone` and `learning_rates`. Let the value of `milestone` be :math:`(M_1, M_2, ..., M_N)` and the value of `learning_rates` be :math:`(x_1, x_2, ..., x_N)`. N is the length of `milestone`. Let the output learning rate be `y`. .. math:: y[i] = x_t,\ for\ i \in [M_{t-1}, M_t) Args: milestone (Union[list[int], tuple[int]]): A list of milestone. This list is a monotone increasing list. Every element is a milestone step, and must be greater than 0. learning_rates (Union[list[float], tuple[float]]): A list of learning rates. Returns: list[float]. The size of list is :math:`M_N`. Examples: >>> milestone = [2, 5, 10] >>> learning_rates = [0.1, 0.05, 0.01] >>> piecewise_constant_lr(milestone, learning_rates) [0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01] """ validator.check_value_type('milestone', milestone, (tuple, list), None) validator.check_value_type('learning_rates', learning_rates, (tuple, list), None) if len(milestone) != len(learning_rates): raise ValueError( 'The size of `milestone` must be same with the size of `learning_rates`.' ) lr = [] last_item = 0 for i, item in enumerate(milestone): validator.check_integer(f'milestone[{i}]', item, 0, Rel.GT, None) validator.check_float_legal_value(f'learning_rates[{i}]', learning_rates[i], None) if item < last_item: raise ValueError( f'The value of milestone[{i}] must be greater than milestone[{i - 1}]' ) lr += [learning_rates[i]] * (item - last_item) last_item = item return lr
def __init__(self, kernel_size=1, stride=1, pad_mode="valid"): super(AvgPool1d, self).__init__(kernel_size, stride, pad_mode) validator.check_value_type('kernel_size', kernel_size, [int], self.cls_name) validator.check_value_type('stride', stride, [int], self.cls_name) self.pad_mode = validator.check_string('pad_mode', pad_mode.upper(), ['VALID', 'SAME'], self.cls_name) validator.check_integer("kernel_size", kernel_size, 1, Rel.GE, self.cls_name) validator.check_integer("stride", stride, 1, Rel.GE, self.cls_name) self.kernel_size = (1, kernel_size) self.stride = (1, stride) self.avg_pool = P.AvgPool(ksize=self.kernel_size, strides=self.stride, padding=self.pad_mode) self.shape = F.shape self.reduce_mean = P.ReduceMean(keep_dims=True) self.slice = P.Slice() self.expand = P.ExpandDims()
def __init__(self, max_val=1.0, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03): super(SSIM, self).__init__() validator.check_value_type('max_val', max_val, [int, float], self.cls_name) validator.check_number('max_val', max_val, 0.0, Rel.GT, self.cls_name) self.max_val = max_val self.filter_size = validator.check_integer('filter_size', filter_size, 1, Rel.GE, self.cls_name) self.filter_sigma = validator.check_float_positive('filter_sigma', filter_sigma, self.cls_name) validator.check_value_type('k1', k1, [float], self.cls_name) self.k1 = validator.check_number_range('k1', k1, 0.0, 1.0, Rel.INC_NEITHER, self.cls_name) validator.check_value_type('k2', k2, [float], self.cls_name) self.k2 = validator.check_number_range('k2', k2, 0.0, 1.0, Rel.INC_NEITHER, self.cls_name) self.mean = P.DepthwiseConv2dNative(channel_multiplier=1, kernel_size=filter_size)
def polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power, update_decay_epoch=False): r""" Calculate learning rate base on polynomial decay function. For the i-th step, the formula of computing decayed_learning_rate[i] is: .. math:: decayed\_learning\_rate[i] = (learning\_rate - end\_learning\_rate) * (1 - tmp\_epoch / tmp\_decay\_epoch)^{power} + end\_learning\_rate Where :math:`tmp\_epoch=min(current\_epoch, decay\_epoch),\ current\_epoch=floor(\frac{i}{step\_per\_epoch})`, and :math:`tmp\_decay\_epoch = decay\_epoch`. If `update_decay_epoch` is true, update the value of `tmp_decay_epoch` every epoch. The formula is :math:`tmp\_decay\_epoch = decay\_epoch * ceil(current\_epoch / decay\_epoch)` Args: learning_rate (float): The initial value of learning rate. end_learning_rate (float): The end value of learning rate. total_step (int): The total number of steps. step_per_epoch (int): The number of steps in per epoch. decay_epoch (int): A value used to calculate decayed learning rate. power (float): A value used to calculate decayed learning rate. This parameter should be greater than 0. update_decay_epoch (bool): If true, update `decay_epoch`. Default: False. Returns: list[float]. The size of list is `total_step`. Examples: >>> learning_rate = 0.1 >>> end_learning_rate = 0.01 >>> total_step = 6 >>> step_per_epoch = 2 >>> decay_epoch = 2 >>> power = 0.5 >>> polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power) [0.1, 0.1, 0.07363961030678928, 0.07363961030678928, 0.01, 0.01] """ validator.check_float_positive('learning_rate', learning_rate, None) validator.check_float_legal_value('learning_rate', learning_rate, None) validator.check_float_positive('end_learning_rate', end_learning_rate, None) validator.check_float_legal_value('end_learning_rate', end_learning_rate, None) validator.check_float_positive('power', power, None) validator.check_float_legal_value('power', power, None) validator.check_integer('total_step', total_step, 0, Rel.GT, None) validator.check_integer('step_per_epoch', step_per_epoch, 0, Rel.GT, None) validator.check_integer('decay_epoch', decay_epoch, 0, Rel.GT, None) validator.check_value_type('update_decay_epoch', update_decay_epoch, [bool], None) origin_decay_epoch = decay_epoch function = lambda x, y: (x, min(x, y)) if update_decay_epoch: function = lambda x, y: (origin_decay_epoch * max(math.ceil(y / origin_decay_epoch), 1), y) lr = [] delta = learning_rate - end_learning_rate for i in range(total_step): current_epoch = math.floor(i / step_per_epoch) decay_epoch, tmp_epoch = function(decay_epoch, current_epoch) lr.append(delta * (1 - tmp_epoch / decay_epoch) ** power + end_learning_rate) return lr
def _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair): validator.check_integer('total_step', total_step, 0, Rel.GT, None) validator.check_integer('step_per_epoch', step_per_epoch, 0, Rel.GT, None) validator.check_integer('decay_epoch', decay_epoch, 0, Rel.GT, None) validator.check_float_positive('learning_rate', learning_rate, None) validator.check_float_positive('decay_rate', decay_rate, None) validator.check_value_type('is_stair', is_stair, [bool], None)
def __init__(self, max_val=1.0, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03): super(SSIM, self).__init__() validator.check_value_type('max_val', max_val, [int, float], self.cls_name) validator.check_number('max_val', max_val, 0.0, Rel.GT, self.cls_name) self.max_val = max_val self.filter_size = validator.check_integer('filter_size', filter_size, 1, Rel.GE, self.cls_name) self.filter_sigma = validator.check_float_positive('filter_sigma', filter_sigma, self.cls_name) self.k1 = validator.check_value_type('k1', k1, [float], self.cls_name) self.k2 = validator.check_value_type('k2', k2, [float], self.cls_name) window = _create_window(filter_size, filter_sigma) self.conv = _conv2d(1, 1, filter_size, Tensor(window)) self.conv.weight.requires_grad = False self.reduce_mean = P.ReduceMean() self.concat = P.Concat(axis=1)
def _init_depthwise_conv2d(self, weight_init): """Initialize depthwise conv2d op""" if context.get_context("device_target") == "Ascend" and self.group > 1: self.dilation = self._dilation Validator.check_integer('group', self.group, self.in_channels, Rel.EQ) Validator.check_integer('group', self.group, self.out_channels, Rel.EQ) self.conv2d = P.DepthwiseConv2dNative(channel_multiplier=1, kernel_size=self.kernel_size, pad_mode=self.pad_mode, pad=self.padding, stride=self.stride, dilation=self.dilation) weight_shape = [1, self.in_channels, *self.kernel_size] self.weight_init = weight_init if isinstance(weight_init, Tensor): self.weight_init = Tensor(weight_init.asnumpy().swapaxes(0, 1), weight_init.dtype) if isinstance(weight_init, Initializer): self.weight_init.shape = weight_shape self.weight = Parameter(initializer(self.weight_init, weight_shape), name='weight')
def cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch): r""" Calculate learning rate base on cosine decay function. For the i-th step, the formula of computing decayed_learning_rate[i] is: .. math:: decayed\_learning\_rate[i] = min\_learning\_rate + 0.5 * (max\_learning\_rate - min\_learning\_rate) * (1 + cos(\frac{current\_epoch}{decay\_epoch}\pi)) Where :math:`current\_epoch=floor(\frac{i}{step\_per\_epoch})`. Args: min_lr (float): The minimum value of learning rate. max_lr (float): The maximum value of learning rate. total_step (int): The total number of steps. step_per_epoch (int): The number of steps in per epoch. decay_epoch (int): A value used to calculate decayed learning rate. Returns: list[float]. The size of list is `total_step`. Examples: >>> min_lr = 0.01 >>> max_lr = 0.1 >>> total_step = 6 >>> step_per_epoch = 2 >>> decay_epoch = 2 >>> cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch) [0.1, 0.1, 0.05500000000000001, 0.05500000000000001, 0.01, 0.01] """ if not isinstance(min_lr, float): raise TypeError("min_lr must be float.") validator.check_number_range("min_lr", min_lr, 0.0, float("inf"), Rel.INC_LEFT, None) validator.check_float_positive('max_lr', max_lr, None) validator.check_float_legal_value('max_lr', max_lr, None) validator.check_integer('total_step', total_step, 0, Rel.GT, None) validator.check_integer('step_per_epoch', step_per_epoch, 0, Rel.GT, None) validator.check_integer('decay_epoch', decay_epoch, 0, Rel.GT, None) if min_lr >= max_lr: raise ValueError('`max_lr` should be greater than `min_lr`.') delta = 0.5 * (max_lr - min_lr) lr = [] for i in range(total_step): tmp_epoch = min(math.floor(i / step_per_epoch), decay_epoch) lr.append(min_lr + delta * (1 + math.cos(math.pi * tmp_epoch / decay_epoch))) return lr
def warmup_lr(learning_rate, total_step, step_per_epoch, warmup_epoch): r""" Get learning rate warming up. For the i-th step, the formula of computing warmup_learning_rate[i] is: .. math:: warmup\_learning\_rate[i] = learning\_rate * tmp\_epoch / tmp\_warmup\_epoch Where :math:`tmp\_epoch=min(current\_epoch, warmup\_epoch),\ current\_epoch=floor(\frac{i}{step\_per\_epoch})` Args: learning_rate (float): The initial value of learning rate. warmup_steps (int): The warm up steps of learning rate. Inputs: Tensor. The current step number. Returns: Tensor. The learning rate value for the current step. Examples: >>> learning_rate = 0.1 >>> total_step = 6 >>> step_per_epoch = 2 >>> warmup_epoch = 2 >>> warmup_lr(learning_rate, total_step, step_per_epoch, warmup_epoch) [0.0, 0.0, 0.05, 0.05, 0.1, 0.1] """ if not isinstance(learning_rate, float): raise TypeError("learning_rate must be float.") validator.check_number_range("learning_rate", learning_rate, 0.0, float("inf"), Rel.INC_LEFT, None) validator.check_integer('warmup_epoch', warmup_epoch, 0, Rel.GT, None) validator.check_integer('total_step', total_step, 0, Rel.GT, None) validator.check_integer('step_per_epoch', step_per_epoch, 0, Rel.GT, None) function = lambda x, y: (x, min(x, y)) lr = [] for i in range(total_step): current_epoch = math.floor(i / step_per_epoch) warmup_epoch, tmp_epoch = function(warmup_epoch, current_epoch) lr.append(learning_rate * tmp_epoch / warmup_epoch) return lr
def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same', padding=0, dilation=1, group=1, has_bias=False, weight_init='normal', bias_init='zeros'): Validator.check_value_type("kernel_size", kernel_size, [int], self.cls_name) Validator.check_value_type("stride", stride, [int], self.cls_name) Validator.check_value_type("padding", padding, [int], self.cls_name) Validator.check_value_type("dilation", dilation, [int], self.cls_name) Validator.check_integer('kernel_size', kernel_size, 1, Rel.GE, self.cls_name) Validator.check_integer('stride', stride, 1, Rel.GE, self.cls_name) Validator.check_integer('padding', padding, 0, Rel.GE, self.cls_name) Validator.check_integer('dilation', dilation, 1, Rel.GE, self.cls_name) kernel_size = (1, kernel_size) stride = (1, stride) dilation = (1, dilation) get_shape = P.Shape() get_dtype = P.DType() if isinstance(weight_init, Tensor): weight_init_shape = get_shape(weight_init) Validator.check_integer('weight_init_shape', len(weight_init_shape), 3, Rel.EQ, self.cls_name) weight_init_dtype = get_dtype(weight_init) weight_init_value = weight_init.asnumpy() weight_init_value = np.expand_dims(weight_init_value, 2) weight_init = Tensor(weight_init_value, weight_init_dtype) # out_channels and in_channels swap. # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel, # then Conv1dTranspose's out_channel refers to Conv2DBackpropInput's in_channel. super(Conv1dTranspose, self).__init__( in_channels, out_channels, kernel_size, stride, pad_mode, padding, dilation, group, has_bias, weight_init, bias_init, transposed=True) self.padding = (0, 0, padding, padding) self.in_channels = in_channels self.out_channels = out_channels self.shape = P.Shape() if pad_mode not in ('valid', 'same', 'pad'): raise ValueError('Attr \'pad_mode\' of \'Conv1dTranspose\' Op passed ' + str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.') self.is_valid = self.pad_mode == 'valid' self.is_same = self.pad_mode == 'same' self.is_pad = self.pad_mode == 'pad' if check_bool(has_bias): self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel. self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels, kernel_size=kernel_size, mode=1, pad_mode=pad_mode, pad=self.padding, stride=stride, dilation=dilation, group=group) self.bias_add = P.BiasAdd() self.expand_dims = P.ExpandDims() self.squeeze = P.Squeeze(2)
def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same', padding=0, dilation=1, group=1, has_bias=False, weight_init='normal', bias_init='zeros'): kernel_size = twice(kernel_size) stride = twice(stride) dilation = twice(dilation) Validator.check_value_type('padding', padding, (int, tuple), self.cls_name) if isinstance(padding, tuple): Validator.check_integer('padding size', len(padding), 4, Rel.EQ, self.cls_name) # out_channels and in_channels swap. # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel, # then Conv2dTranspose's out_channel refers to Conv2DBackpropInput's in_channel. super(Conv2dTranspose, self).__init__( in_channels, out_channels, kernel_size, stride, pad_mode, padding, dilation, group, has_bias, weight_init, bias_init, transposed=True) self.in_channels = in_channels self.out_channels = out_channels self.shape = P.Shape() if pad_mode not in ('valid', 'same', 'pad'): raise ValueError('Attr \'pad_mode\' of \'Conv2dTranspose\' Op passed ' + str(pad_mode) + ', should be one of values in \'valid\', \'same\', \'pad\'.') self.is_valid = self.pad_mode == 'valid' self.is_same = self.pad_mode == 'same' self.is_pad = self.pad_mode == 'pad' if check_bool(has_bias): self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel. self.conv2d_transpose = P.Conv2DBackpropInput(out_channel=in_channels, kernel_size=kernel_size, mode=1, pad_mode=pad_mode, pad=padding, stride=stride, dilation=dilation, group=group) self.bias_add = P.BiasAdd() if isinstance(self.padding, int): self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = (self.padding,) * 4 else: self.padding_top, self.padding_bottom, self.padding_left, self.padding_right = self.padding
def __init__(self, in_channels, out_channels, kernel_size, stride, pad_mode, padding, dilation, group, has_bias, weight_init, bias_init, transposed=False): super(_Conv, self).__init__() self.in_channels = check_int_positive(in_channels) self.out_channels = check_int_positive(out_channels) self.kernel_size = kernel_size self.stride = stride self.pad_mode = pad_mode self.weight_init = weight_init self.bias_init = bias_init if isinstance(padding, int): Validator.check_integer('padding', padding, 0, Rel.GE, self.cls_name) self.padding = padding elif isinstance(padding, tuple): for pad in padding: Validator.check_integer('padding item', pad, 0, Rel.GE, self.cls_name) self.padding = padding else: raise TypeError("padding type must be int/tuple(int) cannot be {}!".format(type(padding))) self.dilation = dilation self.group = check_int_positive(group) self.has_bias = has_bias if (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \ kernel_size[0] < 1 or kernel_size[1] < 1: raise ValueError("Attr 'kernel_size' of 'Conv2D' Op passed " + str(self.kernel_size) + ", should be a int or tuple and equal to or greater than 1.") if (not isinstance(stride[0], int)) or (not isinstance(stride[1], int)) or stride[0] < 1 or stride[1] < 1: raise ValueError("Attr 'stride' of 'Conv2D' Op passed " + str(self.stride) + ", should be a int or tuple and equal to or greater than 1.") if (not isinstance(dilation[0], int)) or (not isinstance(dilation[1], int)) or \ dilation[0] < 1 or dilation[1] < 1: raise ValueError("Attr 'dilation' of 'Conv2D' Op passed " + str(self.dilation) + ", should equal to or greater than 1.") if in_channels % group != 0: raise ValueError("Attr 'in_channels' of 'Conv2D' Op must be divisible by " "attr 'group' of 'Conv2D' Op.") if out_channels % group != 0: raise ValueError("Attr 'out_channels' of 'Conv2D' Op must be divisible by " "attr 'group' of 'Conv2D' Op.") if transposed: shape = [in_channels, out_channels // group, *kernel_size] else: shape = [out_channels, in_channels // group, *kernel_size] self.weight = Parameter(initializer(self.weight_init, shape), name='weight') if check_bool(has_bias): self.bias = Parameter(initializer(self.bias_init, [out_channels]), name='bias') else: if self.bias_init != 'zeros': logger.warning("Value of 'has_bias' is False, value of 'bias_init' will be ignored.") self.bias = None
def _get_matrix_diag_part_assist(x_shape, x_dtype): Validator.check_integer("x rank", len(x_shape), 2, Rel.GE, "_get_matrix_diag_part_assist") base_eye = np.eye(x_shape[-2], x_shape[-1]).reshape(-1) assist = np.tile(base_eye, x_shape[:-2]).reshape(x_shape) return Tensor(assist, x_dtype)
def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same', padding=0, dilation=1, group=1, eps=1e-5, momentum=0.997, weight_init=None, beta_init=None, gamma_init=None, mean_init=None, var_init=None, quant_delay=0, freeze_bn=100000, fake=True, num_bits=8, per_channel=False, symmetric=False, narrow_range=False): """init Conv2dBatchNormQuant layer""" super(Conv2dBatchNormQuant, self).__init__() self.in_channels = in_channels self.out_channels = out_channels self.kernel_size = twice(kernel_size) self.stride = twice(stride) self.pad_mode = pad_mode self.padding = padding self.dilation = twice(dilation) self.group = group self.eps = eps self.momentum = momentum self.quant_delay = quant_delay self.freeze_bn = freeze_bn self.fake = fake self.num_bits = num_bits self.per_channel = per_channel self.symmetric = symmetric self.narrow_range = narrow_range self.is_gpu = context.get_context('device_target') == "GPU" # initialize convolution op and Parameter if context.get_context('device_target') == "Ascend" and group > 1: validator.check_integer('group', group, in_channels, Rel.EQ, 'Conv2dBatchNormQuant') validator.check_integer('group', group, out_channels, Rel.EQ, 'Conv2dBatchNormQuant') self.conv = P.DepthwiseConv2dNative(channel_multiplier=1, kernel_size=self.kernel_size, pad_mode=pad_mode, pad=padding, stride=self.stride, dilation=self.dilation) if weight_init is None: weight_init = initializer('normal', [1, in_channels, *self.kernel_size]) channel_axis = 1 else: self.conv = P.Conv2D(out_channel=out_channels, kernel_size=self.kernel_size, pad_mode=pad_mode, pad=padding, stride=self.stride, dilation=self.dilation, group=group) if weight_init is None: weight_init = initializer( 'normal', [out_channels, in_channels // group, *self.kernel_size]) channel_axis = 0 self.weight = Parameter(weight_init, name='weight') # initialize batchnorm Parameter if gamma_init is None: gamma_init = initializer('ones', [out_channels]) self.gamma = Parameter(gamma_init, name='gamma') if beta_init is None: beta_init = initializer('zeros', [out_channels]) self.beta = Parameter(beta_init, name='beta') if mean_init is None: mean_init = initializer('zeros', [out_channels]) self.moving_mean = Parameter(mean_init, name='moving_mean', requires_grad=False) if var_init is None: var_init = initializer('ones', [out_channels]) self.moving_variance = Parameter(var_init, name='moving_variance', requires_grad=False) # initialize fake ops self.fake_quant_weight = FakeQuantWithMinMax(min_init=-6, max_init=6, ema=False, num_bits=num_bits, quant_delay=quant_delay, per_channel=per_channel, out_channels=out_channels, symmetric=symmetric, narrow_range=narrow_range) self.batchnorm_fold = BatchNormFoldCell(epsilon=eps, momentum=momentum, freeze_bn=freeze_bn) self.correct_mul = P.CorrectionMul(channel_axis) if context.get_context('device_target') == "Ascend": self.batchnorm_fold2_train = P.BatchNormFold2_D( freeze_bn=freeze_bn) self.batchnorm_fold2_infer = P.BatchNormFold2_D(freeze_bn=0) elif context.get_context('device_target') == "GPU": self.batchnorm_fold2_train = P.BatchNormFold2(freeze_bn=freeze_bn) self.batchnorm_fold2_infer = P.BatchNormFold2(freeze_bn=0) else: raise ValueError("Unsupported platform: {}".format( context.get_context('device_target'))) self.step = Parameter(initializer('normal', [1], dtype=mstype.int32), name='step', requires_grad=False) self.one = Tensor(1, mstype.int32) self.assignadd = P.AssignAdd()
def __init__(self, input_size, hidden_size, num_layers=1, has_bias=True, batch_first=False, dropout=0, bidirectional=False): super(LSTM, self).__init__() self.input_size = input_size self.hidden_size = hidden_size self.num_layers = num_layers self.has_bias = has_bias self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name) self.hidden_size = validator.check_integer("hidden_size", hidden_size, 0, Rel.GT, self.cls_name) self.num_layers = validator.check_integer("num_layers", num_layers, 0, Rel.GT, self.cls_name) self.dropout = float(dropout) self.bidirectional = bidirectional if self.batch_first: self.transpose1 = P.Transpose() self.transpose2 = P.Transpose() num_directions = 2 if self.bidirectional else 1 self.cpu_target = False enable_debug = context.get_context("enable_debug_runtime") if context.get_context("device_target") == "CPU" and not enable_debug: self.cpu_target = True if not self.cpu_target: self.lstm = P.LSTM(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers, has_bias=self.has_bias, bidirectional=self.bidirectional, dropout=self.dropout) weight_size = 0 gate_size = 4 * self.hidden_size for layer in range(self.num_layers): input_layer_size = self.input_size if layer == 0 else self.hidden_size * num_directions increment_size = gate_size * input_layer_size increment_size += gate_size * self.hidden_size if self.has_bias: increment_size += 2 * gate_size weight_size += increment_size * num_directions stdv = 1 / math.sqrt(hidden_size) w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32) self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight') else: input_size_list = [] input_size_list.append(self.input_size) for i in range(self.num_layers - 1): input_size_list.append(self.hidden_size * num_directions) weights = [] layers = [] bias_size = 0 if not self.has_bias else num_directions * self.hidden_size * 4 stdv = 1 / math.sqrt(hidden_size) for i in range(num_layers): weight_size = (input_size_list[i] + self.hidden_size ) * num_directions * self.hidden_size * 4 if has_bias: weight_size = weight_size + bias_size w_np = np.random.uniform( -stdv, stdv, (weight_size, 1, 1)).astype(np.float32) weights.append( Parameter(initializer(Tensor(w_np), w_np.shape), name='weight' + str(i))) layers.append( nn.LSTMCell(input_size=input_size_list[i], hidden_size=self.hidden_size, has_bias=self.has_bias, bidirectional=self.bidirectional, dropout=self.dropout)) self.lstms = layers self.weight = ParameterTuple(tuple(weights)) self.fill = P.Fill() self.shape = P.Shape()