def __init__(self, in_channels, out_channels, weight_init='normal', bias_init='zeros',
             has_bias=True, activation=None):
    super(Dense, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    self.reshape = P.Reshape()
    self.shape_op = P.Shape()

    if isinstance(weight_init, Tensor):
        if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("Weight init shape error.")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

    self.bias = None
    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("Bias init shape error.")
        self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
        self.bias_add = P.BiasAdd()

    self.matmul = P.MatMul(transpose_b=True)
    self.activation = get_activation(activation) if isinstance(activation, str) else activation
    if activation is not None and not isinstance(self.activation, (Cell, Primitive)):
        raise TypeError("The activation must be str or Cell or Primitive, but got {}.".format(activation))
    self.activation_flag = self.activation is not None
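A minimal usage sketch, assuming the surrounding class is the full Dense Cell with its construct method; the shapes below are illustrative, not from the original source:

import numpy as np
from mindspore import Tensor

dense = Dense(3, 4, activation='relu')            # y = relu(x @ W^T + b)
x = Tensor(np.ones((2, 3)).astype(np.float32))    # (batch, in_channels)
y = dense(x)                                      # expected shape: (2, 4)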
def __init__(self, input_size, hidden_size, num_layers=1, has_bias=True,
             batch_first=False, dropout=0, bidirectional=False):
    super(LSTM, self).__init__()
    validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
    validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
    validator.check_positive_int(num_layers, "num_layers", self.cls_name)
    self.batch_first = batch_first
    self.transpose = P.Transpose()
    self.lstm = P.LSTM(input_size=input_size,
                       hidden_size=hidden_size,
                       num_layers=num_layers,
                       has_bias=has_bias,
                       bidirectional=bidirectional,
                       dropout=float(dropout))

    weight_size = 0
    gate_size = 4 * hidden_size
    num_directions = 2 if bidirectional else 1
    for layer in range(num_layers):
        input_layer_size = input_size if layer == 0 else hidden_size * num_directions
        increment_size = gate_size * input_layer_size
        increment_size += gate_size * hidden_size
        if has_bias:
            increment_size += 2 * gate_size
        weight_size += increment_size * num_directions

    stdv = 1 / math.sqrt(hidden_size)
    w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
    self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight')
def __init__(self, model, train_dataset, task_type, num_classes=None, epochs=1,
             epi_uncer_model_path=None, ale_uncer_model_path=None, save_model=False):
    self.epi_model = model
    self.ale_model = deepcopy(model)
    self.epi_train_dataset = train_dataset
    self.ale_train_dataset = train_dataset
    self.task_type = task_type
    self.epochs = Validator.check_positive_int(epochs)
    self.epi_uncer_model_path = epi_uncer_model_path
    self.ale_uncer_model_path = ale_uncer_model_path
    self.save_model = Validator.check_bool(save_model)
    self.epi_uncer_model = None
    self.ale_uncer_model = None
    self.concat = P.Concat(axis=0)
    self.sum = P.ReduceSum()
    self.pow = P.Pow()
    if not isinstance(model, Cell):
        raise TypeError('The model should be Cell type.')
    if task_type not in ('regression', 'classification'):
        raise ValueError('The task should be regression or classification.')
    if task_type == 'classification':
        self.num_classes = Validator.check_positive_int(num_classes)
    else:
        self.num_classes = num_classes
    if save_model:
        if epi_uncer_model_path is None or ale_uncer_model_path is None:
            raise ValueError("If save_model is True, the epi_uncer_model_path and "
                             "ale_uncer_model_path should not be None.")
def avg_pooling(x, pool_h, pool_w, stride):
    """
    Applies average pooling over an input array.

    Args:
        x (numpy.ndarray): The input array to be average pooled.
        pool_h (int): Height of the pooling window.
        pool_w (int): Width of the pooling window.
        stride (int): The stride of the sliding window.

    Returns:
        numpy.ndarray, an output array after applying average pooling on input array.
    """
    validator.check_positive_int(stride, "stride")
    num, channel, height, width = x.shape
    out_h = (height - pool_h) // stride + 1
    out_w = (width - pool_w) // stride + 1
    col = im2col(x, pool_h, pool_w, stride)
    col = col.reshape(-1, pool_h * pool_w)
    out = np.mean(col, axis=1)
    out = out.reshape((num, out_h, out_w, channel)).transpose(0, 3, 1, 2)
    return out
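A minimal call sketch, assuming the im2col helper from the same module is available; the input is NCHW and the values are illustrative:

import numpy as np

x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)   # N=1, C=1, H=W=4
out = avg_pooling(x, pool_h=2, pool_w=2, stride=2)        # expected shape: (1, 1, 2, 2)
# Each output cell is the mean of one non-overlapping 2x2 window.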
def __init__(self, in_channels, out_channels, weight_init='normal', bias_init='zeros',
             damping=0.03, loss_scale=1, frequency=278, batch_size=32,
             has_bias=True, activation=None):
    super(Dense_Thor_GPU, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    self.thor = True
    if isinstance(weight_init, Tensor):
        if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("weight_init shape error")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))

    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("bias_init shape error")
        self.bias = Parameter(initializer(bias_init, [out_channels]))

    self.matmul = P.MatMul(transpose_b=True)
    self.bias_add = P.BiasAdd()
    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None

    split_dim = 128
    matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.in_channels, self.out_channels, split_dim)
    self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)), requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)), requires_grad=False)
    self.broadcast_to = P.BroadcastTo(matrix_A_shape)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.mul = P.Mul()
    self.cube_matmul = P.MatMul(transpose_a=True)
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.batch_size = Tensor(batch_size, mstype.float16)
    self.getG = P.InsertGradientOf(self.save_gradient)
    self.damping = Parameter(Tensor(damping), requires_grad=False)
    self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
    self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
    self.cast = P.Cast()
    self.gather = P.Gather()
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.add = P.Add()
    self.sqrt = P.Sqrt()
    self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
    self.vector_matmul = P.BatchMatMul(transpose_a=True)
def __init__(self, in_channels, out_channels, weight_init='normal', bias_init='zeros',
             has_bias=True, activation=None):
    super(Dense, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)

    if isinstance(weight_init, Tensor):
        if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("Weight init shape error.")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

    self.bias = None
    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("Bias init shape error.")
        self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
        self.bias_add = P.BiasAdd()

    self.matmul = P.MatMul(transpose_b=True)
    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None
def __init__(self, num_groups, num_channels, eps=1e-05, affine=True,
             gamma_init='ones', beta_init='zeros'):
    super(GroupNorm, self).__init__()
    self.num_groups = validator.check_positive_int(num_groups)
    self.num_channels = validator.check_positive_int(num_channels)
    if num_channels % num_groups != 0:
        raise ValueError("num_channels must be divisible by num_groups.")
    self.eps = validator.check_value_type('eps', eps, (float,), type(self).__name__)
    self.affine = validator.check_bool(affine)

    gamma = initializer(gamma_init, num_channels)
    beta = initializer(beta_init, num_channels)
    if self.affine:
        self.gamma = Parameter(gamma, name='gamma')
        self.beta = Parameter(beta, name='beta')
    else:
        self.gamma = gamma
        self.beta = beta
    self.shape = F.shape
    self.reshape = F.reshape
    self.reduce_mean = P.ReduceMean(keep_dims=True)
    self.square = F.square
    self.reduce_sum = P.ReduceSum(keep_dims=True)
    self.sqrt = P.Sqrt()
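A minimal usage sketch, assuming the surrounding GroupNorm Cell normalizes NCHW inputs in its construct method; shapes are illustrative:

import numpy as np
from mindspore import Tensor

group_norm = GroupNorm(num_groups=2, num_channels=4)
x = Tensor(np.ones((1, 4, 8, 8), np.float32))
y = group_norm(x)   # normalized per group of 2 channels; same shape as x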
def __init__(self, in_channels, out_channels, weight_init='normal', bias_init='zeros',
             has_bias=True):
    super(GNNFeatureTransform, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)

    if isinstance(weight_init, Tensor):
        if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("weight_init shape error")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("bias_init shape error")
        self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")

    self.matmul = P.MatMul(transpose_b=True)
    self.bias_add = P.BiasAdd()
def __init__(self, features, biases, ftr_dims, num_class, num_nodes,
             hidden_units, num_heads, attn_drop=0.0, ftr_drop=0.0,
             activation=nn.ELU(), residual=False):
    super(GAT, self).__init__()
    self.features = Tensor(features)
    self.biases = Tensor(biases)
    self.ftr_dims = Validator.check_positive_int(ftr_dims)
    self.num_class = Validator.check_positive_int(num_class)
    self.num_nodes = Validator.check_positive_int(num_nodes)
    self.hidden_units = hidden_units
    self.num_heads = num_heads
    self.attn_drop = attn_drop
    self.ftr_drop = ftr_drop
    self.activation = activation
    self.residual = Validator.check_bool(residual)
    self.layers = []
    # first layer
    self.layers.append(AttentionAggregator(self.ftr_dims,
                                           self.hidden_units[0],
                                           self.num_heads[0],
                                           self.ftr_drop,
                                           self.attn_drop,
                                           self.activation,
                                           residual=False))
    # intermediate layers
    for i in range(1, len(self.hidden_units)):
        self.layers.append(AttentionAggregator(self.hidden_units[i - 1] * self.num_heads[i - 1],
                                               self.hidden_units[i],
                                               self.num_heads[i],
                                               self.ftr_drop,
                                               self.attn_drop,
                                               self.activation,
                                               residual=self.residual))
    # output layer
    self.layers.append(AttentionAggregator(self.hidden_units[-1] * self.num_heads[-2],
                                           self.num_class,
                                           self.num_heads[-1],
                                           self.ftr_drop,
                                           self.attn_drop,
                                           activation=None,
                                           residual=False,
                                           output_transform='sum'))
    self.layers = nn.layer.CellList(self.layers)
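A construction sketch for a toy graph. All sizes and the (batch, num_nodes, num_nodes) bias layout are assumptions for illustration, not taken from the original source; note that num_heads needs at least two entries, since the output layer indexes both num_heads[-2] and num_heads[-1]:

import numpy as np

features = np.random.rand(1, 5, 8).astype(np.float32)   # assumed (batch, num_nodes, ftr_dims)
biases = np.zeros((1, 5, 5), dtype=np.float32)          # assumed adjacency-derived attention bias
net = GAT(features, biases, ftr_dims=8, num_class=3, num_nodes=5,
          hidden_units=[8], num_heads=[2, 1])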
def set_grad_accumulation_step(self, grad_accumulation_step):
    """
    Set grad accumulation step.

    Args:
        grad_accumulation_step (int): The grad accumulation step.
    """
    self.check_context_handle()
    Validator.check_positive_int(grad_accumulation_step)
    self._context_handle.set_grad_accumulation_step(grad_accumulation_step)
def __init__(self, vocab_size, embedding_size, param_init='normal',
             target='CPU', slice_mode='batch_slice', manual_shapes=None):
    super(EmbeddingLookup, self).__init__()
    self.target = target
    if target not in ('CPU', 'DEVICE'):
        raise ValueError("Attr 'target' of 'EmbeddingLookup' Op passed " + str(target)
                         + ", should be one of values in 'CPU', 'DEVICE'.")
    self.gatherv2 = P.GatherV2()
    self.embeddinglookup = P.EmbeddingLookup().add_prim_attr('primitive_target', 'CPU')
    self.embedding_table = Parameter(initializer(param_init, [vocab_size, embedding_size]),
                                     name='embedding_table')
    parallel_mode = _get_parallel_mode()
    is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
    if slice_mode == "field_slice" and is_auto_parallel:
        if not manual_shapes:
            raise ValueError("In field_slice mode, manual_shapes should not be None.")
        if not isinstance(manual_shapes, tuple):
            raise TypeError("manual_shapes must be tuple(int), but got {}!".format(type(manual_shapes)))
        for dim in manual_shapes:
            validator.check_positive_int(dim, 'manual shape dim', self.cls_name)
        self.gatherv2.add_prim_attr("manual_split", manual_shapes)
        self.embeddinglookup.add_prim_attr("manual_split", manual_shapes)
        self.gatherv2.shard(((get_group_size(), 1), (1, get_group_size())))
        self.embeddinglookup.shard(((get_group_size(), 1), (1, get_group_size())))
    elif slice_mode == "table_row_slice" and is_auto_parallel:
        self.gatherv2.shard(((get_group_size(), 1), (1, 1)))
        self.embeddinglookup.shard(((get_group_size(), 1), (1, 1)))
    elif slice_mode == "table_column_slice" and is_auto_parallel:
        self.gatherv2.shard(((1, get_group_size()), (1, 1)))
        self.embeddinglookup.shard(((1, get_group_size()), (1, 1)))
    elif slice_mode == "batch_slice" and is_auto_parallel:
        self.gatherv2.shard(((1, 1), (get_group_size(), 1)))
        self.embeddinglookup.shard(((1, 1), (get_group_size(), 1)))
    else:
        if is_auto_parallel:
            raise ValueError("slice_mode is not supported in nn.EmbeddingLookup, but got "
                             + str(slice_mode))
def max_pool_with_argmax(x, pool_h, pool_w, stride):
    """Max pooling with argmax."""
    validator.check_positive_int(stride, "stride")
    num, channel, height, width = x.shape
    out_h = (height - pool_h) // stride + 1
    out_w = (width - pool_w) // stride + 1
    col = im2col(x, pool_h, pool_w, stride)
    col = col.reshape(-1, pool_h * pool_w)
    out = np.max(col, axis=1)
    out_argmax = np.argmax(col, axis=1)
    out = out.reshape((num, out_h, out_w, channel)).transpose(0, 3, 1, 2)
    out_argmax = out_argmax.reshape((num, out_h, out_w, channel)).transpose(0, 3, 1, 2)
    return out, out_argmax
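A minimal call sketch mirroring the avg_pooling example above; again assumes im2col is available and the shapes are illustrative:

import numpy as np

x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)
out, argmax = max_pool_with_argmax(x, pool_h=2, pool_w=2, stride=2)
# out holds the per-window maxima; argmax holds each maximum's flat index
# within its 2x2 window (values in [0, 3]).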
def __init__(self, num_features, eps=1e-5, momentum=0.9, affine=True,
             gamma_init='ones', beta_init='zeros', moving_mean_init='zeros',
             moving_var_init='ones', use_batch_statistics=None, device_num_each_group=2):
    super(GlobalBatchNorm, self).__init__(num_features, eps, momentum, affine, gamma_init,
                                          beta_init, moving_mean_init, moving_var_init,
                                          use_batch_statistics, device_num_each_group,
                                          input_dims='both')
    self.group = validator.check_positive_int(device_num_each_group)
    if self.group <= 1:
        raise ValueError("The number of groups must be greater than 1.")
def run(self, train_dataset, epochs=10):
    """
    Optimize the parameters by training the probability network, and return the trained network.

    Args:
        train_dataset (Dataset): A training dataset iterator.
        epochs (int): Total number of iterations on the data. Default: 10.

    Returns:
        Cell, the trained probability network.
    """
    epochs = Validator.check_positive_int(epochs)
    train_net = TrainOneStepCell(self.net_with_loss, self.optimizer)
    train_net.set_train()
    for _ in range(1, epochs + 1):
        train_loss = 0
        dataset_size = 0
        for data in train_dataset.create_dict_iterator(num_epochs=1):
            x = Tensor(data['image'], dtype=mstype.float32)
            y = Tensor(data['label'], dtype=mstype.int32)
            dataset_size += len(x)
            loss = train_net(x, y).asnumpy()
            train_loss += loss
        self._loss = train_loss / dataset_size
    model = self.net_with_loss.backbone_network
    return model
def piecewise_constant_lr(milestone, learning_rates):
    r"""
    Get piecewise constant learning rate.

    Calculate learning rate by given `milestone` and `learning_rates`. Let the value of `milestone` be
    :math:`(M_1, M_2, ..., M_N)` and the value of `learning_rates` be :math:`(x_1, x_2, ..., x_N)`. N is
    the length of `milestone`. Let the output learning rate be `y`.

    .. math::
        y[i] = x_t,\ for\ i \in [M_{t-1}, M_t)

    Args:
        milestone (Union[list[int], tuple[int]]): A list of milestone. This list is a monotone
            increasing list. Every element is a milestone step, and must be greater than 0.
        learning_rates (Union[list[float], tuple[float]]): A list of learning rates.

    Returns:
        list[float]. The size of list is :math:`M_N`.

    Examples:
        >>> milestone = [2, 5, 10]
        >>> learning_rates = [0.1, 0.05, 0.01]
        >>> piecewise_constant_lr(milestone, learning_rates)
        [0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01]
    """
    validator.check_value_type('milestone', milestone, (tuple, list), None)
    validator.check_value_type('learning_rates', learning_rates, (tuple, list), None)
    if len(milestone) != len(learning_rates):
        raise ValueError('The size of `milestone` must be same with the size of `learning_rates`.')

    lr = []
    last_item = 0
    for i, item in enumerate(milestone):
        validator.check_positive_int(item, f'milestone[{i}]')
        validator.check_float_legal_value(f'learning_rates[{i}]', learning_rates[i], None)
        if item < last_item:
            raise ValueError(f'The value of milestone[{i}] must be greater than milestone[{i - 1}]')
        lr += [learning_rates[i]] * (item - last_item)
        last_item = item

    return lr
def __init__(self, channel=1, w=0.25):
    super(PReLU, self).__init__()
    validator.check_positive_int(channel, 'channel', self.cls_name)
    if isinstance(w, (np.float32, float)):
        tmp = np.empty((channel,), dtype=np.float32)
        tmp.fill(w)
        w = Tensor(tmp)
    elif isinstance(w, list):
        w = Tensor(w)

    if not isinstance(w, Tensor):
        raise TypeError("w only supports np.float32, float, list or Tensor type.")

    self.w = Parameter(initializer(w, [channel]), name='a')
    self.prelu = P.PReLU()
    self.relu = P.ReLU()
    self.assign = P.Assign()
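A minimal usage sketch, assuming the surrounding PReLU Cell applies the learned per-channel slope in its construct method; the input shape is illustrative:

import numpy as np
from mindspore import Tensor

prelu = PReLU(channel=3, w=0.25)   # one learnable slope per channel
x = Tensor(np.random.randn(1, 3, 4, 4).astype(np.float32))
y = prelu(x)   # negative inputs are scaled by w instead of being zeroed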
def __init__(self, encoder, decoder, hidden_size, latent_size):
    super(VAE, self).__init__()
    self.encoder = encoder
    self.decoder = decoder
    if (not isinstance(encoder, Cell)) or (not isinstance(decoder, Cell)):
        raise TypeError('The encoder and decoder should be Cell type.')
    self.hidden_size = Validator.check_positive_int(hidden_size)
    self.latent_size = Validator.check_positive_int(latent_size)
    if hidden_size < latent_size:
        raise ValueError('The latent_size should be less than or equal to the hidden_size.')
    self.normal = C.normal
    self.exp = P.Exp()
    self.reshape = P.Reshape()
    self.shape = P.Shape()
    self.to_tensor = P.ScalarToArray()
    self.dense1 = Dense(self.hidden_size, self.latent_size)
    self.dense2 = Dense(self.hidden_size, self.latent_size)
    self.dense3 = Dense(self.latent_size, self.hidden_size)
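A construction sketch; MyEncoder and MyDecoder are hypothetical user-defined Cells (not part of the original source), and hidden_size must match the encoder's output width:

# Hypothetical encoder/decoder Cells for illustration only.
encoder = MyEncoder()    # assumed to map an image to a hidden vector of size 400
decoder = MyDecoder()    # assumed to map a latent vector back to image logits
vae = VAE(encoder=encoder, decoder=decoder, hidden_size=400, latent_size=20)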
def __init__(self, in_channel, out_channel, in_drop_ratio=0.0, coef_drop_ratio=0.0,
             residual=False, coef_activation=nn.LeakyReLU(), activation=nn.ELU()):
    super(AttentionHead, self).__init__()
    self.in_channel = Validator.check_positive_int(in_channel)
    self.out_channel = Validator.check_positive_int(out_channel)
    self.in_drop_ratio = in_drop_ratio
    self.in_drop = nn.Dropout(keep_prob=1 - in_drop_ratio)
    self.in_drop_2 = nn.Dropout(keep_prob=1 - in_drop_ratio)
    self.feature_transform = GNNFeatureTransform(in_channels=self.in_channel,
                                                 out_channels=self.out_channel,
                                                 has_bias=False,
                                                 weight_init='XavierUniform')
    self.f_1_transform = GNNFeatureTransform(in_channels=self.out_channel,
                                             out_channels=1,
                                             weight_init='XavierUniform')
    self.f_2_transform = GNNFeatureTransform(in_channels=self.out_channel,
                                             out_channels=1,
                                             weight_init='XavierUniform')
    self.softmax = nn.Softmax()
    self.coef_drop = nn.Dropout(keep_prob=1 - coef_drop_ratio)
    self.matmul = P.MatMul()
    self.bias_add = P.BiasAdd()
    self.bias = Parameter(initializer('zeros', self.out_channel), name='bias')
    self.residual = Validator.check_bool(residual)
    if self.residual:
        if in_channel != out_channel:
            self.residual_transform_flag = True
            self.residual_transform = GNNFeatureTransform(in_channels=self.in_channel,
                                                          out_channels=self.out_channel)
        else:
            self.residual_transform = None
    self.coef_activation = coef_activation
    self.activation = activation
def _train(self, epoch, train_dataset, callbacks=None, dataset_sink_mode=True):
    """
    Training.

    Args:
        epoch (int): Total number of iterations on the data.
        train_dataset (Dataset): A training dataset iterator. If there is no loss_fn, a tuple
            with multiple data (data1, data2, data3, ...) will be returned and passed to the
            network. Otherwise, a tuple (data, label) will be returned, and the data and label
            are passed to the network and loss function respectively.
        callbacks (list): List of callback objects which should be executed while training.
            Default: None.
        dataset_sink_mode (bool): Determines whether to pass the data through the dataset
            channel. Default: True. In PyNative mode, training is performed with the dataset
            not sunk regardless of this flag.
    """
    epoch = Validator.check_positive_int(epoch)
    self._train_network.set_train()

    if self._parameter_broadcast:
        self._train_network.set_broadcast_flag()

    # build callback list
    cb_params = _InternalCallbackParam()
    cb_params.train_network = self._train_network
    cb_params.epoch_num = epoch
    cb_params.batch_num = train_dataset.get_dataset_size()
    cb_params.mode = "train"
    cb_params.loss_fn = self._loss_fn
    cb_params.optimizer = self._optimizer
    cb_params.parallel_mode = self._parallel_mode
    cb_params.device_number = self._device_number
    cb_params.train_dataset = train_dataset
    cb_params.list_callback = callbacks

    with _CallbackManager(callbacks) as list_callback:
        if not dataset_sink_mode:
            self._train_process(epoch, train_dataset, list_callback, cb_params)
        elif context.get_context("mode") == context.PYNATIVE_MODE:
            logger.warning("The PyNative mode cannot support dataset sink mode currently, "
                           "so the training process will be performed with dataset not sink.")
            self._train_process(epoch, train_dataset, list_callback, cb_params)
        else:
            self._train_dataset_sink_process(epoch, train_dataset, list_callback, cb_params)
def __init__(self, encoder, decoder, hidden_size, latent_size, num_classes):
    super(ConditionalVAE, self).__init__()
    self.encoder = encoder
    self.decoder = decoder
    if (not isinstance(encoder, Cell)) or (not isinstance(decoder, Cell)):
        raise TypeError('The encoder and decoder should be Cell type.')
    self.hidden_size = Validator.check_positive_int(hidden_size)
    self.latent_size = Validator.check_positive_int(latent_size)
    if hidden_size < latent_size:
        raise ValueError('The latent_size should be less than or equal to the hidden_size.')
    self.num_classes = Validator.check_positive_int(num_classes)
    self.normal = C.normal
    self.exp = P.Exp()
    self.reshape = P.Reshape()
    self.shape = P.Shape()
    self.concat = P.Concat(axis=1)
    self.to_tensor = P.ScalarToArray()
    self.one_hot = OneHot(depth=num_classes)
    self.dense1 = Dense(self.hidden_size, self.latent_size)
    self.dense2 = Dense(self.hidden_size, self.latent_size)
    self.dense3 = Dense(self.latent_size + self.num_classes, self.hidden_size)
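A construction sketch analogous to the VAE example above; MyEncoder and MyDecoder are again hypothetical Cells, and the sizes are illustrative:

# Hypothetical encoder/decoder Cells for illustration only.
cvae = ConditionalVAE(encoder=MyEncoder(), decoder=MyDecoder(),
                      hidden_size=400, latent_size=20, num_classes=10)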
def _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair):
    validator.check_positive_int(total_step, 'total_step')
    validator.check_positive_int(step_per_epoch, 'step_per_epoch')
    validator.check_positive_int(decay_epoch, 'decay_epoch')
    validator.check_positive_float(learning_rate, 'learning_rate')
    validator.check_is_float(learning_rate, 'learning_rate')
    validator.check_positive_float(decay_rate, 'decay_rate')
    validator.check_is_float(decay_rate, 'decay_rate')
    validator.check_value_type('is_stair', is_stair, [bool])
def generate_sample(self, generate_nums, shape):
    """
    Randomly sample from latent space to generate samples.

    Args:
        generate_nums (int): The number of samples to generate.
        shape (tuple): The shape of sample, it must be (generate_nums, C, H, W) or (-1, C, H, W).

    Returns:
        Tensor, the generated samples.
    """
    generate_nums = Validator.check_positive_int(generate_nums)
    if not isinstance(shape, tuple) or len(shape) != 4 or (shape[0] != -1 and shape[0] != generate_nums):
        raise ValueError('The shape should be (generate_nums, C, H, W) or (-1, C, H, W).')
    sample_z = self.normal((generate_nums, self.latent_size), self.to_tensor(0.0),
                           self.to_tensor(1.0), seed=0)
    sample = self._decode(sample_z)
    sample = self.reshape(sample, shape)
    return sample
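A call sketch continuing the VAE example above; the 28x28 single-channel shape is illustrative:

samples = vae.generate_sample(generate_nums=64, shape=(64, 1, 28, 28))
# equivalently shape=(-1, 1, 28, 28); both pass the shape check above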
def cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch):
    r"""
    Calculate learning rate base on cosine decay function.

    For the i-th step, the formula of computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = min\_learning\_rate + 0.5 * (max\_learning\_rate -
        min\_learning\_rate) * (1 + cos(\frac{current\_epoch}{decay\_epoch}\pi))

    Where :math:`current\_epoch=floor(\frac{i}{step\_per\_epoch})`.

    Args:
        min_lr (float): The minimum value of learning rate.
        max_lr (float): The maximum value of learning rate.
        total_step (int): The total number of steps.
        step_per_epoch (int): The number of steps in per epoch.
        decay_epoch (int): A value used to calculate decayed learning rate.

    Returns:
        list[float]. The size of list is `total_step`.

    Examples:
        >>> min_lr = 0.01
        >>> max_lr = 0.1
        >>> total_step = 6
        >>> step_per_epoch = 2
        >>> decay_epoch = 2
        >>> output = cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch)
        >>> print(output)
        [0.1, 0.1, 0.05500000000000001, 0.05500000000000001, 0.01, 0.01]
    """
    if not isinstance(min_lr, float):
        raise TypeError("min_lr must be float.")
    validator.check_non_negative_float(min_lr, "min_lr", None)
    validator.check_positive_float(max_lr, 'max_lr')
    validator.check_is_float(max_lr, 'max_lr')
    validator.check_positive_int(total_step, 'total_step')
    validator.check_positive_int(step_per_epoch, 'step_per_epoch')
    validator.check_positive_int(decay_epoch, 'decay_epoch')
    if min_lr >= max_lr:
        raise ValueError('`max_lr` should be greater than `min_lr`.')

    delta = 0.5 * (max_lr - min_lr)
    lr = []
    for i in range(total_step):
        tmp_epoch = min(math.floor(i / step_per_epoch), decay_epoch)
        lr.append(min_lr + delta * (1 + math.cos(math.pi * tmp_epoch / decay_epoch)))
    return lr
def generate_sample(self, sample_y, generate_nums, shape):
    """
    Randomly sample from the latent space to generate samples.

    Args:
        sample_y (Tensor): Define the label of samples. Tensor of shape (generate_nums, )
            and type mindspore.int32.
        generate_nums (int): The number of samples to generate.
        shape (tuple): The shape of sample, which must be the format of (generate_nums, C, H, W)
            or (-1, C, H, W).

    Returns:
        Tensor, the generated samples.
    """
    generate_nums = Validator.check_positive_int(generate_nums)
    if not isinstance(shape, tuple) or len(shape) != 4 or (shape[0] != -1 and shape[0] != generate_nums):
        raise ValueError('The shape should be (generate_nums, C, H, W) or (-1, C, H, W).')
    sample_z = self.normal((generate_nums, self.latent_size), self.to_tensor(0.0),
                           self.to_tensor(1.0), seed=0)
    sample_y = self.one_hot(sample_y)
    sample_c = self.concat((sample_z, sample_y))
    sample = self._decode(sample_c)
    sample = self.reshape(sample, shape)
    return sample
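A call sketch continuing the ConditionalVAE example above; the labels and output shape are illustrative:

import numpy as np
from mindspore import Tensor
import mindspore.common.dtype as mstype

sample_y = Tensor(np.random.randint(0, 10, size=64), dtype=mstype.int32)
samples = cvae.generate_sample(sample_y, generate_nums=64, shape=(64, 1, 28, 28))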
def warmup_lr(learning_rate, total_step, step_per_epoch, warmup_epoch):
    r"""
    Get learning rate warming up.

    For the i-th step, the formula of computing warmup_learning_rate[i] is:

    .. math::
        warmup\_learning\_rate[i] = learning\_rate * tmp\_epoch / tmp\_warmup\_epoch

    Where :math:`tmp\_epoch=min(current\_epoch, warmup\_epoch),\
    current\_epoch=floor(\frac{i}{step\_per\_epoch})`

    Args:
        learning_rate (float): The initial value of learning rate.
        total_step (int): The total number of steps.
        step_per_epoch (int): The number of steps in per epoch.
        warmup_epoch (int): The number of epochs over which the learning rate is warmed up.

    Returns:
        list[float]. The size of list is `total_step`.

    Examples:
        >>> learning_rate = 0.1
        >>> total_step = 6
        >>> step_per_epoch = 2
        >>> warmup_epoch = 2
        >>> output = warmup_lr(learning_rate, total_step, step_per_epoch, warmup_epoch)
        >>> print(output)
        [0.0, 0.0, 0.05, 0.05, 0.1, 0.1]
    """
    if not isinstance(learning_rate, float):
        raise TypeError("learning_rate must be float.")
    validator.check_non_negative_float(learning_rate, "learning_rate", None)
    validator.check_positive_int(warmup_epoch, 'warmup_epoch')
    validator.check_positive_int(total_step, 'total_step')
    validator.check_positive_int(step_per_epoch, 'step_per_epoch')

    function = lambda x, y: (x, min(x, y))

    lr = []
    for i in range(total_step):
        current_epoch = math.floor(i / step_per_epoch)
        warmup_epoch, tmp_epoch = function(warmup_epoch, current_epoch)
        lr.append(learning_rate * tmp_epoch / warmup_epoch)
    return lr
def __init__(self, input_size, hidden_size, num_layers=1, has_bias=True,
             batch_first=False, dropout=0, bidirectional=False):
    super(LSTM, self).__init__()
    validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
    validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
    validator.check_positive_int(num_layers, "num_layers", self.cls_name)
    self.is_ascend = context.get_context("device_target") == "Ascend"

    self.batch_first = batch_first
    self.transpose = P.Transpose()
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.dropout = dropout
    self.lstm = P.LSTM(input_size=input_size,
                       hidden_size=hidden_size,
                       num_layers=num_layers,
                       has_bias=has_bias,
                       bidirectional=bidirectional,
                       dropout=float(dropout))

    weight_size = 0
    gate_size = 4 * hidden_size
    stdv = 1 / math.sqrt(hidden_size)
    num_directions = 2 if bidirectional else 1
    if self.is_ascend:
        self.reverse_seq = P.ReverseSequence(batch_dim=1, seq_dim=0)
        self.concat = P.Concat(axis=0)
        self.concat_2dim = P.Concat(axis=2)
        self.cast = P.Cast()
        self.shape = P.Shape()
        if dropout != 0:
            self.dropout_op = nn.Dropout(float(dropout))
        b0 = np.zeros(gate_size, dtype=np.float16)
        self.w_list = []
        self.b_list = []
        self.rnns_fw = P.DynamicRNN(forget_bias=0.0)
        self.rnns_bw = P.DynamicRNN(forget_bias=0.0)

        for layer in range(num_layers):
            w_shape = input_size if layer == 0 else (num_directions * hidden_size)
            w_np = np.random.uniform(-stdv, stdv, (w_shape + hidden_size, gate_size)).astype(np.float16)
            self.w_list.append(Parameter(
                initializer(Tensor(w_np), [w_shape + hidden_size, gate_size]),
                name='weight_fw' + str(layer)))
            if has_bias:
                b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
                self.b_list.append(Parameter(initializer(Tensor(b_np), [gate_size]),
                                             name='bias_fw' + str(layer)))
            else:
                self.b_list.append(Parameter(initializer(Tensor(b0), [gate_size]),
                                             name='bias_fw' + str(layer)))
            if bidirectional:
                w_bw_np = np.random.uniform(-stdv, stdv,
                                            (w_shape + hidden_size, gate_size)).astype(np.float16)
                self.w_list.append(Parameter(
                    initializer(Tensor(w_bw_np), [w_shape + hidden_size, gate_size]),
                    name='weight_bw' + str(layer)))
                b_bw_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16) if has_bias else b0
                self.b_list.append(Parameter(initializer(Tensor(b_bw_np), [gate_size]),
                                             name='bias_bw' + str(layer)))
        self.w_list = ParameterTuple(self.w_list)
        self.b_list = ParameterTuple(self.b_list)
    else:
        for layer in range(num_layers):
            input_layer_size = input_size if layer == 0 else hidden_size * num_directions
            increment_size = gate_size * input_layer_size
            increment_size += gate_size * hidden_size
            if has_bias:
                increment_size += 2 * gate_size
            weight_size += increment_size * num_directions
        w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
        self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight')
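A usage sketch, assuming the surrounding LSTM Cell's construct takes (x, (h0, c0)) as in the public MindSpore nn.LSTM API; shapes follow the (seq_len, batch, feature) layout since batch_first defaults to False:

import numpy as np
from mindspore import Tensor

lstm = LSTM(input_size=10, hidden_size=16, num_layers=1, bidirectional=False)
x = Tensor(np.ones((5, 3, 10), np.float32))      # (seq_len, batch, input_size)
h0 = Tensor(np.zeros((1, 3, 16), np.float32))    # (num_directions * num_layers, batch, hidden)
c0 = Tensor(np.zeros((1, 3, 16), np.float32))
output, (hn, cn) = lstm(x, (h0, c0))             # output: (5, 3, 16)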
def __init__(self, vocab_size, embedding_size, field_size, param_init='normal',
             target='CPU', slice_mode='batch_slice', feature_num_list=None,
             max_norm=None, sparse=True, operator='SUM'):
    super(MultiFieldEmbeddingLookup, self).__init__(vocab_size, embedding_size, param_init,
                                                    target, slice_mode, feature_num_list,
                                                    max_norm, sparse)
    self.field_size = validator.check_positive_int(field_size, 'field_size')
    self.operator = operator

    self.mul = P.Mul()
    self.inf_mask_mul = P.Mul()
    self.bias_add = P.Add()
    self.inf_add = P.Add()
    self.merge_op = None
    self.count_op = P.UnsortedSegmentSum()
    self.abs = P.Abs()
    self.equal = P.Equal()
    self.add = P.Add()
    self.cast = P.Cast()
    self.div_no_nan = P.DivNoNan()
    self.expand = P.ExpandDims()
    self.max_mask_mul = P.Mul()
    self.max_no_equal = P.NotEqual()

    if operator == MultiFieldEmbeddingLookup.OPERATOR_SUM:
        self.merge_op = P.UnsortedSegmentSum()
    elif operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
        self.merge_op = P.UnsortedSegmentMax()
    elif operator == MultiFieldEmbeddingLookup.OPERATOR_MEAN:
        self.merge_op = P.UnsortedSegmentSum()
    else:
        raise ValueError("The operator supports ['SUM', 'MAX', 'MEAN'], but found: " + str(operator))

    parallel_mode = _get_parallel_mode()
    is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
    if slice_mode in ["table_row_slice", "batch_slice"] and is_auto_parallel:
        self.merge_op.shard(((get_group_size(), 1, 1), (get_group_size(), 1)))
        self.expand.shard(((get_group_size(),),))
        self.bias_add.shard(((1, 1), (1, 1)))
        self.mul.shard(((get_group_size(), 1, 1), (get_group_size(), 1, 1)))
        self.count_op.shard(((get_group_size(), 1), (get_group_size(), 1)))
        self.add.shard(((get_group_size(),), (get_group_size(),)))
        self.div_no_nan.shard(((get_group_size(), 1), (get_group_size(), 1)))
        self.max_mask_mul.shard(((get_group_size(), 1), (get_group_size(), 1)))
        self.max_no_equal.shard(((1,), ()))
        if operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
            self.equal.shard(((get_group_size(), 1, 1), ()))
            self.inf_mask_mul.shard(((get_group_size(), 1, 1), ()))
            self.merge_op.shard(((get_group_size(), 1), (get_group_size(),)))
            self.count_op.shard(((get_group_size(),), (get_group_size(),)))
            self.inf_add.shard(((get_group_size(), 1, 1), (get_group_size(), 1, 1)))
    elif slice_mode == "table_column_slice" and is_auto_parallel:
        self.merge_op.shard(((1, 1, get_group_size()), (1, 1)))
        self.div_no_nan.shard(((1, get_group_size()), (1, 1)))
        self.bias_add.shard(((1, 1), (1, 1)))
        self.mul.shard(((1, 1, 1), (1, 1, get_group_size())))
        self.count_op.shard(((1, 1), (1, 1)))
        self.add.shard(((1,), (1,)))
        self.max_mask_mul.shard(((1, get_group_size()), (1, 1)))
        self.expand.shard(((1,),))
        self.max_no_equal.shard(((1,), ()))
        if operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
            self.equal.shard(((1, 1, 1), ()))
            self.inf_mask_mul.shard(((1, 1, 1), ()))
            self.merge_op.shard(((1, get_group_size()), (1,)))
            self.count_op.shard(((1,), (1,)))
            self.inf_add.shard(((1, 1, get_group_size()), (1, 1, 1)))
    else:
        if is_auto_parallel:
            raise ValueError("slice_mode should be ['table_row_slice', 'batch_slice', "
                             "'table_column_slice'], but got " + str(slice_mode))

    # Min value for fp32
    self.negative_inf_value = -3.402823466E+38
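A construction sketch; the sizes are illustrative, and the note about the call signature reflects the public MindSpore MultiFieldEmbeddingLookup API rather than code shown here:

embedding = MultiFieldEmbeddingLookup(vocab_size=32, embedding_size=8,
                                      field_size=3, operator='SUM')
# The Cell is later called with (input_indices, input_values, field_ids) and
# reduces the looked-up rows per (sample, field) pair with the chosen operator.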
def __init__(self, vocab_size, embedding_size, param_init='normal', target='CPU',
             slice_mode='batch_slice', manual_shapes=None, max_norm=None,
             sparse=True, vocab_cache_size=0):
    super(EmbeddingLookup, self).__init__()
    validator.check_value_type('sparse', sparse, [bool], self.cls_name)
    self.vocab_size = validator.check_positive_int(vocab_size, 'vocab_size')
    self.vocab_cache_size = validator.check_non_negative_int(vocab_cache_size, 'vocab_cache_size')
    self.target = target
    self.sparse = sparse
    self.cache_enable = self.vocab_cache_size > 0
    self.forward_unique = False
    if target not in ('CPU', 'DEVICE'):
        raise ValueError("Attr 'target' of 'EmbeddingLookup' Op passed " + str(target)
                         + ", should be one of values in 'CPU', 'DEVICE'.")
    if not sparse and target == 'CPU':
        raise ValueError('When target is CPU, embedding_lookup must be sparse.')
    if sparse:
        self.gatherv2 = P.SparseGatherV2()
    else:
        self.gatherv2 = P.Gather()
    self.embeddinglookup = P.EmbeddingLookup().add_prim_attr('primitive_target', 'CPU')
    enable_ps = _get_ps_context("enable_ps")
    if enable_ps:
        self._process_vocab_cache(slice_mode)
    self.embedding_size = validator.check_positive_int(embedding_size, 'embedding_size')
    self.embedding_table = Parameter(initializer(param_init, [self.vocab_size, self.embedding_size]),
                                     name='embedding_table')
    parallel_mode = _get_parallel_mode()
    is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
    self.gather_revert = P.Gather()
    self.reshape_first = P.Reshape()
    self.reshape = P.Reshape()
    self.unique = P.Unique()
    self.shape = P.Shape()
    if is_auto_parallel:
        self.unique = P.Unique().shard(((1,),))
    if self.cache_enable and enable_ps:
        self._set_voacb_cache_enable_for_ps(vocab_cache_size, embedding_size, vocab_size)
        if is_auto_parallel:
            self.unique.add_prim_attr('cache_enable', True)
    indices_shape_size = 2
    if slice_mode == "field_slice" and is_auto_parallel:
        if not manual_shapes:
            raise ValueError("In field_slice mode, manual_shapes should not be None.")
        if not isinstance(manual_shapes, tuple):
            raise TypeError("manual_shapes must be tuple(int), but got {}!".format(type(manual_shapes)))
        for dim in manual_shapes:
            validator.check_positive_int(dim, 'manual shape dim', self.cls_name)
        self.gatherv2.add_prim_attr("manual_split", manual_shapes)
        self.embeddinglookup.add_prim_attr("manual_split", manual_shapes)
        self.gatherv2.shard(((get_group_size(), 1), (1, get_group_size())))
        self.embeddinglookup.shard(((get_group_size(), 1), (1, get_group_size())))
    elif slice_mode == "table_row_slice" and is_auto_parallel:
        full_batch = _get_full_batch()
        if (target == 'DEVICE' and not full_batch) or (self.cache_enable and enable_ps and sparse):
            indices_shape_size = 1
            self.gather_revert.shard(((1, 1), (get_group_size(),)))
            self.forward_unique = True
        indices_strategy = (1,) * indices_shape_size
        self.gatherv2.shard(((get_group_size(), 1), indices_strategy))
        self.embeddinglookup.shard(((get_group_size(), 1), indices_strategy))
    elif slice_mode == "table_column_slice" and is_auto_parallel:
        if target == 'DEVICE':
            indices_shape_size = 1
            self.gather_revert.shard(((1, get_group_size()), (1,)))
            self.forward_unique = True
        indices_strategy = (1,) * indices_shape_size
        self.gatherv2.shard(((1, get_group_size()), indices_strategy))
        self.embeddinglookup.shard(((1, get_group_size()), indices_strategy))
    elif slice_mode == "batch_slice" and is_auto_parallel:
        indices_strategy = [get_group_size()]
        indices_strategy.extend([1] * (indices_shape_size - 1))
        indices_strategy = tuple(indices_strategy)
        self.gatherv2.shard(((1, 1), indices_strategy))
        self.embeddinglookup.shard(((1, 1), indices_strategy))
    else:
        if is_auto_parallel:
            raise ValueError("slice_mode is not supported in nn.EmbeddingLookup, but got "
                             + str(slice_mode))
    if self.cache_enable and not enable_ps:
        if parallel_mode != ParallelMode.STAND_ALONE:
            raise ValueError("Parallel mode hasn't supported cache enable yet.")
        self._set_cache_enable()
    self.embedding_table.unique = self.forward_unique
    self.max_norm = max_norm
    if self.max_norm is not None:
        self.max_norm = validator.check_positive_float(self.max_norm, 'max_norm', self.cls_name)
        self.max_norm = Tensor(self.max_norm, dtype=mstype.float32)
def __init__(self, num_features, eps=1e-5, momentum=0.9, affine=True,
             gamma_init='ones', beta_init='zeros', moving_mean_init='zeros',
             moving_var_init='ones', use_batch_statistics=None,
             device_num_each_group=1, input_dims='2d', data_format='NCHW'):
    super(_BatchNorm, self).__init__()
    if num_features < 1:
        raise ValueError("num_features must be at least 1.")
    if momentum < 0 or momentum > 1:
        raise ValueError("momentum should be a number in range [0, 1], but got {}".format(momentum))
    self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
    if context.get_context("device_target") != "GPU" and self.format == "NHWC":
        raise ValueError("The NHWC format is only supported on the GPU target.")
    self.use_batch_statistics = use_batch_statistics
    self.num_features = num_features
    self.eps = eps
    self.input_dims = input_dims
    self.moving_mean = Parameter(initializer(moving_mean_init, num_features),
                                 name="mean", requires_grad=False)
    self.moving_variance = Parameter(initializer(moving_var_init, num_features),
                                     name="variance", requires_grad=False)
    self.gamma = Parameter(initializer(gamma_init, num_features),
                           name="gamma", requires_grad=affine)
    self.beta = Parameter(initializer(beta_init, num_features),
                          name="beta", requires_grad=affine)
    self.group = validator.check_positive_int(device_num_each_group)
    self.is_global = False
    if self.group != 1:
        self.rank_id = get_rank()
        self.rank_size = get_group_size()
        self.device_list = [i for i in range(0, self.rank_size)]
        self.rank_list = self.list_group(self.device_list, self.group)
        self.rank_list_idx = len(self.rank_list)
        for i in range(self.rank_list_idx):
            if self.rank_id in self.rank_list[i] and self.group != 1:
                self.is_global = True
                management.create_group('group' + str(i), self.rank_list[i])
                self.all_reduce = P.AllReduce(P.ReduceOp.SUM,
                                              'group' + str(i)).add_prim_attr('fusion', 1)
    self.shape = P.Shape()
    self.reduce_mean = P.ReduceMean(keep_dims=True)
    self.square = P.Square()
    self.sqrt = P.Sqrt()
    self.cast = P.Cast()
    self.dtype = P.DType()
    self.reshape = P.Reshape()
    self.is_ascend = context.get_context("device_target") == "Ascend"
    self.is_gpu = context.get_context("device_target") == "GPU"
    self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
    self.momentum = 1.0 - momentum
    if context.get_context("enable_ge"):
        self.is_ge_backend = True
    else:
        self.is_ge_backend = False

    if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
        self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
    elif self.is_gpu:
        self.bn_train = P.FusedBatchNormEx(mode=1,
                                           epsilon=self.eps,
                                           momentum=self.momentum,
                                           data_format=self.format)
    else:
        self.bn_train = P.FusedBatchNorm(mode=1, epsilon=self.eps, momentum=self.momentum)
    self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps, data_format=self.format)

    self.enable_global_sync = self.is_global and (self.is_ge_backend or
                                                  (self.is_graph_mode and self.is_ascend))
    self.enable_default_train = self.is_graph_mode and not self.is_global and \
                                (self.is_ge_backend or self.is_ascend)

    data_parallel_strategy = ((1,), (1,))
    data_parallel_strategy_one = ((1,), ())
    self.sub_mean = P.Sub().shard(data_parallel_strategy)
    self.sub_var = P.Sub().shard(data_parallel_strategy)
    self.mul_mean = P.Mul().shard(data_parallel_strategy_one)
    self.mul_var = P.Mul().shard(data_parallel_strategy_one)
    self.assign_sub_mean = P.AssignSub().shard(data_parallel_strategy)
    self.assign_sub_var = P.AssignSub().shard(data_parallel_strategy)
def polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch,
                        decay_epoch, power, update_decay_epoch=False):
    r"""
    Calculate learning rate base on polynomial decay function.

    For the i-th step, the formula of computing decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = (learning\_rate - end\_learning\_rate) *
        (1 - tmp\_epoch / tmp\_decay\_epoch)^{power} + end\_learning\_rate

    Where:

    .. math::
        tmp\_epoch = min(current\_epoch, decay\_epoch)

    .. math::
        current\_epoch = floor(\frac{i}{step\_per\_epoch})

    .. math::
        tmp\_decay\_epoch = decay\_epoch

    If `update_decay_epoch` is true, update the value of `tmp_decay_epoch` every epoch. The
    formula is:

    .. math::
        tmp\_decay\_epoch = decay\_epoch * ceil(current\_epoch / decay\_epoch)

    Args:
        learning_rate (float): The initial value of learning rate.
        end_learning_rate (float): The end value of learning rate.
        total_step (int): The total number of steps.
        step_per_epoch (int): The number of steps in per epoch.
        decay_epoch (int): A value used to calculate decayed learning rate.
        power (float): A value used to calculate decayed learning rate.
            This parameter must be greater than 0.
        update_decay_epoch (bool): If true, update `decay_epoch`. Default: False.

    Returns:
        list[float]. The size of list is `total_step`.

    Examples:
        >>> learning_rate = 0.1
        >>> end_learning_rate = 0.01
        >>> total_step = 6
        >>> step_per_epoch = 2
        >>> decay_epoch = 2
        >>> power = 0.5
        >>> r = polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch,
        ...                         decay_epoch, power)
        >>> print(r)
        [0.1, 0.1, 0.07363961030678928, 0.07363961030678928, 0.01, 0.01]
    """
    validator.check_positive_float(learning_rate, 'learning_rate')
    validator.check_is_float(learning_rate, 'learning_rate')
    if not isinstance(end_learning_rate, float):
        raise TypeError("end_learning_rate must be float.")
    validator.check_non_negative_float(end_learning_rate, "end_learning_rate", None)
    validator.check_positive_float(power, 'power')
    validator.check_is_float(power, 'power')
    validator.check_positive_int(total_step, 'total_step')
    validator.check_positive_int(step_per_epoch, 'step_per_epoch')
    validator.check_positive_int(decay_epoch, 'decay_epoch')
    validator.check_value_type('update_decay_epoch', update_decay_epoch, [bool])

    origin_decay_epoch = decay_epoch
    function = lambda x, y: (x, min(x, y))
    if update_decay_epoch:
        function = lambda x, y: (origin_decay_epoch * max(math.ceil(y / origin_decay_epoch), 1), y)

    lr = []
    delta = learning_rate - end_learning_rate
    for i in range(total_step):
        current_epoch = math.floor(i / step_per_epoch)
        decay_epoch, tmp_epoch = function(decay_epoch, current_epoch)
        lr.append(delta * (1 - tmp_epoch / decay_epoch) ** power + end_learning_rate)
    return lr