def __init__(self, batch_size):
    """init function"""
    super(Reader_Albert, self).__init__()
    self.expanddims_0 = P.ExpandDims()
    self.expanddims_0_axis = 1
    self.expanddims_3 = P.ExpandDims()
    self.expanddims_3_axis = 2
    self.cast_5 = P.Cast()
    self.cast_5_to = mstype.float32
    self.sub_7 = P.Sub()
    self.sub_7_bias = 1.0
    self.mul_9 = P.Mul()
    self.mul_9_w = -10000.0
    self.gather_1_input_weight = Parameter(Tensor(np.random.uniform(0, 1, (30005, 128)).astype(np.float32)),
                                           name=None)
    self.gather_1_axis = 0
    self.gather_1 = P.Gather()
    self.gather_2_input_weight = Parameter(Tensor(np.random.uniform(0, 1, (2, 128)).astype(np.float32)), name=None)
    self.gather_2_axis = 0
    self.gather_2 = P.Gather()
    self.add_4 = P.Add()
    self.add_6 = P.Add()
    self.add_6_bias = Parameter(Tensor(np.random.uniform(0, 1, (1, 512, 128)).astype(np.float32)), name=None)
    self.layernorm1_0 = LayerNorm(mul_7_w_shape=(128,), add_8_bias_shape=(128,))
    self.linear3_0 = Linear(matmul_0_weight_shape=(128, 4096), add_1_bias_shape=(4096,))
    self.module34_0 = TransformerLayer(batch_size,
                                       layernorm1_0_mul_7_w_shape=(4096,),
                                       layernorm1_0_add_8_bias_shape=(4096,),
                                       linear3_0_matmul_0_weight_shape=(4096, 16384),
                                       linear3_0_add_1_bias_shape=(16384,),
                                       linear3_1_matmul_0_weight_shape=(16384, 4096),
                                       linear3_1_add_1_bias_shape=(4096,))
    self.layernorm1_1 = LayerNorm(mul_7_w_shape=(4096,), add_8_bias_shape=(4096,))
def __init__(self,
             vocab_size,
             embedding_size,
             embedding_shape,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             batch_size=12,
             damping=0.03,
             loss_scale=1,
             frequency=100):
    super(Embedding_Thor, self).__init__()
    self.vocab_size = vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.embedding_table = Parameter(initializer(TruncatedNormal(initializer_range),
                                                 [vocab_size, embedding_size]))
    self.thor = True
    self.expand = P.ExpandDims()
    self.shape_flat = (-1,)
    self.gather = P.Gather()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.em_shape = tuple(embedding_shape)
    self.shape = P.Shape()
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.matrix_A_inv = Parameter(Tensor(np.zeros([vocab_size]).astype(np.float16)), requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16)),
                                  requires_grad=False)
    self.fake_G = Tensor(np.zeros([embedding_size, embedding_size]).astype(np.float16))
    self.dampingA = Tensor(np.ones([vocab_size]).astype(np.float32))
    self.dampingG = Tensor(np.identity(embedding_size), mstype.float32)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.damping = damping
    self.gather = P.Gather()
    self.sqrt = P.Sqrt()
    self.mul = P.Mul()
    self.cast = P.Cast()
    self.cube_matmul = P.CusMatMulCube(transpose_a=True)
    self.vector_matmul = P.CusBatchMatMul()
    self.cholesky = P.CusCholeskyTrsm()
    self.matrix_combine = P.CusMatrixCombine()
    self.reduce_sum = P.ReduceSum(keep_dims=False)
    self.inv = P.Inv()
    self.getG = P.InsertGradientOf(self.save_gradient)
    self.batch_size = batch_size
def __init__(self):
    super().__init__()
    self.unique = P.Unique().shard(((1,),))
    self.relu = P.ReLU()
    self.mul = P.Mul()
    self.embedding_lookp = P.Gather().shard(((8, 1), (1,)))
    self.embedding_table = Parameter(initializer('normal', [2000, 128]), name='embedding_table')
    self.gatherv2 = P.Gather().shard(((1, 1), (1,)))
    self.reshape = P.Reshape()
    self.matmul = P.MatMul()
    self.mul_weight = Parameter(Tensor(np.full([32, 64, 1], 0.5, dtype=np.float32)), name="mul_weight")
def __init__(self, strategy1=None, strategy2=None, strategy3=None, axis=0, init_flag=True,
             split_tuple=(4, 4), split_string="manual_split", param_shape=(8, 8)):
    super().__init__()
    self.gatherv2 = P.Gather().shard(strategy1)
    self.gatherv2.add_prim_attr(split_string, split_tuple)
    self.mul = P.Mul().shard(strategy2)
    self.reshape = P.Reshape()
    self.matmul = P.MatMul().shard(strategy3)
    self.matmul.add_prim_attr("forward_reduce_scatter", True)
    if init_flag:
        self.param = Parameter(initializer("ones", param_shape, ms.float32), name="gatherv2_param")
    else:
        self.param = Parameter(Tensor(np.ones(param_shape), dtype=ms.float32), name="gatherv2_param")
    self.mul_weight = Parameter(initializer("ones", (8, 8, 8), ms.float32), name="mul_weight")
    self.matmul_weight = Parameter(initializer("ones", (64, 16), ms.float32), name="matmul_weight")
    self.axis = axis
def __init__(self, axis=0, dyn_a=True, dyn_b=True):
    super(GatherNetDynamic, self).__init__()
    self.gather = P.Gather()
    self.gpu_convert_to_dynamic_shape = inner.GpuConvertToDynamicShape()
    self.to_dyn_1 = dyn_a
    self.to_dyn_2 = dyn_b
    self.axis = axis
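# Hedged usage sketch (not part of the original snippet): a plausible construct for
# GatherNetDynamic, assuming each input is first converted to a dynamic shape when the
# corresponding flag is set and then gathered along self.axis.
def construct(self, x, indices):
    if self.to_dyn_1:
        x = self.gpu_convert_to_dynamic_shape(x)
    if self.to_dyn_2:
        indices = self.gpu_convert_to_dynamic_shape(indices)
    return self.gather(x, indices, self.axis)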
def __init__(self,
             use_relative_positions,
             embedding_size,
             embedding_shape,
             use_token_type=False,
             token_type_vocab_size=16,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             max_position_embeddings=512,
             dropout_prob=0.1):
    super(EmbeddingPostprocessor, self).__init__()
    self.use_token_type = use_token_type
    self.token_type_vocab_size = token_type_vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.max_position_embeddings = max_position_embeddings
    self.embedding_table = Parameter(initializer(TruncatedNormal(initializer_range),
                                                 [token_type_vocab_size, embedding_size]))
    self.shape_flat = (-1,)
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.1, mstype.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.shape = tuple(embedding_shape)
    self.layernorm = nn.LayerNorm((embedding_size,))
    self.dropout = nn.Dropout(1 - dropout_prob)
    self.gather = P.Gather()
    self.use_relative_positions = use_relative_positions
    self.slice = P.StridedSlice()
    self.full_position_embeddings = Parameter(initializer(TruncatedNormal(initializer_range),
                                                          [max_position_embeddings, embedding_size]))
def __init__(self, vocab_size, embedding_size, use_one_hot=False, embedding_table='normal',
             dtype=mstype.float32, padding_idx=None):
    super(Embedding, self).__init__()
    self.vocab_size = validator.check_value_type('vocab_size', vocab_size, [int], self.cls_name)
    self.embedding_size = validator.check_value_type('embedding_size', embedding_size, [int], self.cls_name)
    validator.check_value_type('use_one_hot', use_one_hot, [bool], self.cls_name)
    validator.check_subclass("dtype", dtype, mstype.number_type, self.cls_name)
    self.use_one_hot = use_one_hot
    self.dtype = dtype
    self.init_tensor = initializer(embedding_table, [vocab_size, embedding_size])
    self.padding_idx = padding_idx
    if padding_idx is not None:
        self.padding_idx = validator.check_int_range(padding_idx, 0, vocab_size, Rel.INC_BOTH,
                                                     "padding_idx", self.cls_name)
        if isinstance(self.init_tensor, Tensor) and self.init_tensor.init is not None:
            self.init_tensor = self.init_tensor.init_data()
        self.init_tensor = self.init_tensor.asnumpy()
        self.init_tensor[self.padding_idx] = 0
        self.init_tensor = Tensor(self.init_tensor)
    self.embedding_table = Parameter(self.init_tensor, name='embedding_table')
    self.expand = P.ExpandDims()
    self.reshape_flat = P.Reshape()
    self.shp_flat = (-1,)
    self.gather = P.Gather()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, self.dtype)
    self.off_value = Tensor(0.0, self.dtype)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.get_shp = P.Shape()
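# Hedged usage sketch (assumption, not confirmed as the original construct): the lookup path
# this __init__ prepares. Ids are flattened, rows are selected either by a one-hot matmul or
# by Gather, and the result is reshaped back to ids.shape + (embedding_size,).
def construct(self, ids):
    out_shape = self.get_shp(ids) + (self.embedding_size,)
    flat_ids = self.reshape_flat(ids, self.shp_flat)
    if self.use_one_hot:
        one_hot_ids = self.one_hot(flat_ids, self.vocab_size, self.on_value, self.off_value)
        output_for_reshape = self.array_mul(one_hot_ids, self.embedding_table)
    else:
        output_for_reshape = self.gather(self.embedding_table, flat_ids, 0)
    return self.reshape(output_for_reshape, out_shape)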
def __init__(self, batch_size=4):
    super(DiceLoss, self).__init__()
    self.threshold0 = Tensor(0.5, mstype.float32)
    self.zero_float32 = Tensor(0.0, mstype.float32)
    self.k = int(640 * 640)
    self.negative_one_int32 = Tensor(-1, mstype.int32)
    self.batch_size = batch_size
    self.concat = P.Concat()
    self.less_equal = P.LessEqual()
    self.greater = P.Greater()
    self.reduce_sum = P.ReduceSum()
    self.reduce_sum_keep_dims = P.ReduceSum(keep_dims=True)
    self.reduce_mean = P.ReduceMean()
    self.reduce_min = P.ReduceMin()
    self.cast = P.Cast()
    self.minimum = P.Minimum()
    self.expand_dims = P.ExpandDims()
    self.select = P.Select()
    self.fill = P.Fill()
    self.topk = P.TopK(sorted=True)
    self.shape = P.Shape()
    self.sigmoid = P.Sigmoid()
    self.reshape = P.Reshape()
    self.slice = P.Slice()
    self.logical_and = P.LogicalAnd()
    self.logical_or = P.LogicalOr()
    self.equal = P.Equal()
    self.zeros_like = P.ZerosLike()
    self.add = P.TensorAdd()
    self.gather = P.Gather()
def __init__(self):
    super(AssignWhenInsertGrad, self).__init__()
    self.gather = P.Gather()
    self.damping = Tensor(np.array([0.03, 0.03]).astype(np.float32))
    self.cov_step = ms.Parameter(0, name="cov_step", requires_grad=False)
    self.freq = Tensor(278, ms.int32)
    self.getG = P.InsertGradientOf(self.save_gradient)
def __init__(self, config):
    super(GetMaskedLMOutput, self).__init__()
    self.width = config.hidden_size
    self.reshape = P.Reshape()
    self.gather = P.Gather()
    weight_init = TruncatedNormal(config.initializer_range)
    self.dense = nn.Dense(self.width,
                          config.hidden_size,
                          weight_init=weight_init,
                          activation=config.hidden_act).to_float(config.compute_type)
    self.layernorm = nn.LayerNorm((config.hidden_size,)).to_float(config.compute_type)
    self.output_bias = Parameter(initializer('zero', config.vocab_size))
    self.matmul = P.MatMul(transpose_b=True)
    self.log_softmax = nn.LogSoftmax(axis=-1)
    self.shape_flat_offsets = (-1, 1)
    self.last_idx = (-1,)
    self.shape_flat_sequence_tensor = (-1, self.width)
    self.seq_length_tensor = Tensor(np.array((config.seq_length,)).astype(np.int32))
    self.cast = P.Cast()
    self.compute_type = config.compute_type
    self.dtype = config.dtype
def __init__(self, is_training, vocab_size, embed_dim, initializer_range=0.1, use_one_hot_embeddings=False):
    super(EmbeddingLookup, self).__init__()
    self.is_training = is_training
    self.embedding_dim = embed_dim
    self.vocab_size = vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    init_weight = np.random.normal(-initializer_range, initializer_range, size=[vocab_size, embed_dim])
    self.embedding_table = Parameter(Tensor(init_weight, mstype.float32))
    self.expand = P.ExpandDims()
    self.gather = P.Gather()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.get_shape = P.Shape()
    self.cast = P.Cast()
def __init__(self, seq_len):
    super(ModelOneHop, self).__init__()
    self.expanddims = P.ExpandDims()
    self.expanddims_axis_0 = 1
    self.expanddims_axis_1 = 2
    self.cast = P.Cast()
    self.cast_to = mstype.float32
    self.sub = P.Sub()
    self.sub_bias = 1.0
    self.mul = P.Mul()
    self.mul_w = -10000.0
    self.input_weight_0 = Parameter(Tensor(np.random.uniform(0, 1, (30522, 768)).astype(np.float32)), name=None)
    self.gather_axis_0 = 0
    self.gather = P.Gather()
    self.input_weight_1 = Parameter(Tensor(np.random.uniform(0, 1, (2, 768)).astype(np.float32)), name=None)
    self.add = P.Add()
    self.add_bias = Parameter(Tensor(np.random.uniform(0, 1, (1, seq_len, 768)).astype(np.float32)), name=None)
    self.layernorm = LayerNorm()
    self.encoder_layer_1_4 = BertEncoder(seq_len)
    self.encoder_layer_5_8 = BertEncoder(seq_len)
    self.encoder_layer_9_12 = BertEncoder(seq_len)
    self.cls_ids = Tensor(np.array(0))
    self.gather_axis_1 = 1
    self.dense = nn.Dense(in_channels=768, out_channels=768, has_bias=True)
    self.tanh = nn.Tanh()
def __init__(self, vocab_size, embed_dim, use_one_hot_embeddings=False):
    """
    Embeddings lookup table with a fixed dictionary and size.

    Args:
        vocab_size (int): Size of the dictionary of embeddings.
        embed_dim (int): The size of word embedding.
        use_one_hot_embeddings (bool): Whether to use one-hot embedding. Default: False.
    """
    super(EmbeddingLookup, self).__init__()
    self.embedding_dim = embed_dim
    self.vocab_size = vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    init_weight = np.random.normal(0, embed_dim**-0.5, size=[vocab_size, embed_dim]).astype(np.float32)
    # 0 is the padding index, so initialize its embedding row as zeros.
    init_weight[0, :] = 0
    self.embedding_table = Parameter(Tensor(init_weight))
    self.expand = P.ExpandDims()
    self.gather = P.Gather()
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.get_shape = P.Shape()
def __init__(self,
             in_channels,
             out_channels,
             weight_init='normal',
             bias_init='zeros',
             damping=0.03,
             loss_scale=1,
             frequency=278,
             batch_size=32,
             has_bias=True,
             activation=None):
    super(Dense_Thor_GPU, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    self.thor = True
    if isinstance(weight_init, Tensor):
        if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("weight_init shape error")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))
    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("bias_init shape error")
        self.bias = Parameter(initializer(bias_init, [out_channels]))
    self.matmul = P.MatMul(transpose_b=True)
    self.bias_add = P.BiasAdd()
    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None
    split_dim = 128
    matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.in_channels, self.out_channels, split_dim)
    self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)), requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)), requires_grad=False)
    self.broadcast_to = P.BroadcastTo(matrix_A_shape)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.mul = P.Mul()
    self.cube_matmul = P.MatMul(transpose_a=True)
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.batch_size = Tensor(batch_size, mstype.float16)
    self.getG = P.InsertGradientOf(self.save_gradient)
    self.damping = Parameter(Tensor(damping), requires_grad=False)
    self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
    self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
    self.cast = P.Cast()
    self.gather = P.Gather()
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.add = P.Add()
    self.sqrt = P.Sqrt()
    self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
    self.vector_matmul = P.BatchMatMul(transpose_a=True)
def __init__(self, config):
    super(GetMaskedLMOutput, self).__init__()
    self.width = config.hidden_size
    self.reshape = P.Reshape()
    self.gather = P.Gather()
    weight_init = TruncatedNormal(config.initializer_range)
    self.dense = Dense_Thor(in_channels=self.width,
                            out_channels=config.hidden_size,
                            weight_init=weight_init,
                            has_bias=True,
                            bias_init='zeros',
                            damping=damping,
                            loss_scale=loss_scale,
                            frequency=frequency,
                            activation=config.hidden_act,
                            batch_size=batch_size).to_float(config.compute_type)
    self.layernorm = nn.LayerNorm((config.hidden_size,)).to_float(config.compute_type)
    self.output_bias = Parameter(initializer('zero', config.vocab_size))
    self.matmul = P.MatMul(transpose_b=True)
    self.log_softmax = nn.LogSoftmax(axis=-1)
    self.shape_flat_offsets = (-1, 1)
    self.rng = Tensor(np.array(range(0, config.batch_size)).astype(np.int32))
    self.last_idx = (-1,)
    self.shape_flat_sequence_tensor = (config.batch_size * config.seq_length, self.width)
    self.seq_length_tensor = Tensor(np.array((config.seq_length,)).astype(np.int32))
    self.cast = P.Cast()
    self.compute_type = config.compute_type
    self.dtype = config.dtype
def __init__(self, matmul_weight, strategy1=None):
    super().__init__()
    self.gatherv2 = P.Gather().shard(strategy1)
    self.reshape = P.Reshape().add_prim_attr("skip_redistribution", True)
    self.matmul = P.MatMul(transpose_b=False)
    self.index = Tensor(np.ones([64, 64]), dtype=ms.int32)
    self.matmul_weight = Parameter(matmul_weight, "w1")
    self.axis = 0
def __init__(self, num_sampled, num_classes, num_true=1,
             sampled_values=None, remove_accidental_hits=True, seed=0,
             reduction='none'):
    super(SampledSoftmaxLoss, self).__init__(reduction)

    if num_true < 1:
        raise ValueError(f"num_true {num_true} is less than 1.")
    if seed < 0:
        raise ValueError(f"seed {seed} is less than 0.")
    if num_sampled > num_classes:
        raise ValueError(f"num_sampled {num_sampled} is greater than num_classes {num_classes}.")
    if num_true > num_classes:
        raise ValueError(f"num_true {num_true} is greater than num_classes {num_classes}.")
    if sampled_values is not None:
        if not isinstance(sampled_values, (list, tuple)):
            raise TypeError(f"sampled_values {sampled_values} is not a list or tuple.")
        if len(sampled_values) != 3:
            raise ValueError(f"sampled_values size {len(sampled_values)} is not 3.")

    self.num_sampled = num_sampled
    self.num_classes = num_classes
    self.num_true = num_true
    self.sampled_values = sampled_values
    self.remove_accidental_hits = remove_accidental_hits
    self.seed = seed
    self.sampler = P.LogUniformCandidateSampler(num_true, num_sampled, True, num_classes, seed)
    self.cast = P.Cast()
    self.reshape = P.Reshape()
    self.shape = P.Shape()
    self.exp = P.Exp()
    self.log = P.Log()
    self.slice_op = P.Slice()
    self.matmul = P.MatMul(False, True)
    self.gather_v2 = P.Gather()
    self.reduce_max_true = P.ReduceMax(True)
    self.reduce_sum = P.ReduceSum()
    self.reduce_sum_true = P.ReduceSum(True)
    self.concat_dim0 = P.Concat(0)
    self.concat_dim1 = P.Concat(1)
    self.ones_like = P.OnesLike()
    self.zeros_like = P.ZerosLike()
    self.mul = P.Mul()
    self.expand_dims = P.ExpandDims()
    self.dtype = P.DType()
    self.compute_accidental_hits = P.ComputeAccidentalHits(num_true)
    self.scatter_nd = P.ScatterNd()
def __init__(self, embedding_size, max_position_embeddings=512):
    super(PositionalEmbedding, self).__init__()
    self.add = P.TensorAdd()
    self.expand_dims = P.ExpandDims()
    self.position_embedding_table = Tensor(position_encoding(max_position_embeddings, embedding_size),
                                           mstype.float32)
    self.gather = P.Gather()
    self.get_shape = P.Shape()
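# Hedged usage sketch (assumption): one way PositionalEmbedding could apply its table,
# gathering the encoding rows for 1-D position ids and broadcasting over the batch
# dimension before adding them to the word embeddings.
def construct(self, word_embeddings, position_ids):
    position_embeddings = self.gather(self.position_embedding_table, position_ids, 0)
    position_embeddings = self.expand_dims(position_embeddings, 0)
    return self.add(word_embeddings, position_embeddings)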
def __init__(self, dataset_argv, architect_argv, activation, neigh_drop_rate, num_user, num_item, input_dim):
    super(BGCF, self).__init__()
    self.user_embed = Parameter(initializer("XavierUniform", [num_user, input_dim], dtype=mstype.float32))
    self.item_embed = Parameter(initializer("XavierUniform", [num_item, input_dim], dtype=mstype.float32))
    self.cast = P.Cast()
    self.tanh = P.Tanh()
    self.shape = P.Shape()
    self.split = P.Split(0, 2)
    self.gather = P.Gather()
    self.reshape = P.Reshape()
    self.concat_0 = P.Concat(0)
    self.concat_1 = P.Concat(1)
    (self.input_dim, self.num_user, self.num_item) = dataset_argv
    self.layer_dim = architect_argv
    self.gnew_agg_mean = MeanConv(self.input_dim, self.layer_dim,
                                  activation=activation, dropout=neigh_drop_rate[1])
    self.gnew_agg_mean.to_float(mstype.float16)
    self.gnew_agg_user = AttenConv(self.input_dim, self.layer_dim, dropout=neigh_drop_rate[2])
    self.gnew_agg_user.to_float(mstype.float16)
    self.gnew_agg_item = AttenConv(self.input_dim, self.layer_dim, dropout=neigh_drop_rate[2])
    self.gnew_agg_item.to_float(mstype.float16)
    self.user_feature_dim = self.input_dim
    self.item_feature_dim = self.input_dim
    self.final_weight = Parameter(initializer("XavierUniform",
                                              [self.input_dim * 3, self.input_dim * 3],
                                              dtype=mstype.float32))
    self.raw_agg_funcs_user = MeanConv(self.input_dim, self.layer_dim,
                                       activation=activation, dropout=neigh_drop_rate[0])
    self.raw_agg_funcs_user.to_float(mstype.float16)
    self.raw_agg_funcs_item = MeanConv(self.input_dim, self.layer_dim,
                                       activation=activation, dropout=neigh_drop_rate[0])
    self.raw_agg_funcs_item.to_float(mstype.float16)
def __init__(self, config):
    super(EmbeddingLookup, self).__init__()
    self.vocab_size = config.vocab_size
    self.embedding_size = config.embedding_size
    self.embedding_table = Parameter(initializer(TruncatedNormal(0.02),
                                                 [self.vocab_size, self.embedding_size]))
    self.gather = P.Gather()
    self.shape = (-1, config.seq_length, config.embedding_size)
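# Hedged usage sketch (assumption): Gather keeps the shape of its indices, so 2-D
# (batch, seq_length) input ids directly yield (batch, seq_length, embedding_size);
# self.shape is available for an explicit reshape if the ids arrive flattened.
def construct(self, input_ids):
    return self.gather(self.embedding_table, input_ids, 0)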
def __init__(self, learning_rate, name):
    super(_IteratorLearningRate, self).__init__()
    if isinstance(learning_rate, Tensor):
        if learning_rate.ndim != 1:
            raise ValueError(f"The dim of `Tensor` type dynamic learning rate should be 1, "
                             f"but got {learning_rate.ndim}.")
    else:
        raise TypeError("Learning rate should be a Tensor.")

    self.learning_rate = Parameter(learning_rate, name)
    self.gather = P.Gather()
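# Hedged usage sketch (assumption): the per-iteration learning rate is simply the entry
# of the 1-D schedule tensor at the current global step.
def construct(self, global_step):
    return self.gather(self.learning_rate, global_step, 0)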
def __init__(self, network, k, num_eval_neg):
    super(PredictWithSigmoid, self).__init__()
    self.network = network
    self.topk = P.TopK(sorted=True)
    self.squeeze = P.Squeeze()
    self.k = k
    self.num_eval_neg = num_eval_neg
    self.gather = P.Gather()
    self.reshape = P.Reshape()
    self.reducesum = P.ReduceSum(keep_dims=False)
    self.notequal = P.NotEqual()
def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, weight_decay=0.0, loss_scale=1.0,
             num_hidden_layers=24, batch_size=12, damping=0.03,
             decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()):
    super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
    self.momentum = Parameter(Tensor(momentum, mstype.float32))
    self.params = self.parameters
    self.moments = self.params.clone(prefix="moments", init='zeros')
    self.hyper_map = C.HyperMap()
    self.opt = P.ApplyMomentum()
    self.matrix_A = ParameterTuple(matrix_A)
    self.matrix_G = ParameterTuple(matrix_G)
    self.matmul = P.MatMul()
    self.transpose = P.Transpose()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.mul = P.Mul()
    self.gather = P.Gather()
    self.matrix_A_inv = ()
    self.matrix_G_inv = ()
    self.num_hidden_layers = num_hidden_layers
    self.sqrt = P.Sqrt()
    self.assign = P.Assign()
    self.cast = P.Cast()
    self.thor = True
    self.weight_decay = weight_decay * loss_scale
    self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
    self.expand = P.ExpandDims()
    self.square = P.Square()
    self.inv = P.Inv()
    self.batch_size = batch_size
    self.damping = damping
    self.one = Tensor(1, mstype.int32)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
    mean = _get_gradients_mean()
    degree = _get_device_num()
    self.grad_reducer_g = DistributedGradReducerThor(self.parameters, 3, mean, degree)
def __init__(self):
    super(CriterionsFaceQA, self).__init__()
    self.gatherv2 = P.Gather()
    self.squeeze = P.Squeeze(axis=1)
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.euler_label_list = Tensor([0, 1, 2], dtype=mstype.int32)
    self.mse_loss = nn.MSELoss(reduction='sum')
    self.kp_label_list = Tensor([3, 4, 5, 6, 7], dtype=mstype.int32)
    self.kps_loss = CEWithIgnoreIndex3D()
def __init__(self, strategy=None, sparse=True):
    super(NetWithSparseGatherV2, self).__init__()
    self.axis = 0
    self.sparse = sparse
    if sparse:
        self.weight = Parameter(Tensor(np.ones([8, 8]).astype(np.float32)), name="weight")
        self.gather = P.SparseGatherV2()
    else:
        self.weight = Parameter(Tensor(np.ones([8, 8]).astype(np.float32)), name="weight")
        self.gather = P.Gather()
    if strategy is not None:
        self.gather.shard(strategy)
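# Hedged usage sketch (assumption): SparseGatherV2 and Gather share the
# (params, indices, axis) call signature, so the forward pass is identical; only the
# gradient representation (sparse vs. dense) differs between the two branches.
def construct(self, indices):
    return self.gather(self.weight, indices, self.axis)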
def _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target,
                         beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params,
                         m, v, ps_parameter, cache_enable):
    """Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse."""
    success = True
    indices = gradient.indices
    values = gradient.values
    if ps_parameter and not cache_enable:
        op_shape = P.Shape()
        shapes = (op_shape(params), op_shape(m), op_shape(v),
                  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
                  op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
        success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
                                               eps, values, indices), shapes), params))
        return success

    if not target:
        success = F.depend(success, sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2,
                                               eps, values, indices))
    else:
        op_gather = P.Gather()
        op_sqrt = P.Sqrt()
        scatter_add = P.ScatterAdd(use_locking)
        scatter_update = P.ScatterUpdate(use_locking)

        m_slice = op_gather(m, indices, 0)
        v_slice = op_gather(v, indices, 0)

        next_m = m_slice * beta1 + values * (1 - beta1)
        next_v = v_slice * beta2 + values * values * (1 - beta2)

        lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)

        if use_nesterov:
            m_temp = beta1 * next_m + values * (1 - beta1)
            param_update = m_temp / (op_sqrt(next_v) + eps)
        else:
            param_update = next_m / (op_sqrt(next_v) + eps)

        success = F.depend(success, scatter_add(params, indices, -lr_t * param_update))
        success = F.depend(success, scatter_update(m, indices, next_m))
        success = F.depend(success, scatter_update(v, indices, next_v))

    return success
def __init__(self,
             embedding_size,
             embedding_shape,
             use_relative_positions=False,
             use_token_type=False,
             token_type_vocab_size=16,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             max_position_embeddings=512,
             dropout_prob=0.1):
    super(EmbeddingPostprocessor, self).__init__()
    self.use_token_type = use_token_type
    self.token_type_vocab_size = token_type_vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.max_position_embeddings = max_position_embeddings
    self.token_type_embedding = Embedding_Thor(vocab_size=token_type_vocab_size,
                                               embedding_size=embedding_size,
                                               embedding_shape=embedding_shape,
                                               use_one_hot_embeddings=use_one_hot_embeddings,
                                               initializer_range=initializer_range,
                                               batch_size=batch_size,
                                               damping=damping,
                                               loss_scale=loss_scale,
                                               frequency=frequency)
    self.shape_flat = (-1,)
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.1, mstype.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.shape = tuple(embedding_shape)
    self.dropout = nn.Dropout(1 - dropout_prob)
    self.gather = P.Gather()
    self.use_relative_positions = use_relative_positions
    self.slice = P.StridedSlice()
    _, seq, width = self.shape
    position_embedding_shape = [1, seq, width]
    self.full_position_embedding = Embedding_Thor(vocab_size=max_position_embeddings,
                                                  embedding_size=embedding_size,
                                                  embedding_shape=position_embedding_shape,
                                                  use_one_hot_embeddings=use_one_hot_embeddings,
                                                  initializer_range=initializer_range,
                                                  batch_size=batch_size,
                                                  damping=damping,
                                                  loss_scale=loss_scale,
                                                  frequency=frequency)
    self.position_ids = Tensor(np.arange(seq).reshape(-1, seq).astype(np.int32))
    self.layernorm = nn.LayerNorm((embedding_size,))
    self.add = P.TensorAdd()
def __init__(self):
    super(ComputeRij, self).__init__()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.cast = P.Cast()
    self.rsum = P.ReduceSum()
    self.broadcastto = P.BroadcastTo((1, 192 * 138))
    self.broadcastto1 = P.BroadcastTo((1, 192, 138, 3))
    self.expdims = P.ExpandDims()
    self.concat = P.Concat(axis=1)
    self.gather = P.Gather()
    self.mul = P.Mul()
    self.slice = P.Slice()
def __init__(self):
    super(ModelTwoHop, self).__init__()
    self.expanddims_0 = P.ExpandDims()
    self.expanddims_0_axis = 1
    self.expanddims_3 = P.ExpandDims()
    self.expanddims_3_axis = 2
    self.cast_5 = P.Cast()
    self.cast_5_to = mstype.float32
    self.sub_7 = P.Sub()
    self.sub_7_bias = 1.0
    self.mul_9 = P.Mul()
    self.mul_9_w = -10000.0
    self.gather_1_input_weight = Parameter(Tensor(np.random.uniform(0, 1, (30522, 768)).astype(np.float32)),
                                           name=None)
    self.gather_1_axis = 0
    self.gather_1 = P.Gather()
    self.gather_2_input_weight = Parameter(Tensor(np.random.uniform(0, 1, (2, 768)).astype(np.float32)), name=None)
    self.gather_2_axis = 0
    self.gather_2 = P.Gather()
    self.add_4 = P.Add()
    self.add_6 = P.Add()
    self.add_6_bias = Parameter(Tensor(np.random.uniform(0, 1, (1, 448, 768)).astype(np.float32)), name=None)
    self.layernorm1_0 = LayerNorm()
    self.module50_0 = Encoder1_4()
    self.module50_1 = Encoder1_4()
    self.module50_2 = Encoder1_4()
    self.gather_643_input_weight = Tensor(np.array(0))
    self.gather_643_axis = 1
    self.gather_643 = P.Gather()
    self.dense_644 = nn.Dense(in_channels=768, out_channels=768, has_bias=True)
    self.tanh_645 = nn.Tanh()
def __init__(self, axis=0, strategy1=None, strategy2=None, shape=None, target=""):
    super().__init__()
    if shape is None:
        shape = [64, 64]
    self.gatherv2 = P.Gather().shard(strategy1).add_prim_attr("primitive_target", target)
    self.mul = P.Mul().shard(strategy2)
    self.index = Tensor(np.ones(shape), dtype=ms.int32)
    self.axis = axis
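# Hedged usage sketch (assumption): a forward pass matching this sharded test-net pattern,
# gathering rows of the input with the constant index tensor and multiplying elementwise.
def construct(self, x, y):
    out = self.gatherv2(x, self.index, self.axis)
    out = self.mul(out, y)
    return out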