def softmax_kernel(data, projection_matrix, is_query=False, normalize_data=True, eps=1e-4):
    """
    Random-feature softmax kernel map.

    data:[Batch,Heads,Seq,Dim_head]
    projection_matrix:[m,Dim_head]

    Returns ``ratio * (exp(W'x - |x|^2/2 - max) + eps)`` where the subtracted
    maximum is taken over the last axis when ``is_query`` is true and over the
    whole tensor otherwise.
    """
    batch, heads, seq_len, dim_head = data.shape

    # Optional input scaling and the 1/sqrt(m) feature scaling.
    scale = (dim_head ** -0.25) if normalize_data else 1.
    ratio = projection_matrix.shape[0] ** -0.5

    # W'x: flatten to 2-D, project, then restore the leading three axes.
    reshape = P.Reshape()
    flat = reshape(data, (-1, dim_head))
    projected = scale * P.MatMul(transpose_b=True)(flat, projection_matrix)
    projected = reshape(projected, (batch, heads, seq_len, -1))

    # |x|^2 / 2, with the same scaling applied (squared).
    half_sq_norm = P.ReduceSum(keep_dims=True)(data ** 2, -1)
    half_sq_norm = (half_sq_norm / 2.0) * (scale ** 2)

    # Maximum subtracted inside the exponent.
    if is_query:
        stabilizer = P.ReduceMax(keep_dims=True)(projected, -1)
    else:
        stabilizer = P.ReduceMax()(projected)

    return ratio * (P.Exp()(projected - half_sq_norm - stabilizer) + eps)
def __init__(self):
    """Instantiate the primitive operators kept as attributes of this object."""
    super().__init__()

    # Shape manipulation and matrix product (second operand transposed).
    self.Reshape = P.Reshape()
    self.MatMul_b = P.MatMul(transpose_b=True)

    # Reductions; the two ReduceMax variants differ only in keep_dims.
    self.ReduceSum = P.ReduceSum(keep_dims=True)
    self.Exp = P.Exp()
    self.ReduceMax_keep = P.ReduceMax(keep_dims=True)
    self.ReduceMax = P.ReduceMax()
def construct(self):
    """Apply ReduceMax to each of the nine stored (x, axis, keep_dims) triples.

    Returns:
        tuple with nine entries, one result per stored input, in index order.
    """
    out0 = P.ReduceMax(self.keep_dims0)(self.x0, self.axis0)
    out1 = P.ReduceMax(self.keep_dims1)(self.x1, self.axis1)
    out2 = P.ReduceMax(self.keep_dims2)(self.x2, self.axis2)
    out3 = P.ReduceMax(self.keep_dims3)(self.x3, self.axis3)
    out4 = P.ReduceMax(self.keep_dims4)(self.x4, self.axis4)
    out5 = P.ReduceMax(self.keep_dims5)(self.x5, self.axis5)
    out6 = P.ReduceMax(self.keep_dims6)(self.x6, self.axis6)
    out7 = P.ReduceMax(self.keep_dims7)(self.x7, self.axis7)
    out8 = P.ReduceMax(self.keep_dims8)(self.x8, self.axis8)
    return (out0, out1, out2, out3, out4, out5, out6, out7, out8)
def __init__(self, num_classes, num_boxes, neg_pre_positive, batch_size):
    """Store the loss configuration and create the operators used by the loss.

    Args:
        num_classes: stored as ``self.num_classes``.
        num_boxes: stored as ``self.num_boxes``; also sizes ``self.idx``.
        neg_pre_positive: stored as ``self.neg_pre_positive``.
        batch_size: together with ``num_boxes`` sizes ``self.idx``.
    """
    super(MultiBoxLoss, self).__init__()

    # Configuration.
    self.num_classes = num_classes
    self.num_boxes = num_boxes
    self.neg_pre_positive = neg_pre_positive

    # Comparison / shaping primitives.
    self.notequal = P.NotEqual()
    self.less = P.Less()
    self.tile = P.Tile()
    self.reduce_sum = P.ReduceSum()
    self.reduce_mean = P.ReduceMean()
    self.expand_dims = P.ExpandDims()

    # Loss building blocks.
    self.smooth_l1_loss = P.SmoothL1Loss()
    self.cross_entropy = SoftmaxCrossEntropyWithLogits()

    # Element-wise and ordering primitives.
    self.maximum = P.Maximum()
    self.minimum = P.Minimum()
    # NOTE(review): both sort attributes are built as TopK(True).
    self.sort_descend = P.TopK(True)
    self.sort = P.TopK(True)
    self.gather = P.GatherNd()
    self.max = P.ReduceMax()
    self.log = P.Log()
    self.exp = P.Exp()
    self.concat = P.Concat(axis=1)
    self.reduce_sum2 = P.ReduceSum(keep_dims=True)

    # Column vector 0 .. batch_size * num_boxes - 1 as an int32 tensor.
    flat_index = np.arange(batch_size * num_boxes).reshape(-1, 1)
    self.idx = Tensor(flat_index, ms.int32)
def maximum(inputs: Tensor, axis: _Axis = (), keep_dims: bool = False) -> Tensor:
    """Reduces a dimension of a tensor by the maximum value in this dimension.

    Args:
        inputs: tensor handed to the ReduceMax operator.
        axis: axis argument forwarded to the operator (defaults to ``()``).
        keep_dims: forwarded to the ReduceMax constructor.

    Returns:
        The operator's output.
    """
    return op.ReduceMax(keep_dims)(inputs, axis)
def __init__(self):
    """Create the operators, the 2x2x2 parameter and the constant tensors."""
    super().__init__()
    self.max = P.ReduceMax()

    # Parameter initialised with 0..7 laid out as a 2x2x2 cube.
    ramp = np.arange(2 * 2 * 2).reshape((2, 2, 2))
    self.param = Parameter(Tensor(ramp, ms.float32), name="weight")

    # All-zero tensor of the same shape.
    zeros = np.zeros(([2, 2, 2]))
    self.zero = Tensor(zeros, ms.float32)

    self.reduce = P.ReduceSum()
    # Scalar int32 zero.
    self.start = Tensor(np.array(0), dtype=ms.int32)
def __init__(self, in_classes, kernel_size, padding, maxpool, has_bias):
    """Build four conv / batch-norm / max-pool stages plus the dense head.

    Args:
        in_classes: indexable; entries 0..4 are used as channel counts.
        kernel_size: indexable; entries 0..3 are the conv kernel sizes.
        padding: stored as ``self.padding`` (not otherwise used here).
        maxpool: indexable; entries 0..3 are used as both pooling arguments.
        has_bias: stored as ``self.has_bias`` (not otherwise used here).
    """
    super(MusicTaggerCNN, self).__init__()
    self.in_classes = in_classes
    self.kernel_size = kernel_size
    self.maxpool = maxpool
    self.padding = padding
    self.has_bias = has_bias

    channels = self.in_classes
    kernels = self.kernel_size

    # Convolutions: stage i maps channels[i] -> channels[i + 1].
    self.conv1 = nn.Conv2d(channels[0], channels[1], kernels[0])
    self.conv2 = nn.Conv2d(channels[1], channels[2], kernels[1])
    self.conv3 = nn.Conv2d(channels[2], channels[3], kernels[2])
    self.conv4 = nn.Conv2d(channels[3], channels[4], kernels[3])

    # Batch norm over each stage's output channels.
    self.bn1 = nn.BatchNorm2d(channels[1])
    self.bn2 = nn.BatchNorm2d(channels[2])
    self.bn3 = nn.BatchNorm2d(channels[3])
    self.bn4 = nn.BatchNorm2d(channels[4])

    # Max pooling; the same value is passed for both positional arguments.
    self.pool1 = nn.MaxPool2d(maxpool[0], maxpool[0])
    self.pool2 = nn.MaxPool2d(maxpool[1], maxpool[1])
    self.pool3 = nn.MaxPool2d(maxpool[2], maxpool[2])
    self.pool4 = nn.MaxPool2d(maxpool[3], maxpool[3])

    # Head.
    self.poolreduce = P.ReduceMax(keep_dims=False)
    self.Act = nn.ReLU()
    self.flatten = nn.Flatten()
    self.dense = nn.Dense(2048, 50, activation='sigmoid')
    self.sigmoid = nn.Sigmoid()
def __init__(self, num_sampled, num_classes, num_true=1,
             sampled_values=None, remove_accidental_hits=True, seed=0,
             reduction='none'):
    """Store the sampling configuration and create the sampler and operators.

    Args:
        num_sampled: stored and forwarded to the sampler.
        num_classes: stored and forwarded to the sampler.
        num_true: stored and forwarded to the sampler.
        sampled_values: stored as ``self.sampled_values``.
        remove_accidental_hits: stored and forwarded to the sampler.
        seed: stored and forwarded to the sampler.
        reduction: accepted but not used in this initialiser.
            NOTE(review): it is not passed to the base initialiser - confirm.
    """
    super(SampledSoftmaxLoss, self).__init__()

    # Configuration.
    self.num_sampled = num_sampled
    self.num_classes = num_classes
    self.num_true = num_true
    self.sampled_values = sampled_values
    self.remove_accidental_hits = remove_accidental_hits
    self.seed = seed

    # Candidate sampler (third positional argument is the literal True).
    self.sampler = P.UniformSampler(
        num_true, num_sampled, True, num_classes, seed,
        remove_accidental_hits)

    # Type / shape helpers.
    self.cast = P.Cast()
    self.reshape = P.Reshape()
    self.shape = P.Shape()

    # Math primitives.
    self.exp = P.Exp()
    self.log = P.Log()
    self.slice_op = P.Slice()
    self.matmul = P.MatMul(False, True)
    self.gather_v2 = P.GatherV2()

    # Reductions; the *_true variants are constructed with True.
    self.reduce_max_true = P.ReduceMax(True)
    self.reduce_sum = P.ReduceSum()
    self.reduce_sum_true = P.ReduceSum(True)

    # Concatenation and fill helpers.
    self.concat_dim0 = P.Concat(0)
    self.concat_dim1 = P.Concat(1)
    self.ones_like = P.OnesLike()
    self.zeros_like = P.ZerosLike()
    self.mul = P.Mul()
    self.expand_dims = P.ExpandDims()
def __init__(self):
    """Create the ReduceMax operator, the 2x2x2 parameter and a zero tensor."""
    super().__init__()
    self.max = P.ReduceMax()

    # Parameter initialised with 0..7 laid out as a 2x2x2 cube.
    ramp = np.arange(2 * 2 * 2).reshape((2, 2, 2))
    self.param = Parameter(Tensor(ramp, ms.float32), name="weight")

    # All-zero tensor of the same shape.
    zeros = np.zeros(([2, 2, 2]))
    self.zero = Tensor(zeros, ms.float32)
def __init__(self, sparse=False, stra_list=None):
    """Create the operators and attach a strategy to each of them.

    Args:
        sparse: stored as ``self.sparse``.
        stra_list: strategies indexed 1..10. When it is ``None`` or has
            fewer than 11 entries, it is replaced by eleven ``None`` values.
    """
    super(SoftmaxCrossEntropyExpand, self).__init__()

    strategies = [] if stra_list is None else stra_list
    if len(strategies) < 11:
        # A short list is not padded: every strategy becomes None.
        strategies = [None] * 11

    self.exp = P.Exp()
    self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(
        strategy=strategies[1])
    self.onehot = P.OneHot().set_strategy(strategy=strategies[2])
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.div = P.Div().set_strategy(strategy=strategies[3])
    self.log = P.Log().set_strategy(strategy=strategies[4])
    self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(
        strategy=strategies[5])
    self.mul = P.Mul().set_strategy(strategy=strategies[6])
    self.mul2 = P.Mul().set_strategy(strategy=strategies[7])
    self.cast = P.Cast()
    self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(
        strategy=strategies[8])
    self.sparse = sparse
    self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(
        strategy=strategies[9])
    self.sub = P.Sub().set_strategy(strategy=strategies[10])
def __init__(self, context_dim, epsilon=100, delta=0.1, alpha=0.1, T=1e5):
    """Create the operators, hyper-parameters and zero-initialised state.

    Args:
        context_dim: size of the square matrix ``_V`` and of the vectors
            ``_u`` and ``_theta``.
        epsilon: stored as ``_epsilon``; divides the sigma expression.
        delta: stored as ``_delta``; used inside the logarithm of sigma.
        alpha: stored as ``_alpha``.
        T: stored as ``_T`` after conversion to ``int``.
    """
    super(LinUCB, self).__init__()

    # Operators.
    self.matmul = P.MatMul()
    self.expand_dims = P.ExpandDims()
    self.transpose = P.Transpose()
    self.reduce_sum = P.ReduceSum()
    self.squeeze = P.Squeeze(1)
    self.argmax = P.Argmax()
    self.reduce_max = P.ReduceMax()

    # Basic variables
    self._context_dim = context_dim
    self._epsilon = epsilon
    self._delta = delta
    self._alpha = alpha
    self._T = int(T)

    # Parameters
    square = (context_dim, context_dim)
    vector = (context_dim, )
    self._V = Tensor(np.zeros(square, dtype=np.float32))
    self._u = Tensor(np.zeros(vector, dtype=np.float32))
    self._theta = Tensor(np.zeros(vector, dtype=np.float32))

    # \sigma = 4*\sqrt{2*\ln{\frac{1.25}{\delta}}}/\epsilon
    # NOTE(review): the formula above has a factor 2 under the square root
    # that the expression below does not apply - confirm which is intended.
    log_term = math.log(1.25 / self._delta)
    self._sigma = 4 * math.sqrt(log_term) / self._epsilon

    self._c = 0.1
    self._step = 1
    self._regret = 0
    self._current_regret = 0
    self.inverse_matrix()
def __init__(self, tag_to_index, batch_size=1, seq_length=128, is_training=True):
    """Set up tag bookkeeping, the transition parameter and the operators.

    Args:
        tag_to_index: mapping that must contain "<START>" and "<STOP>";
            its length is the number of tags.
        batch_size: first dimension of the initial alpha tensor.
        seq_length: stored as ``self.seq_length``.
        is_training: stored as ``self.is_training``.
    """
    super(CRF, self).__init__()
    self.target_size = len(tag_to_index)
    self.is_training = is_training
    self.tag_to_index = tag_to_index
    self.batch_size = batch_size
    self.seq_length = seq_length

    # Special tags; their tensor values are the last two tag indices.
    self.START_TAG = "<START>"
    self.STOP_TAG = "<STOP>"
    self.START_VALUE = Tensor(self.target_size-2, dtype=mstype.int32)
    self.STOP_VALUE = Tensor(self.target_size-1, dtype=mstype.int32)

    # Random square transition matrix; the START row and the STOP column
    # are overwritten with -10000.
    num_tags = self.target_size
    trans = np.random.normal(size=(num_tags, num_tags)).astype(np.float32)
    trans[tag_to_index[self.START_TAG], :] = -10000
    trans[:, tag_to_index[self.STOP_TAG]] = -10000
    self.transitions = Parameter(Tensor(trans), name="transition_matrix")

    # Operators.
    self.cat = P.Concat(axis=-1)
    self.argmax = P.ArgMaxWithValue(axis=-1)
    self.log = P.Log()
    self.exp = P.Exp()
    self.sum = P.ReduceSum()
    self.tile = P.Tile()
    self.reduce_sum = P.ReduceSum(keep_dims=True)
    self.reshape = P.Reshape()
    self.expand = P.ExpandDims()
    self.mean = P.ReduceMean()

    # Initial alphas: -10000 everywhere except 0 in the START column.
    alphas = np.full((self.batch_size, self.target_size), -10000.0)
    alphas[:, self.tag_to_index[self.START_TAG]] = 0.
    self.init_alphas = Tensor(alphas, dtype=mstype.float32)

    self.cast = P.Cast()
    self.reduce_max = P.ReduceMax(keep_dims=True)
    self.on_value = Tensor(1.0, dtype=mstype.float32)
    self.off_value = Tensor(0.0, dtype=mstype.float32)
    self.onehot = P.OneHot()
def __init__(self, scale, config=ConfigYOLOV4CspDarkNet53()):
    """Select the anchors for ``scale`` and create the loss components.

    Args:
        scale: one of 's', 'm' or 'l'.
        config: object providing ``anchor_scales`` and ``ignore_threshold``.

    Raises:
        KeyError: if ``scale`` is none of the three accepted values.
    """
    super(YoloLossBlock, self).__init__()
    self.config = config

    # anchor mask: three consecutive anchor indices per scale.
    masks = (('s', (0, 1, 2)), ('m', (3, 4, 5)), ('l', (6, 7, 8)))
    for key, mask in masks:
        if scale == key:
            idx = mask
            break
    else:
        raise KeyError("Invalid scale value for DetectionBlock")

    selected = [self.config.anchor_scales[i] for i in idx]
    self.anchors = Tensor(selected, ms.float32)
    self.ignore_threshold = Tensor(self.config.ignore_threshold, ms.float32)

    self.concat = P.Concat(axis=-1)
    self.iou = Iou()
    self.reduce_max = P.ReduceMax(keep_dims=False)

    # Individual loss terms.
    self.xy_loss = XYLoss()
    self.wh_loss = WHLoss()
    self.confidence_loss = ConfidenceLoss()
    self.class_loss = ClassLoss()

    self.reduce_sum = P.ReduceSum()
    self.giou = Giou()
def __init__(self):
    """Create four reduction operators, AddN, and the axis list [0, 1, 2]."""
    super(AxisListNet, self).__init__()

    # Reductions, all with default constructor arguments.
    self.reduce_sum = P.ReduceSum()
    self.reduce_mean = P.ReduceMean()
    self.reduce_max = P.ReduceMax()
    self.reduce_min = P.ReduceMin()

    self.add_n = P.AddN()

    # Axis is deliberately a list rather than a tuple.
    self.axis = [0, 1, 2]
def __init__(self):
    """Create the ReduceMax operator, three 2x2x2 parameters and constants."""
    super().__init__()
    self.max = P.ReduceMax()

    # Each parameter gets its own freshly built 0..7 cube; note that the
    # attribute names and the registered names differ for the first two.
    self.param = Parameter(
        Tensor(np.arange(2 * 2 * 2).reshape((2, 2, 2)), ms.float32),
        name="weight")
    self.weight = Parameter(
        Tensor(np.arange(2 * 2 * 2).reshape((2, 2, 2)), ms.float32),
        name="loss")
    self.key = Parameter(
        Tensor(np.arange(2 * 2 * 2).reshape((2, 2, 2)), ms.float32),
        name="key")

    # Constants: an all-zero cube and the scalar 2 as float32.
    zeros = np.zeros(([2, 2, 2]))
    self.zero = Tensor(zeros, ms.float32)
    self.t2 = Tensor(np.array(2), dtype=ms.float32)
def _abs_max(gradients):
    """
    Transform gradients to saliency through abs then take max along channels.

    The maximum is taken over axis 1 and that axis is kept in the result.
    """
    magnitudes = op.Abs()(gradients)
    return op.ReduceMax(keep_dims=True)(magnitudes, axis=1)
def __init__(self, num_sampled, num_classes, num_true=1,
             sampled_values=None, remove_accidental_hits=True, seed=0,
             reduction='none'):
    """Validate the arguments, then create the sampler and operators.

    Args:
        num_sampled: must not exceed ``num_classes``.
        num_classes: forwarded to the sampler.
        num_true: must be at least 1 and not exceed ``num_classes``.
        sampled_values: ``None`` or a list/tuple of exactly three items.
        remove_accidental_hits: stored as an attribute.
        seed: must be non-negative.
        reduction: forwarded to the base initialiser.

    Raises:
        ValueError: on an out-of-range numeric argument or a
            ``sampled_values`` whose length is not 3.
        TypeError: if ``sampled_values`` is neither a list nor a tuple.
    """
    super(SampledSoftmaxLoss, self).__init__(reduction)

    # Argument validation, in the order callers may rely on.
    if num_true < 1:
        raise ValueError(f"num_true {num_true} is less than 1.")
    if seed < 0:
        raise ValueError(f"seed {seed} is less than 0.")
    if num_sampled > num_classes:
        raise ValueError(
            f"num_sampled {num_sampled} is great than num_classes {num_classes}."
        )
    if num_true > num_classes:
        raise ValueError(
            f"num_true {num_true} is great than num_classes {num_classes}."
        )
    has_samples = sampled_values is not None
    if has_samples and not isinstance(sampled_values, (list, tuple)):
        raise TypeError(
            f"sampled_values {sampled_values} is not a list.")
    if has_samples and len(sampled_values) != 3:
        raise ValueError(
            f"sampled_values size {len(sampled_values)} is not 3.")

    # Configuration.
    self.num_sampled = num_sampled
    self.num_classes = num_classes
    self.num_true = num_true
    self.sampled_values = sampled_values
    self.remove_accidental_hits = remove_accidental_hits
    self.seed = seed

    # Candidate sampler (third positional argument is the literal True).
    self.sampler = P.LogUniformCandidateSampler(
        num_true, num_sampled, True, num_classes, seed)

    # Type / shape helpers.
    self.cast = P.Cast()
    self.reshape = P.Reshape()
    self.shape = P.Shape()

    # Math primitives.
    self.exp = P.Exp()
    self.log = P.Log()
    self.slice_op = P.Slice()
    self.matmul = P.MatMul(False, True)
    self.gather_v2 = P.Gather()

    # Reductions; the *_true variants are constructed with True.
    self.reduce_max_true = P.ReduceMax(True)
    self.reduce_sum = P.ReduceSum()
    self.reduce_sum_true = P.ReduceSum(True)

    # Concatenation and fill helpers.
    self.concat_dim0 = P.Concat(0)
    self.concat_dim1 = P.Concat(1)
    self.ones_like = P.OnesLike()
    self.zeros_like = P.ZerosLike()
    self.mul = P.Mul()
    self.expand_dims = P.ExpandDims()
    self.dtype = P.DType()

    # Accidental-hit handling.
    self.compute_accidental_hits = P.ComputeAccidentalHits(num_true)
    self.scatter_nd = P.ScatterNd()
def __init__(self):
    """Define five axis specifications and three reduction operators."""
    super(NetReduce, self).__init__()

    # Axis specifications: single ints (including a negative one) and tuples.
    self.axis0 = 0
    self.axis1 = 1
    self.axis2 = -1
    self.axis3 = (0, 1)
    self.axis4 = (0, 1, 2)

    # Reductions, each constructed with the positional argument False.
    self.reduce_mean = P.ReduceMean(False)
    self.reduce_sum = P.ReduceSum(False)
    self.reduce_max = P.ReduceMax(False)
def abs_max(gradients):
    """
    Transform gradients to saliency through abs then take max along channels.

    Args:
        gradients (_Tensor): Gradients which will be transformed to saliency map.

    Returns:
        _Tensor, saliency map integrated from gradients. The maximum is
        taken over axis 1 and that axis is kept in the result.
    """
    magnitudes = op.Abs()(gradients)
    return op.ReduceMax(keep_dims=True)(magnitudes, axis=1)
def __init__(self, sparse=False):
    """Create the operators and constants used by the expanded cross entropy.

    Args:
        sparse: stored as ``self.sparse``.
    """
    super(SoftmaxCrossEntropyExpand, self).__init__()
    self.sparse = sparse

    # One-hot encoding and its on/off fill values.
    self.onehot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)

    # Element-wise primitives.
    self.exp = P.Exp()
    self.div = P.Div()
    self.log = P.Log()
    self.mul = P.Mul()
    self.mul2 = P.Mul()
    self.sub = P.Sub()
    self.cast = P.Cast()

    # Reductions; only reduce_sum and reduce_max keep the reduced axis.
    self.reduce_sum = P.ReduceSum(keep_dims=True)
    self.reduce_max = P.ReduceMax(keep_dims=True)
    self.sum_cross_entropy = P.ReduceSum(keep_dims=False)
    self.reduce_mean = P.ReduceMean(keep_dims=False)
def __init__(self, num_bits=2, compute_type=mstype.float32, clip_value=1.0, per_channel=False):
    """Store the quantisation settings and create the helper operators.

    Args:
        num_bits: stored as ``self.num_bits``.
        compute_type: stored as ``self.compute_type``.
        clip_value: stored as ``self.clip_value``.
        per_channel: stored as ``self.per_channel``.
    """
    # Settings.
    self.num_bits = num_bits
    self.compute_type = compute_type
    self.clip_value = clip_value
    self.per_channel = per_channel

    # Function-style helpers (bound without being called).
    self.clamp = C.clip_by_value
    self.nelement = F.size

    # Primitive operators.
    self.abs = P.Abs()
    self.sum = P.ReduceSum()
    self.div = P.Div()
    self.cast = P.Cast()
    self.max = P.ReduceMax()
    self.min = P.ReduceMin()
    self.floor = P.Floor()
def __init__(self, scale, config):
    """Select the anchors for ``scale`` and create the loss operators.

    Args:
        scale: one of 's', 'm' or 'l'.
        config: object providing ``anchor_scales``, ``ignore_threshold``
            and ``img_shape``.

    Raises:
        KeyError: if ``scale`` is none of the three accepted values.
    """
    super(YoloLossBlock, self).__init__()
    self.config = config

    # Three consecutive anchor indices per scale.
    masks = (('s', (0, 1, 2)), ('m', (3, 4, 5)), ('l', (6, 7, 8)))
    for key, mask in masks:
        if scale == key:
            idx = mask
            break
    else:
        raise KeyError("Invalid scale value for DetectionBlock")

    selected = [self.config.anchor_scales[i] for i in idx]
    self.anchors = Tensor(selected, ms.float32)
    self.ignore_threshold = Tensor(self.config.ignore_threshold, ms.float32)

    self.concat = P.Concat(axis=-1)
    self.iou = Iou()
    self.cross_entropy = P.SigmoidCrossEntropyWithLogits()
    self.reduce_sum = P.ReduceSum()
    self.reduce_max = P.ReduceMax(keep_dims=False)

    # Image shape with its entries reversed, as a float32 tensor.
    reversed_shape = tuple(config.img_shape[::-1])
    self.input_shape = Tensor(reversed_shape, ms.float32)
def __init__(self, num_bits=8, compute_type=mstype.float32, clip_value=1.0, per_channel=False):
    """Store the quantisation settings and create the helper operators.

    Args:
        num_bits: stored as ``self.num_bits``.
        compute_type: stored as ``self.compute_type``.
        clip_value: stored as ``self.clip_value``.
        per_channel: stored as ``self.per_channel``.
    """
    super(QuantizeWeightCell, self).__init__()

    # Settings.
    self.num_bits = num_bits
    self.compute_type = compute_type
    self.clip_value = clip_value
    self.per_channel = per_channel

    # Function-style helpers (bound without being called).
    self.clamp = C.clip_by_value
    self.nelement = F.size

    # Primitive operators.
    self.abs = P.Abs()
    self.sum = P.ReduceSum()
    self.div = P.Div()
    self.cast = P.Cast()
    self.max = P.ReduceMax()
    self.min = P.ReduceMin()
    self.round = P.Round()
def __init__(self, vocab_len, word_len, num_classes, vec_length):
    """Build the embedding, three layers made by ``make_layer`` and the head.

    Args:
        vocab_len: first argument of the embedding.
        word_len: stored as ``self.word_len``.
        num_classes: output size of the dense layer.
        vec_length: second argument of the embedding.
    """
    super(TextCNN, self).__init__()
    self.vec_length = vec_length
    self.word_len = word_len
    self.num_classes = num_classes

    self.unsqueeze = P.ExpandDims()
    self.embedding = nn.Embedding(
        vocab_len, self.vec_length, embedding_table='normal')
    self.slice = P.Slice()

    # Three branches that differ only in kernel height.
    self.layer1 = self.make_layer(kernel_height=3)
    self.layer2 = self.make_layer(kernel_height=4)
    self.layer3 = self.make_layer(kernel_height=5)
    self.concat = P.Concat(1)

    # Dense head with input size 96 * 3, plus dropout.
    self.fc = nn.Dense(96 * 3, self.num_classes)
    self.drop = nn.Dropout(keep_prob=0.5)

    self.print = P.Print()
    # NOTE(review): despite its name this attribute holds a ReduceMax.
    self.reducemean = P.ReduceMax(keep_dims=False)
def __init__(self, strategy1, strategy2, strategy3):
    """Create Mul, ReduceMax and TensorAdd, each with its own strategy.

    Args:
        strategy1: strategy set on the Mul operator.
        strategy2: strategy set on the ReduceMax operator.
        strategy3: strategy set on the TensorAdd operator.
    """
    super().__init__()
    mul_op = P.Mul()
    self.mul = mul_op.set_strategy(strategy1)
    max_op = P.ReduceMax(keep_dims=False)
    self.reduce_max = max_op.set_strategy(strategy2)
    add_op = P.TensorAdd()
    self.add = add_op.set_strategy(strategy3)
def __init__(self, in_channels, out_channels, weight_init='normal', bias_init='zeros', has_bias=True, activation=None):
    """Create the dense layer's parameters and device-dependent operators.

    Args:
        in_channels: checked with ``Validator.check_positive_int``.
        out_channels: checked with ``Validator.check_positive_int``.
        weight_init: initialiser for the weight; when it is a Tensor its
            shape must be ``[out_channels, in_channels]``.
        bias_init: initialiser for the bias; when it is a Tensor its shape
            must be ``[out_channels]``.
        has_bias: checked with ``Validator.check_bool``; controls whether a
            bias parameter is created.
        activation: passed to ``get_activation``.

    Raises:
        ValueError: if a Tensor initialiser has the wrong shape.

    NOTE(review): the nesting of the device-specific branches below was
    reconstructed from a source with flattened indentation - confirm.
    """
    super(Dense_Thor, self).__init__()
    self.thor = True
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    # A Tensor initialiser must already have the weight's shape.
    if isinstance(weight_init, Tensor):
        if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("Weight init shape error.")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
    # bias stays None unless has_bias is set.
    self.bias = None
    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("Bias init shape error.")
        self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
        self.bias_add = P.BiasAdd()
    self.matmul = P.MatMul(transpose_b=True)
    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None
    # Non-trainable in_channels x in_channels matrix, initialised to zeros.
    self.matrix_A = Parameter(Tensor(
        np.zeros([in_channels, in_channels]).astype(np.float32)), name='matrix_A', requires_grad=False)
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.mul = P.Mul()
    # Set to True up front; the non-Ascend branch below resets it to False.
    self.is_Ascend = True
    if context.get_context("device_target") == "Ascend":
        if out_channels == 1001:
            # Special case: matrix_G is a 1024 x 1024 zero matrix and pad /
            # slice / add helpers are created.
            self.matrix_G = Parameter(Tensor(
                np.zeros([1024, 1024]).astype(np.float32)), name='matrix_G', requires_grad=False)
            self.pad = P.Pad(((0, 23), (0, 23)))
            self.pad1 = P.Pad(((0, 7), (0, 7)))
            self.slice = P.Slice()
            self.add = P.TensorAdd()
        else:
            # General case: matrix_G starts as the identity.
            self.matrix_G = Parameter(Tensor(
                np.eye(out_channels).astype(np.float32)), name="matrix_G", requires_grad=False)
            self.abs = P.Abs()
            self.reduce_max = P.ReduceMax(keep_dims=False)
            self.neg = P.Neg()
            self.reduce_sum = P.ReduceSum()
        # NOTE(review): self.matmul was already assigned above with the
        # same configuration.
        self.matmul = P.MatMul(transpose_b=True)
        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.cast = P.Cast()
        self.is_nsp_layer = (out_channels == 2)
    else:
        # Other devices: identity matrix_G and a plain MatMul as cube_matmul.
        self.is_Ascend = False
        self.matrix_G = Parameter(Tensor(
            np.eye(out_channels).astype(np.float32)), name="matrix_G", requires_grad=False)
        self.cube_matmul = P.MatMul(transpose_a=True)
    # Hook that passes the gradient to self.save_gradient.
    self.getG = P.InsertGradientOf(self.save_gradient)
def __init__(self, in_channels, out_channels, weight_init='normal', bias_init='zeros', damping=0.03, loss_scale=1, frequency=100, has_bias=False, activation=None, batch_size=12):
    """Create the dense layer's parameters, state and operators.

    Args:
        in_channels: checked with ``Validator.check_positive_int``.
        out_channels: checked with ``Validator.check_positive_int``.
        weight_init: initialiser for the weight; when it is a Tensor its
            shape must be ``[out_channels, in_channels]``.
        bias_init: initialiser for the bias; when it is a Tensor its shape
            must be ``[out_channels]``.
        damping: stored as ``self.damping``.
        loss_scale: its reciprocal is stored as a float16 tensor.
        frequency: stored as an int32 tensor in ``self.freq``.
        has_bias: checked with ``Validator.check_bool``; controls whether a
            bias parameter is created.
        activation: passed to ``get_activation``.
        batch_size: stored as ``self.batch_size``.

    Raises:
        ValueError: if a Tensor initialiser has the wrong shape.
    """
    super(Dense_Thor, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    self.thor = True
    # A Tensor initialiser must already have the weight's shape.
    # NOTE(review): shape is invoked as a method here - confirm this matches
    # the Tensor API of the targeted framework version.
    if isinstance(weight_init, Tensor):
        if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \
                weight_init.shape()[1] != in_channels:
            raise ValueError("weight_init shape error")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
    # self.bias is only created when has_bias is set.
    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.dim() != 1 or bias_init.shape(
            )[0] != out_channels:
                raise ValueError("bias_init shape error")
        self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
    self.matmul = P.MatMul(transpose_b=True)
    self.bias_add = P.BiasAdd()
    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None
    # Non-trainable float16 square matrices, initialised to zeros.
    self.matrix_A_inv = Parameter(Tensor(
        np.zeros([in_channels, in_channels]).astype(np.float16)), name='matrix_A_inv', requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(
        np.zeros([out_channels, out_channels]).astype(np.float16)), name="matrix_G_inv", requires_grad=False)
    self.fake_G = Tensor(
        np.zeros([out_channels, out_channels]).astype(np.float16))
    # NOTE(review): self.matmul was already assigned above with the same
    # configuration.
    self.matmul = P.MatMul(transpose_b=True)
    self.cube_matmul = P.CusMatMulCube(transpose_a=True)
    self.matrix_combine = P.CusMatrixCombine()
    self.cholesky = P.CusCholeskyTrsm()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    # Non-trainable int32 parameter of shape [1], initialised to 0.
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
    self.mul = P.Mul()
    self.cast = P.Cast()
    self.damping = damping
    # Reciprocal of the loss scale, as a float16 tensor.
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.vector_matmul = P.CusBatchMatMul()
    self.gather = P.GatherV2()
    self.assignadd = P.AssignAdd()
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.abs = P.Abs()
    self.reduce_max = P.ReduceMax(keep_dims=False)
    self.log = P.Log()
    self.exp = P.Exp()
    # Identity matrices sized by the input and output channel counts.
    self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
    self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
    self.sqrt = P.Sqrt()
    # Hook that passes the gradient to self.save_gradient.
    self.getG = P.InsertGradientOf(self.save_gradient)
    self.batch_size = batch_size
def __init__(self, strategy1, strategy2, strategy3):
    """Create Mul, ReduceMax and a second Mul, each sharded separately.

    Args:
        strategy1: passed to ``shard`` on the first Mul.
        strategy2: passed to ``shard`` on the ReduceMax.
        strategy3: passed to ``shard`` on the second Mul.
    """
    super().__init__()
    first_mul = P.Mul()
    self.mul1 = first_mul.shard(strategy1)
    max_op = P.ReduceMax(keep_dims=False)
    self.reduce_max = max_op.shard(strategy2)
    second_mul = P.Mul()
    self.mul2 = second_mul.shard(strategy3)
def __init__(self):
    """Create the single ReduceMax operator kept as ``self.max``."""
    super().__init__()
    # Built with the constructor's default arguments.
    reduce_max = P.ReduceMax()
    self.max = reduce_max
def __init__(self, args, strategy):
    """Create the constants, the strategy-annotated operators and the weight.

    Args:
        args: provides ``a`` .. ``e`` plus ``num_classes`` and ``emb_size``.
        strategy: provides the named ``*_strategy`` attributes that are set
            on the individual operators.
    """
    super(SemiAutoOneHotNet, self).__init__()

    def _with_strategy(prim, strat):
        # Set the strategy on the primitive and hand the same object back.
        prim.set_strategy(strategy=strat)
        return prim

    self.a = args.a
    self.b = args.b
    self.c = args.c
    self.d = args.d
    self.e = args.e

    # Casts.
    self.cast = _with_strategy(P.Cast(), strategy.twod_strategy)
    self.cast1 = _with_strategy(P.Cast(), strategy.twod_strategy)
    self.cast2 = _with_strategy(P.Cast(), strategy.twod_strategy)
    self.cast3 = _with_strategy(P.Cast(), strategy.scalar_strategy)
    self.cast4 = _with_strategy(P.Cast(), strategy.scalar_strategy)

    # Scalar constants as float32 tensors.
    self.a_const = Tensor(self.a, dtype=mstype.float32)
    self.b_const = Tensor(self.b, dtype=mstype.float32)
    self.c_const = Tensor(self.c, dtype=mstype.float32)
    self.d_const = Tensor(self.d, dtype=mstype.float32)
    self.e_const = Tensor(self.e, dtype=mstype.float32)
    self.m_const_zero = Tensor(0, dtype=mstype.float32)
    self.a_const_one = Tensor(1, dtype=mstype.float32)

    self.onehot = _with_strategy(P.OneHot(), strategy.onehot_strategy)

    # Exponentials.
    self.exp = _with_strategy(P.Exp(), strategy.twod_strategy)
    self.exp2 = _with_strategy(P.Exp(), strategy.twod_strategy)
    self.exp3 = _with_strategy(P.Exp(), strategy.twod_strategy)

    # Arithmetic operators. NOTE: the attribute names all start with
    # "mul", but several hold TensorAdd, Sub or RealDiv primitives.
    self.mul_const = _with_strategy(P.Mul(), strategy.scalar_twod_strategy)
    self.mul_const2 = _with_strategy(P.TensorAdd(), strategy.scalar_twod_strategy)
    self.mul_const3 = _with_strategy(P.Sub(), strategy.twod_scalar_strategy)
    self.mul_const4 = _with_strategy(P.Sub(), strategy.scalar_twod_strategy)
    self.mul_const5 = _with_strategy(P.Mul(), strategy.twod_scalar_strategy)
    self.mul = _with_strategy(P.Mul(), strategy.twod_twod_strategy)
    self.mul2 = _with_strategy(P.Mul(), strategy.twod_twod_strategy)
    self.mul3 = _with_strategy(P.TensorAdd(), strategy.twod_twod_strategy)
    self.mul4 = _with_strategy(P.Sub(), strategy.twod_twodbc_strategy)
    self.mul5 = _with_strategy(P.RealDiv(), strategy.twod_twodbc_strategy)
    self.mul6 = _with_strategy(P.Mul(), strategy.twod_twod_strategy)
    self.mul7 = _with_strategy(P.Mul(), strategy.twod_scalar_strategy)
    self.mul8 = _with_strategy(P.RealDiv(), strategy.scalar_scalar_strategy)
    self.mul9 = _with_strategy(P.TensorAdd(), strategy.twod_scalar_strategy)

    # Reductions.
    self.reduce_max = _with_strategy(
        P.ReduceMax(keep_dims=True), strategy.twod_strategy)
    self.reduce_sum = _with_strategy(
        P.ReduceSum(keep_dims=False), strategy.twod_strategy)
    self.reduce_sum_2 = _with_strategy(
        P.ReduceSum(keep_dims=False), strategy.twod_strategy)
    self.reduce_sum_3 = _with_strategy(
        P.ReduceSum(keep_dims=False), strategy.oned_strategy)

    self.reshape = P.Reshape()
    self.log = _with_strategy(P.Log(), strategy.twod_strategy)
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)

    # L2 normalisation along axis 1.
    self.normalize = _with_strategy(
        P.L2Normalize(axis=1), strategy.twod_strategy_m)
    self.normalize2 = _with_strategy(
        P.L2Normalize(axis=1), strategy.twod_strategy_m)

    # Matrix product (second operand transposed) and its zero-initialised
    # weight of shape [num_classes, emb_size].
    self.fc = _with_strategy(
        P.MatMul(transpose_b=True), strategy.twodbc_twod_strategy)
    weight_shape = [args.num_classes, args.emb_size]
    weight_np = np.zeros(weight_shape, np.float32)
    self.weight = Parameter(Tensor(weight_np), name='model_parallel_weight')