def __init__(self, sparse=False, stra_list=None):
    super(SoftmaxCrossEntropyExpand, self).__init__()
    # Avoid a mutable default argument; pad missing strategies with None.
    if stra_list is None or len(stra_list) < 11:
        stra_list = [None] * 11
    self.exp = P.Exp()
    self.reduce_sum = P.ReduceSum(keep_dims=True).set_strategy(strategy=stra_list[1])
    self.onehot = P.OneHot().set_strategy(strategy=stra_list[2])
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.div = P.Div().set_strategy(strategy=stra_list[3])
    self.log = P.Log().set_strategy(strategy=stra_list[4])
    self.sum_cross_entropy = P.ReduceSum(keep_dims=False).set_strategy(strategy=stra_list[5])
    self.mul = P.Mul().set_strategy(strategy=stra_list[6])
    self.mul2 = P.Mul().set_strategy(strategy=stra_list[7])
    self.cast = P.Cast()
    self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy=stra_list[8])
    self.sparse = sparse
    self.reduce_max = P.ReduceMax(keep_dims=True).set_strategy(strategy=stra_list[9])
    self.sub = P.Sub().set_strategy(strategy=stra_list[10])
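# The operators above assemble an explicit softmax cross-entropy. A minimal
# NumPy sketch of the math they implement (an illustration, not the actual
# construct method; the function name here is hypothetical):
import numpy as np

def softmax_cross_entropy_expand(logits, labels, num_classes):
    """Mirror the reduce_max/sub/exp/reduce_sum/div/log/mul/reduce_mean ops."""
    shifted = logits - logits.max(axis=1, keepdims=True)   # reduce_max + sub, for stability
    exp = np.exp(shifted)                                  # exp
    softmax = exp / exp.sum(axis=1, keepdims=True)         # reduce_sum + div
    onehot = np.eye(num_classes)[labels]                   # onehot with on/off values 1.0/0.0
    per_sample = -(onehot * np.log(softmax)).sum(axis=1)   # log + mul + sum_cross_entropy
    return per_sample.mean()                               # reduce_mean

# e.g. softmax_cross_entropy_expand(np.random.randn(4, 10), np.array([1, 2, 3, 0]), 10)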
def __init__(self, mean=None, sd=None, seed=0, dtype=mstype.float32, name="Normal"):
    """
    Constructor of normal distribution.
    """
    param = dict(locals())
    valid_dtype = mstype.float_type
    check_type(dtype, valid_dtype, "Normal")
    super(Normal, self).__init__(seed, dtype, name, param)
    self.parameter_type = dtype
    if mean is not None and sd is not None:
        self._mean_value = convert_to_batch(mean, self.broadcast_shape, self.parameter_type)
        self._sd_value = convert_to_batch(sd, self.broadcast_shape, self.parameter_type)
        check_greater_zero(self._sd_value, "Standard deviation")
    else:
        self._mean_value = mean
        self._sd_value = sd

    # ops needed for the class
    self.squeeze = P.Squeeze(0)
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.erf = P.Erf()
    self.exp = P.Exp()
    self.expm1 = self._expm1_by_step
    self.fill = P.Fill()
    self.log = P.Log()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.zeroslike = P.ZerosLike()
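# The erf/exp/log/sq/sqrt operators above are the ingredients of the normal
# log-pdf and cdf:
#   log p(x) = -0.5 * ((x - mean) / sd)^2 - log(sd) - 0.5 * log(2 * pi)
#   cdf(x)   = 0.5 * (1 + erf((x - mean) / (sd * sqrt(2))))
# A quick numeric check with the standard library (illustration only):
import math

mean, sd, x = 0.0, 1.0, 1.5
log_pdf = -0.5 * ((x - mean) / sd) ** 2 - math.log(sd) - 0.5 * math.log(2 * math.pi)
cdf = 0.5 * (1 + math.erf((x - mean) / (sd * math.sqrt(2))))
# log_pdf ~ -2.0439, cdf ~ 0.9332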
def __init__(self, low=None, high=None, seed=0, dtype=mstype.float32, name="Uniform"):
    """
    Constructor of Uniform distribution.
    """
    param = dict(locals())
    valid_dtype = mstype.float_type
    check_type(dtype, valid_dtype, "Uniform")
    super(Uniform, self).__init__(seed, dtype, name, param)
    if low is not None and high is not None:
        self._low = convert_to_batch(low, self.broadcast_shape, dtype)
        self._high = convert_to_batch(high, self.broadcast_shape, dtype)
        check_greater(self.low, self.high, "low value", "high value")
    else:
        self._low = low
        self._high = high

    # ops needed for the class
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.exp = P.Exp()
    self.fill = P.Fill()
    self.less = P.Less()
    self.lessequal = P.LessEqual()
    self.log = P.Log()
    self.logicaland = P.LogicalAnd()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.zeroslike = P.ZerosLike()
    self.uniform = C.uniform
def construct(self, x):
    num_batch = P.Shape()(x)[0]
    grid_size = P.Shape()(x)[2:4]

    # Reshape and transpose the feature to
    # [batch, grid_size[0], grid_size[1], num_anchors_per_scale, num_attrib]
    prediction = P.Reshape()(x, (num_batch,
                                 self.num_anchors_per_scale,
                                 self.num_attrib,
                                 grid_size[0],
                                 grid_size[1]))
    prediction = P.Transpose()(prediction, (0, 3, 4, 1, 2))

    range_x = range(grid_size[1])
    range_y = range(grid_size[0])
    grid_x = P.Cast()(F.tuple_to_array(range_x), ms.float32)
    grid_y = P.Cast()(F.tuple_to_array(range_y), ms.float32)
    # Tensors of shape [1, grid_size[0], grid_size[1], 1, 1] holding the
    # x/y coordinate of each grid cell
    grid_x = self.tile(self.reshape(grid_x, (1, 1, -1, 1, 1)), (1, grid_size[0], 1, 1, 1))
    grid_y = self.tile(self.reshape(grid_y, (1, -1, 1, 1, 1)), (1, 1, grid_size[1], 1, 1))
    # Shape is [1, grid_size[0], grid_size[1], 1, 2]
    grid = self.concat((grid_x, grid_y))

    box_xy = prediction[:, :, :, :, :2]
    box_wh = prediction[:, :, :, :, 2:4]
    box_confidence = prediction[:, :, :, :, 4:5]
    box_probs = prediction[:, :, :, :, 5:]

    box_xy = (self.sigmoid(box_xy) + grid) / P.Cast()(F.tuple_to_array(
        (grid_size[1], grid_size[0])), ms.float32)
    box_wh = P.Exp()(box_wh) * self.anchors / self.input_shape
    box_confidence = self.sigmoid(box_confidence)
    box_probs = self.sigmoid(box_probs)

    if self.training:
        return grid, prediction, box_xy, box_wh
    return self.concat((box_xy, box_wh, box_confidence, box_probs))
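# The decode above follows the standard YOLO parameterisation:
#   box_xy = (sigmoid(t_xy) + grid) / grid_size     -> centre in [0, 1]
#   box_wh = exp(t_wh) * anchor / input_shape       -> size in [0, 1]
# A small NumPy check for one cell (all values here are hypothetical,
# chosen only to make the arithmetic concrete):
import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

t_xy, t_wh = np.array([0.2, -0.1]), np.array([0.3, 0.1])
grid_cell = np.array([5.0, 7.0])        # (x, y) cell indices
grid_wh = np.array([13.0, 13.0])        # cells per axis
anchor = np.array([116.0, 90.0])        # anchor in input pixels
input_shape = np.array([416.0, 416.0])

box_xy = (sigmoid(t_xy) + grid_cell) / grid_wh   # ~[0.427, 0.575]
box_wh = np.exp(t_wh) * anchor / input_shape     # ~[0.376, 0.239]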
def __init__(self):
    super(IGamma, self).__init__()
    # const numbers
    self.log_maxfloat16 = Tensor(np.log(np.finfo(np.float16).max), mstype.float16)
    self.log_maxfloat32 = Tensor(np.log(np.finfo(np.float32).max), mstype.float32)
    # operations
    self.logicaland = P.LogicalAnd()
    self.logicalor = P.LogicalOr()
    self.logicalnot = P.LogicalNot()
    self.equal = P.Equal()
    self.greater = P.Greater()
    self.less = P.Less()
    self.neg = P.Neg()
    self.log = P.Log()
    self.exp = P.Exp()
    self.select = P.Select()
    self.zeroslike = P.ZerosLike()
    self.fill = P.Fill()
    self.shape = P.Shape()
    self.dtype = P.DType()
    self.lgamma = LGamma()
    self.const = P.ScalarToArray()
    self.cast = P.Cast()
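# log_maxfloat16/log_maxfloat32 are overflow guards: exp(x) overflows a
# floating type once x exceeds log(finfo(dtype).max), so the incomplete-gamma
# computation can branch before calling Exp. A NumPy illustration of the
# thresholds:
import numpy as np

log_max16 = np.log(np.finfo(np.float16).max)   # ~11.09
log_max32 = np.log(np.finfo(np.float32).max)   # ~88.72
np.exp(np.float32(log_max32 - 1))              # finite
np.exp(np.float32(log_max32 + 1))              # inf (overflow)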
def __init__(self, temperature=0.07, contrast_mode='all', base_temperature=0.07):
    super(SupConLoss, self).__init__()
    self.temperature = temperature
    self.contrast_mode = contrast_mode
    self.base_temperature = base_temperature
    self.normalize = P.L2Normalize(axis=2)
    self.eye = P.Eye()
    self.unbind = P.Unstack(axis=1)
    self.cat = P.Concat(axis=0)
    self.matmul = P.MatMul()
    self.div = P.Div()
    self.transpose = P.Transpose()
    self.maxes = P.ArgMaxWithValue(axis=1, keep_dims=True)
    self.tile = P.Tile()
    self.scatter = P.ScatterNd()
    self.oneslike = P.OnesLike()
    self.exp = P.Exp()
    self.sum = P.ReduceSum(keep_dims=True)
    self.log = P.Log()
    self.reshape = P.Reshape()
    self.mean = P.ReduceMean()
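# These operators implement the supervised contrastive loss of Khosla et al.:
#   L = mean_i [ -(temperature / base_temperature) * 1/|P(i)| *
#       sum_{p in P(i)} log( exp(z_i.z_p / t) / sum_{a != i} exp(z_i.z_a / t) ) ]
# A minimal NumPy sketch under simplifying assumptions (single view,
# every sample has at least one positive pair; not the construct method):
import numpy as np

def supcon_loss(features, labels, temperature=0.07, base_temperature=0.07):
    z = features / np.linalg.norm(features, axis=1, keepdims=True)     # L2Normalize
    logits = z @ z.T / temperature                                     # matmul + div
    logits = logits - logits.max(axis=1, keepdims=True)                # max-subtraction for stability
    positives = (labels[:, None] == labels[None, :]).astype(float)     # same-label mask
    not_self = 1.0 - np.eye(len(labels))                               # exclude i == a
    exp_logits = np.exp(logits) * not_self                             # exp + mask
    log_prob = logits - np.log(exp_logits.sum(axis=1, keepdims=True))  # log-softmax over anchors
    pos_mask = positives * not_self
    mean_log_prob_pos = (pos_mask * log_prob).sum(1) / pos_mask.sum(1)
    return (-(temperature / base_temperature) * mean_log_prob_pos).mean()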
def __init__(self, probs=None, seed=0, dtype=mstype.int32, name="Geometric"):
    """
    Constructor of Geometric distribution.
    """
    param = dict(locals())
    valid_dtype = mstype.int_type + mstype.uint_type
    check_type(dtype, valid_dtype, "Geometric")
    super(Geometric, self).__init__(seed, dtype, name, param)
    if probs is not None:
        self._probs = cast_to_tensor(probs, hint_dtype=mstype.float32)
        check_prob(self._probs)
    else:
        self._probs = probs
    # smallest positive float64; keeps the uniform sample strictly above 0
    self.minval = np.finfo(np.float64).tiny

    # ops needed for the class
    self.cast = P.Cast()
    self.const = P.ScalarToArray()
    self.dtypeop = P.DType()
    self.exp = P.Exp()
    self.fill = P.Fill()
    self.floor = P.Floor()
    self.issubclass = P.IsSubClass()
    self.less = P.Less()
    self.log = P.Log()
    self.pow = P.Pow()
    self.select = P.Select()
    self.shape = P.Shape()
    self.sq = P.Square()
    self.sqrt = P.Sqrt()
    self.uniform = C.uniform
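# The pow/log/exp/floor operators back the geometric pmf and cdf. With
# probs = p the probability of success per trial, and the distribution
# counting failures before the first success:
#   pmf(k) = (1 - p)^k * p,    cdf(k) = 1 - (1 - p)^(k + 1)
# A quick NumPy consistency check (illustration only):
import numpy as np

p = 0.3
k = np.arange(4)
pmf = (1 - p) ** k * p          # [0.3, 0.21, 0.147, 0.1029]
cdf = 1 - (1 - p) ** (k + 1)
assert np.allclose(pmf.cumsum(), cdf)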
def __init__(self,
             in_channels,
             out_channels,
             weight_init='normal',
             bias_init='zeros',
             damping=0.03,
             loss_scale=1,
             frequency=278,
             has_bias=True,
             activation=None):
    super(Dense_Thor, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    self.thor = True
    if isinstance(weight_init, Tensor):
        if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("weight_init shape error")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("bias_init shape error")
        self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")

    self.matmul = P.MatMul(transpose_b=True)
    self.bias_add = P.BiasAdd()
    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None
    self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)),
                                  name='matrix_A_inv', requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)),
                                  name="matrix_G_inv", requires_grad=False)
    self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))
    self.cube_matmul = P.CusMatMulCube(transpose_a=True)
    self.matrix_combine = P.CusMatrixCombine()
    self.cholesky = P.CusCholeskyTrsm()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
    self.mul = P.Mul()
    self.cast = P.Cast()
    self.damping = Tensor(damping)
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.vector_matmul = P.CusBatchMatMul()
    self.pad = P.Pad(((0, 24), (0, 24)))
    self.pad1 = P.Pad(((0, 8), (0, 8)))
    self.slice = P.Slice()
    self.gather = P.GatherV2()
    self.assignadd = P.AssignAdd()
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
    self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
    self.fused_abs_max1 = P.CusFusedAbsMax1([1000, 1000])
    self.fused_abs_max2 = P.CusFusedAbsMax1()
    self.log = P.Log()
    self.exp = P.Exp()
    self.dampingA = Tensor(np.identity(2048), mstype.float32)
    self.dampingG = Tensor(np.identity(1024), mstype.float32)
    self.add = P.TensorAdd()
    self.sqrt = P.Sqrt()
    self.getG = P.InsertGradientOf(self.save_gradient)
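# matrix_A_inv/matrix_G_inv above are kept in the Ascend "fractal" layout: an
# H x W matrix stored as [H/16, W/16, 16, 16] blocks for the cube unit. A
# NumPy sketch of that tiling (the exact block ordering is an assumption,
# shown for illustration only):
import numpy as np

def to_fractal(mat, block=16):
    """Tile an (H, W) matrix into (H//block, W//block, block, block) blocks."""
    h, w = mat.shape
    return mat.reshape(h // block, block, w // block, block).transpose(0, 2, 1, 3)

a = np.zeros([2048, 2048], np.float16)
# to_fractal(a).shape == (128, 128, 16, 16), matching matrix_A_inv above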
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             pad_mode='same',
             padding=0,
             dilation=1,
             group=1,
             data_format='NCHW',
             has_bias=False,
             weight_init='normal',
             damping=0.03,
             loss_scale=1,
             frequency=278,
             bias_init='zeros'):
    self.thor = True
    ksizes = (1, kernel_size, kernel_size, 1)
    self.hw = kernel_size * kernel_size
    strides = (1, stride, stride, 1)
    kernel_size = twice(kernel_size)
    super(Conv2d_Thor, self).__init__(
        in_channels, out_channels, kernel_size, stride, pad_mode, padding,
        dilation, group, data_format, has_bias, weight_init, bias_init,
    )
    self.conv2d = P.Conv2D(out_channel=self.out_channels,
                           kernel_size=self.kernel_size,
                           mode=1,
                           pad_mode=self.pad_mode,
                           pad=self.padding,
                           stride=self.stride,
                           dilation=self.dilation,
                           group=self.group)
    self.img2col = P.CusImg2Col(ksizes=ksizes, strides=strides)
    self.cube_matmul = P.CusMatMulCube(transpose_a=True)
    self.matrix_combine = P.CusMatrixCombine()
    self.cholesky = P.CusCholeskyTrsm()
    self.transpose02314 = P.CusTranspose02314()
    self.matrix_A_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
    self.matrix_G_dim = self.out_channels
    self.matrix_A_device_shape, self.matrix_A_device_dim = caculate_device_shape(
        self.matrix_A_dim, self.in_channels, True)
    self.matrix_G_device_shape, self.matrix_G_device_dim = caculate_device_shape(
        self.matrix_G_dim, self.in_channels, False)
    self.matrix_A_device_temp_shape = (self.matrix_A_device_shape[0],
                                       self.matrix_A_device_shape[2],
                                       self.matrix_A_device_shape[1],
                                       self.matrix_A_device_shape[3])
    self.matrix_G_device_temp_shape = (self.matrix_G_device_shape[0],
                                       self.matrix_G_device_shape[2],
                                       self.matrix_G_device_shape[1],
                                       self.matrix_G_device_shape[3])
    self.matrix_A_inv = Parameter(
        Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16),
                          self.matrix_A_device_shape)),
        name='matrix_A_inv', requires_grad=False)
    self.A_inv_max = Parameter(initializer(0, [1], mstype.float32),
                               name="A_inv_max", requires_grad=False)
    self.matrix_G_inv = Parameter(
        Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16),
                          self.matrix_G_device_shape)),
        name="matrix_G_inv", requires_grad=False)
    self.G_inv_max = Parameter(initializer(0, [1], mstype.float32),
                               name="G_inv_max", requires_grad=False)
    self.fake_G = Tensor(
        np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16),
                   self.matrix_G_device_shape))
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.cov_step = Parameter(initializer(0, [1], mstype.int32),
                              name="cov_step", requires_grad=False)
    self.mul = P.Mul()
    self.cast = P.Cast()
    self.damping = Tensor(damping)
    self.vector_matmul = P.CusBatchMatMul()
    self.diag_block_dim = 128
    self.channels_slice_flag = False
    if self.in_channels % C0 != 0:
        self.channels_slice_flag = True
    self.padA_flag = False
    if (self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim \
            and self.matrix_A_dim > self.diag_block_dim:
        self.padA_flag = True
        pad_dim = self.diag_block_dim - self.matrix_A_dim % self.diag_block_dim
        self.padA = P.Pad(((0, pad_dim), (0, pad_dim)))
    self.device_shape_pad_flag = False
    if self.matrix_A_dim != self.matrix_A_device_dim:
        self.device_shape_pad_flag = True
        self.device_shape_pad = P.Pad(((0, 0), (0, C0 - self.in_channels),
                                       (0, 0), (0, C0 - self.in_channels)))
    self.slice = P.Slice()
    self.gather = P.GatherV2()
    self.freq = Tensor(frequency, mstype.int32)
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.axis = 0

    dampingA_dim = self.matrix_A_dim
    if (self.matrix_A_dim % self.diag_block_dim) != 0 and self.matrix_A_dim > self.diag_block_dim:
        dampingA_dim = (self.matrix_A_dim // self.diag_block_dim + 1) * self.diag_block_dim
    dampingG_dim = self.matrix_G_dim
    if (self.matrix_G_dim % self.diag_block_dim) != 0 and self.matrix_G_dim > self.diag_block_dim:
        dampingG_dim = (self.matrix_G_dim // self.diag_block_dim + 1) * self.diag_block_dim
    self.dampingA = Tensor(np.identity(dampingA_dim), mstype.float32)
    self.dampingG = Tensor(np.identity(dampingG_dim), mstype.float32)
    self.fused_abs_max1 = P.CusFusedAbsMax1([self.matrix_A_dim, self.matrix_A_dim])
    self.fused_abs_max2 = P.CusFusedAbsMax1()
    self.log = P.Log()
    self.exp = P.Exp()
    self.sqrt = P.Sqrt()
    self.getG = P.InsertGradientOf(self.save_gradient)
def __init__(self,
             in_channels,
             out_channels,
             weight_init='normal',
             bias_init='zeros',
             damping=0.03,
             loss_scale=1,
             frequency=100,
             has_bias=False,
             activation=None,
             batch_size=12):
    super(Dense_Thor, self).__init__()
    self.in_channels = Validator.check_positive_int(in_channels)
    self.out_channels = Validator.check_positive_int(out_channels)
    self.has_bias = Validator.check_bool(has_bias)
    self.thor = True
    if isinstance(weight_init, Tensor):
        # Tensor.ndim and Tensor.shape are properties, not methods
        if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                weight_init.shape[1] != in_channels:
            raise ValueError("weight_init shape error")
    self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

    if self.has_bias:
        if isinstance(bias_init, Tensor):
            if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                raise ValueError("bias_init shape error")
        self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")

    self.matmul = P.MatMul(transpose_b=True)
    self.bias_add = P.BiasAdd()
    self.activation = get_activation(activation)
    self.activation_flag = self.activation is not None
    self.matrix_A_inv = Parameter(Tensor(np.zeros([in_channels, in_channels]).astype(np.float16)),
                                  name='matrix_A_inv', requires_grad=False)
    self.matrix_G_inv = Parameter(Tensor(np.zeros([out_channels, out_channels]).astype(np.float16)),
                                  name="matrix_G_inv", requires_grad=False)
    self.fake_G = Tensor(np.zeros([out_channels, out_channels]).astype(np.float16))
    self.cube_matmul = P.CusMatMulCube(transpose_a=True)
    self.matrix_combine = P.CusMatrixCombine()
    self.cholesky = P.CusCholeskyTrsm()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.transpose = P.Transpose()
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
    self.mul = P.Mul()
    self.cast = P.Cast()
    self.damping = damping
    self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
    self.vector_matmul = P.CusBatchMatMul()
    self.gather = P.GatherV2()
    self.assignadd = P.AssignAdd()
    self.freq = Tensor(frequency, mstype.int32)
    self.axis = 0
    self.abs = P.Abs()
    self.reduce_max = P.ReduceMax(keep_dims=False)
    self.log = P.Log()
    self.exp = P.Exp()
    self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
    self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
    self.sqrt = P.Sqrt()
    self.getG = P.InsertGradientOf(self.save_gradient)
    self.batch_size = batch_size
    'skip': ['backward']}),
('Minimum', {
    'block': P.Minimum(),
    'desc_inputs': [[2, 3, 3, 5], [2, 3, 3, 5]],
    'desc_bprop': [[2, 3, 3, 5]]}),
('Pow_0', {
    'block': P.Pow(),
    'desc_const': [2.0],
    'desc_inputs': [[2, 3, 3, 5]],
    'desc_bprop': [[2, 3, 3, 5]]}),
('Pow_1', {
    'block': P.Pow(),
    'desc_inputs': [[3, 5], [2, 3, 3, 5]],
    'desc_bprop': [[2, 3, 3, 5]]}),
('Exp', {
    'block': P.Exp(),
    'desc_inputs': [[2, 3]],
    'desc_bprop': [[2, 3]]}),
('Erf', {
    'block': P.Erf(),
    'desc_inputs': [Tensor(np.array([-2, -1, 0, 1, 2]).astype(np.float16))],
    'desc_bprop': [Tensor(np.array([-2, -1, 0, 1, 2]).astype(np.float16))]}),
('Floor', {
    'block': P.Floor(),
    'desc_inputs': [[2, 512, 56, 56]],
    'desc_bprop': [[2, 512, 56, 56]],
    'skip': ['backward']}),
('ACos', {
    'block': P.ACos(),
    'desc_inputs': [[2, 3]],
    'desc_bprop': [[2, 3]]}),
def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, A_inv_max, G_inv_max,
             weight_decay=0.0, loss_scale=1.0,
             decay_filter=lambda x: x.name not in []):
    super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
    self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
    self.params = self.parameters
    self.moments = self.params.clone(prefix="moments", init='zeros')
    self.hyper_map = C.HyperMap()
    self.opt = P.ApplyMomentum()
    self.matrix_A = ParameterTuple(matrix_A)
    self.matrix_G = ParameterTuple(matrix_G)
    self.A_inv_max = ParameterTuple(A_inv_max)
    self.G_inv_max = ParameterTuple(G_inv_max)
    self.cube_matmul_left = P.CusMatMulCubeFraczLeftCast()
    self.cube_matmul_left_fc = P.CusMatMulCubeDenseLeft()
    self.cube_matmul_right_fc = P.CusMatMulCubeDenseRight()
    self.cube_matmul_right_mul = P.CusMatMulCubeFraczRightMul()
    self.transpose = P.Transpose()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.mul = P.Mul()
    self.weight_idx = []
    for i in range(len(self.params)):
        if "conv" in self.params[i].name or "end_point" in self.params[i].name:
            self.weight_idx.append(i)
    self.weight_idx.append(len(self.params))
    # 1 / (H * W) of each layer's output feature map
    # (112^2 = 12544, 56^2 = 3136, 28^2 = 784, 14^2 = 196, 7^2 = 49);
    # the final 1.0 is the fully connected layer
    self.feature_map = [
        1.0 / 12544,
        1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136,
        1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136,
        1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784,
        1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784,
        1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196,
        1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196,
        1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196,
        1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49,
        1.0 / 49, 1.0 / 49,
        1.0
    ]
    mean = _get_mirror_mean()
    degree = _get_device_num()
    self.grad_reducer_Amax = DistributedGradReducerThor(self.parameters, 2, mean, degree)
    self.grad_reducer_Gmax = DistributedGradReducerThor(self.parameters, 5, mean, degree)
    self.grad_reducer_A = DistributedGradReducerThor(self.parameters, 3, mean, degree)
    self.grad_reducer_G = DistributedGradReducerThor(self.parameters, 4, mean, degree)
    self.matrix_A_inv = ()
    self.matrix_G_inv = ()
    self.matrix_max_inv = ()
    for i in range(54):
        self.matrix_max_inv = self.matrix_max_inv + (
            Parameter(initializer(1, [1], mstype.float32),
                      name="matrix_max" + str(i), requires_grad=False),)
    self.log = P.Log()
    self.exp = P.Exp()
    self.sqrt = P.Sqrt()
    self.matrix_max_inv = ParameterTuple(self.matrix_max_inv)
    self.assign = P.Assign()
    self.cast = P.Cast()
    self.thor = True
    self.weight_decay = weight_decay * loss_scale
    self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
def __init__(self, args, strategy):
    super(SemiAutoOneHotNet, self).__init__()
    self.a = args.a
    self.b = args.b
    self.c = args.c
    self.d = args.d
    self.e = args.e
    self.cast = P.Cast()
    self.cast.set_strategy(strategy=strategy.twod_strategy)
    self.cast1 = P.Cast()
    self.cast1.set_strategy(strategy=strategy.twod_strategy)
    self.cast2 = P.Cast()
    self.cast2.set_strategy(strategy=strategy.twod_strategy)
    self.cast3 = P.Cast()
    self.cast3.set_strategy(strategy=strategy.scalar_strategy)
    self.cast4 = P.Cast()
    self.cast4.set_strategy(strategy=strategy.scalar_strategy)
    self.a_const = Tensor(self.a, dtype=mstype.float32)
    self.b_const = Tensor(self.b, dtype=mstype.float32)
    self.c_const = Tensor(self.c, dtype=mstype.float32)
    self.d_const = Tensor(self.d, dtype=mstype.float32)
    self.e_const = Tensor(self.e, dtype=mstype.float32)
    self.m_const_zero = Tensor(0, dtype=mstype.float32)
    self.a_const_one = Tensor(1, dtype=mstype.float32)
    self.onehot = P.OneHot()
    self.onehot.set_strategy(strategy=strategy.onehot_strategy)
    self.exp = P.Exp()
    self.exp.set_strategy(strategy=strategy.twod_strategy)
    self.exp2 = P.Exp()
    self.exp2.set_strategy(strategy=strategy.twod_strategy)
    self.exp3 = P.Exp()
    self.exp3.set_strategy(strategy=strategy.twod_strategy)
    self.mul_const = P.Mul()
    self.mul_const.set_strategy(strategy=strategy.scalar_twod_strategy)
    self.mul_const2 = P.TensorAdd()
    self.mul_const2.set_strategy(strategy=strategy.scalar_twod_strategy)
    self.mul_const3 = P.Sub()
    self.mul_const3.set_strategy(strategy=strategy.twod_scalar_strategy)
    self.mul_const4 = P.Sub()
    self.mul_const4.set_strategy(strategy=strategy.scalar_twod_strategy)
    self.mul_const5 = P.Mul()
    self.mul_const5.set_strategy(strategy=strategy.twod_scalar_strategy)
    self.mul = P.Mul()
    self.mul.set_strategy(strategy=strategy.twod_twod_strategy)
    self.mul2 = P.Mul()
    self.mul2.set_strategy(strategy=strategy.twod_twod_strategy)
    self.mul3 = P.TensorAdd()
    self.mul3.set_strategy(strategy=strategy.twod_twod_strategy)
    self.mul4 = P.Sub()
    self.mul4.set_strategy(strategy=strategy.twod_twodbc_strategy)
    self.mul5 = P.RealDiv()
    self.mul5.set_strategy(strategy=strategy.twod_twodbc_strategy)
    self.mul6 = P.Mul()
    self.mul6.set_strategy(strategy=strategy.twod_twod_strategy)
    self.mul7 = P.Mul()
    self.mul7.set_strategy(strategy=strategy.twod_scalar_strategy)
    self.mul8 = P.RealDiv()
    self.mul8.set_strategy(strategy=strategy.scalar_scalar_strategy)
    self.mul9 = P.TensorAdd()
    self.mul9.set_strategy(strategy=strategy.twod_scalar_strategy)
    self.reduce_max = P.ReduceMax(keep_dims=True)
    self.reduce_max.set_strategy(strategy=strategy.twod_strategy)
    self.reduce_sum = P.ReduceSum(keep_dims=False)
    self.reduce_sum.set_strategy(strategy=strategy.twod_strategy)
    self.reduce_sum_2 = P.ReduceSum(keep_dims=False)
    self.reduce_sum_2.set_strategy(strategy=strategy.twod_strategy)
    self.reduce_sum_3 = P.ReduceSum(keep_dims=False)
    self.reduce_sum_3.set_strategy(strategy=strategy.oned_strategy)
    self.reshape = P.Reshape()
    self.log = P.Log()
    self.log.set_strategy(strategy=strategy.twod_strategy)
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.normalize = P.L2Normalize(axis=1)
    self.normalize.set_strategy(strategy=strategy.twod_strategy_m)
    self.normalize2 = P.L2Normalize(axis=1)
    self.normalize2.set_strategy(strategy=strategy.twod_strategy_m)
    self.fc = P.MatMul(transpose_b=True)
    self.fc.set_strategy(strategy=strategy.twodbc_twod_strategy)
    weight_shape = [args.num_classes, args.emb_size]
    weight_np = np.zeros(weight_shape, np.float32)
    self.weight = Parameter(Tensor(weight_np), name='model_parallel_weight')
def __init__(self):
    super().__init__()
    # self.softplus = P.Softplus()
    self.log1p = P.Log1p()
    self.exp = P.Exp()
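# The commented-out P.Softplus() together with the Log1p/Exp pair suggests
# this cell computes softplus(x) = log(1 + exp(x)) as log1p(exp(x)). A NumPy
# check of the identity (an assumption about the missing construct method):
import numpy as np

x = np.array([-2.0, 0.0, 2.0])
assert np.allclose(np.log1p(np.exp(x)), np.log(1.0 + np.exp(x)))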
def __init__(self):
    super(NetExp, self).__init__()
    self.exp = P.Exp()
def __init__(self, strategy1, strategy2):
    super().__init__()
    self.matmul = P.MatMul().set_strategy(strategy1)
    self.exp = P.Exp().set_strategy(strategy2)
    self.matmul2 = P.MatMul().set_strategy(strategy1)
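# set_strategy attaches a semi-auto-parallel sharding strategy: one tuple per
# operator input, one integer per tensor dimension, giving how that dimension
# is split across devices. A hedged example of tuples this constructor could
# receive (the concrete values and the enclosing class name are assumptions,
# e.g. an 8-device setup):
strategy1 = ((2, 4), (4, 1))   # MatMul: lhs split 2x4, rhs split 4x1 (contracting dims match)
strategy2 = ((2, 4),)          # Exp is elementwise, so one strategy for its single input
# net = Net(strategy1, strategy2)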
def std_trans(self, std_pre):
    """Transform std_pre to prevent its value being zero."""
    std = 1e-6 + P.Log()(P.Exp()(std_pre) + 1)
    return std
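# std_trans computes 1e-6 + softplus(std_pre). Note that log(exp(x) + 1)
# overflows for large x (x > ~88 in float32, ~709 in float64); a numerically
# stable equivalent (a sketch, not the original code) is
# max(x, 0) + log1p(exp(-|x|)):
import numpy as np

def stable_softplus(x):
    return np.maximum(x, 0.0) + np.log1p(np.exp(-np.abs(x)))

x = np.array([-50.0, 0.0, 800.0])
# np.log(np.exp(x) + 1) returns inf at x = 800; stable_softplus returns 800.0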
('Reciprocal1', {
    'block': (P.Reciprocal(), {'exception': TypeError, 'error_keywords': ['Reciprocal']}),
    'desc_inputs': [5.0],
    'skip': ['backward']}),
# input x is Tensor(bool)
('Pow1', {
    'block': (P.Pow(), {'exception': TypeError, 'error_keywords': ['Pow']}),
    'desc_inputs': [Tensor(np.ones([2, 3]).astype(np.bool_)), 2.0],
    'skip': ['backward']}),
# input is not Tensor
('Exp1', {
    'block': (P.Exp(), {'exception': TypeError, 'error_keywords': ['Exp']}),
    'desc_inputs': [5.0],
    'skip': ['backward']}),
# input is not Tensor
('Log1', {
    'block': (P.Log(), {'exception': TypeError, 'error_keywords': ['Log']}),
    'desc_inputs': [5.0],
    'skip': ['backward']}),
# one input is scalar, and another is Tensor(float32)
('Minimum0', {
    'block': (P.Minimum(), {'exception': TypeError, 'error_keywords': ['Minimum']}),
    'desc_inputs': [5.0, Tensor(np.ones([3, 4]).astype(np.float32))],
def __init__(self, num_classes, anchors, anchors_mask, reduction=32, seen=0,
             coord_scale=1.0, no_object_scale=1.0, object_scale=1.0,
             class_scale=1.0, thresh=0.5, head_idx=0.0):
    super(YoloLoss, self).__init__()
    self.num_classes = num_classes
    self.num_anchors = len(anchors_mask)
    self.anchor_step = len(anchors[0])  # number of values per anchor
    self.anchors = np.array(anchors, dtype=np.float32) / reduction  # scale anchors to feature-map units
    self.tensor_anchors = Tensor(self.anchors, mstype.float32)
    self.anchors_mask = anchors_mask
    anchors_w = []
    anchors_h = []
    for i in range(len(anchors_mask)):
        anchors_w.append(self.anchors[self.anchors_mask[i]][0])
        anchors_h.append(self.anchors[self.anchors_mask[i]][1])
    self.anchors_w = Tensor(np.array(anchors_w).reshape(len(self.anchors_mask), 1))
    self.anchors_h = Tensor(np.array(anchors_h).reshape(len(self.anchors_mask), 1))
    self.reduction = reduction
    self.seen = seen
    self.head_idx = head_idx
    self.zero = Tensor(0)
    self.coord_scale = coord_scale
    self.no_object_scale = no_object_scale
    self.object_scale = object_scale
    self.class_scale = class_scale
    self.thresh = thresh
    self.info = {'avg_iou': 0, 'class': 0, 'obj': 0, 'no_obj': 0,
                 'recall50': 0, 'recall75': 0, 'obj_cur': 0, 'obj_all': 0,
                 'coord_xy': 0, 'coord_wh': 0}

    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.sigmoid = P.Sigmoid()
    self.zeros_like = P.ZerosLike()
    self.concat0 = P.Concat(0)
    self.concat0_2 = P.Concat(0)
    self.concat0_3 = P.Concat(0)
    self.concat0_4 = P.Concat(0)
    self.concat1 = P.Concat(1)
    self.concat1_2 = P.Concat(1)
    self.concat1_3 = P.Concat(1)
    self.concat1_4 = P.Concat(1)
    self.concat2 = P.Concat(2)
    self.concat2_2 = P.Concat(2)
    self.concat2_3 = P.Concat(2)
    self.concat2_4 = P.Concat(2)
    self.tile = P.Tile()
    self.transpose = P.Transpose()
    self.cast = P.Cast()
    self.exp = P.Exp()
    self.sum = P.ReduceSum()
    self.smooth_l1_loss = P.SmoothL1Loss()
    self.bce = P.SigmoidCrossEntropyWithLogits()
    self.ce = P.SoftmaxCrossEntropyWithLogits()
    self.pt_linspace = PtLinspace()
    self.one_hot = nn.OneHot(-1, self.num_classes, 1.0, 0.0)
    self.squeeze_2 = P.Squeeze(2)
    self.reduce_sum = P.ReduceSum()
    self.select = P.Select()
    self.iou = P.IOU()
def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, A_inv_max, G_inv_max,
             weight_decay=0.0, loss_scale=1.0, num_hidden_layers=24, batch_size=12,
             damping=0.03, frequency=10,
             decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()):
    super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
    self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
    self.params = self.parameters
    self.moments = self.params.clone(prefix="moments", init='zeros')
    self.hyper_map = C.HyperMap()
    self.opt = P.ApplyMomentum()
    self.matrix_A = ParameterTuple(matrix_A)
    self.matrix_G = ParameterTuple(matrix_G)
    self.A_inv_max = ParameterTuple(A_inv_max)
    self.G_inv_max = ParameterTuple(G_inv_max)
    self.matmul = P.MatMul()
    self.transpose = P.Transpose()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.mul = P.Mul()
    self.gather = P.GatherV2()
    self.matrix_A_inv = ()
    self.matrix_G_inv = ()
    self.matrix_max_inv = ()
    self.num_hidden_layers = num_hidden_layers
    fc_layer_num = num_hidden_layers * 6 + 5
    for i in range(fc_layer_num):
        self.matrix_max_inv = self.matrix_max_inv + (
            Parameter(initializer(1, [1], mstype.float32),
                      name="matrix_max" + str(i), requires_grad=False),)
    self.log = P.Log()
    self.exp = P.Exp()
    self.sqrt = P.Sqrt()
    self.matrix_max_inv = ParameterTuple(self.matrix_max_inv)
    self.assign = P.Assign()
    self.cast = P.Cast()
    self.thor = True
    self.weight_decay = weight_decay * loss_scale
    self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
    self.expand = P.ExpandDims()
    self.square = P.Square()
    self.inv = P.Inv()
    self.batch_size = batch_size
    self.damping = damping
    self.freq = Tensor(frequency, mstype.int32)
    self.one = Tensor(1, mstype.int32)
    self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, A_inv_max, G_inv_max,
             weight_decay=0.0, loss_scale=1.0, batch_size=32.0,
             decay_filter=lambda x: x.name not in []):
    super(THOR, self).__init__(learning_rate, params, weight_decay, loss_scale)
    if isinstance(momentum, float) and momentum < 0.0:
        raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
    self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
    self.params = self.parameters
    self.moments = self.params.clone(prefix="moments", init='zeros')
    self.hyper_map = C.HyperMap()
    self.opt = P.ApplyMomentum()
    self.matrix_A = ParameterTuple(matrix_A)
    self.matrix_G = ParameterTuple(matrix_G)
    self.A_inv_max = ParameterTuple(A_inv_max)
    self.G_inv_max = ParameterTuple(G_inv_max)
    self.cube_matmul_left = P.CusMatMulCubeFraczLeftCast()
    self.cube_matmul_left_fc = P.CusMatMulCubeDenseLeft()
    self.cube_matmul_right_fc = P.CusMatMulCubeDenseRight()
    self.cube_matmul_right_mul = P.CusMatMulCubeFraczRightMul()
    self.transpose = P.Transpose()
    self.shape = P.Shape()
    self.reshape = P.Reshape()
    self.mul = P.Mul()
    self.weight_idx = []
    for i in range(len(self.params)):
        if "conv" in self.params[i].name or "end_point" in self.params[i].name:
            self.weight_idx.append(i)
    self.weight_idx.append(len(self.params))
    # 1 / (H * W) of each layer's output feature map
    # (112^2 = 12544, 56^2 = 3136, 28^2 = 784, 14^2 = 196, 7^2 = 49);
    # the final 1.0 is the fully connected layer
    self.feature_map = [
        1.0 / 12544,
        1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136,
        1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136,
        1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784,
        1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784, 1.0 / 784,
        1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196,
        1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196,
        1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196, 1.0 / 196,
        1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49, 1.0 / 49,
        1.0 / 49, 1.0 / 49,
        1.0
    ]
    mean = _get_mirror_mean()
    degree = _get_device_num()
    self.grad_reducer_Amax = DistributedGradReducerThor(self.parameters, 2, mean, degree)
    self.grad_reducer_Gmax = DistributedGradReducerThor(self.parameters, 5, mean, degree)
    self.grad_reducer_A = DistributedGradReducerThor(self.parameters, 3, mean, degree)
    self.grad_reducer_G = DistributedGradReducerThor(self.parameters, 4, mean, degree)
    self.matrix_A_inv = ()
    self.matrix_G_inv = ()
    self.matrix_max_inv = ()
    for i in range(54):
        self.matrix_max_inv = self.matrix_max_inv + (
            Parameter(initializer(1, [1], mstype.float32),
                      name="matrix_max" + str(i), requires_grad=False),)
    self.log = P.Log()
    self.exp = P.Exp()
    self.sqrt = P.Sqrt()
    self.matrix_max_inv = ParameterTuple(self.matrix_max_inv)
    self.assign = P.Assign()
    self.cast = P.Cast()
    self.thor = True
    self.weight_decay = weight_decay * loss_scale
    self.decay_flags = tuple(decay_filter(x) for x in self.parameters)
    self.conv_index = [
        0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 17, 18, 19, 22, 23, 24, 25,
        28, 29, 30, 33, 34, 35, 38, 39, 40, 43, 44, 45, 46, 49, 50, 51,
        54, 55, 56, 59, 60, 61, 64, 65, 66, 69, 70, 71, 74, 75, 76, 77,
        80, 81, 82, 85
    ]
    self.batch_size = batch_size
    self.bn_index = [3, 7, 10, 13, 17, 20, 23, 26, 30, 33, 36, 39, 42, 45, 49, 52]
    self.bn_gradient_index = [
        -1, -1, -1, 4, -1, -1, -1, 10, -1, -1, 15, -1, -1, 20, -1, -1, -1,
        26, -1, -1, 31, -1, -1, 36, -1, -1, 41, -1, -1, -1, 47, -1, -1, 52,
        -1, -1, 57, -1, -1, 62, -1, -1, 67, -1, -1, 72, -1, -1, -1, 78, -1,
        -1, 83
    ]