Exemplo n.º 1
0
 def __init__(self, strategy1, strategy2, strategy3):
     """Test net: two Mul primitives and a ReduceMean, each with its own parallel strategy."""
     super().__init__()
     # Construction order of the primitives is irrelevant.
     self.mul2 = P.Mul().set_strategy(strategy3)
     self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy2)
     self.mul1 = P.Mul().set_strategy(strategy1)
Exemplo n.º 2
0
 def __init__(self):
     """Soft-label cross entropy: holds log-softmax/softmax over the last axis plus mean/cast ops."""
     super(SoftCrossEntropy, self).__init__()
     self.cast = P.Cast()
     self.reduce_mean = P.ReduceMean()
     self.softmax = P.Softmax(axis=-1)
     self.log_softmax = P.LogSoftmax(axis=-1)
Exemplo n.º 3
0
 def __init__(self):
     """MSE loss cell: operators (sum, square, mean) used by construct (not shown here)."""
     super(MSELoss, self).__init__()
     self.reduce_mean = P.ReduceMean()
     self.square = P.Square()
     self.sum = P.ReduceSum()
Exemplo n.º 4
0
 def __init__(self):
     """Test net: 1x1 conv ('ones' init for determinism), sharded ReduceMean, Flatten."""
     super().__init__()
     self.flat = nn.Flatten()
     # Reduction sharded over the last input dimension across 8 devices.
     self.reduce_mean = P.ReduceMean(keep_dims=False).shard(((1, 1, 1, 8),))
     self.conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=1, stride=1, pad_mode='valid',
                           has_bias=True, weight_init='ones', bias_init='ones')
Exemplo n.º 5
0
 def __init__(self, keepdims=False):
     """Wrap ReduceMean behind GpuConvertToDynamicShape for dynamic-shape testing."""
     super(ReduceMeanDynamic, self).__init__()
     self.reducemean = P.ReduceMean(keep_dims=keepdims)
     self.test_dynamic = inner.GpuConvertToDynamicShape()
Exemplo n.º 6
0
def mean(input):
    """Reduce *input* over axes (2, 3), keeping the reduced dimensions."""
    # NOTE: 'input' shadows the builtin but is kept — it is part of the public signature.
    reducer = P.ReduceMean(keep_dims=True)
    return reducer(input, (2, 3))
Exemplo n.º 7
0
    def __init__(
        self,
        model,
        scale=1.0,
        shift=0.0,
        max_atoms_num=0,
        aggregate=True,
        average=False,
        atom_types=None,
        full_connect=False,
    ):
        """Calculator wrapper around *model* for molecular property prediction.

        Args:
            model: underlying prediction network.
            scale: multiplicative factor applied to outputs (stored only here).
            shift: additive offset applied to outputs (stored only here).
            max_atoms_num: upper bound on atoms; required (> 0) when
                ``full_connect`` is True and ``atom_types`` is None.
            aggregate: whether atomic contributions are aggregated.
            average: whether aggregation averages instead of summing.
            atom_types: fixed atom types, 1-D or 2-D (first row used);
                converted to an int32 Tensor if needed.
            full_connect: build a fully connected neighbour list.

        Raises:
            ValueError: if atom types are given but yield no atoms, or if
                ``max_atoms_num`` is missing when required.
        """
        super().__init__()

        self.predict = model
        self.full_connect = full_connect

        self.scale = scale
        self.shift = shift

        self.aggregate = aggregate
        self.average = average

        self.reducesum = P.ReduceSum(keep_dims=False)
        self.molsum = P.ReduceSum(keep_dims=True)
        self.reducemean = P.ReduceMean(keep_dims=False)

        if atom_types is None:
            self.fixed_atoms = False
            self.num_atoms = 0
        else:
            self.fixed_atoms = True
            model._set_fixed_atoms(True)

            # Initialise to 0 so an unsupported rank (not 1-D or 2-D) fails the
            # check below with a clear ValueError instead of an AttributeError.
            self.num_atoms = 0
            if len(atom_types.shape) == 1:
                self.num_atoms = len(atom_types)
            elif len(atom_types.shape) == 2:
                self.num_atoms = len(atom_types[0])

            if self.num_atoms <= 0:
                # Fixed message: the original was missing the word "when"
                # (compare the 'max_atoms_num' message below).
                raise ValueError(
                    "The 'num_atoms' cannot be 0 when " +
                    "'atom_types' is not 'None' in MolCalculator!")

            # isinstance is the idiomatic type check (was: type(...) is not Tensor).
            if not isinstance(atom_types, Tensor):
                atom_types = Tensor(atom_types, ms.int32)

            self.atom_types = atom_types

        self.neighbors = None
        self.mask = None
        self.fc_neighbors = None
        if self.full_connect:
            if self.fixed_atoms:
                self.fc_neighbors = Types2FullConnectNeighbors(self.num_atoms)
                self.neighbors = self.fc_neighbors.get_full_neighbors()
            else:
                if max_atoms_num <= 0:
                    raise ValueError(
                        "The 'max_atoms_num' cannot be 0 " +
                        "when the 'full_connect' flag is 'True' and " +
                        "'atom_types' is 'None' in MolCalculator!")
                self.fc_neighbors = Types2FullConnectNeighbors(max_atoms_num)

        # Fixed neighbour lists are only possible with fixed atoms + full connectivity.
        if self.fixed_atoms and self.full_connect:
            self.distances = AtomDistances(True)
            model._set_fixed_neighbors()
        else:
            self.distances = AtomDistances(False)

        self.ones = P.Ones()
Exemplo n.º 8
0
 def __init__(self):
     """Global average pooling cell; the reduction drops the reduced axes."""
     super(GlobalAvgPooling, self).__init__()
     # keep_dims=False was passed positionally in the original.
     self.mean = P.ReduceMean(keep_dims=False)
Exemplo n.º 9
0
 def __init__(self, kernel, bias, in_channel, num_class):
     """Classifier head: ReLU, global ReduceMean, then a Dense layer."""
     super().__init__()
     self.dense = Dense(in_channel, num_class, kernel, bias)
     self.mean = P.ReduceMean(keep_dims=False)
     self.relu = P.ReLU()
    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1,
                 use_se=False,
                 se_block=False):
        """ResNet bottleneck residual block.

        Args:
            in_channel: number of input feature channels.
            out_channel: number of output feature channels.
            stride: stride of the middle 3x3 conv (and of the shortcut path).
            use_se: enables the SE-specific conv/downsample variants.
            se_block: appends a squeeze-and-excitation branch after conv3.
        """
        super(ResidualBlock, self).__init__()
        self.stride = stride
        self.use_se = use_se
        self.se_block = se_block
        # Bottleneck width; self.expansion is a class attribute defined outside
        # this view (conventionally 4 for bottleneck blocks — confirm).
        channel = out_channel // self.expansion
        self.conv1 = _conv1x1(in_channel,
                              channel,
                              stride=1,
                              use_se=self.use_se)
        self.bn1 = _bn(channel)
        # SE variant with stride: keep the 3x3 conv at stride 1 and shrink the
        # spatial size with a max-pool instead of a strided convolution.
        if self.use_se and self.stride != 1:
            self.e2 = nn.SequentialCell([
                _conv3x3(channel, channel, stride=1, use_se=True),
                _bn(channel),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='same')
            ])
        else:
            self.conv2 = _conv3x3(channel,
                                  channel,
                                  stride=stride,
                                  use_se=self.use_se)
            self.bn2 = _bn(channel)

        self.conv3 = _conv1x1(channel,
                              out_channel,
                              stride=1,
                              use_se=self.use_se)
        self.bn3 = _bn_last(out_channel)
        # Optional squeeze-and-excitation: global pool -> FC bottleneck (/4) ->
        # FC back up -> sigmoid gate multiplied onto the features.
        if self.se_block:
            self.se_global_pool = P.ReduceMean(keep_dims=False)
            self.se_dense_0 = _fc(out_channel,
                                  int(out_channel / 4),
                                  use_se=self.use_se)
            self.se_dense_1 = _fc(int(out_channel / 4),
                                  out_channel,
                                  use_se=self.use_se)
            self.se_sigmoid = nn.Sigmoid()
            self.se_mul = P.Mul()
        self.relu = nn.ReLU()

        self.down_sample = False

        # A projection shortcut is needed whenever the output shape differs.
        if stride != 1 or in_channel != out_channel:
            self.down_sample = True
        self.down_sample_layer = None

        if self.down_sample:
            if self.use_se:
                if stride == 1:
                    self.down_sample_layer = nn.SequentialCell([
                        _conv1x1(in_channel,
                                 out_channel,
                                 stride,
                                 use_se=self.use_se),
                        _bn(out_channel)
                    ])
                else:
                    # SE shortcut with stride: max-pool first, then a 1x1 conv
                    # at stride 1.
                    self.down_sample_layer = nn.SequentialCell([
                        nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='same'),
                        _conv1x1(in_channel,
                                 out_channel,
                                 1,
                                 use_se=self.use_se),
                        _bn(out_channel)
                    ])
            else:
                self.down_sample_layer = nn.SequentialCell([
                    _conv1x1(in_channel,
                             out_channel,
                             stride,
                             use_se=self.use_se),
                    _bn(out_channel)
                ])
        self.add = P.TensorAdd()
Exemplo n.º 11
0
 def __init__(self, keep_dims, axis):
     """Net reducing over *axis* with a configurable keep_dims flag."""
     super(Net, self).__init__()
     self.axis = axis
     self.reduce_mean = P.ReduceMean(keep_dims=keep_dims)
Exemplo n.º 12
0
    def __init__(self, num_init_features=64, k_R=96, G=32,
                 k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128), num_classes=1000):
        """Dual Path Network backbone.

        Args:
            num_init_features: channels produced by the stem convolution.
            k_R: width multiplier for the per-stage bottleneck width R.
            G: cardinality (groups) passed to every DualPathBlock.
            k_sec: number of blocks per stage (conv2..conv5).
            inc_sec: dense-path channel increment per stage.
            num_classes: size of the final classifier.
        """
        super(DPN, self).__init__()
        blocks = OrderedDict()

        # Stem: 7x7 conv, BN, ReLU, 3x3 max-pool.
        blocks['conv1'] = nn.SequentialCell(OrderedDict([
            ('conv', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, pad_mode='pad', padding=3)),
            ('norm', nn.BatchNorm2d(num_init_features, eps=1e-3, momentum=0.9)),
            ('relu', nn.ReLU()),
            ('maxpool', nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='same')),
        ]))

        # Stages conv2..conv5 follow one construction pattern, so build them
        # in a single loop instead of four copied paragraphs.
        in_chs = num_init_features
        for stage_idx, bw in enumerate((256, 512, 1024, 2048)):
            stage = stage_idx + 2
            inc = inc_sec[stage_idx]
            R = int((k_R * bw) / 256)
            if stage == 2:
                # First stage projects the stem output ('proj'); later stages
                # downsample ('down').
                blocks['conv2_1'] = DualPathBlock(in_chs, R, R, bw, inc, G, 'proj', False)
            else:
                blocks['conv{}_1'.format(stage)] = DualPathBlock(in_chs, R, R, bw, inc, G, 'down')
            in_chs = bw + 3 * inc
            for i in range(2, k_sec[stage_idx] + 1):
                blocks['conv{}_{}'.format(stage, i)] = DualPathBlock(in_chs, R, R, bw, inc, G, 'normal')
                in_chs += inc

        self.features = nn.SequentialCell(blocks)
        self.concat = F.Concat(axis=1)
        # Final BN+ReLU applied before pooling.
        self.conv5_x = nn.SequentialCell(OrderedDict([
            ('norm', nn.BatchNorm2d(in_chs, eps=1e-3, momentum=0.9)),
            ('relu', nn.ReLU()),
        ]))
        self.avgpool = F.ReduceMean(False)
        self.classifier = nn.Dense(in_chs, num_classes)
Exemplo n.º 13
0
 def __init__(self):
     """Net combining Mul, ReduceMean and a ReduceSum tagged as cross-batch."""
     super().__init__()
     # The 'cross_batch' primitive attribute marks this sum for the parallel engine.
     self.reduce_sum = P.ReduceSum(keep_dims=False).add_prim_attr("cross_batch", True)
     self.reduce_mean = P.ReduceMean(keep_dims=False)
     self.mul1 = P.Mul()
Exemplo n.º 14
0
 def __init__(self, strategy1, strategy2, strategy3):
     """Parallel net: Mul, ReduceMean and a cross-batch ReduceSum, each with its own strategy."""
     super().__init__()
     self.reduce_sum = P.ReduceSum(keep_dims=False).set_strategy(strategy3).add_prim_attr("cross_batch", True)
     self.reduce_mean = P.ReduceMean(keep_dims=False).set_strategy(strategy2)
     self.mul1 = P.Mul().set_strategy(strategy1)
Exemplo n.º 15
0
 def __init__(self, keep_dims=False):
     """Global average pooling cell; *keep_dims* controls whether reduced axes are kept."""
     super(GlobalAvgPooling, self).__init__()
     self.mean = P.ReduceMean(keep_dims=keep_dims)
Exemplo n.º 16
0
 def __init__(self, normalized_shape, eps=1e-5):
     """Layer normalisation with learnable gamma/beta over *normalized_shape*."""
     super(LayerNorm, self).__init__()
     self.eps = eps
     # keep_dims=True so the mean broadcasts back over the input.
     self.mean = P.ReduceMean(keep_dims=True)
     self.beta = Parameter(initializer('zeros', normalized_shape), name="beta")
     self.gamma = Parameter(initializer('ones', normalized_shape), name="gamma")
Exemplo n.º 17
0
 def __init__(self, output_size=(1, 1)):
     """Adaptive average pooling cell; target size is stored, mean keeps reduced dims."""
     super(AdaptiveAvgPool2d, self).__init__()
     self.reduce_mean = P.ReduceMean(keep_dims=True)
     self.output_size = output_size
Exemplo n.º 18
0
 def __init__(self):
     """Minimal net exposing a keep-dims ReduceMean."""
     super(Net, self).__init__()
     self.simplemean = P.ReduceMean(keep_dims=True)
Exemplo n.º 19
0
def mean_all(inputs):
    """Reduce *inputs* with ReduceMean using the operator's default axis handling."""
    op = P.ReduceMean()
    return op(inputs)
Exemplo n.º 20
0
 def __init__(self):
     """Net holding a single ReduceMean that drops the reduced axes."""
     super(Net, self).__init__()
     self.reduce_mean = P.ReduceMean(keep_dims=False)
Exemplo n.º 21
0
    def __init__(self, reduction='mean'):
        """Cross-entropy loss wrapper.

        Args:
            reduction: reduction mode stored for use elsewhere ('mean' by default).
        """
        super(CrossEntropyLoss, self).__init__()
        self.reduction = reduction
        self.cross_entropy = nn.SoftmaxCrossEntropyWithLogits()
        self.reduce_mean = P.ReduceMean()
Exemplo n.º 22
0
    def __init__(self, config):
        """Build the Wide&Deep CTR model: embedding tables, wide linear weights,
        a 5-layer deep MLP, and the elementwise ops used elsewhere in the cell.

        Args:
            config: object providing input_emb_dim, batch_size, deep_layers_act,
                init_args, weight_bias_init, emb_init, keep_prob and dropout_flag.
        """
        super(WideDeepModel, self).__init__()
        # Vocabulary sizes for the embedding tables (hard-coded for this dataset).
        emb_128_size = 650000
        emb64_single_size = 17300
        emb64_multi_size = 20900
        indicator_size = 16
        # Hidden widths of the deep tower; an output dim of 1 is appended below.
        deep_dim_list = [1024, 1024, 1024, 1024, 1024]
        # deep_dropout=0.0
        # Regularisation coefficients and learning rates (stored, not applied here).
        wide_reg_coef = [0.0, 0.0]
        deep_reg_coef = [0.0, 0.0]
        wide_lr = 0.2
        deep_lr = 1.0

        self.input_emb_dim = config.input_emb_dim
        self.batch_size = config.batch_size
        self.deep_layer_act = config.deep_layers_act
        self.init_args = config.init_args
        self.weight_init, self.bias_init = config.weight_bias_init
        self.weight_bias_init = config.weight_bias_init
        self.emb_init = config.emb_init

        self.keep_prob = config.keep_prob
        # Full layer-dim chain: input dim, hidden dims, final scalar output.
        self.layer_dims = deep_dim_list + [1]
        self.all_dim_list = [self.input_emb_dim] + self.layer_dims

        self.continue_field_size = 32
        self.emb_128_size = emb_128_size
        self.emb64_single_size = emb64_single_size
        self.emb64_multi_size = emb64_multi_size
        self.indicator_size = indicator_size

        self.wide_l1_coef, self.wide_l2_coef = wide_reg_coef
        self.deep_l1_coef, self.deep_l2_coef = deep_reg_coef
        self.wide_lr = wide_lr
        self.deep_lr = deep_lr

        # Embedding tables for the deep part: (name, shape, initializer).
        init_acts_embedding_metrix = [
            ('emb128_embedding', [self.emb_128_size, 128], self.emb_init),
            ('emb64_single', [self.emb64_single_size, 64], self.emb_init),
            ('emb64_multi', [self.emb64_multi_size, 64], self.emb_init),
            ('emb64_indicator', [self.indicator_size, 64], self.emb_init)
        ]
        var_map = init_var_dict(self.init_args, init_acts_embedding_metrix)
        self.emb128_embedding = var_map["emb128_embedding"]
        self.emb64_single = var_map["emb64_single"]
        self.emb64_multi = var_map["emb64_multi"]
        self.emb64_indicator = var_map["emb64_indicator"]

        # Wide (linear) part: one scalar weight per feature id, plus a bias.
        init_acts_wide_weight = [
            ('wide_continue_w', [self.continue_field_size], self.emb_init),
            ('wide_emb128_w', [self.emb_128_size], self.emb_init),
            ('wide_emb64_single_w', [self.emb64_single_size], self.emb_init),
            ('wide_emb64_multi_w', [self.emb64_multi_size], self.emb_init),
            ('wide_indicator_w', [self.indicator_size], self.emb_init),
            ('wide_bias', [1], self.emb_init)
        ]
        var_map = init_var_dict(self.init_args, init_acts_wide_weight)
        self.wide_continue_w = var_map["wide_continue_w"]
        self.wide_emb128_w = var_map["wide_emb128_w"]
        self.wide_emb64_single_w = var_map["wide_emb64_single_w"]
        self.wide_emb64_multi_w = var_map["wide_emb64_multi_w"]
        self.wide_indicator_w = var_map["wide_indicator_w"]
        self.wide_bias = var_map["wide_bias"]

        # Deep tower: five hidden DenseLayers chained over all_dim_list.
        self.dense_layer_1 = DenseLayer(self.all_dim_list[0],
                                        self.all_dim_list[1],
                                        self.weight_bias_init,
                                        self.deep_layer_act,
                                        drop_out=config.dropout_flag,
                                        convert_dtype=True)
        self.dense_layer_2 = DenseLayer(self.all_dim_list[1],
                                        self.all_dim_list[2],
                                        self.weight_bias_init,
                                        self.deep_layer_act,
                                        drop_out=config.dropout_flag,
                                        convert_dtype=True)
        self.dense_layer_3 = DenseLayer(self.all_dim_list[2],
                                        self.all_dim_list[3],
                                        self.weight_bias_init,
                                        self.deep_layer_act,
                                        drop_out=config.dropout_flag,
                                        convert_dtype=True)
        self.dense_layer_4 = DenseLayer(self.all_dim_list[3],
                                        self.all_dim_list[4],
                                        self.weight_bias_init,
                                        self.deep_layer_act,
                                        drop_out=config.dropout_flag,
                                        convert_dtype=True)
        self.dense_layer_5 = DenseLayer(self.all_dim_list[4],
                                        self.all_dim_list[5],
                                        self.weight_bias_init,
                                        self.deep_layer_act,
                                        drop_out=config.dropout_flag,
                                        convert_dtype=True)

        # Output layer: linear (no activation) down to a single logit.
        self.deep_predict = DenseLayer(self.all_dim_list[5],
                                       self.all_dim_list[6],
                                       self.weight_bias_init,
                                       self.deep_layer_act,
                                       drop_out=config.dropout_flag,
                                       convert_dtype=True,
                                       use_activation=False)

        # Elementwise / shape ops used by the rest of the cell (not shown here).
        self.gather_v2 = P.GatherV2()
        self.mul = P.Mul()
        self.reduce_sum_false = P.ReduceSum(keep_dims=False)
        self.reduce_sum_true = P.ReduceSum(keep_dims=True)
        self.reshape = P.Reshape()
        self.square = P.Square()
        self.shape = P.Shape()
        self.tile = P.Tile()
        self.concat = P.Concat(axis=1)
        self.cast = P.Cast()
        self.reduceMean_false = P.ReduceMean(keep_dims=False)
        self.Concat = P.Concat(axis=1)
        self.BiasAdd = P.BiasAdd()
        self.expand_dims = P.ExpandDims()
        self.flatten = Flatten()
Exemplo n.º 23
0
 def construct(self):
     """Apply ReduceMean to each of the 15 stored (x, axis, keep_dims) triples.

     Returns a 15-tuple; entry i reduces self.x<i> over self.axis<i> with
     keep_dims taken from self.keep_dims<i> (attributes set elsewhere).
     """
     return (P.ReduceMean(self.keep_dims0)(self.x0, self.axis0),
             P.ReduceMean(self.keep_dims1)(self.x1, self.axis1),
             P.ReduceMean(self.keep_dims2)(self.x2, self.axis2),
             P.ReduceMean(self.keep_dims3)(self.x3, self.axis3),
             P.ReduceMean(self.keep_dims4)(self.x4, self.axis4),
             P.ReduceMean(self.keep_dims5)(self.x5, self.axis5),
             P.ReduceMean(self.keep_dims6)(self.x6, self.axis6),
             P.ReduceMean(self.keep_dims7)(self.x7, self.axis7),
             P.ReduceMean(self.keep_dims8)(self.x8, self.axis8),
             P.ReduceMean(self.keep_dims9)(self.x9, self.axis9),
             P.ReduceMean(self.keep_dims10)(self.x10, self.axis10),
             P.ReduceMean(self.keep_dims11)(self.x11, self.axis11),
             P.ReduceMean(self.keep_dims12)(self.x12, self.axis12),
             P.ReduceMean(self.keep_dims13)(self.x13, self.axis13),
             P.ReduceMean(self.keep_dims14)(self.x14, self.axis14))
Exemplo n.º 24
0
 def __init__(self):
     """Global average pooling that keeps the reduced dims; shape/reshape ops for later use."""
     super(GlobalAvgPooling, self).__init__()
     self.reshape = P.Reshape()
     self.shape = P.Shape()
     self.mean = P.ReduceMean(keep_dims=True)
Exemplo n.º 25
0
 def __init__(self, network):
     """Wrap *network* together with sum/mean reduction ops for loss computation."""
     super(NetWithLoss, self).__init__()
     self.net = network
     self.mean = P.ReduceMean()
     self.sum = P.ReduceSum()
Exemplo n.º 26
0
    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=True,
                 device_num_each_group=1):
        """Batch normalisation base cell, with optional cross-device (group) sync.

        Args:
            num_features: number of channels to normalise (must be >= 1).
            eps: epsilon added to the variance for numerical stability.
            momentum: moving-average momentum in [0, 1]; stored internally
                as 1 - momentum.
            affine: whether gamma/beta are trainable.
            gamma_init, beta_init: initializers for the affine parameters.
            moving_mean_init, moving_var_init: initializers for running stats.
            use_batch_statistics: use batch stats (True) vs moving stats.
            device_num_each_group: devices per sync group; > 1 enables global BN.

        Raises:
            ValueError: if num_features < 1 or momentum is outside [0, 1].
        """
        super(_BatchNorm, self).__init__()
        if num_features < 1:
            raise ValueError("num_features must be at least 1")

        if momentum < 0 or momentum > 1:
            raise ValueError(
                "momentum should be a number in range [0, 1], but got {}".
                format(momentum))

        self.use_batch_statistics = use_batch_statistics
        self.num_features = num_features
        self.eps = eps
        # Running statistics are buffers, not trainable parameters.
        self.moving_mean = Parameter(initializer(moving_mean_init,
                                                 num_features),
                                     name="mean",
                                     requires_grad=False)
        self.moving_variance = Parameter(initializer(moving_var_init,
                                                     num_features),
                                         name="variance",
                                         requires_grad=False)
        # gamma/beta are trainable only when affine=True.
        self.gamma = Parameter(initializer(gamma_init, num_features),
                               name="gamma",
                               requires_grad=affine)
        self.beta = Parameter(initializer(beta_init, num_features),
                              name="beta",
                              requires_grad=affine)
        self.group = check_int_positive(device_num_each_group)
        self.is_global = False
        # Group > 1: partition devices into sync groups and create an AllReduce
        # over the group this rank belongs to (global batch norm).
        if self.group != 1:
            self.rank_id = get_rank()
            self.rank_size = get_group_size()
            self.device_list = [i for i in range(0, self.rank_size)]
            self.rank_list = self.list_group(self.device_list, self.group)
            self.rank_list_idx = len(self.rank_list)
            for i in range(self.rank_list_idx):
                if self.rank_id in self.rank_list[i] and self.group != 1:
                    self.is_global = True
                    management.create_group('group' + str(i),
                                            self.rank_list[i])
                    self.all_reduce = P.AllReduce(
                        P.ReduceOp.SUM,
                        'group' + str(i)).add_prim_attr('fusion', 1)
        self.shape = P.Shape()
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = P.Square()
        self.sqrt = P.Sqrt()
        self.cast = P.Cast()
        self.dtype = P.DType()
        self.reshape = P.Reshape()
        self.is_ascend = context.get_context("device_target") == "Ascend"
        self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE

        # GE backend needs the momentum as a Tensor; otherwise a plain float.
        if context.get_context("enable_ge"):
            self.is_ge_backend = True
            self.momentum = Tensor(1.0 - momentum, mstype.float32)
        else:
            self.is_ge_backend = False
            self.momentum = 1.0 - momentum
        # Graph mode on GE/Ascend uses BatchNorm; otherwise FusedBatchNorm.
        if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
            self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
        else:
            self.bn_train = P.FusedBatchNorm(mode=1,
                                             epsilon=self.eps,
                                             momentum=self.momentum)
        self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps)

        # Data-parallel strategies for the manual moving-average update ops.
        data_parallel_strategy = ((1, ), (1, ))
        data_parallel_strategy_one = ((1, ), ())
        self.sub_mean = P.Sub().set_strategy(data_parallel_strategy)
        self.sub_var = P.Sub().set_strategy(data_parallel_strategy)
        self.mul_mean = P.Mul().set_strategy(data_parallel_strategy_one)
        self.mul_var = P.Mul().set_strategy(data_parallel_strategy_one)
        self.assign_sub_mean = P.AssignSub().set_strategy(
            data_parallel_strategy)
        self.assign_sub_var = P.AssignSub().set_strategy(
            data_parallel_strategy)
Exemplo n.º 27
0
     'block': P.DropoutGenMask(),
     'desc_const': [(2, 2), Tensor(0.5, mstype.float32)],
     'desc_inputs': [],
     'desc_bprop': [Tensor(np.ones(1).astype(np.int8))],
     'skip': ['backward']}),
 ('DropoutDoMask', {
     'block': P.DropoutDoMask(),
     'desc_const': [Tensor(0.5)],
     'desc_inputs': [[64, 12, 128, 128], Tensor(np.ones(1572864).astype(np.uint8))],
     'desc_bprop': [[64, 12, 128, 128]]}),
 ('Dropout', {
     'block': nn.Dropout(0.5),
     'desc_inputs': [[64, 12, 128, 128]],
     'desc_bprop': [[64, 12, 128, 128]]}),
 ('ReduceMean0', {
     'block': P.ReduceMean(),
     'desc_const': [(2,)],
     'desc_inputs': [[3, 2, 2]],
     'desc_bprop': [[3, 2]]}),
 ('ReduceMean1', {
     'block': P.ReduceMean(),
     'desc_const': [2],
     'desc_inputs': [[3, 2, 2]],
     'desc_bprop': [[3, 2]]}),
 ('All', {
     'block': P.ReduceAll(),
     'desc_const': [(1,)],
     'desc_inputs': [Tensor(np.ones([3, 2]).astype(np.bool_))],
     'desc_bprop': [[3]],
     'skip': ['backward']}),
 ('DescConst', {
Exemplo n.º 28
0
 def __init__(self):
     """MSE/RMSE loss operator holder.

     Bug fix: ReduceMean's keyword argument is ``keep_dims`` — the original
     passed ``keepdims``, which MindSpore rejects with a TypeError at
     construction time (every other ReduceMean in this file uses keep_dims).
     """
     super(MSELoss, self).__init__()
     self.sum = P.Sum()
     self.mean = P.ReduceMean(keep_dims=False)
     self.pow = P.Pow()
     self.sqrt = P.Sqrt()
Exemplo n.º 29
0
 def __init__(self, output_size=None):
     """Adaptive pooling cell: store the target size; mean keeps the reduced dims."""
     super().__init__()
     self.output_size = output_size
     self.mean = P.ReduceMean(keep_dims=True)
Exemplo n.º 30
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 data_format='NCHW',
                 has_bias=False,
                 weight_init='normal',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 bias_init='zeros'):
        self.thor = True
        self.hw = kernel_size * kernel_size
        kernel_size = twice(kernel_size)
        super(Conv2d_Thor_GPU, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            data_format,
            has_bias,
            weight_init,
            bias_init,
        )
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group
                               )

        self.matrix_A_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
        self.matrix_G_dim = self.out_channels

        split_dim = 128
        matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.matrix_A_dim, self.matrix_G_dim, split_dim)
        self.matrix_A_inv = Parameter(np.zeros(matrix_A_shape).astype(np.float32),
                                      name='matrix_A_inv', requires_grad=False)
        self.matrix_G_inv = Parameter(np.zeros(matrix_G_shape).astype(np.float32),
                                      name='matrix_A_inv', requires_grad=False)
        self.broadcast_to = P.BroadcastTo(matrix_A_shape)
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
        self.img2col = P.Im2Col(kernel_size=kernel_size, stride=stride, pad_mode="same")
        self.matmul = P.MatMul(transpose_b=True)
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.mul = P.Mul()
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.batch_size = Tensor(batch_size, mstype.float16)
        self.transpose = P.Transpose()
        self.cast = P.Cast()
        self.gather = P.GatherV2()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.sqrt = P.Sqrt()
        self.reduce_mean = P.ReduceMean(keep_dims=False)
        self.damping = Parameter(Tensor(damping), name="damping_value", requires_grad=False)
        self.dampingA = Tensor(np.identity(self.matrix_A_dim), mstype.float32)
        self.dampingG = Tensor(np.identity(self.matrix_G_dim), mstype.float32)
        self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
        self.vector_matmul = P.BatchMatMul(transpose_a=True)