Example #1
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 has_bias=True,
                 activation=None):
        super(Dense_Thor_GPU, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.thor = True
        if isinstance(weight_init, Tensor):
            if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]))

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None
        split_dim = 128
        matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.in_channels, self.out_channels, split_dim)
        self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)), requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)), requires_grad=False)
        self.broadcast_to = P.BroadcastTo(matrix_A_shape)
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.mul = P.Mul()
        self.cube_matmul = P.MatMul(transpose_a=True)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.batch_size = Tensor(batch_size, mstype.float16)
        self.getG = P.InsertGradientOf(self.save_gradient)  # registers save_gradient (defined elsewhere in the class) as a backward hook
        self.damping = Parameter(Tensor(damping), requires_grad=False)
        self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
        self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
        self.cast = P.Cast()
        self.gather = P.Gather()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.add = P.Add()
        self.sqrt = P.Sqrt()
        self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
        self.vector_matmul = P.BatchMatMul(transpose_a=True)
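Example #1 registers `self.getG = P.InsertGradientOf(self.save_gradient)` but, being only an `__init__`, never shows the hook itself. As a reference for the contract it relies on, below is a minimal self-contained sketch (PyNative mode; the `CaptureGrad` and `saved_g` names are illustrative, not from the source): the wrapped function receives the gradient arriving at the tagged tensor during the backward pass and must return the gradient that continues to propagate.

import numpy as np
from mindspore import Tensor, Parameter, nn, context
from mindspore.ops import operations as P
from mindspore.ops import composite as C

context.set_context(mode=context.PYNATIVE_MODE)


class CaptureGrad(nn.Cell):
    def __init__(self):
        super(CaptureGrad, self).__init__()
        self.saved_g = Parameter(Tensor(np.zeros([2, 2], np.float32)),
                                 name="saved_g", requires_grad=False)
        self.assign = P.Assign()
        self.getG = P.InsertGradientOf(self.save_gradient)

    def save_gradient(self, dout):
        # Capture the incoming gradient, then pass it on unchanged.
        self.assign(self.saved_g, dout)
        return dout

    def construct(self, x):
        x = self.getG(x)  # tag x: save_gradient fires during backprop
        return x * x


grad_all = C.GradOperation(get_all=True)
net = CaptureGrad()
x = Tensor(np.full([2, 2], 3.0, np.float32))
print(grad_all(net)(x))       # d(x*x)/dx = 2x
print(net.saved_g.asnumpy())  # the same gradient, captured by the hook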
Example #2
 def __init__(self):
     super(InsertGradientNet, self).__init__()
     self.gather = P.GatherV2()
     self.damping = Tensor(np.array([0.03, 0.03], np.float32))
     self.cov_step = Parameter(0, name="cov_step", requires_grad=False)
     self.freq = Tensor(278, ms.int32)
     self.getG = P.InsertGradientOf(self.save_gradient)
Example #3
 def __init__(self):
     super(AssignWhenInsertGrad, self).__init__()
     self.gather = P.Gather()
     self.damping = Tensor(np.array([0.03, 0.03]).astype(np.float32))
     self.cov_step = ms.Parameter(0, name="cov_step", requires_grad=False)
     self.freq = Tensor(278, ms.int32)
     self.getG = P.InsertGradientOf(self.save_gradient)
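Examples #2 and #3 are test cells and likewise omit `save_gradient`. The fields they prepare (`gather`, `damping`, `cov_step`, `freq`) suggest the usual THOR bookkeeping; the runnable sketch below is a hypothetical completion (the class name, the hook body, and a `freq` of 1 are mine, chosen for the demo), showing the pattern of indexing a damping schedule and advancing the step counter from inside the hook.

import numpy as np
import mindspore as ms
from mindspore import Tensor, Parameter, nn, context
from mindspore.ops import operations as P
from mindspore.ops import composite as C

context.set_context(mode=context.PYNATIVE_MODE)


class InsertGradientDemo(nn.Cell):
    def __init__(self):
        super(InsertGradientDemo, self).__init__()
        self.gather = P.Gather()
        self.assign = P.Assign()
        self.damping = Tensor(np.array([0.03, 0.03], np.float32))
        self.cov_step = Parameter(Tensor(0, ms.int32), name="cov_step",
                                  requires_grad=False)
        self.freq = Tensor(1, ms.int32)
        self.getG = P.InsertGradientOf(self.save_gradient)

    def save_gradient(self, dout):
        # Look up the damping value for the current step (in the real THOR
        # hooks it feeds the covariance update), advance the counter, and
        # forward the gradient untouched.
        _ = self.gather(self.damping, self.cov_step, 0)
        self.assign(self.cov_step, self.cov_step + self.freq)
        return dout

    def construct(self, x):
        return self.getG(x) * x


grad_all = C.GradOperation(get_all=True)
net = InsertGradientDemo()
grad_all(net)(Tensor(np.ones([2], np.float32)))
print(net.cov_step.asnumpy())  # advanced once by the hook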
Example #4
    def __init__(
        self,
        vocab_size,
        embedding_size,
        embedding_shape,
        use_one_hot_embeddings=False,
        initializer_range=0.02,
        batch_size=12,
        damping=0.03,
        loss_scale=1,
        frequency=100,
    ):
        super(Embedding_Thor, self).__init__()
        self.vocab_size = vocab_size
        self.use_one_hot_embeddings = use_one_hot_embeddings
        self.embedding_table = Parameter(
            initializer(TruncatedNormal(initializer_range),
                        [vocab_size, embedding_size]))
        self.thor = True
        self.expand = P.ExpandDims()
        self.shape_flat = (-1, )
        self.gather = P.Gather()
        self.one_hot = P.OneHot()
        self.on_value = Tensor(1.0, mstype.float32)
        self.off_value = Tensor(0.0, mstype.float32)
        self.array_mul = P.MatMul()
        self.reshape = P.Reshape()
        self.em_shape = tuple(embedding_shape)
        self.shape = P.Shape()
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)

        self.matrix_A_inv = Parameter(Tensor(
            np.zeros([vocab_size]).astype(np.float16)),
                                      requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(
            np.zeros([embedding_size, embedding_size]).astype(np.float16)),
                                      requires_grad=False)
        self.fake_G = Tensor(
            np.zeros([embedding_size, embedding_size]).astype(np.float16))
        self.dampingA = Tensor(np.ones([vocab_size]).astype(np.float32))
        self.dampingG = Tensor(np.identity(embedding_size), mstype.float32)
        self.cov_step = Parameter(initializer(0, [1], mstype.int32),
                                  requires_grad=False)
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.damping = damping
        self.sqrt = P.Sqrt()
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.vector_matmul = P.CusBatchMatMul()
        self.cholesky = P.CusCholeskyTrsm()
        self.matrix_combine = P.CusMatrixCombine()
        self.reduce_sum = P.ReduceSum(keep_dims=False)
        self.inv = P.Inv()
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.batch_size = batch_size
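A detail worth noting in this example: `matrix_A_inv` is a vector of length `vocab_size`, not a full matrix. That works because an embedding's input covariance is diagonal: the inputs are one-hot rows, so XᵀX just counts how often each id occurs. A quick plain-numpy check (illustrative values, not MindSpore code):

import numpy as np

ids = np.array([1, 3, 3, 0])                        # toy batch of token ids
vocab_size = 5
one_hot = np.eye(vocab_size, dtype=np.float32)[ids]
cov = one_hot.T @ one_hot                           # input covariance X^T X
counts = np.bincount(ids, minlength=vocab_size).astype(np.float32)
assert np.allclose(cov, np.diag(counts))            # diagonal, so a vector suffices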
Example #5
 def __init__(self):
     super(Mul, self).__init__()
     self.matrix_w = mindspore.Parameter(Tensor(
         np.ones([2, 2], np.float32)),
                                         name="matrix_w")
     self.matrix_g = mindspore.Parameter(Tensor(
         np.ones([2, 2], np.float32)),
                                         name="matrix_g")
     self.get_g = P.InsertGradientOf(self.save_gradient)
Example #6
 def __init__(self,
              vocab_size,
              embedding_size,
              use_one_hot=False,
              embedding_table='normal',
              dtype=mstype.float32,
              padding_idx=None):
     super(Embedding_Thor, self).__init__()
     self.vocab_size = Validator.check_value_type('vocab_size', vocab_size,
                                                  [int], self.cls_name)
     self.embedding_size = Validator.check_value_type(
         'embedding_size', embedding_size, [int], self.cls_name)
     Validator.check_value_type('use_one_hot', use_one_hot, [bool],
                                self.cls_name)
     Validator.check_subclass("dtype", dtype, mstype.number_type,
                              self.cls_name)
     self.use_one_hot = use_one_hot
     self.dtype = dtype
     self.init_tensor = initializer(embedding_table,
                                    [vocab_size, embedding_size])
     self.padding_idx = padding_idx
     if padding_idx is not None:
         self.padding_idx = Validator.check_int_range(
             padding_idx, 0, vocab_size, Rel.INC_BOTH, "padding_idx",
             self.cls_name)
         self.init_tensor = self.init_tensor.to_tensor().asnumpy()
         self.init_tensor[self.padding_idx] = 0
     self.embedding_table = Parameter(self.init_tensor,
                                      name='embedding_table')
     self.expand = P.ExpandDims()
     self.reshape_flat = P.Reshape()
     self.shp_flat = (-1, )
     self.gather = P.GatherV2()
     self.one_hot = P.OneHot()
     self.on_value = Tensor(1.0, self.dtype)
     self.off_value = Tensor(0.0, self.dtype)
     self.array_mul = P.MatMul()
     self.reshape = P.Reshape()
     self.get_shp = P.Shape()
     self.thor = True
     self.matrix_A = Parameter(Tensor(
         np.zeros([vocab_size]).astype(np.float32)),
                               name='matrix_A',
                               requires_grad=False)
     self.matrix_G = Parameter(Tensor(
         np.zeros([embedding_size, embedding_size]).astype(np.float32)),
                               name="matrix_G",
                               requires_grad=False)
     self.reduce_sum = P.ReduceSum(keep_dims=False)
     self.getG = P.InsertGradientOf(self.save_gradient)
     self.cast = P.Cast()
     if context.get_context("device_target") == "Ascend":
         self.cube_matmul = P.CusMatMulCube(transpose_a=True)
     else:
         self.cube_matmul = P.MatMul(transpose_a=True)
     self.mul = P.Mul()
Example #7
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 has_bias=True,
                 activation=None):
        super(Dense_Thor, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.thor = True
        if isinstance(weight_init, Tensor):
            if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None

        self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), name='matrix_A_inv',
                                      requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), name="matrix_G_inv",
                                      requires_grad=False)
        self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))

        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.matrix_combine = P.CusMatrixCombine()
        self.cholesky = P.CusCholeskyTrsm()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = Tensor(damping)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.vector_matmul = P.CusBatchMatMul()
        self.pad = P.Pad(((0, 24), (0, 24)))
        self.pad1 = P.Pad(((0, 8), (0, 8)))
        self.slice = P.Slice()
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
        self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
        self.fused_abs_max1 = P.CusFusedAbsMax1([1000, 1000])
        self.fused_abs_max2 = P.CusFusedAbsMax1()
        self.log = P.Log()
        self.exp = P.Exp()
        self.dampingA = Tensor(np.identity(2048), mstype.float32)
        self.dampingG = Tensor(np.identity(1024), mstype.float32)
        self.add = P.TensorAdd()
        self.sqrt = P.Sqrt()
        self.getG = P.InsertGradientOf(self.save_gradient)
Example #8
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 data_format='NCHW',
                 has_bias=False,
                 weight_init='normal',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 bias_init='zeros'):
        self.thor = True
        ksizes = (1, kernel_size, kernel_size, 1)
        self.hw = kernel_size * kernel_size
        strides = (1, stride, stride, 1)
        kernel_size = twice(kernel_size)
        super(Conv2d_Thor, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            data_format,
            has_bias,
            weight_init,
            bias_init,
        )
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group
                               )

        self.img2col = P.CusImg2Col(ksizes=ksizes, strides=strides)
        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.matrix_combine = P.CusMatrixCombine()
        self.cholesky = P.CusCholeskyTrsm()
        self.transpose02314 = P.CusTranspose02314()
        self.matrix_A_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
        self.matrix_G_dim = self.out_channels
        self.matrix_A_device_shape, self.matrix_A_device_dim = caculate_device_shape(self.matrix_A_dim,
                                                                                     self.in_channels, True)
        self.matrix_G_device_shape, self.matrix_G_device_dim = caculate_device_shape(self.matrix_G_dim,
                                                                                     self.in_channels, False)
        self.matrix_A_device_temp_shape = (
            self.matrix_A_device_shape[0], self.matrix_A_device_shape[2], self.matrix_A_device_shape[1],
            self.matrix_A_device_shape[3])
        self.matrix_G_device_temp_shape = (
            self.matrix_G_device_shape[0], self.matrix_G_device_shape[2], self.matrix_G_device_shape[1],
            self.matrix_G_device_shape[3])
        self.matrix_A_inv = Parameter(
            Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16), self.matrix_A_device_shape)),
            name='matrix_A_inv', requires_grad=False)
        self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
        self.matrix_G_inv = Parameter(
            Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape)),
            name="matrix_G_inv", requires_grad=False)

        self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
        self.fake_G = Tensor(
            np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))

        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = Tensor(damping)
        self.vector_matmul = P.CusBatchMatMul()
        self.diag_block_dim = 128
        self.channels_slice_flag = False
        if self.in_channels % C0 != 0:
            self.channels_slice_flag = True

        self.padA_flag = False
        if (self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim \
                and self.matrix_A_dim > self.diag_block_dim:
            self.padA_flag = True
            pad_dim = self.diag_block_dim - self.matrix_A_dim % self.diag_block_dim
            self.padA = P.Pad(((0, pad_dim), (0, pad_dim)))
        self.device_shape_pad_flag = False
        if self.matrix_A_dim != self.matrix_A_device_dim:
            self.device_shape_pad_flag = True
            self.device_shape_pad = P.Pad(((0, 0), (0, C0 - self.in_channels), (0, 0), (0, C0 - self.in_channels)))
        self.slice = P.Slice()
        self.gather = P.GatherV2()
        self.freq = Tensor(frequency, mstype.int32)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.axis = 0

        dampingA_dim = self.matrix_A_dim
        if (self.matrix_A_dim % self.diag_block_dim) != 0 and self.matrix_A_dim > self.diag_block_dim:
            dampingA_dim = (self.matrix_A_dim // self.diag_block_dim + 1) * self.diag_block_dim
        dampingG_dim = self.matrix_G_dim
        if (self.matrix_G_dim % self.diag_block_dim) != 0 and self.matrix_G_dim > self.diag_block_dim:
            dampingG_dim = (self.matrix_G_dim // self.diag_block_dim + 1) * self.diag_block_dim

        self.dampingA = Tensor(np.identity(dampingA_dim), mstype.float32)
        self.dampingG = Tensor(np.identity(dampingG_dim), mstype.float32)
        self.fused_abs_max1 = P.CusFusedAbsMax1([self.matrix_A_dim, self.matrix_A_dim])
        self.fused_abs_max2 = P.CusFusedAbsMax1()
        self.log = P.Log()
        self.exp = P.Exp()
        self.sqrt = P.Sqrt()
        self.getG = P.InsertGradientOf(self.save_gradient)
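The `padA` branch above rounds `matrix_A_dim` up to a multiple of `diag_block_dim` so the blocked Cholesky sees full 128x128 blocks. A plain-Python check of that arithmetic (the value 300 is illustrative):

matrix_A_dim, diag_block_dim = 300, 128
if matrix_A_dim % diag_block_dim != 0 and matrix_A_dim > diag_block_dim:
    pad_dim = diag_block_dim - matrix_A_dim % diag_block_dim
    assert matrix_A_dim + pad_dim == 384  # rounded up to 3 blocks of 128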
Example #9
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 has_bias=True,
                 activation=None):
        super(Dense_Thor, self).__init__()
        self.thor = True
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        if isinstance(weight_init, Tensor):
            if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("Weight init shape error.")
        self.weight = Parameter(initializer(weight_init,
                                            [out_channels, in_channels]),
                                name="weight")

        self.bias = None
        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("Bias init shape error.")
            self.bias = Parameter(initializer(bias_init, [out_channels]),
                                  name="bias")
            self.bias_add = P.BiasAdd()

        self.matmul = P.MatMul(transpose_b=True)
        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None

        self.matrix_A = Parameter(Tensor(
            np.zeros([in_channels, in_channels]).astype(np.float32)),
                                  name='matrix_A',
                                  requires_grad=False)
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.mul = P.Mul()
        self.is_Ascend = True
        if context.get_context("device_target") == "Ascend":
            if out_channels == 1001:
                self.matrix_G = Parameter(Tensor(
                    np.zeros([1024, 1024]).astype(np.float32)),
                                          name='matrix_G',
                                          requires_grad=False)
                self.pad = P.Pad(((0, 23), (0, 23)))  # pads 1001 -> 1024
                self.pad1 = P.Pad(((0, 7), (0, 7)))
                self.slice = P.Slice()
                self.add = P.TensorAdd()
            else:
                self.matrix_G = Parameter(Tensor(
                    np.eye(out_channels).astype(np.float32)),
                                          name="matrix_G",
                                          requires_grad=False)
                self.abs = P.Abs()
                self.reduce_max = P.ReduceMax(keep_dims=False)
                self.neg = P.Neg()
                self.reduce_sum = P.ReduceSum()
            self.matmul = P.MatMul(transpose_b=True)
            self.cube_matmul = P.CusMatMulCube(transpose_a=True)
            self.cast = P.Cast()
            self.is_nsp_layer = (out_channels == 2)
        else:
            self.is_Ascend = False
            self.matrix_G = Parameter(Tensor(
                np.eye(out_channels).astype(np.float32)),
                                      name="matrix_G",
                                      requires_grad=False)
            self.cube_matmul = P.MatMul(transpose_a=True)
        self.getG = P.InsertGradientOf(self.save_gradient)
Example #10
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros'):
        kernel_size = twice(kernel_size)
        stride = twice(stride)
        self._dilation = dilation
        dilation = twice(dilation)
        super(Conv2d_Thor,
              self).__init__(in_channels, out_channels, kernel_size, stride,
                             pad_mode, padding, dilation, group, has_bias,
                             weight_init, bias_init)
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group)
        self._init_depthwise_conv2d(weight_init)
        self.bias_add = P.BiasAdd()

        self.thor = True
        self.hw = kernel_size[0] * kernel_size[1]
        self.matrix_A_dim = self.in_channels * self.kernel_size[
            0] * self.kernel_size[1]
        self.matrix_G_dim = self.out_channels
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.A_normalizer = Parameter(initializer(0, [1], mstype.float32),
                                      name="A_normalizer",
                                      requires_grad=False)
        self.G_normalizer = Parameter(initializer(0, [1], mstype.float32),
                                      name="G_normalizer",
                                      requires_grad=False)
        self.is_Ascend = True
        if context.get_context("device_target") == "Ascend":
            ksizes = (1, kernel_size[0], kernel_size[1], 1)
            strides = (1, stride[0], stride[1], 1)
            self.img2col = P.CusImg2Col(ksizes=ksizes, strides=strides)
            self.cube_matmul = P.CusMatMulCube(transpose_a=True)
            self.transpose02314 = P.CusTranspose02314()
            dampingA_dim = self.matrix_A_dim
            self.diag_block_dim = 128
            if (self.matrix_A_dim % self.diag_block_dim
                ) != 0 and self.matrix_A_dim > self.diag_block_dim:
                dampingA_dim = (self.matrix_A_dim // self.diag_block_dim +
                                1) * self.diag_block_dim
            dampingG_dim = self.matrix_G_dim
            if (self.matrix_G_dim % self.diag_block_dim
                ) != 0 and self.matrix_G_dim > self.diag_block_dim:
                dampingG_dim = (self.matrix_G_dim // self.diag_block_dim +
                                1) * self.diag_block_dim
            self.matrix_A_cov = Parameter(Tensor(
                np.zeros([dampingA_dim, dampingA_dim]).astype(np.float32)),
                                          name='matrix_A',
                                          requires_grad=False)
            self.matrix_G_cov = Parameter(Tensor(
                np.zeros([dampingG_dim, dampingG_dim]).astype(np.float32)),
                                          name='matrix_G',
                                          requires_grad=False)

            self.channels_slice_flag = False
            self.C0 = 16
            if self.in_channels % self.C0 != 0:
                self.channels_slice_flag = True
            self.padA_flag = False
            if (self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim \
                    and self.matrix_A_dim > self.diag_block_dim:
                self.padA_flag = True
                pad_dim = self.diag_block_dim - self.matrix_A_dim % self.diag_block_dim
                self.padA = P.Pad(((0, pad_dim), (0, pad_dim)))
            self.slice = P.Slice()
        else:
            self.is_Ascend = False
            self.img2col = P.Im2Col(kernel_size=kernel_size,
                                    stride=stride,
                                    pad_mode="same")
            self.matmul = P.MatMul(transpose_b=True)
            self.reduce_mean = P.ReduceMean(keep_dims=False)
            self.matrix_A_cov = Parameter(Tensor(
                np.zeros([self.matrix_A_dim,
                          self.matrix_A_dim]).astype(np.float32)),
                                          name='matrix_A',
                                          requires_grad=False)
            self.matrix_G_cov = Parameter(Tensor(
                np.zeros([self.matrix_G_dim,
                          self.matrix_G_dim]).astype(np.float32)),
                                          name='matrix_G',
                                          requires_grad=False)
        self.getG = P.InsertGradientOf(self.save_gradient)
Example #11
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=100,
                 has_bias=False,
                 activation=None,
                 batch_size=12):
        super(Dense_Thor, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.thor = True
        if isinstance(weight_init, Tensor):
            if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init,
                                            [out_channels, in_channels]),
                                name="weight")

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]),
                                  name="bias")

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None
        self.matrix_A_inv = Parameter(Tensor(
            np.zeros([in_channels, in_channels]).astype(np.float16)),
                                      name='matrix_A_inv',
                                      requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(
            np.zeros([out_channels, out_channels]).astype(np.float16)),
                                      name="matrix_G_inv",
                                      requires_grad=False)
        self.fake_G = Tensor(
            np.zeros([out_channels, out_channels]).astype(np.float16))

        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.matrix_combine = P.CusMatrixCombine()
        self.cholesky = P.CusCholeskyTrsm()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.cov_step = Parameter(initializer(0, [1], mstype.int32),
                                  name="cov_step",
                                  requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = damping
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.vector_matmul = P.CusBatchMatMul()
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.abs = P.Abs()
        self.reduce_max = P.ReduceMax(keep_dims=False)
        self.log = P.Log()
        self.exp = P.Exp()
        self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
        self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
        self.sqrt = P.Sqrt()
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.batch_size = batch_size
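`dampingA` and `dampingG` above are identity matrices sized to the layer; in THOR-style updates they carry the Tikhonov term, i.e. the covariance is regularized as A + damping·I before inversion. A small numpy sketch of that step (toy values, not the layer's actual pipeline):

import numpy as np

A = np.array([[2.0, 1.0],
              [1.0, 2.0]], dtype=np.float32)  # toy covariance
damping = 0.03
A_damped = A + damping * np.identity(2, dtype=np.float32)
A_inv = np.linalg.inv(A_damped)               # the kind of result matrix_A_inv holds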
Example #12
from ....mindspore_test_framework.utils.debug_util import PrintShapeTypeCell, PrintGradShapeTypeCell

grad_by_list = C.GradOperation(get_by_list=True)
grad_all = C.GradOperation(get_all=True)


def setup_module(module):
    context.set_context(mode=context.PYNATIVE_MODE)


def stop_gradient(dx):
    """ stop_gradient """
    return C.zeros_like(dx)


stop = P.InsertGradientOf(stop_gradient)


def test_InsertGradientOf_1():
    """ test_InsertGradientOf_1 """
    def stop_test(x, y):
        x = stop(x)
        c = x * y
        return c

    @ms_function
    def f(x, y):
        return grad_all(stop_test)(x, y)

    print("stop_gradient:", f(1, 2))
Example #13
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 data_format='NCHW',
                 has_bias=False,
                 weight_init='normal',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 bias_init='zeros'):
        self.skfac = True
        self.hw = kernel_size * kernel_size
        kernel_size = twice(kernel_size)
        super(Conv2d_SKFAC_GPU, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            data_format,
            has_bias,
            weight_init,
            bias_init,
        )
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group)

        self.matrix_A_dim = self.in_channels * self.kernel_size[
            0] * self.kernel_size[1]
        self.matrix_G_dim = self.out_channels
        split_dim = 128
        self.matrix_A_inv = Parameter(np.zeros(
            (self.matrix_A_dim, self.matrix_A_dim)).astype(np.float32),
                                      requires_grad=False)
        self.matrix_G_inv = Parameter(np.zeros(
            (self.matrix_G_dim, self.matrix_G_dim)).astype(np.float32),
                                      requires_grad=False)

        self.cov_step = Parameter(initializer(0, [1], mstype.int32),
                                  requires_grad=False)
        self.img2col = P.Im2Col(kernel_size=kernel_size,
                                stride=stride,
                                pad_mode="same")
        self.matmul = P.MatMul(transpose_a=True)
        self.matmul_ = P.MatMul()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.mul = P.Mul()
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.batch_size = Tensor(batch_size, mstype.float16)
        self.transpose = P.Transpose()
        self.cast = P.Cast()
        self.gather = P.Gather()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.sqrt = P.Sqrt()
        self.reduce_mean = P.ReduceMean(keep_dims=False)
        self.damping = Parameter(Tensor(damping), requires_grad=False)
        self.dampingA = Tensor(np.identity(batch_size), mstype.float32)
        self.dampingG = Tensor(np.identity(batch_size), mstype.float32)
        self.I_G = Tensor(np.identity(out_channels), mstype.float32)
        self.I_A = Tensor(np.identity(self.matrix_A_dim), mstype.float32)
        self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
        self.vector_matmul = P.BatchMatMul(transpose_a=True)
        self.batch_coefficient = Tensor((1 / 32)**0.5, mstype.float32)  # note: hard-codes the default batch_size of 32
Example #14
 def __init__(self, msg):
     super(PrintGradShapeTypeCell, self).__init__()
     self.print_shape_type = P.InsertGradientOf(PrintShapeType(msg))
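`PrintShapeType(msg)` is a debug callable from the test framework; the same pattern works with any function of the gradient. A minimal self-contained variant (PyNative mode; `print_grad_info` and `Square` are my names, not the framework's) that logs shape and dtype as the gradient flows past the tagged point:

import numpy as np
from mindspore import Tensor, nn, context
from mindspore.ops import operations as P
from mindspore.ops import composite as C

context.set_context(mode=context.PYNATIVE_MODE)


def print_grad_info(dout):
    # Log the gradient, then return it unchanged so it keeps flowing.
    print("grad shape:", dout.shape, "dtype:", dout.dtype)
    return dout


watch = P.InsertGradientOf(print_grad_info)


class Square(nn.Cell):
    def construct(self, x):
        x = watch(x)
        return x * x


grad_all = C.GradOperation(get_all=True)
print(grad_all(Square())(Tensor(np.ones([2, 3], np.float32))))

If the hook has to run in graph mode, P.Print is the graph-safe way to log from inside it.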