Example #1
    def __init__(self, dim, n_heads):
        super().__init__()

        # number of attention heads (h)
        self.n_heads = n_heads

        # size per head: v = V / h
        self.size_per_head = dim // n_heads
        scores_mul = 1.0 / np.sqrt(float(self.size_per_head))
        self.scores_mul = ms.Tensor(scores_mul, ms.float32)

        self.exones = P.Ones()((1, 1, n_heads, 1, 1), ms.int32)

        # tail shape for splitting into heads: (h, v)
        self.reshape_tail = (self.n_heads, self.size_per_head)

        self.output = Dense(dim, dim, has_bias=False)

        self.mul = P.Mul()
        self.div = P.Div()
        self.softmax = P.Softmax()
        self.bmm = P.BatchMatMul()
        self.bmmt = P.BatchMatMul(transpose_b=True)
        self.squeeze = P.Squeeze(-2)
        self.reducesum = P.ReduceSum(keep_dims=True)

        self.transpose = P.Transpose()
        self.trans_shape = (0, 1, 3, 2, 4)
Example #2
 def __init__(self):
     super().__init__()
     self.ReduceSum = P.ReduceSum(keep_dims=True)
     self.BatchMatMul_b = P.BatchMatMul(transpose_b=True)
     self.BatchMatMul_a = P.BatchMatMul(transpose_a=True)
     self.BatchMatMul = P.BatchMatMul()
     self.Mul = P.Mul()
Example #3
    def __init__(self,
                 batch_size=512,
                 d_model=768,
                 seq_length=1024,
                 num_attention_heads=12,
                 dim_per_head=64,
                 has_attention_mask=True,
                 do_return_2d_tensor=True,
                 attention_dropout=0.0,
                 compute_type=mstype.float32):
        super(MaskedSelfAttention, self).__init__()

        self.batch_size = batch_size
        self.d_model = d_model
        self.seq_length = seq_length
        self.num_heads = num_attention_heads
        self.dim_per_head = dim_per_head
        self.has_attention_mask = has_attention_mask
        assert has_attention_mask

        self.scale = Tensor([1.0 / math.sqrt(float(self.dim_per_head))],
                            dtype=compute_type)  # attention scale
        self.mask_data = Tensor([
            -10000.0,
        ], dtype=compute_type)
        self.split_head_shape = (self.batch_size, self.seq_length,
                                 self.num_heads, self.dim_per_head)

        self.c_attn = Conv1D(d_model, d_model * 3)
        self.c_proj = Conv1D(d_model, d_model)

        self.split_for_qkv = P.Split(1, 3)  # P.Split(axis, output_num)
        # self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.trans_shape = (0, 2, 1, 3)
        self.matmul_trans_b = P.BatchMatMul(transpose_b=True)
        self.matmul = P.BatchMatMul()
        self.multiply = P.Mul()

        if self.has_attention_mask:
            self.expand_dims = P.ExpandDims()
            self.sub = P.Sub()
            self.add = P.TensorAdd()
            self.cast = P.Cast()
            self.get_dtype = P.DType()

        if do_return_2d_tensor:
            self.shape_return = (batch_size * seq_length, d_model)
        else:
            self.shape_return = (batch_size, seq_length, d_model)

        self.softmax = nn.Softmax()
        self.softmax_cast = P.Cast()
        self.dropout = nn.Dropout(1 - attention_dropout)
        self.use_attention_dropout = attention_dropout > 0
Example #4
    def __init__(self,
                 is_training,
                 query_size,
                 key_size,
                 num_units,
                 normalize=False,
                 initializer_range=0.1,
                 compute_type=mstype.float16):
        super(BahdanauAttention, self).__init__()
        self.is_training = is_training
        self.mask = None
        self.query_size = query_size
        self.key_size = key_size
        self.normalize = normalize
        self.num_units = num_units
        self.linear_att = Parameter(Tensor(np.random.uniform(
            -initializer_range, initializer_range, size=[num_units]),
                                           dtype=mstype.float32),
                                    name='linear_att')
        if self.normalize:
            self.normalize_scalar = Parameter(Tensor(np.array(
                [1.0 / num_units]),
                                                     dtype=mstype.float32),
                                              name='normalize_scalar')
            self.normalize_bias = Parameter(Tensor(np.zeros(num_units),
                                                   dtype=mstype.float32),
                                            name='normalize_bias')
        self.transpose = P.Transpose()
        self.transpose_orders = (1, 0, 2)
        self.shape_op = P.Shape()

        self.linear_q = nn.Dense(
            query_size,
            num_units,
            has_bias=False,
            weight_init=Uniform(initializer_range)).to_float(compute_type)

        self.linear_k = nn.Dense(
            key_size,
            num_units,
            has_bias=False,
            weight_init=Uniform(initializer_range)).to_float(compute_type)
        self.expand = P.ExpandDims()
        self.tile = P.Tile()

        self.norm = nn.Norm(axis=-1)
        self.mul = P.Mul()
        self.matmul = P.MatMul()
        self.batchMatmul = P.BatchMatMul()
        self.tanh = nn.Tanh()

        self.matmul_trans_b = P.BatchMatMul(transpose_b=True)
        self.softmax = nn.Softmax(axis=-1)
        self.reshape = P.Reshape()
        self.cast = P.Cast()
Example #5
 def __init__(self, strategy1, strategy2):
     super().__init__()
     self.matmul1 = P.BatchMatMul().shard(strategy1)
     self.norm = P.FusedBatchNormEx()
     self.gamma = Parameter(Tensor(np.ones([64]), dtype=ms.float32),
                            name="gamma")
     self.beta = Parameter(Tensor(np.ones([64]), dtype=ms.float32),
                           name="beta")
     self.mean = Parameter(Tensor(np.ones([64]), dtype=ms.float32),
                           name="mean")
     self.var = Parameter(Tensor(np.ones([64]), dtype=ms.float32),
                          name="var")
     self.matmul2 = P.BatchMatMul().shard(strategy2)
Example #6
 def __init__(self, feature_in_dim, feature_out_dim, dropout=0.2):
     super(AttenConv, self).__init__()
     self.out_weight = Parameter(
         initializer("XavierUniform", [feature_in_dim * 2, feature_out_dim],
                     dtype=mstype.float32))
     self.cast = P.Cast()
     self.squeeze = P.Squeeze(1)
     self.concat = P.Concat(axis=1)
     self.expanddims = P.ExpandDims()
     self.softmax = P.Softmax(axis=-1)
     self.matmul = P.MatMul()
     self.matmul_3 = P.BatchMatMul()
     self.matmul_t = P.BatchMatMul(transpose_b=True)
     self.dropout = nn.Dropout(keep_prob=1 - dropout)
Example #7
    def __init__(self,
                 length,
                 depth,
                 max_relative_position,
                 initializer_range,
                 use_one_hot_embeddings=False):
        super(RelaPosEmbeddingsGenerator, self).__init__()
        self.depth = depth
        self.vocab_size = max_relative_position * 2 + 1
        self.use_one_hot_embeddings = use_one_hot_embeddings

        self.embeddings_table = Parameter(
            initializer(TruncatedNormal(initializer_range),
                        [self.vocab_size, self.depth]),
            name='embeddings_for_position')

        self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
                                                                max_relative_position=max_relative_position)
        self.reshape = P.Reshape()
        self.one_hot = P.OneHot()
        self.on_value = Tensor(1.0, mstype.float32)
        self.off_value = Tensor(0.0, mstype.float32)
        self.shape = P.Shape()
        self.gather = P.GatherV2()  # index_select
        self.matmul = P.BatchMatMul()
Example #8
 def __init__(self, index):
     super().__init__()
     self.matmul = P.BatchMatMul()
     self.relu = P.ReLU()
     self.weight = Parameter(
         Tensor(np.ones([8, 8, 8, 8]), dtype=ms.float32),
         "matmul_w" + str(index))
Example #9
 def __init__(self, hidden_size, output_size, max_length, dropout_p=0.1):
     super(AttnDecoderRNN, self).__init__()
     self.hidden_size = hidden_size
     self.output_size = output_size
     self.dropout_p = dropout_p
     self.max_length = max_length
     self.embedding = nn.Embedding(self.output_size, self.hidden_size)
     self.attn = nn.Dense(in_channels=self.hidden_size * 2,
                          out_channels=self.max_length).to_float(
                              mstype.float16)
     self.attn_combine = nn.Dense(in_channels=self.hidden_size * 2,
                                  out_channels=self.hidden_size).to_float(
                                      mstype.float16)
     self.dropout = nn.Dropout(keep_prob=1.0 - self.dropout_p)
     self.gru = GRU(hidden_size, hidden_size).to_float(mstype.float16)
     self.out = nn.Dense(in_channels=self.hidden_size,
                         out_channels=self.output_size).to_float(
                             mstype.float16)
     self.transpose = P.Transpose()
     self.concat = P.Concat(axis=2)
     self.concat1 = P.Concat(axis=1)
     self.softmax = P.Softmax(axis=1)
     self.relu = P.ReLU()
     self.log_softmax = P.LogSoftmax(axis=1)
     self.bmm = P.BatchMatMul()
     self.unsqueeze = P.ExpandDims()
     self.squeeze = P.Squeeze(1)
     self.squeeze1 = P.Squeeze(0)
     self.cast = P.Cast()
Example #10
    def __init__(self, config, scale=1.0, layer_idx=None):
        super(Attention, self).__init__()
        self.get_attention_mask = AttentionMask(config)
        self.projection = Mapping(config.embedding_size, config.embedding_size,
                                  config.compute_dtype, scale)
        self.split = P.Split(axis=-1, output_num=3)
        self.transpose = P.Transpose()
        self.reshape = P.Reshape()
        self.n_head = config.num_heads
        self.size_per_head = config.embedding_size // self.n_head
        self.concat_k = P.Concat(axis=3)
        self.concat_v = P.Concat(axis=2)
        self.multiply_data = Tensor([
            -10000.0,
        ], dtype=mstype.float32)
        self.batch_matmul = P.BatchMatMul()
        self.scale = scale
        if self.scale:
            self.scale_factor = Tensor(math.sqrt(self.size_per_head))
        if layer_idx is not None:
            self.coeff = math.sqrt(layer_idx * math.sqrt(self.size_per_head))
            self.coeff = Tensor(self.coeff)
        self.use_past = config.use_past
        self.dropout = nn.Dropout(1 - config.dropout_rate)
        self.prob_dropout = nn.Dropout(1 - config.dropout_rate)

        self.dense1 = nn.Dense(config.embedding_size,
                               config.embedding_size).to_float(
                                   config.compute_dtype)
        self.dense2 = nn.Dense(config.embedding_size,
                               config.embedding_size).to_float(
                                   config.compute_dtype)
        self.dense3 = nn.Dense(config.embedding_size,
                               config.embedding_size).to_float(
                                   config.compute_dtype)
Example #11
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 has_bias=True,
                 activation=None):
        super(Dense, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.shape_op = P.Shape()

        if isinstance(weight_init, Tensor):
            if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
               weight_init.shape[1] != in_channels:
                raise ValueError("Weight init shape error.")
        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

        self.bias = None
        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("Bias init shape error.")
            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
            self.bias_add = P.BiasAdd()
            self.tensor_add = P.TensorAdd()

        self.matmul = P.MatMul(transpose_b=True)
        self.batch_matmul = P.BatchMatMul(transpose_b=True)
        self.activation = get_activation(activation) if isinstance(activation, str) else activation
        if activation is not None and not isinstance(self.activation, (Cell, Primitive)):
            raise TypeError("The activation must be str or Cell or Primitive, but got {}.".format(activation))
        self.activation_flag = self.activation is not None
Example #12
def matmul_op_select(x1_shape, x2_shape, transpose_x1, transpose_x2):
    """select matmul op"""
    x1_dim, x2_dim = len(x1_shape), len(x2_shape)
    if x1_dim == 1 and x2_dim == 1:
        matmul_op = P.Mul()
    elif x1_dim <= 2 and x2_dim <= 2:
        transpose_x1 = False if x1_dim == 1 else transpose_x1
        transpose_x2 = False if x2_dim == 1 else transpose_x2
        matmul_op = P.MatMul(transpose_x1, transpose_x2)
    elif x1_dim == 1 and x2_dim > 2:
        matmul_op = P.BatchMatMul(False, transpose_x2)
    elif x1_dim > 2 and x2_dim == 1:
        matmul_op = P.BatchMatMul(transpose_x1, False)
    else:
        matmul_op = P.BatchMatMul(transpose_x1, transpose_x2)
    return matmul_op
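
A minimal usage sketch (illustrative, not part of the original snippet): it assumes matmul_op_select and P (mindspore.ops.operations) from the code above are in scope, and prints which operator is selected for each pair of input ranks.

print(type(matmul_op_select((16, 32), (32, 64), False, False)).__name__)        # MatMul: both inputs rank <= 2
print(type(matmul_op_select((32,), (32,), False, False)).__name__)              # Mul: two rank-1 inputs
print(type(matmul_op_select((8, 16, 32), (8, 32, 64), False, False)).__name__)  # BatchMatMul: at least one input of rank > 2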
Example #13
 def __init__(self, config, is_training=True):
     super(Decoder, self).__init__()
     self.hidden_size = config.hidden_size
     self.vocab_size = config.trg_vocab_size
     self.embedding_size = config.decoder_embedding_size
     self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
      self.rnn = GRU(input_size=self.embedding_size + self.hidden_size * 2,
                     hidden_size=self.hidden_size).to_float(config.compute_type)
     self.text_len = config.max_length
     self.shape = P.Shape()
     self.transpose = P.Transpose()
     self.p = P.Print()
     self.cast = P.Cast()
     self.concat = P.Concat(axis=2)
     self.squeeze = P.Squeeze(axis=0)
     self.expandims = P.ExpandDims()
     self.log_softmax = P.LogSoftmax(axis=1)
     weight, bias = dense_default_state(
         self.embedding_size + self.hidden_size * 3, self.vocab_size)
     self.fc = nn.Dense(self.embedding_size + self.hidden_size * 3,
                        self.vocab_size,
                        weight_init=weight,
                        bias_init=bias).to_float(config.compute_type)
     self.attention = Attention(config)
     self.bmm = P.BatchMatMul()
     self.dropout = nn.Dropout(0.7)
     self.expandims = P.ExpandDims()
     self.dtype = config.dtype
Example #14
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 has_bias=True,
                 activation=None):
        super(Dense_Thor_GPU, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.thor = True
        if isinstance(weight_init, Tensor):
            if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]))

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]))

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None
        split_dim = 128
        matrix_A_shape, matrix_G_shape = caculate_matmul_shape(self.in_channels, self.out_channels, split_dim)
        self.matrix_A_inv = Parameter(Tensor(np.zeros(matrix_A_shape).astype(np.float32)), requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros(matrix_G_shape).astype(np.float32)), requires_grad=False)
        self.broadcast_to = P.BroadcastTo(matrix_A_shape)
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), requires_grad=False)
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.mul = P.Mul()
        self.cube_matmul = P.MatMul(transpose_a=True)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.batch_size = Tensor(batch_size, mstype.float16)
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.damping = Parameter(Tensor(damping), requires_grad=False)
        self.dampingA = Tensor(np.identity(in_channels), mstype.float32)
        self.dampingG = Tensor(np.identity(out_channels), mstype.float32)
        self.cast = P.Cast()
        self.gather = P.Gather()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.add = P.Add()
        self.sqrt = P.Sqrt()
        self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
        self.vector_matmul = P.BatchMatMul(transpose_a=True)
Example #15
 def __init__(self, config):
     super(AttentionMask, self).__init__()
     self.reshape = P.Reshape()
     self.mul = P.BatchMatMul()
     ones = np.ones(shape=(config.seq_length, config.seq_length))
     self.lower_triangle_mask = Tensor(np.tril(ones), mstype.float32)
     self.multiply = P.Mul()
Example #16
 def __init__(self, shape, offset, reduce_scatter_flag, split_num):
     super().__init__()
     self.index = Tensor(np.ones(shape), dtype=ms.int32)
     self.offset = offset
     self.reduce_scatter_flag = reduce_scatter_flag
     self.split_num = split_num
     self.elu = inner.EmbeddingLookup()
     self.mm = P.BatchMatMul()
Example #17
    def __init__(self, config, scale=1.0, layer_idx=None):
        super(Attention, self).__init__()
        self.get_attention_mask = AttentionMask(config)
        self.projection = Mapping(config, config.embedding_size,
                                  config.embedding_size, scale)
        self.transpose = P.Transpose().shard(((config.dp, 1, config.mp, 1),))
        self.merger_head_transpose = P.Transpose().shard(
            ((config.dp, config.mp, 1, 1),))
        self.reshape = P.Reshape()
        self.n_head = config.num_heads
        self.size_per_head = config.embedding_size // self.n_head
        self.concat_k = P.Concat(axis=3)
        self.concat_v = P.Concat(axis=2)
        self.multiply_data = Tensor([
            -10000.0,
        ], dtype=mstype.float32)
        self.batch_matmul = P.BatchMatMul().shard(
            ((config.dp, config.mp, 1, 1), (config.dp, config.mp, 1, 1)))
        self.scale = scale
        self.real_div = P.RealDiv().shard(((config.dp, config.mp, 1, 1), ()))
        self.sub = P.Sub().shard(((1,), (config.dp, 1, 1, 1))).add_prim_attr("_side_effect", True)
        self.mul = P.Mul().shard(((config.dp, 1, 1, 1), (1,))).add_prim_attr("_side_effect", True)
        self.add = P.TensorAdd().shard(
            ((config.dp, 1, 1, 1), (config.dp, config.mp, 1, 1)))
        if self.scale:
            self.scale_factor = Tensor(math.sqrt(self.size_per_head))
        if layer_idx is not None:
            self.coeff = math.sqrt(layer_idx * math.sqrt(self.size_per_head))
            self.coeff = Tensor(self.coeff)
        self.use_past = config.use_past
        self.dropout = nn.Dropout(1 - config.dropout_rate)
        self.dropout.dropout_gen_mask.shard(((config.dp, 1, 1),))
        self.dropout.dropout_do_mask.shard(((config.dp, 1, 1),))
        self.prob_dropout = nn.Dropout(1 - config.dropout_rate)
        self.prob_dropout.dropout_gen_mask.shard(
            ((config.dp, config.mp, 1, 1),))
        self.prob_dropout.dropout_do_mask.shard(
            ((config.dp, config.mp, 1, 1),))
        self.softmax = nn.Softmax()
        self.softmax.softmax.shard(((config.dp, config.mp, 1),))
        self.expand_dims = P.ExpandDims().shard(((config.dp, 1, 1),))

        self.dense1 = nn.Dense(config.embedding_size,
                               config.embedding_size).to_float(
            config.compute_dtype)
        self.dense1.matmul.shard(((config.dp, 1), (config.mp, 1)))
        self.dense1.bias_add.shard(((config.dp, config.mp), (config.mp,)))
        self.dense2 = nn.Dense(config.embedding_size,
                               config.embedding_size).to_float(
            config.compute_dtype)
        self.dense2.matmul.shard(((config.dp, 1), (config.mp, 1)))
        self.dense2.bias_add.shard(((config.dp, config.mp), (config.mp,)))
        self.dense3 = nn.Dense(config.embedding_size,
                               config.embedding_size).to_float(
            config.compute_dtype)
        self.dense3.matmul.shard(((config.dp, 1), (config.mp, 1)))
        self.dense3.bias_add.shard(((config.dp, config.mp), (config.mp,)))
Example #18
 def __init__(self, config):
     super(AttentionMask, self).__init__()
     self.reshape = P.Reshape()
     self.mul = P.BatchMatMul().shard(
         ((config.dp, 1, 1), (config.dp, 1, 1)))  # yzz: use 64, 1, 1?
     self.expand_dim = P.ExpandDims().shard(((1, 1),))
     ones = np.ones(shape=(config.seq_length, config.seq_length))
     self.lower_triangle_mask = Tensor(np.tril(ones), mstype.float32)
     self.multiply = P.Mul().shard(((config.dp, 1, 1), (1, 1, 1)))
Example #19
 def __init__(self,
              transpose_a=False,
              transpose_b=False,
              strategy0=None,
              strategy1=None):
     super(BatchMatMul, self).__init__()
     self.add = P.TensorAdd(strategy=strategy1)
     self.batchmatmul = P.BatchMatMul(transpose_a,
                                      transpose_b,
                                      strategy=strategy0)
Example #20
 def __init__(self,
              shape,
              offset,
              strategy1=None,
              strategy2=None,
              target="Device"):
     super().__init__()
     self.index = Tensor(np.ones(shape), dtype=ms.int32)
     self.offset = offset
     self.elu = P.EmbeddingLookup().set_strategy(strategy1).add_prim_attr(
         "primitive_target", target)
     self.mm = P.BatchMatMul().set_strategy(strategy2)
Example #21
 def __init__(self,
              mul_weight,
              batch_matmul_weight,
              transpose_b=False,
              strategy1=None,
              strategy2=None):
     super().__init__()
     self.mul = P.Mul().set_strategy(strategy1)
     self.batch_matmul = P.BatchMatMul(
         transpose_b=transpose_b).set_strategy(strategy2)
     self.mul_weight = Parameter(mul_weight, "w1")
     self.batch_matmul_weight = Parameter(batch_matmul_weight, "w2")
Example #22
    def affine_grid_generator(self, height, width, theta):
        """
        This function returns a sampling grid, which when
        used with the bilinear sampler on the input feature
        map, will create an output feature map that is an
        affine transformation [1] of the input feature map.

        Input
        -----
        - height: desired height of grid/output. Used
          to downsample or upsample.

        - width: desired width of grid/output. Used
          to downsample or upsample.

        - theta: affine transform matrices of shape (num_batch, 2, 3).
          For each image in the batch, we have 6 theta parameters of
          the form (2x3) that define the affine transformation T.

        Returns
        -------
        - normalized grid (-1, 1) of shape (num_batch, 2, H, W).
          The 2nd dimension has 2 components: (x, y) which are the
          sampling points of the original image for each point in the
          target image.

        Note
        ----
        [1]: the affine transformation allows cropping, translation,
             and isotropic scaling.
        """
        shape = P.Shape()
        num_batch = shape(theta)[0]

        cast = P.Cast()
        theta = cast(theta, mindspore.float32)

        # transform the sampling grid - batch multiply
        matmul = P.BatchMatMul()
        tile = P.Tile()
        sampling_grid = tile(self.sampling_grid, (num_batch, 1, 1))
        cast = P.Cast()
        sampling_grid = cast(sampling_grid, mindspore.float32)

        batch_grids = matmul(theta, sampling_grid)
        # batch grid has shape (num_batch, 2, H*W)

        # reshape to (num_batch, H, W, 2)
        reshape = P.Reshape()
        batch_grids = reshape(batch_grids, (num_batch, 2, height, width))
        return batch_grids
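
To make the batched multiply concrete, here is a small NumPy-only sketch of the same math; make_sampling_grid, the identity theta, and the shapes are illustrative assumptions rather than code from the module above.

import numpy as np

def make_sampling_grid(height, width):
    # normalized target coordinates in [-1, 1] plus a homogeneous row of ones -> (3, H*W)
    x_t, y_t = np.meshgrid(np.linspace(-1.0, 1.0, width), np.linspace(-1.0, 1.0, height))
    return np.stack([x_t.ravel(), y_t.ravel(), np.ones(height * width)], axis=0)

num_batch, height, width = 4, 8, 8
theta = np.tile([[1.0, 0.0, 0.0],
                 [0.0, 1.0, 0.0]], (num_batch, 1, 1))                 # one identity affine transform per image
grid = np.tile(make_sampling_grid(height, width), (num_batch, 1, 1))  # (num_batch, 3, H*W)
batch_grids = theta @ grid                                            # batched matmul -> (num_batch, 2, H*W)
batch_grids = batch_grids.reshape(num_batch, 2, height, width)
print(batch_grids.shape)                                              # (4, 2, 8, 8), matching the docstring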
Example #23
    def __init__(self, transpose_x1=False, transpose_x2=False):
        super(MatMul, self).__init__()

        validator.check_value_type('transpose_x1', transpose_x1, [bool],
                                   self.cls_name)
        validator.check_value_type('transpose_x2', transpose_x2, [bool],
                                   self.cls_name)
        self.transpose_x1 = transpose_x1
        self.transpose_x2 = transpose_x2
        self.shape_op = P.Shape()
        self.matmul_op = P.MatMul(self.transpose_x1, self.transpose_x2)
        self.batch_matmul_op = P.BatchMatMul(self.transpose_x1,
                                             self.transpose_x2)
Example #24
    def __init__(self,
                 batch_size,
                 from_seq_length,
                 to_seq_length,
                 num_attention_heads=1,
                 size_per_head=512,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
                 do_return_2d_tensor=False,
                 use_relative_positions=False,
                 dtype=mstype.float32,
                 compute_type=mstype.float32):

        super(BertAttentionRelativePositionValues, self).__init__()
        self.batch_size = batch_size
        self.from_seq_length = from_seq_length
        self.to_seq_length = to_seq_length
        self.use_relative_positions = use_relative_positions
        self.size_per_head = size_per_head
        self.num_attention_heads = num_attention_heads
        self.trans_shape_position = (1, 2, 0, 3)
        self.trans_shape_relative = (2, 0, 1, 3)

        self.scores_mul = Tensor([1.0 / math.sqrt(float(self.size_per_head))],
                                 dtype=dtype)
        self.trans_shape = (0, 2, 1, 3)

        self.reshape = P.Reshape()
        self.multiply = P.Mul()
        self.transpose = P.Transpose()
        self.batch_num = batch_size * num_attention_heads
        self.matmul = P.BatchMatMul()
        self.do_return_2d_tensor = do_return_2d_tensor
        if self.do_return_2d_tensor:
            self.shp_return = (batch_size * from_seq_length,
                               num_attention_heads * size_per_head)
        else:
            self.shp_return = (batch_size, from_seq_length,
                               num_attention_heads * size_per_head)

        self.cast_compute_type = SaturateCast(dst_type=compute_type)
        self._generate_relative_positions_embeddings = \
            RelaPosEmbeddingsGenerator(length=self.to_seq_length,
                                       depth=self.size_per_head,
                                       max_relative_position=16,
                                       initializer_range=initializer_range,
                                       use_one_hot_embeddings=use_one_hot_embeddings)
        self.fill = P.Fill()
        self.multiply = P.Mul()
        self.type = P.DType()
        self.cast = P.Cast()
Example #25
    def __init__(self, config):
        super(CreateAttentionMaskFromInputMask, self).__init__()
        self.input_mask = None

        self.cast = P.Cast()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.batch_matmul = P.BatchMatMul()
        self.multiply = P.Mul()
        self.shape = P.Shape()
        # mask future positions
        ones = np.ones(shape=(config.batch_size, config.seq_length,
                              config.seq_length))
        self.lower_triangle_mask = Tensor(np.tril(ones), dtype=mstype.float32)
Example #26
    def __init__(self, config):
        super(CreateAttentionMaskFromInputMask, self).__init__()
        self.input_mask_from_dataset = config.input_mask_from_dataset
        self.input_mask = None

        if not self.input_mask_from_dataset:
            self.input_mask = initializer(
                "ones", [config.batch_size, config.seq_length], mstype.int32).to_tensor()

        self.cast = P.Cast()
        self.reshape = P.Reshape()
        self.shape = (config.batch_size, 1, config.seq_length)
        self.broadcast_ones = initializer(
            "ones", [config.batch_size, config.seq_length, 1], mstype.float32).to_tensor()
        self.batch_matmul = P.BatchMatMul()
Example #27
 def __init__(self,
              weight1,
              strategy1=None,
              strategy2=None,
              strategy3=None,
              is_parameter=True):
     super(MatMulNet, self).__init__()
     self.shape = (8, 64, 64)
     self.broadcast = P.BroadcastTo(self.shape).shard(strategy1)
     self.matmul = P.BatchMatMul().shard(strategy2)
     self.mul = P.Mul().shard(strategy3)
     if is_parameter:
         self.weight1 = Parameter(weight1, "w1")
     else:
         self.weight1 = weight1
Example #28
    def __init__(self, 头数, 尺寸, 丢弃率=0.1):
        # Chinese identifiers kept as in the source: 多头_注意力 = multi-head attention,
        # 头数 = number of heads, 尺寸 = model dimension, 丢弃率 = dropout rate, 全连接层 = dense layer
        super(多头_注意力, self).__init__()
        self.d_model = 尺寸
        self.d_k = 尺寸 // 头数
        self.d_k_Tensor = Tensor(尺寸 // 头数, mindspore.float32)
        self.h = 头数
        self.q_linear = 全连接层(尺寸, 尺寸)
        self.v_linear = 全连接层(尺寸, 尺寸)
        self.k_linear = 全连接层(尺寸, 尺寸)
        self.dropout = nn.Dropout(1 - 丢弃率)
        self.out = 全连接层(尺寸, 尺寸)
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.shape = P.Shape()

        self.batch_matmul = P.BatchMatMul()
        self.add = P.TensorAdd()
        self.transpose = P.Transpose()
        self.sqrt = P.Sqrt()
        self.softmax = P.Softmax(-1)
Example #29
    def __init__(self,
                 in_channel,
                 out_channel,
                 in_drop_ratio=0.0,
                 coef_drop_ratio=0.0,
                 residual=False,
                 coef_activation=nn.LeakyReLU(),
                 activation=nn.ELU()):
        super(AttentionHead, self).__init__()
        self.in_channel = Validator.check_positive_int(in_channel)
        self.out_channel = Validator.check_positive_int(out_channel)
        self.in_drop_ratio = in_drop_ratio
        self.in_drop = nn.Dropout(keep_prob=1 - in_drop_ratio)
        self.in_drop_2 = nn.Dropout(keep_prob=1 - in_drop_ratio)
        self.feature_transform = GNNFeatureTransform(
            in_channels=self.in_channel,
            out_channels=self.out_channel,
            has_bias=False)

        self.f_1_transform = GNNFeatureTransform(in_channels=self.out_channel,
                                                 out_channels=1)
        self.f_2_transform = GNNFeatureTransform(in_channels=self.out_channel,
                                                 out_channels=1)
        self.softmax = nn.Softmax()

        self.coef_drop = nn.Dropout(keep_prob=1 - coef_drop_ratio)
        self.batch_matmul = P.BatchMatMul()
        self.bias_add = P.BiasAdd()
        self.bias = Parameter(initializer('zeros', self.out_channel),
                              name='bias')
        self.residual = Validator.check_bool(residual)
        if self.residual:
            if in_channel != out_channel:
                self.residual_transform_flag = True
                self.residual_transform = GNNFeatureTransform(
                    in_channels=self.in_channel, out_channels=self.out_channel)
            else:
                self.residual_transform = None
        self.coef_activation = coef_activation
        self.activation = activation
Example #30
    def __init__(self,
                 length,
                 depth,
                 max_relative_position,
                 initializer_range,
                 use_one_hot_embeddings=False):
        super(RelaPosEmbeddingsGenerator, self).__init__()
        self.depth = depth
        self.vocab_size = max_relative_position * 2 + 1
        self.use_one_hot_embeddings = use_one_hot_embeddings

        self.embeddings_table = Parameter(
            initializer(TruncatedNormal(initializer_range),
                        [self.vocab_size, self.depth]))

        self.relative_positions_matrix = RelaPosMatrixGenerator(length=length,
                                                                max_relative_position=max_relative_position)
        self.reshape = P.Reshape()
        self.one_hot = nn.OneHot(depth=self.vocab_size)
        self.shape = P.Shape()
        self.gather = P.Gather()  # index_select
        self.matmul = P.BatchMatMul()