Example #1
    def __init__(self, decay_policy, decay_rate, cur_noise_multiplier,
                 init_noise_multiplier):
        super(_MechanismsParamsUpdater, self).__init__()
        self._decay_policy = decay_policy
        self._decay_rate = decay_rate
        self._cur_noise_multiplier = cur_noise_multiplier
        self._init_noise_multiplier = init_noise_multiplier

        self._div = P.Div()
        self._add = P.Add()
        self._assign = P.Assign()
        self._sub = P.Sub()
        self._one = Tensor(1, mstype.float32)
        self._mul = P.Mul()
        self._exp = P.Exp()
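
Only the constructor appears in this excerpt. A purely illustrative sketch of how the stored ops could drive a 'Time'-style decay of the noise multiplier (an assumption; the original update rule is not shown):

    def construct(self):
        # illustrative assumption, not necessarily the original rule:
        # cur <- init / (init / cur + decay_rate)
        temp = self._add(self._div(self._init_noise_multiplier,
                                   self._cur_noise_multiplier),
                         self._decay_rate)
        return self._assign(self._cur_noise_multiplier,
                            self._div(self._init_noise_multiplier, temp))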
Example #2
 def __init__(self,
              attention_mask_shape,
              has_attention_mask=False,
              dtype=mstype.float32):
     super(BertAttentionMaskBackward, self).__init__()
     self.has_attention_mask = has_attention_mask
     self.multiply_data = Tensor([-1000.0, ], dtype=dtype)
     self.multiply = P.Mul()
     self.attention_mask = Tensor(np.ones(shape=attention_mask_shape).astype(np.float32))
     if self.has_attention_mask:
         self.expand_dims = P.ExpandDims()
         self.sub = P.Sub()
         self.add = P.TensorAdd()
         self.cast = P.Cast()
         self.get_dtype = P.DType()
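Example #3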
 def __init__(self):
     super().__init__()
     self.relu = nn.ReLU()
     self.softmax = nn.Softmax()
     self.mul = P.Mul()
     self.add = P.Add()
     self.sub = P.Sub()
     self.div = P.Div()
     self.assign = P.Assign()
     param_a = np.full((1, ), 5, dtype=np.float32)
     self.param_a = Parameter(Tensor(param_a), name='a')
     param_b = np.full((1, ), 2, dtype=np.float32)
     self.param_b = Parameter(Tensor(param_b), name='b')
     param_c = np.full((1, ), 16, dtype=np.float32)
     self.param_c = Parameter(Tensor(param_c), name='c')
Example #4
 def __init__(self):
     super(NpuFloatNet, self).__init__()
     self.mul = P.Mul()
     self.alloc_status = P.NPUAllocFloatStatus()
     self.get_status = P.NPUGetFloatStatus()
     self.clear_status = P.NPUClearFloatStatus()
     self.fill = P.Fill()
     self.shape_op = P.Shape()
     self.select = P.Select()
     self.less = P.Less()
     self.cast = P.Cast()
     self.dtype = P.DType()
     self.reduce_sum = P.ReduceSum(keep_dims=True)
     self.sub = P.Sub()
     self.neg = P.Neg()
Example #5
 def __init__(self, mul_7_w_shape, add_8_bias_shape):
     """init function"""
     super(LayerNorm, self).__init__()
     self.reducemean_0 = P.ReduceMean(keep_dims=True)
     self.sub_1 = P.Sub()
     self.pow_2 = P.Pow()
     self.pow_2_input_weight = 2.0
     self.reducemean_3 = P.ReduceMean(keep_dims=True)
     self.add_4 = P.Add()
     self.add_4_bias = 9.999999960041972e-13
     self.sqrt_5 = P.Sqrt()
     self.div_6 = P.Div()
     self.mul_7 = P.Mul()
     self.mul_7_w = Parameter(Tensor(np.random.uniform(0, 1, mul_7_w_shape).astype(np.float32)), name=None)
     self.add_8 = P.Add()
     self.add_8_bias = Parameter(Tensor(np.random.uniform(0, 1, add_8_bias_shape).astype(np.float32)), name=None)
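
The op names spell out standard layer normalization. A minimal construct consistent with them (a sketch; the original construct is not part of this excerpt):

 def construct(self, x):
     mean = self.reducemean_0(x, -1)
     diff = self.sub_1(x, mean)
     variance = self.reducemean_3(self.pow_2(diff, self.pow_2_input_weight), -1)
     normed = self.div_6(diff, self.sqrt_5(self.add_4(variance, self.add_4_bias)))
     return self.add_8(self.mul_7(normed, self.mul_7_w), self.add_8_bias)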
Example #6
 def construct(self, a, b, x):
     if a < b:
         a = P.TensorAdd()(a, b)
     else:
         a = P.Sub()(a, b)
     if a == x:
         a = P.Mul()(a, b)
     else:
         a = P.RealDiv()(a, b)
     if b == x:
         b = P.TensorAdd()(a, b)
     else:
         b = P.TensorAdd()(a, x)
     a = a * b
     out = a + b + x
     return out
Example #7
 def __init__(self,
              axis=-1,
              depth=1,
              on_value=1.0,
              off_value=0.0,
              strategy=None):
     super(Onehot, self).__init__()
     trans_stra = None
     if strategy:
         trans_stra = (strategy[0], )
     self.onehot = P.OneHot().set_strategy(strategy=strategy)
     self.depth = depth
     self.on_value = Tensor(on_value, ms.float32)
     self.off_value = Tensor(off_value, ms.float32)
     self.transpose = P.Transpose().set_strategy(strategy=trans_stra)
     self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1)))
Example #8
 def __init__(self, passthrough_w_0, passthrough_w_1):
     """init function"""
     super(LayerNorm, self).__init__()
     self.reducemean_0 = P.ReduceMean(keep_dims=True)
     self.sub_1 = P.Sub()
     self.pow_2 = P.Pow()
     self.pow_2_input_weight = 2.0
     self.reducemean_3 = P.ReduceMean(keep_dims=True)
     self.add_4 = P.Add()
     self.add_4_bias = 9.999999960041972e-13
     self.sqrt_5 = P.Sqrt()
     self.div_6 = P.Div()
     self.mul_7 = P.Mul()
     self.mul_7_w = passthrough_w_0
     self.add_8 = P.Add()
     self.add_8_bias = passthrough_w_1
Example #9
 def __init__(self, sparse=False):
     super(SoftmaxCrossEntropyExpand, self).__init__()
     self.exp = P.Exp()
     self.reduce_sum = P.ReduceSum(keep_dims=True)
     self.onehot = P.OneHot()
     self.on_value = Tensor(1.0, mstype.float32)
     self.off_value = Tensor(0.0, mstype.float32)
     self.div = P.Div()
     self.log = P.Log()
     self.sum_cross_entropy = P.ReduceSum(keep_dims=False)
     self.mul = P.Mul()
     self.mul2 = P.Mul()
     self.cast = P.Cast()
     self.reduce_mean = P.ReduceMean(keep_dims=False)
     self.sparse = sparse
     self.reduce_max = P.ReduceMax(keep_dims=True)
     self.sub = P.Sub()
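
A hedged sketch of the matching construct (an assumption; F = mindspore.ops.functional is assumed imported): a numerically stable softmax followed by the expanded cross-entropy.

 def construct(self, logit, label):
     # subtract the per-row max before exponentiating for numerical stability
     logit_max = self.reduce_max(logit, -1)
     exp = self.exp(self.sub(logit, logit_max))
     softmax_result = self.div(exp, self.reduce_sum(exp, -1))
     if self.sparse:
         label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
     loss = self.sum_cross_entropy(self.mul(self.log(softmax_result), label), -1)
     loss = self.mul2(-1.0, loss)
     return self.reduce_mean(loss, -1)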
Example #10
def test_nobroadcast_fp16():
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')

    np.random.seed(42)
    x1_np = np.random.rand(10, 20).astype(np.float16)
    x2_np = np.random.rand(10, 20).astype(np.float16)

    output_ms = P.Minimum()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.minimum(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Maximum()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.maximum(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Greater()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np > x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Less()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np < x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Pow()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.power(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.RealDiv()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np / x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Mul()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np * x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Sub()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np - x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.DivNoNan()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np / x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

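    # DivNoNan returns 0 wherever the divisor is 0, hence the all-zeros expectation below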
    x2_np_zero = np.zeros_like(x2_np)
    output_ms = P.DivNoNan()(Tensor(x1_np), Tensor(x2_np_zero))
    assert np.allclose(output_ms.asnumpy(), x2_np_zero)
Example #11
def test_nobroadcast():
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')

    np.random.seed(42)
    x1_np = np.random.rand(10, 20).astype(np.float32)
    x2_np = np.random.rand(10, 20).astype(np.float32)
    x1_np_int32 = np.random.randint(0, 100, (10, 20)).astype(np.int32)
    x2_np_int32 = np.random.randint(0, 100, (10, 20)).astype(np.int32)

    output_ms = P.Minimum()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.minimum(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Maximum()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.maximum(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Greater()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np > x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)
    output_ms = P.Greater()(Tensor(x1_np_int32), Tensor(x2_np_int32))
    output_np = x1_np_int32 > x2_np_int32
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Less()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np < x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)
    output_ms = P.Less()(Tensor(x1_np_int32), Tensor(x2_np_int32))
    output_np = x1_np_int32 < x2_np_int32
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Pow()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.power(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.RealDiv()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np / x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Mul()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np * x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Sub()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np - x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)
Example #12
    def __init__(self, channel=1, w=0.25):
        super(PReLU, self).__init__()
        if isinstance(w, (np.float32, float)):
            tmp = np.empty((channel, ), dtype=np.float32)
            tmp.fill(w)
            w = Tensor(tmp)
        elif isinstance(w, list):
            w = Tensor(w)

        if not isinstance(w, Tensor):
            raise TypeError("w only supports np.float32, float or Tensor type.")

        self.w = Parameter(initializer(w, [
            channel,
        ]), name='a')
        self.prelu = P.PReLU()
        self.relu = P.ReLU().set_strategy(((1, ), ))
        self.sub = P.Sub().set_strategy(((1, ), (1, )))
        self.assign_sub = P.AssignSub().set_strategy(((1, ), (1, )))
Example #13
def test_SubGrad():
    """ test_SubGrad """
    input_x = Tensor(np.array([[2, 2]]))
    input_y = Tensor(np.array([[2, 2], [2, 2]]))
    sub = P.Sub()

    def fn(x, y):
        output = sub(x, y)
        return output

    out = fn(input_x, input_y)
    gfn = grad_all_with_sens(fn)
    sens = Tensor(np.ones_like(out.asnumpy()))
    args = [input_x, input_y, sens]
    gout = gfn(*args)
    expect_dx = np.ones([1, 2]).astype(np.int32) * 2  # reduce sum dout to the shape of x
    expect_dy = np.ones([2, 2]).astype(np.int32) * (-1)
    assert np.array_equal(gout[0].asnumpy(), expect_dx)
    assert np.array_equal(gout[1].asnumpy(), expect_dy)
Example #14
 def __init__(self):
     super(LayerNorm, self).__init__()
     self.reducemean = P.ReduceMean(keep_dims=True)
     self.sub = P.Sub()
     self.cast = P.Cast()
     self.cast_to = mstype.float32
     self.pow = P.Pow()
     self.pow_weight = 2.0
     self.add = P.Add()
     self.add_bias_0 = 9.999999960041972e-13
     self.sqrt = P.Sqrt()
     self.div = P.Div()
     self.mul = P.Mul()
     self.mul_weight = Parameter(Tensor(
         np.random.uniform(0, 1, (768, )).astype(np.float32)),
                                 name=None)
     self.add_bias_1 = Parameter(Tensor(
         np.random.uniform(0, 1, (768, )).astype(np.float32)),
                                 name=None)
Example #15
 def construct(self, a, b, x):
     add = P.TensorAdd()
     sub = P.Sub()
     mul = P.Mul()
     div = P.RealDiv()
     if 2 < 12:
         a = add(a, b)
     else:
         a = sub(a, b)
     if 2 > 1:
         a = mul(a, b)
     else:
         a = div(a, b)
     if 2 == 1:
         b = add(a, b)
     else:
         b = add(a, x)
     a = a * b
     out = a + b + x
     return out
Example #16
 def construct(self, a, b, x):
     add = P.Add()
     sub = P.Sub()
     mul = P.Mul()
     div = P.RealDiv()
     if a < b:
         a = add(a, b)
     else:
         a = sub(a, b)
     if 2 > 1:
         a = mul(a, b)
     else:
         a = div(a, b)
     if b == x:
         b = add(a, b)
     else:
         b = add(a, x)
     a = a * b
     out = a + b + x
     return out
Example #17
 def __init__(self, sparse=False, stra_list=None):
     super(SoftmaxCrossEntropyExpand, self).__init__()
     if stra_list is None:
         stra_list = []
     if len(stra_list) < 11:
         stra_list = [None] * 11
     self.exp = P.Exp()
     self.reduce_sum = P.ReduceSum(keep_dims=True).shard(strategy=stra_list[1])
     self.onehot = P.OneHot().shard(strategy=stra_list[2])
     self.on_value = Tensor(1.0, mstype.float32)
     self.off_value = Tensor(0.0, mstype.float32)
     self.div = P.Div().shard(strategy=stra_list[3])
     self.log = P.Log().shard(strategy=stra_list[4])
     self.sum_cross_entropy = P.ReduceSum(keep_dims=False).shard(strategy=stra_list[5])
     self.mul = P.Mul().shard(strategy=stra_list[6])
     self.mul2 = P.Mul().shard(strategy=stra_list[7])
     self.cast = P.Cast()
     self.reduce_mean = P.ReduceMean(keep_dims=False).shard(strategy=stra_list[8])
     self.sparse = sparse
     self.reduce_max = P.ReduceMax(keep_dims=True).shard(strategy=stra_list[9])
     self.sub = P.Sub().shard(strategy=stra_list[10])
Example #18
 def __init__(self):
     super(LayerNorm, self).__init__()
     self.reducemean_0 = P.ReduceMean(keep_dims=True)
     self.sub_1 = P.Sub()
     self.cast_2 = P.Cast()
     self.cast_2_to = mstype.float32
     self.pow_3 = P.Pow()
     self.pow_3_input_weight = 2.0
     self.reducemean_4 = P.ReduceMean(keep_dims=True)
     self.add_5 = P.Add()
     self.add_5_bias = 9.999999960041972e-13
     self.sqrt_6 = P.Sqrt()
     self.div_7 = P.Div()
     self.mul_8 = P.Mul()
     self.mul_8_w = Parameter(Tensor(
         np.random.uniform(0, 1, (768, )).astype(np.float32)),
                              name=None)
     self.add_9 = P.Add()
     self.add_9_bias = Parameter(Tensor(
         np.random.uniform(0, 1, (768, )).astype(np.float32)),
                                 name=None)
Example #19
    def _attn(self, query, key, value, attention_mask):
        """
        Get the weighted score along the seq_length

        Inputs:
            query: the query matrix
            key: the key matrix
            value: the value matrix
            attention_mask: the attention mask matrix with shape (batch_size, 1, seq_length, seq_length)

        Returns:
            weighted_values: Tensor, the weighted sum scores
        """
        if not self.scale:
            query = query / F.cast(self.coeff, F.dtype(query))
            key = key / F.cast(self.coeff, F.dtype(key))

        score = self.batch_matmul(query, key)
        if self.scale:
            score = score / P.Cast()(self.scale_factor, P.DType()(score))

        ori_dtype = P.DType()(score)
        score = P.Cast()(score, mstype.float32)
        multiply_out = P.Sub()(
            P.Cast()(F.tuple_to_array((1.0,)), P.DType()(score)),
            P.Cast()(attention_mask, P.DType()(score)))

        adder = P.Mul()(multiply_out, self.multiply_data)
        attention_scores = adder + score

        attention_scores = P.Cast()(attention_scores, ori_dtype)
        shape = F.shape(attention_scores)
        attention_probs = nn.Softmax()(F.reshape(attention_scores,
                                                 (-1, shape[-1])))
        attention_probs = F.reshape(attention_probs, shape)

        attention_probs = self.prob_dropout(attention_probs)
        weighted_values = self.batch_matmul(attention_probs, value)
        return weighted_values
Example #20
 def __init__(self,
              bert_layer_norm_weight_shape,
              bert_layer_norm_bias_shape,
              eps=1e-12):
     """init function"""
     super(BertLayerNorm, self).__init__()
     self.reducemean = P.ReduceMean(keep_dims=True)
     self.sub = P.Sub()
     self.pow = P.Pow()
     self.add = P.Add()
     self.sqrt = P.Sqrt()
     self.div = P.Div()
     self.mul = P.Mul()
     self.variance_epsilon = eps
     self.bert_layer_norm_weight = Parameter(Tensor(
         np.random.uniform(0, 1, bert_layer_norm_weight_shape).astype(
             np.float32)),
                                             name=None)
     self.bert_layer_norm_bias = Parameter(Tensor(
         np.random.uniform(0, 1,
                           bert_layer_norm_bias_shape).astype(np.float32)),
                                           name=None)
Example #21
 def __init__(self):
     super(ModelTwoHop, self).__init__()
     self.expanddims_0 = P.ExpandDims()
     self.expanddims_0_axis = 1
     self.expanddims_3 = P.ExpandDims()
     self.expanddims_3_axis = 2
     self.cast_5 = P.Cast()
     self.cast_5_to = mstype.float32
     self.sub_7 = P.Sub()
     self.sub_7_bias = 1.0
     self.mul_9 = P.Mul()
     self.mul_9_w = -10000.0
     self.gather_1_input_weight = Parameter(Tensor(
         np.random.uniform(0, 1, (30522, 768)).astype(np.float32)),
                                            name=None)
     self.gather_1_axis = 0
     self.gather_1 = P.Gather()
     self.gather_2_input_weight = Parameter(Tensor(
         np.random.uniform(0, 1, (2, 768)).astype(np.float32)),
                                            name=None)
     self.gather_2_axis = 0
     self.gather_2 = P.Gather()
     self.add_4 = P.Add()
     self.add_6 = P.Add()
     self.add_6_bias = Parameter(Tensor(
         np.random.uniform(0, 1, (1, 448, 768)).astype(np.float32)),
                                 name=None)
     self.layernorm1_0 = LayerNorm()
     self.module50_0 = Encoder1_4()
     self.module50_1 = Encoder1_4()
     self.module50_2 = Encoder1_4()
     self.gather_643_input_weight = Tensor(np.array(0))
     self.gather_643_axis = 1
     self.gather_643 = P.Gather()
     self.dense_644 = nn.Dense(in_channels=768,
                               out_channels=768,
                               has_bias=True)
     self.tanh_645 = nn.Tanh()
Example #22
def test_broadcast_fp16():
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')

    x1_np = np.random.rand(3, 1, 5, 1).astype(np.float16)
    x2_np = np.random.rand(1, 4, 1, 6).astype(np.float16)

    output_ms = P.Minimum()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.minimum(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Maximum()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.maximum(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Greater()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np > x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Less()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np < x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Pow()(Tensor(x1_np), Tensor(x2_np))
    output_np = np.power(x1_np, x2_np)
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.RealDiv()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np / x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Mul()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np * x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)

    output_ms = P.Sub()(Tensor(x1_np), Tensor(x2_np))
    output_np = x1_np - x2_np
    assert np.allclose(output_ms.asnumpy(), output_np)
Example #23
    def __init__(self,
                 norm_bound=1.0,
                 initial_noise_multiplier=1.5,
                 noise_decay_rate=6e-4,
                 decay_policy='Time',
                 seed=0):
        super(AdaGaussianRandom, self).__init__()
        norm_bound = check_value_positive('norm_bound', norm_bound)
        initial_noise_multiplier = check_value_positive(
            'initial_noise_multiplier', initial_noise_multiplier)
        self._norm_bound = Tensor(norm_bound, mstype.float32)

        initial_noise_multiplier = Tensor(initial_noise_multiplier,
                                          mstype.float32)
        self._initial_noise_multiplier = Parameter(
            initial_noise_multiplier, name='initial_noise_multiplier')
        self._noise_multiplier = Parameter(initial_noise_multiplier,
                                           name='noise_multiplier')
        self._mean = Tensor(0, mstype.float32)
        noise_decay_rate = check_param_type('noise_decay_rate',
                                            noise_decay_rate, float)
        check_param_in_range('noise_decay_rate', noise_decay_rate, 0.0, 1.0)
        self._noise_decay_rate = Tensor(noise_decay_rate, mstype.float32)
        if decay_policy not in ['Time', 'Step']:
            raise NameError(
                "The decay_policy must be in ['Time', 'Step'], but "
                "get {}".format(decay_policy))
        self._decay_policy = decay_policy
        self._sub = P.Sub()
        self._mul = P.Mul()
        self._add = P.TensorAdd()
        self._div = P.Div()
        self._dtype = mstype.float32
        self._normal = P.Normal(seed=seed)
        self._assign = P.Assign()
        self._one = Tensor(1, self._dtype)
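
This mechanism pairs naturally with the parameter updater in Example #1, which consumes the same decay_policy, decay rate and noise-multiplier parameters to adjust the noise over training.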
Example #24
from mindspore.ops import Primitive
from mindspore.ops import operations as P

mul = P.Mul()
reduce_sum = P.ReduceSum(keep_dims=True)
sub = P.Sub()
confusion_softmax_grad = Primitive('ConfusionSoftmaxGrad')
make_tuple = Primitive('make_tuple')
tuple_getitem = Primitive('tuple_getitem')
axis = 2


class FnDict:
    def __init__(self):
        self.fnDict = {}

    def __call__(self, fn):
        self.fnDict[fn.__name__] = fn

    def __getitem__(self, name):
        return self.fnDict[name]
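
A hypothetical usage sketch (the rest of the test file is not shown): FnDict acts as a decorator-based registry, so a test can register several graphs and fetch them by name.

def get_test_fn(tag):
    """hypothetical helper in the style of MindSpore pass tests"""
    fns = FnDict()

    @fns
    def before(x, y):
        # an arbitrary graph built from the module-level primitives above;
        # FnDict.__call__ returns None, so 'before' is only reachable via fns['before']
        return sub(y, mul(x, reduce_sum(x, axis)))

    return fns[tag]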
Example #25
    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=True,
                 device_num_each_group=1):
        super(_BatchNorm, self).__init__()
        if num_features < 1:
            raise ValueError("num_features must be at least 1")

        if momentum < 0 or momentum > 1:
            raise ValueError(
                "momentum should be a number in range [0, 1], but got {}".
                format(momentum))

        self.use_batch_statistics = use_batch_statistics
        self.num_features = num_features
        self.eps = eps
        self.moving_mean = Parameter(initializer(moving_mean_init,
                                                 num_features),
                                     name="mean",
                                     requires_grad=False)
        self.moving_variance = Parameter(initializer(moving_var_init,
                                                     num_features),
                                         name="variance",
                                         requires_grad=False)
        self.gamma = Parameter(initializer(gamma_init, num_features),
                               name="gamma",
                               requires_grad=affine)
        self.beta = Parameter(initializer(beta_init, num_features),
                              name="beta",
                              requires_grad=affine)
        self.group = check_int_positive(device_num_each_group)
        self.is_global = False
        if self.group != 1:
            self.rank_id = get_rank()
            self.rank_size = get_group_size()
            self.device_list = [i for i in range(0, self.rank_size)]
            self.rank_list = self.list_group(self.device_list, self.group)
            self.rank_list_idx = len(self.rank_list)
            for i in range(self.rank_list_idx):
                if self.rank_id in self.rank_list[i] and self.group != 1:
                    self.is_global = True
                    management.create_group('group' + str(i),
                                            self.rank_list[i])
                    self.all_reduce = P.AllReduce(
                        P.ReduceOp.SUM,
                        'group' + str(i)).add_prim_attr('fusion', 1)
        self.shape = P.Shape()
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = P.Square()
        self.sqrt = P.Sqrt()
        self.cast = P.Cast()
        self.dtype = P.DType()
        self.reshape = P.Reshape()
        self.is_ascend = context.get_context("device_target") == "Ascend"

        if context.get_context("enable_ge"):
            self.is_ge_backend = True
            self.momentum = Tensor(1.0 - momentum, mstype.float32)
        else:
            self.is_ge_backend = False
            self.momentum = 1.0 - momentum
        if self.is_ge_backend or self.is_ascend:
            self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
        else:
            self.bn_train = P.FusedBatchNorm(mode=1,
                                             epsilon=self.eps,
                                             momentum=self.momentum)
        self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps)

        data_parallel_strategy = ((1, ), (1, ))
        data_parallel_strategy_one = ((1, ), ())
        self.sub_mean = P.Sub().set_strategy(data_parallel_strategy)
        self.sub_var = P.Sub().set_strategy(data_parallel_strategy)
        self.mul_mean = P.Mul().set_strategy(data_parallel_strategy_one)
        self.mul_var = P.Mul().set_strategy(data_parallel_strategy_one)
        self.assign_sub_mean = P.AssignSub().set_strategy(
            data_parallel_strategy)
        self.assign_sub_var = P.AssignSub().set_strategy(
            data_parallel_strategy)
Example #26
 def __init__(self, compute_type=mstype.float32):
     super(Mod, self).__init__()
     self.compute_type = compute_type
     self.floor_div = P.FloorDiv()
     self.sub = P.Sub()
     self.multiply = P.Mul()
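
A plausible construct for this cell (an assumption based on the stored ops): x mod y computed as x - floor(x / y) * y.

 def construct(self, x, y):
     return self.sub(x, self.multiply(self.floor_div(x, y), y))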
Example #27
    def __init__(self,
                 batch_size,
                 seq_length,
                 vocab_size,
                 decoder,
                 beam_width=4,
                 length_penalty_weight=1.0,
                 max_decode_length=128,
                 sos_id=1,
                 eos_id=2,
                 compute_type=mstype.float32):
        super(BeamSearchDecoder, self).__init__(auto_prefix=False)
        self.seq_length = seq_length
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        self.beam_width = beam_width
        self.length_penalty_weight = length_penalty_weight
        self.max_decode_length = max_decode_length
        self.decoder = decoder

        self.add = P.TensorAdd()
        self.expand = P.ExpandDims()
        self.reshape = P.Reshape()
        self.shape_flat = (-1, )
        self.shape = P.Shape()

        self.zero_tensor = Tensor(np.zeros([batch_size, beam_width]),
                                  mstype.float32)
        self.ninf_tensor = Tensor(np.full([batch_size, beam_width], -INF),
                                  mstype.float32)

        self.select = P.Select()
        self.flat_shape = (batch_size, beam_width * vocab_size)
        self.topk = P.TopK(sorted=True)
        self.floor_div = P.FloorDiv()
        self.vocab_size_tensor = Tensor(self.vocab_size, mstype.int32)
        self.real_div = P.RealDiv()
        self.mod = Mod()
        self.equal = P.Equal()
        self.eos_ids = Tensor(np.full([batch_size, beam_width], eos_id),
                              mstype.int32)

        beam_ids = np.tile(
            np.arange(beam_width).reshape((1, beam_width)), [batch_size, 1])
        self.beam_ids = Tensor(beam_ids, mstype.int32)
        batch_ids = np.arange(batch_size * beam_width).reshape(
            (batch_size, beam_width)) // beam_width
        self.batch_ids = Tensor(batch_ids, mstype.int32)
        self.concat = P.Concat(axis=-1)
        self.gather_nd = P.GatherNd()

        self.greater_equal = P.GreaterEqual()
        self.sub = P.Sub()
        self.cast = P.Cast()
        self.zeroslike = P.ZerosLike()

        # init inputs and states
        self.start_ids = Tensor(np.full([batch_size * beam_width, 1], sos_id),
                                mstype.int32)
        self.init_seq = Tensor(np.full([batch_size, beam_width, 1], sos_id),
                               mstype.int32)
        init_scores = np.tile(np.array([[0.] + [-INF] * (beam_width - 1)]),
                              [batch_size, 1])
        self.init_scores = Tensor(init_scores, mstype.float32)
        self.init_finished = Tensor(
            np.zeros([batch_size, beam_width], dtype=np.bool))
        self.init_length = Tensor(
            np.zeros([batch_size, beam_width], dtype=np.int32))
        self.length_penalty = LengthPenalty(weight=length_penalty_weight)
        self.one = Tensor(1, mstype.int32)
Example #28
 def __init__(self):
     super(SubNet, self).__init__()
     self.sub = P.Sub()
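
The matching construct would presumably just forward to the operator, e.g.:

 def construct(self, x, y):
     return self.sub(x, y)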
Example #29
    def __init__(self,
                 batch_size,
                 from_tensor_width,
                 to_tensor_width,
                 from_seq_length,
                 to_seq_length,
                 num_attention_heads=1,
                 size_per_head=512,
                 query_act=None,
                 key_act=None,
                 value_act=None,
                 has_attention_mask=False,
                 attention_probs_dropout_prob=0.0,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
                 do_return_2d_tensor=False,
                 use_relative_positions=False,
                 compute_type=mstype.float32):

        super(BertAttention, self).__init__()
        self.batch_size = batch_size
        self.from_seq_length = from_seq_length
        self.to_seq_length = to_seq_length
        self.num_attention_heads = num_attention_heads
        self.size_per_head = size_per_head
        self.has_attention_mask = has_attention_mask
        self.use_relative_positions = use_relative_positions

        self.scores_mul = 1.0 / math.sqrt(float(self.size_per_head))
        self.reshape = P.Reshape()
        self.shape_from_2d = (-1, from_tensor_width)
        self.shape_to_2d = (-1, to_tensor_width)
        weight = TruncatedNormal(initializer_range)
        units = num_attention_heads * size_per_head
        self.query_layer = nn.Dense(from_tensor_width,
                                    units,
                                    activation=query_act,
                                    weight_init=weight).to_float(compute_type)
        self.key_layer = nn.Dense(to_tensor_width,
                                  units,
                                  activation=key_act,
                                  weight_init=weight).to_float(compute_type)
        self.value_layer = nn.Dense(to_tensor_width,
                                    units,
                                    activation=value_act,
                                    weight_init=weight).to_float(compute_type)

        self.shape_from = (batch_size, from_seq_length, num_attention_heads, size_per_head)
        self.shape_to = (
            batch_size, to_seq_length, num_attention_heads, size_per_head)

        self.matmul_trans_b = P.BatchMatMul(transpose_b=True)
        self.multiply = P.Mul()
        self.transpose = P.Transpose()
        self.trans_shape = (0, 2, 1, 3)
        self.trans_shape_relative = (2, 0, 1, 3)
        self.trans_shape_position = (1, 2, 0, 3)
        self.multiply_data = -10000.0
        self.batch_num = batch_size * num_attention_heads
        self.matmul = P.BatchMatMul()

        self.softmax = nn.Softmax()
        self.dropout = nn.Dropout(1 - attention_probs_dropout_prob)

        if self.has_attention_mask:
            self.expand_dims = P.ExpandDims()
            self.sub = P.Sub()
            self.add = P.TensorAdd()
            self.cast = P.Cast()
            self.get_dtype = P.DType()
        if do_return_2d_tensor:
            self.shape_return = (batch_size * from_seq_length, num_attention_heads * size_per_head)
        else:
            self.shape_return = (batch_size, from_seq_length, num_attention_heads * size_per_head)

        self.cast_compute_type = SaturateCast(dst_type=compute_type)
        if self.use_relative_positions:
            self._generate_relative_positions_embeddings = \
                RelaPosEmbeddingsGenerator(length=to_seq_length,
                                           depth=size_per_head,
                                           max_relative_position=16,
                                           initializer_range=initializer_range,
                                           use_one_hot_embeddings=use_one_hot_embeddings)
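
When has_attention_mask is set, the expand_dims/sub/add/cast ops together with multiply_data = -10000.0 presumably implement the same additive masking spelled out in Example #19: the scores are shifted by (1 - mask) * multiply_data before the softmax.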
Example #30
    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=None,
                 device_num_each_group=1,
                 input_dims='2d',
                 data_format='NCHW'):
        super(_BatchNorm, self).__init__()
        if num_features < 1:
            raise ValueError("num_features must be at least 1")

        if momentum < 0 or momentum > 1:
            raise ValueError("momentum should be a number in range [0, 1], but got {}".format(momentum))
        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
            raise ValueError("NHWC format only support in GPU target.")
        self.use_batch_statistics = use_batch_statistics
        self.num_features = num_features
        self.eps = eps
        self.input_dims = input_dims
        self.moving_mean = Parameter(initializer(
            moving_mean_init, num_features), name="mean", requires_grad=False)
        self.moving_variance = Parameter(initializer(
            moving_var_init, num_features), name="variance", requires_grad=False)
        self.gamma = Parameter(initializer(
            gamma_init, num_features), name="gamma", requires_grad=affine)
        self.beta = Parameter(initializer(
            beta_init, num_features), name="beta", requires_grad=affine)
        self.group = validator.check_positive_int(device_num_each_group)
        self.is_global = False
        if self.group != 1:
            self.rank_id = get_rank()
            self.rank_size = get_group_size()
            self.device_list = [i for i in range(0, self.rank_size)]
            self.rank_list = self.list_group(self.device_list, self.group)
            self.rank_list_idx = len(self.rank_list)
            for i in range(self.rank_list_idx):
                if self.rank_id in self.rank_list[i] and self.group != 1:
                    self.is_global = True
                    management.create_group('group' + str(i), self.rank_list[i])
                    self.all_reduce = P.AllReduce(P.ReduceOp.SUM, 'group' + str(i)).add_prim_attr('fusion', 1)
        self.shape = P.Shape()
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = P.Square()
        self.sqrt = P.Sqrt()
        self.cast = P.Cast()
        self.dtype = P.DType()
        self.reshape = P.Reshape()
        self.is_ascend = context.get_context("device_target") == "Ascend"
        self.is_gpu = context.get_context("device_target") == "GPU"
        self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
        self.momentum = 1.0 - momentum
        if context.get_context("enable_ge"):
            self.is_ge_backend = True
        else:
            self.is_ge_backend = False

        if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
            self.bn_train = P.BatchNorm(is_training=True,
                                        epsilon=self.eps)
        elif self.is_gpu:
            self.bn_train = P.FusedBatchNormEx(mode=1,
                                               epsilon=self.eps,
                                               momentum=self.momentum,
                                               data_format=self.format)
        else:
            self.bn_train = P.FusedBatchNorm(mode=1,
                                             epsilon=self.eps,
                                             momentum=self.momentum)
        self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps, data_format=self.format)
        self.enable_global_sync = self.is_global and (self.is_ge_backend or (self.is_graph_mode and self.is_ascend))
        self.enable_default_train = self.is_graph_mode and not self.is_global and \
                                    (self.is_ge_backend or self.is_ascend)

        data_parallel_strategy = ((1,), (1,))
        data_parallel_strategy_one = ((1,), ())
        self.sub_mean = P.Sub().shard(data_parallel_strategy)
        self.sub_var = P.Sub().shard(data_parallel_strategy)
        self.mul_mean = P.Mul().shard(data_parallel_strategy_one)
        self.mul_var = P.Mul().shard(data_parallel_strategy_one)
        self.assign_sub_mean = P.AssignSub().shard(data_parallel_strategy)
        self.assign_sub_var = P.AssignSub().shard(data_parallel_strategy)