Example No. 1
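All six snippets exercise PaddlePaddle's fused feed-forward API and presumably share the same preamble: `import paddle`, `import paddle.incubate.nn.functional as incubate_f`, and (for Example No. 6) `import numpy as np` plus `import paddle.nn.functional as F`. This first one passes an `int32` input, so the call is expected to trip `fused_feedforward`'s dtype check; in the original test it is most likely wrapped in an `assertRaises`.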
 def test_dtype():
     x = paddle.static.data(name='x',
                            shape=[1, 10, 10],
                            dtype="int32")
     linear1_weight = paddle.static.data(name='linear1_weight',
                                         shape=[1, 10, 10],
                                         dtype="float32")
     linear2_weight = paddle.static.data(name='linear2_weight',
                                         shape=[1, 10, 10],
                                         dtype="float32")
     incubate_f.fused_feedforward(x, linear1_weight, linear2_weight)
Example No. 2
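`mode='test'` is not a value Paddle's dropout accepts (the recognized modes are 'upscale_in_train' and 'downscale_in_infer'), so this call should likewise raise.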
 def test_dropout_mode():
     x = paddle.static.data(name='x3',
                            shape=[1, 10, 10],
                            dtype="float32")
     linear1_weight = paddle.static.data(name='linear1_weight3',
                                         shape=[10, 10],
                                         dtype="float32")
     linear2_weight = paddle.static.data(name='linear2_weight3',
                                         shape=[10, 10],
                                         dtype="float32")
     incubate_f.fused_feedforward(x,
                                  linear1_weight,
                                  linear2_weight,
                                  mode='test')
Example No. 3
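A dropout probability must lie in [0, 1], so `dropout2_rate=-1` is expected to be rejected.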
 def test_dropout_rate_value():
     x = paddle.static.data(name='x2',
                            shape=[1, 10, 10],
                            dtype="float32")
     linear1_weight = paddle.static.data(name='linear1_weight2',
                                         shape=[10, 10],
                                         dtype="float32")
     linear2_weight = paddle.static.data(name='linear2_weight2',
                                         shape=[10, 10],
                                         dtype="float32")
     incubate_f.fused_feedforward(x,
                                  linear1_weight,
                                  linear2_weight,
                                  dropout2_rate=-1)
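For contrast with the three failure cases above, here is a minimal sketch of a valid dygraph call (illustrative only, not from the original tests; it assumes a CUDA build of PaddlePaddle, since the fused kernels are GPU ops). Only the input and the two weight matrices are required; biases and layer-norm parameters default to `None`, and the dropout rates default to 0.5.

    import paddle
    import paddle.incubate.nn.functional as incubate_f

    paddle.disable_static()  # dygraph mode
    batch, seq_len, d_model, dim_ffn = 2, 10, 8, 32

    x = paddle.randn([batch, seq_len, d_model])   # input activations
    w1 = paddle.randn([d_model, dim_ffn])         # first linear weight
    w2 = paddle.randn([dim_ffn, d_model])         # second linear weight

    out = incubate_f.fused_feedforward(x, w1, w2,
                                       dropout1_rate=0.0,
                                       dropout2_rate=0.0,
                                       activation="relu",
                                       pre_layer_norm=False)
    print(out.shape)  # [2, 10, 8]: the FFN preserves the input shape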
Example No. 4
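A dynamic-graph run-through: the linear and layer-norm parameters are re-wrapped as trainable tensors (`stop_gradient=False`), the two 0.0 positional arguments are `dropout1_rate` and `dropout2_rate`, and `paddle.autograd.backward` seeds the backward pass with `self.dout` so the function can return both the output and the input gradient.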
 def FusedFFN(self):
     paddle.disable_static()
     linear1_weight = paddle.to_tensor(self.linear1.weight,
                                       stop_gradient=False)
     linear1_bias = paddle.to_tensor(self.linear1.bias, stop_gradient=False)
     linear2_weight = paddle.to_tensor(self.linear2.weight,
                                       stop_gradient=False)
     linear2_bias = paddle.to_tensor(self.linear2.bias, stop_gradient=False)
     ln1_scale = paddle.to_tensor(self.norm1.weight, stop_gradient=False)
     ln1_bias = paddle.to_tensor(self.norm1.bias, stop_gradient=False)
     ln2_scale = paddle.to_tensor(self.norm2.weight, stop_gradient=False)
     ln2_bias = paddle.to_tensor(self.norm2.bias, stop_gradient=False)
     x = paddle.to_tensor(self.src, stop_gradient=False)
     out = incubate_f.fused_feedforward(x,
                                        linear1_weight,
                                        linear2_weight,
                                        linear1_bias,
                                        linear2_bias,
                                        ln1_scale,
                                        ln1_bias,
                                        ln2_scale,
                                        ln2_bias,
                                        0.0,
                                        0.0,
                                        activation=self.act_method,
                                        pre_layer_norm=self.pre_layer_norm)
     paddle.autograd.backward([out], [paddle.to_tensor(self.dout)])
     return out, x.grad
Example No. 5
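The forward method of a custom layer that passes its stored configuration (dropout rates, activation, layer-norm epsilons, pre/post layer-norm placement, training flag) directly to `fused_feedforward`; the `cache` argument is accepted but unused here.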
 def forward(self, src, cache=None):
     out = incubate_f.fused_feedforward(
         src,
         self._linear1_weight,
         self._linear2_weight,
         self._linear1_bias,
         self._linear2_bias,
         self._ln1_scale,
         self._ln1_bias,
         self._ln2_scale,
         self._ln2_bias,
         dropout1_rate=self._act_dropout_rate,
         dropout2_rate=self._dropout_rate,
         activation=self._act_method,
         ln1_epsilon=self._epsilon,
         ln2_epsilon=self._epsilon,
         pre_layer_norm=self._normalize_before,
         training=self.training,
         name=self.name)
     return out
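`pre_layer_norm` chooses between the pre-LN variant (normalize before the feed-forward block) and the post-LN variant (normalize after the residual add), while `training` controls whether the two dropouts are active.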
Example No. 6
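A static-graph comparison test: the same feeds are run through the fused op and through a reference FFN composed from unfused ops (`F.linear`, `F.relu`, `F.dropout`, a residual add, `F.layer_norm`), and the two outputs are compared with `np.allclose`. The executor is placed on `paddle.CUDAPlace(0)` because the fused kernels run on GPU.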
    def test_static(self):
        paddle.enable_static()
        paddle.static.default_main_program().random_seed = 42
        dtype = "float32"
        layer_norm_dtype = "float32"
        batch_size = 1
        d_model = 8
        dim_feedforward = 8

        x = paddle.static.data(name='x',
                               shape=[batch_size, d_model, dim_feedforward],
                               dtype=dtype)
        linear1_weight = paddle.static.data(name='linear1_weight',
                                            shape=[d_model, dim_feedforward],
                                            dtype=dtype)
        linear1_bias = paddle.static.data(name='linear1_bias',
                                          shape=[dim_feedforward])
        linear2_weight = paddle.static.data(name='linear2_weight',
                                            shape=[dim_feedforward, d_model],
                                            dtype=dtype)
        linear2_bias = paddle.static.data(name='linear2_bias', shape=[d_model])
        ln1_scale = paddle.static.data(name='ln1_scale', shape=[d_model])
        ln1_bias = paddle.static.data(name='ln1_bias', shape=[d_model])
        ln2_scale = paddle.static.data(name='ln2_scale', shape=[d_model])
        ln2_bias = paddle.static.data(name='ln2_bias', shape=[d_model])

        fused_out = incubate_f.fused_feedforward(x,
                                                 linear1_weight,
                                                 linear2_weight,
                                                 linear1_bias,
                                                 linear2_bias,
                                                 ln1_scale,
                                                 ln1_bias,
                                                 ln2_scale,
                                                 ln2_bias,
                                                 0.0,
                                                 0.0,
                                                 activation="relu",
                                                 pre_layer_norm=False)

        ###### reference (unfused) FFN ######
        linear1_out = F.linear(x, linear1_weight, linear1_bias)
        act_out = F.relu(linear1_out)
        dropout1_out = F.dropout(x=act_out, p=0.0, training=False)
        linear2_out = F.linear(dropout1_out, linear2_weight, linear2_bias)
        dropout2_out = x + F.dropout(x=linear2_out, p=0.0, training=False)
        ln_out = F.layer_norm(dropout2_out,
                              normalized_shape=list([d_model]),
                              weight=ln2_scale,
                              bias=ln2_bias)
        ###### reference (unfused) FFN ######

        exe = paddle.static.Executor(paddle.CUDAPlace(0))

        x_data = np.random.random(
            (batch_size, d_model, dim_feedforward)).astype(dtype)
        linear1_weight_data = np.random.random(
            (d_model, dim_feedforward)).astype(dtype)
        linear1_bias_data = np.zeros((dim_feedforward)).astype(dtype)
        linear2_weight_data = np.random.random(
            (dim_feedforward, d_model)).astype(dtype)
        linear2_bias_data = np.zeros((d_model)).astype(dtype)

        ln1_scale_data = np.ones((d_model)).astype(layer_norm_dtype)
        ln1_bias_data = np.zeros((d_model)).astype(layer_norm_dtype)
        ln2_scale_data = np.ones((d_model)).astype(layer_norm_dtype)
        ln2_bias_data = np.zeros((d_model)).astype(layer_norm_dtype)

        res_list = [fused_out, ln_out]
        real_res = []

        for res in res_list:
            fetch = exe.run(feed={
                'x': x_data,
                'linear1_weight': linear1_weight_data,
                'linear1_bias': linear1_bias_data,
                'linear2_weight': linear2_weight_data,
                'linear2_bias': linear2_bias_data,
                'ln1_scale': ln1_scale_data,
                'ln1_bias': ln1_bias_data,
                'ln2_scale': ln2_scale_data,
                'ln2_bias': ln2_bias_data
            },
                            fetch_list=[res])
            real_res.append(fetch)
        self.assertTrue(np.allclose(real_res[0], real_res[1], atol=1e-3),
                        "fused output does not match the reference FFN output")