Example #1
    def test_tensor_sub_scalar(self):
        # tensor(int64) - scalar(int)
        with program_guard(Program()):
            a = paddle.ones([2, 2, 2], dtype='int64')
            b = 1
            c = paddle.zeros([2, 2, 2], dtype="int64")
            self.check_operation(a, b, c, '-')

        # tensor(float32) - scalar(int)
        with program_guard(Program()):
            a = paddle.ones([2, 2, 2], dtype='float32')
            b = 1
            c = paddle.zeros([2, 2, 2], dtype="float32")
            self.check_operation(a, b, c, '-')

        # tensor(int64) - scalar(float, .0)
        with program_guard(Program()):
            a = paddle.ones([2, 2, 2], dtype='int64')
            b = 1.0
            c = paddle.zeros([2, 2, 2], dtype="float32")
            self.check_operation(a, b, c, '-')

        # tensor(int64) - scalar(float, .5)
        with program_guard(Program()):
            a = paddle.full([2, 2, 2], 2, dtype='int64')
            b = 1.5
            c = paddle.full([2, 2, 2], 0.5, dtype="float32")
            self.check_operation(a, b, c, '-')

        # tensor(float32) - scalar(float)
        with program_guard(Program()):
            a = paddle.full([2, 2, 2], 2, dtype='float32')
            b = 1.5
            c = paddle.full([2, 2, 2], 0.5, dtype="float32")
            self.check_operation(a, b, c, '-')
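
Examples #1 through #6 and #9 all delegate to a check_operation helper defined elsewhere in the test class. Below is a minimal, hypothetical sketch of what such a helper presumably does, assuming numpy/paddle are imported and static mode was already enabled in the test's setUp; it is an illustration, not the actual Paddle test code.

    def check_operation(self, a, b, c, op):
        # Build the expression inside the currently guarded static program.
        if op == '+':
            result = a + b
        elif op == '-':
            result = a - b
        elif op == '*':
            result = a * b
        elif op == '/':
            result = a / b
        elif op == '//':
            result = a // b
        elif op == '%':
            result = a % b
        elif op == '**':
            result = a ** b
        else:
            raise ValueError("unsupported operator: " + op)

        # Run the guarded program (the default main program inside
        # program_guard) and compare dtype and values against the expected c.
        exe = paddle.static.Executor(paddle.CPUPlace())
        result_np, expected_np = exe.run(fetch_list=[result, c])
        self.assertEqual(result_np.dtype, expected_np.dtype)
        np.testing.assert_array_equal(result_np, expected_np)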
Example #2
    def test_scalar_div_tensor(self):
        # scalar(int) / tensor(int64)
        with program_guard(Program()):
            a = 1
            b = paddle.full([2, 2, 2], 2, dtype='int64')
            c = paddle.full([2, 2, 2], 0.5, dtype="float32")
            self.check_operation(a, b, c, '/')

        # scalar(int) / tensor(float32)
        with program_guard(Program()):
            a = 1
            b = paddle.full([2, 2, 2], 0.5, dtype='float32')
            c = paddle.full([2, 2, 2], 2, dtype="float32")
            self.check_operation(a, b, c, '/')

        # scalar(float) / tensor(int64)
        with program_guard(Program()):
            a = 1.0
            b = paddle.full([2, 2, 2], 2, dtype='int64')
            c = paddle.full([2, 2, 2], 0.5, dtype="float32")
            self.check_operation(a, b, c, '/')

        # scalar(float) / tensor(float32)
        with program_guard(Program()):
            a = 1.0
            b = paddle.full([2, 2, 2], 0.5, dtype='float32')
            c = paddle.full([2, 2, 2], 2, dtype="float32")
            self.check_operation(a, b, c, '/')
Example #3
    def test_tensor_mod_scalar(self):
        # tensor(int64) % scalar(int)
        with program_guard(Program()):
            a = paddle.full([2, 2, 2], 3, dtype='int64')
            b = 2
            c = paddle.full([2, 2, 2], 1, dtype="int64")
            self.check_operation(a, b, c, '%')

        # tensor(int64) % scalar(float)
        with program_guard(Program()):
            a = paddle.full([2, 2, 2], 3, dtype='int64')
            b = 2.0
            c = paddle.full([2, 2, 2], 1, dtype="float32")
            self.check_operation(a, b, c, '%')

        # tensor(float32) % scalar(int)
        with program_guard(Program()):
            a = paddle.full([2, 2, 2], 3, dtype='float32')
            b = 2
            c = paddle.full([2, 2, 2], 1, dtype="float32")
            self.check_operation(a, b, c, '%')

        # tensor(float32) % scalar(float)
        with program_guard(Program()):
            a = paddle.full([2, 2, 2], 3, dtype='float32')
            b = 2.0
            c = paddle.full([2, 2, 2], 1, dtype="float32")
            self.check_operation(a, b, c, '%')
Example #4
    def test_scalar_pow_tensor(self):
        # scalar(int) ** tensor(int64)
        with program_guard(Program()):
            a = 3
            b = paddle.full([2, 2, 2], 2, dtype='int64')
            c = paddle.full([2, 2, 2], 9, dtype="int64")
            self.check_operation(a, b, c, '**')

        # scalar(float) ** tensor(int64)
        with program_guard(Program()):
            a = 3.0
            b = paddle.full([2, 2, 2], 2, dtype='int64')
            c = paddle.full([2, 2, 2], 9, dtype="float32")
            self.check_operation(a, b, c, '**')

        # scalar(int) ** tensor(float32)
        with program_guard(Program()):
            a = 3
            b = paddle.full([2, 2, 2], 2, dtype='float32')
            c = paddle.full([2, 2, 2], 9, dtype="float32")
            self.check_operation(a, b, c, '**')

        # tensor(float32) ** scalar(float)
        with program_guard(Program()):
            a = 3.0
            b = paddle.full([2, 2, 2], 2, dtype='float32')
            c = paddle.full([2, 2, 2], 9, dtype="float32")
            self.check_operation(a, b, c, '**')
Example #5
    def test_tensor_div_scalar(self):
        # tensor(int64) / scalar(int)
        with program_guard(Program()):
            a = paddle.ones([2, 2, 2], dtype='int64')
            b = 2
            c = paddle.full([2, 2, 2], 0.5, dtype="float32")
            self.check_operation(a, b, c, '/')

        # tensor(float32) / scalar(int)
        with program_guard(Program()):
            a = paddle.ones([2, 2, 2], dtype='float32')
            b = 2
            c = paddle.full([2, 2, 2], 0.5, dtype="float32")
            self.check_operation(a, b, c, '/')

        # tensor(int64) / scalar(float, .0)
        with program_guard(Program()):
            a = paddle.ones([2, 2, 2], dtype='int64')
            b = 2.0
            c = paddle.full([2, 2, 2], 0.5, dtype="float32")
            self.check_operation(a, b, c, '/')

        # tensor(int64) / scalar(float, .5)
        with program_guard(Program()):
            a = paddle.ones([2, 2, 2], dtype='int64')
            b = 0.5
            c = paddle.full([2, 2, 2], 2, dtype="float32")
            self.check_operation(a, b, c, '/')

        # tensor(float32) / scalar(float)
        with program_guard(Program()):
            a = paddle.ones([2, 2, 2], dtype='float32')
            b = 0.5
            c = paddle.full([2, 2, 2], 2, dtype="float32")
            self.check_operation(a, b, c, '/')
Example #6
    def test_scalar_sub_tensor(self):
        # scalar(int) - tensor(int64)
        with program_guard(Program()):
            a = 1
            b = paddle.ones([2, 2, 2], dtype='int64')
            c = paddle.zeros([2, 2, 2], dtype="int64")
            self.check_operation(a, b, c, '-')

        # scalar(int) - tensor(float32)
        with program_guard(Program()):
            a = 1
            b = paddle.ones([2, 2, 2], dtype='float32')
            c = paddle.zeros([2, 2, 2], dtype="float32")
            self.check_operation(a, b, c, '-')

        # scalar(float, .0) - tensor(int64)
        with program_guard(Program()):
            a = 1.0
            b = paddle.ones([2, 2, 2], dtype='int64')
            c = paddle.zeros([2, 2, 2], dtype="float32")
            self.check_operation(a, b, c, '-')

        # scalar(float, .5) - tensor(int64)
        with program_guard(Program()):
            a = 1.5
            b = paddle.full([2, 2, 2], 2, dtype='int64')
            c = paddle.full([2, 2, 2], -0.5, dtype="float32")
            self.check_operation(a, b, c, '-')

        # scalar(float) - tensor(float32)
        with program_guard(Program()):
            a = 1.5
            b = paddle.full([2, 2, 2], 2, dtype='float32')
            c = paddle.full([2, 2, 2], -0.5, dtype="float32")
            self.check_operation(a, b, c, '-')
Example #7
    def test_errors(self):
        # test static computation graph: dtype can not be int8
        paddle.enable_static()
        with program_guard(Program(), Program()):
            x = paddle.static.data(name='x', shape=[100], dtype=np.int8)
            y = paddle.static.data(name='y', shape=[100], dtype=np.int8)
            self.assertRaises(TypeError, paddle.inner, x, y)

        # test static computation graph: inputs must be broadcastable
        with program_guard(Program(), Program()):
            x = paddle.static.data(name='x', shape=[20, 50], dtype=np.float64)
            y = paddle.static.data(name='y', shape=[20], dtype=np.float64)
            self.assertRaises(ValueError, paddle.inner, x, y)

        np.random.seed(7)
        # test dynamic computation graph: dtype can not be int8
        paddle.disable_static()
        x_data = np.random.randn(200).astype(np.int8)
        y_data = np.random.randn(200).astype(np.int8)
        x = paddle.to_tensor(x_data)
        y = paddle.to_tensor(y_data)
        self.assertRaises(RuntimeError, paddle.inner, x, y)

        # test dynamic computation graph: inputs must be broadcastable
        x_data = np.random.rand(20, 5)
        y_data = np.random.rand(10, 2)
        x = paddle.to_tensor(x_data)
        y = paddle.to_tensor(y_data)
        self.assertRaises(ValueError, paddle.inner, x, y)

        # test dynamic computation graph: dtypes must be the same
        x_data = np.random.randn(200).astype(np.float32)
        y_data = np.random.randn(200).astype(np.float64)
        x = paddle.to_tensor(x_data)
        y = paddle.to_tensor(y_data)
        self.assertRaises(ValueError, paddle.inner, x, y)

        # test dynamic computation graph: inputs must be Tensor type
        x_data = np.random.randn(200).astype(np.float64)
        y_data = np.random.randn(200).astype(np.float64)
        y = paddle.to_tensor(y_data)
        self.assertRaises(ValueError, paddle.inner, x_data, y)

        # test dynamic computation graph: inputs must be Tensor type
        x_data = np.random.randn(200).astype(np.float64)
        y_data = np.random.randn(200).astype(np.float64)
        x = paddle.to_tensor(x_data)
        self.assertRaises(ValueError, paddle.inner, x, y_data)

        # test dynamic computation graph: inputs must be Tensor type
        x_data = np.random.randn(200).astype(np.float32)
        y_data = np.random.randn(200).astype(np.float32)
        self.assertRaises(ValueError, paddle.inner, x_data, y_data)
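
For contrast with the error cases above, here is a small hypothetical sketch of a valid paddle.inner call in dynamic mode, with matching float32 dtypes and broadcastable shapes:

import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.random.randn(2, 3).astype(np.float32))
y = paddle.to_tensor(np.random.randn(4, 3).astype(np.float32))
# inner() contracts over the last dimension, so the result shape is [2, 4].
out = paddle.inner(x, y)
print(out.shape)  # [2, 4]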
Example #8
 def run_static_api(self, place):
     paddle.enable_static()
     expected = calc_margin_rank_loss(self.x_data,
                                      self.y_data,
                                      self.label_data,
                                      margin=margin,
                                      reduction=reduction)
     with program_guard(Program(), Program()):
         x = paddle.static.data(name="x",
                                shape=[10, 10],
                                dtype="float64")
         y = paddle.static.data(name="y",
                                shape=[10, 10],
                                dtype="float64")
         label = paddle.static.data(name="label",
                                    shape=[10, 10],
                                    dtype="float64")
         margin_rank_loss = paddle.nn.loss.MarginRankingLoss(
             margin=margin, reduction=reduction)
         result = margin_rank_loss(x, y, label)
         exe = paddle.static.Executor(place)
         result_numpy, = exe.run(feed={
             "x": self.x_data,
             "y": self.y_data,
             "label": self.label_data
         },
                                 fetch_list=[result])
         self.assertTrue(np.allclose(result_numpy, expected))
         self.assertTrue('loss' in result.name)
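
In the snippet above, margin and reduction are assumed to come from the enclosing test parametrization; they are not defined inside the method. For reference, a hypothetical numpy sketch of the calc_margin_rank_loss reference it compares against (the real helper lives elsewhere in the test file):

import numpy as np

def calc_margin_rank_loss(x, y, label, margin=0.0, reduction='none'):
    # Element-wise margin ranking loss: max(0, -label * (x - y) + margin).
    loss = np.maximum(0.0, -label * (x - y) + margin)
    if reduction == 'mean':
        return np.mean(loss)
    if reduction == 'sum':
        return np.sum(loss)
    return loss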
Example #9
 def test_tensor_floordiv_scalar(self):
     # tensor(int64) // scalar(int)
     with program_guard(Program()):
         a = paddle.full([2, 2, 2], 3, dtype='int64')
         b = 2
         c = paddle.full([2, 2, 2], 1, dtype="int64")
         self.check_operation(a, b, c, '//')
Example #10
    def _test_api(self):
        paddle.enable_static()
        input = np.random.random([2, 25]).astype("float32")
        shape = [2, 5, 5]
        main_prog = Program()
        with program_guard(main_prog, Program()):
            positive_five = self.fill_constant([1], "int32", 5)
            x = self.data(name="x", shape=[2, 25], dtype="float32")

            actual_shape = self.data(name="shape", shape=[3], dtype="int32")

            # Situation 1: have shape (list, no tensor), no actual shape (Tensor)
            out_1 = self.reshape(x, shape)

            # Situation 2: have shape (list, no tensor), have actual shape (Tensor)
            out_2 = fluid.layers.reshape(
                x, shape=shape, actual_shape=actual_shape)

            # Situation 3: have shape (list, have tensor), no actual shape (Tensor)
            out_3 = self.reshape(x, shape=[positive_five, 10])

            # Situation 4: have shape (Tensor), no actual shape (Tensor)
            out_4 = self.reshape(x, shape=actual_shape)

        exe = paddle.static.Executor(place=paddle.CPUPlace())
        res_1, res_2, res_3, res_4 = exe.run(
            main_prog,
            feed={"x": input,
                  "shape": np.array([2, 5, 5]).astype("int32")},
            fetch_list=[out_1, out_2, out_3, out_4])

        assert np.array_equal(res_1, input.reshape(shape))
        assert np.array_equal(res_2, input.reshape(shape))
        assert np.array_equal(res_3, input.reshape([5, 10]))
        assert np.array_equal(res_4, input.reshape(shape))
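
The self.fill_constant / self.data / self.reshape indirection suggests the test is parametrized over equivalent reshape APIs. A hypothetical binding for the paddle 2.x static path, consistent with the call sites above (names are illustrative, not from the source):

import paddle

class Paddle2ReshapeBindings:
    # Hypothetical adapters matching the self.* call signatures used above.
    def fill_constant(self, shape, dtype, value):
        return paddle.full(shape, value, dtype=dtype)

    def data(self, name, shape, dtype):
        return paddle.static.data(name=name, shape=shape, dtype=dtype)

    def reshape(self, x, shape):
        return paddle.reshape(x, shape)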
Example #11
    def test_api(self):
        shape = [1000, 784]
        train_program = Program()
        startup_program = Program()
        with program_guard(train_program, startup_program):
            x1 = paddle.randn(shape, 'float32')
            x2 = paddle.randn(shape, 'float64')

            dim_1 = paddle.fluid.layers.fill_constant([1], "int64", 20)
            dim_2 = paddle.fluid.layers.fill_constant([1], "int32", 50)
            x3 = paddle.randn([dim_1, dim_2, 784])

            var_shape = paddle.static.data('X', [2], 'int32')
            x4 = paddle.randn(var_shape)

        place = paddle.CUDAPlace(
            0) if core.is_compiled_with_cuda() else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        res = exe.run(train_program,
                      feed={'X': np.array(shape, dtype='int32')},
                      fetch_list=[x1, x2, x3, x4])

        for out in res:
            self.assertAlmostEqual(np.mean(out), .0, delta=0.1)
            self.assertAlmostEqual(np.std(out), 1., delta=0.1)
Example #12
    def setUp(self):
        self._places = [paddle.CPUPlace()]
        if paddle.device.is_compiled_with_cuda():
            self._places.append(paddle.CUDAPlace(0))
        self._ema_decay = 0.999
        self._param_name = "fc.weight"
        self._train_program = static.Program()
        self._startup_prog = static.Program()

        strategy = paddle.distributed.fleet.DistributedStrategy()
        strategy.without_graph_optimization = True
        paddle.distributed.fleet.init(is_collective=True, strategy=strategy)

        with static.program_guard(self._train_program, self._startup_prog):
            with utils.unique_name.guard():
                data = static.data(name='x', shape=[-1, 5], dtype='float32')
                hidden = static.nn.fc(x=data,
                                      size=10,
                                      weight_attr=self._param_name)
                cost = paddle.mean(hidden)

                self._test_program = static.default_main_program().clone(
                    for_test=True)

                optimizer = paddle.optimizer.Adam(learning_rate=0.001)
                optimizer = paddle.distributed.fleet.distributed_optimizer(
                    optimizer, strategy)
                optimizer.minimize(cost)

                self._ema = static.ExponentialMovingAverage(self._ema_decay)
                self._ema.update()
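
A minimal usage sketch (hypothetical, assuming numpy and paddle.static are imported and this runs inside the same test class): run a training step on the program built above, then evaluate with the exponential-moving-average weights temporarily applied.

exe = static.Executor(self._places[0])
exe.run(self._startup_prog)

x = np.random.random((4, 5)).astype('float32')
exe.run(self._train_program, feed={'x': x})   # EMA update ops run with each step

# Temporarily swap parameters for their moving averages while evaluating.
with self._ema.apply(exe):
    exe.run(self._test_program, feed={'x': x})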
Example #13
def rnn_pretrain_forward(train_program, start_program, topo=None):
    with static.program_guard(train_program,
                              start_program), paddle.utils.unique_name.guard():
        batch_size = 1
        tokens = static.data(
            name="tokens", shape=[batch_size, -1], dtype="int64")
        seq_len = static.data(name="ids", shape=[batch_size], dtype="int64")
        labels = static.data(name="labels", shape=[batch_size], dtype="int64")
        data_holders = [tokens, seq_len, labels]
        vocab_size = 10
        num_classes = 2
        pad_token_id = 0
        model = RNNModel(
            vocab_size,
            num_classes,
            direction='forward',
            padding_idx=pad_token_id,
            pooling_type='max')

        optimizer = paddle.optimizer.Adam(
            parameters=model.parameters(), learning_rate=0.001)
        criterion = paddle.nn.CrossEntropyLoss()
        preds = model(tokens, seq_len)
        loss = criterion(preds, labels)

    return train_program, start_program, loss, optimizer, data_holders
Example #14
def mlp_forward(train_program, start_program):
    with static.program_guard(train_program,start_program), \
        utils.unique_name.guard():
        batch_size = 4
        hidden_size = 64
        input = static.data(name="input",
                            shape=[batch_size, hidden_size],
                            dtype='float32')
        label = static.data(name="label",
                            shape=[batch_size, 1],
                            dtype='float32')

        if _global_parallel_strategy == "dp_mp_pp":
            auto.shard_tensor(input,
                              dist_attr={
                                  "process_mesh": _global_process_mesh[0],
                                  "dims_mapping": [0, -1]
                              })
        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       initializer_range=0.02)
        predict = mlp(input)
        error_cost = paddle.nn.functional.square_error_cost(predict, label)
        loss = paddle.mean(error_cost)
    return loss, train_program, start_program
Example #15
def build_program(main_program, startup_program, image_shape, archs, args,
                  is_train):
    with static.program_guard(main_program, startup_program):
        data_loader, data, label, drop_path_prob, drop_path_mask = create_data_loader(
            image_shape, is_train, args)
        logits, logits_aux = archs(data, drop_path_prob, drop_path_mask,
                                   is_train, 10)
        top1 = paddle.metric.accuracy(input=logits, label=label, k=1)
        top5 = paddle.metric.accuracy(input=logits, label=label, k=5)
        loss = paddle.mean(F.softmax_with_cross_entropy(logits, label))

        if is_train:
            if auxiliary:
                loss_aux = paddle.mean(
                    F.softmax_with_cross_entropy(logits_aux, label))
                loss = loss + auxiliary_weight * loss_aux
            step_per_epoch = int(trainset_num / args.batch_size)
            learning_rate = paddle.optimizer.lr.CosineAnnealingDecay(
                lr, T_max=step_per_epoch * args.retain_epoch)
            optimizer = paddle.optimizer.Momentum(
                learning_rate,
                momentum,
                weight_decay=paddle.regularizer.L2Decay(weight_decay),
                grad_clip=nn.ClipGradByGlobalNorm(clip_norm=5.0))
            optimizer.minimize(loss)
            outs = [loss, top1, top5]
        else:
            outs = [loss, top1, top5]
    return outs, (data, label), data_loader
Example #16
    def test_pipeline_amp_optimizer(self):
        """ test pipeline&amp with device:all """
        role = role_maker.PaddleCloudRoleMaker(is_collective=True)
        fleet.init(role)

        strategy = paddle.distributed.fleet.DistributedStrategy()
        strategy.amp = True
        strategy.pipeline = True
        strategy.pipeline_configs = {
            'micro_batch_size': 1,
            'accumulate_steps': 2
        }

        train_prog, startup_prog = static.Program(), static.Program()
        with static.program_guard(train_prog, startup_prog):
            with fluid.unique_name.guard():
                avg_cost = self.net()

                optimizer = paddle.fluid.optimizer.Adam(0.01)
                optimizer = fleet.distributed_optimizer(optimizer,
                                                        strategy=strategy)
                optimizer.minimize(avg_cost)

        ops = train_prog._pipeline_opt['section_program'].global_block().ops
        ops = [op.type for op in ops]
        self.assertEqual(ops.count('send_v2'), 1)
        self.assertEqual(ops.count('recv_v2'), 1)
Example #17
def mlp_pretrain_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        input = static.data(name="input",
                            shape=[batch_size, sequence_len, hidden_size],
                            dtype='float32')
        label = static.data(name="label",
                            shape=[batch_size, sequence_len, 1],
                            dtype='float32')

        auto.shard_tensor(input,
                          dist_attr={
                              "process_mesh": _global_process_mesh,
                              "dims_mappig": [-1, -1, -1]
                          })

        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       dropout_ratio=0.1,
                       initializer_range=0.02)

        predict = mlp(input)
        error_cost = paddle.nn.functional.square_error_cost(predict, label)
        loss = paddle.mean(error_cost)

        loader = paddle.io.DataLoader.from_generator(feed_list=[input, label],
                                                     capacity=4 * batch_size,
                                                     iterable=True)

    return loss, train_program, start_program, loader
Example #18
def linear_static(func, device, dtype, np_x, np_weight, np_bias):
    paddle.enable_static()
    paddle.set_device(device)
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=[None, np_x.shape[1]], dtype=dtype)
            weight = static.data(name="weight",
                                 shape=np_weight.shape,
                                 dtype=dtype)
            bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
            x.stop_gradient = False
            weight.stop_gradient = False
            bias.stop_gradient = False
            out = func(x, weight, bias)
            mean_out = paddle.mean(out)
            static.append_backward(mean_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, x_grad_v, weight_grad_v, bias_grad_v = exe.run(
                static.default_main_program(),
                feed={
                    "x": np_x.astype(dtype),
                    "weight": np_weight.astype(dtype),
                    "bias": np_bias.astype(dtype)
                },
                fetch_list=[
                    out.name, x.name + "@GRAD", weight.name + "@GRAD",
                    bias.name + "@GRAD"
                ])
    paddle.disable_static()
    return out_v, x_grad_v, weight_grad_v, bias_grad_v
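
A hypothetical call of the helper above, using paddle's built-in F.linear as func and random numpy inputs:

import numpy as np
import paddle.nn.functional as F

np_x = np.random.random((3, 4)).astype('float32')
np_weight = np.random.random((4, 5)).astype('float32')
np_bias = np.random.random((5,)).astype('float32')

out, x_grad, w_grad, b_grad = linear_static(F.linear, 'cpu', 'float32',
                                            np_x, np_weight, np_bias)
print(out.shape)  # (3, 5)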
Example #19
    def test_static_graph(self):
        paddle.enable_static()

        dtype = 'float32'

        train_program = Program()
        startup_program = Program()

        with program_guard(train_program, startup_program):
            x = np.random.random(self.x_shape).astype(dtype)
            data_x = paddle.static.data('x',
                                        shape=self.data_x_shape,
                                        dtype=dtype)

            out = paddle.empty_like(data_x)

        place = paddle.CUDAPlace(
            0) if core.is_compiled_with_cuda() else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        res = exe.run(train_program, feed={'x': x}, fetch_list=[out])

        self.dst_dtype = dtype
        self.dst_shape = x.shape
        self.__check_out__(res[0])

        paddle.disable_static()
Example #20
def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
    paddle.enable_static()
    paddle.set_device(device)

    places = static.cpu_places() if device == 'cpu' else static.cuda_places()
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            # in static mode, x's data has been overwritten by out
            compiled_prog = static.CompiledProgram(
                static.default_main_program()).with_data_parallel(
                    loss_name=out.name, places=places)
            out_v = exe.run(compiled_prog,
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
Example #21
def mlp_pretrain_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 4
        hidden_size = 1024
        sequence_len = 512
        input = static.data(name="input",
                            shape=[batch_size, sequence_len, hidden_size],
                            dtype='float32')
        label = static.data(name="label",
                            shape=[batch_size, sequence_len, 1],
                            dtype='float32')

        auto.shard_tensor(input,
                          dist_attr={
                              "process_mesh": _global_process_mesh,
                              "dims_mappig": [-1, -1, -1]
                          })

        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       dropout_ratio=0.1,
                       initializer_range=0.02)

        predict = mlp(input)

        cost = layers.cross_entropy(input=predict, label=label)
        avg_cost = layers.mean(x=cost)

    return avg_cost, train_program, start_program
Example #22
def custom_relu_static(func,
                       device,
                       dtype,
                       np_x,
                       use_func=True,
                       test_infer=False):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())
            # in static mode, x's data has been overwritten by out
            out_v = exe.run(static.default_main_program(),
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
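
A hypothetical check of the helper above against a plain numpy ReLU, using paddle's built-in relu as the func under test:

import numpy as np
import paddle.nn.functional as F

np_x = np.random.uniform(-1.0, 1.0, (4, 8)).astype('float32')
out, = custom_relu_static(F.relu, 'cpu', 'float32', np_x)
np.testing.assert_allclose(out, np.maximum(np_x, 0.0), rtol=1e-5)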
Example #23
    def check_static_result(self, place):
        from paddle.distributed.fleet.meta_parallel.parallel_layers.random import dropout
        with static.program_guard(static.Program(), static.Program()):
            input = static.data(name="input", shape=[40, 40], dtype="float32")
            res1 = dropout(
                input,
                p=0.3,
                training=True,
                mode='upscale_in_train',
                rng_name='seed0')
            res2 = dropout(
                input,
                p=0.3,
                training=True,
                mode='upscale_in_train',
                rng_name='seed1')
            res3 = dropout(input, p=0.3)

            in_np = np.random.random([40, 40]).astype("float32")

            exe = static.Executor(place)
            res_list = [res1, res2]
            for i in range(2):
                out1, out2 = exe.run(static.default_main_program(),
                                     feed={"input": in_np},
                                     fetch_list=res_list)
                self.assertTrue(np.allclose(out1, out2))
Example #24
def mlp_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 4
        hidden_size = 1024
        sequence_len = 512
        input = static.data(name="input", shape=[batch_size], dtype='int32')
        label = static.data(name="label",
                            shape=[batch_size, 1],
                            dtype='float32')

        auto.shard_tensor(input,
                          dist_attr={
                              "process_mesh": PP_MESH_0,
                              "dims_mapping": [-1]
                          })
        auto.shard_tensor(label,
                          dist_attr={
                              "process_mesh": PP_MESH_1,
                              "dims_mapping": [-1, -1]
                          })

        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       initializer_range=0.02)

        predict = mlp(input)
        error_cost = paddle.nn.functional.square_error_cost(predict, label)
        loss = paddle.mean(error_cost)

    return loss, train_program, start_program
Example #25
    def test_attr_tensor_API(self):
        startup_program = Program()
        train_program = Program()
        with program_guard(train_program, startup_program):
            fill_value = 2.0
            input = paddle.fluid.data(name='input',
                                      dtype='float32',
                                      shape=[2, 3])
            output = paddle.full_like(input, fill_value)
            output_dtype = paddle.full_like(input, fill_value, dtype='float32')

            place = paddle.CPUPlace()
            if core.is_compiled_with_cuda():
                place = paddle.CUDAPlace(0)
            exe = paddle.static.Executor(place)
            exe.run(startup_program)

            img = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)

            res = exe.run(train_program,
                          feed={'input': img},
                          fetch_list=[output])

            out_np = np.array(res[0])
            self.assertTrue(not (out_np - np.full_like(img, fill_value)).any(),
                            msg="full_like output is wrong, out = " +
                            str(out_np))
Example #26
    def test_static_graph(self):
        for x_stop_gradient in [False, True]:
            for vec_stop_gradient in [False, True]:

                paddle.enable_static()

                train_program = Program()
                startup_program = Program()

                self.input_x = np.random.rand(5, 100).astype("float64")
                self.input_vec = np.random.rand(100).astype("float64")

                with program_guard(train_program, startup_program):
                    data_x = paddle.static.data("x",
                                                shape=[5, 100],
                                                dtype="float64")
                    data_vec = paddle.static.data("vec",
                                                  shape=[100],
                                                  dtype="float64")

                    data_x.stop_gradient = x_stop_gradient
                    data_vec.stop_gradient = vec_stop_gradient

                    result_vec = paddle.mv(data_x, data_vec)

                    self.place = paddle.CPUPlace()
                    exe = paddle.static.Executor(self.place)
                    res, = exe.run(feed={
                        "x": self.input_x,
                        "vec": self.input_vec
                    },
                                   fetch_list=[result_vec])
                    z_expected = np.array(np.dot(self.input_x, self.input_vec))
                    self.assertTrue(np.allclose(res, z_expected))
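
A hypothetical dynamic-graph equivalent of the static paddle.mv check above:

import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.random.rand(5, 100))
vec = paddle.to_tensor(np.random.rand(100))
out = paddle.mv(x, vec)   # matrix-vector product, shape [5]
np.testing.assert_allclose(out.numpy(), np.dot(x.numpy(), vec.numpy()))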
Example #27
def mlp_pretrain_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 4
        hidden_size = 1024
        sequence_len = 512
        input = static.data(name="input",
                            shape=[batch_size, sequence_len, hidden_size],
                            dtype='float32')

        if _global_parallel_strategy == "dp":
            auto.shard_tensor(input,
                              dist_attr={
                                  "process_mesh": _global_process_mesh,
                                  "dims_mapping": [0, -1, -1]
                              })
        elif _global_parallel_strategy == "dp_mp":
            auto.shard_tensor(input,
                              dist_attr={
                                  "process_mesh": _global_process_mesh,
                                  "dims_mapping": [0, -1, -1]
                              })

        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       dropout_ratio=0.1,
                       initializer_range=0.02)
        out = mlp(input)
    return train_program, start_program
Example #28
def mlp_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 4
        hidden_size = 1024
        sqrt_hidden_size = 32
        double_hidden_size = 64

        input = static.data(name="input", shape=[8, 8, 16], dtype='int32')
        input = paddle.reshape(input, [hidden_size])
        input = paddle.reshape(input, [sqrt_hidden_size, sqrt_hidden_size])
        embedding = paddle.nn.Embedding(2, batch_size, sparse=True)
        input = embedding(input)
        input = paddle.reshape(input, [hidden_size, batch_size])
        input = paddle.transpose(input, perm=[1, 0])
        matmulinput = static.data(name="matmulinput",
                                  shape=[hidden_size, hidden_size],
                                  dtype='float32')
        input = layers.matmul(x=input, y=matmulinput)
        label = static.data(name="label",
                            shape=[batch_size, 1],
                            dtype='float32')
        mlp = MLPLayer(hidden_size=hidden_size,
                       intermediate_size=4 * hidden_size,
                       initializer_range=0.02)

        predict = mlp(input)
        error_cost = paddle.nn.functional.square_error_cost(predict, label)
        loss = paddle.mean(error_cost)
        m = paddle.nn.Softmax()
        loss = m(loss)
    return loss, train_program, start_program
Example #29
def gpt_pretrain_forward(train_program, start_program):
    with static.program_guard(train_program,
                              start_program), utils.unique_name.guard():
        batch_size = 16
        sequence_len = 512
        input_ids = static.data(
            name="input_ids", shape=[batch_size, sequence_len], dtype='int64')
        position_ids = static.data(
            name="position_ids",
            shape=[batch_size, sequence_len],
            dtype='int64')
        attention_mask = static.data(
            name="attention_mask",
            shape=[batch_size, 1, sequence_len, sequence_len],
            dtype='float64')
        labels = static.data(
            name="labels", shape=[batch_size, sequence_len], dtype='int64')
        loss_mask = static.data(
            name="loss_mask", shape=[batch_size, sequence_len], dtype='float64')

        if _global_parallel_strategy == "dp":
            auto.shard_tensor(
                input_ids,
                dist_attr={
                    "process_mesh": _global_process_mesh,
                    "dims_mapping": [0, -1]
                })
        elif _global_parallel_strategy == "dp_mp":
            auto.shard_tensor(
                input_ids,
                dist_attr={
                    "process_mesh": _global_process_mesh,
                    "dims_mapping": [0, -1]
                })

        gpt = GPTModel(
            vocab_size=32768,
            hidden_size=1024,
            num_hidden_layers=2,
            num_attention_heads=16,
            intermediate_size=4096,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=1024,
            type_vocab_size=16,
            initializer_range=0.02,
            pad_token_id=0,
            topo=None)

        model = GPTForPretraining(gpt)

        preds = model(input_ids, position_ids, attention_mask)

        criterion = GPTPretrainingCriterion()

        loss = criterion(preds, labels, loss_mask)

    return train_program, start_program
Example #30
    def test_static_empty_input_error(self):
        paddle.enable_static()

        x_list_n_n, x_list_m_n = gen_empty_input()
        for p in (p_list_n_n + p_list_m_n):
            for x in x_list_n_n:
                with static.program_guard(static.Program(), static.Program()):
                    x_data = static.data("X", shape=x.shape, dtype=x.dtype)
                    self.assertRaises(ValueError, paddle.linalg.cond, x_data,
                                      p)

        for p in (p_list_n_n + p_list_m_n):
            for x in x_list_m_n:
                with static.program_guard(static.Program(), static.Program()):
                    x_data = static.data("X", shape=x.shape, dtype=x.dtype)
                    self.assertRaises(ValueError, paddle.linalg.cond, x_data,
                                      p)
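
The gen_empty_input helper is not shown; a hypothetical sketch of what it presumably returns, namely empty square and rectangular matrices that should make paddle.linalg.cond raise:

import numpy as np

def gen_empty_input():
    x_list_n_n = [np.empty((0, 0), dtype='float64')]   # empty square inputs
    x_list_m_n = [np.empty((0, 3), dtype='float64')]   # empty rectangular inputs
    return x_list_n_n, x_list_m_n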