Example #1
    def _compare_fused_optimizer_ops(self,
                                     model,
                                     use_cuda,
                                     optimizer=fluid.optimizer.Adam):
        if use_cuda and not core.is_compiled_with_cuda():
            return
        img, label = init_data()
        feed_dict = {"image": img, "label": label}
        not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence(
            model,
            feed_dict=feed_dict,
            use_cuda=use_cuda,
            fuse_all_optimizer_ops=False,
            memory_opt=False,  # avoid the gradient's name being changed on the Python side.
            optimizer=optimizer)
        fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence(
            model,
            feed_dict=feed_dict,
            use_cuda=use_cuda,
            fuse_all_optimizer_ops=True,
            memory_opt=False,  # avoid the gradient's name being changed on the Python side.
            optimizer=optimizer)

        for loss in zip(not_fuse_op_first_loss, fuse_op_first_loss):
            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
        for loss in zip(not_fuse_op_last_loss, fuse_op_last_loss):
            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
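Every example on this page builds its feed dict from `init_data`, which lives in the Paddle test utilities and is not reproduced here. A minimal stand-in, assuming it only needs to draw a random MNIST-shaped batch (parameter names follow the calls in these snippets; the real helper may differ):

import numpy as np

def init_data(batch_size=32, img_shape=None, label_range=9):
    # Hypothetical sketch: random float32 images and int64 class labels in [0, label_range].
    img_shape = img_shape if img_shape is not None else [784]
    img = np.random.random(size=[batch_size] + list(img_shape)).astype('float32')
    label = np.random.randint(0, label_range + 1, size=[batch_size, 1]).astype('int64')
    return img, label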
Example #2
    def check_simple_fc_parallel_accuracy(self, use_device):
        if use_device and not core.is_compiled_with_cuda():
            return

        img, label = init_data()
        single_first_loss, single_last_loss = self.check_network_convergence(
            method=simple_fc_net,
            feed_dict={
                "image": img,
                "label": label
            },
            use_device=use_device,
            use_parallel_executor=False)
        parallel_first_loss, parallel_last_loss = self.check_network_convergence(
            method=simple_fc_net,
            feed_dict={
                "image": img,
                "label": label
            },
            use_device=use_device,
            use_parallel_executor=True)

        self.assertAlmostEqual(np.mean(parallel_first_loss),
                               single_first_loss,
                               delta=1e-6)
        self.assertAlmostEqual(np.mean(parallel_last_loss),
                               single_last_loss,
                               delta=1e-6)
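`simple_fc_net` is the shared test network referenced here and in Example #6; its definition is not part of this page. A rough sketch, assuming it is a plain FC stack over the `image`/`label` feed with a softmax cross-entropy loss (layer sizes are invented):

import paddle.fluid as fluid

def simple_fc_net_sketch():
    # Hypothetical stand-in: image -> several FC layers -> softmax cross-entropy loss.
    img = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = img
    for _ in range(4):
        hidden = fluid.layers.fc(hidden, size=100, act='relu')
    prediction = fluid.layers.fc(hidden, size=10, act='softmax')
    loss = fluid.layers.cross_entropy(input=prediction, label=label)
    return fluid.layers.mean(loss)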
Example #3
 def test_backward(self):
     batch_size = 2
     img, label = init_data(batch_size, img_shape=[784], label_range=9)
     feed_dict = {'image': img, 'label': label}
     self.check_backward(case1_fill_grad_vars, feed_dict)
     self.check_backward(case2_prune_no_grad_branch, feed_dict)
     self.check_backward(case3_prune_no_grad_branch2, {'label': label})
     self.check_backward(case4_with_no_grad_op_maker, {})
Example #4
 def setUpClass(cls):
     cls.save_dirname = "./"
     cls.model_filename = "test_parallel_executor_run_load_infer_program_model"
     cls.params_filename = "test_parallel_executor_run_load_infer_program_parameter"
     cls.place = fluid.CPUPlace()
     cls.exe = fluid.Executor(cls.place)
     img, label = init_data()
     cls.batch_data = []
     for img, label in zip(img, label):
         cls.batch_data.append([img, label])
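This `setUpClass` only prepares the save path, file names, and batch data; the inference program being loaded has to be saved elsewhere in the test. A hedged sketch of how those file names could be used with `fluid.io.save_inference_model` (the network, the `image` feed name, and the `prediction` variable are assumptions):

import paddle.fluid as fluid

def save_model_sketch(exe, main_program, prediction, cls):
    # Persist the inference program and parameters under the names prepared above.
    fluid.io.save_inference_model(dirname=cls.save_dirname,
                                  feeded_var_names=['image'],
                                  target_vars=[prediction],
                                  executor=exe,
                                  main_program=main_program,
                                  model_filename=cls.model_filename,
                                  params_filename=cls.params_filename)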
Example #5
 def setUpClass(cls):
     os.environ['CPU_NUM'] = str(4)
     batch_size = 4
     cls.img, cls.label = init_data(batch_size,
                                    img_shape=[784],
                                    label_range=9)
     cls.feed_dict = {
         'image': cls.img,
         'label': cls.label,
         'learning_rate': numpy.array([1.0]).astype("float32")
     }
Example #6
    def check_simple_fc_convergence(self, use_device, use_reduce=False):
        if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda():
            return

        img, label = init_data()
        self.check_network_convergence(simple_fc_net,
                                       feed_dict={
                                           "image": img,
                                           "label": label
                                       },
                                       use_device=use_device,
                                       use_reduce=use_reduce)
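This example and Example #14 branch on a `DeviceType` value imported from the parallel-executor test base, which is not shown on this page. A minimal stand-in sufficient to run these snippets (the real definition may include more members):

class DeviceType:
    # Hypothetical stand-in for the enum used by the parallel-executor test base.
    CPU = 1
    CUDA = 2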
Example #7
    def check_model(self, use_cuda):
        img, label = init_data(
            batch_size=batch_size, img_shape=img_shape, label_range=9)
        img = np.float16(img).view(np.uint16)
        feed_dict = {"image": img, "label": label}

        TestParallelExecutorBase.check_network_convergence(
            conv_net,
            feed_dict=feed_dict,
            iter=10,
            use_cuda=use_cuda,
            fuse_all_reduce_ops=True,
            optimizer=_optimizer)
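The `np.float16(img).view(np.uint16)` line above does not change any bytes: it rounds the batch to float16 and then reinterprets the same 16-bit patterns as unsigned integers, which is how the float16 feed data is handed over. A small self-contained illustration of the same trick:

import numpy as np

x = np.array([0.5, 1.0, 2.0], dtype=np.float32)
as_fp16 = np.float16(x)            # values rounded to float16
as_bits = as_fp16.view(np.uint16)  # same bytes, viewed as uint16
print(as_bits)                     # [14336 15360 16384]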
Example #8
    def test_batchnorm_fc(self):
        def optimizer():
            optimizer = fluid.optimizer.SGD(
                learning_rate=0.001,
                regularization=fluid.regularizer.L2Decay(1e-4))
            return optimizer

        with self.program_scope_guard():
            img, label = init_data()
            self.check_prune_correctness(
                method=fc_with_batchnorm,
                feed_dict={"image": img,
                           "label": label},
                optimizer=optimizer)
Example #9
    def _compare_fuse_elewise_add_act_ops(self, model, use_cuda):
        if use_cuda and not core.is_compiled_with_cuda():
            return
        img, label = init_data()

        def _optimizer(learning_rate=1e-6):
            optimizer = fluid.optimizer.SGD(
                learning_rate=learning_rate,
                regularization=fluid.regularizer.L2Decay(1e-6))
            return optimizer

        # NOTE(dzh): need to make this compatible with the elementwise-add + activation fuse pass.
        # FIXME(liuwei12): the new memory-optimize strategy will crash this unittest,
        # so enable_inplace=False is set here to force the unittest to pass.
        not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence(
            model,
            feed_dict={
                "image": img,
                "label": label
            },
            use_cuda=use_cuda,
            fuse_elewise_add_act_ops=False,
            memory_opt=False,
            use_ir_memory_optimize=False,
            enable_inplace=False,
            optimizer=_optimizer)
        fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence(
            model,
            feed_dict={
                "image": img,
                "label": label
            },
            use_cuda=use_cuda,
            fuse_elewise_add_act_ops=True,
            memory_opt=False,
            use_ir_memory_optimize=False,
            enable_inplace=False,
            optimizer=_optimizer)

        for loss in zip(not_fuse_op_first_loss, fuse_op_first_loss):
            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
        for loss in zip(not_fuse_op_last_loss, fuse_op_last_loss):
            self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
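The `fuse_elewise_add_act_ops` flag forwarded to `check_network_convergence` ultimately toggles a pass on the build strategy used to compile the program. A hedged sketch of that wiring, assuming a `main_program` and `loss` built beforehand (this is not the helper's actual body):

import paddle.fluid as fluid

build_strategy = fluid.BuildStrategy()
build_strategy.fuse_elewise_add_act_ops = True  # fuse elementwise_add + activation ops

compiled = fluid.CompiledProgram(main_program).with_data_parallel(
    loss_name=loss.name, build_strategy=build_strategy)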
Example #10
 def test_trainable(self):
     batch_size = 2
     img, label = init_data(batch_size, img_shape=[784], label_range=9)
     feed_dict = {'image': img, 'label': label}
     # Note that, because the weight of the FC layer is not trainable and x is
     # stop_gradient, the 'mul_grad' op should not be appended.
     self.check_trainable(test_trainable,
                          feed_dict,
                          op_count={
                              'adam': 1,
                              'scale': 0,
                              'mul_grad': 0
                          })
     self.check_trainable(
         test_trainable,
         feed_dict,
         op_count={
             'adamax': 1,
             'scale': 1,
             'mul_grad': 0
         },
         optimizer=fluid.optimizer.Adamax(learning_rate=0.2))
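The `op_count` assertions above presumably count operator types in the resulting main program. Counting ops of a given type can be done directly from the global block's op list; a small illustration (not the actual `check_trainable` implementation):

def count_ops(program, op_type):
    # Count how many operators of `op_type` appear in the program's global block.
    return sum(1 for op in program.global_block().ops if op.type == op_type)

# e.g. count_ops(fluid.default_main_program(), 'mul_grad') is expected to be 0 here.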
Example #11
    def check_backward(self, use_cuda):
        main = paddle.static.Program()
        startup = paddle.static.Program()

        with program_guard(main, startup):
            loss = simple_fc_net()
            loss = paddle.static.Print(loss)
            paddle.optimizer.Adam().minimize(loss)

        print_ops = [op for op in main.blocks[0].ops if op.type == 'print']
        assert len(print_ops) == 2, "The number of print ops should be 2"

        place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(startup)

        binary = paddle.static.CompiledProgram(main).with_data_parallel(
            loss_name=loss.name)

        img, label = init_data()
        feed_dict = {"image": img, "label": label}
        exe.run(binary, feed_dict)
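The final `exe.run(binary, feed_dict)` feeds the batch but fetches nothing back. If the test also wanted to read the loss, it could pass a fetch list, for example (an assumption about usage, not part of the original test):

loss_val, = exe.run(binary, feed=feed_dict, fetch_list=[loss.name])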
Example #12
    return optimizer


model = SE_ResNeXt50Small


def batch_size():
    return 12


def iter(use_cuda):
    if use_cuda:
        return 10
    return 2


gpu_img, gpu_label = init_data(batch_size=batch_size(),
                               img_shape=img_shape,
                               label_range=999)
cpu_img, cpu_label = init_data(batch_size=batch_size(),
                               img_shape=img_shape,
                               label_range=999)
feed_dict_gpu = {"image": gpu_img, "label": gpu_label}
feed_dict_cpu = {"image": cpu_img, "label": cpu_label}


def feed_dict(use_cuda):
    if use_cuda:
        return feed_dict_gpu
    return feed_dict_cpu
Example #13
 def _get_feed_dict(self):
     img, label = init_data()
     return {"image": img, "label": label}
Example #14
def batch_size(use_device):
    if use_device == DeviceType.CUDA:
        # Paddle uses an 8GB P4 GPU for unit tests, so the batch size is decreased.
        return 4
    return 12


def iter(use_device):
    if use_device == DeviceType.CUDA:
        return 10
    return 1


gpu_img, gpu_label = init_data(
    batch_size=batch_size(use_device=DeviceType.CUDA),
    img_shape=img_shape,
    label_range=999)
cpu_img, cpu_label = init_data(
    batch_size=batch_size(use_device=DeviceType.CPU),
    img_shape=img_shape,
    label_range=999)
feed_dict_gpu = {"image": gpu_img, "label": gpu_label}
feed_dict_cpu = {"image": cpu_img, "label": cpu_label}


def feed_dict(use_device):
    if use_device == DeviceType.CUDA:
        return feed_dict_gpu
    return feed_dict_cpu
Example #15
model = SE_ResNeXt50Small


def batch_size(use_cuda):
    if use_cuda:
        # Paddle uses an 8GB P4 GPU for unit tests, so the batch size is decreased.
        return 8
    return 12


def iter(use_cuda):
    if use_cuda:
        return 10
    return 1


gpu_img, gpu_label = init_data(batch_size=batch_size(use_cuda=True),
                               img_shape=img_shape,
                               label_range=999)
cpu_img, cpu_label = init_data(batch_size=batch_size(use_cuda=False),
                               img_shape=img_shape,
                               label_range=999)
feed_dict_gpu = {"image": gpu_img, "label": gpu_label}
feed_dict_cpu = {"image": cpu_img, "label": cpu_label}


def feed_dict(use_cuda):
    if use_cuda:
        return feed_dict_gpu
    return feed_dict_cpu
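These module-level helpers (`batch_size`, `iter`, `feed_dict`) are then consumed by the convergence checks, roughly as follows (a hedged usage sketch; only keyword arguments already seen in Example #7 are used):

# Hedged usage sketch: pick the pre-built batch and iteration count for the device.
use_cuda = core.is_compiled_with_cuda()
TestParallelExecutorBase.check_network_convergence(
    model,
    feed_dict=feed_dict(use_cuda),
    iter=iter(use_cuda),
    use_cuda=use_cuda)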