Example #1
 def test_check_grad(self):
     self.calculate_grads()
     self.check_grad_with_place(core.CPUPlace(), ["X"],
                                "Out",
                                user_defined_grads=[self.dx],
                                user_defined_grad_outputs=[self.dout])
Example #2
 def test_check_output(self):
     if platform.system() == "Linux":
         self.check_output_with_place(place=core.CPUPlace(), atol=1e-7)
     else:
         self.check_output_with_place(place=core.CPUPlace(), atol=1e-5)
Example #3
 def place(self):
     return core.CPUPlace()
 def test_check_output_cpu(self):
     try:
         self.check_output_with_place(place=core.CPUPlace())
     except:
         print("do not support cpu test, skip")
Example #5
 def test_tensor_ptr(self):
     t = core.Tensor()
     np_arr = numpy.zeros([2, 3])
     t.set(np_arr, core.CPUPlace())
     self.assertGreater(t._ptr(), 0)
Example #6
    def test_fibonacci(self):
        """
        Mimics Fibonacci Go example: https://tour.golang.org/concurrency/5
        """
        with framework.program_guard(framework.Program()):
            quit_ch_input_var = self._create_persistable_tensor(
                'quit_ch_input', core.VarDesc.VarType.LOD_TENSOR,
                core.VarDesc.VarType.INT32)
            quit_ch_input = fill_constant(shape=[1],
                                          dtype=core.VarDesc.VarType.INT32,
                                          value=0,
                                          out=quit_ch_input_var)

            result = self._create_persistable_tensor(
                'result', core.VarDesc.VarType.LOD_TENSOR,
                core.VarDesc.VarType.INT32)
            fill_constant(shape=[1],
                          dtype=core.VarDesc.VarType.INT32,
                          value=0,
                          out=result)

            x = fill_constant(shape=[1],
                              dtype=core.VarDesc.VarType.INT32,
                              value=0)
            y = fill_constant(shape=[1],
                              dtype=core.VarDesc.VarType.INT32,
                              value=1)

            while_cond = fill_constant(shape=[1],
                                       dtype=core.VarDesc.VarType.BOOL,
                                       value=True)

            while_false = fill_constant(shape=[1],
                                        dtype=core.VarDesc.VarType.BOOL,
                                        value=False)

            x_tmp = fill_constant(shape=[1],
                                  dtype=core.VarDesc.VarType.INT32,
                                  value=0)

            def fibonacci(channel, quit_channel):
                while_op = While(cond=while_cond)
                with while_op.block():
                    result2 = fill_constant(shape=[1],
                                            dtype=core.VarDesc.VarType.INT32,
                                            value=0)

                    with fluid.Select() as select:
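                        # Case 1: send the current Fibonacci number x on `channel`,
                        # then advance the pair: (x, y) -> (y, x + y).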
                        with select.case(fluid.channel_send,
                                         channel,
                                         x,
                                         is_copy=True):
                            assign(input=x, output=x_tmp)
                            assign(input=y, output=x)
                            assign(elementwise_add(x=x_tmp, y=y), output=y)

                        with select.case(fluid.channel_recv, quit_channel,
                                         result2):
                            # Quit
                            helper = layer_helper.LayerHelper('assign')
                            helper.append_op(type='assign',
                                             inputs={'X': [while_false]},
                                             outputs={'Out': [while_cond]})

            ch1 = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)
            quit_ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)

            with fluid.Go():
                for i in range(10):
                    fluid.channel_recv(ch1, result)
                    Print(result)

                fluid.channel_send(quit_ch, quit_ch_input)

            fibonacci(ch1, quit_ch)

            fluid.channel_close(ch1)
            fluid.channel_close(quit_ch)

            cpu = core.CPUPlace()
            exe = Executor(cpu)

            exe_result = exe.run(fetch_list=[result])
            self.assertEqual(exe_result[0][0], 34)
Example #7
 def test_w_is_selected_rows(self):
     places = [core.CPUPlace()]
     # currently only support CPU
     for place in places:
         self.check_with_place(place)
Example #8
def train():
    args = parse_args()

    if args.enable_ce:
        framework.default_startup_program().random_seed = 111

    # Training process
    if args.no_attention:
        avg_cost, feed_order = no_attention_model.seq_to_seq_net(
            args.embedding_dim,
            args.encoder_size,
            args.decoder_size,
            args.dict_size,
            args.dict_size,
            False,
            beam_size=args.beam_size,
            max_length=args.max_length)
    else:
        avg_cost, feed_order = attention_model.seq_to_seq_net(
            args.embedding_dim,
            args.encoder_size,
            args.decoder_size,
            args.dict_size,
            args.dict_size,
            False,
            beam_size=args.beam_size,
            max_length=args.max_length)

    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone()

    optimizer = fluid.optimizer.Adam(
        learning_rate=args.learning_rate,
        regularization=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=1e-5))

    optimizer.minimize(avg_cost)

    # Disable shuffle for Continuous Evaluation only
    if not args.enable_ce:
        train_batch_generator = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.wmt14.train(args.dict_size), buf_size=1000),
                                             batch_size=args.batch_size,
                                             drop_last=False)

        test_batch_generator = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.wmt14.test(args.dict_size), buf_size=1000),
                                            batch_size=args.batch_size,
                                            drop_last=False)
    else:
        train_batch_generator = paddle.batch(paddle.dataset.wmt14.train(
            args.dict_size),
                                             batch_size=args.batch_size,
                                             drop_last=False)

        test_batch_generator = paddle.batch(paddle.dataset.wmt14.test(
            args.dict_size),
                                            batch_size=args.batch_size,
                                            drop_last=False)

    place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    feed_list = [
        main_program.global_block().var(var_name) for var_name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    def validation():
        # Use test set as validation each pass
        total_loss = 0.0
        count = 0
        val_feed_list = [
            inference_program.global_block().var(var_name)
            for var_name in feed_order
        ]
        val_feeder = fluid.DataFeeder(val_feed_list, place)

        for batch_id, data in enumerate(test_batch_generator()):
            val_fetch_outs = exe.run(inference_program,
                                     feed=val_feeder.feed(data),
                                     fetch_list=[avg_cost],
                                     return_numpy=False)

            total_loss += np.array(val_fetch_outs[0])[0]
            count += 1

        return total_loss / count

    for pass_id in range(1, args.pass_num + 1):
        pass_start_time = time.time()
        words_seen = 0
        for batch_id, data in enumerate(train_batch_generator()):
            words_seen += len(data) * 2

            fetch_outs = exe.run(framework.default_main_program(),
                                 feed=feeder.feed(data),
                                 fetch_list=[avg_cost])

            avg_cost_train = np.array(fetch_outs[0])
            print('pass_id=%d, batch_id=%d, train_loss: %f' %
                  (pass_id, batch_id, avg_cost_train))
            # This is for continuous evaluation only
            if args.enable_ce and batch_id >= 100:
                break

        pass_end_time = time.time()
        test_loss = validation()
        time_consumed = pass_end_time - pass_start_time
        words_per_sec = words_seen / time_consumed
        print("pass_id=%d, test_loss: %f, words/s: %f, sec/pass: %f" %
              (pass_id, test_loss, words_per_sec, time_consumed))

        # This log is for continuous evaluation only
        if args.enable_ce:
            print("kpis\ttrain_cost\t%f" % avg_cost_train)
            print("kpis\ttest_cost\t%f" % test_loss)
            print("kpis\ttrain_duration\t%f" % time_consumed)

        if pass_id % args.save_interval == 0:
            model_path = os.path.join(args.save_dir, str(pass_id))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)

            fluid.io.save_persistables(
                executor=exe,
                dirname=model_path,
                main_program=framework.default_main_program())
Example #9
def create_tensor(scope, name, np_data):
    tensor = scope.var(name).get_tensor()
    tensor.set(np_data, core.CPUPlace())
    return tensor
Example #10
    def check_with_place(self, place, data_layout, dtype, shape):
        epsilon = 0.00001
        if len(shape) == 2:
            x_shape = shape
            c = x_shape[1]
        else:
            n, h, w, c = shape[0], shape[1], shape[2], shape[3]
            if data_layout == "NHWC":
                x_shape = [n, h, w, c]
            elif data_layout == "NCHW":
                x_shape = [n, c, h, w]
            else:
                raise ValueError("Unknown data layout.")
        scale_shape = [c]

        x_val = np.random.random_sample(x_shape).astype(dtype)
        # generate some negative values to test case with relu fused
        x_val = x_val - 0.5
        scale_val = np.random.random_sample(scale_shape).astype(np.float32)
        bias_val = np.random.random_sample(scale_shape).astype(np.float32)

        mean = np.zeros(scale_shape).astype(np.float32)
        variance = np.ones(scale_shape).astype(np.float32)

        y_out = _reference_testing(x_val, scale_val, bias_val, mean, variance,
                                   epsilon, data_layout).astype(dtype)
        if self.fuse_with_relu:
            y_out = np.maximum(y_out, 0)

        scope = core.Scope()

        # create input
        x_tensor = create_or_get_tensor(scope, "x_val",
                                        OpTest.np_dtype_to_fluid_dtype(x_val),
                                        place)
        scale_tensor = create_or_get_tensor(
            scope, "scale_val", OpTest.np_dtype_to_fluid_dtype(scale_val),
            place)
        bias_tensor = create_or_get_tensor(
            scope, "bias_val", OpTest.np_dtype_to_fluid_dtype(bias_val), place)
        mean_tensor = create_or_get_tensor(
            scope, "mean", OpTest.np_dtype_to_fluid_dtype(mean), place)
        variance_tensor = create_or_get_tensor(
            scope, "variance", OpTest.np_dtype_to_fluid_dtype(variance), place)

        # create output
        y_tensor = create_or_get_tensor(scope, "y_out", None, place)
        saved_mean_tensor = create_or_get_tensor(scope, "saved_mean", None,
                                                 place)
        saved_variance_tensor = create_or_get_tensor(scope, "saved_variance",
                                                     None, place)
        mean_out_tensor = mean_tensor
        variance_out_tensor = variance_tensor

        batch_norm_op = Operator(
            "batch_norm",
            # inputs
            X="x_val",
            Scale="scale_val",
            Bias="bias_val",
            Mean="mean",
            Variance="variance",
            # outputs
            Y="y_out",
            MeanOut="mean",
            VarianceOut="variance",
            SavedMean="saved_mean",
            SavedVariance="saved_variance",
            # attrs
            is_test=True,
            data_layout=data_layout,
            use_mkldnn=self.use_mkldnn,
            fuse_with_relu=self.fuse_with_relu,
            epsilon=epsilon)

        batch_norm_op.run(scope, place)

        # When the op is run without an Executor, an MKL-DNN tensor is
        # returned. For the NHWC data layout its dims are reported in NCHW
        # order (the MKL-DNN way of describing memory), so we need to
        # convert the NCHW dims back into NHWC.
        if data_layout == "NHWC" and self.use_mkldnn == True:
            # Create executor to have MKL-DNN cache
            # cleared after NHWC unit test
            place = core.CPUPlace()
            exe = fluid.Executor(place)
            dims = y_tensor.shape()
            c = dims.pop(1)
            dims.append(c)
            y_tensor._set_dims(dims)

        # check inference result
        self.__assert_close(y_tensor,
                            y_out,
                            "inference output are different at " + str(place) +
                            ", " + data_layout + ", " + str(np.dtype(dtype)) +
                            str(np.array(y_tensor)) + str(y_out),
                            atol=1e-3)
Example #11
    def test_forward_backward(self):
        def test_with_place(place, data_layout, shape):
            # attr
            epsilon = self.epsilon
            momentum = self.momentum
            if data_layout == "NCHW":
                n, c, h, w = shape[0], shape[1], shape[2], shape[3]
            else:
                n, h, w, c = shape[0], shape[1], shape[2], shape[3]
            scale_shape = [c]

            np.random.seed(123)
            x = np.random.random_sample(shape).astype(np.float32)
            scale = np.random.random_sample(scale_shape).astype(np.float32)
            bias = np.random.random_sample(scale_shape).astype(np.float32)
            mean, variance = self.set_mean_variance(scale_shape, x,
                                                    data_layout)
            y_grad = np.random.random_sample(shape).astype(np.float32)
            momentum_var = np.array([momentum]).astype(np.float32)

            y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
                x, y_grad, scale, bias, mean, variance, epsilon, momentum,
                shape, data_layout)

            var_dict = locals()
            var_dict['y@GRAD'] = y_grad
            var_dict['x@GRAD'] = x_grad
            var_dict['scale@GRAD'] = scale_grad
            var_dict['bias@GRAD'] = bias_grad

            var_names = [
                'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
                'saved_variance', 'momentum_var'
            ]
            ground_truth = {name: var_dict[name] for name in var_names}

            program = fluid.Program()
            with fluid.program_guard(program):
                block = program.global_block()
                for name in ground_truth:
                    block.create_var(name=name,
                                     dtype='float32',
                                     shape=ground_truth[name].shape)
                inputs = {
                    "X": block.var('x'),
                    "Scale": block.var('scale'),
                    "Bias": block.var('bias'),
                    "Mean": block.var('mean'),
                    "Variance": block.var('variance')
                }
                attrs = {
                    "epsilon": epsilon,
                    "is_test": False,
                    "data_layout": data_layout,
                    "use_mkldnn": self.use_mkldnn,
                    "fuse_with_relu": self.fuse_with_relu,
                    "use_global_stats": self.use_global_stats
                }
                if self.use_momentum_variable:
                    inputs['MomentumTensor'] = block.var('momentum_var')
                else:
                    attrs['momentum'] = momentum

                outputs = {
                    "Y": block.var('y'),
                    "MeanOut": block.var('mean'),  # share memory
                    "VarianceOut": block.var('variance'),  # share memory
                    "SavedMean": block.var('saved_mean'),
                    "SavedVariance": block.var('saved_variance')
                }
                has_reserve_space = False
                if data_layout == 'NHWC':
                    flag = os.environ.get(
                        'FLAGS_cudnn_batchnorm_spatial_persistent')
                    if flag is not None and flag.lower() in ['true', '1']:
                        has_reserve_space = True
                if has_reserve_space:
                    block.create_var(name="reserve_space", dtype='float16')
                    outputs["ReserveSpace"] = block.var('reserve_space')
                    del os.environ['FLAGS_cudnn_batchnorm_spatial_persistent']
                bn_op = block.append_op(type="batch_norm",
                                        inputs=inputs,
                                        outputs=outputs,
                                        attrs=attrs)
                block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)

                # generate backward op_desc
                grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                    bn_op.desc, self.no_grad_set, [])
                grad_op_desc = grad_op_desc_list[0]
                new_op_desc = block.desc.append_op()
                new_op_desc.copy_from(grad_op_desc)
                for var_name in grad_op_desc.output_arg_names():
                    block.desc.var(var_name.encode("ascii"))
                grad_op_desc.infer_var_type(block.desc)
                grad_op_desc.infer_shape(block.desc)
                for arg in grad_op_desc.output_arg_names():
                    grad_var = block.desc.find_var(arg.encode("ascii"))
                    grad_var.set_dtype(core.VarDesc.VarType.FP32)

                program._sync_with_cpp()

                exe = fluid.Executor(place)
                out = exe.run(program,
                              feed={
                                  name: var_dict[name]
                                  for name in [
                                      'x', 'scale', 'bias', 'mean', 'variance',
                                      'y@GRAD', 'momentum_var'
                                  ]
                              },
                              fetch_list=self.fetch_list)

            for id, name in enumerate(self.fetch_list):
                if name == 'variance':
                    self.__assert_close(var_dict[name],
                                        out[id],
                                        name,
                                        atol=1e-3)
                    continue
                self.__assert_close(var_dict[name], out[id], name)
            print("op test forward passed: ", str(place), data_layout)

        places = [core.CPUPlace()]

        if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"):
            places.append(core.CUDAPlace(0))

        for place in places:
            for data_format in self.data_formats:
                test_with_place(place, data_format, [2, 3, 4, 5])
Example #12
 def test_scale_selected_rows_inplace(self):
     places = [core.CPUPlace()]
     if core.is_compiled_with_cuda():
         places.append(core.CUDAPlace(0))
     for place in places:
         self.check_with_place(place, 'in', 'in')
Example #13
 def test_check_grad(self):
     self.check_grad_with_place(core.CPUPlace(), ["X"], "Out")
Example #14
 def test_check_output(self):
     self.check_output_with_place(core.CPUPlace(), no_check_set=['XShape'])
Example #15
 def test_check_grad_ingore_y(self):
     place = core.CPUPlace()
     self.check_grad_with_place(place, ['X'],
                                'Out',
                                max_relative_error=0.5,
                                no_grad_set=set('Y'))
Example #16
def infer():
    args = parse_args()

    # Inference
    if args.no_attention:
        translation_ids, translation_scores, feed_order = \
            no_attention_model.seq_to_seq_net(
            args.embedding_dim,
            args.encoder_size,
            args.decoder_size,
            args.dict_size,
            args.dict_size,
            True,
            beam_size=args.beam_size,
            max_length=args.max_length)
    else:
        translation_ids, translation_scores, feed_order = \
            attention_model.seq_to_seq_net(
            args.embedding_dim,
            args.encoder_size,
            args.decoder_size,
            args.dict_size,
            args.dict_size,
            True,
            beam_size=args.beam_size,
            max_length=args.max_length)

    test_batch_generator = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.wmt14.test(args.dict_size), buf_size=1000),
                                        batch_size=args.batch_size,
                                        drop_last=False)

    place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    model_path = os.path.join(args.save_dir, str(args.pass_num))
    fluid.io.load_persistables(executor=exe,
                               dirname=model_path,
                               main_program=framework.default_main_program())

    src_dict, trg_dict = paddle.dataset.wmt14.get_dict(args.dict_size)

    feed_list = [
        framework.default_main_program().global_block().var(var_name)
        for var_name in feed_order[0:1]
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    for batch_id, data in enumerate(test_batch_generator()):
        # The value of batch_size may vary in the last batch
        batch_size = len(data)

        # Setup initial ids and scores lod tensor
        init_ids_data = np.array([0 for _ in range(batch_size)], dtype='int64')
        init_scores_data = np.array([1. for _ in range(batch_size)],
                                    dtype='float32')
        init_ids_data = init_ids_data.reshape((batch_size, 1))
        init_scores_data = init_scores_data.reshape((batch_size, 1))
        init_recursive_seq_lens = [1] * batch_size
        init_recursive_seq_lens = [
            init_recursive_seq_lens, init_recursive_seq_lens
        ]
        init_ids = fluid.create_lod_tensor(init_ids_data,
                                           init_recursive_seq_lens, place)
        init_scores = fluid.create_lod_tensor(init_scores_data,
                                              init_recursive_seq_lens, place)

        # Feed dict for inference
        feed_dict = feeder.feed([[x[0]] for x in data])
        feed_dict['init_ids'] = init_ids
        feed_dict['init_scores'] = init_scores

        fetch_outs = exe.run(framework.default_main_program(),
                             feed=feed_dict,
                             fetch_list=[translation_ids, translation_scores],
                             return_numpy=False)

        # Split the output words by lod levels
        lod_level_1 = fetch_outs[0].lod()[1]
        token_array = np.array(fetch_outs[0])
        result = []
        for i in six.moves.xrange(len(lod_level_1) - 1):
            sentence_list = [
                trg_dict[token]
                for token in token_array[lod_level_1[i]:lod_level_1[i + 1]]
            ]
            sentence = " ".join(sentence_list[1:-1])
            result.append(sentence)
        lod_level_0 = fetch_outs[0].lod()[0]
        paragraphs = [
            result[lod_level_0[i]:lod_level_0[i + 1]]
            for i in six.moves.xrange(len(lod_level_0) - 1)
        ]

        for paragraph in paragraphs:
            print(paragraph)
Example #17
 def test_check_output(self):
     place = core.CPUPlace()
     self.check_output_with_place(place, atol=1e-3)
Example #18
 def test_check_grad(self):
     self.check_grad_with_place(core.CPUPlace(), ['X', 'Y'], 'Out')
Example #19
def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
                   batch_acc, args, train_prog, startup_prog, nccl_id_var,
                   num_trainers, trainer_id):
    feed_var_list = [
        var for var in train_prog.global_block().vars.values()
        if var.is_data
    ]
    # generate fake input data: fill every feed variable with constant 1.0
    if args.use_fake_data:
        for var in feed_var_list:
            v = startup_prog.global_block().clone_variable(var)
            var.persistable = True
            v.persistable = True

            real_shape = list(var.shape)
            real_shape[0] = args.batch_size // args.gpus
            startup_prog.global_block().append_op(
                outputs={"Out": v},
                type="fill_constant",
                attrs={"shape": real_shape,
                       "value": 1.0,
                       "dtype": var.dtype})

    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    if nccl_id_var and trainer_id == 0:
        #FIXME(wuyi): wait other trainer to start listening
        time.sleep(30)

    startup_exe = fluid.Executor(place)
    startup_exe.run(startup_prog)
    strategy = fluid.ExecutionStrategy()
    strategy.num_threads = 1
    strategy.allow_op_delay = False
    exe = fluid.ParallelExecutor(
        True,
        avg_loss.name,
        exec_strategy=strategy,
        num_trainers=num_trainers,
        trainer_id=trainer_id)

    feeder = fluid.DataFeeder(feed_var_list, place)
    for pass_id in range(args.pass_num):
        num_samples = 0
        iters = 0
        start_time = time.time()
        for batch_id, data in enumerate(train_reader()):
            if args.profile and pass_id == 0 and batch_id == 5:
                profiler.start_profiler("All")
            elif args.profile and pass_id == 0 and batch_id == 10:
                profiler.stop_profiler("total", "/tmp/profile_%d" % trainer_id)

            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            if args.use_fake_data:
                loss, = exe.run([avg_loss.name])
            else:
                loss, = exe.run([avg_loss.name], feed=feeder.feed(data))
            if args.update_method == "pserver":
                exe.bcast_params()
            num_samples += len(data)
            iters += 1
            if batch_id % 1 == 0:
                print("Pass %d, batch %d, loss %s" %
                      (pass_id, batch_id, np.array(loss)))
        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed
        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
              (num_samples, train_elapsed, examples_per_sec))
        if not args.no_test and batch_acc is not None:
            test_acc = test(startup_exe, infer_prog, test_reader, feeder,
                            batch_acc)
            print("Pass: %d, Test Accuracy: %f\n" % (pass_id, test_acc))
        exit(0)
Example #20
 def test_check_grad_ingore_y(self):
     self.check_grad_with_place(core.CPUPlace(), ['X'], 'Out', set('Y'))
Example #21
    def test_out_scale_acc(self):
        seed = 1000
        lr = 0.1

        imperative_out_scale = ImperativeQuantAware()

        np.random.seed(seed)
        reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
        lenet = ImperativeLenet()
        fixed_state = {}
        for name, param in lenet.named_parameters():
            p_shape = param.numpy().shape
            p_value = param.numpy()
            if name.endswith("bias"):
                value = np.zeros_like(p_value).astype('float32')
            else:
                value = np.random.normal(
                    loc=0.0, scale=0.01,
                    size=np.product(p_shape)).reshape(p_shape).astype('float32')
            fixed_state[name] = value
        lenet.set_dict(fixed_state)
        imperative_out_scale.quantize(lenet)
        adam = AdamOptimizer(
            learning_rate=lr, parameter_list=lenet.parameters())
        dynamic_loss_rec = []
        lenet.train()
        for batch_id, data in enumerate(reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array(
                [x[1] for x in data]).astype('int64').reshape(-1, 1)

            img = fluid.dygraph.to_variable(x_data)
            label = fluid.dygraph.to_variable(y_data)

            out = lenet(img)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)
            avg_loss.backward()
            adam.minimize(avg_loss)
            lenet.clear_gradients()
            dynamic_loss_rec.append(avg_loss.numpy()[0])
            if batch_id % 100 == 0:
                _logger.info('{}: {}'.format('loss', avg_loss.numpy()))

        lenet.eval()

        path = "./save_dynamic_quant_infer_model/lenet"
        save_dir = "./save_dynamic_quant_infer_model"

        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])

        paddle.enable_static()

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

        [inference_program, feed_target_names, fetch_targets] = (
            fluid.io.load_inference_model(
                dirname=save_dir,
                executor=exe,
                model_filename="lenet" + INFER_MODEL_SUFFIX,
                params_filename="lenet" + INFER_PARAMS_SUFFIX))
        model_ops = inference_program.global_block().ops

        conv2d_count, mul_count = 0, 0
        for i, op in enumerate(model_ops):
            if op.type == 'conv2d':
                if conv2d_count > 0:
                    self.assertTrue(
                        'fake_quantize_dequantize' in model_ops[i - 1].type)
                else:
                    self.assertTrue(
                        'fake_quantize_dequantize' not in model_ops[i - 1].type)
                conv2d_count += 1

            if op.type == 'mul':
                if mul_count > 0:
                    self.assertTrue(
                        'fake_quantize_dequantize' in model_ops[i - 1].type)
                else:
                    self.assertTrue(
                        'fake_quantize_dequantize' not in model_ops[i - 1].type)
                mul_count += 1
Example #22
 def setUp(self):
     if core.is_compiled_with_cuda():
         self.place = core.CUDAPlace(0)
     else:
         self.place = core.CPUPlace()
Example #23
    def save_quantized_model(self, layer, path, input_spec=None, **config):
        """
        Save the quantized model for the inference.

        Args:
            layer (Layer): The Layer to be saved.
            path (str): The path prefix to save model. The format is ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input of the saved model's forward 
                method, which can be described by InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of the saved model. Default None.
            **config (dict, optional): Other save configuration options for compatibility. We do not
                recommend using these configurations, they may be removed in the future. If not necessary, 
                DO NOT use them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of the saved model.
                By default, all return variables of original Layer's forward method are kept as the 
                output of the saved model. If the provided ``output_spec`` list is not all output variables, 
                the saved model will be pruned according to the given ``output_spec`` list. 

        Returns:
            None
        """

        assert isinstance(
            layer,
            dygraph.Layer), "model must be the instance of dygraph.Layer"
        is_dynamic_mode = False
        with dygraph.guard():
            layer.eval()
            for handle in self._register_hook_handle_list:
                handle.remove()
            for key in self._out_scale_dict:
                self._out_scale_dict[key] = float(
                    self._out_scale_dict[key].numpy())

        paddle.jit.save(layer=layer,
                        path=path,
                        input_spec=input_spec,
                        **config)

        if paddle.in_dynamic_mode():
            is_dynamic_mode = True
            paddle.enable_static()

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = Executor(place)

        file_prefix = os.path.basename(path)
        dirname = os.path.dirname(path)
        model_filename = file_prefix + INFER_MODEL_SUFFIX
        params_filename = file_prefix + INFER_PARAMS_SUFFIX

        [inference_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        # Traverse all ops in the program and find out the op matching
        # the Layer in the dynamic graph.
        layer_var_dict = {}
        for block in inference_program.blocks:
            for op in block.ops:
                if op.type in _op_real_in_out_name:
                    output_var_names = quantization_pass._get_op_output_var_names(
                        op)
                    for output_var_name in output_var_names:
                        output_var_tensor = block.var(output_var_name)
                        if output_var_tensor.dtype not in [
                                core.VarDesc.VarType.FP64,
                                core.VarDesc.VarType.FP32
                        ]:
                            continue
                        # A dygraph Layer may correspond to multiple ops in the
                        # saved static program. To stay correct, the out-scale
                        # collected for a Layer's output must be set only on the
                        # last of those ops.
                        #
                        # The execution order of the ops belonging to a Layer can
                        # be judged from the output variable names, so a dict keeps
                        # the mapping from each dygraph Layer to the static-graph
                        # op that should receive the out-scale attribute.
                        if '.' not in output_var_name:
                            continue
                        dynamic_layer_name, var_name_suffix = output_var_name.split(
                            ".")
                        if dynamic_layer_name in layer_var_dict:
                            if layer_var_dict[dynamic_layer_name][
                                    0] < var_name_suffix:
                                layer_var_dict[dynamic_layer_name] = [
                                    var_name_suffix, op
                                ]
                        else:
                            layer_var_dict[dynamic_layer_name] = [
                                var_name_suffix, op
                            ]

        # Because the naming styles of static and dynamic graph are different,
        # in order to avoid mistakes, we unify the name here.
        for (layer_name, var_name_op_list) in layer_var_dict.items():
            if 'prelu' in layer_name:
                layer_name = layer_name.replace('prelu', 'p_re_lu')
            if 'relu' in layer_name:
                layer_name = layer_name.replace('relu', 're_lu')
            if layer_name not in self._out_scale_dict:
                continue
            var_name_op_list[1]._set_attr('out_threshold',
                                          self._out_scale_dict[layer_name])

        # Save the processed program.
        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=inference_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename)

        if is_dynamic_mode:
            paddle.disable_static()
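
For reference, here is a minimal usage sketch of the `save_quantized_model` API documented above. It mirrors the call made in Example #21; the toy network, the save path, and the `paddle.fluid.contrib.slim.quantization` import path are illustrative assumptions rather than details taken from this listing.

import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

# A toy dygraph Layer standing in for a trained model (illustrative only).
net = paddle.nn.Sequential(paddle.nn.Conv2D(1, 6, 3), paddle.nn.ReLU())

quantizer = ImperativeQuantAware()
quantizer.quantize(net)  # insert fake-quant ops, as in Example #21

# ... train `net` in dygraph mode here ...

# Save an inference model under dirname/file_prefix; separate model and
# params files are written, as in the method above.
quantizer.save_quantized_model(
    layer=net,
    path="./quant_infer_model/net",
    input_spec=[
        paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
    ])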
Example #24
    def check_forward_backward(self,
                               shape,
                               begin_norm_axis,
                               has_scale=True,
                               has_bias=True,
                               y_grad_scale=1.0):
        def test_with_place(place, shape, begin_norm_axis):
            # attr
            epsilon = 0.00001
            x_shape = shape
            D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
            scale_shape = [D]

            np.random.seed(123)
            x = np.random.random_sample(x_shape).astype(np.float32)
            scale = np.random.random_sample(scale_shape).astype(
                np.float32) if has_scale else None
            bias = np.random.random_sample(scale_shape).astype(
                np.float32) if has_bias else None
            y_grad = (np.random.random_sample(x_shape) * y_grad_scale).astype(
                np.float32)

            # reference forward & backward
            y, mean, variance = _reference_layer_norm_naive(
                x, scale, bias, epsilon, begin_norm_axis)
            x_grad, scale_grad, bias_grad = _reference_layer_norm_grad(
                x, y_grad, scale, bias, mean, variance, begin_norm_axis)

            var_dict = locals()
            var_dict['y@GRAD'] = y_grad
            var_names = ['x', 'mean', 'variance', 'y', 'y@GRAD']
            if has_scale:
                var_names += ['scale']
            if has_bias:
                var_names += ['bias']
            ground_truth = {name: var_dict[name] for name in var_names}

            program = fluid.Program()
            with fluid.program_guard(program):
                block = program.global_block()
                for name in ground_truth:
                    block.create_var(name=name,
                                     dtype='float32',
                                     shape=ground_truth[name].shape)
                inputs = {"X": block.var('x')}
                fetch_list = [
                    'y',
                    'mean',
                    'variance',
                    'x@GRAD',
                ]
                if has_scale:
                    inputs["Scale"] = block.var('scale')
                    fetch_list += ['scale@GRAD']
                if has_bias:
                    inputs["Bias"] = block.var('bias')
                    fetch_list += ['bias@GRAD']
                layer_norm_op = block.append_op(
                    type="layer_norm",
                    inputs=inputs,
                    outputs={
                        "Y": block.var('y'),
                        "Mean": block.var('mean'),  # share the same memory
                        "Variance":
                        block.var('variance'),  # share the same memory
                    },
                    attrs={
                        "epsilon": epsilon,
                        "begin_norm_axis": begin_norm_axis
                    })
                # generate backward op_desc
                grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                    layer_norm_op.desc, set(), [])
                grad_op_desc = grad_op_desc_list[0]
                new_op_desc = block.desc.append_op()
                new_op_desc.copy_from(grad_op_desc)
                for var_name in grad_op_desc.output_arg_names():
                    block.desc.var(var_name.encode("ascii"))
                grad_op_desc.infer_var_type(block.desc)
                grad_op_desc.infer_shape(block.desc)
                for arg in grad_op_desc.output_arg_names():
                    grad_var = block.desc.find_var(arg.encode("ascii"))
                    grad_var.set_dtype(core.VarDesc.VarType.FP32)

                program._sync_with_cpp()
                exe = fluid.Executor(place)
                out = exe.run(program,
                              feed={
                                  name: var_dict[name]
                                  for name in ['x', 'scale', 'bias', 'y@GRAD']
                              },
                              fetch_list=fetch_list)
                self.__assert_close(y, out[0], "y")
                self.__assert_close(mean, out[1], "mean")
                self.__assert_close(variance, out[2], "variance", 1e-3)
                self.__assert_close(x_grad, out[3], "x_grad")
                if has_scale:
                    self.__assert_close(scale_grad,
                                        out[fetch_list.index('scale@GRAD')],
                                        "scale_grad", 1e-3)
                if has_bias:
                    self.__assert_close(bias_grad,
                                        out[fetch_list.index('bias@GRAD')],
                                        "bias_grad")

        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda() and core.op_support_gpu(
                "layer_norm") and self.use_cudnn:
            places.append(core.CUDAPlace(0))

        for place in places:
            test_with_place(place, shape, begin_norm_axis)
Example #25
    def test_case(self):
        x = fluid.data(name="x", shape=[1, 3, 64], dtype="float32")

        dim = fluid.data(name="dim", shape=[1], dtype="int32")
        shape_tensor = fluid.data(name="shape_tensor",
                                  shape=[1],
                                  dtype="int32")
        actual_size = fluid.data(name="actual_size", shape=[1], dtype="int32")
        scale_tensor = fluid.data(name="scale_tensor",
                                  shape=[1],
                                  dtype="float32")

        out1 = fluid.layers.resize_linear(x,
                                          out_shape=[
                                              128,
                                          ],
                                          align_mode=1,
                                          align_corners=False)
        out2 = fluid.layers.resize_linear(x,
                                          out_shape=[128],
                                          align_mode=1,
                                          align_corners=False)
        out3 = fluid.layers.resize_linear(x,
                                          out_shape=shape_tensor,
                                          align_mode=1,
                                          align_corners=False)
        out4 = fluid.layers.resize_linear(x,
                                          out_shape=[
                                              128,
                                          ],
                                          actual_shape=actual_size,
                                          align_mode=1,
                                          align_corners=False)
        out5 = fluid.layers.resize_linear(x,
                                          scale=scale_tensor,
                                          align_mode=1,
                                          align_corners=False)

        out6 = interpolate(x,
                           scale_factor=scale_tensor,
                           mode='linear',
                           align_mode=1,
                           align_corners=False,
                           data_format='NCW')
        out7 = interpolate(x,
                           size=[
                               128,
                           ],
                           mode='linear',
                           align_mode=1,
                           align_corners=False,
                           data_format='NCW')
        out8 = interpolate(x,
                           size=shape_tensor,
                           mode='linear',
                           align_mode=1,
                           align_corners=False,
                           data_format='NCW')

        x_data = np.random.random((1, 3, 64)).astype("float32")
        dim_data = np.array([128]).astype("int32")
        shape_data = np.array([
            128,
        ]).astype("int32")
        actual_size_data = np.array([
            128,
        ]).astype("int32")
        scale_data = np.array([2.0]).astype("float32")

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        results = exe.run(
            fluid.default_main_program(),
            feed={
                "x": x_data,
                "dim": dim_data,
                "shape_tensor": shape_data,
                "actual_size": actual_size_data,
                "scale_tensor": scale_data
            },
            fetch_list=[out1, out2, out3, out4, out5, out6, out7, out8],
            return_numpy=True)

        expect_res = linear_interp_np(x_data,
                                      out_w=128,
                                      align_mode=1,
                                      align_corners=False)
        for res in results:
            self.assertTrue(np.allclose(res, expect_res))
Example #26
 def test_check_output(self):
     self.check_output_with_place(core.CPUPlace())
Example #27
    def check_forward_backward(self):
        def test_with_place(place):
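            # Reference gradients for broadcasted elementwise_add: dX equals
            # dOut, and dY equals dOut summed over every axis except
            # `self.axis` (the X axis that Y aligns with).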
            out_grad = np.random.random_sample(self.x.shape).astype(np.float32)
            x_grad = out_grad
            sum_axis = list(range(0, len(self.x.shape)))
            del sum_axis[self.axis]
            y_grad = np.sum(out_grad, axis=tuple(sum_axis))

            var_dict = locals()
            var_dict['y'] = self.y
            var_dict['x'] = self.x
            var_dict['out'] = self.out
            var_dict['y@GRAD'] = y_grad
            var_dict['x@GRAD'] = x_grad
            var_dict['out@GRAD'] = out_grad

            var_names = ['x', 'y', 'out', 'y@GRAD', 'x@GRAD', 'out@GRAD']
            ground_truth = {name: var_dict[name] for name in var_names}

            program = fluid.Program()
            with fluid.program_guard(program):
                block = program.global_block()
                for name in ground_truth:
                    block.create_var(name=name,
                                     dtype='float32',
                                     shape=ground_truth[name].shape)
                elementwise_add_op = block.append_op(type="elementwise_add",
                                                     inputs={
                                                         "X": block.var('x'),
                                                         "Y": block.var('y'),
                                                     },
                                                     outputs={
                                                         "Out":
                                                         block.var('out'),
                                                     },
                                                     attrs={
                                                         "axis": self.axis,
                                                     })

                # generate backward op_desc
                grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                    elementwise_add_op.desc, set(), [])
                grad_op_desc = grad_op_desc_list[0]
                new_op_desc = block.desc.append_op()
                new_op_desc.copy_from(grad_op_desc)
                for var_name in grad_op_desc.output_arg_names():
                    block.desc.var(var_name.encode("ascii"))
                grad_op_desc.infer_var_type(block.desc)
                grad_op_desc.infer_shape(block.desc)
                for arg in grad_op_desc.output_arg_names():
                    grad_var = block.desc.find_var(arg.encode("ascii"))
                    grad_var.set_dtype(core.VarDesc.VarType.FP32)

                exe = fluid.Executor(place)
                out = exe.run(program,
                              feed={
                                  name: var_dict[name]
                                  for name in ['x', 'y', 'out@GRAD']
                              },
                              fetch_list=['x@GRAD', 'y@GRAD'])
                self.__assert_close(x_grad, out[0], "x@GRAD")
                self.__assert_close(y_grad, out[1], "y@GRAD", atol=1.4)

        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda() and core.op_support_gpu(
                "elementwise_add"):
            places.append(core.CUDAPlace(0))

        for place in places:
            test_with_place(place)
Example #28
 def test_check_grad_normal(self):
     place = core.CPUPlace()
     self.check_grad_with_place(place, ['X', 'Y'], 'Out')
Example #29
 def test_check_output(self):
     # TODO(wangzhongpu): support mkldnn op in dygraph mode
     self.check_output_with_place(core.CPUPlace(),
                                  atol=0,
                                  check_dygraph=False)
Example #30
    def test_forward_backward(self):
        def test_with_place(place, data_layout, shape):
            # attr
            epsilon = 0.00001
            momentum = 0.9
            if data_layout == "NCHW":
                n, c, h, w = shape[0], shape[1], shape[2], shape[3]
            else:
                n, h, w, c = shape[0], shape[1], shape[2], shape[3]
            scale_shape = [c]

            np.random.seed(123)
            x = np.random.random_sample(shape).astype(np.float32)
            scale = np.random.random_sample(scale_shape).astype(np.float32)
            bias = np.random.random_sample(scale_shape).astype(np.float32)
            mean = np.zeros(scale_shape).astype(np.float32)
            variance = np.ones(scale_shape).astype(np.float32)

            y_grad = np.random.random_sample(shape).astype(np.float32)

            y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
                x, y_grad, scale, bias, mean, variance, epsilon, momentum,
                shape, data_layout)

            var_dict = locals()
            var_dict['y@GRAD'] = y_grad

            var_names = [
                'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
                'saved_variance'
            ]
            ground_truth = {name: var_dict[name] for name in var_names}

            program = fluid.Program()
            with fluid.program_guard(program):
                block = program.global_block()
                for name in ground_truth:
                    block.create_var(name=name,
                                     dtype='float32',
                                     shape=ground_truth[name].shape)
                bn_op = block.append_op(
                    type="batch_norm",
                    inputs={
                        "X": block.var('x'),
                        "Scale": block.var('scale'),
                        "Bias": block.var('bias'),
                        "Mean": block.var('mean'),
                        "Variance": block.var('variance')
                    },
                    outputs={
                        "Y": block.var('y'),
                        "MeanOut": block.var('mean'),  # share the same memory
                        "VarianceOut":
                        block.var('variance'),  # share the same memory
                        "SavedMean": block.var('saved_mean'),
                        "SavedVariance": block.var('saved_variance')
                    },
                    attrs={
                        "momentum": momentum,
                        "epsilon": epsilon,
                        "is_test": False,
                        "data_layout": data_layout,
                        "use_mkldnn": self.use_mkldnn
                    })
                block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)

                # generate backward op_desc
                grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                    bn_op.desc, set(), [])
                grad_op_desc = grad_op_desc_list[0]
                new_op_desc = block.desc.append_op()
                new_op_desc.copy_from(grad_op_desc)
                for var_name in grad_op_desc.output_arg_names():
                    block.desc.var(var_name.encode("ascii"))
                grad_op_desc.infer_var_type(block.desc)
                grad_op_desc.infer_shape(block.desc)
                for arg in grad_op_desc.output_arg_names():
                    grad_var = block.desc.find_var(arg.encode("ascii"))
                    grad_var.set_dtype(core.VarDesc.VarType.FP32)

                exe = fluid.Executor(place)
                out = exe.run(
                    program,
                    feed={
                        name: var_dict[name]
                        for name in
                        ['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD']
                    },
                    fetch_list=[
                        'y', 'mean', 'variance', 'saved_mean',
                        'saved_variance', 'x@GRAD', 'scale@GRAD', 'bias@GRAD'
                    ])

            self.__assert_close(y, out[0], "y")
            self.__assert_close(mean_out, out[1], "mean")
            self.__assert_close(variance_out, out[2], "variance", 1e-3)
            self.__assert_close(saved_mean, out[3], "saved_mean")
            self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3)
            self.__assert_close(x_grad, out[5], "x_grad")
            self.__assert_close(scale_grad, out[6], "scale_grad")
            self.__assert_close(bias_grad, out[7], "bias_grad")

            print "op test forward passed: ", str(place), data_layout

        places = [core.CPUPlace()]

        if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"):
            places.append(core.CUDAPlace(0))

        for place in places:
            for data_format in self.data_formats:
                test_with_place(place, data_format, [2, 3, 4, 5])