def test_forward(self):
        """Run an fc layer inside a ConditionalBlock forward and backward.

        The graph is built in the default main program.  The forward output
        of the block is fetched and printed, backward ops are appended for
        the mean of that output, and the ``X@GRAD`` variable is fetched and
        printed.  No assertions are made; the test only checks that both
        runs complete without raising.
        """
        data = layers.data(name='X', shape=[1], dtype='float32')
        # NOTE(review): presumably needed so that a gradient variable is
        # created for the input, which is fetched below -- confirm.
        data.stop_gradient = False
        cond = layers.ConditionalBlock(inputs=[data])
        out = layers.create_tensor(dtype='float32')
        with cond.block():
            hidden = layers.fc(input=data, size=10)
            layers.assign(hidden, out)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        exe.run(default_startup_program())

        x = numpy.random.random(size=(10, 1)).astype('float32')

        outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
        # Single-argument call form prints the same on Python 2 and 3.
        print(outs)
        loss = layers.mean(x=out)
        append_backward_ops(loss=loss)
        grad_var = default_main_program().block(0).var(data.name + "@GRAD")
        outs = exe.run(feed={'X': x}, fetch_list=[grad_var])[0]
        print(outs)
Exemplo n.º 2
0
    def test_grad(self):
        """Check the input gradient of a LoD tensor -> array -> tensor trip.

        A data layer ``x`` is converted to a tensor array with a rank table
        and converted back, the mean of the result is used as the loss, and
        the sum of the fetched ``x@GRAD`` values is expected to be 1.0
        within a delta of 0.1.
        """
        place = core.CPUPlace()
        program = Program()

        x = layers.data(
            name='x',
            shape=[1],
            dtype='float32',
            main_program=program,
            stop_gradient=False)
        rank_table = layers.lod_rank_table(x, level=0, main_program=program)
        tensor_array = layers.lod_tensor_to_array(
            x, rank_table, main_program=program)
        restored = layers.array_to_lod_tensor(
            tensor_array, rank_table, main_program=program)

        loss = layers.mean(x=restored, main_program=program)

        append_backward_ops(loss)

        # Feed values 0..9 as a (10, 1) float32 tensor with the LoD below.
        feed_tensor = core.LoDTensor()
        values = numpy.arange(10).reshape(10, 1).astype('float32')
        feed_tensor.set(values, place)
        feed_tensor.set_lod([[0, 3, 9, 10]])

        x_grad = program.global_block().var(x.name + "@GRAD")

        exe = Executor(place)
        fetched = exe.run(
            program,
            feed={'x': feed_tensor},
            fetch_list=[x_grad],
            return_numpy=False)
        per_item_sums = [numpy.array(item).sum() for item in fetched]
        grad_total = numpy.array(per_item_sums).sum()

        self.assertAlmostEqual(1.0, grad_total, delta=0.1)
    def test_grad(self):
        """Check the input gradient through split_lod_tensor/merge_lod_tensor.

        ``x`` is split by the boolean mask ``y`` and merged back, the mean of
        the merged tensor is the loss, and the sum of the fetched ``x@GRAD``
        values is expected to be 1.0 within a delta of 0.1.
        """
        place = core.CPUPlace()
        program = Program()

        x = layers.data(
            name='x',
            shape=[1],
            dtype='float32',
            main_program=program,
            stop_gradient=False)
        y = layers.data(
            name='y',
            shape=[1],
            dtype='bool',
            main_program=program,
            stop_gradient=False)

        lod_level = 0

        true_part, false_part = layers.split_lod_tensor(
            input=x, mask=y, level=lod_level, main_program=program)
        merged = layers.merge_lod_tensor(
            in_true=true_part,
            in_false=false_part,
            mask=y,
            x=x,
            level=lod_level,
            main_program=program)
        loss = layers.mean(x=merged, main_program=program)

        append_backward_ops(loss)

        # Feed values 0..9 as a (10, 1) float32 tensor with the LoD below.
        x_tensor = core.LoDTensor()
        x_tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
        x_tensor.set_lod([[0, 3, 9, 10]])

        # Boolean mask [0, 1, 0] expanded to shape (3, 1).
        mask_values = np.expand_dims(
            np.array([0, 1, 0]).astype('bool'), axis=1)

        y_tensor = core.LoDTensor()
        y_tensor.set(mask_values, place)

        exe = Executor(place)
        scope = core.Scope()

        x_grad = program.global_block().var(x.name + "@GRAD")
        fetched = exe.run(
            program,
            feed={'x': x_tensor,
                  'y': y_tensor},
            fetch_list=[x_grad],
            scope=scope,
            return_numpy=False)
        per_item_sums = [np.array(item).sum() for item in fetched]

        grad_total = np.array(per_item_sums).sum()

        self.assertAlmostEqual(1.0, grad_total, delta=0.1)
Exemplo n.º 4
0
    def test_forward(self):
        """Run an fc layer inside a ConditionalBlock forward and backward.

        The graph is built in the default main program.  The forward output
        of the block is fetched and printed, backward ops are appended for
        the mean of that output, and the ``X@GRAD`` variable is fetched and
        printed.  No assertions are made; the test only checks that both
        runs complete without raising.
        """
        data = layers.data(name='X', shape=[1], dtype='float32')
        # NOTE(review): presumably needed so that a gradient variable is
        # created for the input, which is fetched below -- confirm.
        data.stop_gradient = False
        cond = layers.ConditionalBlock(inputs=[data])
        out = layers.create_tensor(dtype='float32')
        with cond.block():
            hidden = layers.fc(input=data, size=10)
            layers.assign(hidden, out)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        exe.run(default_startup_program())

        x = numpy.random.random(size=(10, 1)).astype('float32')

        outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
        # Single-argument call form prints the same on Python 2 and 3.
        print(outs)
        loss = layers.mean(x=out)
        append_backward_ops(loss=loss)
        grad_var = default_main_program().block(0).var(data.name + "@GRAD")
        outs = exe.run(feed={'X': x}, fetch_list=[grad_var])[0]
        print(outs)
    def test_grad(self):
        """Check the input gradient of a LoD tensor -> array -> tensor trip.

        A data layer ``x`` is converted to a tensor array with a rank table
        and converted back, the mean of the result is used as the loss, and
        the sum of the fetched ``x@GRAD`` values is expected to be 1.0
        within a delta of 0.1.
        """
        place = core.CPUPlace()
        program = Program()

        x = layers.data(
            name='x',
            shape=[1],
            dtype='float32',
            main_program=program,
            stop_gradient=False)
        rank_table = layers.lod_rank_table(x, level=0, main_program=program)
        tensor_array = layers.lod_tensor_to_array(
            x, rank_table, main_program=program)
        restored = layers.array_to_lod_tensor(
            tensor_array, rank_table, main_program=program)

        loss = layers.mean(x=restored, main_program=program)

        append_backward_ops(loss)

        # Feed values 0..9 as a (10, 1) float32 tensor with the LoD below.
        feed_tensor = core.LoDTensor()
        values = numpy.arange(10).reshape(10, 1).astype('float32')
        feed_tensor.set(values, place)
        feed_tensor.set_lod([[0, 3, 9, 10]])

        x_grad = program.global_block().var(x.name + "@GRAD")

        exe = Executor(place)
        fetched = exe.run(
            program,
            feed={'x': feed_tensor},
            fetch_list=[x_grad],
            return_numpy=False)
        per_item_sums = [numpy.array(item).sum() for item in fetched]
        grad_total = numpy.array(per_item_sums).sum()

        self.assertAlmostEqual(1.0, grad_total, delta=0.1)
    def test_shrink_rnn_memory(self):
        """Check shrink_memory outputs at three steps and the input gradient.

        ``shrink_memory`` is chained three times with a counter that is
        incremented between calls.  The fetched results are compared with
        the first 3, 2 and 1 rows of the fed array, and the sum of the
        fetched ``x@GRAD`` is expected to be 1.0 within a delta of 0.1.
        """
        x = layers.data('x', shape=[100], dtype='float32')
        x.stop_gradient = False
        rank_table = layers.lod_rank_table(x=x)

        step0 = layers.zeros(dtype='int64', shape=[1])
        mem1 = layers.shrink_memory(x=x, i=step0, table=rank_table)

        step1 = layers.increment(x=step0)
        step1.stop_gradient = True
        mem2 = layers.shrink_memory(x=mem1, i=step1, table=rank_table)

        step2 = layers.increment(x=step1)
        step2.stop_gradient = True
        mem3 = layers.shrink_memory(x=mem2, i=step2, table=rank_table)

        cpu = core.CPUPlace()
        tensor = core.LoDTensor()
        tensor.set_lod([[0, 2, 5, 6]])
        tensor_np = numpy.random.random(size=(3, 100)).astype('float32')
        tensor.set(tensor_np, cpu)
        exe = Executor(cpu)
        outs = exe.run(feed={'x': tensor}, fetch_list=[mem1, mem2, mem3])

        # outs[k] is compared against the first (3 - k) rows of the input.
        for idx, kept_rows in enumerate((3, 2, 1)):
            matches = numpy.allclose(tensor_np[0:kept_rows], outs[idx])
            self.assertTrue(matches)

        mem3_mean = layers.mean(x=mem3)
        append_backward_ops(loss=mem3_mean)
        grad_var = main_program.global_block().var('x@GRAD')
        x_grad = exe.run(feed={'x': tensor}, fetch_list=[grad_var])[0]
        self.assertAlmostEqual(1.0, x_grad.sum(), delta=0.1)
Exemplo n.º 7
0
    def test_simple_forward(self):
        """Accumulate three inputs inside a While block and check the total.

        ``d0``, ``d1`` and ``d2`` are written to a tensor array.  Inside the
        While block each element is read, added to the previous entry of
        ``mem_array`` and written back at the incremented index.  After
        backward ops are appended, the program is run once and the sum of
        the fetched result is compared with the sum of the three fed arrays
        (delta 0.01).
        """
        d0 = layers.data(
            "d0", shape=[10], append_batch_size=False, dtype='float32')
        d1 = layers.data(
            "d1", shape=[10], append_batch_size=False, dtype='float32')
        d2 = layers.data(
            "d2", shape=[10], append_batch_size=False, dtype='float32')
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True
        init = layers.zeros(shape=[10], dtype='float32')
        mem_array = layers.array_write(x=init, i=i)
        data_array = layers.array_write(x=d0, i=i)

        i = layers.increment(i)
        layers.array_write(d1, i, array=data_array)

        i = layers.increment(i)
        layers.array_write(d2, i, array=data_array)

        # Fresh counter for the loop, starting again from zero.
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True

        array_len = layers.fill_constant(shape=[1], dtype='int64', value=3)
        array_len.stop_gradient = True
        cond = layers.less_than(x=i, y=array_len)

        while_op = layers.While(cond=cond)
        with while_op.block():
            step_input = layers.array_read(array=data_array, i=i)
            prev = layers.array_read(array=mem_array, i=i)
            result = layers.sums(input=[step_input, prev])

            i = layers.increment(x=i, in_place=True)
            layers.array_write(result, i=i, array=mem_array)
            # Writes the refreshed comparison into the existing `cond`.
            layers.less_than(x=i, y=array_len, cond=cond)

        sum_result = layers.array_read(array=mem_array, i=i)
        loss = layers.mean(x=sum_result)

        append_backward_ops(loss)

        cpu = core.CPUPlace()
        exe = Executor(cpu)

        # `range` works on Python 2 and 3, and the throwaway index no longer
        # rebinds the graph counter `i` or the in-loop read variable.
        feed_values = [
            numpy.random.random(size=[10]).astype('float32')
            for _ in range(3)
        ]

        outs = exe.run(feed={'d0': feed_values[0],
                             'd1': feed_values[1],
                             'd2': feed_values[2]},
                       fetch_list=[sum_result])
        self.assertAlmostEqual(
            numpy.sum(feed_values), numpy.sum(outs[0]), delta=0.01)
Exemplo n.º 8
0
    def test_backward(self):
        """Compare analytic gradients with numerical ones for each field.

        Runs ``check_forward``, appends backward ops for ``self.output``,
        then requires, for every index of ``self.data_field``, equal shapes
        and element-wise closeness (``rtol=0.1``) between the numerical and
        analytic gradients.
        """
        self.check_forward()

        append_backward_ops(self.output)

        analytic = [np.array(grad) for grad in self.backward()]
        numeric = self.get_numerical_gradient()

        for pos, _ in enumerate(self.data_field):
            expected = numeric[pos]
            actual = analytic[pos]
            self.assertEqual(expected.shape, actual.shape)
            close = np.isclose(expected, actual, rtol=0.1)
            self.assertTrue(close.all())
Exemplo n.º 9
0
    def test_backward(self):
        """Compare analytic gradients with numerical ones for each field.

        Runs ``check_forward``, appends backward ops for ``self.output``,
        then requires, for every index of ``self.data_field``, equal shapes
        and element-wise closeness (``rtol=0.1``) between the numerical and
        analytic gradients.
        """
        self.check_forward()

        append_backward_ops(self.output)

        analytic = [np.array(grad) for grad in self.backward()]
        numeric = self.get_numerical_gradient()

        for pos, _ in enumerate(self.data_field):
            expected = numeric[pos]
            actual = analytic[pos]
            self.assertEqual(expected.shape, actual.shape)
            close = np.isclose(expected, actual, rtol=0.1)
            self.assertTrue(close.all())
Exemplo n.º 10
0
 def test_l2decay_regularizer(self):
     """Check the ops appended for a parameter with L2DecayRegularizer.

     Builds a ``mul`` -> ``mean`` program whose ``mul.x`` parameter carries
     an ``L2DecayRegularizer(0.5)``, appends backward ops, then applies
     ``optimizer.append_regularization_ops`` and asserts that exactly two
     ops were added to the block, ending with ``scale`` followed by
     ``elementwise_add``.
     """
     program = framework.Program()
     block = program.global_block()
     mul_x = block.create_parameter(
         dtype="float32",
         shape=[5, 10],
         lod_level=0,
         name="mul.x",
         regularizer=regularizer.L2DecayRegularizer(0.5))
     # Dedicated assertions report the offending value when they fail.
     self.assertIsNotNone(mul_x.regularizer)
     self.assertIsInstance(mul_x.regularizer,
                           regularizer.L2DecayRegularizer)
     mul_y = block.create_var(
         dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
     mul_out = block.create_var(
         dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
     block.append_op(
         type="mul",
         inputs={"X": mul_x,
                 "Y": mul_y},
         outputs={"Out": mul_out},
         attrs={"x_num_col_dims": 1})
     mean_out = block.create_var(
         dtype="float32", shape=[1], lod_level=0, name="mean.out")
     block.append_op(
         type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
     params_grads = append_backward_ops(mean_out)
     self.assertEqual(len(params_grads), 1)
     # Op count before regularization, used to measure how many were added.
     count_ops = len(block.ops)
     params_grads = optimizer.append_regularization_ops(params_grads)
     self.assertEqual(len(params_grads), 1)
     self.assertEqual(len(block.ops), count_ops + 2)
     self.assertEqual(block.ops[-1].type, 'elementwise_add')
     self.assertEqual(block.ops[-2].type, 'scale')
Exemplo n.º 11
0
    def test_adamax_optimizer(self):
        """Check ops, accumulators and init ops created by MockAdamax.

        Builds a ``mul`` -> ``mean`` program, appends backward ops and calls
        ``create_optimization_pass``.  Asserts that two optimization ops are
        returned with the first of type ``adamax``, that moment and inf-norm
        accumulators exist for ``mul.x``, and that the init program holds
        four ops, the first being a ``fill_constant`` whose value equals the
        learning rate.
        """
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
        learning_rate = 0.01
        adamax_optimizer = self.MockAdamax(
            learning_rate=learning_rate, beta1=0.9, beta2=0.999)
        params_grads = append_backward_ops(mean_out)
        self.assertEqual(len(params_grads), 1)
        # No accumulators may exist before the optimization pass is created.
        self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
        opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out,
                                                         init_program)
        self.assertEqual(len(opts), 2)
        adamax_op = opts[0]
        self.assertEqual(adamax_op.type, "adamax")

        # Check accumulators
        accumulators = adamax_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 2)
        moment_key = adamax_optimizer.get_moment_str()
        inf_norm_key = adamax_optimizer.get_inf_norm_str()
        # assertIn reports the missing key and the container on failure.
        self.assertIn(moment_key, accumulators)
        self.assertIn(inf_norm_key, accumulators)
        moment_acc = accumulators[moment_key]
        inf_norm_acc = accumulators[inf_norm_key]
        self.assertEqual(len(moment_acc), 1)
        self.assertEqual(len(inf_norm_acc), 1)
        self.assertIn(mul_x.name, moment_acc)
        self.assertIn(mul_x.name, inf_norm_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 4)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
Exemplo n.º 12
0
    def test_vanilla_momentum_optimizer(self):
        """Check ops, accumulators and init ops created by MockMomentum.

        Builds a ``mul`` -> ``mean`` program, appends backward ops and calls
        ``create_optimization_pass``.  Asserts that one ``momentum`` op is
        returned with a falsy ``use_nesterov`` attribute, that a velocity
        accumulator exists for ``mul.x``, and that the init program holds two
        ``fill_constant`` ops with values equal to the learning rate and 0.0.
        """
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        learning_rate = 0.01
        momentum_optimizer = self.MockMomentum(
            learning_rate=learning_rate, momentum=0.2)
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
        params_grads = append_backward_ops(mean_out)
        self.assertEqual(len(params_grads), 1)
        # No accumulators may exist before the optimization pass is created.
        self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
        opts = momentum_optimizer.create_optimization_pass(
            params_grads, mul_out, init_program)
        self.assertEqual(len(opts), 1)
        momentum_op = opts[0]
        self.assertEqual(momentum_op.type, "momentum")
        self.assertFalse(momentum_op.attr('use_nesterov'))

        # Check accumulators
        accumulators = momentum_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 1)
        velocity_key = momentum_optimizer.get_velocity_str()
        # assertIn reports the missing key and the container on failure.
        self.assertIn(velocity_key, accumulators)
        velocity_acc = accumulators[velocity_key]
        self.assertEqual(len(velocity_acc), 1)
        self.assertIn(mul_x.name, velocity_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 2)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
        self.assertEqual(init_ops[1].type, "fill_constant")
        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
Exemplo n.º 13
0
 def test_l2decay_regularizer(self):
     """Check the ops appended for a parameter with L1DecayRegularizer.

     NOTE(review): despite the method name, the body exercises
     ``L1DecayRegularizer``; the name is kept so the test id is unchanged.

     Builds a ``mul`` -> ``mean`` program whose ``mul.x`` parameter carries
     an ``L1DecayRegularizer(0.5)``, appends backward ops, then applies
     ``optimizer.append_regularization_ops`` and asserts that exactly three
     ops were added to the block, ending with ``sign``, ``scale`` and
     ``elementwise_add`` in that order.
     """
     program = framework.Program()
     block = program.global_block()
     mul_x = block.create_parameter(
         dtype="float32",
         shape=[5, 10],
         lod_level=0,
         name="mul.x",
         regularizer=regularizer.L1DecayRegularizer(0.5))
     # Dedicated assertions report the offending value when they fail.
     self.assertIsNotNone(mul_x.regularizer)
     self.assertIsInstance(mul_x.regularizer,
                           regularizer.L1DecayRegularizer)
     mul_y = block.create_var(dtype="float32",
                              shape=[10, 8],
                              lod_level=0,
                              name="mul.y")
     mul_out = block.create_var(dtype="float32",
                                shape=[5, 8],
                                lod_level=0,
                                name="mul.out")
     block.append_op(type="mul",
                     inputs={
                         "X": mul_x,
                         "Y": mul_y
                     },
                     outputs={"Out": mul_out},
                     attrs={"x_num_col_dims": 1})
     mean_out = block.create_var(dtype="float32",
                                 shape=[1],
                                 lod_level=0,
                                 name="mean.out")
     block.append_op(type="mean",
                     inputs={"X": mul_out},
                     outputs={"Out": mean_out})
     params_grads = append_backward_ops(mean_out)
     self.assertEqual(len(params_grads), 1)
     # Op count before regularization, used to measure how many were added.
     count_ops = len(block.ops)
     params_grads = optimizer.append_regularization_ops(params_grads)
     self.assertEqual(len(params_grads), 1)
     self.assertEqual(len(block.ops), count_ops + 3)
     self.assertEqual(block.ops[-1].type, 'elementwise_add')
     self.assertEqual(block.ops[-2].type, 'scale')
     self.assertEqual(block.ops[-3].type, 'sign')
    def test_read_write(self):
        """Round-trip three inputs through a tensor array and check gradients.

        ``x0``..``x2`` are written to an array at indices produced by
        ``increment`` and read back the same way.  The sum of the means of
        the values read back must equal the sum of the means of the inputs.
        After backward ops are appended for the scaled total, the summed
        input gradients are expected to be 1.0 within a delta of 0.1.
        """
        x = [
            layers.data(name='x0', shape=[100]),
            layers.data(name='x1', shape=[100]),
            layers.data(name='x2', shape=[100])
        ]

        for each_x in x:
            each_x.stop_gradient = False

        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = False
        arr = layers.array_write(x=x[0], i=i)
        i = layers.increment(x=i)
        arr = layers.array_write(x=x[1], i=i, array=arr)
        i = layers.increment(x=i)
        arr = layers.array_write(x=x[2], i=i, array=arr)

        # Fresh counter for reading the array back from index zero.
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = False
        a0 = layers.array_read(array=arr, i=i)
        i = layers.increment(x=i)
        a1 = layers.array_read(array=arr, i=i)
        i = layers.increment(x=i)
        a2 = layers.array_read(array=arr, i=i)

        mean_a0 = layers.mean(x=a0)
        mean_a1 = layers.mean(x=a1)
        mean_a2 = layers.mean(x=a2)

        a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2])

        mean_x0 = layers.mean(x=x[0])
        mean_x1 = layers.mean(x=x[1])
        mean_x2 = layers.mean(x=x[2])

        x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2])

        scope = core.Scope()
        cpu = core.CPUPlace()

        exe = Executor(cpu)

        tensor = numpy.random.random(size=(100, 100)).astype('float32')

        outs = exe.run(feed={
            'x0': tensor,
            'x1': tensor,
            'x2': tensor
        },
                       fetch_list=[a_sum, x_sum],
                       scope=scope)
        self.assertEqual(outs[0], outs[1])

        total_sum = layers.sums(input=[a_sum, x_sum])
        total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)

        append_backward_ops(total_sum_scaled)

        # Build a real list: on Python 3 `map` returns a lazy iterator,
        # which is not a list as `fetch_list` receives elsewhere.
        global_block = default_main_program().global_block()
        g_vars = [global_block.var(each_x.name + "@GRAD") for each_x in x]
        # NOTE(review): unlike the forward run, this run passes no `scope`.
        g_out = [
            item.sum() for item in exe.run(feed={
                'x0': tensor,
                'x1': tensor,
                'x2': tensor
            },
                                           fetch_list=g_vars)
        ]
        g_out_sum = numpy.array(g_out).sum()

        # since our final gradient is 1 and the neural network are all linear
        # with mean_op.
        # the input gradient should also be 1
        self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
Exemplo n.º 15
0
    def _get_gradient(self, input_to_check, place, output_names, no_grad_set):
        """Build a program for ``self.op_type`` and fetch its gradients.

        The op is appended to a fresh program using ``self.inputs``,
        ``self.outputs`` and ``self.attrs`` (each defaulting to ``{}``).
        A scalar loss is built from ``output_names``: the mean of the single
        output, or the average of the per-output means when there are
        several.  Backward ops are then appended and the gradients fetched.

        Args:
            input_to_check: passed to ``append_backward_ops`` as
                ``parameter_list``.
            place: device place used to build the feed tensors and the
                executor.
            output_names: names of block variables that feed the loss.
            no_grad_set: passed through to ``append_backward_ops``.

        Returns:
            A list of numpy arrays, one per ``(param, grad)`` pair returned
            by ``append_backward_ops``, in that order.
        """
        prog = Program()
        block = prog.global_block()
        inputs_with_np = {
            key: value
            for (key, value) in OpTest._create_var_descs_(
                block, getattr(self, 'inputs', {}))
        }
        outputs_with_np = {
            key: val
            for (key, val) in OpTest._create_var_descs_(
                block, getattr(self, 'outputs', {}))
        }
        # Keep only the first element of every item (used as the op's
        # input/output variable) for the op definition.
        inputs = {
            k: [item[0] for item in inputs_with_np[k]]
            for k in inputs_with_np
        }
        outputs = {
            k: [item[0] for item in outputs_with_np[k]]
            for k in outputs_with_np
        }

        op = block.append_op(
            type=self.op_type,
            inputs=inputs,
            outputs=outputs,
            attrs=getattr(self, 'attrs', {}))

        # infer variable type and infer shape in compile-time
        op.desc.infer_var_type(block.desc)
        op.desc.infer_shape(block.desc)

        # A concrete list is required: it is measured with len() and indexed
        # below, which a Python 3 `map` iterator does not support.
        mean_inputs = [block.var(name) for name in output_names]

        if len(mean_inputs) == 1:
            loss = block.create_var(dtype=mean_inputs[0].dtype, shape=[1])
            op = block.append_op(
                inputs={"X": mean_inputs}, outputs={"Out": loss}, type='mean')
            op.desc.infer_var_type(block.desc)
            op.desc.infer_shape(block.desc)
        else:
            avg_sum = []
            for cur_loss in mean_inputs:
                cur_avg_loss = block.create_var(dtype=cur_loss.dtype, shape=[1])
                op = block.append_op(
                    inputs={"X": [cur_loss]},
                    outputs={"Out": [cur_avg_loss]},
                    type="mean")
                op.desc.infer_var_type(block.desc)
                op.desc.infer_shape(block.desc)
                avg_sum.append(cur_avg_loss)

            loss_sum = block.create_var(dtype=avg_sum[0].dtype, shape=[1])
            op_sum = block.append_op(
                inputs={"X": avg_sum}, outputs={"Out": loss_sum}, type='sum')
            op_sum.desc.infer_var_type(block.desc)
            op_sum.desc.infer_shape(block.desc)

            # Scale the summed means by 1/N to obtain their average.
            loss = block.create_var(dtype=loss_sum.dtype, shape=[1])
            op_loss = block.append_op(
                inputs={"X": loss_sum},
                outputs={"Out": loss},
                type='scale',
                attrs={'scale': 1.0 / float(len(avg_sum))})
            op_loss.desc.infer_var_type(block.desc)
            op_loss.desc.infer_shape(block.desc)

        param_grad_list = append_backward_ops(
            loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set)

        feed_dict = {
            item[0].name: OpTest._numpy_to_lod_tensor(item[1], item[2], place)
            for p_name in inputs_with_np for item in inputs_with_np[p_name]
        }

        fetch_list = [g for p, g in param_grad_list]
        executor = Executor(place)
        fetched = executor.run(prog, feed_dict, fetch_list, return_numpy=False)
        # Return a list on every Python version instead of a lazy `map`.
        return [np.array(item) for item in fetched]
Exemplo n.º 16
0
    def test_read_write(self):
        """Round-trip three inputs through a tensor array and check gradients.

        ``x0``..``x2`` are written to an array at indices produced by
        ``increment`` and read back the same way.  The sum of the means of
        the values read back must equal the sum of the means of the inputs.
        After backward ops are appended for the scaled total, the summed
        input gradients are expected to be 1.0 within a delta of 0.1.
        """
        x = [
            layers.data(
                name='x0', shape=[100]), layers.data(
                    name='x1', shape=[100]), layers.data(
                        name='x2', shape=[100])
        ]

        for each_x in x:
            each_x.stop_gradient = False

        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = False
        arr = layers.array_write(x=x[0], i=i)
        i = layers.increment(x=i)
        arr = layers.array_write(x=x[1], i=i, array=arr)
        i = layers.increment(x=i)
        arr = layers.array_write(x=x[2], i=i, array=arr)

        # Fresh counter for reading the array back from index zero.
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = False
        a0 = layers.array_read(array=arr, i=i)
        i = layers.increment(x=i)
        a1 = layers.array_read(array=arr, i=i)
        i = layers.increment(x=i)
        a2 = layers.array_read(array=arr, i=i)

        mean_a0 = layers.mean(x=a0)
        mean_a1 = layers.mean(x=a1)
        mean_a2 = layers.mean(x=a2)

        a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2])

        mean_x0 = layers.mean(x=x[0])
        mean_x1 = layers.mean(x=x[1])
        mean_x2 = layers.mean(x=x[2])

        x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2])

        scope = core.Scope()
        cpu = core.CPUPlace()

        exe = Executor(cpu)

        tensor = numpy.random.random(size=(100, 100)).astype('float32')

        outs = exe.run(feed={'x0': tensor,
                             'x1': tensor,
                             'x2': tensor},
                       fetch_list=[a_sum, x_sum],
                       scope=scope)
        self.assertEqual(outs[0], outs[1])

        total_sum = layers.sums(input=[a_sum, x_sum])
        total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)

        append_backward_ops(total_sum_scaled)

        # Build a real list: on Python 3 `map` returns a lazy iterator,
        # which is not a list as `fetch_list` receives elsewhere.
        global_block = default_main_program().global_block()
        g_vars = [global_block.var(each_x.name + "@GRAD") for each_x in x]
        # NOTE(review): unlike the forward run, this run passes no `scope`.
        g_out = [
            item.sum()
            for item in exe.run(
                feed={'x0': tensor,
                      'x1': tensor,
                      'x2': tensor},
                fetch_list=g_vars)
        ]
        g_out_sum = numpy.array(g_out).sum()

        # since our final gradient is 1 and the neural network are all linear
        # with mean_op.
        # the input gradient should also be 1
        self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
    def test_grad(self):
        """Check the input gradient through split_lod_tensor/merge_lod_tensor.

        ``x`` is split by the boolean mask ``y`` and merged back, the mean of
        the merged tensor is the loss, and the sum of the fetched ``x@GRAD``
        values is expected to be 1.0 within a delta of 0.1.
        """
        place = core.CPUPlace()
        program = Program()

        x = layers.data(
            name='x',
            shape=[1],
            dtype='float32',
            main_program=program,
            stop_gradient=False)
        y = layers.data(
            name='y',
            shape=[1],
            dtype='bool',
            main_program=program,
            stop_gradient=False)

        lod_level = 0

        true_part, false_part = layers.split_lod_tensor(
            input=x, mask=y, level=lod_level, main_program=program)
        merged = layers.merge_lod_tensor(
            in_true=true_part,
            in_false=false_part,
            mask=y,
            x=x,
            level=lod_level,
            main_program=program)
        loss = layers.mean(x=merged, main_program=program)

        append_backward_ops(loss)

        # Feed values 0..9 as a (10, 1) float32 tensor with the LoD below.
        x_tensor = core.LoDTensor()
        x_tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
        x_tensor.set_lod([[0, 3, 9, 10]])

        # Boolean mask [0, 1, 0] expanded to shape (3, 1).
        mask_values = np.expand_dims(
            np.array([0, 1, 0]).astype('bool'), axis=1)

        y_tensor = core.LoDTensor()
        y_tensor.set(mask_values, place)

        exe = Executor(place)
        scope = core.Scope()

        x_grad = program.global_block().var(x.name + "@GRAD")
        fetched = exe.run(
            program,
            feed={'x': x_tensor,
                  'y': y_tensor},
            fetch_list=[x_grad],
            scope=scope,
            return_numpy=False)
        per_item_sums = [np.array(item).sum() for item in fetched]

        grad_total = np.array(per_item_sums).sum()

        self.assertAlmostEqual(1.0, grad_total, delta=0.1)