示例#1
0
    def test_main(self):
        """Check that a ``Preprocessor`` block matches manual post-processing.

        The first program reads ``N`` batches from the recordio file and
        applies ``img / 2`` and ``lbl + 1`` on the fetched numpy values.  The
        second program applies the same two operations inside a
        ``Preprocessor`` block (wrapped in a double buffer) and reads ``N``
        batches again.  Corresponding batches must be numerically close.
        """
        N = 10

        # Reference values: read raw batches and transform them in numpy.
        img_expected_res = []
        lbl_expected_res = []
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            data_file = fluid.layers.io.open_recordio_file(
                './mnist_for_preprocessor_test.recordio',
                shapes=[[-1, 784], [-1, 1]],
                lod_levels=[0, 0],
                dtypes=['float32', 'int64'])
            img, lbl = fluid.layers.io.read_file(data_file)

            if fluid.core.is_compiled_with_cuda():
                place = fluid.CUDAPlace(0)
            else:
                place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            for _ in range(N):
                img_v, lbl_v = exe.run(fetch_list=[img, lbl])
                img_expected_res.append(img_v / 2)
                lbl_expected_res.append(lbl_v + 1)

        # Actual values: the same transformation expressed as a Preprocessor.
        img_actual_res = []
        lbl_actual_res = []
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            data_file = fluid.layers.io.open_recordio_file(
                './mnist_for_preprocessor_test.recordio',
                shapes=[[-1, 784], [-1, 1]],
                lod_levels=[0, 0],
                dtypes=['float32', 'int64'])
            preprocessor = fluid.layers.io.Preprocessor(reader=data_file)
            with preprocessor.block():
                img, lbl = preprocessor.inputs()
                img_out = img / 2
                lbl_out = lbl + 1
                preprocessor.outputs(img_out, lbl_out)

            data_file = fluid.layers.io.double_buffer(preprocessor())
            img, lbl = fluid.layers.io.read_file(data_file)

            if fluid.core.is_compiled_with_cuda():
                place = fluid.CUDAPlace(0)
            else:
                place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            for _ in range(N):
                img_v, lbl_v = exe.run(fetch_list=[img, lbl])
                img_actual_res.append(img_v)
                lbl_actual_res.append(lbl_v)

        for idx in range(N):
            # np.allclose only returns a bool; it has to be asserted for the
            # comparison to be able to fail the test.
            self.assertTrue(
                np.allclose(img_expected_res[idx], img_actual_res[idx]))
            self.assertTrue(
                np.allclose(lbl_expected_res[idx], lbl_actual_res[idx]))
示例#2
0
 def program_scope_guard(self):
     """Yield once while a new scope and a new main/startup program pair are active."""
     main_prog = fluid.Program()
     init_prog = fluid.Program()
     fresh_scope = fluid.core.Scope()
     # A single ``with`` holding both guards nests them in the listed order.
     with fluid.scope_guard(fresh_scope), \
             fluid.program_guard(main_prog, init_prog):
         yield
示例#3
0
 def __fn__(*args, **kwargs):
     """Call ``fn`` with the given arguments under a new scope and new programs."""
     main_prog = fluid.Program()
     init_prog = fluid.Program()
     fresh_scope = fluid.core.Scope()
     # The return value of ``fn`` is intentionally not propagated.
     with fluid.scope_guard(fresh_scope), \
             fluid.program_guard(main_prog, init_prog):
         fn(*args, **kwargs)
示例#4
0
 def __impl__(self):
     """Run ``main`` with the captured settings under a new scope and new programs."""
     main_prog = fluid.Program()
     init_prog = fluid.Program()
     fresh_scope = fluid.core.Scope()
     with fluid.scope_guard(fresh_scope), \
             fluid.program_guard(main_prog, init_prog):
         main(use_cuda, parallel, nn_type, combine)
示例#5
0
    def get_main_program(self):
        """Return a new Program populated by calling ``self.net_conf()``."""
        program = fluid.Program()
        # Only the main program is guarded; no startup program is passed.
        with fluid.program_guard(program):
            self.net_conf()
        return program
    def main(self, thread_num):
        """Read three recordio files with ``open_files`` and count the batches.

        Args:
            thread_num: Forwarded to ``fluid.layers.open_files``.

        Each batch must hold at most ``self.batch_size`` rows, and the total
        number of batches must equal ``self.num_batch * 3``.
        """
        recordio_paths = [
            './mnist_0.recordio', './mnist_1.recordio', './mnist_2.recordio'
        ]
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            reader = fluid.layers.open_files(
                filenames=recordio_paths,
                thread_num=thread_num,
                shapes=[(-1, 784), (-1, 1)],
                lod_levels=[0, 0],
                dtypes=['float32', 'int64'])
            img, label = fluid.layers.read_file(reader)

            place = (fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda()
                     else fluid.CPUPlace())

            executor = fluid.Executor(place)
            executor.run(fluid.default_startup_program())

            seen_batches = 0
            while True:
                try:
                    [img_val] = executor.run(fetch_list=[img])
                except fluid.core.EnforceNotMet as ex:
                    # The reader signals exhaustion through this error.
                    self.assertIn("There is no next data.", ex.message)
                    break
                else:
                    seen_batches += 1
                    self.assertLessEqual(img_val.shape[0], self.batch_size)
            self.assertEqual(seen_batches, self.num_batch * 3)
    def test_main(self):
        """Train Lenet for three batches with ParallelExecutor fed by a DataFeeder.

        The network is built in its own scope and program pair with a fixed
        startup random seed.  The reader is decorated with
        ``multi_devices=True`` and each batch's loss is printed.
        """
        main = fluid.Program()
        startup = fluid.Program()
        startup.random_seed = 1
        with fluid.scope_guard(fluid.core.Scope()):
            with fluid.program_guard(main, startup):
                data = fluid.layers.data(
                    name='image', shape=[3, 224, 224], dtype='float32')
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64')
                out = Lenet(data, class_dim=102)
                loss = fluid.layers.cross_entropy(input=out, label=label)
                loss = fluid.layers.mean(loss)
                opt = fluid.optimizer.Momentum(
                    learning_rate=0.1,
                    momentum=0.9,
                    regularization=fluid.regularizer.L2Decay(1e-4))

                opt.minimize(loss)
        place = fluid.CUDAPlace(0)
        feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
        reader = feeder.decorate_reader(
            paddle.batch(
                flowers.train(), batch_size=16), multi_devices=True)
        exe = fluid.Executor(place)
        exe.run(startup)
        pe = fluid.ParallelExecutor(
            use_cuda=True, loss_name=loss.name, main_program=main)

        # ``feed_data`` avoids rebinding the ``data`` layer variable above.
        for batch_id, feed_data in enumerate(reader()):
            loss_np = np.array(
                pe.run(feed=feed_data, fetch_list=[loss.name])[0])
            # Formatted explicitly so the output is the same on Python 2 and 3.
            print('%s %s' % (batch_id, loss_np))
            if batch_id == 2:
                break
        def test_with_place(place):
            """Run the elementwise_add gradient op on ``place`` and compare with numpy.

            Builds a program holding one ``elementwise_add`` op, appends the
            gradient op description generated by ``core.get_grad_op_desc``,
            executes it and checks ``x@GRAD`` / ``y@GRAD`` against values
            computed with numpy.
            """
            out_grad = np.random.random_sample(self.x.shape).astype(np.float32)
            # The expected gradient w.r.t. x is the upstream gradient itself.
            x_grad = out_grad
            # Sum over every axis except ``self.axis``. ``del`` on the result of
            # ``range`` requires it to be a list (Python 2 semantics).
            sum_axis = range(0, len(self.x.shape))
            del sum_axis[self.axis]
            y_grad = np.sum(out_grad, axis=tuple(sum_axis))

            # The locals() dict is reused as a plain name -> array mapping.
            var_dict = locals()
            var_dict['y'] = self.y
            var_dict['x'] = self.x
            var_dict['out'] = self.out
            var_dict['y@GRAD'] = y_grad
            var_dict['x@GRAD'] = x_grad
            var_dict['out@GRAD'] = out_grad

            var_names = ['x', 'y', 'out', 'y@GRAD', 'x@GRAD', 'out@GRAD']
            ground_truth = {name: var_dict[name] for name in var_names}

            program = fluid.Program()
            with fluid.program_guard(program):
                block = program.global_block()
                # Declare one float32 variable per ground-truth array.
                for name in ground_truth:
                    block.create_var(
                        name=name,
                        dtype='float32',
                        shape=ground_truth[name].shape)
                elementwise_add_op = block.append_op(
                    type="elementwise_add",
                    inputs={
                        "X": block.var('x'),
                        "Y": block.var('y'),
                    },
                    outputs={"Out": block.var('out'), },
                    attrs={"axis": self.axis, })

                # generate backward op_desc
                grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                    elementwise_add_op.desc, set(), [])
                # Only the first generated gradient op description is used.
                grad_op_desc = grad_op_desc_list[0]
                new_op_desc = block.desc.append_op()
                new_op_desc.copy_from(grad_op_desc)
                for var_name in grad_op_desc.output_arg_names():
                    block.desc.var(var_name.encode("ascii"))
                grad_op_desc.infer_var_type(block.desc)
                grad_op_desc.infer_shape(block.desc)
                # Force every gradient output variable to FP32.
                for arg in grad_op_desc.output_arg_names():
                    grad_var = block.desc.find_var(arg.encode("ascii"))
                    grad_var.set_dtype(core.VarDesc.VarType.FP32)

                exe = fluid.Executor(place)
                # Feed the forward inputs and the upstream gradient; fetch both
                # input gradients.
                out = exe.run(program,
                              feed={
                                  name: var_dict[name]
                                  for name in ['x', 'y', 'out@GRAD']
                              },
                              fetch_list=['x@GRAD', 'y@GRAD'])
                self.__assert_close(x_grad, out[0], "x@GRAD")
                # NOTE(review): y@GRAD is checked with a much looser atol (1.4);
                # the reason is not visible here - confirm it is intended.
                self.__assert_close(y_grad, out[1], "y@GRAD", atol=1.4)
    def test_dropout_layer(self):
        """Build a program containing a single dropout layer and print it."""
        main_program = Program()
        startup_program = Program()
        with fluid.program_guard(main_program, startup_program):
            images = fluid.layers.data(
                name='pixel', shape=[3, 48, 48], dtype='float32')
            fluid.layers.dropout(x=images, dropout_prob=0.5)

        # Call form of print: same output on Python 2, valid syntax on Python 3.
        print(str(main_program))
示例#10
0
    def net_profiler(self, state, profile_path='/tmp/profile'):
        """Train a small network containing a While loop under the profiler.

        Args:
            state: Profiler state string. 'GPU' and 'All' need a CUDA build
                (otherwise the method returns without doing anything); 'CPU'
                selects ``CPUPlace`` and any other value selects
                ``CUDAPlace(0)``.
            profile_path: Passed to ``profiler.profiler`` as its third
                argument.
        """
        needs_gpu = state in ('GPU', 'All')
        if needs_gpu and not core.is_compiled_with_cuda():
            return
        startup_program = fluid.Program()
        main_program = fluid.Program()

        with fluid.program_guard(main_program, startup_program):
            image = fluid.layers.data(name='x', shape=[784], dtype='float32')
            hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
            i = layers.zeros(shape=[1], dtype='int64')
            counter = fluid.layers.zeros(
                shape=[1], dtype='int64', force_cpu=True)
            until = layers.fill_constant([1], dtype='int64', value=10)
            data_arr = layers.array_write(hidden1, i)
            cond = fluid.layers.less_than(x=counter, y=until)
            while_op = fluid.layers.While(cond=cond)
            with while_op.block():
                hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
                layers.array_write(hidden_n, i, data_arr)
                fluid.layers.increment(x=counter, value=1, in_place=True)
                # Re-evaluate the loop condition into the same ``cond`` variable.
                layers.less_than(x=counter, y=until, cond=cond)

            hidden_n = layers.array_read(data_arr, i)
            hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
            predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
            label = fluid.layers.data(name='y', shape=[1], dtype='int64')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(cost)
            batch_size = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(
                input=predict, label=label, total=batch_size)

        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
        # The return value of minimize() is not needed here.
        optimizer.minimize(avg_cost, startup_program=startup_program)

        place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup_program)

        pass_acc_calculator = fluid.average.WeightedAverage()
        with profiler.profiler(state, 'total', profile_path) as prof:
            for step in range(10):
                # Reset the profiler at the third iteration.
                if step == 2:
                    profiler.reset_profiler()
                x = np.random.random((32, 784)).astype("float32")
                y = np.random.randint(0, 10, (32, 1)).astype("int64")

                outs = exe.run(main_program,
                               feed={'x': x,
                                     'y': y},
                               fetch_list=[avg_cost, batch_acc, batch_size])
                acc = np.array(outs[1])
                b_size = np.array(outs[2])
                pass_acc_calculator.add(value=acc, weight=b_size)
                # Still evaluated every step, as before; the value is unused.
                pass_acc_calculator.eval()
 def test_elementwise_add_with_act(self):
     """Build a program with one relu-activated elementwise_add and print it."""
     main_program = Program()
     startup_program = Program()
     pixel_shape = [3, 48, 48]
     with fluid.program_guard(main_program, startup_program):
         lhs = fluid.layers.data(
             name='pixel1', shape=pixel_shape, dtype='float32')
         rhs = fluid.layers.data(
             name='pixel2', shape=[3, 48, 48], dtype='float32')
         fluid.layers.elementwise_add(x=lhs, y=rhs, act='relu')
     print(main_program)
示例#12
0
 def __impl__(*args, **kwargs):
     """Run ``main`` with the captured flags under a new scope and new programs.

     Positional and keyword arguments passed to this wrapper are ignored.
     """
     main_prog = fluid.Program()
     init_prog = fluid.Program()
     fresh_scope = fluid.core.Scope()
     with fluid.scope_guard(fresh_scope), \
             fluid.program_guard(main_prog, init_prog):
         main(use_cuda=use_cuda,
              is_sparse=is_sparse,
              is_parallel=is_parallel)
示例#13
0
文件: train.py 项目: absorbguo/Paddle
def main():
    train = fluid.Program()
    startup = fluid.Program()

    with fluid.program_guard(train, startup):
        train_args = network_cfg(is_train=True)

    test = fluid.Program()

    with fluid.program_guard(test, fluid.Program()):
        test_args = network_cfg(is_train=False)

    # startup
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place=place)
    exe.run(startup)

    train_exe = fluid.ParallelExecutor(
        use_cuda=True, loss_name=train_args['loss'].name, main_program=train)

    fetch_var_list = [var.name for var in train_args['log']]
    for i in xrange(sys.maxint):
        result = map(numpy.array,
                     train_exe.run(fetch_list=fetch_var_list
                                   if i % 1000 == 0 else []))
        if len(result) != 0:
            print 'Train: ', result

        if i % 1000 == 0:
            test_exe = fluid.ParallelExecutor(
                use_cuda=True, main_program=test, share_vars_from=train_exe)
            loss = []
            acc = []
            try:
                while True:
                    loss_np, acc_np = map(
                        numpy.array, test_exe.run(fetch_list=fetch_var_list))
                    loss.append(loss_np[0])
                    acc.append(acc_np[0])
            except:
                test_args['file'].reset()
                print 'TEST: ', numpy.mean(loss), numpy.mean(acc)
    def test_img_conv_group(self):
        """Build a program with two stacked ``conv_block`` groups and print it."""
        main_program = Program()
        startup_program = Program()

        with fluid.program_guard(main_program, startup_program):
            images = fluid.layers.data(
                name='pixel', shape=[3, 48, 48], dtype='float32')
            conv1 = conv_block(images, 64, 2, [0.3, 0])
            conv_block(conv1, 256, 3, [0.4, 0.4, 0])

        # Call form of print: same output on Python 2, valid syntax on Python 3.
        print(str(main_program))
    def test_batch_norm_layer(self):
        """Build a program with batch_norm -> fc -> batch_norm and print it."""
        main_program = Program()
        startup_program = Program()
        with fluid.program_guard(main_program, startup_program):
            images = fluid.layers.data(
                name='pixel', shape=[3, 48, 48], dtype='float32')
            hidden1 = fluid.layers.batch_norm(input=images)
            hidden2 = fluid.layers.fc(input=hidden1, size=128, act='relu')
            fluid.layers.batch_norm(input=hidden2)

        # Call form of print: same output on Python 2, valid syntax on Python 3.
        print(str(main_program))
示例#16
0
 def run_local(self, place):
     """Scale a constant-initialised 32x32 input by 10 and store the result.

     The fetched output is saved in ``self.local_out``.
     """
     program = fluid.Program()
     with fluid.program_guard(program):
         inp = layers.data(
             shape=[32, 32],
             dtype='float32',
             name='X',
             append_batch_size=False)
         fill_with_constant = fluid.initializer.Constant(value=2.3)
         fill_with_constant(inp, program.global_block())
         scaled = layers.scale(x=inp, scale=10.0)
     executor = fluid.Executor(place)
     self.local_out = executor.run(program, fetch_list=[scaled])
示例#17
0
    def get_expect_trainer_ops(self):
        """Return the op-type sequence expected in the trainer program.

        The network is built, its optimize ops are deleted, a fixed tail of
        distributed ops is appended, and one extra "send_vars" is inserted
        right after the first "elementwise_add_grad".
        """
        program = fluid.Program()
        with fluid.program_guard(program):
            optimize_ops, _ = self.net_conf()

        delete_ops(program.global_block(), optimize_ops)

        expected = [op.type for op in program.global_block().ops]
        expected.extend([
            "split_byref", "send_vars", "send_barrier", "recv", "recv",
            "fetch_barrier", "concat"
        ])
        grad_pos = expected.index("elementwise_add_grad")
        expected.insert(grad_pos + 1, "send_vars")
        return expected
示例#18
0
 def setUp(self):
     """Convert the MNIST training set to a recordio file in batches of 32.

     The converter's return value is stored in ``self.num_batches``.
     """
     with fluid.program_guard(fluid.Program(), fluid.Program()):
         batched = paddle.batch(mnist.train(), batch_size=32)
         # Feed order is image first, then label.
         image_var = fluid.layers.data(name='image', shape=[784])
         label_var = fluid.layers.data(
             name='label', shape=[1], dtype='int64')
         feeder = fluid.DataFeeder(
             feed_list=[image_var, label_var], place=fluid.CPUPlace())
         convert = fluid.recordio_writer.convert_reader_to_recordio_file
         self.num_batches = convert(
             './mnist_for_preprocessor_test.recordio', batched, feeder)
示例#19
0
文件: vgg.py 项目: absorbguo/Paddle
def get_model(args):
    """Assemble the VGG16 classifier, its inference program and data readers.

    Args:
        args: Object providing ``data_set``, ``data_format``,
            ``learning_rate`` and ``batch_size``.

    Returns:
        A tuple ``(avg_cost, inference_program, optimizer, train_reader,
        test_reader, batch_acc)``.
    """
    use_cifar = args.data_set == "cifar10"
    if use_cifar:
        classdim, side = 10, 32
    else:
        classdim, side = 102, 224
    # Channels go first for NCHW, last for any other data format.
    if args.data_format == 'NCHW':
        data_shape = [3, side, side]
    else:
        data_shape = [side, side, 3]

    # Input placeholders
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Forward network and loss
    features = vgg16_bn_drop(images)
    predict = fluid.layers.fc(input=features, size=classdim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Accuracy, with the batch size written into a separate tensor
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # Inference program: derived from a clone of the default main program
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimizer (created here, not applied)
    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)

    # Training reader: shuffled, then batched
    if use_cifar:
        train_source = paddle.dataset.cifar.train10()
    else:
        train_source = paddle.dataset.flowers.train()
    shuffled = paddle.reader.shuffle(train_source, buf_size=5120)
    train_reader = paddle.batch(shuffled, batch_size=args.batch_size)

    # Test reader: batched only
    if use_cifar:
        test_source = paddle.dataset.cifar.test10()
    else:
        test_source = paddle.dataset.flowers.test()
    test_reader = paddle.batch(test_source, batch_size=args.batch_size)

    return (avg_cost, inference_program, optimizer, train_reader,
            test_reader, batch_acc)
 def setUpClass(cls):
     """Write the MNIST training set to ``MNIST_RECORDIO_FILE`` in batches of 4."""
     with fluid.program_guard(fluid.Program(), fluid.Program()):
         batched = paddle.batch(mnist.train(), batch_size=4)
         # Feed order is image first, then label.
         image_var = fluid.layers.data(name='image', shape=[784])
         label_var = fluid.layers.data(
             name='label', shape=[1], dtype='int64')
         feeder = fluid.DataFeeder(
             feed_list=[image_var, label_var], place=fluid.CPUPlace())
         fluid.recordio_writer.convert_reader_to_recordio_file(
             MNIST_RECORDIO_FILE, batched, feeder)
示例#21
0
 def init_client(self, place, port):
     """Send a constant-initialised input to the local server and fetch the reply.

     Args:
         place: Device place for the executor.
         port: Port on 127.0.0.1 that the ``Send`` layer targets.

     The fetched values are stored in ``self.dist_out``.
     """
     program = fluid.Program()
     with fluid.program_guard(program):
         inp = layers.data(
             shape=[32, 32],
             dtype='float32',
             name='X',
             append_batch_size=False)
         fill_with_constant = fluid.initializer.Constant(value=2.3)
         fill_with_constant(inp, program.global_block())
         # Variable that receives the result; the name is the server-side var.
         received = program.global_block().create_var(
             name="scale_0.tmp_0",
             dtype="float32",
             persistable=False,
             shape=[32, 32])
         endpoint = "127.0.0.1:%d" % port
         fetched_vars = layers.Send(endpoint, [inp], [received])
     executor = fluid.Executor(place)
     # ``fetched_vars`` is already a list, so it is passed directly.
     self.dist_out = executor.run(program, fetch_list=fetched_vars)
示例#22
0
 def setUp(self):
     """Create three identical MNIST recordio files with batches of 64.

     ``./mnist_0.recordio`` is written by the converter, whose return value
     is stored in ``self.num_batch``; the other two files are copies of it.
     """
     self.batch_size = 64
     with fluid.program_guard(fluid.Program(), fluid.Program()):
         batched = paddle.batch(mnist.train(), batch_size=self.batch_size)
         # Feed order is image first, then label.
         image_var = fluid.layers.data(name='image', shape=[784])
         label_var = fluid.layers.data(
             name='label', shape=[1], dtype='int64')
         feeder = fluid.DataFeeder(
             feed_list=[image_var, label_var], place=fluid.CPUPlace())
         convert = fluid.recordio_writer.convert_reader_to_recordio_file
         self.num_batch = convert('./mnist_0.recordio', batched, feeder)
     for duplicate in ('./mnist_1.recordio', './mnist_2.recordio'):
         copyfile('./mnist_0.recordio', duplicate)
    def parallel_exe(self, train_inputs, seed):
        """Train Lenet with ParallelExecutor and check fetched values are finite.

        Args:
            train_inputs: Iterable of batches accepted by the ``DataFeeder``.
            seed: Random seed assigned to the startup program.

        For every batch, each fetched variable's sum must be neither NaN nor
        infinite.
        """
        main = fluid.Program()
        startup = fluid.Program()
        startup.random_seed = seed
        with fluid.program_guard(main, startup):
            data = fluid.layers.data(
                name='image', shape=[3, 224, 224], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            out = Lenet(data, class_dim=102)
            loss = fluid.layers.cross_entropy(input=out, label=label)
            loss = fluid.layers.mean(loss)

            opt = fluid.optimizer.Momentum(
                learning_rate=0.1,
                momentum=0.9,
                regularization=fluid.regularizer.L2Decay(1e-4))

            opt.minimize(loss)

            # TODO(zcd): I found that once the memory optimizer is open,
            # parallel_exe doesn't fetch some variable, such as conv2d_0.b_0@GRAD,
            # conv2d_1.b_0@GRAD. Those variables should not be pruned.
            # fluid.memory_optimize(main)

            place = fluid.CUDAPlace(0)
            exe = fluid.Executor(place)
            exe.run(startup)

            feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
            pe = fluid.ParallelExecutor(
                use_cuda=True, loss_name=loss.name, main_program=main)

            fetch_list = []
            all_vars = main.global_block().vars
            for k, v in all_vars.iteritems():
                # Fetch persistable variables, plus any variable whose name has
                # no 'tmp' in it and does not start with '_'. The comparison
                # uses != because ``is not`` on a string literal tests object
                # identity, which is not guaranteed for equal strings.
                if ('tmp' not in k and k[0] != '_') or v.persistable:
                    fetch_list.append(k)

            # ``batch`` avoids rebinding the ``data`` layer variable above.
            for batch in train_inputs:
                ret = pe.run(fetch_list, feed=feeder.feed(batch))
                for i in range(len(fetch_list)):
                    total = np.sum(ret[i])
                    assert not math.isnan(total) and \
                           not math.isinf(total)
    def check_network_convergence(self, build_strategy=None):
        """Check that train and test executors report the same loss on one batch.

        Args:
            build_strategy: Forwarded to both ``ParallelExecutor`` instances.

        The test program is cloned from the main program before the optimizer
        is added.  For five rounds the test loss is fetched first, then the
        train loss, and the two must agree within ``atol=1e-8``.
        """
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = simple_fc_net()
            test_program = main.clone(for_test=True)

            sgd = fluid.optimizer.SGD(learning_rate=0.001)
            sgd.minimize(loss)

            batch_size = 32
            image = np.random.normal(size=(batch_size, 784)).astype('float32')
            label = np.random.randint(0, 10, (batch_size, 1), dtype="int64")

            place = fluid.CUDAPlace(0)
            startup_exe = fluid.Executor(place)
            startup_exe.run(startup)
            feed_dict = {'image': image, 'label': label}

            train_exe = fluid.ParallelExecutor(
                use_cuda=True,
                loss_name=loss.name,
                main_program=main,
                build_strategy=build_strategy)

            # The test executor shares its variables with the train executor.
            test_exe = fluid.ParallelExecutor(
                use_cuda=True,
                main_program=test_program,
                share_vars_from=train_exe,
                build_strategy=build_strategy)

            fetch_names = [loss.name]
            for _ in range(5):
                [raw_test_loss] = test_exe.run(fetch_names, feed=feed_dict)
                test_loss = np.array(raw_test_loss)

                [raw_train_loss] = train_exe.run(fetch_names, feed=feed_dict)
                train_loss = np.array(raw_train_loss)

                losses_match = np.allclose(train_loss, test_loss, atol=1e-8)
                failure_msg = ("Train loss: " + str(train_loss) +
                               "\n Test loss:" + str(test_loss))
                self.assertTrue(losses_match, failure_msg)
示例#25
0
    def test_main(self, decorator_callback=None):
        """Train a two-layer classifier for one pass over ``./mnist.recordio``.

        Args:
            decorator_callback: Optional callable applied to the opened reader
                before ``read_file``; its return value replaces the reader.

        The number of batches read must equal ``self.num_batches`` and the
        last average loss must be lower than the first.
        """
        # Work in a fresh program pair.
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            reader = fluid.layers.open_recordio_file(
                './mnist.recordio',
                shapes=[[-1, 784], [-1, 1]],
                lod_levels=[0, 0],
                dtypes=['float32', 'int64'])
            if decorator_callback is not None:
                reader = decorator_callback(reader)
            img, label = fluid.layers.read_file(reader)

            hidden = fluid.layers.fc(input=img, size=100, act='tanh')
            prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
            loss = fluid.layers.cross_entropy(input=prediction, label=label)
            avg_loss = fluid.layers.mean(loss)

            adam = fluid.optimizer.Adam(learning_rate=1e-3)
            adam.minimize(avg_loss)

            place = (fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda()
                     else fluid.CPUPlace())

            executor = fluid.Executor(place)
            executor.run(fluid.default_startup_program())
            loss_history = []

            # One training pass: run until the reader reports no more data.
            batches_seen = 0
            while True:
                try:
                    [batch_loss] = executor.run(fetch_list=[avg_loss])
                except fluid.core.EnforceNotMet as ex:
                    self.assertIn("There is no next data.", ex.message)
                    break
                else:
                    loss_history.append(batch_loss)
                    batches_seen += 1
            self.assertEqual(batches_seen, self.num_batches)
            self.assertLess(loss_history[-1], loss_history[0])
示例#26
0
    def test_train_dyn_rnn(self):
        """Train a DynamicRNN classifier on one batch and expect the loss to drop.

        The first batch from ``self.train_data()`` is used for an initial run
        and then for 100 further runs; the last loss must be below the first.
        """
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            sentence = fluid.layers.data(
                name='word', shape=[1], dtype='int64', lod_level=1)
            vocab_size = len(self.word_dict)
            sent_emb = fluid.layers.embedding(
                input=sentence, size=[vocab_size, 32], dtype='float32')

            rnn = fluid.layers.DynamicRNN()

            with rnn.block():
                step_in = rnn.step_input(sent_emb)
                state = rnn.memory(shape=[100], dtype='float32')
                step_out = fluid.layers.fc(
                    input=[step_in, state], size=100, act='tanh')
                rnn.update_memory(state, step_out)
                rnn.output(step_out)

            last = fluid.layers.sequence_last_step(input=rnn())
            logits = fluid.layers.fc(input=last, size=1, act=None)
            label = fluid.layers.data(name='label', shape=[1], dtype='float32')
            loss = fluid.layers.sigmoid_cross_entropy_with_logits(
                x=logits, label=label)
            loss = fluid.layers.mean(loss)
            adam = fluid.optimizer.Adam(1e-3)
            adam.minimize(loss=loss)

        cpu = fluid.CPUPlace()
        executor = fluid.Executor(cpu)
        executor.run(startup_program)
        feeder = fluid.DataFeeder(feed_list=[sentence, label], place=cpu)
        batch = next(self.train_data())
        first_loss = executor.run(main_program,
                                  feed=feeder.feed(batch),
                                  fetch_list=[loss])[0]
        for _ in range(100):
            fetched = executor.run(main_program,
                                   feed=feeder.feed(batch),
                                   fetch_list=[loss])
            latest_loss = fetched[0]
        # After 100 mini-batches the loss should have decreased.
        self.assertLess(latest_loss[0], first_loss[0])
示例#27
0
def main():
    """Run the unittest modules named on the command line, one at a time.

    Each module is imported and run under its own program pair, scope and
    unique-name guard, with test output captured in memory.  For a failing
    module, its name and the captured output are written to stderr.  The
    process exits with status 1 if any module failed.
    """
    sys.path.append(os.getcwd())
    some_test_failed = False
    for module_name in sys.argv[1:]:
        # Local names chosen so that this function's own name and the
        # Python 2 builtin ``buffer`` are not shadowed.
        output = cStringIO.StringIO()
        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.program_guard(main_prog, startup_prog):
            with fluid.scope_guard(scope):
                with fluid.unique_name.guard():
                    test_loader = unittest.TestLoader()
                    module = importlib.import_module(module_name)
                    tests = test_loader.loadTestsFromModule(module)
                    res = unittest.TextTestRunner(stream=output).run(tests)
                    if not res.wasSuccessful():
                        some_test_failed = True
                        sys.stderr.write('%s failed\n%s\n' %
                                         (module_name, output.getvalue()))

    if some_test_failed:
        # sys.exit, not the ``exit`` helper that only the site module injects.
        sys.exit(1)
示例#28
0
    def init_serv(self, place):
        """Build and run a ListenAndServ program that scales input "X" by 10.

        Args:
            place: Device place for the server executor.

        The executor is stored in ``self.server_exe`` and the program is run
        on it before this method returns.
        """
        main = fluid.Program()

        with fluid.program_guard(main):
            serv = layers.ListenAndServ(
                "127.0.0.1:0", ["X"], optimizer_mode=False)
            with serv.do():
                # NOTE(review): ``psersistable`` looks like a misspelling of
                # ``persistable``; as written the variable is presumably not
                # marked persistable - confirm before correcting, since that
                # would change runtime behaviour.
                out_var = main.global_block().create_var(
                    name="scale_0.tmp_0",
                    psersistable=True,
                    dtype="float32",
                    shape=[32, 32])
                x = layers.data(
                    shape=[32, 32],
                    dtype='float32',
                    name="X",
                    append_batch_size=False)
                fluid.initializer.Constant(value=1.0)(x, main.global_block())
                # Write the scaled input into the pre-created output variable.
                layers.scale(x=x, scale=10.0, out=out_var)

        self.server_exe = fluid.Executor(place)
        self.server_exe.run(main)
示例#29
0
def main(args):
    """
    Build the context-prediction pretraining model, load data and train.

    model_config:
        a json file with the model configuration, such as dropout rate,
        learning rate, number of tasks and so on;

    context_pooling:
        the pooling type used for context prediction;

    PreGNNContextpredModel:
        an unsupervised pretraining model which uses subgraphs to predict
        their surrounding graph structures. The goal is to pre-train a GNN so
        that it maps nodes appearing in similar structural contexts to nearby
        embeddings.

    Args:
        args: Parsed options. This function reads ``model_config``,
            ``dropout_rate``, ``context_pooling``, ``lr``, ``distributed``,
            ``use_cuda``, ``init_model``, ``context_size``, ``data_path``,
            ``max_epoch`` and ``model_dir``.

    Returns:
        The lowest test loss over all epochs when running non-distributed or
        on worker 0; ``None`` on the other distributed workers.
    """
    # Read the config inside ``with`` so the file handle is always closed.
    with open(args.model_config, 'r') as config_file:
        model_config = json.load(config_file)
    if args.dropout_rate is not None:
        model_config['dropout_rate'] = args.dropout_rate
    model_config['context_pooling'] = args.context_pooling

    ### build model
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = PreGNNContextpredModel(model_config)
            model.forward()
            opt = fluid.optimizer.Adam(learning_rate=args.lr)
            if args.distributed:
                opt = get_distributed_optimizer(opt)
            opt.minimize(model.loss)
    # From here on ``model`` refers to the instance built in the test program.
    with fluid.program_guard(test_prog, fluid.Program()):
        with fluid.unique_name.guard():
            model = PreGNNContextpredModel(model_config)
            model.forward(is_test=True)

    # Use CUDAPlace for GPU training, or use CPUPlace for CPU training.
    place = fluid.CUDAPlace(int(os.environ.get('FLAGS_selected_gpus', 0))) \
            if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.init_model is not None and args.init_model != "":
        load_partial_params(exe, args.init_model, train_prog)

    ### load data
    # PreGNNContextPredFeaturizer:
    #     Used along with `PreGNNContextPredModel`. It inherits from the super
    #     class `Featurizer`, which is used for feature extraction. The
    #     `Featurizer` has two functions: `gen_features` converts a single raw
    #     smiles string to a single graph data, and `collate_fn` aggregates a
    #     sublist of graph data into a big batch.
    # k is the number of layers; l1 and l2 are the two context sizes, usually
    # l1 < l2.
    # splitter:
    #     Split type of the dataset: random, scaffold, or random with
    #     scaffold. A random split is used here.
    #     `ScaffoldSplitter` first orders the compounds according to
    #     Bemis-Murcko scaffold, then takes the first `frac_train` proportion
    #     as the train set, the next `frac_valid` proportion as the valid set
    #     and the rest as the test set. `ScaffoldSplitter` can better evaluate
    #     the generalization ability of the model on out-of-distribution
    #     samples. Other splitters like `RandomSplitter`,
    #     `RandomScaffoldSplitter` and `IndexSplitter` are also available.
    k = model_config['layer_num']
    l1 = k - 1
    l2 = l1 + args.context_size
    featurizer = PreGNNContextPredFeaturizer(
            model.substruct_graph_wrapper,
            model.context_graph_wrapper,
            k, l1, l2)
    dataset = load_zinc_dataset(args.data_path, featurizer=featurizer)

    splitter = RandomSplitter()
    train_dataset, _, test_dataset = splitter.split(
            dataset, frac_train=0.9, frac_valid=0, frac_test=0.1)
    if args.distributed:
        # Each worker takes every worker_num-th sample, starting at its index.
        indices = list(range(fleet.worker_index(), len(train_dataset), fleet.worker_num()))
        train_dataset = train_dataset[indices]
    print("Train/Test num: %s/%s" % (len(train_dataset), len(test_dataset)))

    ### start train
    # Run one training pass and one evaluation pass per epoch, for
    # ``args.max_epoch`` epochs. Worker 0 (or the single process) saves the
    # parameters and prints both losses after every epoch, then finally
    # re-saves the parameters of the epoch with the lowest test loss.
    list_test_loss = []
    for epoch_id in range(args.max_epoch):
        train_loss = train(args, exe, train_prog, model, train_dataset, featurizer)
        test_loss = evaluate(args, exe, test_prog, model, test_dataset, featurizer)
        if not args.distributed or fleet.worker_index() == 0:
            fluid.io.save_params(exe, '%s/epoch%s' % (args.model_dir, epoch_id), train_prog)
            list_test_loss.append(test_loss)
            print("epoch:%d train/loss:%s" % (epoch_id, train_loss))
            print("epoch:%d test/loss:%s" % (epoch_id, test_loss))

    if not args.distributed or fleet.worker_index() == 0:
        best_epoch_id = np.argmin(list_test_loss)
        fluid.io.load_params(exe, '%s/epoch%d' % (args.model_dir, best_epoch_id), train_prog)
        fluid.io.save_params(exe, '%s/epoch_best' % (args.model_dir), train_prog)
        return list_test_loss[best_epoch_id]
示例#30
0
def main(args):
    """Train and evaluate a classifier on precomputed message-passing features.

    The function loads the dataset named by ``args.dataset``, attaches an
    ``indegree ** -0.5`` normalisation feature to the graph, runs a one-off
    precompute program that produces cached features for the train/val/test
    node sets, and then trains on those cached features with Adam for
    ``args.epochs`` epochs. Validation loss/accuracy is logged every epoch
    and the test accuracy is logged once at the end.

    Args:
        args: Parsed options. This function reads ``dataset``, ``use_cuda``,
            ``num_layers``, ``lr``, ``weight_decay`` and ``epochs``; the whole
            object is also forwarded to ``calculate_loss``.
    """
    dataset = load(args.dataset)

    # normalize: nodes with zero indegree keep a norm of 0
    indegree = dataset.graph.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    dataset.graph.node_feat["norm"] = np.expand_dims(norm, -1)

    data = expand_data_dim(dataset)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    precompute_program = fluid.Program()
    startup_program = fluid.Program()
    train_program = fluid.Program()
    # Cloned while train_program is still empty; each clone is populated
    # separately under its own program_guard below.
    val_program = train_program.clone(for_test=True)
    test_program = train_program.clone(for_test=True)

    # precompute message passing and gather
    initializer = []
    with fluid.program_guard(precompute_program, startup_program):
        gw = pgl.graph_wrapper.StaticGraphWrapper(name="graph",
                                                  place=place,
                                                  graph=dataset.graph)

        cached_h = MessagePassing(gw,
                                  gw.node_feat["words"],
                                  num_layers=args.num_layers,
                                  norm=gw.node_feat['norm'])

        train_cached_h, init = pre_gather(cached_h, 'train',
                                          data['train_index'])
        initializer.append(init)
        val_cached_h, init = pre_gather(cached_h, 'val', data['val_index'])
        initializer.append(init)
        test_cached_h, init = pre_gather(cached_h, 'test', data['test_index'])
        initializer.append(init)

    exe = fluid.Executor(place)
    gw.initialize(place)
    for init in initializer:
        init(place)

    # get train features, val features and test features
    np_train_cached_h, np_val_cached_h, np_test_cached_h = exe.run(
        precompute_program,
        feed={},
        fetch_list=[train_cached_h, val_cached_h, test_cached_h],
        return_numpy=True)

    # Start a fresh initializer list for the three loss programs.
    initializer = []
    with fluid.program_guard(train_program, startup_program):
        with fluid.unique_name.guard():
            train_handle = calculate_loss('train', np_train_cached_h,
                                          data['train_label'],
                                          dataset.num_classes, args)
            initializer += train_handle['initializer']
            adam = fluid.optimizer.Adam(
                learning_rate=args.lr,
                regularization=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=args.weight_decay))
            adam.minimize(train_handle['loss'])

    with fluid.program_guard(val_program, startup_program):
        with fluid.unique_name.guard():
            val_handle = calculate_loss('val', np_val_cached_h,
                                        data['val_label'], dataset.num_classes,
                                        args)
            initializer += val_handle['initializer']

    with fluid.program_guard(test_program, startup_program):
        with fluid.unique_name.guard():
            test_handle = calculate_loss('test', np_test_cached_h,
                                         data['test_label'],
                                         dataset.num_classes, args)
            initializer += test_handle['initializer']

    exe.run(startup_program)
    for init in initializer:
        init(place)

    dur = []
    for epoch in range(args.epochs):
        # The first three epochs are excluded from the timing statistics.
        if epoch >= 3:
            t0 = time.time()
        train_loss_t = exe.run(train_program,
                               feed={},
                               fetch_list=[train_handle['loss']],
                               return_numpy=True)[0]

        if epoch >= 3:
            time_per_epoch = 1.0 * (time.time() - t0)
            dur.append(time_per_epoch)

        val_loss_t, val_acc_t = exe.run(
            val_program,
            feed={},
            fetch_list=[val_handle['loss'], val_handle['acc']],
            return_numpy=True)

        # np.mean([]) emits a RuntimeWarning while no epoch has been timed
        # yet; report nan directly so the log text stays the same.
        mean_dur = np.mean(dur) if dur else float("nan")
        log.info("Epoch %d " % epoch + "(%.5lf sec) " % mean_dur +
                 "Train Loss: %f " % train_loss_t +
                 "Val Loss: %f " % val_loss_t + "Val Acc: %f " % val_acc_t)

    test_loss_t, test_acc_t = exe.run(
        test_program,
        feed={},
        fetch_list=[test_handle['loss'], test_handle['acc']],
        return_numpy=True)
    log.info("Test Accuracy: %f" % test_acc_t)
    def check_network_convergence(self,
                                  is_sparse,
                                  build_strategy=None,
                                  use_cuda=True):
        """Build a db_lstm + linear-chain CRF network and run 10 batches.

        The network is trained with SGD (exponentially decayed learning
        rate) through a ``fluid.ParallelExecutor``; the fetched average cost
        of each of the 10 batches is printed. Nothing is asserted or
        returned here.

        Args:
            is_sparse: Not read directly in this method; it reaches
                ``db_lstm`` through ``**locals()``.
            build_strategy: Forwarded to ``fluid.ParallelExecutor``.
            use_cuda: Selects ``CUDAPlace(0)`` over ``CPUPlace`` and is
                forwarded to ``fluid.ParallelExecutor``.
        """
        os.environ['CPU_NUM'] = str(4)
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            # Nine int64 sequence inputs (lod_level=1): eight features
            # declared here and the target label declared further below.
            word = fluid.layers.data(name='word_data',
                                     shape=[1],
                                     dtype='int64',
                                     lod_level=1)
            predicate = fluid.layers.data(name='verb_data',
                                          shape=[1],
                                          dtype='int64',
                                          lod_level=1)
            ctx_n2 = fluid.layers.data(name='ctx_n2_data',
                                       shape=[1],
                                       dtype='int64',
                                       lod_level=1)
            ctx_n1 = fluid.layers.data(name='ctx_n1_data',
                                       shape=[1],
                                       dtype='int64',
                                       lod_level=1)
            ctx_0 = fluid.layers.data(name='ctx_0_data',
                                      shape=[1],
                                      dtype='int64',
                                      lod_level=1)
            ctx_p1 = fluid.layers.data(name='ctx_p1_data',
                                       shape=[1],
                                       dtype='int64',
                                       lod_level=1)
            ctx_p2 = fluid.layers.data(name='ctx_p2_data',
                                       shape=[1],
                                       dtype='int64',
                                       lod_level=1)
            mark = fluid.layers.data(name='mark_data',
                                     shape=[1],
                                     dtype='int64',
                                     lod_level=1)

            # Every local bound so far (self, is_sparse, build_strategy,
            # use_cuda, main, startup and the eight data layers) is passed
            # as a keyword argument, so renaming or adding locals above this
            # line changes the call.
            # NOTE(review): db_lstm's signature is not visible here —
            # confirm it accepts all of these names.
            feature_out = db_lstm(**locals())
            target = fluid.layers.data(name='target',
                                       shape=[1],
                                       dtype='int64',
                                       lod_level=1)
            crf_cost = fluid.layers.linear_chain_crf(
                input=feature_out,
                label=target,
                param_attr=fluid.ParamAttr(name='crfw', learning_rate=1e-1))
            avg_cost = fluid.layers.mean(crf_cost)

            sgd_optimizer = fluid.optimizer.SGD(
                learning_rate=fluid.layers.exponential_decay(
                    learning_rate=0.01,
                    decay_steps=100000,
                    decay_rate=0.5,
                    staircase=True))
            sgd_optimizer.minimize(avg_cost)

            # The conll05 *test* split is used as the training stream here.
            train_data = paddle.batch(paddle.reader.shuffle(
                paddle.dataset.conll05.test(), buf_size=8192),
                                      batch_size=16)

            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(startup)

            pe = fluid.ParallelExecutor(use_cuda=use_cuda,
                                        loss_name=avg_cost.name,
                                        build_strategy=build_strategy)

            # Feed order must match the field order of each sample.
            # NOTE(review): assumed to follow the conll05 reader's layout —
            # confirm.
            feeder = fluid.DataFeeder(feed_list=[
                word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark,
                target
            ],
                                      place=fluid.CPUPlace())

            data = train_data()
            for i in range(10):
                cur_batch = next(data)
                print(
                    pe.run(feed=feeder.feed(cur_batch),
                           fetch_list=[avg_cost.name])[0])
示例#32
0
    def test_errors(self):
        """Check that npair_loss raises TypeError on invalid inputs.

        Two groups of cases are covered: passing a numpy array instead of a
        Variable for each argument, and passing an int32 Variable for each
        argument. In the dtype cases only the argument under test is
        invalid, so the TypeError can only come from that argument.
        """
        with program_guard(Program(), Program()):
            anchor_np = np.random.random((2, 4)).astype("float32")
            positive_np = np.random.random((2, 4)).astype("float32")
            labels_np = np.random.random((2)).astype("float32")
            anchor_data = fluid.data(name='anchor',
                                     shape=[2, 4],
                                     dtype='float32')
            positive_data = fluid.data(name='positive',
                                       shape=[2, 4],
                                       dtype='float32')
            labels_data = fluid.data(name='labels', shape=[2], dtype='float32')

            def test_anchor_Variable():
                # the anchor type must be Variable
                fluid.layers.npair_loss(anchor=anchor_np,
                                        positive=positive_data,
                                        labels=labels_data)

            def test_positive_Variable():
                # the positive type must be Variable
                fluid.layers.npair_loss(anchor=anchor_data,
                                        positive=positive_np,
                                        labels=labels_data)

            def test_labels_Variable():
                # the labels type must be Variable
                fluid.layers.npair_loss(anchor=anchor_data,
                                        positive=positive_data,
                                        labels=labels_np)

            self.assertRaises(TypeError, test_anchor_Variable)
            self.assertRaises(TypeError, test_positive_Variable)
            self.assertRaises(TypeError, test_labels_Variable)

            def test_anchor_type():
                # int32 anchor is the only invalid argument in this call
                anchor_data1 = fluid.data(name='anchor1',
                                          shape=[2, 4],
                                          dtype='int32')
                fluid.layers.npair_loss(anchor=anchor_data1,
                                        positive=positive_data,
                                        labels=labels_data)

            def test_positive_type():
                # int32 positive is the only invalid argument in this call
                positive_data1 = fluid.data(name='positive1',
                                            shape=[2, 4],
                                            dtype='int32')
                fluid.layers.npair_loss(anchor=anchor_data,
                                        positive=positive_data1,
                                        labels=labels_data)

            def test_labels_type():
                # int32 labels is the only invalid argument in this call
                labels_data1 = fluid.data(name='labels1',
                                          shape=[2],
                                          dtype='int32')
                fluid.layers.npair_loss(anchor=anchor_data,
                                        positive=positive_data,
                                        labels=labels_data1)

            self.assertRaises(TypeError, test_anchor_type)
            self.assertRaises(TypeError, test_positive_type)
            self.assertRaises(TypeError, test_labels_type)
示例#33
0
def main():
    """Run detection inference on the requested images and save the results.

    Reads the config named by ``FLAGS.config``, builds the architecture's
    test program, optionally loads ``cfg.weights``, and runs every batch
    from the loader. For each image id in a batch the visualized image is
    saved under ``FLAGS.output_dir``; when ``FLAGS.use_tb`` is set the
    original and annotated images are also written through a
    ``SummaryWriter``.

    Raises:
        ValueError: If the config has no ``architecture`` entry.
    """
    cfg = load_config(FLAGS.config)

    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")

    merge_config(FLAGS.opt)

    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    # Fall back to the architecture's default test feed when none is set.
    if 'test_feed' not in cfg:
        test_feed = create(main_arch + 'TestFeed')
    else:
        test_feed = create(cfg.test_feed)

    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    test_feed.dataset.add_images(test_images)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            loader, feed_vars = create_feed(test_feed, iterable=True)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    reader = create_reader(test_feed)
    loader.set_sample_list_generator(reader, place)

    exe.run(startup_prog)
    if cfg.weights:
        checkpoint.load_params(exe, infer_prog, cfg.weights)

    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
            "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)

    # parse dataset category
    # The converters are imported per metric. mask2out is only bound for
    # COCO.
    # NOTE(review): a 'mask' fetch under any other metric would hit an
    # unbound mask2out below — confirm mask models always use COCO.
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info

    anno_file = getattr(test_feed.dataset, 'annotation', None)
    with_background = getattr(test_feed, 'with_background', True)
    use_default_label = getattr(test_feed, 'use_default_label', False)
    clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                use_default_label)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    # use tb-paddle to log image
    if FLAGS.use_tb:
        from tb_paddle import SummaryWriter
        tb_writer = SummaryWriter(FLAGS.tb_log_dir)
        tb_image_step = 0
        tb_image_frame = 0  # each frame can display ten pictures at most.

    imid2path = reader.imid2path
    for iter_id, data in enumerate(loader()):
        outs = exe.run(infer_prog,
                       feed=data,
                       fetch_list=values,
                       return_numpy=False)
        # Each fetched tensor is stored as (ndarray, sequence lengths).
        res = {
            k: (np.array(v), v.recursive_sequence_lengths())
            for k, v in zip(keys, outs)
        }
        logger.info('Infer iter {}'.format(iter_id))

        bbox_results = None
        mask_results = None
        if 'bbox' in res:
            bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
        if 'mask' in res:
            mask_results = mask2out([res], clsid2catid,
                                    model.mask_head.resolution)

        # visualize result
        im_ids = res['im_id'][0]
        for im_id in im_ids:
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')

            # use tb-paddle to log original image
            if FLAGS.use_tb:
                original_image_np = np.array(image)
                tb_writer.add_image("original/frame_{}".format(tb_image_frame),
                                    original_image_np,
                                    tb_image_step,
                                    dataformats='HWC')

            image = visualize_results(image, int(im_id), catid2name,
                                      FLAGS.draw_threshold, bbox_results,
                                      mask_results)

            # use tb-paddle to log image with bbox
            if FLAGS.use_tb:
                infer_image_np = np.array(image)
                tb_writer.add_image("bbox/frame_{}".format(tb_image_frame),
                                    infer_image_np,
                                    tb_image_step,
                                    dataformats='HWC')
                # The step wraps at 10 and then the frame index advances,
                # so each frame tag holds at most ten images.
                tb_image_step += 1
                if tb_image_step % 10 == 0:
                    tb_image_step = 0
                    tb_image_frame += 1

            save_name = get_save_image_name(FLAGS.output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(save_name))
            image.save(save_name, quality=95)
示例#34
0
    def graph_apis(self, use_cuda=False, for_ci=True):
        """Exercise the IrGraph API on a small conv network.

        Covers cloning, compiling and running both the graph and its clone,
        saving/loading persistable nodes, drawing (skipped when ``for_ci``),
        cycle detection, topological sort, adjacency list construction and
        safe node removal.

        Args:
            use_cuda: Run on ``CUDAPlace(0)`` instead of ``CPUPlace``; also
                selects the checkpoint directory name.
            for_ci: When true, suppress loss printing and graph drawing.
        """
        weight_name = 'conv2d_1.w_0'

        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        with fluid.unique_name.guard(), \
                fluid.program_guard(main_prog, startup_prog):
            feeds, loss = conv_block()
            fluid.optimizer.Adam(learning_rate=0.001).minimize(loss)

        # The clone must contain as many nodes as the graph it was made from.
        graph = IrGraph(core.Graph(main_prog.desc), for_test=False)
        backup_graph = graph.clone()
        self.assertEqual(len(graph.all_nodes()), len(backup_graph.all_nodes()))

        strategy = fluid.BuildStrategy()
        strategy.memory_optimize = False
        strategy.enable_inplace = False
        origin_binary = fluid.CompiledProgram(graph.graph).with_data_parallel(
            loss_name=loss.name, build_strategy=strategy)
        backup_binary = fluid.CompiledProgram(
            backup_graph.graph).with_data_parallel(
                loss_name=loss.name, build_strategy=strategy)

        if use_cuda:
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_prog)

        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=8)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)

        def _run_steps(compiled, steps=5):
            # A new reader is created on every step, as in the original.
            for _ in range(steps):
                batch = next(train_reader())
                fetched = exe.run(compiled,
                                  feed=feeder.feed(batch),
                                  fetch_list=[loss.name])
                if for_ci:
                    continue
                print('{}: {}'.format('loss', fetched))

        def _weight_sum():
            # Sum of the tracked conv weight as currently held in the scope.
            tensor = fluid.global_scope().find_var(weight_name).get_tensor()
            return np.sum(np.array(tensor))

        _run_steps(origin_binary)
        _run_steps(backup_binary)

        checkpoint_dir = "checkpoint_gpu" if use_cuda else "checkpoint_cpu"

        # Save, zero the weight in place, then reload and compare the sums.
        sum_before = _weight_sum()
        fluid.io._save_persistable_nodes(exe, checkpoint_dir, graph)
        weight = fluid.global_scope().find_var(weight_name).get_tensor()
        weight.set(np.zeros(weight._get_dims()).astype("float32"), place)
        self.assertEqual(_weight_sum(), 0)
        fluid.io._load_persistable_nodes(exe, checkpoint_dir, graph)
        self.assertEqual(sum_before, _weight_sum())

        marked_nodes = {
            op
            for op in graph.all_op_nodes() if op.name().find('conv2d') >= 0
        }
        if not for_ci:
            graph.draw('.', 'residual', marked_nodes)
            backup_marked_nodes = {
                op
                for op in backup_graph.all_op_nodes()
                if op.name().find('conv2d') >= 0
            }
            backup_graph.draw('.', 'backup', backup_marked_nodes)

        self.assertFalse(graph.has_circle())
        self.assertEqual(graph.graph_num(), 1)
        op_count = len(graph.all_op_nodes())
        self.assertEqual(len(graph.topology_sort()), op_count)
        self.assertEqual(len(graph.build_adjacency_list()),
                         len(graph.all_op_nodes()))

        # Removing the marked ops must shrink the node count by exactly
        # that many.
        nodes_num = len(graph.all_nodes())
        graph.safe_remove_nodes(marked_nodes)
        self.assertEqual(len(graph.all_nodes()), nodes_num - len(marked_nodes))
示例#35
0
 def test_errors(self):
     """Check that cast raises TypeError when given a LoDTensor."""
     with program_guard(Program(), Program()):
         # cast_op only accepts a Variable as input, so a LoDTensor built
         # directly from a numpy array must be rejected.
         lod_tensor = fluid.create_lod_tensor(
             np.array([[-1]]), [[1]], fluid.MLUPlace(0))
         with self.assertRaises(TypeError):
             fluid.layers.cast(lod_tensor, 'int32')
示例#36
0
    def context(self, trainable=True, pretrained=True):
        """Build the MobileNetV2 program used for transfer learning.

        Args:
            trainable (bool): Value assigned to ``trainable`` on every
                parameter of the returned program.
            pretrained (bool): If true, load the variables found under
                ``self.default_pretrained_model_path``; otherwise run the
                startup program to initialize them.

        Returns:
            inputs (dict): key is 'image', corresponding value is the image
                tensor.
            outputs (dict): keys are
                'classification', the result of classification, and
                'feature_map', the second value returned by
                ``MobileNetV2.net``.
            context_prog (fluid.Program): program for transfer learning.
        """
        context_prog = fluid.Program()
        startup_prog = fluid.Program()
        with fluid.program_guard(context_prog, startup_prog), \
                fluid.unique_name.guard():
            image = fluid.layers.data(name="image",
                                      shape=[3, 224, 224],
                                      dtype="float32")
            backbone = MobileNetV2()
            output, feature_map = backbone.net(
                input=image, class_dim=len(self.label_list))

            # Record the prefixed names before the variables are renamed,
            # then look the renamed variables up in the global block.
            prefix = '@HUB_{}@'.format(self.name)
            input_names = {'image': prefix + image.name}
            output_names = {
                'classification': prefix + output.name,
                'feature_map': prefix + feature_map.name
            }
            add_vars_prefix(context_prog, prefix)
            add_vars_prefix(startup_prog, prefix)
            block_vars = context_prog.global_block().vars

            inputs = {}
            for key, var_name in input_names.items():
                inputs[key] = block_vars[var_name]
            outputs = {}
            for key, var_name in output_names.items():
                outputs[key] = block_vars[var_name]

            exe = fluid.Executor(fluid.CPUPlace())
            if not pretrained:
                exe.run(startup_prog)
            else:

                def _has_saved_file(var):
                    # Only load variables that have a file in the model dir.
                    return os.path.exists(
                        os.path.join(self.default_pretrained_model_path,
                                     var.name))

                fluid.io.load_vars(exe,
                                   self.default_pretrained_model_path,
                                   context_prog,
                                   predicate=_has_saved_file)

            # Apply the requested trainable flag to every parameter.
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
        return inputs, outputs, context_prog
示例#37
0
文件: train.py 项目: Yelrose/PGL
def main(args):
    """Train a two-layer GCN for node classification and log its accuracy.

    Loads the dataset named by ``args.dataset``, attaches an
    ``indegree ** -0.5`` normalisation feature to the graph, and trains for
    a fixed 200 epochs with Adam. Train and validation loss/accuracy are
    logged every epoch and the test accuracy is logged once at the end.

    Args:
        args: Parsed options; ``dataset`` and ``use_cuda`` are read.
    """
    dataset = load(args.dataset)

    # normalize: nodes with zero indegree keep a norm of 0
    indegree = dataset.graph.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    dataset.graph.node_feat["norm"] = np.expand_dims(norm, -1)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
    hidden_size = 16

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.GraphWrapper(
            name="graph",
            place=place,
            node_feat=dataset.graph.node_feat_info())

        output = pgl.layers.gcn(gw,
                                gw.node_feat["words"],
                                hidden_size,
                                activation="relu",
                                norm=gw.node_feat['norm'],
                                name="gcn_layer_1")
        output = fluid.layers.dropout(
            output, 0.5, dropout_implementation='upscale_in_train')
        output = pgl.layers.gcn(gw,
                                output,
                                dataset.num_classes,
                                activation=None,
                                norm=gw.node_feat['norm'],
                                name="gcn_layer_2")
        node_index = fluid.layers.data(
            "node_index",
            shape=[None, 1],
            dtype="int64",
            append_batch_size=False)
        node_label = fluid.layers.data(
            "node_label",
            shape=[None, 1],
            dtype="int64",
            append_batch_size=False)

        # Only the rows selected by node_index contribute to loss/accuracy.
        pred = fluid.layers.gather(output, node_index)
        loss, pred = fluid.layers.softmax_with_cross_entropy(
            logits=pred, label=node_label, return_softmax=True)
        acc = fluid.layers.accuracy(input=pred, label=node_label, k=1)
        loss = fluid.layers.mean(loss)

    # Clone for evaluation before the optimizer ops are appended.
    test_program = train_program.clone(for_test=True)
    with fluid.program_guard(train_program, startup_program):
        adam = fluid.optimizer.Adam(
            learning_rate=1e-2,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005))
        adam.minimize(loss)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    feed_dict = gw.to_feed(dataset.graph)

    train_index = dataset.train_index
    train_label = np.expand_dims(dataset.y[train_index], -1)
    train_index = np.expand_dims(train_index, -1)

    val_index = dataset.val_index
    val_label = np.expand_dims(dataset.y[val_index], -1)
    val_index = np.expand_dims(val_index, -1)

    test_index = dataset.test_index
    test_label = np.expand_dims(dataset.y[test_index], -1)
    test_index = np.expand_dims(test_index, -1)

    dur = []
    for epoch in range(200):
        # The first three epochs are excluded from the timing statistics.
        if epoch >= 3:
            t0 = time.time()
        feed_dict["node_index"] = np.array(train_index, dtype="int64")
        feed_dict["node_label"] = np.array(train_label, dtype="int64")
        train_loss, train_acc = exe.run(train_program,
                                        feed=feed_dict,
                                        fetch_list=[loss, acc],
                                        return_numpy=True)

        if epoch >= 3:
            time_per_epoch = 1.0 * (time.time() - t0)
            dur.append(time_per_epoch)
        feed_dict["node_index"] = np.array(val_index, dtype="int64")
        feed_dict["node_label"] = np.array(val_label, dtype="int64")
        val_loss, val_acc = exe.run(test_program,
                                    feed=feed_dict,
                                    fetch_list=[loss, acc],
                                    return_numpy=True)

        # np.mean([]) emits a RuntimeWarning while no epoch has been timed
        # yet; report nan directly so the log text stays the same.
        mean_dur = np.mean(dur) if dur else float("nan")
        log.info("Epoch %d " % epoch + "(%.5lf sec) " % mean_dur +
                 "Train Loss: %f " % train_loss + "Train Acc: %f " % train_acc
                 + "Val Loss: %f " % val_loss + "Val Acc: %f " % val_acc)

    feed_dict["node_index"] = np.array(test_index, dtype="int64")
    feed_dict["node_label"] = np.array(test_label, dtype="int64")
    test_loss, test_acc = exe.run(test_program,
                                  feed=feed_dict,
                                  fetch_list=[loss, acc],
                                  return_numpy=True)
    log.info("Accuracy: %f" % test_acc)
示例#38
0
def main():
    """Build, train and optionally evaluate the model named in the config.

    All settings come from the module-level ``FLAGS`` object and from the
    config file at ``FLAGS.config``. In order, the function:

    1. Detects distributed mode from the ``PADDLE_TRAINER_ID`` and
       ``PADDLE_TRAINERS_NUM`` environment variables and seeds ``random``
       and ``numpy``.
    2. Loads the config, builds the train program (plus an eval program when
       ``FLAGS.eval`` is set) and compiles the train program with
       ``with_data_parallel``.
    3. Restores weights from ``FLAGS.resume_checkpoint`` or
       ``cfg.pretrain_weights``.
    4. Runs iterations ``start_iter`` .. ``cfg.max_iters - 1``, logging every
       ``cfg.log_iter`` iterations and saving a snapshot every
       ``cfg.snapshot_iter`` iterations and on the last iteration.

    Raises:
        ValueError: if the loaded config has no ``architecture`` entry.
    """
    env = os.environ
    # Distributed mode is inferred purely from the launcher's environment.
    FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
    if FLAGS.dist:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        # The seed differs per trainer: 99 + trainer id.
        local_seed = (99 + trainer_id)
        random.seed(local_seed)
        np.random.seed(local_seed)

    # enable_ce re-seeds with a fixed value, overriding the seeds set above.
    if FLAGS.enable_ce:
        random.seed(0)
        np.random.seed(0)

    cfg = load_config(FLAGS.config)
    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")

    merge_config(FLAGS.opt)

    # Default logging interval when the config does not provide one.
    if 'log_iter' not in cfg:
        cfg.log_iter = 20

    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()
    # trainer_id is only bound in distributed mode; `not FLAGS.dist`
    # short-circuits before it is read otherwise.
    if not FLAGS.dist or trainer_id == 0:
        print_total_cfg(cfg)

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')

    # build program
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    if FLAGS.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = create(main_arch)
            if FLAGS.fp16:
                assert (getattr(model.backbone, 'norm_type', None)
                        != 'affine_channel'), \
                    '--fp16 currently does not support affine channel, ' \
                    ' please modify backbone settings to use batch norm'

            with mixed_precision_context(FLAGS.loss_scale, FLAGS.fp16) as ctx:
                inputs_def = cfg['TrainReader']['inputs_def']
                feed_vars, train_loader = model.build_inputs(**inputs_def)
                train_fetches = model.train(feed_vars)
                loss = train_fetches['loss']
                # With fp16 the loss is multiplied by the loss-scale variable
                # before minimize() and divided by it again afterwards.
                if FLAGS.fp16:
                    loss *= ctx.get_loss_scale_var()
                lr = lr_builder()
                optimizer = optim_builder(lr)
                optimizer.minimize(loss)
                if FLAGS.fp16:
                    loss /= ctx.get_loss_scale_var()

    # parse train fetches
    train_keys, train_values, _ = parse_fetches(train_fetches)
    # lr is fetched last; the training loop reads it back as outs[-1].
    train_values.append(lr)

    if FLAGS.eval:
        eval_prog = fluid.Program()
        with fluid.program_guard(eval_prog, startup_prog):
            with fluid.unique_name.guard():
                # NOTE: `model` is rebound to the eval-program instance here,
                # so the later model.backbone / model.mask_head reads use this
                # instance whenever FLAGS.eval is set.
                model = create(main_arch)
                inputs_def = cfg['EvalReader']['inputs_def']
                feed_vars, eval_loader = model.build_inputs(**inputs_def)
                fetches = model.eval(feed_vars)
        eval_prog = eval_prog.clone(True)

        eval_reader = create_reader(cfg.EvalReader)
        eval_loader.set_sample_list_generator(eval_reader, place)

        # parse eval fetches
        # The extra keys passed to parse_fetches depend on the metric.
        extra_keys = []
        if cfg.metric == 'COCO':
            extra_keys = ['im_info', 'im_id', 'im_shape']
        if cfg.metric == 'VOC':
            extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
        if cfg.metric == 'WIDERFACE':
            extra_keys = ['im_id', 'im_shape', 'gt_bbox']
        eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                         extra_keys)

    # compile program for multi-devices
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    # only enable sync_bn in multi GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to be 1 to save memory usages, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1
    if FLAGS.dist:
        dist_utils.prepare_for_multi_process(exe, build_strategy, startup_prog,
                                             train_prog)
        exec_strategy.num_threads = 1

    exe.run(startup_prog)
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    if FLAGS.eval:
        compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog)

    fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'

    ignore_params = cfg.finetune_exclude_pretrained_params \
                 if 'finetune_exclude_pretrained_params' in cfg else []

    # Weight restoration, in priority order: resume checkpoint, then
    # pretrained weights via load_and_fusebn (affine_channel backbone with no
    # excluded params), then plain pretrained weights.
    start_iter = 0
    if FLAGS.resume_checkpoint:
        checkpoint.load_checkpoint(exe, train_prog, FLAGS.resume_checkpoint)
        start_iter = checkpoint.global_step()
    elif cfg.pretrain_weights and fuse_bn and not ignore_params:
        checkpoint.load_and_fusebn(exe, train_prog, cfg.pretrain_weights)
    elif cfg.pretrain_weights:
        checkpoint.load_params(
            exe, train_prog, cfg.pretrain_weights, ignore_params=ignore_params)

    # Second argument is remaining iterations times device count; presumably
    # the number of batches the reader should yield - confirm in create_reader.
    train_reader = create_reader(cfg.TrainReader, (cfg.max_iters - start_iter) *
                                 devices_num, cfg)
    train_loader.set_sample_list_generator(train_reader, place)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'

    train_stats = TrainingStats(cfg.log_smooth_window, train_keys)
    train_loader.start()
    start_time = time.time()
    end_time = time.time()

    # Snapshots go to <cfg.save_dir>/<config file name without extension>.
    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)
    # Sliding window of the most recent per-iteration wall times.
    time_stat = deque(maxlen=cfg.log_smooth_window)
    best_box_ap_list = [0.0, 0]  #[map, iter]

    # use tb-paddle to log data
    if FLAGS.use_tb:
        from tb_paddle import SummaryWriter
        tb_writer = SummaryWriter(FLAGS.tb_log_dir)
        tb_loss_step = 0
        tb_mAP_step = 0

    for it in range(start_iter, cfg.max_iters):
        # Each sample is the wall time since the previous iteration started;
        # the first sample covers the setup code between the time.time()
        # calls above and the start of the loop.
        start_time = end_time
        end_time = time.time()
        time_stat.append(end_time - start_time)
        time_cost = np.mean(time_stat)
        eta_sec = (cfg.max_iters - it) * time_cost
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        outs = exe.run(compiled_train_prog, fetch_list=train_values)
        # outs[-1] is the learning rate appended above, so it is left out of
        # the per-key statistics.
        stats = {k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1])}

        # use tb-paddle to log loss
        if FLAGS.use_tb:
            if it % cfg.log_iter == 0:
                for loss_name, loss_value in stats.items():
                    tb_writer.add_scalar(loss_name, loss_value, tb_loss_step)
                tb_loss_step += 1

        train_stats.update(stats)
        logs = train_stats.log()
        # Only trainer 0 logs in distributed mode.
        if it % cfg.log_iter == 0 and (not FLAGS.dist or trainer_id == 0):
            strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
                it, np.mean(outs[-1]), logs, time_cost, eta)
            logger.info(strs)


        # `and` binds tighter than `or`: save on every snapshot_iter multiple
        # after iteration 0, or on the final iteration; trainer 0 only when
        # distributed.
        if (it > 0 and it % cfg.snapshot_iter == 0 or it == cfg.max_iters - 1) \
           and (not FLAGS.dist or trainer_id == 0):
            save_name = str(it) if it != cfg.max_iters - 1 else "model_final"
            checkpoint.save(exe, train_prog, os.path.join(save_dir, save_name))

            if FLAGS.eval:
                # evaluation
                results = eval_run(exe, compiled_eval_prog, eval_loader,
                                   eval_keys, eval_values, eval_cls)
                resolution = None
                if 'mask' in results[0]:
                    resolution = model.mask_head.resolution
                box_ap_stats = eval_results(
                    results, cfg.metric, cfg.num_classes, resolution,
                    is_bbox_normalized, FLAGS.output_eval, map_type,
                    cfg['EvalReader']['dataset'])

                # use tb_paddle to log mAP
                if FLAGS.use_tb:
                    tb_writer.add_scalar("mAP", box_ap_stats[0], tb_mAP_step)
                    tb_mAP_step += 1

                # Keep a separate "best_model" snapshot whenever the first
                # entry of box_ap_stats improves on the best seen so far.
                if box_ap_stats[0] > best_box_ap_list[0]:
                    best_box_ap_list[0] = box_ap_stats[0]
                    best_box_ap_list[1] = it
                    checkpoint.save(exe, train_prog,
                                    os.path.join(save_dir, "best_model"))
                logger.info("Best test box ap: {}, in iter: {}".format(
                    best_box_ap_list[0], best_box_ap_list[1]))

    train_loader.reset()
示例#39
0
    def context(self, trainable=True, pretrained=True, get_prediction=False):
        """
        Build the program and variables used for transfer learning.

        Args:
            trainable (bool): value assigned to ``trainable`` on every
                parameter of the returned program.
            pretrained (bool): when True, load variables found under
                ``self.default_pretrained_model_path``; otherwise run the
                startup program.
            get_prediction (bool): when True, expose the detection output;
                otherwise expose the backbone feature maps.

        Returns:
             inputs(dict): the 'image' and 'im_size' variables.
             outputs(dict): 'bbox_out' or 'body_features', each mapped to a
                 list of variables.
             context_prog (Program): the program holding the network.
        """
        context_prog = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(context_prog, startup_program), \
                fluid.unique_name.guard():
            # Inputs and backbone, created in the same order as before.
            image = fluid.layers.data(
                name='image', shape=[3, 300, 300], dtype='float32')
            backbone = MobileNet(**self.mobilenet_config)
            body_feats = backbone(image)
            im_size = fluid.layers.data(
                name='im_size', shape=[2], dtype='int32')

            # Record the prefixed names first; the variables are looked up
            # by these names once the prefix has been applied.
            var_prefix = '@HUB_{}@'.format(self.name)
            input_names = {
                'image': var_prefix + image.name,
                'im_size': var_prefix + im_size.name,
            }
            if not get_prediction:
                output_names = {
                    'body_features':
                    [var_prefix + feat.name for feat in body_feats]
                }
            else:
                locs, confs, box, box_var = fluid.layers.multi_box_head(
                    inputs=body_feats,
                    image=image,
                    num_classes=21,
                    **self.multi_box_head_config)
                pred = fluid.layers.detection_output(
                    loc=locs,
                    scores=confs,
                    prior_box=box,
                    prior_box_var=box_var,
                    **self.output_decoder_config)
                output_names = {'bbox_out': [var_prefix + pred.name]}

            # Apply the prefix to both the main and the startup program.
            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(fluid.default_startup_program(), var_prefix)

            # Resolve the recorded names to variables of the main program.
            global_vars = context_prog.global_block().vars
            inputs = {}
            for key, var_name in input_names.items():
                inputs[key] = global_vars[var_name]
            outputs = {}
            for key, var_names in output_names.items():
                outputs[key] = [global_vars[name] for name in var_names]

            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable

            exe = fluid.Executor(fluid.CPUPlace())
            if not pretrained:
                exe.run(startup_program)
            else:
                model_dir = self.default_pretrained_model_path

                def _has_saved_file(var):
                    # Only load variables that have a file in the model dir.
                    return os.path.exists(os.path.join(model_dir, var.name))

                fluid.io.load_vars(exe, model_dir, predicate=_has_saved_file)

            return inputs, outputs, context_prog
示例#40
0
        loss = fluid.layers.reduce_mean(loss)

        optimizer = fluid.optimizer.AdamOptimizer(learning_rate = 0.01)
        optimizer.minimize(loss)

        return loss

    else:

        return input_text_hidden

# Programs for parameter initialisation and training. test_program is
# created here but not used in the statements below.
startup_program = fluid.Program()
train_program = fluid.Program()
test_program = fluid.Program()

# Build the training graph; the value returned by build_model is used as
# the loss that gets fetched on every step.
with fluid.program_guard(train_program, startup_program), \
        fluid.unique_name.guard():
    loss = build_model(is_training=True)

exe = fluid.Executor(fluid.CPUPlace())

# Run the startup program once before training.
exe.run(startup_program)

step = 0
for in_text, in_re_text, in_label, in_len in build_batch(
        batch_size, max_len, epochs, train_reader):
    out = exe.run(
        program=train_program,
        feed={
            "text": in_text,
            "label": in_label,
            "text_len": in_len,
            "re_text": in_re_text,
        },
        fetch_list=[loss.name])

    print("step %d, loss %.5f" % (step, out[0][0]))
    step += 1
示例#41
0
    def test_errors(self):
        """Check that the hsigmoid APIs raise on invalid arguments.

        Exercises paddle.nn.HSigmoidLoss and
        paddle.nn.functional.hsigmoid_loss in static-graph and dynamic-graph
        mode, then the legacy fluid.layers.hsigmoid.
        """
        main_prog = paddle.static.Program()
        startup_prog = paddle.static.Program()
        with paddle.static.program_guard(main_prog, startup_prog):
            # paddle.nn.HSigmoidLoss: these constructor arguments are
            # rejected.
            with self.assertRaises(ValueError):
                paddle.nn.HSigmoidLoss(6, 1)

            # paddle.nn.functional.hsigmoid_loss: valid reference inputs.
            x = paddle.static.data('x', [4, 6])
            label = paddle.static.data('label', [4, 1], 'int64')
            weight = paddle.static.data('weight', [7, 6])
            bias = paddle.static.data('bias', [7])

            # Each case below swaps one argument for an int32 / float32
            # variable and expects a TypeError.
            x_int32 = paddle.static.data('x_int32', [4, 6], 'int32')
            with self.assertRaises(TypeError):
                F.hsigmoid_loss(x_int32, label, 8, weight)

            label_float32 = paddle.static.data('label_float32', [4, 1],
                                               'float32')
            with self.assertRaises(TypeError):
                F.hsigmoid_loss(x, label_float32, 8, weight)

            weight_int32 = paddle.static.data('weight_int32', [7, 6], 'int32')
            with self.assertRaises(TypeError):
                F.hsigmoid_loss(x, label, 8, weight_int32)

            bias_int32 = paddle.static.data('bias_int32', [7], 'int32')
            with self.assertRaises(TypeError):
                F.hsigmoid_loss(x, label, 8, weight, bias=bias_int32)

            path_table_int32 = paddle.static.data('path_table_int32', [7],
                                                  'int32')
            with self.assertRaises(TypeError):
                F.hsigmoid_loss(
                    x, label, 8, weight, path_table=path_table_int32)

            path_code_int32 = paddle.static.data('path_code_int32', [7],
                                                 'int32')
            with self.assertRaises(TypeError):
                F.hsigmoid_loss(x, label, 8, weight, path_code=path_code_int32)

        # Dynamic-graph mode: paddle.nn.HSigmoidLoss with zero-size input.
        paddle.disable_static(self.place)
        x_arr = np.array([], dtype=np.float32)
        x = paddle.to_tensor(x_arr.reshape((100000, 0)))
        label = paddle.to_tensor(0, dtype='int64')
        with self.assertRaises(ValueError):
            paddle.nn.HSigmoidLoss(x, label)

        # Dynamic-graph mode: paddle.nn.functional.hsigmoid_loss with
        # zero-size input, label and weight.
        x = paddle.to_tensor(x_arr.reshape((10, 0)), dtype='float32')
        label = paddle.to_tensor([], dtype='int64')
        weight = paddle.to_tensor([], dtype='float32')
        with self.assertRaises(ValueError):
            F.hsigmoid_loss(x, label, 0, weight)
        paddle.enable_static()

        # Legacy API: paddle.fluid.layers.hsigmoid
        with program_guard(Program()):
            label = fluid.data('label', [4, 1], 'int64')
            # The input must be a Variable, not a Python number.
            with self.assertRaises(TypeError):
                fluid.layers.hsigmoid(1, label, 2)
            # The input dtype must be float16, float32 or float64.
            x_int32 = fluid.data(name='x_int32', shape=[4, 3], dtype='int32')
            with self.assertRaises(TypeError):
                fluid.layers.hsigmoid(x_int32, label, 2)
            # A float32 input is accepted without raising.
            x_fp32 = fluid.data(name='x_fp32', shape=[4, 3], dtype='float32')
            fluid.layers.hsigmoid(x_fp32, label, 2)

            # The label must be a Variable.
            with self.assertRaises(TypeError):
                fluid.layers.hsigmoid(x_fp32, 1, 2)
            # The label dtype must be int64.
            label_int32 = fluid.data('label_int32', [4, 1], 'int32')
            with self.assertRaises(TypeError):
                fluid.layers.hsigmoid(x_fp32, label_int32, 2)
示例#42
0
    def context(self,
                trainable=True,
                pretrained=True,
                override_params=None,
                phase='train'):
        """Create the program and variables used for transfer learning.

        Args:
            trainable (bool): value assigned to ``trainable`` on every
                parameter of the returned program.
            pretrained (bool): when True, load variables found under
                ``self.default_pretrained_model_path``; otherwise run the
                startup program.
            override_params: passed through unchanged to ``EfficientNetB4``.
            phase (str): 'train' builds the network with ``is_test=False``;
                'dev', 'test', 'predict' and 'eval' build it with
                ``is_test=True``.

        Returns:
            inputs (dict): key 'image', mapped to the image variable.
            outputs (dict): keys 'classification' and 'feature_map', mapped
                to the two variables returned by the network.
            context_prog (fluid.Program): program for transfer learning.

        Raises:
            ValueError: if ``phase`` is not one of the values listed above.
        """
        test_phases = ("dev", "test", "predict", "eval")
        if phase not in test_phases and phase not in ("train", ):
            raise ValueError(
                "Phase %s is error, which must be one of train, dev, test, eval and predict."
                % phase)
        is_test = phase in test_phases

        context_prog = fluid.Program()
        startup_prog = fluid.Program()
        with fluid.program_guard(context_prog, startup_prog), \
                fluid.unique_name.guard():
            image = fluid.layers.data(
                name="image", shape=[3, 224, 224], dtype="float32")
            efficientnet_b4 = EfficientNetB4(override_params=override_params)
            num_classes = len(self.label_list)
            output, feature_map = efficientnet_b4.net(
                input=image, class_dim=num_classes, is_test=is_test)

            # Record the prefixed names first; the variables are looked up
            # by these names once the prefix has been applied.
            name_prefix = '@HUB_{}@'.format(self.name)
            input_names = {'image': name_prefix + image.name}
            output_names = {
                'classification': name_prefix + output.name,
                'feature_map': name_prefix + feature_map.name,
            }
            add_vars_prefix(context_prog, name_prefix)
            add_vars_prefix(startup_prog, name_prefix)

            global_vars = context_prog.global_block().vars
            inputs = {}
            for key, var_name in input_names.items():
                inputs[key] = global_vars[var_name]
            outputs = {}
            for key, var_name in output_names.items():
                outputs[key] = global_vars[var_name]

            exe = fluid.Executor(fluid.CPUPlace())
            if not pretrained:
                exe.run(startup_prog)
            else:

                def _has_saved_file(var):
                    # Only load variables that have a file in the model dir.
                    return os.path.exists(
                        os.path.join(self.default_pretrained_model_path,
                                     var.name))

                fluid.io.load_vars(exe,
                                   self.default_pretrained_model_path,
                                   context_prog,
                                   predicate=_has_saved_file)

            # Applied after loading, as in the original statement order.
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
        return inputs, outputs, context_prog
示例#43
0
def main(args):
    bert_config = BertConfig(args.bert_config_path)
    bert_config.print_config()
    
    if args.use_xpu:
        paddle.enable_static()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = get_device_num()
    elif args.use_xpu:
        xpu_id = int(os.getenv('FLAGS_selected_xpus', '0'))
        place = fluid.XPUPlace(xpu_id)
        dev_count = len([place])       
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    task_name = args.task_name.lower()
    processors = {
        'xnli': reader.XnliProcessor,
        'cola': reader.ColaProcessor,
        'mrpc': reader.MrpcProcessor,
        'mnli': reader.MnliProcessor,
    }

    processor = processors[task_name](data_dir=args.data_dir,
                                      vocab_path=args.vocab_path,
                                      max_seq_len=args.max_seq_len,
                                      do_lower_case=args.do_lower_case,
                                      in_tokens=args.in_tokens,
                                      random_seed=args.random_seed)
    num_labels = len(processor.get_labels())

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    train_program = fluid.Program()
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed
        train_program.random_seed = args.random_seed

    if args.do_train:
        # NOTE: If num_trainers > 1, the shuffle_seed must be set, because
        # the order of batch data generated by reader
        # must be the same in the respective processes.
        shuffle_seed = 1 if num_trainers > 1 else None
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='train',
            epoch=args.epoch,
            dev_count=dev_count,
            shuffle=args.shuffle,
            shuffle_seed=shuffle_seed)

        num_train_examples = processor.get_num_examples(phase='train')

        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_data_loader, loss, probs, accuracy, num_seqs = create_model(
                    args,
                    bert_config=bert_config,
                    num_labels=num_labels)
                scheduled_lr, loss_scaling = optimization(
                    loss=loss,
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio)

    if args.do_val:
        dev_prog = fluid.Program()
        with fluid.program_guard(dev_prog, startup_prog):
            with fluid.unique_name.guard():
                dev_data_loader, loss, probs, accuracy, num_seqs = create_model(
                    args,
                    bert_config=bert_config,
                    num_labels=num_labels)

        dev_prog = dev_prog.clone(for_test=True)
        dev_data_loader.set_batch_generator(
                            processor.data_generator(
                                batch_size=args.batch_size,
                                phase='dev',
                                epoch=1,
                                dev_count=1,
                                shuffle=False), place)

    if args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_data_loader, loss, probs, accuracy, num_seqs = create_model(
                    args,
                    bert_config=bert_config,
                    num_labels=num_labels)

        test_prog = test_prog.clone(for_test=True)
        test_data_loader.set_batch_generator(
                            processor.data_generator(
                                batch_size=args.batch_size,
                                phase='test',
                                epoch=1,
                                dev_count=1,
                                shuffle=False), place)

    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(
                exe,
                args.init_checkpoint,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(
                exe,
                args.init_pretraining_params,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(
            exe,
            args.init_checkpoint,
            main_program=startup_prog,
            use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = args.use_fast_executor
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope
        build_strategy = fluid.BuildStrategy()

        if args.use_cuda and num_trainers > 1:
            assert shuffle_seed is not None
            dist_utils.prepare_for_multi_process(exe, build_strategy, train_program)
            train_data_generator = fluid.contrib.reader.distributed_batch_reader(
                  train_data_generator)

        if args.use_xpu:
            train_compiled_program = train_program
        else:

            train_compiled_program = fluid.CompiledProgram(train_program).with_data_parallel(
                    loss_name=loss.name, build_strategy=build_strategy)

        train_data_loader.set_batch_generator(train_data_generator, place)


    if args.do_train:
        train_data_loader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        throughput = []
        ce_info = []

        total_batch_num=0 # used for benchmark
        interval_seq_num = 0

        while True:
            try:
                steps += 1

                total_batch_num += 1 # used for benchmark
                if args.max_iter and total_batch_num == args.max_iter: # used for benchmark
                    return

                if args.use_fp16:
                    fetch_list = [loss.name, accuracy.name, scheduled_lr.name, num_seqs.name, loss_scaling.name]
                else:
                    fetch_list = [loss.name, accuracy.name, scheduled_lr.name, num_seqs.name]

                outputs = exe.run(train_compiled_program, fetch_list=fetch_list)
                interval_seq_num += np.sum( outputs[3] )  # get the sequence number

                if steps % args.skip_steps == 0:
                    if args.use_fp16:
                        np_loss, np_acc, np_lr, np_num_seqs, np_scaling = outputs
                    else:
                        np_loss, np_acc, np_lr, np_num_seqs = outputs

                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)

                    if args.verbose:
                        verbose = "train data_loader queue size: %d, " % train_data_loader.queue.size(
                        )
                        verbose += "learning rate: %f" % np_lr[0]
                        if args.use_fp16:
                            verbose += ", loss scaling: %f" % np_scaling[0]
                        print(verbose)

                    current_example, current_epoch = processor.get_train_progress(
                    )
                    time_end = time.time()
                    used_time = time_end - time_begin

                    # profiler tools
                    if args.is_profiler and current_epoch == 0 and steps == args.skip_steps:
                        profiler.start_profiler("All")
                    elif args.is_profiler and current_epoch == 0 and steps == args.skip_steps * 2:
                        profiler.stop_profiler("total", args.profiler_path)
                        return

                    log_record = "epoch: {}, progress: {}/{}, step: {}, ave loss: {}, ave acc: {}".format(
                           current_epoch, current_example, num_train_examples,
                           steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs))
                    ce_info.append([np.sum(total_cost) / np.sum(total_num_seqs), np.sum(total_acc) / np.sum(total_num_seqs), used_time])
                    if steps > 0 :
                        throughput.append( args.skip_steps / used_time)
                        log_record = log_record + ", speed: %f steps/s" % (args.skip_steps / used_time) + ", ips: %f sequence/s" % ( interval_seq_num / used_time )
                        print(log_record)
                    else:
                        print(log_record)
                    total_cost, total_acc, total_num_seqs = [], [], []
                    interval_seq_num = 0
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.save(program=train_program, model_path=save_path)

                if steps % args.validation_steps == 0:
                    print("Average throughtput: %s" % (np.average(throughput)))
                    throughput = []
                    # evaluate dev set
                    if args.do_val:
                        evaluate(exe, dev_prog, dev_data_loader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "dev")
                    # evaluate test set
                    if args.do_test:
                        evaluate(exe, test_prog, test_data_loader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "test")
            except fluid.core.EOFException:
                save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                fluid.save(program=train_program, model_path=save_path)
                train_data_loader.reset()
                break
        if args.enable_ce:
            card_num = get_cards()
            ce_cost = 0
            ce_acc = 0
            ce_time = 0
            try:
                ce_cost = ce_info[-2][0]
                ce_acc = ce_info[-2][1]
                ce_time = ce_info[-2][2]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_%s_card%s\t%s" %
                (args.task_name, card_num, ce_time))
            print("kpis\ttrain_cost_%s_card%s\t%f" %
                (args.task_name, card_num, ce_cost))
            print("kpis\ttrain_acc_%s_card%s\t%f" %
                (args.task_name, card_num, ce_acc))


    # final eval on dev set
    if args.do_val:
        print("Final validation result:")
        evaluate(exe, dev_prog, dev_data_loader,
                 [loss.name, accuracy.name, num_seqs.name], "dev")

    # final eval on test set
    if args.do_test:
        print("Final test result:")
        evaluate(exe, test_prog, test_data_loader,
                 [loss.name, accuracy.name, num_seqs.name], "test")
示例#44
0
 def test_type_error(self):
     """Check that ``paddle.assign`` rejects a list of variables in static mode."""
     paddle.enable_static()
     with program_guard(Program(), Program()):
         var_list = [paddle.randn([3, 3]) for _ in range(2)]
         # Assigning from a list of variables is not supported.
         with self.assertRaises(TypeError):
             paddle.assign(var_list)
    def freeze_graph(self,
                     use_cuda,
                     seed,
                     activation_quant_type,
                     weight_quant_type='abs_max',
                     for_ci=False):
        """Exercise the quantization passes end to end on a small MNIST conv net.

        The train and test graphs get the quantization transform applied, the
        train graph is run for five iterations, and the test graph is then
        frozen, converted to int8 and transformed for mobile. Along the way
        the loss before and after freezing and the int8 weights are checked.

        Args:
            use_cuda: run on ``fluid.CUDAPlace(0)`` when true, else on CPU.
            seed: random seed set on every program that is built.
            activation_quant_type: forwarded to ``QuantizationTransformPass``
                as ``activation_quantize_type``; also part of output names.
            weight_quant_type: forwarded to the transform and freeze passes as
                ``weight_quantize_type``; also part of output names.
            for_ci: when true, skip graph drawing and diagnostic printing.
        """

        def build_program(prog, startup_prog, is_test):
            # Build the conv net into `prog`; add the Adam step only for training.
            prog.random_seed = seed
            startup_prog.random_seed = seed
            with fluid.unique_name.guard(), \
                    fluid.program_guard(prog, startup_prog):
                img = fluid.layers.data(
                    name='image', shape=[1, 28, 28], dtype='float32')
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64')
                net_loss = conv_net(img, label)
                if not is_test:
                    fluid.optimizer.Adam(learning_rate=0.001).minimize(net_loss)
            return [img, label], net_loss

        random.seed(0)
        np.random.seed(0)

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        if use_cuda:
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)

        # Insert the quantization ops into both the train and the test graph.
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quant_type)
        for graph in (main_graph, test_graph):
            transform_pass.apply(graph)

        dev_name = '_gpu_' if use_cuda else '_cpu_'
        # Common suffix of every drawing, log label and saved-model directory.
        tag = dev_name + activation_quant_type + '_' + weight_quant_type

        def draw_quantize_ops(graph, prefix):
            # Debug aid: draw `graph`, passing the ops whose name contains
            # 'quantize' as the marked nodes. Does nothing in CI mode.
            if for_ci:
                return
            marked = {
                op
                for op in graph.all_op_nodes() if 'quantize' in op.name()
            }
            graph.draw('.', prefix + tag, marked)

        draw_quantize_ops(main_graph, 'main')
        draw_quantize_ops(test_graph, 'test')

        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        quantized_test_program = test_graph.to_program()
        iters = 5
        batch_size = 8

        shuffled_train = paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500)
        train_reader = paddle.batch(shuffled_train, batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)

        # Training: every iteration pulls the first batch of a fresh reader.
        with fluid.scope_guard(scope):
            for _ in range(iters):
                batch = next(train_reader())
                loss_v = exe.run(binary,
                                 feed=feeder.feed(batch),
                                 fetch_list=[loss])
                if not for_ci:
                    print('{}: {}'.format('loss' + tag, loss_v))

        test_data = next(test_reader())
        with fluid.program_guard(quantized_test_program):
            w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                             quantized_test_program)
        # Loss and quantized weight of the test program before freezing.
        with fluid.scope_guard(scope):
            test_loss1, w_quant = exe.run(program=quantized_test_program,
                                          feed=feeder.feed(test_data),
                                          fetch_list=[loss, w_var])

        # Freeze graph for inference, but the weight of fc/conv is still float type.
        freeze_pass = QuantizationFreezePass(
            scope=scope, place=place, weight_quantize_type=weight_quant_type)
        freeze_pass.apply(test_graph)
        draw_quantize_ops(test_graph, 'test_freeze')

        server_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            test_loss2, = exe.run(program=server_program,
                                  feed=feeder.feed(test_data),
                                  fetch_list=[loss])
        self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
        if not for_ci:
            print('{}: {}'.format('test_loss1' + tag, test_loss1))
            print('{}: {}'.format('test_loss2' + tag, test_loss2))

        w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
        # The sums of w_freeze and w_quant are deliberately not asserted equal:
        # that comparison may fail because of calculation precision.
        if not for_ci:
            print('{}: {}'.format('w_freeze' + tag, np.sum(w_freeze)))
            print('{}: {}'.format('w_quant' + tag, np.sum(w_quant)))

        # Convert parameter to 8-bit.
        convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
        convert_int8_pass.apply(test_graph)
        draw_quantize_ops(test_graph, 'test_int8')
        server_program_int8 = test_graph.to_program()

        # Save the 8-bit parameter and model file, then load it back to make
        # sure the saved model can be read successfully.
        int8_model_dir = 'server_int8' + tag
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(int8_model_dir, ['image', 'label'],
                                          [loss], exe, server_program_int8)
            [_infer_prog, _feed_names, _fetch_targets] = \
                fluid.io.load_inference_model(int8_model_dir, exe)

        # Check the loaded 8-bit weight.
        w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
        self.assertEqual(w_8bit.dtype, np.int8)
        self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
        if not for_ci:
            print('{}: {}'.format('w_8bit' + tag, np.sum(w_8bit)))
            print('{}: {}'.format('w_freeze' + tag, np.sum(w_freeze)))

        mobile_pass = TransformForMobilePass()
        mobile_pass.apply(test_graph)
        draw_quantize_ops(test_graph, 'test_mobile')

        mobile_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model('mobile_int8' + tag,
                                          ['image', 'label'], [loss], exe,
                                          mobile_program)
示例#46
0
def main(args):
    """Fine-tune and/or evaluate an ERNIE sequence-labeling model.

    Depending on ``args.do_train``, ``args.do_val`` and ``args.do_test`` this
    builds the train and/or test programs, optionally transpiles them for
    nccl2 distributed training, initialises parameters from a checkpoint or
    from pretraining parameters, runs the training loop with periodic
    logging, checkpointing and evaluation, and finishes with a final
    evaluation on the dev and/or test set.

    Args:
        args: parsed command-line options. All configuration (paths,
            batch size, step intervals, fp16 options, ...) is read from
            its attributes.

    Raises:
        ValueError: if none of ``do_train``, ``do_val``, ``do_test`` is set;
            if ``in_tokens`` is set and ``batch_size < max_seq_len``; or if
            only evaluation is requested without ``init_checkpoint``.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        dev_list = fluid.cuda_places()
        place = dev_list[0]
        dev_count = len(dev_list)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    reader = task_reader.SequenceLabelReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed,
        task_id=args.task_id)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        if args.in_tokens:
            if args.batch_size < args.max_seq_len:
                raise ValueError(
                    'if in_tokens=True, batch_size should greater than max_sqelen, got batch_size:%d seqlen:%d'
                    % (args.batch_size, args.max_seq_len))

            # With in_tokens the batch size is divided by max_seq_len before
            # being used as the per-step example count.
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        log.info("Device count: %d" % dev_count)
        log.info("Num train examples: %d" % num_train_examples)
        log.info("Max train steps: %d" % max_train_steps)
        log.info("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config)
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            log.info("Theoretical memory usage in training: %.3f - %.3f %s" %
                     (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        # NOTE: when training is also enabled this rebinds `graph_vars` to
        # the variables of the test program.
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config)

        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    if args.is_distributed:
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)

        log.info("worker_endpoints:{} trainers_num:{} current_endpoint:{} \
              trainer_id:{}".format(worker_endpoints, trainers_num,
                                    current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        t.transpile(trainer_id,
                    trainers=worker_endpoints_env,
                    current_endpoint=current_endpoint,
                    program=train_program if args.do_train else test_prog,
                    startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            log.info(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog,
                            use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(exe,
                                    args.init_pretraining_params,
                                    main_program=startup_prog,
                                    use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            # Fixed: the two literals used to join into "...set ifonly doing...".
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or testing!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog,
                        use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                           loss_name=graph_vars["loss"].name,
                                           exec_strategy=exec_strategy,
                                           main_program=train_program,
                                           num_trainers=nccl2_num_trainers,
                                           trainer_id=nccl2_trainer_id)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    if args.do_val or args.do_test:
        test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                          main_program=test_prog,
                                          share_vars_from=train_exe)

    # Epoch number handed to the evaluation helpers. It is refreshed only on
    # logging steps of the training loop, so give it a value up front: without
    # this, evaluation-only runs (and validation that fires before the first
    # logging step) fail with an unbound local variable.
    current_epoch = 0

    if args.do_train:
        train_pyreader.start()
        steps = 0
        graph_vars["learning_rate"] = scheduled_lr

        time_begin = time.time()
        while True:
            try:
                steps += 1
                if steps % args.skip_steps != 0:
                    # Non-logging step: run without fetching anything.
                    train_exe.run(fetch_list=[])
                else:
                    fetch_list = [
                        graph_vars["num_infer"].name,
                        graph_vars["num_label"].name,
                        graph_vars["num_correct"].name,
                        graph_vars["loss"].name,
                        graph_vars['learning_rate'].name,
                    ]

                    out = train_exe.run(fetch_list=fetch_list)
                    num_infer, num_label, num_correct, np_loss, np_lr = out
                    lr = float(np_lr[0])
                    loss = np_loss.mean()
                    precision, recall, f1 = calculate_f1(
                        num_label, num_infer, num_correct)
                    if args.verbose:
                        log.info(
                            "train pyreader queue size: %d, learning rate: %f"
                            % (train_pyreader.queue.size(),
                               lr if warmup_steps > 0 else args.learning_rate))

                    current_example, current_epoch = reader.get_train_progress(
                    )
                    time_end = time.time()
                    used_time = time_end - time_begin
                    log.info(
                        "epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                        "f1: %f, precision: %f, recall: %f, speed: %f steps/s"
                        % (current_epoch, current_example, num_train_examples,
                           steps, loss, f1, precision, recall,
                           args.skip_steps / used_time))
                    time_begin = time.time()

                # Only trainer 0 writes checkpoints and runs evaluation.
                if nccl2_trainer_id == 0 and steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if nccl2_trainer_id == 0 and steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        evaluate_wrapper(reader, exe, test_prog, test_pyreader,
                                         graph_vars, current_epoch, steps)
                    # evaluate test set
                    if args.do_test:
                        predict_wrapper(reader, exe, test_prog, test_pyreader,
                                        graph_vars, current_epoch, steps)

            except fluid.core.EOFException:
                # The reader is exhausted: save a last checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on dev set
    if nccl2_trainer_id == 0 and args.do_val:
        evaluate_wrapper(reader, exe, test_prog, test_pyreader, graph_vars,
                         current_epoch, 'final')

    # final eval on test set
    if nccl2_trainer_id == 0 and args.do_test:
        predict_wrapper(reader, exe, test_prog, test_pyreader, graph_vars,
                        current_epoch, 'final')
示例#47
0
        def test_with_place(place, shape, begin_norm_axis):
            """Run the layer_norm op forward and backward on ``place`` and check it against the reference.

            Random input, optional scale/bias and an upstream gradient are
            generated, the reference forward/backward results are computed,
            and a program holding one ``layer_norm`` op plus its generated
            gradient op is executed. Outputs and gradients are then compared
            with the reference values.

            ``has_scale``, ``has_bias``, ``y_grad_scale`` and ``self`` are
            free variables taken from the enclosing scope (not visible here).

            Args:
                place: device place passed to ``fluid.Executor``.
                shape: shape of the input ``x``.
                begin_norm_axis: value of the op's ``begin_norm_axis``
                    attribute; dimensions from this axis on are multiplied
                    together to get the scale/bias length.
            """
            # Operator attributes and derived shapes.
            epsilon = 0.00001
            x_shape = shape
            # Product of the dimensions from begin_norm_axis to the end.
            D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
            scale_shape = [D]

            # Fixed seed so the generated data is reproducible.
            np.random.seed(123)
            x = np.random.random_sample(x_shape).astype(np.float32)
            scale = np.random.random_sample(scale_shape).astype(
                np.float32) if has_scale else None
            bias = np.random.random_sample(scale_shape).astype(
                np.float32) if has_bias else None
            y_grad = (np.random.random_sample(x_shape) * y_grad_scale).astype(
                np.float32)

            # reference forward & backward
            y, mean, variance = _reference_layer_norm_naive(
                x, scale, bias, epsilon, begin_norm_axis)
            x_grad, scale_grad, bias_grad = _reference_layer_norm_grad(
                x, y_grad, scale, bias, mean, variance, begin_norm_axis)

            # The arrays above are looked up by their local variable names
            # below, so those names must stay in sync with `var_names` and
            # with the feed list.
            var_dict = locals()
            var_dict['y@GRAD'] = y_grad
            var_names = ['x', 'mean', 'variance', 'y', 'y@GRAD']
            if has_scale:
                var_names += ['scale']
            if has_bias:
                var_names += ['bias']
            ground_truth = {name: var_dict[name] for name in var_names}

            program = fluid.Program()
            with fluid.program_guard(program):
                block = program.global_block()
                # Declare one float32 variable per reference array.
                for name in ground_truth:
                    block.create_var(name=name,
                                     dtype='float32',
                                     shape=ground_truth[name].shape)
                inputs = {"X": block.var('x')}
                fetch_list = [
                    'y',
                    'mean',
                    'variance',
                    'x@GRAD',
                ]
                # Scale and Bias are optional inputs; fetch their gradients
                # only when they are present.
                if has_scale:
                    inputs["Scale"] = block.var('scale')
                    fetch_list += ['scale@GRAD']
                if has_bias:
                    inputs["Bias"] = block.var('bias')
                    fetch_list += ['bias@GRAD']
                layer_norm_op = block.append_op(
                    type="layer_norm",
                    inputs=inputs,
                    outputs={
                        "Y": block.var('y'),
                        "Mean": block.var('mean'),  # share the same memory
                        "Variance":
                        block.var('variance'),  # share the same memory
                    },
                    attrs={
                        "epsilon": epsilon,
                        "begin_norm_axis": begin_norm_axis
                    })
                # generate backward op_desc
                grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                    layer_norm_op.desc, set(), [])
                grad_op_desc = grad_op_desc_list[0]
                # Append a copy of the gradient op to the block.
                new_op_desc = block.desc.append_op()
                new_op_desc.copy_from(grad_op_desc)
                # Create descs for the gradient outputs, infer their type
                # and shape, then set their dtype to FP32.
                for var_name in grad_op_desc.output_arg_names():
                    block.desc.var(var_name.encode("ascii"))
                grad_op_desc.infer_var_type(block.desc)
                grad_op_desc.infer_shape(block.desc)
                for arg in grad_op_desc.output_arg_names():
                    grad_var = block.desc.find_var(arg.encode("ascii"))
                    grad_var.set_dtype(core.VarDesc.VarType.FP32)

                program._sync_with_cpp()
                exe = fluid.Executor(place)
                # NOTE(review): 'scale' and 'bias' are always put in the feed,
                # with value None when has_scale / has_bias is false; this
                # presumably relies on the executor tolerating that - confirm.
                out = exe.run(program,
                              feed={
                                  name: var_dict[name]
                                  for name in ['x', 'scale', 'bias', 'y@GRAD']
                              },
                              fetch_list=fetch_list)
                # The first four fetched values are y, mean, variance, x@GRAD.
                # The extra 1e-3 argument is presumably a looser tolerance -
                # confirm against __assert_close.
                self.__assert_close(y, out[0], "y")
                self.__assert_close(mean, out[1], "mean")
                self.__assert_close(variance, out[2], "variance", 1e-3)
                self.__assert_close(x_grad, out[3], "x_grad")
                # Positions of the optional gradients depend on which of
                # scale/bias are enabled, so look them up by name.
                if has_scale:
                    self.__assert_close(scale_grad,
                                        out[fetch_list.index('scale@GRAD')],
                                        "scale_grad", 1e-3)
                if has_bias:
                    self.__assert_close(bias_grad,
                                        out[fetch_list.index('bias@GRAD')],
                                        "bias_grad")
    def check_network_convergence(self,
                                  method,
                                  use_cuda=True,
                                  memory_opt=True,
                                  iter=50,
                                  batch_size=None,
                                  allow_op_delay=False,
                                  feed_dict=None,
                                  seed=None,
                                  use_parallel_executor=True,
                                  use_reduce=False,
                                  use_ir_memory_optimize=True,
                                  enable_inplace=True,
                                  fuse_elewise_add_act_ops=False,
                                  fuse_relu_depthwise_conv=False,
                                  optimizer=fluid.optimizer.Adam,
                                  use_fast_executor=False,
                                  enable_sequential_execution=False):
        """Build a network, run it ``iter + 2`` times and return the first and last loss.

        Args:
            method: callable that builds the network inside the program
                guard; called as ``method(use_feed=...)`` and expected to
                return the loss variable.
            use_cuda: run on ``fluid.CUDAPlace(0)`` when true, else on CPU.
            memory_opt: when true, apply ``fluid.memory_optimize`` to the
                main program and turn off the build-strategy memory
                optimization and inplace passes.
            iter: number of runs without fetching, executed between the run
                that fetches the first loss and the run that fetches the last.
            batch_size: per-device batch size; only used to print throughput.
            allow_op_delay: copied to ``ExecutionStrategy.allow_op_delay``.
            feed_dict: feed passed to every run; also decides ``use_feed``.
            seed: if not None, replaces the default random seed of 1.
            use_parallel_executor: compile with data parallelism when true,
                otherwise as a plain ``CompiledProgram``.
            use_reduce: select the Reduce strategy instead of AllReduce.
            use_ir_memory_optimize: build-strategy ``memory_optimize`` value
                used when ``memory_opt`` is false.
            enable_inplace: build-strategy ``enable_inplace`` value used
                when ``memory_opt`` is false.
            fuse_elewise_add_act_ops: copied to the build strategy.
            fuse_relu_depthwise_conv: copied to the build strategy.
            optimizer: zero-argument optimizer factory; a falsy value skips
                the ``minimize`` step.
            use_fast_executor: enable the experimental executor.
            enable_sequential_execution: copied to the build strategy.

        Returns:
            Tuple ``(first_loss, last_loss)`` as fetched from the executor.

        Raises:
            SystemExit: if the mean of either loss is NaN.
        """

        def run_executor(exe, binary, feed, fetch_list):
            res = exe.run(binary, feed=feed, fetch_list=fetch_list)
            return res

        main = fluid.Program()
        startup = fluid.Program()
        startup.random_seed = 1  # Fix random seed
        main.random_seed = 1
        with fluid.program_guard(main, startup):
            if seed is not None:
                startup.random_seed = seed
                main.random_seed = seed

            loss = method(use_feed=feed_dict is not None)
            if optimizer:
                optimizer().minimize(loss)

            if memory_opt:
                fluid.memory_optimize(main)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup)
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.allow_op_delay = allow_op_delay
        if use_fast_executor:
            exec_strategy.use_experimental_executor = True
        build_strategy = fluid.BuildStrategy()
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce \
            if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce
        build_strategy.fuse_elewise_add_act_ops = fuse_elewise_add_act_ops
        build_strategy.fuse_relu_depthwise_conv = fuse_relu_depthwise_conv
        build_strategy.memory_optimize = False if memory_opt else use_ir_memory_optimize
        # python memory optimization is conflict with inplace pass.
        # Use ir graph memory optimization after inplace pass is the correct way.
        build_strategy.enable_inplace = False if memory_opt else enable_inplace
        build_strategy.enable_sequential_execution = enable_sequential_execution

        if use_cuda and core.is_compiled_with_cuda():
            build_strategy.remove_unnecessary_lock = True
        if use_parallel_executor:
            binary = compiler.CompiledProgram(main).with_data_parallel(
                loss_name=loss.name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
        else:
            binary = compiler.CompiledProgram(main)

        if batch_size is not None:
            # Scale the per-device batch size by the number of devices.
            batch_size *= fluid.core.get_cuda_device_count(
            ) if use_cuda else int(
                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
        begin = time.time()
        first_loss, = run_executor(exe=exe,
                                   binary=binary,
                                   feed=feed_dict,
                                   fetch_list=[loss.name])

        for _ in range(iter):
            run_executor(exe=exe, binary=binary, feed=feed_dict, fetch_list=[])

        last_loss, = run_executor(exe=exe,
                                  binary=binary,
                                  feed=feed_dict,
                                  fetch_list=[loss.name])
        end = time.time()

        if batch_size is not None:
            # iter + 2 batches were run in the timed section: the first-loss
            # run, `iter` runs without fetching, and the last-loss run.
            print("%.4f Instance per second" % (batch_size * (iter + 2) /
                                                (end - begin)))

        avg_last_loss_val = np.array(last_loss).mean()
        avg_first_loss_val = np.array(first_loss).mean()
        if math.isnan(float(avg_last_loss_val)) or math.isnan(
                float(avg_first_loss_val)):
            sys.exit("got NaN loss, training failed.")

        print(first_loss, last_loss)
        # No assertion that the loss decreased is made here; both values are
        # returned for the caller to compare.
        return first_loss, last_loss
示例#49
0
文件: main.py 项目: smallZh/PaddleNLP
def train(args):
    """
    Train Program

    Builds a train program and a test program around the network returned by
    ``Net``, runs ``args.num_scan_data`` passes over the training data, and
    every ``save_step`` steps saves the persistables, scores the validation
    batches and calls ``evaluate`` on the resulting score file.

    Args:
        args: Parsed command-line arguments. The attributes read here include
            ``save_path``, ``batch_size``, ``max_turn_num``, ``max_turn_len``,
            ``_EOS_``, ``vocab_size``, ``emb_size``, ``stack_num``,
            ``channel1_num``, ``channel2_num``, ``use_pyreader``,
            ``learning_rate``, ``use_cuda``, ``word_emb_init``, ``data_path``
            and ``num_scan_data``.
    """
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # data config handed to the batch builder
    data_conf = {
        "batch_size": args.batch_size,
        "max_turn_num": args.max_turn_num,
        "max_turn_len": args.max_turn_len,
        "_EOS_": args._EOS_,
    }

    dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size,
              args.emb_size, args.stack_num, args.channel1_num,
              args.channel2_num)

    train_program = fluid.Program()
    train_startup = fluid.Program()
    # Fixed seeds when running under continuous evaluation (CE_MODE_X set).
    if "CE_MODE_X" in os.environ:
        train_program.random_seed = 110
        train_startup.random_seed = 110
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            if args.use_pyreader:
                train_pyreader = dam.create_py_reader(capacity=10,
                                                      name='train_reader')
            else:
                dam.create_data_layers()
            loss, logits = dam.create_network()
            loss.persistable = True
            logits.persistable = True
            # gradient clipping
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByValue(max=1.0, min=-1.0))

            optimizer = fluid.optimizer.Adam(
                learning_rate=fluid.layers.exponential_decay(
                    learning_rate=args.learning_rate,
                    decay_steps=400,
                    decay_rate=0.9,
                    staircase=True))
            optimizer.minimize(loss)
            print("begin memory optimization ...")
            print(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time())))
            fluid.memory_optimize(train_program)
            print("end memory optimization ...")
            print(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time())))

    test_program = fluid.Program()
    test_startup = fluid.Program()
    if "CE_MODE_X" in os.environ:
        test_program.random_seed = 110
        test_startup.random_seed = 110
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            if args.use_pyreader:
                test_pyreader = dam.create_py_reader(capacity=10,
                                                     name='test_reader')
            else:
                dam.create_data_layers()

            # NOTE(review): `loss` and `logits` are rebound to the test-program
            # variables here, so `loss.name` / `logits.name` used below come
            # from this rebinding. Presumably the names match the train
            # program's because both are built under a fresh unique_name
            # guard -- confirm.
            loss, logits = dam.create_network()
            loss.persistable = True
            logits.persistable = True

    test_program = test_program.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    print("device count %d" % dev_count)
    print("theoretical memory usage: ")
    print(
        fluid.contrib.memory_usage(program=train_program,
                                   batch_size=args.batch_size))

    exe = fluid.Executor(place)
    exe.run(train_startup)
    exe.run(test_startup)

    train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=loss.name,
                                       main_program=train_program)

    test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                      main_program=test_program,
                                      share_vars_from=train_exe)

    if args.word_emb_init is not None:
        print("start loading word embedding init ...")
        # NOTE(review): pickle can execute arbitrary code while loading; only
        # point `word_emb_init` at trusted files.
        # The context manager closes the file handle once loading finishes.
        with open(args.word_emb_init, 'rb') as emb_file:
            if six.PY2:
                word_emb = np.array(pickle.load(emb_file)).astype('float32')
            else:
                word_emb = np.array(
                    pickle.load(emb_file,
                                encoding="bytes")).astype('float32')
        dam.set_word_embedding(word_emb, place)
        print("finish init word embedding  ...")

    print("start loading data ...")
    # NOTE(review): same pickle caveat as above applies to `data_path`.
    with open(args.data_path, 'rb') as f:
        if six.PY2:
            train_data, val_data, test_data = pickle.load(f)
        else:
            train_data, val_data, test_data = pickle.load(f, encoding="bytes")
    print("finish loading data ...")

    val_batches = reader.build_batches(val_data, data_conf)

    batch_num = len(train_data[six.b('y')]) // args.batch_size
    val_batch_num = len(val_batches["response"])

    # Log / checkpoint intervals, never smaller than one step.
    print_step = max(1, batch_num // (dev_count * 100))
    save_step = max(1, batch_num // (dev_count * 10))

    print("begin model training ...")
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))

    def train_with_feed(step):
        """
        Train on one epoch data by feeding

        Reads ``train_batches`` from the enclosing scope (rebuilt once per
        epoch by the loop below) and feeds one batch per device on every
        iteration.

        Args:
            step: Global step count before this epoch.

        Returns:
            Tuple of (updated global step, mean of the last fetched cost).
        """
        ave_cost = 0.0
        for it in six.moves.xrange(batch_num // dev_count):
            feed_list = []
            for dev in six.moves.xrange(dev_count):
                index = it * dev_count + dev
                batch_data = reader.make_one_batch_input(train_batches, index)
                feed_dict = dict(zip(dam.get_feed_names(), batch_data))
                feed_list.append(feed_dict)

            cost = train_exe.run(feed=feed_list, fetch_list=[loss.name])

            ave_cost += np.array(cost[0]).mean()
            step = step + 1
            if step % print_step == 0:
                print("processed: [" +
                      str(step * dev_count * 1.0 / batch_num) +
                      "] ave loss: [" + str(ave_cost / print_step) + "]")
                ave_cost = 0.0

            if (args.save_path is not None) and (step % save_step == 0):
                save_path = os.path.join(args.save_path, "step_" + str(step))
                print("Save model at step %d ... " % step)
                print(
                    time.strftime('%Y-%m-%d %H:%M:%S',
                                  time.localtime(time.time())))
                fluid.io.save_persistables(exe, save_path, train_program)

                score_path = os.path.join(args.save_path, 'score.' + str(step))
                test_with_feed(test_exe, test_program, dam.get_feed_names(),
                               [logits.name], score_path, val_batches,
                               val_batch_num, dev_count)

                result_file_path = os.path.join(args.save_path,
                                                'result.' + str(step))
                evaluate(score_path, result_file_path)
        return step, np.array(cost[0]).mean()

    def train_with_pyreader(step):
        """
        Train on one epoch with pyreader

        Runs until the py_reader raises ``fluid.core.EOFException``, then
        resets the reader.

        Args:
            step: Global step count before this epoch.

        Returns:
            Tuple of (updated global step, mean of the last fetched cost).
        """
        def data_provider():
            """
            Data reader

            Yields one batch input per index from the enclosing
            ``train_batches``.
            """
            for index in six.moves.xrange(batch_num):
                yield reader.make_one_batch_input(train_batches, index)

        train_pyreader.decorate_tensor_provider(data_provider)

        ave_cost = 0.0
        train_pyreader.start()
        while True:
            try:
                cost = train_exe.run(fetch_list=[loss.name])

                ave_cost += np.array(cost[0]).mean()
                step = step + 1
                if step % print_step == 0:
                    print("processed: [" +
                          str(step * dev_count * 1.0 / batch_num) +
                          "] ave loss: [" + str(ave_cost / print_step) + "]")
                    ave_cost = 0.0

                if (args.save_path is not None) and (step % save_step == 0):
                    save_path = os.path.join(args.save_path,
                                             "step_" + str(step))
                    print("Save model at step %d ... " % step)
                    print(
                        time.strftime('%Y-%m-%d %H:%M:%S',
                                      time.localtime(time.time())))
                    fluid.io.save_persistables(exe, save_path, train_program)

                    score_path = os.path.join(args.save_path,
                                              'score.' + str(step))
                    test_with_pyreader(test_exe, test_program, test_pyreader,
                                       [logits.name], score_path, val_batches,
                                       val_batch_num, dev_count)

                    result_file_path = os.path.join(args.save_path,
                                                    'result.' + str(step))
                    evaluate(score_path, result_file_path)

            except fluid.core.EOFException:
                train_pyreader.reset()
                break
        return step, np.array(cost[0]).mean()

    # train over different epochs
    global_step, train_time = 0, 0.0
    for epoch in six.moves.xrange(args.num_scan_data):
        # Reshuffle and re-batch every epoch; the seed is fixed only under CE.
        shuffle_train = reader.unison_shuffle(
            train_data, seed=110 if ("CE_MODE_X" in os.environ) else None)
        train_batches = reader.build_batches(shuffle_train, data_conf)

        begin_time = time.time()
        if args.use_pyreader:
            global_step, last_cost = train_with_pyreader(global_step)
        else:
            global_step, last_cost = train_with_feed(global_step)

        pass_time_cost = time.time() - begin_time
        train_time += pass_time_cost
        print("Pass {0}, pass_time_cost {1}".format(
            epoch, "%2.2f sec" % pass_time_cost))
    # For internal continuous evaluation
    if "CE_MODE_X" in os.environ:
        card_num = get_cards()
        print("kpis\ttrain_cost_card%d\t%f" % (card_num, last_cost))
        print("kpis\ttrain_duration_card%d\t%f" % (card_num, train_time))
示例#50
0
def main(args):
    """Run the inference program over a data set and save the embeddings.

    Fetches ``cls_embeddings`` and ``top_layer_embeddings`` batch by batch
    until the reader is exhausted, concatenates each of them and writes
    ``word2id_cls_emb.npy`` and ``word2id_top_layer_emb.npy`` into
    ``args.output_dir``.

    Args:
        args: Parsed command-line arguments. When ``None`` they are parsed
            with the module-level ``parser``.

    Raises:
        ValueError: If ``args.init_pretraining_params`` is not set.
    """
    # Honor the caller's arguments; only fall back to the command line when
    # none were supplied (previously the parameter was always overwritten).
    if args is None:
        args = parser.parse_args()
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    exe = fluid.Executor(place)

    reader = task_reader.ExtractEmbeddingReader(
        vocab_path=args.vocab_path,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case)

    startup_prog = fluid.Program()

    # Single unshuffled pass over the data set.
    data_generator = reader.data_generator(input_file=args.data_set,
                                           batch_size=args.batch_size,
                                           epoch=1,
                                           shuffle=False)

    total_examples = reader.get_num_examples(args.data_set)

    print("Device count: %d" % dev_count)
    print("Total num examples: %d" % total_examples)

    infer_program = fluid.Program()

    with fluid.program_guard(infer_program, startup_prog):
        with fluid.unique_name.guard():
            pyreader, graph_vars = create_model(args,
                                                pyreader_name='reader',
                                                ernie_config=ernie_config)

    infer_program = infer_program.clone(for_test=True)

    exe.run(startup_prog)

    if args.init_pretraining_params:
        init_pretraining_params(exe,
                                args.init_pretraining_params,
                                main_program=startup_prog)
    else:
        raise ValueError(
            "WARNING: args 'init_pretraining_params' must be specified")

    pyreader.decorate_tensor_provider(data_generator)
    pyreader.start()

    total_cls_emb = []
    total_top_layer_emb = []
    # Keep fetching until the reader signals that the data is exhausted.
    while True:
        try:
            cls_emb, unpad_top_layer_emb = exe.run(
                program=infer_program,
                fetch_list=[
                    graph_vars["cls_embeddings"].name,
                    graph_vars["top_layer_embeddings"].name
                ],
                return_numpy=False)
            # batch_size * embedding_size
            total_cls_emb.append(np.array(cls_emb))
            total_top_layer_emb.append(np.array(unpad_top_layer_emb))
        except fluid.core.EOFException:
            break
    print(len(total_cls_emb))
    print(np.array(total_cls_emb).shape)
    total_cls_emb = np.concatenate(total_cls_emb)
    print('total_cls_emb=============', total_cls_emb.shape)
    print('total_top_layer_emb=============',
          np.array(total_top_layer_emb).shape)
    total_top_layer_emb = np.concatenate(total_top_layer_emb)
    print('total_top_layer_emb=============', total_top_layer_emb.shape)
    with open(os.path.join(args.output_dir, "word2id_cls_emb.npy"),
              "wb") as cls_emb_file:
        np.save(cls_emb_file, total_cls_emb)
    with open(os.path.join(args.output_dir, "word2id_top_layer_emb.npy"),
              "wb") as top_layer_emb_file:
        np.save(top_layer_emb_file, total_top_layer_emb)
示例#51
0
def main():
    """Run quantization-aware training with evaluation at snapshot iterations.

    Builds the train program (and the eval program when ``FLAGS.eval`` is
    truthy), loads pretrained weights if configured, inserts quantization ops
    with ``quant_aware``, then trains from ``start_iter`` up to
    ``cfg.max_iters``. At snapshot iterations and on the final iteration the
    eval program is run, and the checkpoint is saved under ``best_model``
    whenever the first evaluation statistic improves.

    Raises:
        ValueError: If ``FLAGS.eval`` is ``False``.
    """
    if FLAGS.eval is False:
        raise ValueError(
            "Currently only supports `--eval==True` while training in `quantization`."
        )
    env = os.environ
    # Distributed mode requires both trainer variables and more than one trainer.
    FLAGS.dist = 'PADDLE_TRAINER_ID' in env \
                    and 'PADDLE_TRAINERS_NUM' in env \
                    and int(env['PADDLE_TRAINERS_NUM']) > 1
    num_trainers = int(env.get('PADDLE_TRAINERS_NUM', 1))
    if FLAGS.dist:
        # `trainer_id` is only bound in distributed mode; later uses are
        # guarded by `not FLAGS.dist or ...`, which short-circuits otherwise.
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        import random
        # Seed each trainer differently.
        local_seed = (99 + trainer_id)
        random.seed(local_seed)
        np.random.seed(local_seed)

    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check whether use_gpu=True is set on a CPU-only paddlepaddle build
    check_gpu(cfg.use_gpu)
    # check that the paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')

    # build program
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = create(main_arch)
            inputs_def = cfg['TrainReader']['inputs_def']
            feed_vars, train_loader = model.build_inputs(**inputs_def)
            if FLAGS.use_pact:
                # With PACT enabled, the image input is allowed to take gradients.
                feed_vars['image'].stop_gradient = False
            train_fetches = model.train(feed_vars)
            loss = train_fetches['loss']
            lr = lr_builder()
            optimizer = optim_builder(lr)
            optimizer.minimize(loss)

    # parse train fetches
    train_keys, train_values, _ = parse_fetches(train_fetches)
    # The learning rate is fetched last; the loop below reads it as outs[-1].
    train_values.append(lr)

    if FLAGS.eval:
        eval_prog = fluid.Program()
        with fluid.program_guard(eval_prog, startup_prog):
            with fluid.unique_name.guard():
                model = create(main_arch)
                inputs_def = cfg['EvalReader']['inputs_def']
                feed_vars, eval_loader = model.build_inputs(**inputs_def)
                fetches = model.eval(feed_vars)
        eval_prog = eval_prog.clone(True)

        eval_reader = create_reader(cfg.EvalReader)
        # When iterable mode, set set_sample_list_generator(eval_reader, place)
        eval_loader.set_sample_list_generator(eval_reader)

        # parse eval fetches; the extra keys depend on the configured metric
        extra_keys = []
        if cfg.metric == 'COCO':
            extra_keys = ['im_info', 'im_id', 'im_shape']
        if cfg.metric == 'VOC':
            extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
        if cfg.metric == 'WIDERFACE':
            extra_keys = ['im_id', 'im_shape', 'gt_bbox']
        eval_keys, eval_values, eval_cls = parse_fetches(
            fetches, eval_prog, extra_keys)

    # compile program for multi-devices
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    build_strategy.fuse_all_reduce_ops = False

    # only enable sync_bn in multi GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    # NOTE(review): the detected value is overwritten on the next line, so
    # sync_batch_norm is always False here -- confirm this is intentional.
    sync_bn = False
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to be 1 to save memory usages, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1
    if FLAGS.dist:
        dist_utils.prepare_for_multi_process(exe, build_strategy, startup_prog,
                                             train_prog)
        exec_strategy.num_threads = 1

    exe.run(startup_prog)
    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    # Quantization settings handed to quant_aware for both programs.
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }

    ignore_params = cfg.finetune_exclude_pretrained_params \
                 if 'finetune_exclude_pretrained_params' in cfg else []

    fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'

    # Load pretrained weights; the fuse-bn loader is used only when no
    # parameters are excluded.
    if cfg.pretrain_weights and fuse_bn and not ignore_params:
        checkpoint.load_and_fusebn(exe, train_prog, cfg.pretrain_weights)
    elif cfg.pretrain_weights:
        checkpoint.load_params(exe,
                               train_prog,
                               cfg.pretrain_weights,
                               ignore_params=ignore_params)

    if FLAGS.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None
    # insert quantize op in train_prog, return type is CompiledProgram
    train_prog_quant = quant_aware(train_prog,
                                   place,
                                   config,
                                   scope=None,
                                   act_preprocess_func=act_preprocess_func,
                                   optimizer_func=optimizer_func,
                                   executor=executor,
                                   for_test=False)

    compiled_train_prog = train_prog_quant.with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    if FLAGS.eval:
        # insert quantize op in eval_prog
        eval_prog = quant_aware(eval_prog,
                                place,
                                config,
                                scope=None,
                                act_preprocess_func=act_preprocess_func,
                                optimizer_func=optimizer_func,
                                executor=executor,
                                for_test=True)
        compiled_eval_prog = fluid.CompiledProgram(eval_prog)

    start_iter = 0

    train_reader = create_reader(cfg.TrainReader,
                                 (cfg.max_iters - start_iter) * devices_num,
                                 cfg,
                                 devices_num=devices_num,
                                 num_trainers=num_trainers)
    # When iterable mode, set set_sample_list_generator(train_reader, place)
    train_loader.set_sample_list_generator(train_reader)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'

    train_stats = TrainingStats(cfg.log_iter, train_keys)
    train_loader.start()
    start_time = time.time()
    end_time = time.time()

    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)
    # Sliding window of the most recent per-iteration durations.
    time_stat = deque(maxlen=cfg.log_iter)
    best_box_ap_list = [0.0, 0]  #[map, iter]

    for it in range(start_iter, cfg.max_iters):
        # Time elapsed since the previous iteration reached this point.
        start_time = end_time
        end_time = time.time()
        time_stat.append(end_time - start_time)
        time_cost = np.mean(time_stat)
        eta_sec = (cfg.max_iters - it) * time_cost
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        outs = exe.run(compiled_train_prog, fetch_list=train_values)
        # outs[-1] is the learning rate appended above; the rest map to train_keys.
        stats = {k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1])}

        train_stats.update(stats)
        logs = train_stats.log()
        # Only trainer 0 logs in distributed mode.
        if it % cfg.log_iter == 0 and (not FLAGS.dist or trainer_id == 0):
            strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
                it, np.mean(outs[-1]), logs, time_cost, eta)
            logger.info(strs)

        # Evaluate at snapshot iterations and on the final iteration
        # (trainer 0 only in distributed mode).
        if (it > 0 and it % cfg.snapshot_iter == 0 or it == cfg.max_iters - 1) \
           and (not FLAGS.dist or trainer_id == 0):
            # NOTE(review): `save_name` is not used anywhere below.
            save_name = str(it) if it != cfg.max_iters - 1 else "model_final"

            if FLAGS.eval:
                # evaluation
                results = eval_run(exe,
                                   compiled_eval_prog,
                                   eval_loader,
                                   eval_keys,
                                   eval_values,
                                   eval_cls,
                                   cfg=cfg)
                resolution = None
                if 'mask' in results[0]:
                    resolution = model.mask_head.resolution
                box_ap_stats = eval_results(results, cfg.metric,
                                            cfg.num_classes, resolution,
                                            is_bbox_normalized,
                                            FLAGS.output_eval, map_type,
                                            cfg['EvalReader']['dataset'])

                # Keep only the checkpoint with the best first statistic so far.
                if box_ap_stats[0] > best_box_ap_list[0]:
                    best_box_ap_list[0] = box_ap_stats[0]
                    best_box_ap_list[1] = it
                    save_checkpoint(exe, eval_prog,
                                    os.path.join(save_dir, "best_model"),
                                    train_prog)
                logger.info("Best test box ap: {}, in iter: {}".format(
                    best_box_ap_list[0], best_box_ap_list[1]))

    train_loader.reset()
示例#52
0
 def test_dtype1():
     """Call ``paddle.sum`` on a float64 input while requesting float32.

     NOTE(review): presumably the enclosing test expects this call to raise
     -- confirm against the caller.
     """
     main_prog = fluid.Program()
     startup_prog = fluid.Program()
     with fluid.program_guard(main_prog, startup_prog):
         float64_input = fluid.data(name="data", shape=[10], dtype="float64")
         paddle.sum(float64_input, dtype="float32")
示例#53
0
        def test_with_place(place, data_layout, shape):
            """Check one batch_norm forward/backward pass against the reference.

            Builds a program holding a single ``batch_norm`` op plus its
            generated grad op, runs it on ``place`` and compares every fetched
            output with the values from ``self.ref_forward_backward``.

            Args:
                place: Device place handed to ``fluid.Executor``.
                data_layout: ``"NCHW"``; any other value is unpacked as
                    (n, h, w, c).
                shape: Four-element shape of the input ``x``.
            """
            # attr
            epsilon = 0.00001
            momentum = 0.9
            if data_layout == "NCHW":
                n, c, h, w = shape[0], shape[1], shape[2], shape[3]
            else:
                n, h, w, c = shape[0], shape[1], shape[2], shape[3]
            scale_shape = [c]

            # Fixed seed so the random inputs are reproducible.
            np.random.seed(123)
            x = np.random.random_sample(shape).astype(np.float32)
            scale = np.random.random_sample(scale_shape).astype(np.float32)
            bias = np.random.random_sample(scale_shape).astype(np.float32)
            mean = np.zeros(scale_shape).astype(np.float32)
            variance = np.ones(scale_shape).astype(np.float32)

            y_grad = np.random.random_sample(shape).astype(np.float32)

            y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
                x, y_grad, scale, bias, mean, variance, epsilon, momentum,
                shape, data_layout)

            # Look the arrays up by name through the local namespace.
            var_dict = locals()
            var_dict['y@GRAD'] = y_grad

            var_names = [
                'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
                'saved_variance'
            ]
            ground_truth = {name: var_dict[name] for name in var_names}

            program = fluid.Program()
            with fluid.program_guard(program):
                block = program.global_block()
                for name in ground_truth:
                    block.create_var(
                        name=name,
                        dtype='float32',
                        shape=ground_truth[name].shape)
                bn_op = block.append_op(
                    type="batch_norm",
                    inputs={
                        "X": block.var('x'),
                        "Scale": block.var('scale'),
                        "Bias": block.var('bias'),
                        "Mean": block.var('mean'),
                        "Variance": block.var('variance')
                    },
                    outputs={
                        "Y": block.var('y'),
                        "MeanOut": block.var('mean'),  # share the same memory
                        "VarianceOut":
                        block.var('variance'),  # share the same memory
                        "SavedMean": block.var('saved_mean'),
                        "SavedVariance": block.var('saved_variance')
                    },
                    attrs={
                        "momentum": momentum,
                        "epsilon": epsilon,
                        "is_test": False,
                        "data_layout": data_layout,
                        "use_mkldnn": self.use_mkldnn
                    })
                block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)

                # generate backward op_desc
                grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                    bn_op.desc, set(), [])
                grad_op_desc = grad_op_desc_list[0]
                new_op_desc = block.desc.append_op()
                new_op_desc.copy_from(grad_op_desc)
                for var_name in grad_op_desc.output_arg_names():
                    block.desc.var(var_name.encode("ascii"))
                grad_op_desc.infer_var_type(block.desc)
                grad_op_desc.infer_shape(block.desc)
                for arg in grad_op_desc.output_arg_names():
                    grad_var = block.desc.find_var(arg.encode("ascii"))
                    grad_var.set_dtype(core.VarDesc.VarType.FP32)

                exe = fluid.Executor(place)
                out = exe.run(
                    program,
                    feed={
                        name: var_dict[name]
                        for name in
                        ['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD']
                    },
                    fetch_list=[
                        'y', 'mean', 'variance', 'saved_mean', 'saved_variance',
                        'x@GRAD', 'scale@GRAD', 'bias@GRAD'
                    ])

            # The variance checks pass an extra 1e-3 argument.
            self.__assert_close(y, out[0], "y")
            self.__assert_close(mean_out, out[1], "mean")
            self.__assert_close(variance_out, out[2], "variance", 1e-3)
            self.__assert_close(saved_mean, out[3], "saved_mean")
            self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3)
            self.__assert_close(x_grad, out[5], "x_grad")
            self.__assert_close(scale_grad, out[6], "scale_grad")
            self.__assert_close(bias_grad, out[7], "bias_grad")

            # A single-argument print call is valid on Python 2 and 3 and
            # emits the same space-separated text the old statement did.
            print(" ".join(
                ["op test forward passed: ", str(place), str(data_layout)]))
示例#54
0
 def test_type():
     """Call ``paddle.sum`` on an int32 input while requesting a bool result.

     NOTE(review): presumably the enclosing test expects this call to raise
     -- confirm against the caller.
     """
     main_prog = fluid.Program()
     startup_prog = fluid.Program()
     with fluid.program_guard(main_prog, startup_prog):
         int32_input = fluid.data(name="data", shape=[10], dtype="int32")
         paddle.sum(int32_input, dtype="bool")
示例#55
0
    def test_nested_net_with_backward_and_lodtensor(self):
        """Run nested ``while_loop`` calls over tensor arrays and check backward.

        Builds an outer loop whose body contains an inner loop, both reading
        ``data_array`` and accumulating into ``mem_array``, then asserts the
        fetched sum and the gradient of ``x`` against values computed with
        numpy.
        """
        # Outer loop condition: continue while i < array_len.
        def external_cond(i, j, x, mem_array):
            return layers.less_than(i, array_len)

        # Outer loop body: one accumulation step, then the inner loop.
        def external_body(i, j, x, mem_array):
            # Inner loop condition: continue while j < array_len2.
            def internal_cond(j, x, mem_array):
                return layers.less_than(j, array_len2)

            # Inner loop body: writes data[j] + mem[j] + x at the incremented j.
            def internal_body(j, x, mem_array):
                inner_data = layers.array_read(array=data_array, i=j)
                inner_prev = layers.array_read(array=mem_array, i=j)
                inner_sum_0 = layers.elementwise_add(x=inner_data,
                                                     y=inner_prev)
                inner_sum_1 = layers.elementwise_add(x=x, y=inner_sum_0)
                # j is incremented in place before the write.
                j = layers.increment(x=j, in_place=True)
                layers.array_write(inner_sum_1, i=j, array=mem_array)
                return [j, x, mem_array]

            outer_data = layers.array_read(array=data_array, i=i)
            outer_prev = layers.array_read(array=mem_array, i=i)
            outer_sum_0 = layers.elementwise_add(x=outer_data, y=outer_prev)
            outer_sum_1 = layers.elementwise_add(x=x, y=outer_sum_0)
            # i is incremented in place before the write.
            i = layers.increment(x=i, in_place=True)
            layers.array_write(outer_sum_1, i=i, array=mem_array)
            j, x, mem_array = layers.while_loop(internal_cond, internal_body,
                                                [j, x, mem_array])
            return [i, j, x, mem_array]

        main_program = Program()
        startup_program = Program()
        with fluid.program_guard(main_program, startup_program):
            d0 = fluid.data(name='d0', shape=[10], dtype='float32')
            d1 = fluid.data(name='d1', shape=[10], dtype='float32')
            d2 = fluid.data(name='d2', shape=[10], dtype='float32')
            x = fluid.data(name='x', shape=[10], dtype='float32')
            # x must take gradients: its grad is fetched below.
            x.stop_gradient = False
            i = layers.zeros(shape=[1], dtype='int64')
            i.stop_gradient = True
            init = layers.zeros(shape=[10], dtype='float32')
            # mem_array starts with zeros at index 0; data_array receives
            # d0, d1, d2 at successive values of i.
            mem_array = layers.array_write(x=init, i=i)
            data_array = layers.array_write(x=d0, i=i)
            i = layers.increment(i)
            layers.array_write(d1, i, array=data_array)
            i = layers.increment(i)
            layers.array_write(d2, i, array=data_array)
            # Fresh zero counter for the outer loop.
            i = layers.zeros(shape=[1], dtype='int64')
            i.stop_gradient = True
            array_len = layers.fill_constant(shape=[1], dtype='int64', value=1)
            j = layers.fill_constant(shape=[1], dtype='int64', value=1)
            j.stop_gradient = True
            array_len2 = layers.fill_constant(shape=[1],
                                              dtype='int64',
                                              value=3)

            # The returned loop variables are not used below.
            out = layers.while_loop(external_cond, external_body,
                                    [i, j, x, mem_array])

            sum_result = layers.array_read(array=mem_array, i=j)
            mean = layers.mean(sum_result)
            append_backward(mean)

            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
            exe = fluid.Executor(place)

            # Random feed data for d0, d1 and d2.
            d = []
            for i in range(3):
                d.append(np.random.random(size=[10]).astype('float32'))
            feed_x = np.ones(10).astype('float32')
            # Expected forward result: all three inputs plus x added three times.
            data_sum = d[0] + d[1] + d[2] + 3 * feed_x
            # Expected gradient of the mean with respect to each element of x.
            x_grad = [0.3] * 10
            res = exe.run(main_program,
                          feed={
                              'd0': d[0],
                              'd1': d[1],
                              'd2': d[2],
                              'x': feed_x
                          },
                          fetch_list=[sum_result.name, x.grad_name])
            self.assertTrue(np.allclose(res[0], data_sum))
            self.assertTrue(np.allclose(res[1], x_grad))
0
# Sorted PNG file lists for the low-resolution training set and for both
# validation sets.
# NOTE(review): train_hr_img_list is consumed below but is not built in this
# fragment -- presumably it is created the same way just above; confirm.
train_lr_img_list = sorted(
    load_file_list(im_path=train_lr_img_path, im_format='*.png'))
valid_hr_img_list = sorted(
    load_file_list(im_path=valid_hr_img_path, im_format='*.png'))
valid_lr_img_list = sorted(
    load_file_list(im_path=valid_lr_img_path, im_format='*.png'))

# Load the images named by each file list through im_read.
train_hr_imgs = im_read(train_hr_img_list)
train_lr_imgs = im_read(train_lr_img_list)
valid_hr_imgs = im_read(valid_hr_img_list)
valid_lr_imgs = im_read(valid_lr_img_list)

# VGG19 lives in its own Program so its ops stay separate from the SRGAN
# generator program defined further down.
vgg19_program = fluid.Program()
with fluid.program_guard(vgg19_program):
    vgg19_input = fluid.layers.data(name='vgg19_input',
                                    shape=[224, 224, 3],
                                    dtype='float32')
    # The 4-axis perm moves the last axis to position 1; this looks like a
    # batched NHWC -> NCHW conversion -- TODO confirm the batch axis is
    # prepended by layers.data here.
    vgg19_input_transpose = fluid.layers.transpose(vgg19_input,
                                                   perm=[0, 3, 1, 2])
    # Build VGG19 on the transposed input; only the second return value is
    # kept (as vgg_target_emb).
    _, vgg_target_emb = vgg19(vgg19_input_transpose)

# SRGAN generator program (the heading also mentions the discriminator,
# SRGAN_d, which is not defined in this fragment).
SRGAN_g_program = fluid.Program()
with fluid.program_guard(SRGAN_g_program):
    # Low-resolution input image fed to the generator.
    t_image = fluid.layers.data(name='t_image',
                                shape=[96, 96, 3],
                                dtype='float32')
    def build_normal_program(self, test_program, batch_size, dims, loc_float,
                             scale_float, other_loc_float, other_scale_float,
                             scale_np, other_scale_np, loc_np, other_loc_np,
                             values_np):
        """Add ``Normal`` distribution ops to ``test_program``.

        Four flavours of ``Normal`` are created, each together with an
        "other" distribution that serves as its KL-divergence target:
        the ``*_float`` parameters, ``loc_float`` combined with ``scale_np``,
        the ``*_np`` parameters (presumably numpy arrays -- confirm against
        the caller), and parameters read from fed data layers.

        Returns:
            tuple: ``(feed_vars, fetch_list)``. ``feed_vars`` maps the data
            layer names to the matching ``*_np`` arguments. ``fetch_list``
            holds, in this order, four samples, four entropies, three
            log-probabilities (none for the all-float flavour) and four KL
            divergences.
        """
        with fluid.program_guard(test_program):
            # Feed slots, declared in the same order as the feed dict below.
            (loc_var, scale_var, other_loc_var, other_scale_var,
             values_var) = [
                 layers.data(name=slot, shape=[dims], dtype='float32')
                 for slot in ('loc', 'scale', 'other_loc', 'other_scale',
                              'values')
             ]

            # (distribution, KL target) pairs; the literal is evaluated left
            # to right, so the construction order is float, float/np
            # broadcast, np, variable -- each primary before its target.
            pairs = [
                (Normal(loc_float, scale_float),
                 Normal(other_loc_float, other_scale_float)),
                (Normal(loc_float, scale_np),
                 Normal(other_loc_float, other_scale_np)),
                (Normal(loc_np, scale_np),
                 Normal(other_loc_np, other_scale_np)),
                (Normal(loc_var, scale_var),
                 Normal(other_loc_var, other_scale_var)),
            ]
            primaries = [primary for primary, _ in pairs]

            samples = [dist.sample([batch_size, dims]) for dist in primaries]
            entropies = [dist.entropy() for dist in primaries]
            # log_prob is not fetched for the all-float distribution.
            log_probs = [dist.log_prob(values_var) for dist in primaries[1:]]
            divergences = [
                dist.kl_divergence(target) for dist, target in pairs
            ]

        feed_vars = {
            'loc': loc_np,
            'scale': scale_np,
            'other_loc': other_loc_np,
            'other_scale': other_scale_np,
            'values': values_np
        }
        fetch_list = samples + entropies + log_probs + divergences
        return feed_vars, fetch_list
示例#58
0
def train():
    """Train a PointNet2Cls model (MSG or SSG variant) and evaluate it per epoch.

    Steps, in order:
      1. Parse and print the command-line arguments, check GPU availability
         and create ``args.save_dir`` if needed.
      2. Build the training program (model, exponentially decayed and clipped
         learning rate, Adam with L2 decay) and a separate test program that
         shares the same startup program.
      3. Run the startup program and optionally restore ``args.resume``.
      4. Attach the ModelNet40 train/test readers to the model loaders.
      5. For every epoch: run training steps until the loader raises
         ``EOFException``, save the model, then (unless ``args.enable_ce``)
         run the evaluation pass.
      6. With ``args.enable_ce`` set, print the ``kpis`` duration/loss lines.

    Raises:
        AssertionError: if ``args.model`` is neither ``'MSG'`` nor ``'SSG'``,
            or if the files named by ``args.resume`` do not exist.
    """
    args = parse_args()
    print_arguments(args)
    # check whether the installed paddle is compiled with GPU
    check_gpu(args.use_gpu)

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    assert args.model in ['MSG', 'SSG'], \
            "--model can only be 'MSG' or 'SSG'"

    # build model
    startup = fluid.Program()
    train_prog = fluid.Program()
    if args.enable_ce:
        SEED = 102
        fluid.default_main_program().random_seed = SEED
        framework.default_startup_program().random_seed = SEED
        # The network is built into (and run from) the dedicated programs
        # created above, not the default ones, so they have to be seeded as
        # well for the CE run to be reproducible.
        train_prog.random_seed = SEED
        startup.random_seed = SEED

    with fluid.program_guard(train_prog, startup):
        with fluid.unique_name.guard():
            train_model = PointNet2ClsMSG(args.num_classes, args.num_points) \
                            if args.model == "MSG" else \
                          PointNet2ClsSSG(args.num_classes, args.num_points)
            train_model.build_model(bn_momentum=args.bn_momentum)
            train_feeds = train_model.get_feeds()
            train_loader = train_model.get_loader()
            train_outputs = train_model.get_outputs()
            train_loss = train_outputs['loss']
            lr = fluid.layers.exponential_decay(
                    learning_rate=args.lr,
                    decay_steps=args.decay_steps,
                    decay_rate=args.lr_decay,
                    staircase=True)
            # Keep the decayed learning rate within [1e-5, args.lr].
            lr = fluid.layers.clip(lr, 1e-5, args.lr)
            params = []
            for var in train_prog.list_vars():
                if fluid.io.is_parameter(var):
                    params.append(var.name)
            optimizer = fluid.optimizer.Adam(learning_rate=lr,
                    regularization=fluid.regularizer.L2Decay(args.weight_decay))
            optimizer.minimize(train_loss, parameter_list=params)
    train_keys, train_values = parse_outputs(train_outputs)

    # The test program is built under a fresh unique_name guard and shares
    # the startup program with the training program.
    test_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup):
        with fluid.unique_name.guard():
            test_model = PointNet2ClsMSG(args.num_classes, args.num_points) \
                           if args.model == "MSG" else \
                         PointNet2ClsSSG(args.num_classes, args.num_points)
            test_model.build_model()
            test_feeds = test_model.get_feeds()
            test_outputs = test_model.get_outputs()
            test_loader = test_model.get_loader()
    test_prog = test_prog.clone(True)
    test_keys, test_values = parse_outputs(test_outputs)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup)

    if args.resume:
        # When args.resume is not a directory it is treated as a file prefix
        # and both the parameter and the optimizer state file must exist.
        if not os.path.isdir(args.resume):
            assert os.path.exists("{}.pdparams".format(args.resume)), \
                    "Given resume weight {}.pdparams not exist.".format(args.resume)
            assert os.path.exists("{}.pdopt".format(args.resume)), \
                    "Given resume optimizer state {}.pdopt not exist.".format(args.resume)
        fluid.load(train_prog, args.resume, exe)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_optimizer_ops = False
    train_compile_prog = fluid.compiler.CompiledProgram(
            train_prog).with_data_parallel(loss_name=train_loss.name,
                    build_strategy=build_strategy)
    test_compile_prog = fluid.compiler.CompiledProgram(test_prog)

    def save_model(exe, prog, path):
        """Save ``prog`` to ``path``, removing an existing directory there first."""
        if os.path.isdir(path):
            shutil.rmtree(path)
        logger.info("Save model to {}".format(path))
        fluid.save(prog, path)

    # get reader
    # Transforms are applied to the training reader only; the test reader
    # is created with transforms=None.
    trans_list = [
        PointcloudScale(),
        PointcloudRotate(),
        PointcloudRotatePerturbation(),
        PointcloudTranslate(),
        PointcloudJitter(),
        PointcloudRandomInputDropout(),
    ]
    modelnet_reader = ModelNet40ClsReader(args.data_dir, mode='train', transforms=trans_list)
    train_reader = modelnet_reader.get_reader(args.batch_size, args.num_points)
    train_loader.set_sample_list_generator(train_reader, place)
    modelnet_reader = ModelNet40ClsReader(args.data_dir, mode='test', transforms=None)
    test_reader = modelnet_reader.get_reader(args.batch_size, args.num_points)
    test_loader.set_sample_list_generator(test_reader, place)

    train_stat = Stat()
    test_stat = Stat()

    # CE bookkeeping: mean step time of the last finished epoch and the mean
    # loss of every logged training step.
    ce_time = 0
    ce_loss = []

    for epoch_id in range(args.epoch):
        try:
            train_loader.start()
            train_iter = 0
            train_periods = []
            # Runs until the loader is exhausted, which surfaces as
            # fluid.core.EOFException from exe.run.
            while True:
                cur_time = time.time()
                # The learning rate is fetched last so it can be split off
                # from the model outputs below.
                train_outs = exe.run(train_compile_prog, fetch_list=train_values + [lr.name])
                period = time.time() - cur_time
                train_periods.append(period)
                train_stat.update(train_keys, train_outs[:-1])
                if train_iter % args.log_interval == 0:
                    log_str = ""
                    for name, values in zip(train_keys + ['learning_rate'], train_outs):
                        log_str += "{}: {:.5f}, ".format(name, np.mean(values))
                        if name == 'loss':
                            ce_loss.append(np.mean(values))
                    logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, train_iter, log_str, period))
                train_iter += 1
        except fluid.core.EOFException:
            # The first step is left out of the average.
            # NOTE(review): with fewer than two steps in an epoch this is the
            # mean of an empty slice (nan) -- confirm that is acceptable.
            mean_period = np.mean(train_periods[1:])
            logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, train_stat.get_mean_log(), mean_period))
            ce_time = mean_period
            save_model(exe, train_prog, os.path.join(args.save_dir, str(epoch_id)))

            # evaluation
            if not args.enable_ce:
                try:
                    test_loader.start()
                    test_iter = 0
                    test_periods = []
                    while True:
                        cur_time = time.time()
                        test_outs = exe.run(test_compile_prog, fetch_list=test_values)
                        period = time.time() - cur_time
                        test_periods.append(period)
                        test_stat.update(test_keys, test_outs)
                        if test_iter % args.log_interval == 0:
                            log_str = ""
                            for name, value in zip(test_keys, test_outs):
                                log_str += "{}: {:.4f}, ".format(name, np.mean(value))
                            logger.info("[TEST] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, test_iter, log_str, period))
                        test_iter += 1
                except fluid.core.EOFException:
                    logger.info("[TEST] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, test_stat.get_mean_log(), np.mean(test_periods[1:])))
                finally:
                    test_loader.reset()
                    test_stat.reset()
                    test_periods = []

        finally:
            # Always leave the loader and statistics clean for the next
            # epoch, whether the epoch finished normally or raised.
            train_loader.reset()
            train_stat.reset()
            train_periods = []

    # only for ce
    if args.enable_ce:
        card_num = get_cards()
        _loss = 0
        _time = 0
        try:
            _time = ce_time
            # The first logged loss is skipped.
            _loss = np.mean(ce_loss[1:])
        except Exception:
            # Best effort: still emit the kpis lines with the defaults, but
            # do not swallow KeyboardInterrupt/SystemExit as a bare except
            # would.
            print("ce info error")
        print("kpis\ttrain_cls_%s_duration_card%s\t%s" % (args.model, card_num, _time))
        print("kpis\ttrain_cls_%s_loss_card%s\t%f" % (args.model, card_num, _loss))
    def check_network_convergence(self,
                                  method,
                                  memory_opt=True,
                                  iter=50,
                                  batch_size=None,
                                  allow_op_delay=False,
                                  feed_dict=None,
                                  seed=None,
                                  use_parallel_executor=True,
                                  balance_parameter_opt_between_cards=False):
        """Train the network built by ``method`` and return its first and last loss.

        Args:
            method: callable invoked as ``method(use_feed=...)`` inside the
                program guard; its return value is used as the loss.
            memory_opt: when true, ``fluid.memory_optimize`` is applied to
                the main program.
            iter: number of training runs between the two loss fetches.
            batch_size: when given, it is multiplied by the CUDA device count
                and used only to print the throughput.
            allow_op_delay: copied to ``ExecutionStrategy.allow_op_delay``.
            feed_dict: feed passed to every run; ``use_feed`` is true exactly
                when this is not ``None``.
            seed: optional random seed for the startup program (1 otherwise).
            use_parallel_executor: run with ``fluid.ParallelExecutor`` when
                true, with a plain ``fluid.Executor`` otherwise.
            balance_parameter_opt_between_cards: selects the ``Reduce``
                strategy instead of ``AllReduce``.

        Returns:
            tuple: ``(first_loss, last_loss)`` as numpy arrays.

        Raises:
            ValueError: if the executor is of an unsupported type.
        """
        def run_executor(exe, feed, fetch_list, program=None):
            # ParallelExecutor is bound to its program at construction time;
            # a plain Executor needs the program on every run.
            if isinstance(exe, fluid.ParallelExecutor):
                res = exe.run(fetch_list=fetch_list, feed=feed)
            elif isinstance(exe, fluid.Executor):
                if program is None:
                    program = fluid.default_main_program()
                res = exe.run(program=program, feed=feed, fetch_list=fetch_list)
            else:
                raise ValueError('Unkown type exe')
            return res

        main = fluid.Program()
        startup = fluid.Program()
        startup.random_seed = 1  # Fix random seed
        with fluid.program_guard(main, startup):
            if seed is not None:
                startup.random_seed = seed
            loss = method(use_feed=feed_dict is not None)
            adam = fluid.optimizer.Adam()
            adam.minimize(loss)
            if memory_opt:
                fluid.memory_optimize(main)
            # NOTE(review): the place is hard-coded to the first CUDA device,
            # so this helper cannot run on a CPU-only build.
            place = fluid.CUDAPlace(0)
            startup_exe = fluid.Executor(place)
            startup_exe.run(startup)
            exec_strategy = fluid.ExecutionStrategy()
            exec_strategy.allow_op_delay = allow_op_delay

            build_strategy = fluid.BuildStrategy()
            if balance_parameter_opt_between_cards:
                build_strategy.reduce_strategy = \
                    fluid.BuildStrategy.ReduceStrategy.Reduce
            else:
                build_strategy.reduce_strategy = \
                    fluid.BuildStrategy.ReduceStrategy.AllReduce

            if use_parallel_executor:
                exe = fluid.ParallelExecutor(
                    True,
                    loss_name=loss.name,
                    exec_strategy=exec_strategy,
                    build_strategy=build_strategy)
            else:
                exe = fluid.Executor(place=place)

            if batch_size is not None:
                batch_size *= fluid.core.get_cuda_device_count()
            begin = time.time()
            first_loss, = run_executor(
                exe=exe, feed=feed_dict, fetch_list=[loss.name])
            first_loss = np.array(first_loss)

            # range instead of xrange so the loop also runs on Python 3.
            for _ in range(iter):
                run_executor(exe=exe, feed=feed_dict, fetch_list=[])

            last_loss, = run_executor(
                exe=exe, feed=feed_dict, fetch_list=[loss.name])
            end = time.time()

            if batch_size is not None:
                # iter + 2 batches were run in the timed span: the first-loss
                # run, the iter training runs and the last-loss run.
                print("%.4f Instance per second" % (
                    (batch_size * (iter + 2)) / (end - begin)))

            last_loss = np.array(last_loss)

            # Single pre-formatted argument: identical output whether print
            # is the Python 2 statement or the Python 3 function.
            print("%s %s" % (first_loss, last_loss))
            # Convergence is deliberately not asserted here; the losses are
            # returned instead.
            # self.assertGreater(first_loss[0], last_loss[0])
            return first_loss, last_loss
    def test_multivariateNormalDiag_distribution(self,
                                                 batch_size=2,
                                                 tolerance=1e-6):
        """Check MultivariateNormalDiag entropy and KL divergence.

        Random ``loc`` vectors and diagonal ``scale`` matrices are fed into a
        static program; the fetched entropy and KL divergence are compared
        with ``MultivariateNormalDiagNumpy`` using ``tolerance`` as both the
        relative and the absolute tolerance.
        """
        test_program = fluid.Program()

        # Random inputs, drawn in the order loc, scale, other_loc,
        # other_scale. The scales are diagonal matrices.
        feeds = {}
        feeds['loc'] = np.random.random(batch_size).astype('float32')
        feeds['scale'] = np.diag(
            np.random.random(batch_size)).astype('float32')
        feeds['other_loc'] = np.random.random(batch_size).astype('float32')
        feeds['other_scale'] = np.diag(
            np.random.random(batch_size)).astype('float32')

        def declare(var_name, var_shape):
            # float32 feed slot with exactly the given shape (no batch axis
            # appended).
            return layers.data(name=var_name,
                               shape=var_shape,
                               dtype='float32',
                               append_batch_size=False)

        with fluid.program_guard(test_program):
            loc = declare('loc', [batch_size])
            scale = declare('scale', [batch_size, batch_size])
            other_loc = declare('other_loc', [batch_size])
            other_scale = declare('other_scale', [batch_size, batch_size])

            dist = MultivariateNormalDiag(loc, scale)
            other_dist = MultivariateNormalDiag(other_loc, other_scale)

            entropy_var = dist.entropy()
            # Never fetched, but still added to the program.
            other_dist.entropy()
            kl_var = dist.kl_divergence(other_dist)

        # NOTE(review): this runs the default main program, not anything
        # built into test_program above -- confirm it is intentional.
        self.executor.run(fluid.default_main_program())

        # Reference values computed with numpy.
        reference = MultivariateNormalDiagNumpy(feeds['loc'], feeds['scale'])
        other_reference = MultivariateNormalDiagNumpy(feeds['other_loc'],
                                                      feeds['other_scale'])
        expected_entropy = reference.entropy()
        expected_kl = reference.kl_divergence(other_reference)

        # Values computed by paddle.
        actual_entropy, actual_kl = self.executor.run(
            program=test_program,
            feed=feeds,
            fetch_list=[entropy_var, kl_var])

        for actual, expected in ((actual_entropy, expected_entropy),
                                 (actual_kl, expected_kl)):
            np.testing.assert_allclose(actual,
                                       expected,
                                       rtol=tolerance,
                                       atol=tolerance)