Example #1
def append_nccl2_prepare(trainer_id):
    if trainer_id >= 0:
        # append gen_nccl_id at the end of startup program
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        port = os.getenv("PADDLE_PSERVER_PORT")
        worker_ips = os.getenv("PADDLE_TRAINER_IPS")
        worker_endpoints = []
        for ip in worker_ips.split(","):
            worker_endpoints.append(':'.join([ip, port]))
        num_trainers = len(worker_endpoints)
        current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port
        worker_endpoints.remove(current_endpoint)

        nccl_id_var = fluid.default_startup_program().global_block().create_var(
            name="NCCLID",
            persistable=True,
            type=fluid.core.VarDesc.VarType.RAW)
        fluid.default_startup_program().global_block().append_op(
            type="gen_nccl_id",
            inputs={},
            outputs={"NCCLID": nccl_id_var},
            attrs={
                "endpoint": current_endpoint,
                "endpoint_list": worker_endpoints,
                "trainer_id": trainer_id
            })
        return nccl_id_var, num_trainers, trainer_id
    else:
        raise Exception("must set positive PADDLE_TRAINER_ID env variables for "
                        "nccl-based dist train.")
Example #2
    def test_main(self):
        N = 10

        img_expected_res = []
        lbl_expected_res = []
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            data_file = fluid.layers.io.open_recordio_file(
                './mnist_for_preprocessor_test.recordio',
                shapes=[[-1, 784], [-1, 1]],
                lod_levels=[0, 0],
                dtypes=['float32', 'int64'])
            img, lbl = fluid.layers.io.read_file(data_file)

            if fluid.core.is_compiled_with_cuda():
                place = fluid.CUDAPlace(0)
            else:
                place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            for _ in range(N):
                img_v, lbl_v = exe.run(fetch_list=[img, lbl])
                img_expected_res.append(img_v / 2)
                lbl_expected_res.append(lbl_v + 1)

        img_actual_res = []
        lbl_actual_res = []
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            data_file = fluid.layers.io.open_recordio_file(
                './mnist_for_preprocessor_test.recordio',
                shapes=[[-1, 784], [-1, 1]],
                lod_levels=[0, 0],
                dtypes=['float32', 'int64'])
            preprocessor = fluid.layers.io.Preprocessor(reader=data_file)
            with preprocessor.block():
                img, lbl = preprocessor.inputs()
                img_out = img / 2
                lbl_out = lbl + 1
                preprocessor.outputs(img_out, lbl_out)

            data_file = fluid.layers.io.double_buffer(preprocessor())
            img, lbl = fluid.layers.io.read_file(data_file)

            if fluid.core.is_compiled_with_cuda():
                place = fluid.CUDAPlace(0)
            else:
                place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            for _ in range(N):
                img_v, lbl_v = exe.run(fetch_list=[img, lbl])
                img_actual_res.append(img_v)
                lbl_actual_res.append(lbl_v)

        # np.allclose returns a bool; it must be asserted, otherwise
        # the loop verifies nothing.
        for idx in range(N):
            self.assertTrue(
                np.allclose(img_expected_res[idx], img_actual_res[idx]))
            self.assertTrue(
                np.allclose(lbl_expected_res[idx], lbl_actual_res[idx]))
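The test assumes './mnist_for_preprocessor_test.recordio' already exists on disk. A hedged sketch of how such a fixture could be written with fluid's recordio writer, based on how similar tests prepared their data; the batch size and layer names are assumptions:

import paddle
import paddle.dataset.mnist as mnist
import paddle.fluid as fluid

with fluid.program_guard(fluid.Program(), fluid.Program()):
    reader = paddle.batch(mnist.train(), batch_size=32)
    feeder = fluid.DataFeeder(
        feed_list=[  # shapes match those declared by the reader above
            fluid.layers.data(name='image', shape=[784]),
            fluid.layers.data(name='label', shape=[1], dtype='int64')
        ],
        place=fluid.CPUPlace())
    fluid.recordio_writer.convert_reader_to_recordio_file(
        './mnist_for_preprocessor_test.recordio', reader, feeder)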
Example #3
    def train_loop(main_program):
        exe.run(fluid.default_startup_program())
        embedding_param = fluid.global_scope().find_var(
            embedding_name).get_tensor()
        embedding_param.set(
            load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
            place)

        start_time = time.time()
        batch_id = 0
        for pass_id in range(PASS_NUM):
            for data in train_data():
                cost = exe.run(main_program,
                               feed=feeder.feed(data),
                               fetch_list=[avg_cost])
                cost = cost[0]

                if batch_id % 10 == 0:
                    print("avg_cost:" + str(cost))
                    if batch_id != 0:
                        print("seconds per batch: " + str(
                            (time.time() - start_time) / batch_id))
                    # Set the threshold low to speed up the CI test
                    if float(cost) < 60.0:
                        if save_dirname is not None:
                            # TODO(liuyiqun): Change the target to crf_decode
                            fluid.io.save_inference_model(save_dirname, [
                                'word_data', 'verb_data', 'ctx_n2_data',
                                'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
                                'ctx_p2_data', 'mark_data'
                            ], [feature_out], exe)
                        return

                batch_id = batch_id + 1
Example #4
def dist_transpile(trainer_id):
    if trainer_id < 0:
        return None, None

    # the port of all pservers, needed by both trainer and pserver
    port = os.getenv("PADDLE_PSERVER_PORT", "6174")
    # comma separated ips of all pservers, needed by trainer and
    # pserver
    pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
    eplist = []
    for ip in pserver_ips.split(","):
        eplist.append(':'.join([ip, port]))
    pserver_endpoints = ",".join(eplist)
    # total number of workers/trainers in the job, needed by
    # trainer and pserver
    trainers = int(os.getenv("PADDLE_TRAINERS"))
    # the IP of the local machine, needed by pserver only
    current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
    # the role, should be either PSERVER or TRAINER
    training_role = os.getenv("PADDLE_TRAINING_ROLE")

    t = distribute_transpiler.DistributeTranspiler()
    t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
    if training_role == "PSERVER":
        pserver_program = t.get_pserver_program(current_endpoint)
        pserver_startup_program = t.get_startup_program(current_endpoint,
                                                        pserver_program)
        return pserver_program, pserver_startup_program
    elif training_role == "TRAINER":
        train_program = t.get_trainer_program()
        return train_program, fluid.default_startup_program()
    else:
        raise ValueError(
            'PADDLE_TRAINING_ROLE environment variable must be either '
            'TRAINER or PSERVER')
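A sketch of how dist_transpile might be driven on a trainer node; every value below is a placeholder assumption:

import os

# Placeholder cluster: one pserver, two trainers, this process is trainer 0.
os.environ["PADDLE_PSERVER_PORT"] = "6174"
os.environ["PADDLE_PSERVER_IPS"] = "192.168.0.10"
os.environ["PADDLE_TRAINERS"] = "2"
os.environ["PADDLE_TRAINING_ROLE"] = "TRAINER"

train_prog, startup_prog = dist_transpile(0)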
Example #5
    def main(self, thread_num):
        file_list = [
            './mnist_0.recordio', './mnist_1.recordio', './mnist_2.recordio'
        ]
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            data_files = fluid.layers.open_files(
                filenames=file_list,
                thread_num=thread_num,
                shapes=[(-1, 784), (-1, 1)],
                lod_levels=[0, 0],
                dtypes=['float32', 'int64'])
            img, label = fluid.layers.read_file(data_files)

            if fluid.core.is_compiled_with_cuda():
                place = fluid.CUDAPlace(0)
            else:
                place = fluid.CPUPlace()

            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())

            batch_count = 0
            while True:
                try:
                    img_val, = exe.run(fetch_list=[img])
                except fluid.core.EnforceNotMet as ex:
                    self.assertIn("There is no next data.", ex.message)
                    break
                batch_count += 1
                self.assertLessEqual(img_val.shape[0], self.batch_size)
            self.assertEqual(batch_count, self.num_batch * 3)
Example #6
    def train_loop(main_program):
        exe.run(fluid.default_startup_program())
        loss = 0.0
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                exe.run(main_program, feed=feeder.feed(data))

                if (batch_id % 10) == 0:
                    acc_list = []
                    avg_loss_list = []
                    for tid, test_data in enumerate(test_reader()):
                        loss_t, acc_t = exe.run(program=test_program,
                                                feed=feeder.feed(test_data),
                                                fetch_list=[avg_cost, acc])
                        if math.isnan(float(loss_t)):
                            sys.exit("got NaN loss, training failed.")
                        acc_list.append(float(acc_t))
                        avg_loss_list.append(float(loss_t))
                        break  # Use 1 segment for speeding up CI

                    acc_value = numpy.array(acc_list).mean()
                    avg_loss_value = numpy.array(avg_loss_list).mean()

                    print(
                        'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
                        format(pass_id, batch_id + 1,
                               float(avg_loss_value), float(acc_value)))

                    if acc_value > 0.01:  # Low threshold for speeding up CI
                        fluid.io.save_inference_model(save_dirname, ["pixel"],
                                                      [predict], exe)
                        return
Example #7
 def test_calc_gradient(self):
     x = layers.create_parameter(dtype="float32", shape=[5, 10])
     y = layers.create_parameter(dtype="float32", shape=[10, 8])
     mul_out = layers.mul(x=x, y=y)
     mean_out = layers.mean(mul_out)
     a = calc_gradient(mean_out, mul_out)
     b = calc_gradient(mean_out, x)
     place = fluid.CPUPlace()
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
     exe.run(fluid.default_main_program(), feed={}, fetch_list=[a, b])
Example #8
def main():
    args = parse_args()
    print_arguments(args)

    # the unique trainer id, starting from 0, needed by trainer
    # only
    nccl_id_var, num_trainers, trainer_id = (
        None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1")))

    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()
    model_def = __import__("models.%s" % args.model, fromlist=["models"])
    train_args = list(model_def.get_model(args))
    train_args.append(args)
    # Run optimizer.minimize(avg_loss)
    train_args[2].minimize(train_args[0])
    if args.memory_optimize:
        fluid.memory_optimize(fluid.default_main_program())

    if args.update_method == "pserver":
        train_prog, startup_prog = dist_transpile(trainer_id)
        if not train_prog:
            raise Exception(
                "Must configure correct environments to run dist train.")
        train_args.extend([train_prog, startup_prog])
        if args.gpus > 1 and os.getenv("PADDLE_TRAINING_ROLE") == "TRAINER":
            train_args.extend([nccl_id_var, num_trainers, trainer_id])
            train_parallel(*train_args)
            exit(0)  # do not fall through to the single-process trainer
        train(*train_args)
        exit(0)

    # for other update methods, use default programs
    train_args.append(fluid.default_main_program())
    train_args.append(fluid.default_startup_program())

    if args.update_method == "nccl2":
        nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare(trainer_id)
    if args.gpus == 1:
        # NOTE: the parallel executor uses the profiler internally
        if args.use_nvprof and args.device == 'GPU':
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                train(*train_args)
        else:
            train(*train_args)
    else:
        if args.device == "CPU":
            raise Exception("Only support GPU perf with parallel exe")
        train_args.extend([nccl_id_var, num_trainers, trainer_id])
        train_parallel(*train_args)
Example #9
 def run_program(self):
     outputs = []
     places = [core.CPUPlace()]
     if core.is_compiled_with_cuda():
         places.append(core.CUDAPlace(0))
     for place in places:
         self.set_inputs(place)
         exe = fluid.Executor(place)
         exe.run(fluid.default_startup_program())
         output = exe.run(fluid.default_main_program(),
                          feed=self.inputs,
                          fetch_list=self.fetch_list,
                          return_numpy=False)
         outputs.append(output)
     self.actual_outputs = outputs
Example #10
    def run_program(self):
        """Run the test program.
        """
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))

        for place in places:
            self.set_inputs(place)
            exe = fluid.Executor(place)

            exe.run(fluid.default_startup_program())
            output = exe.run(fluid.default_main_program(),
                             feed=self.inputs,
                             fetch_list=self.fetch_list,
                             return_numpy=True)
            self.op_output = output
Example #11
    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            for data in train_data():
                cost_val, acc_val = exe.run(main_program,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                print("cost=" + str(cost_val) + " acc=" + str(acc_val))
                if cost_val < 0.4 and acc_val > 0.8:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(save_dirname, ["words"],
                                                      prediction, exe)
                    return
                if math.isnan(float(cost_val)):
                    sys.exit("got NaN loss, training failed.")
        raise AssertionError("Cost is too large for {0}".format(
            net_method.__name__))
Example #12
    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            for data in train_reader():
                avg_cost_np = exe.run(main_program,
                                      feed=feeder.feed(data),
                                      fetch_list=[avg_cost])
                if avg_cost_np[0] < 5.0:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(save_dirname, [
                            'firstw', 'secondw', 'thirdw', 'forthw'
                        ], [predict_word], exe)
                    return
                if math.isnan(float(avg_cost_np[0])):
                    sys.exit("got NaN loss, training failed.")

        raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0]))
Example #13
    def test_nvprof(self):
        if not fluid.core.is_compiled_with_cuda():
            return
        epochs = 8
        dshape = [4, 3, 28, 28]
        data = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
        conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])

        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        output_file = 'cuda_profiler.txt'
        with profiler.cuda_profiler(output_file, 'csv') as nvprof:
            for i in range(epochs):
                input_data = np.random.random(dshape).astype('float32')
                exe.run(fluid.default_main_program(), feed={'data': input_data})
        os.remove(output_file)
Example #14
    def train_loop(main_program):
        feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
        exe.run(fluid.default_startup_program())

        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                avg_loss_value, = exe.run(main_program,
                                          feed=feeder.feed(data),
                                          fetch_list=[avg_cost])
                print(avg_loss_value)
                if avg_loss_value[0] < 10.0:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(save_dirname, ['x'],
                                                      [y_predict], exe)
                    return
                if math.isnan(float(avg_loss_value)):
                    sys.exit("got NaN loss, training failed.")
        raise AssertionError("Fit a line cost is too large, {0:2.2}".format(
            avg_loss_value[0]))
Example #15
    def test_main(self, decorator_callback=None):
        # use new program
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            data_file = fluid.layers.open_recordio_file(
                './mnist.recordio',
                shapes=[[-1, 784], [-1, 1]],
                lod_levels=[0, 0],
                dtypes=['float32', 'int64'])
            if decorator_callback is not None:
                data_file = decorator_callback(data_file)
            img, label = fluid.layers.read_file(data_file)

            hidden = fluid.layers.fc(input=img, size=100, act='tanh')
            prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
            loss = fluid.layers.cross_entropy(input=prediction, label=label)
            avg_loss = fluid.layers.mean(loss)

            fluid.optimizer.Adam(learning_rate=1e-3).minimize(avg_loss)

            if fluid.core.is_compiled_with_cuda():
                place = fluid.CUDAPlace(0)
            else:
                place = fluid.CPUPlace()

            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            avg_loss_np = []

            # train a pass
            batch_id = 0
            while True:
                try:
                    tmp, = exe.run(fetch_list=[avg_loss])
                except fluid.core.EnforceNotMet as ex:
                    self.assertIn("There is no next data.", ex.message)
                    break

                avg_loss_np.append(tmp)
                batch_id += 1
            self.assertEqual(batch_id, self.num_batches)
            self.assertLess(avg_loss_np[-1], avg_loss_np[0])
Example #16
    def check_result(self, fn, place, dtype):
        shape = [9, 10]

        x_data = np.random.random(size=shape).astype(dtype)
        y_data = np.random.random(size=shape).astype(dtype)
        python_out = fn(x_data, y_data)

        x_var = layers.create_global_var(
            name='x', shape=shape, value=0.0, dtype=dtype, persistable=True)
        y_var = layers.create_global_var(
            name='y', shape=shape, value=0.0, dtype=dtype, persistable=True)
        out = fn(x_var, y_var)

        exe = fluid.Executor(place)

        exe.run(fluid.default_startup_program())
        fluid_out = exe.run(fluid.default_main_program(),
                            feed={'x': x_data,
                                  'y': y_data},
                            fetch_list=[out])

        np.testing.assert_array_equal(python_out, fluid_out[0])
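Since check_result applies the same fn to numpy arrays and to fluid variables, any function built from overloaded element-wise operators qualifies. A hypothetical invocation from inside the test class:

# Hypothetical call: element-wise addition behaves identically on numpy
# arrays and fluid variables, so both code paths produce equal results.
self.check_result(lambda x, y: x + y, fluid.CPUPlace(), 'float32')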
Example #17
    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                # train a mini-batch, fetch nothing
                exe.run(main_program, feed=feeder.feed(data))
                if (batch_id + 1) % 10 == 0:
                    acc_set = []
                    avg_loss_set = []
                    for test_data in test_reader():
                        acc_np, avg_loss_np = exe.run(
                            program=test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[acc, avg_loss])
                        acc_set.append(float(acc_np))
                        avg_loss_set.append(float(avg_loss_np))
                    # get test acc and loss
                    acc_val = numpy.array(acc_set).mean()
                    avg_loss_val = numpy.array(avg_loss_set).mean()
                    # Use a small threshold to increase CI speed
                    if float(acc_val) > 0.2:
                        if save_dirname is not None:
                            fluid.io.save_inference_model(
                                save_dirname, ["img"], [prediction],
                                exe,
                                model_filename=model_filename,
                                params_filename=params_filename)
                        return
                    else:
                        print(
                            'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
                            format(pass_id, batch_id + 1,
                                   float(avg_loss_val), float(acc_val)))
                        if math.isnan(float(avg_loss_val)):
                            sys.exit("got NaN loss, training failed.")
        raise AssertionError("Loss of recognize digits is too large")
Example #18
    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            train_loss_set = []
            train_acc_set = []

            # Calculate average training loss and accuracy
            # across all mini-batches in the training set
            for batch_id, data in enumerate(train_reader()):
                cost_val, acc_val = exe.run(main_program,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                train_loss_set.append(float(cost_val))
                train_acc_set.append(float(acc_val))
            train_loss = np.array(train_loss_set).mean()
            train_acc = np.array(train_acc_set).mean() * 100

            # Calculate average validation loss and accuracy
            # across all mini-batches in the validation set
            acc_set = []
            avg_loss_set = []
            for tid, test_data in enumerate(test_reader()):
                avg_loss_np, acc_np = exe.run(program=test_program,
                                              feed=feeder.feed(test_data),
                                              fetch_list=[cost, acc_out])
                acc_set.append(float(acc_np))
                avg_loss_set.append(float(avg_loss_np))
            acc_val = np.array(acc_set).mean() * 100
            avg_loss_val = np.array(avg_loss_set).mean()
            print("Epoch =", pass_id, ", train-accuracy =", train_acc,
                  ", train-loss =", train_loss, ", validation-accuracy =",
                  acc_val, ", validation-loss =", avg_loss_val)

            if acc_val > target_val_acc:
                # Exit the training loop on reaching the desired accuracy
                break
Example #19
    def build_model(self):

        img = fluid.layers.data(name='img', shape=[784], dtype='float32')
        condition = fluid.layers.data(name='condition',
                                      shape=[1],
                                      dtype='float32')
        noise = fluid.layers.data(name='noise',
                                  shape=[self.cfg.noise_size],
                                  dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='float32')

        g_trainer = GTrainer(noise, condition, self.cfg)
        d_trainer = DTrainer(img, condition, label, self.cfg)

        # prepare environment
        place = fluid.CUDAPlace(0) if self.cfg.use_gpu else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        const_n = np.random.uniform(
            low=-1.0,
            high=1.0,
            size=[self.cfg.batch_size, self.cfg.noise_size]).astype('float32')

        if self.cfg.init_model:
            utility.init_checkpoints(self.cfg, exe, g_trainer, "net_G")
            utility.init_checkpoints(self.cfg, exe, d_trainer, "net_D")

        # memory optimization build strategy
        build_strategy = fluid.BuildStrategy()
        build_strategy.enable_inplace = True
        build_strategy.memory_optimize = False

        g_trainer_program = fluid.CompiledProgram(
            g_trainer.program).with_data_parallel(
                loss_name=g_trainer.g_loss.name, build_strategy=build_strategy)
        d_trainer_program = fluid.CompiledProgram(
            d_trainer.program).with_data_parallel(
                loss_name=d_trainer.d_loss.name, build_strategy=build_strategy)

        t_time = 0
        losses = [[], []]
        for epoch_id in range(self.cfg.epoch):
            for batch_id, data in enumerate(self.train_reader()):
                if len(data) != self.cfg.batch_size:
                    continue

                noise_data = np.random.uniform(
                    low=-1.0,
                    high=1.0,
                    size=[self.cfg.batch_size,
                          self.cfg.noise_size]).astype('float32')
                real_image = np.array(list(map(lambda x: x[0], data))).reshape(
                    [-1, 784]).astype('float32')
                condition_data = np.array([x[1] for x in data
                                           ]).reshape([-1,
                                                       1]).astype('float32')
                real_label = np.ones(shape=[real_image.shape[0], 1],
                                     dtype='float32')
                fake_label = np.zeros(shape=[real_image.shape[0], 1],
                                      dtype='float32')
                s_time = time.time()

                generate_image = exe.run(
                    g_trainer.infer_program,
                    feed={
                        'noise': noise_data,
                        'condition': condition_data
                    },
                    fetch_list=[g_trainer.fake])[0]  # unwrap so 'img' below gets an array

                d_real_loss = exe.run(d_trainer_program,
                                      feed={
                                          'img': real_image,
                                          'condition': condition_data,
                                          'label': real_label
                                      },
                                      fetch_list=[d_trainer.d_loss])[0]
                d_fake_loss = exe.run(d_trainer_program,
                                      feed={
                                          'img': generate_image,
                                          'condition': condition_data,
                                          'label': fake_label
                                      },
                                      fetch_list=[d_trainer.d_loss])[0]
                d_loss = d_real_loss + d_fake_loss
                losses[1].append(d_loss)

                for _ in six.moves.xrange(self.cfg.num_generator_time):
                    g_loss = exe.run(g_trainer_program,
                                     feed={
                                         'noise': noise_data,
                                         'condition': condition_data
                                     },
                                     fetch_list=[g_trainer.g_loss])[0]
                    losses[0].append(g_loss)

                batch_time = time.time() - s_time
                t_time += batch_time

                if batch_id % self.cfg.print_freq == 0:
                    image_path = self.cfg.output + '/images'
                    if not os.path.exists(image_path):
                        os.makedirs(image_path)
                    # fetch_list must be a list, not a set literal
                    generate_const_image = exe.run(
                        g_trainer.infer_program,
                        feed={'noise': const_n,
                              'condition': condition_data},
                        fetch_list=[g_trainer.fake])[0]

                    generate_image_reshape = np.reshape(
                        generate_const_image, (self.cfg.batch_size, -1))
                    total_images = np.concatenate(
                        [real_image, generate_image_reshape])
                    fig = utility.plot(total_images)
                    print(
                        'Epoch ID={} Batch ID={} D_loss={} G_loss={} Batch_time_cost={:.2f}'
                        .format(epoch_id, batch_id, d_loss[0], g_loss[0],
                                batch_time))
                    plt.title('Epoch ID={}, Batch ID={}'.format(
                        epoch_id, batch_id))
                    plt.savefig('{}/{:04d}_{:04d}.png'.format(
                        image_path, epoch_id, batch_id),
                                bbox_inches='tight')
                    plt.close(fig)

            if self.cfg.save_checkpoints:
                utility.checkpoints(epoch_id, self.cfg, exe, g_trainer,
                                    "net_G")
                utility.checkpoints(epoch_id, self.cfg, exe, d_trainer,
                                    "net_D")
Example #20
    def _init_train(self):

        instances = self.instances
        Backbone = self.Backbone
        bb_conf = self.bb_conf
        bb_name = self.bb_name
        dev_count = self.dev_count
        num_instances = len(instances)
        mrs = self.mrs
        branch = fluid.data(name="branch", shape=[1], dtype='int64')

        # set first_target/main task instance
        main_inst = None
        for inst in instances:
            if inst.is_target:
                main_inst = inst
                inst.is_first_target = True
                break
        main_conf = main_inst.config
        if not os.path.exists(main_conf['save_path']):
            os.makedirs(main_conf['save_path'])
            os.makedirs(os.path.join(main_conf['save_path'], 'ckpt'))

        # prepare backbone
        train_backbone = Backbone(bb_conf, phase='train')
        pred_backbone = Backbone(bb_conf, phase='pred')

        # create reader, task
        # then check i/o across reader, backbone and task_layer

        # check_fns = {}
        task_attrs = {}
        pred_task_attrs = []
        joint_input_names = {}
        joint_shape_and_dtypes = {}
        name_to_position = {}
        for i in range(num_instances):
            # def check_tasks():
            #     i = s
            #     def checkeach():

            train_reader = instances[i].Reader(instances[i].config,
                                               phase='train')
            instances[i].reader['train'] = train_reader
            train_parad = instances[i].Paradigm(instances[i].config,
                                                phase='train',
                                                backbone_config=bb_conf)
            instances[i].task_layer['train'] = train_parad
            task_attr_from_reader = _encode_inputs(
                train_parad.inputs_attrs['reader'], instances[i].name)
            task_attrs[i] = task_attr_from_reader

            _check_io(train_backbone.inputs_attr,
                      train_reader.outputs_attr,
                      in_name=bb_name + '_backbone',
                      out_name='reader.train')
            _check_io(train_parad.inputs_attrs['reader'],
                      train_reader.outputs_attr,
                      in_name='task_paradigm.train.reader',
                      out_name='reader.train')
            _check_io(train_parad.inputs_attrs['backbone'],
                      train_backbone.outputs_attr,
                      in_name='task_paradigm.train.backbone',
                      out_name=bb_name + '_backbone')
            # merge reader input attrs from backbone and task_instances
            # pred_joint_input_names = []
            # pred_joint_shape_and_dtypes = []
            if instances[i].is_target:
                if 'pred_file' not in instances[i].config:
                    instances[i].config['pred_file'] = ''
                pred_reader = instances[i].Reader(instances[i].config,
                                                  phase='pred')
                pred_parad = instances[i].Paradigm(instances[i].config,
                                                   phase='pred',
                                                   backbone_config=bb_conf)
                instances[i].task_layer['pred'] = pred_parad
                task_attr_from_reader = _encode_inputs(
                    pred_parad.inputs_attrs['reader'], instances[i].name)
                pred_task_attrs.append(task_attr_from_reader)
                _check_io(pred_backbone.inputs_attr,
                          pred_reader.outputs_attr,
                          in_name=bb_name + '_backbone',
                          out_name='reader.pred')
                _check_io(pred_parad.inputs_attrs['reader'],
                          pred_reader.outputs_attr,
                          in_name='task_paradigm.pred.reader',
                          out_name='reader.pred')
                _check_io(pred_parad.inputs_attrs['backbone'],
                          pred_backbone.outputs_attr,
                          in_name='task_paradigm.pred.backbone',
                          out_name=bb_name + '_backbone')
                # pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(pred_backbone.inputs_attr, pred_task_attrs, insert_taskid=False, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False)
                #     return joint_input_names[i], joint_shape_and_dtypes[i], name_to_position[i], pred_joint_input_names, pred_joint_shape_and_dtypes
                #   return checkeach
                # check_fns[i] = check_tasks()
            joint_input_names[i], joint_shape_and_dtypes[i], name_to_position[
                i] = merge_input_attrs(train_backbone.inputs_attr,
                                       task_attrs[i])

        pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(
            pred_backbone.inputs_attr,
            pred_task_attrs,
            insert_taskid=False,
            insert_batchsize=False,
            insert_seqlen=False,
            insert_batchsize_x_seqlen=False)

        # shapes: [task_id, shapes_of_backbone, shapes_of_inst1, ..., shapes_of_instN]

        if DEBUG:
            print('----- for debug -----')
            print('joint input names:')
            print(joint_input_names)
            print('joint input shape and dtypes:')
            print(joint_shape_and_dtypes)

        # load data
        data_fns = {}
        for i in range(num_instances):
            print(instances[i].name + ": preparing data...", end='')
            instances[i].reader['train'].load_data()
            print('ok!')

        # merge dataset iterators and create net input vars
        iterators = []
        prefixes = []
        mrs = []

        for inst in instances:
            iterators.append(inst.reader['train'].iterator())
            prefixes.append(inst.name)
            mrs.append(inst.mix_ratio)

        joint_iterator_fn = create_joint_iterator_fn(iterators,
                                                     prefixes,
                                                     joint_shape_and_dtypes,
                                                     mrs,
                                                     name_to_position,
                                                     dev_count=dev_count,
                                                     verbose=VERBOSE,
                                                     return_type='dict')
        self._joint_iterator_fn = joint_iterator_fn

        input_attrs = {}
        net_inputs = {}
        bb_output_vars = {}
        bb_output_fns = {}

        # prepare predict vars for saving inference model
        pred_input_attrs = [[i, j, k] for i, (
            j, k) in zip(pred_joint_input_names, pred_joint_shape_and_dtypes)]
        pred_prog = fluid.Program()
        pred_init_prog = fluid.Program()
        self._pred_prog = pred_prog

        with fluid.program_guard(main_program=pred_prog,
                                 startup_program=pred_init_prog):
            pred_net_inputs = create_net_inputs(pred_input_attrs)
            pred_bb_output_vars = pred_backbone.build(
                pred_net_inputs, scope_name='__paddlepalm_')

        task_inputs = {}
        task_output_vars = {}
        task_fns = {}

        def get_loss(i):
            input_attrs[i] = [[m, j, k] for m, (
                j, k) in zip(joint_input_names[i], joint_shape_and_dtypes[i])]
            net_inputs[i] = create_net_inputs(input_attrs[i], async=False)
            # net_inputs = create_net_inputs(input_attrs, async=True, iterator_fn=joint_iterator_fn, dev_count=dev_count, n_prefetch=3)
            bb_output_vars[i] = train_backbone.build(
                net_inputs[i], scope_name='__paddlepalm_')
            assert sorted(bb_output_vars[i].keys()) == sorted(
                train_backbone.outputs_attr.keys())

            # build backbone and task layers
            task_inputs[i] = {'backbone': bb_output_vars[i]}
            task_inputs_from_reader = _decode_inputs(net_inputs[i],
                                                     instances[i].name)
            task_inputs[i]['reader'] = task_inputs_from_reader

            scope = instances[i].task_reuse_scope + '/'
            with fluid.unique_name.guard(scope):
                output_vars = instances[i].build_task_layer(task_inputs[i],
                                                            phase='train',
                                                            scope=scope)
                output_vars = {
                    instances[i].name + '/' + key: val
                    for key, val in output_vars.items()
                }
                loss_var = output_vars[instances[i].name + '/loss']
                task_output_vars[i] = output_vars

            if instances[i].is_target:
                with fluid.program_guard(pred_prog, pred_init_prog):
                    cur_inputs = _decode_inputs(pred_net_inputs,
                                                instances[i].name)
                    instances[i].pred_input = cur_inputs
                    pred_task_inputs = {
                        'backbone': pred_bb_output_vars,
                        'reader': cur_inputs
                    }
                    scope = instances[i].task_reuse_scope + '/'
                    with fluid.unique_name.guard(scope):
                        instances[i].build_task_layer(pred_task_inputs,
                                                      phase='pred',
                                                      scope=scope)
            return loss_var

        for i in range(num_instances):

            def task_loss():
                # bind the loop variable by value so each switch_case
                # branch builds the loss for its own task instance
                task_id = i
                return lambda: get_loss(task_id)

            task_fns[i] = task_loss()

        loss = layers.switch_case(branch_index=branch, branch_fns=task_fns)
        self._switched_loss = loss.name
        main_reader = main_inst.reader['train']

        num_examples = main_reader.num_examples
        for inst in instances:
            max_train_steps = int(
                main_conf['num_epochs'] * inst.mix_ratio *
                (num_examples // main_conf['batch_size'] // dev_count))
            if inst.is_target:
                print('{}: expected train steps {}.'.format(
                    inst.name, max_train_steps))
            inst.steps_pur_epoch = inst.reader[
                'train'].num_examples // main_conf['batch_size'] // dev_count
            inst.expected_train_steps = max_train_steps

        global_max_train_steps = int(
            main_conf['num_epochs'] * sum(mrs) *
            (num_examples // main_conf['batch_size'] // dev_count))
        print(
            'Estimated overall train steps {}.'.format(global_max_train_steps))

        if 'warmup_proportion' in main_conf and main_conf[
                'warmup_proportion'] > 0:
            warmup_steps = int(global_max_train_steps *
                               main_conf['warmup_proportion'])
            print('Warmup steps: ' + str(warmup_steps))
        else:
            warmup_steps = 0

        # build optimizer
        if 'optimizer' in main_conf:
            optim_mod = importlib.import_module(OPTIMIZER_DIR + '.' +
                                                main_conf['optimizer'])
            optimize = getattr(optim_mod, OPTIMIZE_METHOD)
            optimize(loss, main_conf, max_train_steps, warmup_steps,
                     fluid.default_main_program())

            loss.persistable = True
            if main_conf.get('use_ema', False):
                assert 'ema_decay' in main_conf, "ema_decay should be set when use_ema is enabled."
                ema = fluid.optimizer.ExponentialMovingAverage(
                    main_conf['ema_decay'])
                ema.update()

        # prepare for train
        self.train_backbone = train_backbone
        self.train_program = fluid.CompiledProgram(
            fluid.default_main_program()).with_data_parallel(
                loss_name=loss.name)
        self.saver_program = fluid.default_main_program()

        self.main_inst = main_inst
        self.has_init_train = True
        self.has_init_pred = True
        self._net_inputs = net_inputs

        self.exe.run(fluid.default_startup_program())
        print("\nRandomly initialize parameters...\n")
Example #21
def train(place, save_dirname):
    if args.data_set == "cifar10":
        class_dim = 10
        data_shape = [3, 32, 32]
    elif args.data_set == "imagenet":
        class_dim = 102
        data_shape = [3, 224, 224]
    else:
        raise ValueError("%s dataset is not supported" % data_set)

    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if args.model == "vgg":
        print("train vgg")
        net = vgg16(images)
    elif args.model == "resnet":
        print("train resnet")
        if args.data_set == "cifar10":
            net = resnet_cifar10(images)
        elif args.data_set == "imagenet":
            net = resnet_imagenet(images)
        else:
            raise ValueError("%s dataset is not supported" % args.data_set)
    else:
        raise ValueError("%s network is not supported" % args.model)

    predict = fluid.layers.fc(input=net, size=class_dim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc = fluid.layers.accuracy(input=predict, label=label)

    # Test program
    test_program = fluid.default_main_program().clone(for_test=True)
    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    optimizer.minimize(avg_cost)

    BATCH_SIZE = args.train_batch_size
    PASS_NUM = 100

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.flowers.train()
            if args.data_set == 'imagenet' else paddle.dataset.cifar.train10(),
            buf_size=128 * 10),
        batch_size=args.train_batch_size)

    test_reader = paddle.batch(
        paddle.dataset.flowers.test()
        if args.data_set == 'imagenet' else paddle.dataset.cifar.test10(),
        batch_size=args.inf_batch_size)

    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

    exe.run(fluid.default_startup_program())
    main_program = fluid.default_main_program()

    for pass_id in range(PASS_NUM):
        for batch_id, data in enumerate(train_reader()):
            # wrap map() in list() so the code also runs on Python 3
            train_image = np.array(
                list(map(lambda x: x[0].reshape(data_shape),
                         data))).astype("float32")
            train_label = np.array(list(map(lambda x: x[1],
                                            data))).astype("int64")
            train_label = train_label.reshape([-1, 1])

            exe.run(main_program,
                    feed={'pixel': train_image,
                          'label': train_label})

            if (batch_id % 100) == 0:
                acc_list = []
                avg_loss_list = []
                for tid, test_data in enumerate(test_reader()):
                    test_image = np.array(
                        list(map(lambda x: x[0].reshape(data_shape),
                                 test_data))).astype("float32")
                    test_label = np.array(list(map(lambda x: x[1],
                                                   test_data))).astype("int64")
                    test_label = test_label.reshape([-1, 1])

                    loss_t, acc_t = exe.run(
                        program=test_program,
                        feed={"pixel": test_image,
                              "label": test_label},
                        fetch_list=[avg_cost, acc])
                    if math.isnan(float(loss_t)):
                        sys.exit("got NaN loss, training failed.")
                    acc_list.append(float(acc_t))
                    avg_loss_list.append(float(loss_t))

                acc_value = np.array(acc_list).mean()
                avg_loss_value = np.array(avg_loss_list).mean()

                print(
                    'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Accuracy {3:2.2}'.
                    format(pass_id, batch_id + 1,
                           float(avg_loss_value), float(acc_value)))

                if acc_value > args.threshold:
                    print(
                        'Save inference model with test accuracy of {0} at {1}'.
                        format(float(acc_value), save_dirname))
                    fluid.io.save_inference_model(save_dirname, ["pixel"],
                                                  [predict], exe)
                    return
Example #22
    def build_model(self):
        data_shape = [None, 3, self.cfg.crop_size, self.cfg.crop_size]

        input_A = fluid.data(name='input_A', shape=data_shape, dtype='float32')
        input_B = fluid.data(name='input_B', shape=data_shape, dtype='float32')
        fake_pool_A = fluid.data(name='fake_pool_A',
                                 shape=data_shape,
                                 dtype='float32')
        fake_pool_B = fluid.data(name='fake_pool_B',
                                 shape=data_shape,
                                 dtype='float32')
        # used for continuous evaluation
        if self.cfg.enable_ce:
            fluid.default_startup_program().random_seed = 90

        A_loader = fluid.io.DataLoader.from_generator(feed_list=[input_A],
                                                      capacity=4,
                                                      iterable=True,
                                                      use_double_buffer=True)

        B_loader = fluid.io.DataLoader.from_generator(feed_list=[input_B],
                                                      capacity=4,
                                                      iterable=True,
                                                      use_double_buffer=True)

        gen_trainer = GTrainer(input_A, input_B, self.cfg, self.batch_num)
        d_A_trainer = DATrainer(input_B, fake_pool_B, self.cfg, self.batch_num)
        d_B_trainer = DBTrainer(input_A, fake_pool_A, self.cfg, self.batch_num)

        # prepare environment
        place = fluid.CUDAPlace(0) if self.cfg.use_gpu else fluid.CPUPlace()

        A_loader.set_batch_generator(self.A_reader,
                                     places=fluid.cuda_places() if
                                     self.cfg.use_gpu else fluid.cpu_places())
        B_loader.set_batch_generator(self.B_reader,
                                     places=fluid.cuda_places() if
                                     self.cfg.use_gpu else fluid.cpu_places())

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        A_pool = utility.ImagePool()
        B_pool = utility.ImagePool()

        if self.cfg.init_model:
            utility.init_checkpoints(self.cfg, gen_trainer, "net_G")
            utility.init_checkpoints(self.cfg, d_A_trainer, "net_DA")
            utility.init_checkpoints(self.cfg, d_B_trainer, "net_DB")

        ### memory optim
        build_strategy = fluid.BuildStrategy()
        build_strategy.enable_inplace = True

        gen_trainer_program = fluid.CompiledProgram(
            gen_trainer.program).with_data_parallel(
                loss_name=gen_trainer.g_loss.name,
                build_strategy=build_strategy)
        d_A_trainer_program = fluid.CompiledProgram(
            d_A_trainer.program).with_data_parallel(
                loss_name=d_A_trainer.d_loss_A.name,
                build_strategy=build_strategy)
        d_B_trainer_program = fluid.CompiledProgram(
            d_B_trainer.program).with_data_parallel(
                loss_name=d_B_trainer.d_loss_B.name,
                build_strategy=build_strategy)

        t_time = 0

        total_train_batch = 0  # NOTE: used for benchmark
        for epoch_id in range(self.cfg.epoch):
            batch_id = 0
            for data_A, data_B in zip(A_loader(), B_loader()):
                if self.cfg.max_iter and total_train_batch == self.cfg.max_iter:  # used for benchmark
                    return
                s_time = time.time()
                tensor_A, tensor_B = data_A[0]['input_A'], data_B[0]['input_B']
                ## optimize the g_A network
                g_A_loss, g_A_cyc_loss, g_A_idt_loss, g_B_loss, g_B_cyc_loss,\
                g_B_idt_loss, fake_A_tmp, fake_B_tmp = exe.run(
                    gen_trainer_program,
                    fetch_list=[
                        gen_trainer.G_A, gen_trainer.cyc_A_loss,
                        gen_trainer.idt_loss_A, gen_trainer.G_B,
                        gen_trainer.cyc_B_loss, gen_trainer.idt_loss_B,
                        gen_trainer.fake_A, gen_trainer.fake_B
                    ],
                    feed={"input_A": tensor_A,
                          "input_B": tensor_B})

                fake_pool_B = B_pool.pool_image(fake_B_tmp)
                fake_pool_A = A_pool.pool_image(fake_A_tmp)

                if self.cfg.enable_ce:
                    fake_pool_B = fake_B_tmp
                    fake_pool_A = fake_A_tmp

                # optimize the d_A network
                d_A_loss = exe.run(d_A_trainer_program,
                                   fetch_list=[d_A_trainer.d_loss_A],
                                   feed={
                                       "input_B": tensor_B,
                                       "fake_pool_B": fake_pool_B
                                   })[0]

                # optimize the d_B network
                d_B_loss = exe.run(d_B_trainer_program,
                                   fetch_list=[d_B_trainer.d_loss_B],
                                   feed={
                                       "input_A": tensor_A,
                                       "fake_pool_A": fake_pool_A
                                   })[0]

                batch_time = time.time() - s_time
                t_time += batch_time
                if batch_id % self.cfg.print_freq == 0:
                    print("epoch{}: batch{}: \n\
                         d_A_loss: {}; g_A_loss: {}; g_A_cyc_loss: {}; g_A_idt_loss: {}; \n\
                         d_B_loss: {}; g_B_loss: {}; g_B_cyc_loss: {}; g_B_idt_loss: {}; \n\
                         Batch_time_cost: {}".format(
                        epoch_id, batch_id, d_A_loss[0], g_A_loss[0],
                        g_A_cyc_loss[0], g_A_idt_loss[0], d_B_loss[0],
                        g_B_loss[0], g_B_cyc_loss[0], g_B_idt_loss[0],
                        batch_time))

                sys.stdout.flush()
                batch_id += 1
                # NOTE: used for benchmark
                total_train_batch += 1
                # profiler tools
                if self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq:
                    profiler.reset_profiler()
                elif self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq + 5:
                    return
                # used for continuous evaluation
                if self.cfg.enable_ce and batch_id == 10:
                    break

            if self.cfg.run_test:
                A_image_name = fluid.data(name='A_image_name',
                                          shape=[None, 1],
                                          dtype='int32')
                B_image_name = fluid.data(name='B_image_name',
                                          shape=[None, 1],
                                          dtype='int32')
                A_test_loader = fluid.io.DataLoader.from_generator(
                    feed_list=[input_A, A_image_name],
                    capacity=4,
                    iterable=True,
                    use_double_buffer=True)

                B_test_loader = fluid.io.DataLoader.from_generator(
                    feed_list=[input_B, B_image_name],
                    capacity=4,
                    iterable=True,
                    use_double_buffer=True)

                A_test_loader.set_batch_generator(
                    self.A_test_reader,
                    places=fluid.cuda_places()
                    if self.cfg.use_gpu else fluid.cpu_places())
                B_test_loader.set_batch_generator(
                    self.B_test_reader,
                    places=fluid.cuda_places()
                    if self.cfg.use_gpu else fluid.cpu_places())
                test_program = gen_trainer.infer_program
                utility.save_test_image(epoch_id,
                                        self.cfg,
                                        exe,
                                        place,
                                        test_program,
                                        gen_trainer,
                                        A_test_loader,
                                        B_test_loader,
                                        A_id2name=self.A_id2name,
                                        B_id2name=self.B_id2name)

            if self.cfg.save_checkpoints:
                utility.checkpoints(epoch_id, self.cfg, gen_trainer, "net_G")
                utility.checkpoints(epoch_id, self.cfg, d_A_trainer, "net_DA")
                utility.checkpoints(epoch_id, self.cfg, d_B_trainer, "net_DB")

        # used for continuous evaluation
        if self.cfg.enable_ce:
            device_num = fluid.core.get_cuda_device_count(
            ) if self.cfg.use_gpu else 1
            print("kpis\tcyclegan_g_A_loss_card{}\t{}".format(
                device_num, g_A_loss[0]))
            print("kpis\tcyclegan_g_A_cyc_loss_card{}\t{}".format(
                device_num, g_A_cyc_loss[0]))
            print("kpis\tcyclegan_g_A_idt_loss_card{}\t{}".format(
                device_num, g_A_idt_loss[0]))
            print("kpis\tcyclegan_d_A_loss_card{}\t{}".format(
                device_num, d_A_loss[0]))
            print("kpis\tcyclegan_g_B_loss_card{}\t{}".format(
                device_num, g_B_loss[0]))
            print("kpis\tcyclegan_g_B_cyc_loss_card{}\t{}".format(
                device_num, g_B_cyc_loss[0]))
            print("kpis\tcyclegan_g_B_idt_loss_card{}\t{}".format(
                device_num, g_B_idt_loss[0]))
            print("kpis\tcyclegan_d_B_loss_card{}\t{}".format(
                device_num, d_B_loss[0]))
            print("kpis\tcyclegan_Batch_time_cost_card{}\t{}".format(
                device_num, batch_time))
Example #23
def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
               weight):

    py_reader.decorate_tensor_provider(
        convert_python_to_tensor(weight, args.batch_size, reader.train()))

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.use_experimental_executor = True

    print("CPU_NUM:" + str(os.getenv("CPU_NUM")))
    exec_strategy.num_threads = int(os.getenv("CPU_NUM"))

    build_strategy = fluid.BuildStrategy()
    if int(os.getenv("CPU_NUM")) > 1:
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce

    train_exe = fluid.ParallelExecutor(
        use_cuda=False,
        loss_name=loss.name,
        main_program=train_program,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    for pass_id in range(args.num_passes):
        py_reader.start()
        time.sleep(10)
        epoch_start = time.time()
        batch_id = 0
        start = time.time()
        try:
            while True:

                loss_val = train_exe.run(fetch_list=[loss.name])
                loss_val = np.mean(loss_val)

                if batch_id % args.print_batch == 0:
                    logger.info(
                        "TRAIN --> pass: {} batch: {} loss: {} reader queue:{}".
                        format(pass_id, batch_id,
                               loss_val.mean(), py_reader.queue.size()))
                if args.with_speed:
                    if batch_id % 500 == 0 and batch_id != 0:
                        elapsed = (time.time() - start)
                        start = time.time()
                        samples = 1001 * args.batch_size * int(
                            os.getenv("CPU_NUM"))
                        logger.info("Time used: {}, Samples/Sec: {}".format(
                            elapsed, samples / elapsed))

                if batch_id % args.save_step == 0 and batch_id != 0:
                    model_dir = args.model_output_dir + '/pass-' + str(
                        pass_id) + ('/batch-' + str(batch_id))
                    if trainer_id == 0:
                        fluid.io.save_params(executor=exe, dirname=model_dir)
                        print("model saved in %s" % model_dir)
                batch_id += 1

        except fluid.core.EOFException:
            py_reader.reset()
            epoch_end = time.time()
            logger.info("Epoch: {0}, Train total expend: {1} ".format(
                pass_id, epoch_end - epoch_start))
            model_dir = args.model_output_dir + '/pass-' + str(pass_id)
            if trainer_id == 0:
                fluid.io.save_params(executor=exe, dirname=model_dir)
                print("model saved in %s" % model_dir)
Exemplo n.º 24
input_x = fluid.layers.data(name="x", shape=[32], dtype='float32')
input_y = fluid.layers.data(name="y", shape=[1], dtype='int64')

cost = mlp(input_x, input_y)
optimizer = fluid.optimizer.SGD(learning_rate=0.01)

dist_algorithm = KSDistributedFactory.instantiation(flag=1)
role = RoleMaker.PaddleCloudRoleMaker(is_collective=True)
dist_algorithm.init(role)

# algorithm + local optimizer
optimizer = GPUStrategy(exec_config=[NumThreadsConfig(32)],
                        dist_config=[CollectiveMode(),
                                     GradAllreduce()]).setup_optimizer(
                                         dist_algorithm, optimizer)
optimizer.minimize(cost, fluid.default_startup_program())

train_prog = dist_algorithm.main_program

gpu_id = int(os.getenv("FLAGS_selected_gpus", "0"))
place = fluid.CUDAPlace(gpu_id)

exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

step = 1001
for i in range(step):
    cost_val = exe.run(program=train_prog,
                       feed=gen_data(),
                       fetch_list=[cost.name])
    print("worker_index: %d, step%d cost = %f" %
Exemplo n.º 25
    def _test_slice(self, place):
        b = default_main_program().current_block()
        w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0)

        for i in range(3):
            nw = w[i]
            self.assertEqual((100, 100), nw.shape)

        nw = w[:]
        self.assertEqual((784, 100, 100), nw.shape)

        nw = w[:, :]
        self.assertEqual((784, 100, 100), nw.shape)

        nw = w[:, :, -1]
        self.assertEqual((784, 100), nw.shape)

        nw = w[1, 1, 1]

        self.assertEqual(len(nw.shape), 1)
        self.assertEqual(nw.shape[0], 1)

        nw = w[:, :, :-1]
        self.assertEqual((784, 100, 99), nw.shape)

        self.assertEqual(0, nw.lod_level)

        main = fluid.Program()
        with fluid.program_guard(main):
            exe = fluid.Executor(place)
            tensor_array = np.array(
                [[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                 [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
                 [[19, 20, 21], [22, 23, 24], [25, 26, 27]]]).astype('float32')
            var = fluid.layers.assign(tensor_array)
            var1 = var[0, 1, 1]
            var2 = var[1:]
            var3 = var[0:1]
            var4 = var[::-1]
            var5 = var[1, 1:, 1:]
            var_reshape = fluid.layers.reshape(var, [3, -1, 3])
            var6 = var_reshape[:, :, -1]
            var7 = var[:, :, :-1]
            var8 = var[:1, :1, :1]
            var9 = var[:-1, :-1, :-1]
            var10 = var[::-1, :1, :-1]
            var11 = var[:-1, ::-1, -1:]
            var12 = var[1:2, 2:, ::-1]
            var13 = var[2:10, 2:, -2:-1]
            var14 = var[1:-1, 0:2, ::-1]
            var15 = var[::-1, ::-1, ::-1]

            x = fluid.layers.data(name='x', shape=[13], dtype='float32')
            y = fluid.layers.fc(input=x, size=1, act=None)
            y_1 = y[:, 0]
            feeder = fluid.DataFeeder(place=place, feed_list=[x])
            data = []
            data.append((np.random.randint(10, size=[13]).astype('float32')))
            exe.run(fluid.default_startup_program())

            local_out = exe.run(main,
                                feed=feeder.feed([data]),
                                fetch_list=[
                                    var, var1, var2, var3, var4, var5, var6,
                                    var7, var8, var9, var10, var11, var12,
                                    var13, var14, var15
                                ])

            self.assertTrue(
                np.array_equal(local_out[1], tensor_array[0, 1, 1:2]))
            self.assertTrue(np.array_equal(local_out[2], tensor_array[1:]))
            self.assertTrue(np.array_equal(local_out[3], tensor_array[0:1]))
            self.assertTrue(np.array_equal(local_out[4], tensor_array[::-1]))
            self.assertTrue(
                np.array_equal(local_out[5], tensor_array[1, 1:, 1:]))
            self.assertTrue(
                np.array_equal(local_out[6],
                               tensor_array.reshape((3, -1, 3))[:, :, -1]))
            self.assertTrue(
                np.array_equal(local_out[7], tensor_array[:, :, :-1]))
            self.assertTrue(
                np.array_equal(local_out[8], tensor_array[:1, :1, :1]))
            self.assertTrue(
                np.array_equal(local_out[9], tensor_array[:-1, :-1, :-1]))
            self.assertTrue(
                np.array_equal(local_out[10], tensor_array[::-1, :1, :-1]))
            self.assertTrue(
                np.array_equal(local_out[11], tensor_array[:-1, ::-1, -1:]))
            self.assertTrue(
                np.array_equal(local_out[12], tensor_array[1:2, 2:, ::-1]))
            self.assertTrue(
                np.array_equal(local_out[13], tensor_array[2:10, 2:, -2:-1]))
            self.assertTrue(
                np.array_equal(local_out[14], tensor_array[1:-1, 0:2, ::-1]))
            self.assertTrue(
                np.array_equal(local_out[15], tensor_array[::-1, ::-1, ::-1]))
Exemplo n.º 26
    def test_case(self):
        np.random.seed(200)
        x_data = np.random.random((2, 3, 6, 6)).astype("float32")
        dim_data = np.array([12]).astype("int32")
        shape_data = np.array([12, 12]).astype("int32")
        actual_size_data = np.array([12, 12]).astype("int32")
        scale_data = np.array([2.0]).astype("float32")

        prog = fluid.Program()
        startup_prog = fluid.Program()
        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
        ) else fluid.CPUPlace()

        with fluid.program_guard(prog, startup_prog):

            x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")

            dim = fluid.data(name="dim", shape=[1], dtype="int32")
            shape_tensor = fluid.data(
                name="shape_tensor", shape=[2], dtype="int32")
            actual_size = fluid.data(
                name="actual_size", shape=[2], dtype="int32")
            scale_tensor = fluid.data(
                name="scale_tensor", shape=[1], dtype="float32")

            out1 = interpolate(
                x, size=[12, 12], mode='bicubic', align_corners=False)
            out2 = interpolate(
                x, size=[12, dim], mode='bicubic', align_corners=False)
            out3 = interpolate(
                x, size=shape_tensor, mode='bicubic', align_corners=False)
            out4 = interpolate(
                x, size=[12, 12], mode='bicubic', align_corners=False)
            out5 = interpolate(
                x,
                scale_factor=scale_tensor,
                mode='bicubic',
                align_corners=False)
            out6 = interpolate(
                x, scale_factor=2.0, mode='bicubic', align_corners=False)
            out7 = interpolate(
                x, scale_factor=[2.0, 2.0], mode='bicubic', align_corners=False)

            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            results = exe.run(
                fluid.default_main_program(),
                feed={
                    "x": x_data,
                    "dim": dim_data,
                    "shape_tensor": shape_data,
                    "actual_size": actual_size_data,
                    "scale_tensor": scale_data
                },
                fetch_list=[out1, out2, out3, out4, out5, out6, out7],
                return_numpy=True)

            expect_res = bicubic_interp_np(
                x_data, out_h=12, out_w=12, align_corners=False)
            for res in results:
                self.assertTrue(np.allclose(res, expect_res))

        with fluid.dygraph.guard():
            x = fluid.dygraph.to_variable(x_data)
            interp = interpolate(
                x, size=[12, 12], mode='bicubic', align_corners=False)
            dy_result = interp.numpy()
            expect = bicubic_interp_np(
                x_data, out_h=12, out_w=12, align_corners=False)
            self.assertTrue(np.allclose(dy_result, expect))
Exemplo n.º 27
    #def req_one_data():  # this line was commented out from the original
    for i in range(10):
        data_X = [i]
        data_Y = [i * 10 + 3]
        data_X = np.array(data_X).reshape(1, 1).astype("float32")
        data_Y = np.array(data_Y).reshape(1, 1).astype("float32")
        yield data_X, data_Y  # use yield to return one sample at a time
#return req_one_data    # return the name req_one_data, NOT req_one_data()!  # this line was commented out from the original


# Initialize the project environment.
# fluid.Program provides default_startup_program and default_main_program by default.
# Defining the startup and main programs separately lets program_guard set up
# two different program scopes.
main_program = fluid.Program()  # blank main program
start_test = fluid.Program()  # blank startup program, used for testing
start_train = fluid.default_startup_program()  # default startup program, used for training

# Define the variables of the main_program scope, initialized by the given
# startup_program. A blank startup program is used here, meaning no variables
# in this scope need initialization.
# startup_program defaults to default_startup_program when omitted.
with fluid.program_guard(main_program=main_program,
                         startup_program=start_test):
    # Define the tensor layouts.
    x = fluid.data(name="x", shape=[-1, 1], dtype="float32")
    # shape[0] = -1: any number of samples per batch; shape[1] = 1: one known
    # value per sample.
    y = fluid.data(name="y", shape=[-1, 1], dtype="float32")
    # Same layout: one numeric answer per sample.

    # Define the neural network.
    out = fluid.layers.fc(input=x, size=1)

    # Define the loss function.
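    # The original example breaks off right after the comment above; a
    # hypothetical continuation using the usual fluid regression pairing
    # (square_error_cost + mean + SGD), written here as an assumption:
    loss = fluid.layers.square_error_cost(input=out, label=y)
    avg_loss = fluid.layers.mean(loss)
    fluid.optimizer.SGD(learning_rate=0.01).minimize(avg_loss)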
Exemplo n.º 28
def train():
    use_cuda = False
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    prediction, [avg_cost, acc] = train_program()

    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)

    optimizer = optimizer_program()
    optimizer.minimize(avg_cost)

    PASS_NUM = 5
    epochs = [epoch_id for epoch_id in range(PASS_NUM)]

    save_dirname = 'recognize_digits.inference.model'

    def train_test(train_test_program, train_test_feed, train_test_reader):
        acc_set = []
        avg_loss_set = []

        for test_data in train_test_reader():
            acc_np, avg_loss_np = exe.run(program=train_test_program,
                                          feed=train_test_feed.feed(test_data),
                                          fetch_list=[acc, avg_cost])
            acc_set.append(float(acc_np))
            avg_loss_set.append(float(avg_loss_np))

        acc_val_mean = numpy.array(acc_set).mean()
        avg_loss_val_mean = numpy.array(avg_loss_set).mean()

        return avg_loss_val_mean, acc_val_mean

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)

    result_lists = []
    step = 0
    for epoch_id in epochs:
        for step_id, data in enumerate(train_reader()):
            metrics = exe.run(main_program,
                              feed=feeder.feed(data),
                              fetch_list=[avg_cost, acc])
            if step % 100 == 0:
                event_handler(step, epoch_id, metrics[0])

            step += 1

        avg_cost_val, acc_val = train_test(train_test_program=test_program,
                                           train_test_reader=test_reader,
                                           train_test_feed=feeder)

        print('\nTest with Epoch %d, avg_cost: %s, acc: %s\n\n' %
              (epoch_id, avg_cost_val, acc_val))
        # Test with Epoch 4, avg_cost: 0.01788416613656345, acc: 0.9940286624203821

        result_lists.append((epoch_id, avg_cost_val, acc_val))

        if save_dirname is not None:
            fluid.io.save_inference_model(save_dirname, ['img'], [prediction],
                                          exe,
                                          model_filename=None,
                                          params_filename=None)

    best = sorted(result_lists, key=lambda lst: float(lst[1]))[0]

    print('Best pass is %s, testing Avgcost is %s' % (best[0], best[1]))
    print('The classification accuracy is %.2f%%' % (float(best[2]) * 100))
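save_inference_model above writes the trained model to save_dirname; a hedged sketch (not from the source) of loading it back for one prediction, where the dummy input is an assumption:

infer_exe = fluid.Executor(fluid.CPUPlace())
with fluid.scope_guard(fluid.core.Scope()):
    # load_inference_model returns the inference program plus the feed names
    # and fetch targets recorded at save time.
    [infer_prog, feed_names, fetch_targets] = fluid.io.load_inference_model(
        'recognize_digits.inference.model', infer_exe)
    fake_img = numpy.random.random(size=(1, 1, 28, 28)).astype('float32')
    results = infer_exe.run(infer_prog,
                            feed={feed_names[0]: fake_img},
                            fetch_list=fetch_targets)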
Exemplo n.º 29
def train():
    """ do training """
    args = parse_args()
    hid_size = args.hid_size
    train_dir = args.train_dir
    vocab_path = args.vocab_path
    use_cuda = bool(args.use_cuda)
    parallel = bool(args.parallel)
    print("use_cuda:", use_cuda, "parallel:", parallel)
    batch_size = args.batch_size
    vocab_size, train_reader = utils.prepare_data(
        train_dir, vocab_path, batch_size=batch_size * get_cards(args),\
        buffer_size=1000, word_freq_threshold=0, is_train=True)

    # Train program
    if args.loss == 'bpr':
        print('bpr loss')
        src, pos_label, label, avg_cost = net.train_bpr_network(
            neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
    else:
        print('cross-entropy loss')
        src, pos_label, label, avg_cost = net.train_cross_entropy_network(
            neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)

    # Optimization to minimize the loss
    sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.base_lr)
    sgd_optimizer.minimize(avg_cost)

    # Initialize executor
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if parallel:
        train_exe = fluid.ParallelExecutor(use_cuda=use_cuda,
                                           loss_name=avg_cost.name)
    else:
        train_exe = exe

    pass_num = args.pass_num
    model_dir = args.model_dir
    fetch_list = [avg_cost.name]

    total_time = 0.0
    for pass_idx in six.moves.xrange(pass_num):
        epoch_idx = pass_idx + 1
        print("epoch_%d start" % epoch_idx)

        t0 = time.time()
        i = 0
        newest_ppl = 0
        for data in train_reader():
            i += 1
            ls, lp, ll = utils.to_lodtensor_bpr(data, args.neg_size,
                                                vocab_size, place)
            ret_avg_cost = train_exe.run(feed={
                "src": ls,
                "label": ll,
                "pos_label": lp
            },
                                         fetch_list=fetch_list)
            avg_ppl = np.exp(ret_avg_cost[0])
            newest_ppl = np.mean(avg_ppl)
            if i % args.print_batch == 0:
                print("step:%d ppl:%.3f" % (i, newest_ppl))

        t1 = time.time()
        total_time += t1 - t0
        print("epoch:%d num_steps:%d time_cost(s):%f" %
              (epoch_idx, i, total_time / epoch_idx))
        save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
        fluid.save(fluid.default_main_program(), model_path=save_dir)
        print("model saved in %s" % save_dir)

    print("finish training")
Exemplo n.º 30
def validate(fluid_model_filename,
             golden_data_filename,
             model_func_name='inference',
             decimal=3,
             save_inference_model=False):
    """
    inferece the converted Paddle fluid model, validate with given golden data
    """

    import numpy as np
    import paddle.fluid as fluid

    logger = logging.getLogger('validate')

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    fluid_model_dir, basename = os.path.split(fluid_model_filename)
    if basename == '__model__':  # is desc program
        logger.debug('using desc file %s', basename)
        prog, _, var_outs = fluid.io.load_inference_model(fluid_model_dir, exe)
        out_names = var_outs  # HINT: pass var if fetch ops already created
        logger.info('model load passed')
    elif basename.endswith('.py'):  # is python code
        logger.debug('using python code file %s', basename)
        module_name, _ = os.path.splitext(basename)
        sys_path = sys.path.copy()
        sys.path.append(fluid_model_dir)
        try:
            module = importlib.import_module(module_name)
            func = getattr(module, model_func_name)
        except AttributeError:
            module_name = module_name + '.' + module_name
            module = importlib.import_module(module_name)
            func = getattr(module, model_func_name)
        sys.path = sys_path
        logger.debug('from %s imported %s: %s', module_name, model_func_name,
                     func)

        var_outs = func()
        var_outs = _ensure_list(var_outs)
        out_names = [var.name for var in var_outs
                     ]  # HINT: pass string to create fetch ops
        logger.info('import passed')

        prog = fluid.default_main_program()
        fluid.io.load_persistables(executor=exe,
                                   dirname=fluid_model_dir,
                                   main_program=prog)
        logger.info('weight load passed')
    else:
        raise ValueError('unsupported Paddle fluid model filename')

    # load data
    logger.info('using golden data %s', golden_data_filename)
    if golden_data_filename.endswith('.npz'):
        test_data = np.load(golden_data_filename, encoding='bytes')
        input_data = test_data['inputs'].tolist()
        output_data = test_data['outputs'].tolist()
    else:
        test_data = np.load(golden_data_filename, encoding='bytes').tolist()
        input_data = test_data['inputs']
        output_data = test_data['outputs']
    input_data = _flatten_dict(input_data)
    output_data = _flatten_dict(output_data)
    logger.info('found %d I/O golden data, starting test ...',
                len(input_data) + len(output_data))

    # DEBUG: reload test for python code
    if basename.endswith('.py') and save_inference_model:
        fluid.io.save_inference_model(fluid_model_dir,
                                      input_data.keys(),
                                      var_outs,
                                      exe,
                                      main_program=prog,
                                      export_for_deployment=True)
        logger.info('model re-save passed')
        fluid.io.load_inference_model(fluid_model_dir, exe)
        logger.info('model re-load passed')

    # execute
    outputs = exe.run(prog, feed=input_data, fetch_list=out_names)
    logger.info('execution passed')

    # validate
    passed = True
    for (name, truth), output in zip(output_data.items(), outputs):
        logger.info('testing output {} ...'.format(name))
        try:
            np.testing.assert_almost_equal(output, truth, decimal=decimal)
        except AssertionError as e:
            passed = False
            logger.error('failed: %s\n', e)
    if passed:
        logger.info('accuracy passed')
    else:
        logger.info('accuracy not passed')


#    globals().update(locals())
    return passed
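A hypothetical invocation of the validator above (both paths are placeholders, not taken from the source):

passed = validate('export/__model__', 'export/golden.npz', decimal=3)
print('validation %s' % ('passed' if passed else 'failed'))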
Exemplo n.º 31
def train(place):

    num_layers = 1
    batch_size = 4
    hidden_size = 10
    num_steps = 3
    init_scale = 0.1
    max_epoch = 1
    dropout = 0.0
    vocab_size = 1000
    batch_num = 200

    with fluid.dygraph.guard(place):
        fluid.default_startup_program().random_seed = SEED
        fluid.default_main_program().random_seed = SEED
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale,
                             dropout=dropout)

        sgd = SGDOptimizer(learning_rate=1e-3,
                           parameter_list=ptb_model.parameters())

        for epoch_id in range(max_epoch):

            total_loss = 0.0
            iters = 0.0
            total_sample = 0

            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')
            init_cell_data = np.zeros((num_layers, batch_size, hidden_size),
                                      dtype='float32')

            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            for step_id in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))

                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, num_steps, 1))

                x = to_variable(x_data)
                y = to_variable(y_data)

                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)
                out_loss = dy_loss.numpy()

                dy_loss.backward()
                sgd.minimize(dy_loss)
                ptb_model.clear_gradients()

                total_loss += out_loss
                iters += num_steps
                total_sample += 1
                if step_id % PRINT_STEP == 0:
                    if step_id == 0:
                        logging.info(
                            "epoch %d | step %d, loss %0.3f" %
                            (epoch_id, step_id, total_loss / total_sample))
                        avg_batch_time = time.time()
                    else:
                        speed = PRINT_STEP / (time.time() - avg_batch_time)
                        logging.info(
                            "epoch %d | step %d, loss %0.3f, speed %.3f steps/s"
                            % (epoch_id, step_id, total_loss / total_sample,
                               speed))
                        avg_batch_time = time.time()

            return out_loss, last_hidden.numpy(), last_cell.numpy()
Exemplo n.º 32
def train(args):
    if args.use_cuda and not fluid.core.is_compiled_with_cuda():
        return

    startup_program = fluid.default_startup_program()
    main_program = fluid.default_main_program()

    if args.enable_ce:
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=BATCH_SIZE)
        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE)
        startup_program.random_seed = 90
        main_program.random_seed = 90
    else:
        train_reader = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
                                    batch_size=BATCH_SIZE)
        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE)

    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    net_conf = vae_neural_network

    _, reconstruction_loss, kl_loss, vae_loss = net_conf(img, args)

    test_program = main_program.clone(for_test=True)
    optimizer = fluid.optimizer.RMSProp(learning_rate=0.001)
    optimizer.minimize(vae_loss)

    def train_test(train_test_program, train_test_feed, train_test_reader):
        reconstruction_loss_set = []
        kl_loss_set = []
        vae_loss_set = []
        for test_data in train_test_reader():
            reconstruction_loss_np, kl_loss_np, vae_loss_np = exe.run(
                program=train_test_program,
                feed=train_test_feed.feed(test_data),
                fetch_list=[reconstruction_loss, kl_loss, vae_loss])
            reconstruction_loss_set.append(float(reconstruction_loss_np))
            kl_loss_set.append(float(kl_loss_np))
            vae_loss_set.append(float(vae_loss_np))
        # get test acc and loss
        reconstruction_loss_mean = np.array(reconstruction_loss_set).mean()
        kl_loss_mean = np.array(kl_loss_set).mean()
        vae_loss_mean = np.array(vae_loss_set).mean()
        return reconstruction_loss_mean, kl_loss_mean, vae_loss_mean

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    exe = fluid.Executor(place)

    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
    exe.run(startup_program)
    epochs = [epoch_id for epoch_id in range(50)]

    lists = []
    step = 0
    for epoch_id in epochs:
        for step_id, data in enumerate(train_reader()):
            metrics = exe.run(
                main_program,
                feed=feeder.feed(data),
                fetch_list=[reconstruction_loss, kl_loss, vae_loss])
            if step % 100 == 0:
                print(
                    "Pass %d, Epoch %d, reconstruction_loss %f, kl_loss %f, vae_loss %f"
                    % (step, epoch_id, metrics[0], metrics[1], metrics[2]))
            step += 1
        # test for epoch
        reconstruction_loss_val, kl_loss_val, vae_loss_val = train_test(
            train_test_program=test_program,
            train_test_reader=test_reader,
            train_test_feed=feeder)

        print(
            "Test with Epoch %d, reconstruction_loss_val: %s, kl_loss_val: %s, vae_loss_val: %s"
            % (epoch_id, reconstruction_loss_val, kl_loss_val, vae_loss_val))
        lists.append(
            (epoch_id, reconstruction_loss_val, kl_loss_val, vae_loss_val))
Exemplo n.º 33
def train(model):

    predict, loss, iou = create_model(model=model)
    optimizer = fluid.optimizer.Adam(learning_rate=1e-4)
    optimizer.minimize(loss)
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())
    fluid.memory_optimize(fluid.default_main_program(),
                          print_log=False,
                          skip_opt_set=set([loss.name, predict.name]))

    if pretrain_model:
        load_model(exe, fluid.default_main_program(), model=model)
        print("load model succeed")
    else:
        print("no pretrained model, training from scratch")

    def trainLoop():
        batches = DataSet.get_batch_generator(1, total_step)
        iou_count = 0
        mean_iou = 0
        iou_sum = 0
        for i, imgs, labels, names in batches:
            preTime = time.time()
            result = exe.run(fluid.default_main_program(),
                             feed={
                                 'img': imgs,
                                 'label': labels
                             },
                             fetch_list=[loss, predict, iou])
            nowTime = time.time()

            iou_sum += result[2]
            iou_count += 1
            mean_iou = iou_sum / iou_count

            print(
                '                                                         iou = ',
                result[2], 'mean_iou = ', mean_iou)

            if iou_count % 1000 == 0:
                iou_count = 0
                iou_sum = 0

            if i % 1000 == 0 and i != 0:
                print("Model saved")
                save_model(exe, fluid.default_main_program(), model=model)

            if i % 10 == 0:
                train_path = path + '/train.png'
                picture = result[1]
                picture = np.argmax(picture, axis=-1)
                picture = picture.reshape((1024, 1024))
                saveImage(picture, train_path)
                label_path = path + '/trainlabel.png'
                train_lab = np.argmax(labels[0], axis=2)
                saveImage(train_lab, label_path)

            if i % 20 == 0:
                argmax = np.argmax(result[1], axis=1)
                abc = Counter(argmax)
                print('                                        ', abc)

            if i % 2 == 0:
                print("step {:d},loss {:.6f},step_time: {:.3f}".format(
                    i, result[0][0], nowTime - preTime))

    trainLoop()
Exemplo n.º 34
def do_train(args):
    if args.use_cuda:
        trainer_count = fluid.dygraph.parallel.Env().nranks
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id
                                ) if trainer_count > 1 else fluid.CUDAPlace(0)
    else:
        trainer_count = 1
        place = fluid.CPUPlace()

    # define the data generator
    processor = reader.DataProcessor(fpattern=args.training_file,
                                     src_vocab_fpath=args.src_vocab_fpath,
                                     trg_vocab_fpath=args.trg_vocab_fpath,
                                     token_delimiter=args.token_delimiter,
                                     use_token_batch=args.use_token_batch,
                                     batch_size=args.batch_size,
                                     device_count=trainer_count,
                                     pool_size=args.pool_size,
                                     sort_type=args.sort_type,
                                     shuffle=args.shuffle,
                                     shuffle_batch=args.shuffle_batch,
                                     start_mark=args.special_token[0],
                                     end_mark=args.special_token[1],
                                     unk_mark=args.special_token[2],
                                     max_length=args.max_length,
                                     n_head=args.n_head)
    batch_generator = processor.data_generator(phase="train")
    if args.validation_file:
        val_processor = reader.DataProcessor(
            fpattern=args.validation_file,
            src_vocab_fpath=args.src_vocab_fpath,
            trg_vocab_fpath=args.trg_vocab_fpath,
            token_delimiter=args.token_delimiter,
            use_token_batch=args.use_token_batch,
            batch_size=args.batch_size,
            device_count=trainer_count,
            pool_size=args.pool_size,
            sort_type=args.sort_type,
            shuffle=False,
            shuffle_batch=False,
            start_mark=args.special_token[0],
            end_mark=args.special_token[1],
            unk_mark=args.special_token[2],
            max_length=args.max_length,
            n_head=args.n_head)
        val_batch_generator = val_processor.data_generator(phase="train")
    if trainer_count > 1:  # for multi-process gpu training
        batch_generator = fluid.contrib.reader.distributed_batch_reader(
            batch_generator)
    args.src_vocab_size, args.trg_vocab_size, args.bos_idx, args.eos_idx, \
        args.unk_idx = processor.get_vocab_summary()

    with fluid.dygraph.guard(place):
        # set seed for CE
        random_seed = eval(str(args.random_seed))
        if random_seed is not None:
            fluid.default_main_program().random_seed = random_seed
            fluid.default_startup_program().random_seed = random_seed

        # define data loader
        train_loader = fluid.io.DataLoader.from_generator(capacity=10)
        train_loader.set_batch_generator(batch_generator, places=place)
        if args.validation_file:
            val_loader = fluid.io.DataLoader.from_generator(capacity=10)
            val_loader.set_batch_generator(val_batch_generator, places=place)

        # define model
        transformer = Transformer(
            args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
            args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model,
            args.d_inner_hid, args.prepostprocess_dropout,
            args.attention_dropout, args.relu_dropout, args.preprocess_cmd,
            args.postprocess_cmd, args.weight_sharing, args.bos_idx,
            args.eos_idx)

        # define loss
        criterion = CrossEntropyCriterion(args.label_smooth_eps)

        # define optimizer
        optimizer = fluid.optimizer.Adam(
            learning_rate=NoamDecay(args.d_model, args.warmup_steps,
                                    args.learning_rate),
            beta1=args.beta1,
            beta2=args.beta2,
            epsilon=float(args.eps),
            parameter_list=transformer.parameters())

        ## init from some checkpoint, to resume the previous training
        if args.init_from_checkpoint:
            model_dict, opt_dict = fluid.load_dygraph(
                os.path.join(args.init_from_checkpoint, "transformer"))
            transformer.load_dict(model_dict)
            optimizer.set_dict(opt_dict)
        ## init from some pretrain models, to better solve the current task
        if args.init_from_pretrain_model:
            model_dict, _ = fluid.load_dygraph(
                os.path.join(args.init_from_pretrain_model, "transformer"))
            transformer.load_dict(model_dict)

        if trainer_count > 1:
            strategy = fluid.dygraph.parallel.prepare_context()
            transformer = fluid.dygraph.parallel.DataParallel(
                transformer, strategy)

        # the best cross-entropy value with label smoothing
        loss_normalizer = -(
            (1. - args.label_smooth_eps) * np.log(
                (1. - args.label_smooth_eps)) +
            args.label_smooth_eps * np.log(args.label_smooth_eps /
                                           (args.trg_vocab_size - 1) + 1e-20))
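        # Note: this normalizer is the entropy of the smoothed target
        # distribution (probability 1 - eps on the gold token and
        # eps / (V - 1) on each of the other V - 1 tokens), i.e. the lowest
        # cross-entropy any model can reach, so the "normalized loss" logged
        # below is the raw loss minus that floor.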

        ce_time = []
        ce_ppl = []
        step_idx = 0

        #NOTE: used for benchmark
        total_batch_num = 0

        # train loop
        for pass_id in range(args.epoch):
            pass_start_time = time.time()
            batch_id = 0
            for input_data in train_loader():
                if args.max_iter and total_batch_num == args.max_iter:  #NOTE: used for benchmark
                    return
                batch_start = time.time()
                (src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
                 trg_slf_attn_bias, trg_src_attn_bias, lbl_word,
                 lbl_weight) = input_data
                logits = transformer(src_word, src_pos, src_slf_attn_bias,
                                     trg_word, trg_pos, trg_slf_attn_bias,
                                     trg_src_attn_bias)

                sum_cost, avg_cost, token_num = criterion(
                    logits, lbl_word, lbl_weight)

                if trainer_count > 1:
                    avg_cost = transformer.scale_loss(avg_cost)
                    avg_cost.backward()
                    transformer.apply_collective_grads()
                else:
                    avg_cost.backward()

                optimizer.minimize(avg_cost)
                transformer.clear_gradients()

                if step_idx % args.print_step == 0:
                    total_avg_cost = avg_cost.numpy() * trainer_count

                    if step_idx == 0:
                        logging.info(
                            "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
                            "normalized loss: %f, ppl: %f" %
                            (step_idx, pass_id, batch_id, total_avg_cost,
                             total_avg_cost - loss_normalizer,
                             np.exp([min(total_avg_cost, 100)])))
                        avg_batch_time = time.time()
                    else:
                        logging.info(
                            "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
                            "normalized loss: %f, ppl: %f, speed: %.2f step/s"
                            % (step_idx, pass_id, batch_id, total_avg_cost,
                               total_avg_cost - loss_normalizer,
                               np.exp([min(total_avg_cost, 100)
                                       ]), args.print_step /
                               (time.time() - avg_batch_time)))
                        avg_batch_time = time.time()

                if step_idx % args.save_step == 0 and step_idx != 0:
                    # validation
                    if args.validation_file:
                        transformer.eval()
                        total_sum_cost = 0
                        total_token_num = 0
                        for input_data in val_loader():
                            (src_word, src_pos, src_slf_attn_bias, trg_word,
                             trg_pos, trg_slf_attn_bias, trg_src_attn_bias,
                             lbl_word, lbl_weight) = input_data
                            logits = transformer(src_word, src_pos,
                                                 src_slf_attn_bias, trg_word,
                                                 trg_pos, trg_slf_attn_bias,
                                                 trg_src_attn_bias)
                            sum_cost, avg_cost, token_num = criterion(
                                logits, lbl_word, lbl_weight)
                            total_sum_cost += sum_cost.numpy()
                            total_token_num += token_num.numpy()
                            total_avg_cost = total_sum_cost / total_token_num
                        logging.info("validation, step_idx: %d, avg loss: %f, "
                                     "normalized loss: %f, ppl: %f" %
                                     (step_idx, total_avg_cost,
                                      total_avg_cost - loss_normalizer,
                                      np.exp([min(total_avg_cost, 100)])))
                        transformer.train()

                    if args.save_model and (
                            trainer_count == 1
                            or fluid.dygraph.parallel.Env().dev_id == 0):
                        model_dir = os.path.join(args.save_model,
                                                 "step_" + str(step_idx))
                        if not os.path.exists(model_dir):
                            os.makedirs(model_dir)
                        fluid.save_dygraph(
                            transformer.state_dict(),
                            os.path.join(model_dir, "transformer"))
                        fluid.save_dygraph(
                            optimizer.state_dict(),
                            os.path.join(model_dir, "transformer"))

                batch_id += 1
                total_batch_num = total_batch_num + 1
                step_idx += 1

            time_consumed = time.time() - pass_start_time
            ce_time.append(time_consumed)

        if args.save_model:
            model_dir = os.path.join(args.save_model, "step_final")
            if not os.path.exists(model_dir):
                os.makedirs(model_dir)
            fluid.save_dygraph(transformer.state_dict(),
                               os.path.join(model_dir, "transformer"))
            fluid.save_dygraph(optimizer.state_dict(),
                               os.path.join(model_dir, "transformer"))

        if args.enable_ce:
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (trainer_count, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (trainer_count, _ppl))
Exemplo n.º 35
def train_dygraph(args, batch_generator):
    with fluid.dygraph.guard(place):
        if SEED is not None:
            fluid.default_main_program().random_seed = SEED
            fluid.default_startup_program().random_seed = SEED
        # define data loader
        train_loader = fluid.io.DataLoader.from_generator(capacity=10)
        train_loader.set_batch_generator(batch_generator, places=place)
        # define model
        transformer = Transformer(
            args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
            args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model,
            args.d_inner_hid, args.prepostprocess_dropout,
            args.attention_dropout, args.relu_dropout, args.preprocess_cmd,
            args.postprocess_cmd, args.weight_sharing, args.bos_idx,
            args.eos_idx)
        # define loss
        criterion = CrossEntropyCriterion(args.label_smooth_eps)
        # define optimizer
        learning_rate = fluid.layers.learning_rate_scheduler.noam_decay(
            args.d_model, args.warmup_steps, args.learning_rate)
        # define optimizer
        optimizer = fluid.optimizer.Adam(
            learning_rate=learning_rate,
            beta1=args.beta1,
            beta2=args.beta2,
            epsilon=float(args.eps),
            parameter_list=transformer.parameters())
        # the best cross-entropy value with label smoothing
        loss_normalizer = -(
            (1. - args.label_smooth_eps) * np.log(
                (1. - args.label_smooth_eps)) + args.label_smooth_eps *
            np.log(args.label_smooth_eps / (args.trg_vocab_size - 1) + 1e-20))
        ce_time = []
        ce_ppl = []
        avg_loss = []
        step_idx = 0
        for pass_id in range(args.epoch):
            pass_start_time = time.time()
            batch_id = 0
            for input_data in train_loader():
                (src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
                 trg_slf_attn_bias, trg_src_attn_bias, lbl_word,
                 lbl_weight) = input_data
                logits = transformer(src_word, src_pos, src_slf_attn_bias,
                                     trg_word, trg_pos, trg_slf_attn_bias,
                                     trg_src_attn_bias)
                sum_cost, avg_cost, token_num = criterion(logits, lbl_word,
                                                          lbl_weight)
                avg_cost.backward()
                optimizer.minimize(avg_cost)
                transformer.clear_gradients()
                if step_idx % args.print_step == 0:
                    total_avg_cost = avg_cost.numpy() * trainer_count
                    avg_loss.append(total_avg_cost[0])
                    if step_idx == 0:
                        logging.info(
                            "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
                            "normalized loss: %f, ppl: %f" %
                            (step_idx, pass_id, batch_id, total_avg_cost,
                             total_avg_cost - loss_normalizer,
                             np.exp([min(total_avg_cost, 100)])))
                        avg_batch_time = time.time()
                    else:
                        logging.info(
                            "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
                            "normalized loss: %f, ppl: %f, speed: %.2f steps/s"
                            %
                            (step_idx, pass_id, batch_id, total_avg_cost,
                             total_avg_cost - loss_normalizer,
                             np.exp([min(total_avg_cost, 100)]),
                             args.print_step / (time.time() - avg_batch_time)))
                        ce_ppl.append(np.exp([min(total_avg_cost, 100)]))
                        avg_batch_time = time.time()
                batch_id += 1
                step_idx += 1
                if step_idx == STEP_NUM:
                    if args.save_dygraph_model_path:
                        model_dir = os.path.join(args.save_dygraph_model_path)
                        if not os.path.exists(model_dir):
                            os.makedirs(model_dir)
                        fluid.save_dygraph(
                            transformer.state_dict(),
                            os.path.join(model_dir, "transformer"))
                        fluid.save_dygraph(
                            optimizer.state_dict(),
                            os.path.join(model_dir, "transformer"))
                    break
            time_consumed = time.time() - pass_start_time
            ce_time.append(time_consumed)
        return np.array(avg_loss)
Exemplo n.º 36
def eval(args):
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)

    val_feeder = fluid.DataFeeder([image, label],
                                  place,
                                  program=val_program)

    load_model(exe, val_program, args.model_path)

    batch_id = 0
    acc_top1_ns = []
    acc_top5_ns = []
    for data in val_reader():
        start_time = time.time()
        acc_top1_n, acc_top5_n = exe.run(
            val_program,
            feed=val_feeder.feed(data),
            fetch_list=[acc_top1.name, acc_top5.name])
        end_time = time.time()
        if batch_id % args.log_period == 0:
            _logger.info(
                "Eval batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".format(
                    batch_id, np.mean(acc_top1_n), np.mean(acc_top5_n),
                    end_time - start_time))
        acc_top1_ns.append(np.mean(acc_top1_n))
        acc_top5_ns.append(np.mean(acc_top5_n))
        batch_id += 1

    _logger.info("Final eval - acc_top1: {}; acc_top5: {}".format(
        np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))
Exemplo n.º 37

import unittest
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid.backward import append_backward
import paddle.fluid.framework as framework
from paddle.fluid.framework import Program, switch_main_program
import bisect
import numpy as np

fluid.default_startup_program().random_seed = 1


class TestDyRnnStaticInput(unittest.TestCase):
    def setUp(self):
        self._delta = 0.005
        self._max_sequence_len = 3
        self._program = Program()
        switch_main_program(self._program)
        self.output_dim = 10
        self.place = core.CPUPlace()
        self.prepare_x_tensor()
        self.prepare_static_input_tensor()
        self.exe = fluid.Executor(self.place)

    def prepare_x_tensor(self):
Exemplo n.º 38
    def test_mnist_float32(self):
        seed = 90
        epoch_num = 1
        batch_size = 128
        batch_num = 50

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            mnist = MNIST("mnist")
            sgd = SGDOptimizer(learning_rate=1e-3)

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(self.reader_decorator(
                    paddle.dataset.mnist.train()),
                             batch_size=batch_size,
                             drop_last=True),
                places=fluid.CPUPlace())

            mnist.train()
            dy_param_init_value = {}
            for epoch in range(epoch_num):
                for batch_id, data in enumerate(batch_py_reader()):
                    if batch_id >= batch_num:
                        break
                    img = data[0]
                    dy_x_data = img.numpy()
                    label = data[1]
                    label.stop_gradient = True

                    cost = mnist(img)
                    loss = fluid.layers.cross_entropy(cost, label)
                    avg_loss = fluid.layers.mean(loss)

                    dy_out = avg_loss.numpy()

                    if epoch == 0 and batch_id == 0:
                        for param in mnist.parameters():
                            dy_param_init_value[param.name] = param.numpy()

                    avg_loss.backward()
                    sgd.minimize(avg_loss)
                    mnist.clear_gradients()

                    dy_param_value = {}
                    for param in mnist.parameters():
                        dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

            mnist = MNIST("mnist")
            sgd = SGDOptimizer(learning_rate=1e-3)
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=batch_size,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            cost = mnist(img)
            loss = fluid.layers.cross_entropy(cost, label)
            avg_loss = fluid.layers.mean(loss)
            sgd.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mnist.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for epoch in range(epoch_num):
                for batch_id, data in enumerate(train_reader()):
                    if batch_id >= batch_num:
                        break
                    static_x_data = np.array([
                        x[0].reshape(1, 28, 28) for x in data
                    ]).astype('float32')
                    y_data = np.array([x[1]
                                       for x in data]).astype('int64').reshape(
                                           [batch_size, 1])

                    fetch_list = [avg_loss.name]
                    fetch_list.extend(static_param_name_list)
                    out = exe.run(fluid.default_main_program(),
                                  feed={
                                      "pixel": static_x_data,
                                      "label": y_data
                                  },
                                  fetch_list=fetch_list)

                    static_param_value = {}
                    static_out = out[0]
                    for i in range(1, len(out)):
                        static_param_value[static_param_name_list[i -
                                                                  1]] = out[i]

        self.assertTrue(np.allclose(dy_x_data, static_x_data))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))

        self.assertTrue(np.allclose(static_out, dy_out))

        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))
Exemplo n.º 39
def train(nn_type,
          use_cuda,
          save_dirname=None,
          model_filename=None,
          params_filename=None):
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return

    startup_program = fluid.default_startup_program()
    main_program = fluid.default_main_program()

    if args.enable_ce:
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=BATCH_SIZE)
        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE)
        startup_program.random_seed = 90
        main_program.random_seed = 90
    else:
        train_reader = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
                                    batch_size=BATCH_SIZE)
        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE)

    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if nn_type == 'softmax_regression':
        net_conf = softmax_regression
    elif nn_type == 'multilayer_perceptron':
        net_conf = multilayer_perceptron
    else:
        net_conf = convolutional_neural_network

    prediction, avg_loss, acc = net_conf(img, label)

    test_program = main_program.clone(for_test=True)
    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(avg_loss)

    def train_test(train_test_program, train_test_feed, train_test_reader):
        acc_set = []
        avg_loss_set = []
        for test_data in train_test_reader():
            acc_np, avg_loss_np = exe.run(program=train_test_program,
                                          feed=train_test_feed.feed(test_data),
                                          fetch_list=[acc, avg_loss])
            acc_set.append(float(acc_np))
            avg_loss_set.append(float(avg_loss_np))
        # get test acc and loss
        acc_val_mean = numpy.array(acc_set).mean()
        avg_loss_val_mean = numpy.array(avg_loss_set).mean()
        return avg_loss_val_mean, acc_val_mean

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    exe = fluid.Executor(place)

    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
    exe.run(startup_program)
    epochs = [epoch_id for epoch_id in range(PASS_NUM)]

    lists = []
    step = 0
    for epoch_id in epochs:
        for step_id, data in enumerate(train_reader()):
            metrics = exe.run(main_program,
                              feed=feeder.feed(data),
                              fetch_list=[avg_loss, acc])
            if step % 100 == 0:
                print("Step %d, Epoch %d, Cost %f" %
                      (step, epoch_id, metrics[0]))
            step += 1
        # test for epoch
        avg_loss_val, acc_val = train_test(train_test_program=test_program,
                                           train_test_reader=test_reader,
                                           train_test_feed=feeder)

        print("Test with Epoch %d, avg_cost: %s, acc: %s" %
              (epoch_id, avg_loss_val, acc_val))
        lists.append((epoch_id, avg_loss_val, acc_val))
        if save_dirname is not None:
            fluid.io.save_inference_model(save_dirname, ["img"], [prediction],
                                          exe,
                                          model_filename=model_filename,
                                          params_filename=params_filename)

    if args.enable_ce:
        print("kpis\ttrain_cost\t%f" % metrics[0])
        print("kpis\ttest_cost\t%s" % avg_loss_val)
        print("kpis\ttest_acc\t%s" % acc_val)

    # find the best pass
    best = sorted(lists, key=lambda entry: float(entry[1]))[0]
    print('Best pass is %s, testing Avgcost is %s' % (best[0], best[1]))
    print('The classification accuracy is %.2f%%' % (float(best[2]) * 100))
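

# A hypothetical invocation of train() above; the net builders
# (softmax_regression, multilayer_perceptron, convolutional_neural_network)
# and BATCH_SIZE / PASS_NUM / args are defined elsewhere in the source:
#
#     train('convolutional_neural_network',
#           use_cuda=False,
#           save_dirname='recognize_digits.inference.model')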
Exemplo n.º 40
0
accuracy = fluid.layers.accuracy(input=predict, label=words[-1])
auc_var, batch_auc_var, auc_states = \
    fluid.layers.auc(input=predict, label=words[-1], num_thresholds=2 ** 12, slide_steps=20)

dataset = fluid.DatasetFactory().create_dataset()
dataset.set_use_var(self.sparse_input_ids + [label])
pipe_command = "python dataset_generator.py"
dataset.set_pipe_command(pipe_command)
dataset.set_batch_size(128)
dataset.set_thread(10)
dataset.set_hdfs_config("afs:xxx.xxx.xx.xx", "xxxx,xxxx")
optimizer = fluid.optimizer.SGD(0.01)
optimizer.minimize(loss)
exe = fluid.Executor(fluid.CPUPlace())

train_folder = ["afs:/app/fs/20191020", "afs:/app/fs/20191021"]
train_filelists = [["afs:/app/fs/20191020/0.txt", "afs:/app/fs/20191020/1.txt"],
                   ["afs:/app/fs/20191021/0.txt", "afs:/app/fs/20191021/1.txt"]]

exe.run(fluid.default_startup_program())
for filelist in train_filelists:
    dataset.set_filelist(filelist)
    exe.train_from_dataset(
        program=fluid.default_main_program(),
        dataset=dataset,
        fetch_list=[auc_var],
        fetch_info=["auc"],
        debug=False)
    # save model here
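    # A sketch of the save step hinted at above; `words` and `predict` come
    # from the surrounding model definition (outside this excerpt), and the
    # directory name is illustrative:
    #
    #     fluid.io.save_inference_model(
    #         "model/" + filelist[0].split("/")[-2],
    #         feeded_var_names=[w.name for w in words[:-1]],
    #         target_vars=[predict],
    #         executor=exe)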

Exemplo n.º 41
0
def do_train(args):
    """train function"""

    train_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(train_prog, startup_prog):
        train_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():
            context_wordseq = fluid.data(
                name='context_wordseq',
                shape=[-1, 1],
                dtype='int64',
                lod_level=1)
            response_wordseq = fluid.data(
                name='response_wordseq',
                shape=[-1, 1],
                dtype='int64',
                lod_level=1)
            labels = fluid.data(name='labels', shape=[-1, 1], dtype='int64')

            input_inst = [context_wordseq, response_wordseq, labels]
            input_field = InputField(input_inst)
            data_reader = fluid.io.PyReader(
                feed_list=input_inst, capacity=4, iterable=False)

            loss = create_net(
                is_training=True, model_input=input_field, args=args)
            loss.persistable = True
            # gradient clipping
            fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
                max=1.0, min=-1.0))
            optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
            optimizer.minimize(loss)

            if args.use_cuda:
                dev_count = fluid.core.get_cuda_device_count()
                place = fluid.CUDAPlace(
                    int(os.getenv('FLAGS_selected_gpus', '0')))
            else:
                dev_count = int(os.environ.get('CPU_NUM', 1))
                place = fluid.CPUPlace()

            processor = reader.DataProcessor(
                data_path=args.training_file,
                max_seq_length=args.max_seq_len,
                batch_size=args.batch_size)

            batch_generator = processor.data_generator(
                place=place,
                phase="train",
                shuffle=True,
                sample_pro=args.sample_pro)

            num_train_examples = processor.get_num_examples(phase='train')
            max_train_steps = args.epoch * num_train_examples // dev_count // args.batch_size

            print("Num train examples: %d" % num_train_examples)
            print("Max train steps: %d" % max_train_steps)

    data_reader.decorate_batch_generator(batch_generator)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    assert (args.init_from_checkpoint == "") or (
        args.init_from_pretrain_model == "")

    # init from a checkpoint to resume the previous training
    if args.init_from_checkpoint:
        save_load_io.init_from_checkpoint(args, exe, train_prog)
    # init from a pretrained model to better solve the current task
    if args.init_from_pretrain_model:
        save_load_io.init_from_pretrain_model(args, exe, train_prog)

    if args.word_emb_init:
        print("start loading word embedding init ...")
        if six.PY2:
            word_emb = np.array(
                pickle.load(io.open(args.word_emb_init, 'rb'))).astype(
                    'float32')
        else:
            word_emb = np.array(
                pickle.load(
                    io.open(args.word_emb_init, 'rb'),
                    encoding="bytes")).astype('float32')
        set_word_embedding(word_emb, place)
        print("finish init word embedding  ...")

    build_strategy = fluid.compiler.BuildStrategy()
    build_strategy.enable_inplace = True

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    steps = 0
    begin_time = time.time()
    time_begin = time.time()

    for epoch_step in range(args.epoch):
        data_reader.start()
        sum_loss = 0.0
        ce_loss = 0.0
        while True:
            try:
                fetch_list = [loss.name]
                outputs = exe.run(compiled_train_prog, fetch_list=fetch_list)
                np_loss = outputs
                sum_loss += np.array(np_loss).mean()
                ce_loss = np.array(np_loss).mean()

                if steps % args.print_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    current_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                                 time.localtime(time.time()))
                    print(
                        '%s epoch: %d, step: %s, avg loss %s, speed: %f steps/s'
                        % (current_time, epoch_step, steps, sum_loss /
                           args.print_steps, args.print_steps / used_time))
                    sum_loss = 0.0
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    if args.save_checkpoint:
                        save_load_io.save_checkpoint(args, exe, train_prog,
                                                     "step_" + str(steps))
                    if args.save_param:
                        save_load_io.save_param(args, exe, train_prog,
                                                "step_" + str(steps))
                steps += 1
            except fluid.core.EOFException:
                data_reader.reset()
                break

    if args.save_checkpoint:
        save_load_io.save_checkpoint(args, exe, train_prog, "step_final")
    if args.save_param:
        save_load_io.save_param(args, exe, train_prog, "step_final")

    def get_cards():
        num = 0
        cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
        if cards != '':
            num = len(cards.split(","))
        return num

    if args.enable_ce:
        card_num = get_cards()
        pass_time_cost = time.time() - begin_time
        print("test_card_num", card_num)
        print("kpis\ttrain_duration_card%s\t%s" % (card_num, pass_time_cost))
        print("kpis\ttrain_loss_card%s\t%f" % (card_num, ce_loss))
Exemplo n.º 42
0
    def test_accuracy(self):
        image = fluid.layers.data(
            name='image', shape=[1, 28, 28], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        model = MobileNet()
        out = model.net(input=image, class_dim=10)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))
        optimizer.minimize(avg_cost)
        main_prog = fluid.default_main_program()
        val_prog = main_prog.clone(for_test=True)

        place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda(
        ) else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        feeder = fluid.DataFeeder([image, label], place, program=main_prog)
        train_reader = paddle.fluid.io.batch(
            paddle.dataset.mnist.train(), batch_size=64)
        eval_reader = paddle.fluid.io.batch(
            paddle.dataset.mnist.test(), batch_size=64)

        def train(program):
            iter = 0
            for data in train_reader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print(
                        'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                        format(iter, cost, top1, top5))

        def test(program):
            iter = 0
            result = [[], [], []]
            for data in eval_reader():
                cost, top1, top5 = exe.run(
                    program,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost, acc_top1, acc_top5])
                iter += 1
                if iter % 100 == 0:
                    print('eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                          format(iter, cost, top1, top5))
                result[0].append(cost)
                result[1].append(top1)
                result[2].append(top5)
            print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
                np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
            return np.mean(result[1]), np.mean(result[2])

        train(main_prog)
        top1_1, top5_1 = test(main_prog)

        config = {
            'weight_quantize_type': 'channel_wise_abs_max',
            'activation_quantize_type': 'moving_average_abs_max',
            'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        }
        quant_train_prog = quant_aware(main_prog, place, config, for_test=False)
        quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
        train(quant_train_prog)
        quant_eval_prog, int8_prog = convert(
            quant_eval_prog, place, config, save_int8=True)
        top1_2, top5_2 = test(quant_eval_prog)
        # values before quantization and after quantization should be close
        print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
        print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
Exemplo n.º 43
0
def train(cfg):
    # startup_prog = fluid.Program()
    # train_prog = fluid.Program()

    drop_last = True

    dataset = SegDataset(
        file_list=cfg.DATASET.TRAIN_FILE_LIST,
        mode=ModelPhase.TRAIN,
        shuffle=True,
        data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # When using the sync batch norm strategy, drop the last batch if the
        # number of samples in batch_data is less than cfg.BATCH_SIZE, to
        # avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # place = places[0]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated evenly to each GPU
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    data_loader, loss, lr, pred, grts, masks, image = build_model(
        phase=ModelPhase.TRAIN)
    data_loader.set_sample_generator(
        data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)

    exe = fluid.Executor(place)

    cfg.update_from_file(args.teacher_cfg_file)
    # teacher_arch = teacher_cfg.architecture
    teacher_program = fluid.Program()
    teacher_startup_program = fluid.Program()

    with fluid.program_guard(teacher_program, teacher_startup_program):
        with fluid.unique_name.guard():
            _, teacher_loss, _, _, _, _, _ = build_model(
                teacher_program,
                teacher_startup_program,
                phase=ModelPhase.TRAIN,
                image=image,
                label=grts,
                mask=masks)

    exe.run(teacher_startup_program)

    teacher_program = teacher_program.clone(for_test=True)
    ckpt_dir = cfg.SLIM.KNOWLEDGE_DISTILL_TEACHER_MODEL_DIR
    assert ckpt_dir is not None
    print('load teacher model:', ckpt_dir)
    fluid.io.load_params(exe, ckpt_dir, main_program=teacher_program)

    # cfg = load_config(FLAGS.config)
    cfg.update_from_file(args.cfg_file)
    data_name_map = {
        'image': 'image',
        'label': 'label',
        'mask': 'mask',
    }
    merge(teacher_program, fluid.default_main_program(), data_name_map, place)
    distill_pairs = [[
        'teacher_bilinear_interp_2.tmp_0', 'bilinear_interp_0.tmp_0'
    ]]
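    # Note (a clarifying comment): merge() copies each teacher variable into
    # the student program under a 'teacher_' prefix, which is why this pair
    # (and the fetch_list below) reference 'teacher_...' names.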

    def distill(pairs, weight):
        """
        Add a distillation loss for the given teacher/student feature-map
        pair; each entry in pairs holds a teacher output name and the
        matching student output name
        """
        loss = l2_loss(pairs[0][0], pairs[0][1])
        weighted_loss = loss * weight
        return weighted_loss

    distill_loss = distill(distill_pairs, 0.1)
    cfg.update_from_file(args.cfg_file)
    optimizer = solver.Solver(None, None)
    all_loss = loss + distill_loss
    lr = optimizer.optimise(all_loss)

    exe.run(fluid.default_startup_program())

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iterations
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy,
                                             fluid.default_main_program())
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            loss_name=all_loss.name,
            exec_strategy=exec_strategy,
            build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, fluid.default_main_program())
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
        load_vars = []
        load_fail_vars = []

        def var_shape_matched(var, shape):
            """
            Check whether the persistable variable shape matches the current network
            """
            var_exist = os.path.exists(
                os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
            if var_exist:
                var_shape = parse_shape_from_file(
                    os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                return var_shape == shape
            return False

        for x in fluid.default_main_program().list_vars():
            if isinstance(x, fluid.framework.Parameter):
                shape = tuple(fluid.global_scope().find_var(
                    x.name).get_tensor().shape())
                if var_shape_matched(x, shape):
                    load_vars.append(x)
                else:
                    load_fail_vars.append(x)

        fluid.io.load_vars(
            exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
        for var in load_vars:
            print_info("Parameter[{}] loaded sucessfully!".format(var.name))
        for var in load_fail_vars:
            print_info(
                "Parameter[{}] does not exist or its shape does not match the"
                " current network; skipping it.".format(var.name))
        print_info("{}/{} pretrained parameters loaded successfully!".format(
            len(load_vars),
            len(load_vars) + len(load_fail_vars)))
    else:
        print_info(
            'Pretrained model dir {} does not exist; training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    #fetch_list = [avg_loss.name, lr.name]
    fetch_list = [
        loss.name, 'teacher_' + teacher_loss.name, distill_loss.name, lr.name
    ]

    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(
            precision=4, suppress=True, linewidth=160, floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_tb:
        if not args.tb_log_dir:
            print_info("Please specify the log directory by --tb_log_dir.")
            exit(1)

        from tb_paddle import SummaryWriter
        log_writer = SummaryWriter(args.tb_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    global_step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and not drop_last:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    avg_t_loss = 0.0
    avg_d_loss = 0.0
    best_mIoU = 0.0

    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # training process matches expectations
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_tb:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  global_step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  global_step)
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnecessary logging and calculation
                    loss, t_loss, d_loss, lr = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    avg_t_loss += np.mean(np.array(t_loss))
                    avg_d_loss += np.mean(np.array(d_loss))
                    global_step += 1

                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        avg_t_loss /= args.log_steps
                        avg_d_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} teacher loss={:.4f} distill loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss,
                                 avg_t_loss, avg_d_loss, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        if args.use_tb:
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/speed', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        avg_t_loss = 0.0
                        avg_d_loss = 0.0
                        timer.restart()

            except fluid.core.EOFException:
                data_loader.reset()
                break
            except Exception as e:
                print(e)

        if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
                or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(exe, fluid.default_main_program(), epoch)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(
                    cfg=cfg,
                    ckpt_dir=ckpt_dir,
                    use_gpu=args.use_gpu,
                    use_mpio=args.use_mpio)
                if args.use_tb:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
                                          global_step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
                                          global_step)

                if mean_iou > best_mIoU:
                    best_mIoU = mean_iou
                    update_best_model(ckpt_dir)
                    print_info("Save best model {} to {}, mIoU = {:.4f}".format(
                        ckpt_dir,
                        os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, 'best_model'),
                        mean_iou))

            # Use Tensorboard to visualize results
            if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(
                    cfg=cfg,
                    use_gpu=args.use_gpu,
                    vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                    vis_dir="visual",
                    ckpt_dir=ckpt_dir,
                    log_writer=log_writer)
        if cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(exe, fluid.default_main_program(), epoch)

    # save final model
    if cfg.TRAINER_ID == 0:
        save_checkpoint(exe, fluid.default_main_program(), 'final')
Exemplo n.º 44
0
def do_train(args):
    """执行训练过程

    Args:
        args: DefaultArgs对象,在utils.py中定义,
             存储模型训练的所有参数,

    Returns:
        训练产出的program及模型输出变量
    """
    train_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    dataset = Dataset(args)
    with fluid.program_guard(train_program, startup_program):
        train_program.random_seed = args.random_seed
        startup_program.random_seed = args.random_seed

        with fluid.unique_name.guard():
            train_ret = create_model(args,
                                     dataset.vocab_size,
                                     dataset.num_labels,
                                     mode='train')
            test_program = train_program.clone(for_test=True)

            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate)
            optimizer.minimize(train_ret["avg_cost"])

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    train_reader = create_pyreader(args,
                                   file_name=args.train_data,
                                   feed_list=train_ret['feed_list'],
                                   place=place,
                                   reader=dataset)

    test_reader = create_pyreader(args,
                                  file_name=args.test_data,
                                  feed_list=train_ret['feed_list'],
                                  place=place,
                                  reader=dataset,
                                  iterable=True)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    if args.init_checkpoint:
        utils.init_pretraining_params(exe, args.init_checkpoint, train_program)

    test_process(exe, test_program, test_reader, train_ret)
    if dev_count > 1:
        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.compiler.BuildStrategy()

        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    step = 0
    fetch_list = []
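    # fetch_list is left empty, so exe.run returns no values; a sketch of
    # fetching the loss instead (train_ret['avg_cost'] is defined above):
    #
    #     fetch_list = [train_ret['avg_cost'].name]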
    for epoch_id in range(args.epoch):
        for data in train_reader():
            outputs = exe.run(
                compiled_prog,
                fetch_list=fetch_list,
                feed=data[0],
            )
            step += 1

    test_process(exe, test_program, test_reader, train_ret)
    return test_program, train_ret['crf_decode']
Exemplo n.º 45
0
def train(conf_dict):
    """
    train process
    """
    # Get data layer
    data = layers.DataLayer()
    # Load network structure dynamically
    net = utils.import_class("nets", conf_dict["net"]["module_name"],
                             conf_dict["net"]["class_name"])(conf_dict)
    # Load loss function dynamically
    loss = utils.import_class("losses", conf_dict["loss"]["module_name"],
                              conf_dict["loss"]["class_name"])(conf_dict)
    # Load Optimization method
    optimizer = utils.import_class(
        "optimizers", "paddle_optimizers",
        conf_dict["optimizer"]["class_name"])(conf_dict)

    # Get service
    if "use_cuda" in conf_dict and conf_dict["use_cuda"] == 1:
        place = fluid.core.CUDAPlace(0)
    else:
        place = fluid.core.CPUPlace()

    if conf_dict["task_mode"] == "pairwise":
        # Build network
        left = data.ops(name="left", shape=[1], dtype="int64", lod_level=1)
        pos_right = data.ops(name="right",
                             shape=[1],
                             dtype="int64",
                             lod_level=1)
        neg_right = data.ops(name="neg_right",
                             shape=[1],
                             dtype="int64",
                             lod_level=1)
        left_feat, pos_score = net.predict(left, pos_right)
        _, neg_score = net.predict(left, neg_right)
        avg_cost = loss.compute(pos_score, neg_score)
        # Get Feeder and Reader
        feeder = fluid.DataFeeder(
            place=place, feed_list=[left.name, pos_right.name, neg_right.name])
        reader = data_reader.get_reader(conf_dict, False, None)
    else:
        # Build network
        left = data.ops(name="left", shape=[1], dtype="int64", lod_level=1)
        right = data.ops(name="right", shape=[1], dtype="int64", lod_level=1)
        label = data.ops(name="label", shape=[1], dtype="int64", lod_level=0)
        left_feat, pred = net.predict(left, right)
        avg_cost = loss.compute(pred, label)
        avg_cost.persistable = True
        # Get Feeder and Reader
        feeder = fluid.DataFeeder(
            place=place, feed_list=[left.name, right.name, label.name])
        reader = data_reader.get_reader(conf_dict, False, None)
    # Save Infer model
    infer_program = fluid.default_main_program().clone()
    # operate Optimization
    optimizer.ops(avg_cost)
    # optimize memory
    # fluid.memory_optimize(fluid.default_main_program())
    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())
    # Get and run executor
    parallel_executor = fluid.ParallelExecutor(
        use_cuda="use_cuda" in conf_dict and conf_dict["use_cuda"] == 1,
        loss_name=avg_cost.name,
        main_program=fluid.default_main_program())
    # Get device number
    device_count = parallel_executor.device_count
    logging.info("device count: %d" % device_count)
    # run train
    logging.info("start train process ...")
    for epoch_id in range(conf_dict["epoch_num"]):
        losses = []
        # Get batch data iterator
        batch_data = paddle.batch(reader,
                                  conf_dict["batch_size"],
                                  drop_last=False)
        start_time = time.time()
        total_loss = 0.0
        for iter, data in enumerate(batch_data()):
            if len(data) < device_count:
                continue
            avg_loss = parallel_executor.run([avg_cost.name],
                                             feed=feeder.feed(data))
            total_loss += np.mean(avg_loss[0])
            if (iter + 1) % 100 == 0:
                print("epoch: %d, iter: %d, loss: %f" %
                      (epoch_id, iter, total_loss / 100))
                total_loss = 0.0
            losses.append(np.mean(avg_loss[0]))
        end_time = time.time()
        print("epoch: %d, loss: %f, used time: %f" %
              (epoch_id, np.mean(losses), end_time - start_time))
        model_save_dir = conf_dict["model_path"]
        model_path = os.path.join(model_save_dir, str(epoch_id))
        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)
        if conf_dict["task_mode"] == "pairwise":
            feed_var_names = [left.name, pos_right.name]
            target_vars = [left_feat, pos_score]
        else:
            feed_var_names = [left.name, right.name]
            target_vars = [left_feat, pred]
        fluid.io.save_inference_model(model_path, feed_var_names, target_vars,
                                      executor, infer_program)
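
        # Loading one of the inference models saved above would be the mirror
        # operation (a sketch; the epoch subdirectory '0' is illustrative):
        #
        #     infer_prog, feed_names, fetch_targets = \
        #         fluid.io.load_inference_model(
        #             os.path.join(conf_dict["model_path"], "0"), executor)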
Exemplo n.º 46
0
fluid.clip.set_gradient_clip(
    fluid.clip.GradientClipByGlobalNorm(clip_norm=CLIP))
p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)

grad_list = [elem[1] for elem in p_g]
grad_clip_list = [elem[1] for elem in p_g_clip]

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
exe.run(fluid.default_startup_program())

count = 0
for data in train_reader():
    count += 1
    if count > 5:
        break
    out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list)
    out_clip = exe.run(prog_clip,
                       feed=feeder.feed(data),
                       fetch_list=grad_clip_list)
    global_norm = 0
    for v in out[1:]:
        global_norm += np.sum(np.power(v, 2))
    global_norm = np.sqrt(global_norm)
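    # Completing the check this snippet builds toward (a sketch): after
    # GradientClipByGlobalNorm, the clipped global norm should match
    # min(global_norm, CLIP) up to numerical tolerance.
    global_norm_clip = 0
    for v in out_clip[1:]:
        global_norm_clip += np.sum(np.power(v, 2))
    global_norm_clip = np.sqrt(global_norm_clip)
    assert np.isclose(global_norm_clip, min(global_norm, CLIP), rtol=5e-3)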
Exemplo n.º 47
0
    def run_boxps_preload(self, is_cpu=True):
        x = fluid.layers.data(name='x', shape=[1], dtype='int64', lod_level=0)
        y = fluid.layers.data(name='y', shape=[1], dtype='int64', lod_level=0)
        emb_x, emb_y = _pull_box_sparse([x, y], size=2)
        emb_xp = _pull_box_sparse(x, size=2)
        concat = layers.concat([emb_x, emb_y], axis=1)
        fc = layers.fc(input=concat,
                       name="fc",
                       size=1,
                       num_flatten_dims=1,
                       bias_attr=False)
        loss = layers.reduce_mean(fc)
        layers.Print(loss)
        place = fluid.CPUPlace(
        ) if is_cpu or not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        batch_size = 2

        def binary_print(slot, fout):
            fout.write(str(len(slot)) + " ")
            for e in slot:
                fout.write(str(e) + " ")

        batch1 = np.ones(
            (batch_size, 2, 1)).astype("int64").reshape(batch_size, 2, 1)
        filelist = []
        place_str = "cpu" if is_cpu else "gpu"
        for i in range(2):
            filelist.append("test_hdfs_" + place_str + "_" + str(i))
        for f in filelist:
            with open(f, "w") as fout:
                for ins in batch1:
                    for slot in ins:
                        binary_print(slot, fout)
                fout.write("\n")

        def create_dataset():
            dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset")
            dataset.set_date("20190930")
            dataset.set_use_var([x, y])
            dataset.set_batch_size(2)
            dataset.set_thread(1)
            dataset.set_filelist(filelist)
            return dataset

        datasets = []
        datasets.append(create_dataset())
        datasets.append(create_dataset())
        optimizer = fluid.optimizer.SGD(learning_rate=0.5)
        optimizer = fluid.optimizer.PipelineOptimizer(optimizer,
                                                      cut_list=[],
                                                      place_list=[place],
                                                      concurrency_list=[1],
                                                      queue_size=1,
                                                      sync_steps=-1)
        optimizer.minimize(loss)
        exe.run(fluid.default_startup_program())
        datasets[0].load_into_memory()
        datasets[0].begin_pass()
        datasets[1].preload_into_memory()
        exe.train_from_dataset(program=fluid.default_main_program(),
                               dataset=datasets[0],
                               print_period=1)
        datasets[0].end_pass(True)
        datasets[1].wait_preload_done()
        datasets[1].begin_pass()
        exe.train_from_dataset(program=fluid.default_main_program(),
                               dataset=datasets[1],
                               print_period=1,
                               debug=True)
        datasets[1].end_pass(False)
        for f in filelist:
            os.remove(f)
Exemplo n.º 48
0
def train():
    def save_model(postfix):
        model_path = os.path.join('./work', postfix)
        print('save models to %s' % (model_path))
        fluid.io.save_params(exe, model_path)

    def network(is_train):
        record_file = glob.glob('train*.recordio')
        print(record_file)
        test_file = 'train00.recordio'
        record_file.remove(test_file)
        test_file = ['test.recordio', 'train00.recordio']
        file_obj = fluid.layers.open_files(
            filenames=record_file if is_train else test_file,
            shapes=[[-1, 3, 540, 960], [-1, 1, 540, 960], [-1, 1], [-1, 1]],
            dtypes=['float32', 'float32', 'int64', 'int64'],
            lod_levels=[0, 0, 0, 0],
            pass_num=1000)
        file_obj = fluid.layers.shuffle(file_obj, 500)
        file_obj = fluid.layers.batch(file_obj,
                                      batch_size=6 if is_train else 100)
        img, des_im, total_num, group_num = fluid.layers.read_file(file_obj)
        print('read over')  # here is the data
        total_num = fluid.layers.cast(total_num, dtype="float32")
        group_num = create_group(group_num)
        predict1, predict0 = FPN_and_groupout(img)  # build our network
        delta0 = 100
        delta1 = 100
        loss0 = fluid.layers.elementwise_sub(predict1, des_im)
        loss0 = fluid.layers.reduce_mean(fluid.layers.abs(loss0))
        loss1 = fluid.layers.reduce_mean(
            fluid.layers.square_error_cost(input=fluid.layers.reduce_sum(
                predict1, dim=[2, 3]),
                                           label=total_num))
        loss2 = fluid.layers.cross_entropy(input=predict0,
                                           label=group_num,
                                           soft_label=True)
        loss2 = fluid.layers.reduce_mean(loss2)
        loss = fluid.layers.reduce_mean(
            fluid.layers.elementwise_add(
                fluid.layers.elementwise_add(loss0 * delta0, loss1),
                delta1 * loss2))
        # If we use only loss0, the final loss is around 1e-05 while the
        # count error is around 1e+01, so we weight the terms with deltas
        return loss, predict1, total_num

    with fluid.unique_name.guard():
        train_loss, pre_train, tr_num = network(is_train=True)
        optimizer = fluid.optimizer.AdamOptimizer(
            learning_rate=fluid.layers.exponential_decay(0.0001, 4000, 0.9))
        optimizer.minimize(train_loss)
    test_program = fluid.Program()
    with fluid.unique_name.guard():
        with fluid.program_guard(test_program, fluid.Program()):
            loss, pre, true_num = network(is_train=False)
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    min_error = 1000
    for i in range(200000):
        loss_n, pretrain, tr_train = exe.run(
            program=fluid.default_main_program(),
            fetch_list=[train_loss.name, pre_train.name, tr_num.name])
        acc0 = np.abs(
            np.rint(np.sum(np.sum(pretrain, axis=-1), axis=-1)) -
            np.rint(tr_train)) / tr_train
        av_acc0 = np.sum(acc0) / np.shape(acc0)[0]
        print("step {} train loss is {}, train error is {}".format(
            i, loss_n, av_acc0))
        if i % 1000 == 0:
            #save_model(str(i))
            pre_map, tr_nums = exe.run(program=test_program,
                                       fetch_list=[pre.name, true_num.name])
            acc = np.abs(
                np.rint(np.sum(np.sum(pre_map, axis=-1), axis=-1)) -
                np.rint(tr_nums)) / tr_nums
            acc_mae = np.sum(
                np.abs(
                    np.rint(np.sum(np.sum(pre_map, axis=-1), axis=-1)) -
                    np.rint(tr_nums))) / np.shape(acc)[0]
            av_acc = np.sum(acc) / np.shape(acc)[0]
            if av_acc < min_error:
                min_error = av_acc
                if i > 10000:
                    save_model(str(i))
            print("MAE is {}".format(acc_mae))
            print("min erro is {}".format(min_error))
            print("average error is {}".format(av_acc))
Exemplo n.º 49
0
def train(args):
    data_shape = cityscape.train_data_shape()
    num_classes = cityscape.num_classes()
    # define network
    images = fluid.layers.data(name='image', shape=data_shape, dtype='float32')
    label_sub1 = fluid.layers.data(name='label_sub1', shape=[1], dtype='int32')
    label_sub2 = fluid.layers.data(name='label_sub2', shape=[1], dtype='int32')
    label_sub4 = fluid.layers.data(name='label_sub4', shape=[1], dtype='int32')
    mask_sub1 = fluid.layers.data(name='mask_sub1', shape=[-1], dtype='int32')
    mask_sub2 = fluid.layers.data(name='mask_sub2', shape=[-1], dtype='int32')
    mask_sub4 = fluid.layers.data(name='mask_sub4', shape=[-1], dtype='int32')

    sub4_out, sub24_out, sub124_out = icnet(
        images, num_classes,
        np.array(data_shape[1:]).astype("float32"))
    loss_sub4 = create_loss(sub4_out, label_sub4, mask_sub4, num_classes)
    loss_sub24 = create_loss(sub24_out, label_sub2, mask_sub2, num_classes)
    loss_sub124 = create_loss(sub124_out, label_sub1, mask_sub1, num_classes)
    reduced_loss = LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 + LAMBDA3 * loss_sub124

    regularizer = fluid.regularizer.L2Decay(0.0001)
    optimizer = fluid.optimizer.Momentum(learning_rate=poly_decay(),
                                         momentum=0.9,
                                         regularization=regularizer)
    _, params_grads = optimizer.minimize(reduced_loss, no_grad_set=no_grad_set)

    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())

    if args.init_model is not None:
        print("load model from: %s" % args.init_model)
        sys.stdout.flush()
        fluid.io.load_params(exe, args.init_model)

    iter_id = 0
    t_loss = 0.
    sub4_loss = 0.
    sub24_loss = 0.
    sub124_loss = 0.
    train_reader = cityscape.train(args.batch_size,
                                   flip=args.random_mirror,
                                   scaling=args.random_scaling)
    start_time = time.time()
    while True:
        # train a pass
        for data in train_reader():
            if iter_id > TOTAL_STEP:
                end_time = time.time()
                print("kpis	train_duration	%f" % (end_time - start_time))
                return
            iter_id += 1
            results = exe.run(
                feed=get_feeder_data(data, place),
                fetch_list=[reduced_loss, loss_sub4, loss_sub24, loss_sub124])
            t_loss += results[0]
            sub4_loss += results[1]
            sub24_loss += results[2]
            sub124_loss += results[3]
            # training log
            if iter_id % LOG_PERIOD == 0:
                print(
                    "Iter[%d]; train loss: %.3f; sub4_loss: %.3f; sub24_loss: %.3f; sub124_loss: %.3f"
                    % (iter_id, t_loss / LOG_PERIOD, sub4_loss / LOG_PERIOD,
                       sub24_loss / LOG_PERIOD, sub124_loss / LOG_PERIOD))
                print("kpis	train_cost	%f" % (t_loss / LOG_PERIOD))

                t_loss = 0.
                sub4_loss = 0.
                sub24_loss = 0.
                sub124_loss = 0.
                sys.stdout.flush()

            if iter_id % CHECKPOINT_PERIOD == 0 and args.checkpoint_path is not None:
                dir_name = args.checkpoint_path + "/" + str(iter_id)
                fluid.io.save_persistables(exe, dirname=dir_name)
                print("Saved checkpoint: %s" % (dir_name))
Exemplo n.º 50
0
def train(word_dict,
          net_method,
          use_cuda,
          seed,
          quality,
          save_dirname=None):
    BATCH_SIZE = 128
    PASS_NUM = 100
    dict_dim = len(word_dict)
    class_dim = 2
    target_val_acc = quality

    # Seed for batch producer
    random.seed(seed) 
    
    # Seed for weight initialization
    fluid.default_startup_program().random_seed = seed

    # Setup input features and label as data layers
    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")

    cost, acc_out, prediction = net_method(
        data, label, input_dim=dict_dim, class_dim=class_dim)

    # Initialize a test program for obtaining test accuracy and cost
    test_program = fluid.default_main_program().clone(for_test=True)

    # Setup Adam optimizer
    # A learning rate of 5e-4 works for conv models and 2e-3 for the LSTM model
    adam = fluid.optimizer.Adam(learning_rate=0.0005)

    optimize_ops, params_grads = adam.minimize(cost)

    # Create reader to iterate over training set
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=25000),
        batch_size=BATCH_SIZE)

    # Setup place and executor for runtime
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
    
    # Create reader to iterate over validation set
    test_reader = paddle.batch(
                    paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            train_loss_set = []
            train_acc_set = []

            # Calculate average training loss and accuracy
            # across all mini-batches in the training set
            for batch_id, data in enumerate(train_reader()):
                cost_val, acc_val = exe.run(main_program,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                train_loss_set.append(float(cost_val))
                train_acc_set.append(float(acc_val))
            train_loss = np.array(train_loss_set).mean()
            train_acc = np.array(train_acc_set).mean() * 100

            # Calculate average validation loss and accuracy
            # across all mini-batches in the validation set
            acc_set = []
            avg_loss_set = []
            for tid, test_data in enumerate(test_reader()):
                avg_loss_np, acc_np = exe.run(
                            program=test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[cost, acc_out])
                acc_set.append(float(acc_np))
                avg_loss_set.append(float(avg_loss_np))
            acc_val = np.array(acc_set).mean() * 100 
            avg_loss_val = np.array(avg_loss_set).mean()
            print("Epoch =", pass_id, ", train-accuracy =", train_acc, ", train-loss =", train_loss, ", validation-accuracy =", acc_val, ", validation-loss =", avg_loss_val)

            if acc_val > target_val_acc:
                # Exit the training loop on reaching the desired accuracy
                break

    train_loop(fluid.default_main_program())
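

# A hypothetical invocation (word_dict and a net builder such as
# convolution_net come from the surrounding source; names here are
# illustrative):
#
#     word_dict = paddle.dataset.imdb.word_dict()
#     train(word_dict, convolution_net, use_cuda=False, seed=90, quality=85.0)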
Exemplo n.º 51
0
    def test_ptb_rnn_cpu_float32(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel("ptb_model",
                                 hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            sgd = SGDOptimizer(learning_rate=1e-3)
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)
                if i == 0:
                    for param in ptb_model.parameters():
                        dy_param_init[param.name] = param.numpy()
                dy_loss.backward()
                sgd.minimize(dy_loss)
                ptb_model.clear_gradients()
                if i == batch_num - 1:
                    for param in ptb_model.parameters():
                        dy_param_updated[param.name] = param.numpy()

            dy_loss_value = dy_loss.numpy()
            dy_last_cell_value = last_cell.numpy()
            dy_last_hidden_value = last_hidden.numpy()

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            ptb_model = PtbModel("ptb_model",
                                 hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            sgd = SGDOptimizer(learning_rate=1e-3)
            x = fluid.layers.data(name="x",
                                  shape=[-1, num_steps, 1],
                                  dtype='int64')
            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
            init_hidden = fluid.layers.data(name="init_hidden",
                                            shape=[1],
                                            dtype='float32')
            init_cell = fluid.layers.data(name="init_cell",
                                          shape=[1],
                                          dtype='float32')

            static_loss, static_last_hidden, static_last_cell = ptb_model(
                x, y, init_hidden, init_cell)
            sgd.minimize(static_loss)
            static_param_updated = dict()
            static_param_init = dict()
            static_param_name_list = list()
            for param in ptb_model.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(framework.default_startup_program(),
                          fetch_list=static_param_name_list)
            for i in range(len(static_param_name_list)):
                static_param_init[static_param_name_list[i]] = out[i]
            static_loss_value = None
            static_last_cell_value = None
            static_last_hidden_value = None
            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                fetch_list = [
                    static_loss, static_last_hidden, static_last_cell
                ]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "x": x_data,
                                  "y": y_data,
                                  "init_hidden": init_hidden_data,
                                  "init_cell": init_cell_data
                              },
                              fetch_list=fetch_list)
                static_loss_value = out[0]
                static_last_hidden_value = out[1]
                static_last_cell_value = out[2]

                if i == batch_num - 1:
                    for k in range(3, len(out)):
                        static_param_updated[static_param_name_list[
                            k - 3]] = out[k]

        self.assertTrue(np.array_equal(static_loss_value, dy_loss_value))
        self.assertTrue(
            np.array_equal(static_last_cell_value, dy_last_cell_value))
        self.assertTrue(
            np.array_equal(static_last_hidden_value, dy_last_hidden_value))
        for key, value in six.iteritems(static_param_init):
            self.assertTrue(np.array_equal(value, dy_param_init[key]))
        for key, value in six.iteritems(static_param_updated):
            self.assertTrue(np.array_equal(value, dy_param_updated[key]))
Exemplo n.º 52
0

import unittest
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid.backward import append_backward
import paddle.fluid.framework as framework
from paddle.fluid.framework import Program, switch_main_program
import bisect
import numpy as np

fluid.default_startup_program().random_seed = 1


class TestDyRnnStaticInput(unittest.TestCase):
    def setUp(self):
        self._delta = 0.005
        self._max_sequence_len = 3
        self._program = Program()
        switch_main_program(self._program)
        self.output_dim = 10
        self.place = core.CPUPlace()
        self.prepare_x_tensor()
        self.prepare_static_input_tensor()
        self.exe = fluid.Executor(self.place)

    def prepare_x_tensor(self):
Exemplo n.º 53
0

import numpy as np
import paddle
import paddle.fluid as fluid
import math
import sys

# We need to fix the random seed and the training data so that the loss
# values computed by the default version and by the memory-optimization
# version can be compared accurately.
fluid.default_startup_program().random_seed = 111

x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')

device_type = 'CPU'
use_nccl = False
place = fluid.CPUPlace()
if fluid.core.is_compiled_with_cuda():
    device_type = 'CUDA'
    use_nccl = False
    place = fluid.CUDAPlace(0)

places = fluid.layers.get_places(device_count=0, device_type=device_type)
pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
with pd.do():