Example #1
def train():
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)

    with fluid.dygraph.guard(place):

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        if args.benchmark:
            args.epoch = 1
        processor = reader.SentaProcessor(
            data_dir=args.data_dir,
            vocab_path=args.vocab_path,
            random_seed=args.random_seed)
        num_labels = len(processor.get_labels())

        num_train_examples = processor.get_num_examples(phase="train")

        # max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        train_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='train',
            epoch=args.epoch,
            shuffle=True)

        eval_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='dev',
            epoch=args.epoch,
            shuffle=False)

        cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                           args.padding_size)

        if args.use_data_parallel:
            cnn_net = fluid.dygraph.parallel.DataParallel(
                cnn_net, strategy)
        if args.use_data_parallel:
            train_data_generator = fluid.contrib.reader.distributed_batch_reader(train_data_generator)



        sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        # note: this pre-pass only counts batches, but it reads the whole generator once
        length = len(list(enumerate(train_data_generator())))
        for eop in range(args.epoch):
            time_begin = time.time()
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.9f')
            progress = ProgressMeter(length, batch_time, data_time, prefix="epoch: [{}]".format(eop))
            end = Tools.time()
            for batch_id, data in enumerate(train_data_generator()):
                data_time.update(Tools.time() - end)
                steps += 1
                doc = to_variable(
                    np.array([
                        np.pad(x[0][0:args.padding_size], (
                            0, args.padding_size - len(x[0][
                                0:args.padding_size])),
                               'constant',
                               constant_values=(args.vocab_size)) for x in data
                    ]).astype('int64').reshape(-1, 1))

                label = to_variable(
                    np.array([x[1] for x in data]).astype('int64').reshape(
                        args.batch_size, 1))

                cnn_net.train()
                avg_cost, prediction, acc = cnn_net(doc, label)

                if args.use_data_parallel:
                    avg_cost = cnn_net.scale_loss(avg_cost)
                    avg_cost.backward()
                    cnn_net.apply_collective_grads()
                else:
                    avg_cost.backward()

                batch_time.update(Tools.time() - end)
                np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                word_num = np.sum(np_mask)
                sgd_optimizer.minimize(avg_cost)
                cnn_net.clear_gradients()
                total_cost.append(avg_cost.numpy() * word_num)
                total_acc.append(acc.numpy() * word_num)
                total_num_seqs.append(word_num)

                if steps % args.skip_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    progress.print(batch_id + 1)
                    #print("step: %d, ave loss: %f, "
                    #      "ave acc: %f, speed: %f steps/s" %
                    #      (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                    #       np.sum(total_acc) / np.sum(total_num_seqs),
                    #       args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                if steps % args.validation_steps == 0:
                    total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                    cnn_net.eval()
                    eval_steps = 0
                    for eval_batch_id, eval_data in enumerate(
                            eval_data_generator()):
                        eval_np_doc = np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in eval_data
                        ]).astype('int64').reshape(1, -1)
                        eval_label = to_variable(
                            np.array([x[1] for x in eval_data]).astype('int64')
                            .reshape(args.batch_size, 1))
                        eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
                        eval_avg_cost, eval_prediction, eval_acc = cnn_net(
                            eval_doc, eval_label)

                        eval_np_mask = (
                            eval_np_doc != args.vocab_size).astype('int32')
                        eval_word_num = np.sum(eval_np_mask)
                        total_eval_cost.append(eval_avg_cost.numpy() *
                                               eval_word_num)
                        total_eval_acc.append(eval_acc.numpy() * eval_word_num)
                        total_eval_num_seqs.append(eval_word_num)

                        eval_steps += 1

                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("Final validation result: step: %d, ave loss: %f, "
                          "ave acc: %f, speed: %f steps/s" %
                          (steps, np.sum(total_eval_cost) /
                           np.sum(total_eval_num_seqs), np.sum(total_eval_acc) /
                           np.sum(total_eval_num_seqs), eval_steps / used_time))
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = "save_dir_" + str(steps)
                    print('save model to: ' + save_path)
                    fluid.dygraph.save_persistables(cnn_net.state_dict(),
                                                    save_path)
                end = Tools.time()
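
Example #1 relies on a few timing helpers (Tools, AverageMeter, ProgressMeter) that are not part of the excerpt. Below is a minimal sketch of what they could look like, modeled on the common AverageMeter/ProgressMeter pattern; treat these implementations as assumptions rather than the original code.

import time


class Tools(object):
    # thin wrapper so the loop above can call Tools.time() as its clock source
    @staticmethod
    def time():
        return time.time()


class AverageMeter(object):
    """Tracks the latest value and the running average of a metric."""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(name=self.name, val=self.val, avg=self.avg)


class ProgressMeter(object):
    """Prints the batch index together with every registered meter."""

    def __init__(self, num_batches, *meters, prefix=""):
        self.num_batches = num_batches
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        entries = [self.prefix + ' [{}/{}]'.format(batch, self.num_batches)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))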
Example #2
    def train_one_epoch(self, epoch):
        losses = []
        accs = []

        for i in range(self.model_num):
            if self.use_data_parallel:
                self.parallel_models[i].train()
            else:
                self.models[i].train()
            losses.append(AvgrageMeter())
            accs.append(AvgrageMeter())

        for step_indx, (images, labels) in enumerate(self.train_loader):
            images, labels = to_variable(images), to_variable(labels)
            batch_size = images.shape[0]

            logits = []
            if self.use_data_parallel:
                for model in self.parallel_models:
                    logits.append(model(images))
            else:
                for model in self.models:
                    logits.append(model(images))

            log_msg = 'Train Epoch {}, Step {}'.format(epoch, step_indx)
            for i in range(self.model_num):
                gt_loss = self.models[i].loss(logits[i], labels)
                kl_loss = 0
                for j in range(self.model_num):
                    if i != j:
                        x = F.log_softmax(logits[i], axis=1)
                        y = fluid.layers.softmax(logits[j], axis=1)
                        kl_loss += fluid.layers.kldiv_loss(
                            x, y, reduction='batchmean')

                loss = gt_loss
                if (self.model_num > 1):
                    loss += kl_loss / (self.model_num - 1)

                prec = fluid.layers.accuracy(input=logits[i],
                                             label=labels,
                                             k=1)
                losses[i].update(loss.numpy(), batch_size)
                accs[i].update(prec.numpy() * 100, batch_size)

                if self.use_data_parallel:
                    loss = self.parallel_models[i].scale_loss(loss)
                    loss.backward()
                    self.parallel_models[i].apply_collective_grads()
                else:
                    loss.backward()
                self.optimizers[i].minimize(loss)
                if self.use_data_parallel:
                    self.parallel_models[i].clear_gradients()
                else:
                    self.models[i].clear_gradients()

                log_msg += ', model{}_loss: {:.3f}'.format(
                    i + 1, losses[i].avg[0])

            if step_indx % self.log_freq == 0:
                logger.info(log_msg)
        return losses, accs
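
Example #2 accumulates its metrics with an AvgrageMeter helper (the spelling follows the DARTS-style utilities) that is not shown above. A minimal sketch, assuming it is the usual running-average meter:

class AvgrageMeter(object):

    def __init__(self):
        self.reset()

    def reset(self):
        self.avg = 0
        self.sum = 0
        self.cnt = 0

    def update(self, val, n=1):
        # val may be a numpy array, so avg stays array-valued and avg[0] works
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt

Example #3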
def train(args):
    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    valid_config = merge_configs(config, 'valid', vars(args))
    print_configs(train_config, 'Train')

    use_data_parallel = False
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if use_data_parallel else fluid.CUDAPlace(0)

    with fluid.dygraph.guard(place):
        if use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        video_model = NonLocal("NonLocal", train_config, mode="train")

        optimizer = create_optimizer(train_config.TRAIN,
                                     video_model.parameters())
        if use_data_parallel:
            video_model = fluid.dygraph.parallel.DataParallel(video_model,
                                                              strategy)

        bs_denominator = 1
        if args.use_gpu:
            # check number of GPUs
            gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
            if gpus == "":
                pass
            else:
                gpus = gpus.split(",")
                num_gpus = len(gpus)
                assert num_gpus == train_config.TRAIN.num_gpus, \
                       "num_gpus({}) set by CUDA_VISIBLE_DEVICES" \
                       "shoud be the same as that" \
                       "set in {}({})".format(
                       num_gpus, args.config, train_config.TRAIN.num_gpus)
            bs_denominator = train_config.TRAIN.num_gpus

        train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size /
                                            bs_denominator)

        train_reader = NonlocalReader(name="NONLOCAL", mode="train", cfg=train_config)

        train_reader = train_reader.create_reader()
        if use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        for epoch in range(train_config.TRAIN.epoch):
            video_model.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            for batch_id, data in enumerate(train_reader()):
                x_data = np.array([item[0] for item in data]).astype('float32')
                y_data = np.array([item[1] for item in data]).astype('int64')
                x_data = to_variable(x_data)
                labels = to_variable(y_data)
                labels.stop_gradient = True
                outputs = video_model(x_data, train_config)

                loss = fluid.layers.cross_entropy(outputs, labels, soft_label=False, ignore_index=-100)
                loss = fluid.layers.reduce_sum(loss, dim=-1)
                avg_loss = fluid.layers.mean(loss)
                
                acc_top1 = fluid.layers.accuracy(input=outputs, label=labels, k=1)
                acc_top5 = fluid.layers.accuracy(input=outputs, label=labels, k=5)

                if use_data_parallel:
                    avg_loss = video_model.scale_loss(avg_loss)
                    avg_loss.backward()
                    video_model.apply_collective_grads()
                else:
                    avg_loss.backward()
                optimizer.minimize(avg_loss)
                video_model.clear_gradients()

                total_loss += avg_loss.numpy()[0]
                total_acc1 += acc_top1.numpy()[0]
                total_acc5 += acc_top5.numpy()[0]
                total_sample += 1

                print('TRAIN Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'.
                      format(epoch, batch_id,
                             avg_loss.numpy()[0],
                             acc_top1.numpy()[0], acc_top5.numpy()[0]))

            print(
                'TRAIN End, Epoch {}, avg_loss= {}, avg_acc1= {}, avg_acc5= {}'.
                format(epoch, total_loss / total_sample, total_acc1 /
                       total_sample, total_acc5 / total_sample))
            video_model.eval()
            val(epoch, video_model, valid_config, args)

        if fluid.dygraph.parallel.Env().local_rank == 0:
            fluid.dygraph.save_dygraph(video_model.state_dict(), "final")
        logger.info('[TRAIN] training finished')
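
Example #3 builds its optimizer through a create_optimizer() helper that is not included in the excerpt. A rough sketch of such a helper follows; the config field names (lr_boundaries, lr_values, momentum, l2_weight_decay) are hypothetical placeholders, not the project's actual schema.

def create_optimizer(cfg, parameter_list):
    # piecewise learning-rate schedule driven by the TRAIN section of the config
    lr = fluid.layers.piecewise_decay(boundaries=cfg.lr_boundaries,
                                      values=cfg.lr_values)
    return fluid.optimizer.Momentum(
        learning_rate=lr,
        momentum=cfg.momentum,
        parameter_list=parameter_list,
        regularization=fluid.regularizer.L2Decay(cfg.l2_weight_decay))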
Example #4
def train():
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 90
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())

        num_train_examples = processor.get_num_examples(phase="train")

        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        if not args.ce:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=True)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        else:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=False)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                           args.padding_size)

        sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []

        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                enable_profile = steps > args.profile_steps

                with profile_context(enable_profile):

                    steps += 1
                    doc = to_variable(
                        np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in data
                        ]).astype('int64').reshape(-1, 1))

                    label = to_variable(
                        np.array([x[1] for x in data]).astype('int64').reshape(
                            args.batch_size, 1))

                    cnn_net.train()
                    avg_cost, prediction, acc = cnn_net(doc, label)
                    avg_cost.backward()
                    np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                    word_num = np.sum(np_mask)
                    sgd_optimizer.minimize(avg_cost)
                    cnn_net.clear_gradients()
                    total_cost.append(avg_cost.numpy() * word_num)
                    total_acc.append(acc.numpy() * word_num)
                    total_num_seqs.append(word_num)

                    if steps % args.skip_steps == 0:
                        time_end = time.time()
                        used_time = time_end - time_begin
                        print("step: %d, ave loss: %f, "
                              "ave acc: %f, speed: %f steps/s" %
                              (steps,
                               np.sum(total_cost) / np.sum(total_num_seqs),
                               np.sum(total_acc) / np.sum(total_num_seqs),
                               args.skip_steps / used_time))
                        total_cost, total_acc, total_num_seqs = [], [], []
                        time_begin = time.time()

                    if steps % args.validation_steps == 0:
                        total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                        cnn_net.eval()
                        eval_steps = 0
                        for eval_batch_id, eval_data in enumerate(
                                eval_data_generator()):
                            eval_np_doc = np.array([
                                np.pad(x[0][0:args.padding_size],
                                       (0, args.padding_size -
                                        len(x[0][0:args.padding_size])),
                                       'constant',
                                       constant_values=(args.vocab_size))
                                for x in eval_data
                            ]).astype('int64').reshape(1, -1)
                            eval_label = to_variable(
                                np.array([x[1] for x in eval_data
                                          ]).astype('int64').reshape(
                                              args.batch_size, 1))
                            eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
                            eval_avg_cost, eval_prediction, eval_acc = cnn_net(
                                eval_doc, eval_label)

                            eval_np_mask = (eval_np_doc !=
                                            args.vocab_size).astype('int32')
                            eval_word_num = np.sum(eval_np_mask)
                            total_eval_cost.append(eval_avg_cost.numpy() *
                                                   eval_word_num)
                            total_eval_acc.append(eval_acc.numpy() *
                                                  eval_word_num)
                            total_eval_num_seqs.append(eval_word_num)

                            eval_steps += 1

                        time_end = time.time()
                        used_time = time_end - time_begin
                        print(
                            "Final validation result: step: %d, ave loss: %f, "
                            "ave acc: %f, speed: %f steps/s" %
                            (steps, np.sum(total_eval_cost) /
                             np.sum(total_eval_num_seqs),
                             np.sum(total_eval_acc) /
                             np.sum(total_eval_num_seqs),
                             eval_steps / used_time))
                        time_begin = time.time()
                        if args.ce:
                            print("kpis\ttrain_loss\t%0.3f" %
                                  (np.sum(total_eval_cost) /
                                   np.sum(total_eval_num_seqs)))
                            print("kpis\ttrain_acc\t%0.3f" %
                                  (np.sum(total_eval_acc) /
                                   np.sum(total_eval_num_seqs)))

                    if steps % args.save_steps == 0:
                        save_path = "save_dir_" + str(steps)
                        print('save model to: ' + save_path)
                        fluid.dygraph.save_persistables(
                            cnn_net.state_dict(), save_path)
                if enable_profile:
                    print('save profile result into /tmp/profile_file')
                    return
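
Example #4 wraps every step in a profile_context() helper that is not reproduced here. A minimal sketch, assuming it simply toggles the fluid profiler and writes the report to /tmp/profile_file (matching the message printed above):

import contextlib
from paddle.fluid import profiler


@contextlib.contextmanager
def profile_context(profile=True):
    if profile:
        # profile all devices and dump the aggregated report on exit
        with profiler.profiler('All', 'total', '/tmp/profile_file'):
            yield
    else:
        yield

Example #5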
NUM_CLASSES = 7


if __name__ == '__main__':
    with fluid.dygraph.guard():
        model = YOLOv3('yolov3', num_classes=NUM_CLASSES, is_train=False)
        model_state_dict, _ = fluid.load_dygraph(WEIGHT_FILE)
        model.load_dict(model_state_dict)
        model.eval()

        total_results = []
        test_loader = single_image_data_loader(IMAGE_NAME, mode='test')
        for i, data in enumerate(test_loader()):
            img_name, img_data, img_scale_data = data
            img = to_variable(img_data)
            img_scale = to_variable(img_scale_data)

            outputs = model.forward(img)
            bboxes, scores = model.get_pred(outputs,
                                     im_shape=img_scale,
                                     anchors=ANCHORS,
                                     anchor_masks=ANCHOR_MASKS,
                                     valid_thresh = VALID_THRESH)

            bboxes_data = bboxes.numpy()
            scores_data = scores.numpy()
            results = multiclass_nms(bboxes_data, scores_data,
                                     score_thresh=VALID_THRESH,
                                     nms_thresh=NMS_THRESH,
                                     pre_nms_topk=NMS_TOPK)
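
The inference snippet above also depends on several module-level constants (WEIGHT_FILE, IMAGE_NAME, ANCHORS, ANCHOR_MASKS, VALID_THRESH, NMS_THRESH, NMS_TOPK) defined elsewhere. The values below are illustrative placeholders typical of a COCO-style YOLOv3 setup, not the original configuration.

WEIGHT_FILE = 'yolov3_final'   # prefix of the saved .pdparams file (placeholder)
IMAGE_NAME = 'test.jpg'        # placeholder input image
ANCHORS = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
           59, 119, 116, 90, 156, 198, 373, 326]
ANCHOR_MASKS = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
VALID_THRESH = 0.01
NMS_THRESH = 0.45
NMS_TOPK = 400

Example #6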
    def func_testSetNumpyBeforeTrain(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            # TODO: marsyang1993 Change seed to

            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            bd = []
            lr_arr = [0.0]
            # this a fake lr decay strategy
            for i in range(1, 10):
                bd.append(100 * i)
                # set lr to 0.0, not update parameter
                new_lr = 0.0
                lr_arr.append(new_lr)

            place = fluid.CPUPlace() if not core.is_compiled_with_cuda() \
                else fluid.CUDAPlace(0)
            adam = Adam(learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr_arr),
                        beta1=0.8,
                        beta2=0.6,
                        parameter_list=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            np_opti_dict = {}
            np_state_dict = {}

            for k, v in self.opti_dict.items():
                if isinstance(v, (core.VarBase, core.eager.Tensor)):
                    np_opti_dict[v.name] = v.numpy()
                else:
                    np_opti_dict[k] = v

            for k, v in self.state_dict.items():
                np_state_dict[k] = v.numpy()

            adam.set_state_dict(np_opti_dict)
            ptb_model.set_state_dict(np_state_dict)
            for i in range(1):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                dy_loss.backward()
                adam.minimize(dy_loss)
                ptb_model.clear_gradients()

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if k == "global_step":
                    self.assertTrue(
                        np.array_equal(v.numpy(), self.base_opti[v.name] + 1))

                if k.find("beta1_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta1))
                if k.find("beta2_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta2))

            # check parameter

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()

                base_t = self.model_base[k]
                self.assertTrue(np.array_equal(new_t, base_t))
Example #7
def train(args):
    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    valid_config = merge_configs(config, 'valid', vars(args))
    print_configs(train_config, 'Train')

    local_rank = fluid.dygraph.parallel.Env().local_rank

    use_data_parallel = args.use_data_parallel
    trainer_count = fluid.dygraph.parallel.Env().nranks
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        #(data_parallel step1/6)
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)

    #load pretrain
    assert os.path.exists(args.weights), \
        "Given dir {} not exist.".format(args.weights)
    pre_state_dict = fluid.load_program_state(args.weights)
    #for key in pre_state_dict.keys():
    #    print('pre_state_dict.key: {}'.format(key))

    with fluid.dygraph.guard(place):
        #1. init model
        video_model = TSM_ResNet("TSM", train_config)

        #2. set weights
        param_state_dict = {}
        model_dict = video_model.state_dict()
        for key in model_dict.keys():
            weight_name = model_dict[key].name
            if weight_name in pre_state_dict.keys(
            ) and weight_name != "fc_0.w_0" and weight_name != "fc_0.b_0":
                print('succ Load weight: {}, shape: {}'.format(
                    weight_name, pre_state_dict[weight_name].shape))
                param_state_dict[key] = pre_state_dict[weight_name]
            else:
                print('fail Load weight: {}'.format(weight_name))
                param_state_dict[key] = model_dict[key]
        video_model.set_dict(param_state_dict)

        #3. init optim
        optimizer = create_optimizer(train_config.TRAIN,
                                     video_model.parameters())
        if use_data_parallel:
            #(data_parallel step2,3/6)
            strategy = fluid.dygraph.parallel.prepare_context()
            video_model = fluid.dygraph.parallel.DataParallel(
                video_model, strategy)

        # 4. load checkpoint
        if args.checkpoint:
            assert os.path.exists(args.checkpoint + ".pdparams"), \
                "Given dir {}.pdparams not exist.".format(args.checkpoint)
            assert os.path.exists(args.checkpoint + ".pdopt"), \
                "Given dir {}.pdopt not exist.".format(args.checkpoint)
            para_dict, opti_dict = fluid.dygraph.load_dygraph(args.checkpoint)
            video_model.set_dict(para_dict)
            optimizer.set_dict(opti_dict)

        # 5. reader
        bs_denominator = 1
        if args.use_gpu:
            gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
            if gpus == "":
                pass
            else:
                gpus = gpus.split(",")
                num_gpus = len(gpus)
                assert num_gpus == train_config.TRAIN.num_gpus, \
                       "num_gpus({}) set by CUDA_VISIBLE_DEVICES" \
                       "shoud be the same as that" \
                       "set in {}({})".format(
                       num_gpus, args.config, train_config.TRAIN.num_gpus)
            bs_denominator = train_config.TRAIN.num_gpus

        train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size /
                                            bs_denominator)

        train_reader = UCF101Reader(name="TSM", mode="train", cfg=train_config)

        train_reader = train_reader.create_reader()
        if use_data_parallel:
            #(data_parallel step4/6)
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        # 6. train loop
        reader_cost_averager = TimeAverager()
        batch_cost_averager = TimeAverager()
        for epoch in range(train_config.TRAIN.epoch):
            epoch_start = time.time()

            video_model.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0

            # 6.1 for each batch, call model() , backward(), and minimize()
            batch_start = time.time()
            for batch_id, data in enumerate(train_reader()):
                t1 = time.time()
                reader_cost_averager.record(t1 - batch_start)

                x_data = np.array([item[0] for item in data])
                y_data = np.array([item[1] for item in data]).reshape([-1, 1])

                imgs = to_variable(x_data)
                labels = to_variable(y_data)
                labels.stop_gradient = True

                t2 = time.time()
                outputs = video_model(imgs)
                t3 = time.time()

                loss = fluid.layers.cross_entropy(input=outputs,
                                                  label=labels,
                                                  ignore_index=-1)
                avg_loss = fluid.layers.mean(loss)

                acc_top1 = fluid.layers.accuracy(input=outputs,
                                                 label=labels,
                                                 k=1)
                acc_top5 = fluid.layers.accuracy(input=outputs,
                                                 label=labels,
                                                 k=5)

                current_step_lr = optimizer.current_step_lr()
                if use_data_parallel:
                    #(data_parallel step5/6)
                    avg_loss = video_model.scale_loss(avg_loss)
                    avg_loss.backward()
                    video_model.apply_collective_grads()
                else:
                    avg_loss.backward()

                t4 = time.time()
                optimizer.minimize(avg_loss)
                video_model.clear_gradients()

                avg_loss_value = avg_loss.numpy()[0]
                acc_top1_value = acc_top1.numpy()[0]
                acc_top5_value = acc_top5.numpy()[0]

                total_loss += avg_loss_value
                total_acc1 += acc_top1_value
                total_acc5 += acc_top5_value
                total_sample += 1

                t5 = time.time()
                batch_cost_averager.record(
                    t5 - batch_start,
                    num_samples=train_config.TRAIN.batch_size)
                if batch_id % args.log_interval == 0:
                    print(
                        'TRAIN Epoch: %d, iter: %d, loss: %.5f, acc1: %.5f, acc5: %.5f, lr: %.5f, forward_cost:%.5f s, backward_cost:%.5f s, minimize_cost:%.5f s, to_variable_cost: %.5f s, batch_cost: %.5f sec, reader_cost: %.5f sec, ips: %.5f samples/sec'
                        % (epoch, batch_id, avg_loss_value, acc_top1_value,
                           acc_top5_value, current_step_lr, t3 - t2, t4 - t3,
                           t5 - t4, t2 - t1, batch_cost_averager.get_average(),
                           reader_cost_averager.get_average(),
                           batch_cost_averager.get_ips_average()))
                    batch_cost_averager.reset()
                    reader_cost_averager.reset()

                batch_start = time.time()

            train_epoch_cost = time.time() - epoch_start
            print(
                'TRAIN End, Epoch {}, avg_loss= {:.5f}, avg_acc1= {:.5f}, avg_acc5= {:.5f}, lr={:.5f}, epoch_cost: {:.5f} sec'
                .format(epoch, total_loss / total_sample,
                        total_acc1 / total_sample, total_acc5 / total_sample,
                        current_step_lr, train_epoch_cost))

            # 6.2 save checkpoint
            if local_rank == 0:
                if not os.path.isdir(args.model_save_dir):
                    os.makedirs(args.model_save_dir)
                model_path = os.path.join(
                    args.model_save_dir,
                    args.model_path_pre + "_epoch{}".format(epoch))
                fluid.dygraph.save_dygraph(video_model.state_dict(),
                                           model_path)
                fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)
                print('save_dygraph End, Epoch {}/{} '.format(
                    epoch, train_config.TRAIN.epoch))

            # 6.3 validation
            video_model.eval()
            val(epoch, video_model, valid_config, args)

        # 7. save final model
        if local_rank == 0:
            model_path = os.path.join(args.model_save_dir,
                                      args.model_path_pre + "_final")
            fluid.dygraph.save_dygraph(video_model.state_dict(), model_path)
            fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)

        logger.info('[TRAIN] training finished')
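
Example #7 measures reader and batch cost with a TimeAverager helper that is not shown. A minimal sketch consistent with how it is called above (record(), get_average(), get_ips_average(), reset()):

class TimeAverager(object):

    def __init__(self):
        self.reset()

    def reset(self):
        self._cnt = 0
        self._total_time = 0.0
        self._total_samples = 0

    def record(self, usetime, num_samples=None):
        self._cnt += 1
        self._total_time += usetime
        if num_samples:
            self._total_samples += num_samples

    def get_average(self):
        if self._cnt == 0:
            return 0.0
        return self._total_time / self._cnt

    def get_ips_average(self):
        if self._total_time == 0.0:
            return 0.0
        return self._total_samples / self._total_time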
Example #8
    def test_forward_hook_return_value(self):
        seed = 90

        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))

        for place in places:
            with fluid.dygraph.guard(place):
                fluid.default_startup_program().random_seed = seed
                fluid.default_main_program().random_seed = seed
                fluid.set_flags({'FLAGS_sort_sum_gradient': True})

                input_word = np.array(
                    [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7,
                     8]).reshape(6, 3).astype('int64')
                input_word1 = input_word * 2
                input_word = input_word.reshape((-1, 3, 1))
                input_word1 = input_word1.reshape((-1, 3, 1))
                y_data = np.array(
                    [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8,
                     9]).reshape(6, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))

                input = base.to_variable(input_word)
                input1 = base.to_variable(input_word1)
                y = base.to_variable(y_data)

                simplenet = SimpleNet(
                    hidden_size=20,
                    vocab_size=32,
                    num_steps=3,
                    init_scale=0.1,
                    is_sparse=False,
                    dtype="float32")

                # origin, don't register any hook
                outs_origin = simplenet(input, y)
                outs_origin1 = simplenet(input1, y)

                # register forward_pre_hook
                forward_pre_hook_handle1 = simplenet.register_forward_pre_hook(
                    forward_pre_hook1)
                outs_pre_hook = simplenet(input, y)
                self.assertTrue(
                    np.array_equal(outs_pre_hook.numpy(), outs_origin1.numpy()))

                # remove forward_pre_hook
                forward_pre_hook_handle1.remove()
                outs_pre_hook = simplenet(input, y)
                self.assertTrue(
                    np.array_equal(outs_pre_hook.numpy(), outs_origin.numpy()))

                # register forward_hook
                forward_hook_handle1 = simplenet.register_forward_post_hook(
                    forward_hook1)
                outs_forward_hook = simplenet(input, y)
                self.assertTrue(
                    np.array_equal(outs_forward_hook.numpy(),
                                   outs_origin.numpy() * 2))

                # remove forward_hook
                forward_hook_handle1.remove()
                outs_forward_hook = simplenet(input, y)
                self.assertTrue(
                    np.array_equal(outs_forward_hook.numpy(),
                                   outs_origin.numpy()))
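
The assertions in Example #8 only hold if the registered hooks behave in a specific way: the pre-hook must double the word ids (so the result matches outs_origin1, computed on input1 = input * 2), and the post-hook must double the layer output. Below is a sketch of hook functions with exactly that behavior; the real definitions are not part of the excerpt.

def forward_pre_hook1(layer, input):
    # double the word ids before SimpleNet runs; input is the positional-arg tuple
    return (input[0] * 2, input[1])


def forward_hook1(layer, input, output):
    # double whatever SimpleNet produced
    return output * 2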
Example #9
    def parse(self, db_value):
        x = to_variable(db_value)
        return {"x1": x}
Example #10
    def test_forward_hook(self):
        seed = 90

        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))

        for place in places:
            with fluid.dygraph.guard(place):
                fluid.default_startup_program().random_seed = seed
                fluid.default_main_program().random_seed = seed
                backward_strategy = fluid.dygraph.BackwardStrategy()
                backward_strategy.sort_sum_gradient = True

                global call_forward_hook
                global call_forward_pre_hook

                input_word = np.array(
                    [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7,
                     8]).reshape(6, 3).astype('int64')
                input_word = input_word.reshape((-1, 3, 1))
                y_data = np.array(
                    [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8,
                     9]).reshape(6, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))

                input = base.to_variable(input_word)
                y = base.to_variable(y_data)

                simplenet = SimpleNet(hidden_size=20,
                                      vocab_size=32,
                                      num_steps=3,
                                      init_scale=0.1,
                                      is_sparse=False,
                                      dtype="float32")

                # origin, don't register any hook
                outs_origin = simplenet(input, y)
                self.assertFalse(call_forward_hook)
                self.assertFalse(call_forward_pre_hook)

                # register forward_hook and forward_pre_hook
                forward_hook_handle = simplenet.register_forward_post_hook(
                    forward_hook)
                forward_pre_hook_handle = simplenet.register_forward_pre_hook(
                    forward_pre_hook)
                outs_hook = simplenet(input, y)
                self.assertTrue(call_forward_hook)
                self.assertTrue(call_forward_pre_hook)

                outs_hook = simplenet(input, y)
                self.assertTrue(call_forward_hook)
                self.assertTrue(call_forward_pre_hook)

                # remove forward_hook
                forward_hook_handle.remove()
                call_forward_hook = False
                call_forward_pre_hook = False
                outs_remove_forward_hook = simplenet(input, y)
                self.assertFalse(call_forward_hook)
                self.assertTrue(call_forward_pre_hook)

                # remove forward_pre_hook
                forward_pre_hook_handle.remove()
                call_forward_hook = False
                call_forward_pre_hook = False
                outs_remove_hook = simplenet(input, y)
                self.assertFalse(call_forward_hook)
                self.assertFalse(call_forward_pre_hook)
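
Example #10 flips two module-level flags from inside its hooks; the hook functions themselves are not shown. A minimal sketch consistent with the assertions above:

call_forward_hook = False
call_forward_pre_hook = False


def forward_hook(layer, input, output):
    # record that the post-forward hook fired
    global call_forward_hook
    call_forward_hook = True


def forward_pre_hook(layer, input):
    # record that the pre-forward hook fired
    global call_forward_pre_hook
    call_forward_pre_hook = True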
Example #11
def train_model():
    place = fluid.CUDAPlace(0)

    with fluid.dygraph.guard(place):
        # 1. init net and optimizer
        if args.model == "MobileNetV1":
            net = MobileNetV1(class_dim=args.class_dim, scale=1.0)
        elif args.model == "MobileNetV2":
            net = MobileNetV2(class_dim=args.class_dim, scale=1.0)
        elif args.model == "ResNet50":
            net = ResNet()
        elif args.model == "ResNet101":
            net = ResNet(layers=101)
        else:
            print(
                "wrong model name, please try model = ResNet50 or MobileNetV1 or MobileNetV2"
            )
            exit()

        optimizer = fluid.optimizer.AdamOptimizer(
            parameter_list=net.parameters())
        # for param in net.parameters():
        #     print(param.name, param.shape)

        input_fake = np.ones((args.batch_size, 3, 224, 224)).astype(np.float32)
        # cross_entropy expects int64 labels; np.int is deprecated in recent NumPy
        target_fake = np.ones((args.batch_size, 1)).astype('int64')

        global train_images
        batch_number = train_images / args.batch_size

        # 2. train loop
        for eop in range(args.num_epochs):
            net.train()

            img = to_variable(input_fake)
            label = to_variable(target_fake)

            print("\nBegin Training Epoch {}".format(eop + 1))
            epoch_start_time = time.time()

            batch_id = 0
            for i in range(int(batch_number)):
                t1 = time.time()

                # img = to_variable(input_fake)
                # label = to_variable(target_fake)

                out = net(img)
                softmax_out = fluid.layers.softmax(out, use_cudnn=False)
                loss = fluid.layers.cross_entropy(input=softmax_out,
                                                  label=label)
                avg_loss = fluid.layers.mean(x=loss)
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                net.clear_gradients()

                t2 = time.time()
                train_batch_elapse = t2 - t1

                print("epoch id: %d, batch step: %d, forward_backward %2.4f" %
                      (eop, batch_id, train_batch_elapse))
                batch_id += 1

            epoch_end_time = time.time()
            print("\nAfter Training Epoch {} time is: {:.4f}".format(
                eop + 1, epoch_end_time - epoch_start_time))
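
Example #11 reads a module-level train_images count that is set elsewhere; it only determines how many fake batches run per epoch. The value below is purely a placeholder (ImageNet-1k's training-set size) so the loop above has a defined length.

train_images = 1281167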
Example #12
    def forward(self, xforward):
        """
        xforward, xreverse = B T C H W tensors.
        """
        xreverse = xforward[:, ::-1, :, :, :]
        y_out_fwd, _ = self.forward_net(xforward)
        y_out_rev, _ = self.reverse_net(xreverse)
        y_out_fwd = y_out_fwd[-1]  # outputs of last CLSTM layer = B, T, C, H, W
        y_out_rev = y_out_rev[-1]

        # print(reversed_idx)
        y_out_rev = y_out_rev[:, ::-1, :, :, :]
        # print(y_out_rev.shape)
        ycat = fluid.layers.concat([y_out_fwd, y_out_rev], axis=2)

        return ycat


if __name__ == '__main__':
    with fluid.dygraph.guard():
        input = np.random.randn(5, 20, 1280, 7, 7).astype('float32')
        x = to_variable(input)
        model = ConvBGRU(in_channels=1280,
                         hidden_channels=64,
                         kernel_size=(3, 3),
                         num_layers=2)
        out = model(x)
        print(out.shape)
Example #13
def train(args):

    with fluid.dygraph.guard():
        backward_strategy = fluid.dygraph.BackwardStrategy()
        backward_strategy.sort_sum_gradient = True
        ocr_attention = OCRAttention("ocr_attention")

        if Config.learning_rate_decay == "piecewise_decay":
            learning_rate = fluid.layers.piecewise_decay(
                [50000], [Config.LR, Config.LR * 0.01])
        else:
            learning_rate = Config.LR
        # use the schedule selected above instead of a hard-coded rate
        optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
        dy_param_init_value = {}

        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(5.0)

        train_reader = data_reader.train(
            Config.batch_size,
            max_length=Config.max_length,
            train_images_dir=args.train_images,
            train_list_file=args.train_list,
            cycle=args.total_step > 0,
            shuffle=True,
            model=args.model)

        infer_image = './data/data/test_images/'
        infer_files = './data/data/test.list'
        test_reader = data_reader.train(
            Config.batch_size,
            1000,
            train_images_dir=infer_image,
            train_list_file=infer_files,
            cycle=False,
            model=args.model)
        def eval():
            ocr_attention.eval()
            total_loss = 0.0
            total_step = 0.0
            equal_size = 0
            for data in test_reader():
                data_dict = get_attention_feeder_data(data)

                label_in = to_variable(data_dict["label_in"])
                label_out = to_variable(data_dict["label_out"])

                label_out._stop_gradient = True
                label_out.trainable = False

                img = to_variable(data_dict["pixel"])

                prediction = ocr_attention(img, label_in)
                prediction = fluid.layers.reshape(
                    prediction,
                    [label_out.shape[0] * label_out.shape[1], -1],
                    inplace=False)

                score, topk = layers.topk(prediction, 1)

                seq = topk.numpy()
                seq = seq.reshape((args.batch_size, -1))

                mask = data_dict['mask'].reshape((args.batch_size, -1))
                seq_len = np.sum(mask, -1)

                trans_ref = data_dict["label_out"].reshape((args.batch_size, -1))
                for i in range(args.batch_size):
                    length = int(seq_len[i] - 1)
                    trans = seq[i][:length - 1]
                    ref = trans_ref[i][:length - 1]
                    if np.array_equal(trans, ref):
                        equal_size += 1

                total_step += args.batch_size
            print("eval cost", equal_size / total_step)

        total_step = 0
        epoch_num = 20
        for epoch in range(epoch_num):
            batch_id = 0

            total_loss = 0.0
            for data in train_reader():

                total_step += 1
                data_dict = get_attention_feeder_data(data)

                label_in = to_variable(data_dict["label_in"])
                label_out = to_variable(data_dict["label_out"])

                label_out._stop_gradient = True
                label_out.trainable = False

                img = to_variable(data_dict["pixel"])

                prediction = ocr_attention(img, label_in)
                prediction = fluid.layers.reshape(
                    prediction,
                    [label_out.shape[0] * label_out.shape[1], -1],
                    inplace=False)
                label_out = fluid.layers.reshape(label_out, [-1, 1], inplace=False)
                loss = fluid.layers.cross_entropy(
                    input=prediction, label=label_out)

                mask = to_variable(data_dict["mask"])

                loss = layers.elementwise_mul(loss, mask, axis=0)
                avg_loss = fluid.layers.reduce_sum(loss)

                total_loss += avg_loss.numpy()
                avg_loss.backward()
                optimizer.minimize(avg_loss, grad_clip=grad_clip)
                ocr_attention.clear_gradients()

                framework._dygraph_tracer()._clear_ops()

                if batch_id > 0 and batch_id % 1000 == 0:
                    print("epoch: {}, batch_id: {}, loss {}".format(epoch, batch_id, total_loss / args.batch_size / 1000))

                    total_loss = 0.0

                if total_step > 0 and total_step % 2000 == 0:

                    model_value = ocr_attention.state_dict()
                    np.savez( "model/" + str(total_step), **model_value )

                    ocr_attention.eval()
                    eval()
                    ocr_attention.train()

                batch_id += 1
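
Example #13 also assumes a Config object defined elsewhere; only the fields referenced above are sketched below, and the values are placeholders rather than the original settings.

class Config(object):
    LR = 0.001
    learning_rate_decay = "piecewise_decay"
    batch_size = 32
    max_length = 100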
Example #14
        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None

        for i in range(batch_num):
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            x_data = x_data.reshape((-1, num_steps, 1))
            y_data = y_data.reshape((-1, 1))
            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')
            init_cell_data = np.zeros((num_layers, batch_size, hidden_size),
                                      dtype='float32')
            x = to_variable(x_data)
            y = to_variable(y_data)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            outs = ptb_model(x, y, init_hidden, init_cell)
            dy_loss, last_hidden, last_cell = outs

            if i == 0:
                for param in ptb_model.parameters():
                    dy_param_init[param.name] = param.numpy()
            dy_loss.backward()
            sgd.minimize(dy_loss)
            ptb_model.clear_gradients()
            if i == batch_num - 1:
                for param in ptb_model.parameters():
                    dy_param_updated[param.name] = param.numpy()
Example #15
    def func_testSetNumpy(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            bd = []
            lr_arr = [1.0]
            # this a fake lr decay strategy
            for i in range(1, 10):
                bd.append(100 * i)
                new_lr = 1.0
                lr_arr.append(new_lr)

            place = fluid.CPUPlace() if not core.is_compiled_with_cuda() \
                else fluid.CUDAPlace(0)
            adam = Adam(learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr_arr),
                        parameter_list=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)
                if i == 0:
                    for param in ptb_model.parameters():
                        dy_param_init[param.name] = param.numpy()
                dy_loss.backward()
                adam.minimize(dy_loss)
                ptb_model.clear_gradients()
                if i == batch_num - 1:
                    for param in ptb_model.parameters():
                        dy_param_updated[param.name] = param.numpy()

            # check optimizer
            opti_dict = adam.state_dict()
            np_opti_dict = {}
            # set to zero
            for k, v in opti_dict.items():
                if isinstance(v, (core.VarBase, core.eager.Tensor)):
                    np_t = v.numpy()
                    np_opti_dict[v.name] = np_t
                    var = v.value().get_tensor()
                    var.set(np.zeros_like(np_t), place)
                    self.assertTrue(np.sum(np.abs(v.numpy())) == 0)
                else:
                    np_opti_dict[k] = v

            if isinstance(adam._learning_rate, LearningRateDecay):
                adam._learning_rate.step_num = 0

            adam.set_state_dict(np_opti_dict)

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if isinstance(v, (core.VarBase, core.eager.Tensor)):
                    self.assertTrue(
                        np.array_equal(v.numpy(), self.base_opti[v.name]))
                else:
                    self.assertEqual(v, self.base_opti[k])

            # check parameter
            state_dict = ptb_model.state_dict()
            np_state_dict = {}
            for k, v in state_dict.items():
                np_t = v.numpy()
                np_state_dict[k] = np_t
                var = v.value().get_tensor()

                var.set(np.zeros_like(np_t), place)

            ptb_model.set_state_dict(np_state_dict)

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()

                base_t = self.model_base[k]

                self.assertTrue(np.array_equal(new_t, base_t))
Example #16
def train():
    place = fluid.CUDAPlace(0) if cfg.use_cuda else fluid.CPUPlace()
    if cfg.train_model == 'deepfm':
        with fluid.dygraph.guard(place):
            model = DeepFM()
    elif cfg.train_model == 'dnnplus':
        with fluid.dygraph.guard(place):
            model = DNNPlus()
    elif cfg.train_model == 'dnn':
        with fluid.dygraph.guard(place):
            model = DNN()
    elif cfg.train_model == 'drnn':
        with fluid.dygraph.guard(place):
            model = DRNN()
    
    with fluid.dygraph.guard(place):
        optimizer = fluid.optimizer.Adam(
                learning_rate=cfg.learning_rate,
                parameter_list=model.parameters(),
                regularization=fluid.regularizer.L2DecayRegularizer(cfg.reg))
        # optimizer = fluid.optimizer.SGD(learning_rate=cfg.learning_rate,
        #                                 parameter_list=model.parameters())
        file_list = [
            os.path.join(cfg.train_files_path, x) for x in os.listdir(cfg.train_files_path)
        ]
        train_reader = data_reader(cfg.batch_size, file_list, cfg.feat_dict, data_type="train")
        start_epoch = 0
        if cfg.checkpoint:
            model_dict, optimizer_dict = fluid.dygraph.load_dygraph(
                cfg.checkpoint)
            model.set_dict(model_dict)
            optimizer.set_dict(optimizer_dict)
            start_epoch = int(
                os.path.basename(cfg.checkpoint).split("_")[
                    -1])  # get next train epoch
            logger.info("load model {} finished.".format(cfg.checkpoint))

        logger.info("Training Begin")

        for epoch in range(start_epoch, cfg.epoches):
            start_time = time.time()
            total_loss = 0.0
            total_auc = 0.0
            count = 0
            auc_metric = fluid.metrics.Auc('ROC')
            
            if not os.path.isdir(os.path.join(cfg.log_dir, model.name)):
                os.makedirs(os.path.join(cfg.log_dir, model.name))
            log_path = os.path.join(cfg.log_dir, model.name, str(epoch + 1) + '_train_result.log')
            f = open(log_path, 'w+')

            model.train()
            for batch_id, data in enumerate(train_reader()):
                raw_feat_idx, raw_feat_value, label = zip(*data)
                
                raw_feat_idx = np.array(raw_feat_idx, dtype=np.int64)
                raw_feat_value = np.array(raw_feat_value, dtype=np.float32)
                label = np.array(label, dtype=np.int64)
                raw_feat_idx, raw_feat_value, label = [
                    to_variable(i)
                    for i in [raw_feat_idx, raw_feat_value, label]
                ]

                predict = model(raw_feat_idx, raw_feat_value, label)

                loss = fluid.layers.log_loss(
                    input=predict, label=fluid.layers.cast(label, dtype="float32"))
                batch_loss = fluid.layers.reduce_sum(loss)

                total_loss += batch_loss.numpy().item()
                batch_loss.backward()
                optimizer.minimize(batch_loss)
                model.clear_gradients()
                
                count += 1
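                # fluid.metrics.Auc expects per-class probabilities, so the
                # binary score p is expanded to [1 - p, p] before update().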
                predict_2d = fluid.layers.concat([1 - predict, predict], 1)
                auc_metric.update(preds=predict_2d.numpy(), labels=label.numpy())

                if (batch_id + 1) % cfg.log_interval == 0:
                    logger.info(
                        "epoch: %d, batch_id: %d, loss: %.6f, auc: %.6f" % (
                            epoch + 1, batch_id + 1, total_loss / count / cfg.batch_size, auc_metric.eval()))
                
                if (batch_id + 1) % cfg.log_interval_2 == 0:
                    f.write('%d,%d,%.4f,%.4f\n' % (epoch + 1, batch_id + 1, total_loss / count / cfg.batch_size, auc_metric.eval()))

            end_time = time.time()
            logger.info("epoch %d finished, use time = %ds \n" % ((epoch + 1), end_time - start_time))

            if (epoch + 1) % cfg.save_interval == 0:
                model_path = os.path.join(str(cfg.save_path), model.name, model.name + "_epoch_" + str(epoch + 1))
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                logger.info("saving model to %s \n" % (model_path))
                fluid.dygraph.save_dygraph(model.state_dict(), model_path)
                fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)
            f.close()
            evaluate(model, epoch + 1)
    logger.info("Done.")
Example no. 17
    def func_testSetVariableBeforeTrain(self):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale)

            place = fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)
            adam = Adam(learning_rate=0.0,
                        beta1=0.8,
                        beta2=0.6,
                        parameter_list=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            adam.set_state_dict(self.opti_dict)
            ptb_model.set_state_dict(self.state_dict)

            for i in range(1):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                dy_loss.backward()
                adam.minimize(dy_loss)
                ptb_model.clear_gradients()

            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if k == "global_step":
                    self.assertTrue(
                        np.array_equal(v.numpy(), self.base_opti[v.name] + 1))

                if k.find("beta1_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta1))
                if k.find("beta2_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] * adam._beta2))

            state_dict = ptb_model.state_dict()

            for k, v in state_dict.items():
                new_t = v.numpy()

                base_t = self.model_base[k]
                self.assertTrue(np.array_equal(new_t, base_t))
Example no. 18
    def simple_net_float32(self, is_sparse, dtype):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))

        for place in places:
            seed = 90
            hidden_size = 10
            vocab_size = 1000
            num_steps = 3
            init_scale = 0.1
            batch_size = 4
            batch_num = 200

            for is_sort_sum_gradient in [True, False]:
                with fluid.dygraph.guard(place):
                    paddle.seed(seed)
                    paddle.framework.random._manual_program_seed(seed)

                    simple_net = SimpleNet(hidden_size=hidden_size,
                                           vocab_size=vocab_size,
                                           num_steps=num_steps,
                                           init_scale=init_scale,
                                           is_sparse=is_sparse,
                                           dtype=dtype)

                    sgd = SGDOptimizer(learning_rate=1e-3,
                                       parameter_list=simple_net.parameters())
                    dy_param_updated = dict()
                    dy_param_init = dict()
                    dy_loss = None

                    helper = DyGraphProgramDescTracerTestHelper(self)
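                    # Exercise both gradient-accumulation orders by toggling
                    # FLAGS_sort_sum_gradient on each pass of the outer loop.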
                    fluid.set_flags(
                        {'FLAGS_sort_sum_gradient': is_sort_sum_gradient})

                    for i in range(batch_num):
                        x_data = np.arange(12).reshape(4, 3).astype('int64')
                        y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                        x_data = x_data.reshape((-1, num_steps))
                        y_data = y_data.reshape((-1, 1))

                        x = to_variable(x_data)
                        y = to_variable(y_data)
                        outs = simple_net(x, y)
                        dy_loss = outs
                        if i == 0:
                            for param in simple_net.parameters():
                                dy_param_init[param.name] = param.numpy()
                        dy_loss.backward()
                        sgd.minimize(dy_loss)
                        sgd.clear_gradients()
                        if i == batch_num - 1:
                            for param in simple_net.parameters():
                                dy_param_updated[param.name] = param.numpy()
                    dy_loss_value = dy_loss.numpy()

                with new_program_scope():
                    paddle.seed(seed)
                    paddle.framework.random._manual_program_seed(seed)

                    simple_net = SimpleNet(hidden_size=hidden_size,
                                           vocab_size=vocab_size,
                                           num_steps=num_steps,
                                           is_sparse=is_sparse,
                                           dtype=dtype)

                    exe = fluid.Executor(place)
                    sgd = SGDOptimizer(learning_rate=1e-3)
                    x = fluid.layers.data(name="x",
                                          shape=[-1, num_steps],
                                          dtype='int64')
                    y = fluid.layers.data(name="y", shape=[-1, 1], dtype=dtype)

                    static_loss = simple_net(x, y)
                    sgd.minimize(static_loss)
                    static_param_updated = dict()
                    static_param_init = dict()
                    static_param_name_list = list()
                    for param in simple_net.parameters():
                        static_param_name_list.append(param.name)

                    out = exe.run(fluid.default_startup_program(),
                                  fetch_list=static_param_name_list)
                    for i in range(len(static_param_name_list)):
                        static_param_init[static_param_name_list[i]] = out[i]
                    static_loss_value = None
                    for i in range(batch_num):
                        x_data = np.arange(12).reshape(4, 3).astype('int64')
                        y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                        x_data = x_data.reshape((-1, num_steps))
                        y_data = y_data.reshape((-1, 1))
                        fetch_list = [static_loss]
                        fetch_list.extend(static_param_name_list)
                        out = exe.run(fluid.default_main_program(),
                                      feed={
                                          "x": x_data,
                                          "y": y_data
                                      },
                                      fetch_list=fetch_list)
                        static_loss_value = out[0]

                        if i == batch_num - 1:
                            for k in range(3, len(out)):
                                static_param_updated[static_param_name_list[
                                    k - 1]] = out[k]

                self.assertTrue(
                    np.allclose(static_loss_value, dy_loss_value, rtol=1e-3))
                for key, value in six.iteritems(static_param_init):
                    self.assertTrue(np.array_equal(value, dy_param_init[key]))
                for key, value in six.iteritems(static_param_updated):
                    self.assertTrue(
                        np.array_equal(value, dy_param_updated[key]))
Example no. 19
    def test_gnn_float32(self):
        paddle.manual_seed(90)
        paddle.framework.random._manual_program_seed(90)
        startup = fluid.Program()
        main = fluid.Program()

        scope = fluid.core.Scope()
        with new_program_scope(main=main, startup=startup, scope=scope):
            features = fluid.layers.data(
                name='features',
                shape=[1, 100, 50],
                dtype='float32',
                append_batch_size=False)
            # Use selected rows when it's supported.
            adj = fluid.layers.data(
                name='adj',
                shape=[1, 100, 100],
                dtype='float32',
                append_batch_size=False)
            labels = fluid.layers.data(
                name='labels',
                shape=[100, 1],
                dtype='int64',
                append_batch_size=False)

            model = GCN('test_gcn', 50)
            logits = model(features, adj)
            logits = fluid.layers.reshape(logits, logits.shape[1:])
            # In other example, it's nll with log_softmax. However, paddle's
            # log_loss only supports binary classification now.
            loss = fluid.layers.softmax_with_cross_entropy(logits, labels)
            loss = fluid.layers.reduce_sum(loss)

            adam = AdamOptimizer(learning_rate=1e-3)
            adam.minimize(loss)
            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            exe.run(startup)
            static_loss = exe.run(feed={
                'features': np.ones(
                    [1, 100, 50], dtype=np.float32),
                'adj': np.ones(
                    [1, 100, 100], dtype=np.float32),
                'labels': np.ones(
                    [100, 1], dtype=np.int64)
            },
                                  fetch_list=[loss])[0]

            static_weight = np.array(
                scope.find_var(model.gc.weight.name).get_tensor())

        with fluid.dygraph.guard():
            paddle.manual_seed(90)
            paddle.framework.random._manual_program_seed(90)

            features = np.ones([1, 100, 50], dtype=np.float32)
            # Use selected rows when it's supported.
            adj = np.ones([1, 100, 100], dtype=np.float32)
            labels = np.ones([100, 1], dtype=np.int64)

            model = GCN('test_gcn', 50)
            logits = model(to_variable(features), to_variable(adj))
            logits = fluid.layers.reshape(logits, logits.shape[1:])
            # In other example, it's nll with log_softmax. However, paddle's
            # log_loss only supports binary classification now.
            loss = fluid.layers.softmax_with_cross_entropy(logits,
                                                           to_variable(labels))
            loss = fluid.layers.reduce_sum(loss)
            loss.backward()
            adam = AdamOptimizer(
                learning_rate=1e-3, parameter_list=model.parameters())

            adam.minimize(loss)
            model.clear_gradients()
            loss_value = loss.numpy()
            model_gc_weight_value = model.gc.weight.numpy()

        with fluid.dygraph.guard():
            paddle.manual_seed(90)
            paddle.framework.random._manual_program_seed(90)

            features2 = np.ones([1, 100, 50], dtype=np.float32)
            # Use selected rows when it's supported.
            adj2 = np.ones([1, 100, 100], dtype=np.float32)
            labels2 = np.ones([100, 1], dtype=np.int64)

            model2 = GCN('test_gcn', 50)
            logits2 = model2(to_variable(features2), to_variable(adj2))
            logits2 = fluid.layers.reshape(logits2, logits2.shape[1:])
            # In other example, it's nll with log_softmax. However, paddle's
            # log_loss only supports binary classification now.
            loss2 = fluid.layers.softmax_with_cross_entropy(
                logits2, to_variable(labels2))
            loss2 = fluid.layers.reduce_sum(loss2)
            loss2.backward()
            adam2 = AdamOptimizer(
                learning_rate=1e-3, parameter_list=model2.parameters())
            adam2.minimize(loss2)
            model2.clear_gradients()
            loss2_value = loss2.numpy()
            model2_gc_weight_value = model2.gc.weight.numpy()

        self.assertEqual(static_loss, loss_value)
        self.assertTrue(np.allclose(static_weight, model_gc_weight_value))
        self.assertEqual(static_loss, loss2_value)
        self.assertTrue(np.allclose(static_weight, model2_gc_weight_value))
        sys.stderr.write('%s %s\n' % (static_loss, loss_value))
Example no. 20
def evaluate():
    place = fluid.CUDAPlace(0) if cfg.use_cuda else fluid.CPUPlace()
    inference_scope = fluid.Scope()
    test_files = [
        os.path.join(cfg.evaluate_file_path, x) for x in os.listdir(cfg.evaluate_file_path)
    ]
    dataset = CriteoDataset()
    test_reader = paddle.batch(dataset.test(test_files), batch_size=cfg.batch_size)

    with fluid.dygraph.guard(place):
        if cfg.train_model == 'drnn':
            model = DRNN()
        elif cfg.train_model == 'dnn':
            model = DNN()
        elif cfg.train_model == 'fcdnn':
            model = FCDNN()
        model_path = os.path.join(cfg.save_path, model.name, model.name + "_epoch_" + str(cfg.test_epoch))
        
        model_dict, optimizer_dict = fluid.dygraph.load_dygraph(model_path)
        model.set_dict(model_dict)
        logger.info("load model {} finished.".format(model_path))

        model.eval()
        logger.info('Begin evaluate model.')

        run_index = 0
        infer_auc = 0.0
        L = []
        for batch_id, data in enumerate(test_reader()):
            dense_feature, sparse_feature, label = zip(*data)
                
            sparse_feature = np.array(sparse_feature, dtype=np.int64)
            dense_feature = np.array(dense_feature, dtype=np.float32)
            label = np.array(label, dtype=np.int64)
            sparse_feature, dense_feature, label = [
                to_variable(i)
                for i in [sparse_feature, dense_feature, label]
            ]

            avg_cost, auc_var = model(dense_feature, sparse_feature, label)

            run_index += 1
            infer_auc += auc_var.numpy().item()
            L.append(avg_cost.numpy() / cfg.batch_size)

            if batch_id % cfg.log_interval == 0:
                logger.info("TEST --> batch: {} loss: {} auc: {}".format(
                    batch_id, avg_cost.numpy() / cfg.batch_size, infer_auc / run_index))

        infer_loss = np.mean(L)
        infer_auc = infer_auc / run_index
        infer_result = {}
        infer_result['loss'] = infer_loss
        infer_result['auc'] = infer_auc
        if not os.path.isdir(cfg.log_dir):
            os.makedirs(cfg.log_dir)
        log_path = os.path.join(cfg.log_dir, model.name + '_infer_result.log')
        
        logger.info(str(infer_result))
        with open(log_path, 'w+') as f:
            f.write(str(infer_result))
        logger.info("Done.")
    return infer_result
Example no. 21
# Output: output, (hn, cn)
# Input data format:
# input(seq_len, batch, input_size)
# h0(num_layers * num_directions, batch, hidden_size)
# c0(num_layers * num_directions, batch, hidden_size)

# Output data format:
# output(seq_len, batch, hidden_size * num_directions)
# hn(num_layers * num_directions, batch, hidden_size)
# cn(num_layers * num_directions, batch, hidden_size)

# x = torch.rand(7, 3, 151)
# net = nn.LSTM(input_size=151, hidden_size=128, num_layers=1, batch_first=True)
# y, _ = net(x)
# print(y.shape)
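For comparison, a minimal dygraph sketch of the same shape check, assuming Paddle 2.x's paddle.nn.LSTM is available (this is not part of the original snippet); with the default time_major=False the input is [batch, seq_len, input_size]:

import paddle

lstm = paddle.nn.LSTM(input_size=151, hidden_size=128, num_layers=1)
x = paddle.rand([7, 3, 151])      # [batch, seq_len, input_size]
y, (hn, cn) = lstm(x)
print(y.shape)                    # [7, 3, 128]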

import paddle.fluid as fluid
import paddle.fluid.dygraph.base as base
import numpy

D = 151
T = 1  #sum(lod[0])

input = numpy.random.rand(T, 3 * D).astype('float32')
hidden_input = numpy.random.rand(T, D).astype('float32')

with fluid.dygraph.guard():
    gru = fluid.dygraph.GRUUnit(size=D * 3)
    h, r, g = gru(base.to_variable(input), base.to_variable(hidden_input))

    print(h.shape, r.shape, g.shape)
    def test_mnist_sort_gradient_float32(self):
        seed = 90
        epoch_num = 1

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            backward_strategy = fluid.dygraph.BackwardStrategy()
            backward_strategy.sort_sum_gradient = True

            mnist2 = MNIST("mnist")
            sgd2 = SGDOptimizer(learning_rate=1e-3)
            train_reader2 = paddle.batch(paddle.dataset.mnist.train(),
                                         batch_size=128,
                                         drop_last=True)

            mnist2.train()
            dy_param_init_value2 = {}
            for epoch in range(epoch_num):
                for batch_id, data in enumerate(train_reader2()):
                    dy_x_data2 = np.array([
                        x[0].reshape(1, 28, 28) for x in data
                    ]).astype('float32')
                    y_data2 = np.array([x[1] for x in data
                                        ]).astype('int64').reshape(128, 1)

                    img2 = to_variable(dy_x_data2)
                    label2 = to_variable(y_data2)
                    label2.stop_gradient = True

                    cost2 = mnist2(img2)
                    loss2 = fluid.layers.cross_entropy(cost2, label2)
                    avg_loss2 = fluid.layers.mean(loss2)

                    dy_out2 = avg_loss2.numpy()

                    if epoch == 0 and batch_id == 0:
                        for param in mnist2.parameters():
                            dy_param_init_value2[param.name] = param.numpy()

                    avg_loss2.backward(backward_strategy)
                    sgd2.minimize(avg_loss2)
                    mnist2.clear_gradients()

                    dy_param_value2 = {}
                    for param in mnist2.parameters():
                        dy_param_value2[param.name] = param.numpy()
                    if batch_id == 20:
                        break

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

            mnist = MNIST("mnist")
            sgd = SGDOptimizer(learning_rate=1e-3)
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=128,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            cost = mnist(img)
            loss = fluid.layers.cross_entropy(cost, label)
            avg_loss = fluid.layers.mean(loss)
            sgd.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mnist.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for epoch in range(epoch_num):
                for batch_id, data in enumerate(train_reader()):
                    static_x_data = np.array([
                        x[0].reshape(1, 28, 28) for x in data
                    ]).astype('float32')
                    y_data = np.array([x[1] for x in data
                                       ]).astype('int64').reshape([128, 1])

                    fetch_list = [avg_loss.name]
                    fetch_list.extend(static_param_name_list)
                    out = exe.run(fluid.default_main_program(),
                                  feed={
                                      "pixel": static_x_data,
                                      "label": y_data
                                  },
                                  fetch_list=fetch_list)

                    static_param_value = {}
                    static_out = out[0]
                    for i in range(1, len(out)):
                        static_param_value[static_param_name_list[i -
                                                                  1]] = out[i]
                    if batch_id == 20:
                        break

        self.assertTrue(np.allclose(dy_x_data2.all(), static_x_data.all()))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value2[key]))

        self.assertTrue(np.allclose(static_out, dy_out2))

        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value2[key],
                                        atol=1e-5))
Example no. 23
    def forward(self, input):
        N, C, H, W = input.shape
        _, _, res4, res5 = self.resnet(input)

        feature = self.connect_conv(res5)
        feature = self.connect_bn(feature)
        feature = self.connect_relu(feature)
        gru_output = self.gru_module(feature)
        dropout = self.dropout(gru_output)
        logit = self.get_logit_conv(dropout)
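        # Upsample the logits back to the input spatial size (H, W).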
        logit = F.common.interpolate(logit, size=[H, W], mode='BILINEAR')

        aux_logit = self.auxhead(res4)
        aux_logit = F.common.interpolate(aux_logit,
                                         size=[H, W],
                                         mode='BILINEAR')
        return logit, aux_logit


if __name__ == "__main__":
    from paddle.fluid.dygraph.base import to_variable
    import numpy as np
    with fluid.dygraph.guard():
        model = GruModule(input_channel=512, num_state=128, num_node=64)
        data = np.random.uniform(-1, 1, [2, 512, 96, 96]).astype('float32')
        data = to_variable(data)
        y = model(data)
        print(y.shape)
Example no. 24
def train_ptb_lm():
    args = parse_args()

    # check if set use_gpu=True in paddlepaddle cpu version
    model_check.check_cuda(args.use_gpu)

    place = core.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    # check if paddlepaddle version is satisfied
    model_check.check_version()

    model_type = args.model_type

    vocab_size = 10000
    if model_type == "test":
        num_layers = 1
        batch_size = 2
        hidden_size = 10
        num_steps = 3
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 1
        max_epoch = 1
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "small":
        num_layers = 2
        batch_size = 20
        hidden_size = 200
        num_steps = 20
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 4
        max_epoch = 13
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "medium":
        num_layers = 2
        batch_size = 20
        hidden_size = 650
        num_steps = 35
        init_scale = 0.05
        max_grad_norm = 5.0
        epoch_start_decay = 6
        max_epoch = 39
        dropout = 0.5
        lr_decay = 0.8
        base_learning_rate = 1.0
    elif model_type == "large":
        num_layers = 2
        batch_size = 20
        hidden_size = 1500
        num_steps = 35
        init_scale = 0.04
        max_grad_norm = 10.0
        epoch_start_decay = 14
        max_epoch = 55
        dropout = 0.65
        lr_decay = 1.0 / 1.15
        base_learning_rate = 1.0
    else:
        print("model type not support")
        return

    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            max_epoch = 1
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale,
                             dropout=dropout)

        if args.init_from_pretrain_model:
            if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
                print(args.init_from_pretrain_model)
                raise ValueError("The pretrained params do not exist.")
            # Load the pretrained parameters into the model.
            model_dict, _ = fluid.load_dygraph(args.init_from_pretrain_model)
            ptb_model.set_dict(model_dict)
            print("finished initializing the model from pretrained params at %s" %
                  (args.init_from_pretrain_model))

        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None

        data_path = args.data_path
        print("begin to load data")
        ptb_data = reader.get_ptb_data(data_path)
        print("finished load data")
        train_data, valid_data, test_data = ptb_data

        batch_len = len(train_data) // batch_size
        total_batch_size = (batch_len - 1) // num_steps
        log_interval = 200

        bd = []
        lr_arr = [1.0]
        for i in range(1, max_epoch):
            bd.append(total_batch_size * i)
            new_lr = base_learning_rate * (lr_decay**max(
                i + 1 - epoch_start_decay, 0.0))
            lr_arr.append(new_lr)
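        # With the "small" settings (max_epoch=13, epoch_start_decay=4,
        # lr_decay=0.5) this produces 12 boundaries and
        # lr_arr = [1.0, 1.0, 1.0, 1.0, 0.5, 0.25, ...], i.e. the learning
        # rate halves every epoch once decay starts.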

        grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
        sgd = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=lr_arr),
                           parameter_list=ptb_model.parameters(),
                           grad_clip=grad_clip)

        def reader_decorator(reader):
            def __reader__():
                for item in reader:
                    x_data = item[0].reshape((-1, num_steps, 1))
                    y_data = item[1].reshape((-1, num_steps, 1))
                    yield x_data, y_data

            return __reader__

        def eval(model, data):
            print("begin to eval")
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')
            init_cell_data = np.zeros((num_layers, batch_size, hidden_size),
                                      dtype='float32')

            model.eval()
            train_data_iter = reader_decorator(
                reader.get_data_iter(data, batch_size, num_steps))

            eval_data_loader = fluid.io.DataLoader.from_generator(capacity=200)
            eval_data_loader.set_batch_generator(train_data_iter, places=place)

            for batch_id, batch in enumerate(eval_data_loader):
                x, y = batch
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                out_loss = dy_loss.numpy()

                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()

                total_loss += out_loss
                iters += num_steps

            print("eval finished")
            ppl = np.exp(total_loss / iters)
            print("ppl ", batch_id, ppl[0])

        ce_time = []
        ce_ppl = []

        total_batch_num = 0  #this is for benchmark
        for epoch_id in range(max_epoch):
            epoch_start = time.time()

            ptb_model.train()
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')
            init_cell_data = np.zeros((num_layers, batch_size, hidden_size),
                                      dtype='float32')

            train_data_iter = reader_decorator(
                reader.get_data_iter(train_data, batch_size, num_steps))

            train_data_loader = fluid.io.DataLoader.from_generator(
                capacity=200)
            train_data_loader.set_batch_generator(train_data_iter,
                                                  places=place)

            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)

            batch_cost_avg = TimeCostAverage()
            reader_cost_avg = TimeCostAverage()

            batch_start = time.time()
            for batch_id, batch in enumerate(train_data_loader):
                if args.max_iter and total_batch_num == args.max_iter:
                    return

                train_reader_cost = time.time() - batch_start
                reader_cost_avg.record(train_reader_cost)

                x, y = batch

                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)
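                # Detach the recurrent state so gradients do not flow across
                # batch boundaries (truncated backpropagation through time).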
                init_hidden = last_hidden.detach()
                init_cell = last_cell.detach()
                out_loss = dy_loss.numpy()

                dy_loss.backward()
                sgd.minimize(dy_loss)
                ptb_model.clear_gradients()

                global_lr = sgd._global_learning_rate().numpy()
                total_loss += out_loss
                iters += num_steps
                total_batch_num = total_batch_num + 1  #this is for benchmark

                train_batch_cost = time.time() - batch_start
                batch_cost_avg.record(train_batch_cost)

                if batch_id > 0 and batch_id % log_interval == 0:
                    ppl = np.exp(total_loss / iters)
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, lr: %.5f, loss: %.5f, batch_cost: %.5f sec, reader_cost: %.5f sec, ips: %.5f words/sec"
                        % (epoch_id, batch_id, ppl[0], global_lr, out_loss,
                           batch_cost_avg.get_average(),
                           reader_cost_avg.get_average(),
                           batch_size / batch_cost_avg.get_average()))
                    batch_cost_avg.reset()
                    reader_cost_avg.reset()
                batch_start = time.time()

            ppl = np.exp(total_loss / iters)
            train_epoch_cost = time.time() - epoch_start
            print("-- Epoch:[%d]; ppl: %.5f, epoch_cost: %.5f s" %
                  (epoch_id, ppl[0], train_epoch_cost))

            ce_time.append(train_epoch_cost)
            ce_ppl.append(ppl[0])

            if batch_size <= 20 and epoch_id == 0 and ppl[0] > 1000:
                # for bad init, after first epoch, the loss is over 1000
                # no more need to continue
                print(
                    "Parameters are randomly initialized and not good this time because the loss is over 1000 after the first epoch."
                )
                print("Abort this training process and please start again.")
                return

            save_model_dir = os.path.join(args.save_model_dir, str(epoch_id),
                                          'params')
            fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
            print("Saved model to: %s.\n" % save_model_dir)

            eval(ptb_model, valid_data)

        if args.ce:
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (dev_count, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (dev_count, _ppl))

        eval(ptb_model, test_data)
    if model_depth == 10:
        model = ResNet(BasicBlock, [1, 1, 1, 1], get_inplanes(), **kwargs)
    elif model_depth == 18:
        model = ResNet(BasicBlock, [2, 2, 2, 2], get_inplanes(), **kwargs)
    elif model_depth == 34:
        model = ResNet(BasicBlock, [3, 4, 6, 3], get_inplanes(), **kwargs)
    elif model_depth == 50:
        model = ResNet(Bottleneck, [3, 4, 6, 3], get_inplanes(), **kwargs)
    elif model_depth == 101:
        model = ResNet(Bottleneck, [3, 4, 23, 3], get_inplanes(), **kwargs)
    elif model_depth == 152:
        model = ResNet(Bottleneck, [3, 8, 36, 3], get_inplanes(), **kwargs)
    elif model_depth == 200:
        model = ResNet(Bottleneck, [3, 24, 36, 3], get_inplanes(), **kwargs)

    return model


if __name__ == "__main__":

    with fluid.dygraph.guard():
        """
        输入:
            输入Tensor的维度: [N,Cin,Din,Hin,Win]
        """
        x = np.random.randn(10, 3, 8, 224, 224).astype('float32')
        x = to_variable(x)
        net = generate_model(10, conv1_t_size=8)
        # net = FrameSubNet(3, 3)
        out = net(x)
        print(out.shape)
Example no. 26
    simple_net = fluid.dygraph.parallel.DataParallel(simple_net, strategy)

    train_reader = paddle.batch(ptb_train_reader(),
                                batch_size=batch_size,
                                drop_last=True)
    train_reader = fluid.contrib.reader.distributed_batch_reader(train_reader)

    sgd = fluid.optimizer.SGD(learning_rate=1e-3,
                              parameter_list=simple_net.parameters())
    dy_loss = None

    for i, data in enumerate(train_reader()):
        x_data = np.array([x[0].reshape(3) for x in data]).astype('int64')
        y_data = np.array([x[1].reshape(3) for x in data]).astype('int64')
        x_data = x_data.reshape((-1, num_steps, 1))
        y_data = y_data.reshape((-1, 1))

        x = to_variable(x_data)
        y = to_variable(y_data)
        dy_loss = simple_net(x, y)
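        # DataParallel bookkeeping: scale the loss by the trainer count and
        # all-reduce the gradients across trainers before the optimizer step.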

        dy_loss = simple_net.scale_loss(dy_loss)
        dy_loss.backward()
        simple_net.apply_collective_grads()

        sgd.minimize(dy_loss)
        simple_net.clear_gradients()
    dy_loss_value = dy_loss.numpy()

print("- dygrah loss: %.6f" % dy_loss_value[0])
Example no. 27
            learning_rate=0.001,
            regularization=fluid.regularizer.L2Decay(0.0005))

        train_loader = multithread_loader(TRAINDIR,
                                          batch_size=10,
                                          mode='train')
        valid_loader = multithread_loader(VALIDDIR,
                                          batch_size=10,
                                          mode='valid')

        MAX_EPOCH = 300  # improvement point: the number of training epochs can be changed
        for epoch in range(MAX_EPOCH):
            for i, data in enumerate(train_loader()):
                img, gt_boxes, gt_labels, img_scale = data
                gt_scores = np.ones(gt_labels.shape).astype('float32')
                gt_scores = to_variable(gt_scores)
                img = to_variable(img)
                gt_boxes = to_variable(gt_boxes)
                gt_labels = to_variable(gt_labels)
                outputs = model(img)
                loss = model.get_loss(outputs,
                                      gt_boxes,
                                      gt_labels,
                                      gtscore=gt_scores,
                                      anchors=ANCHORS,
                                      anchor_masks=ANCHOR_MASKS,
                                      ignore_thresh=IGNORE_THRESH,
                                      use_label_smooth=False)

                loss.backward()
                opt.minimize(loss)
Example no. 28
    def test_while_op(self):
        seed = 90
        epoch_num = 1
        if core.is_compiled_with_cuda():
            batch_num = 3
        else:
            batch_num = 2
        np.random.seed(seed)
        image_np = np.random.randn(Config.batch_size, Config.DATA_SHAPE[0],
                                   Config.DATA_SHAPE[1],
                                   Config.DATA_SHAPE[2]).astype('float32')
        label_in_np = np.arange(
            0, Config.max_length,
            dtype='int64').reshape([1, Config.max_length])
        for i in range(2, Config.batch_size + 1):
            label_in_np = np.vstack((label_in_np, np.arange(
                (i - 1) * Config.max_length,
                i * Config.max_length,
                dtype='int64').reshape([1, Config.max_length])))

        label_out_np = np.arange(
            0, Config.max_length,
            dtype='int64').reshape([1, Config.max_length])
        for i in range(2, Config.batch_size + 1):
            label_out_np = np.vstack((label_out_np, np.arange(
                (i - 1) * Config.max_length,
                i * Config.max_length,
                dtype='int64').reshape([1, Config.max_length])))

        with fluid.dygraph.guard():
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            ocr_attention = OCRAttention()

            if Config.learning_rate_decay == "piecewise_decay":
                learning_rate = fluid.layers.piecewise_decay(
                    [50000], [Config.LR, Config.LR * 0.01])
            else:
                learning_rate = Config.LR
            optimizer = fluid.optimizer.SGD(
                learning_rate=0.001, parameter_list=ocr_attention.parameters())
            dy_param_init_value = {}
            for param in ocr_attention.parameters():
                dy_param_init_value[param.name] = param.numpy()
            for epoch in range(epoch_num):
                for batch_id in range(batch_num):
                    label_in = to_variable(label_in_np)
                    label_out = to_variable(label_out_np)
                    label_out.stop_gradient = True
                    img = to_variable(image_np)
                    dy_prediction = ocr_attention(img, label_in)
                    label_out = fluid.layers.reshape(
                        label_out, [-1, 1], inplace=False)
                    dy_prediction = fluid.layers.reshape(
                        dy_prediction, [label_out.shape[0], -1], inplace=False)
                    loss = fluid.layers.cross_entropy(
                        input=dy_prediction, label=label_out)
                    avg_loss = fluid.layers.reduce_sum(loss)

                    dy_out = avg_loss.numpy()

                    if epoch == 0 and batch_id == 0:
                        for param in ocr_attention.parameters():
                            if param.name not in dy_param_init_value:
                                dy_param_init_value[param.name] = param.numpy()
                    avg_loss.backward()
                    dy_grad_value = {}
                    for param in ocr_attention.parameters():
                        if param.trainable:
                            np_array = np.array(param._grad_ivar().value()
                                                .get_tensor())
                            dy_grad_value[param.name + core.grad_var_suffix(
                            )] = np_array

                    optimizer.minimize(avg_loss)
                    ocr_attention.clear_gradients()
                    dy_param_value = {}
                    for param in ocr_attention.parameters():
                        dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            ocr_attention = OCRAttention()

            if Config.learning_rate_decay == "piecewise_decay":
                learning_rate = fluid.layers.piecewise_decay(
                    [50000], [Config.LR, Config.LR * 0.01])
            else:
                learning_rate = Config.LR

            optimizer = fluid.optimizer.SGD(learning_rate=0.001)

            images = fluid.layers.data(
                name='pixel', shape=Config.DATA_SHAPE, dtype='float32')
            static_label_in = fluid.layers.data(
                name='label_in', shape=[1], dtype='int64', lod_level=0)
            static_label_out = fluid.layers.data(
                name='label_out', shape=[1], dtype='int64', lod_level=0)
            static_label_out.stop_gradient = True
            static_label_out.trainable = False

            static_prediction = ocr_attention(images, static_label_in)

            static_prediction = fluid.layers.reshape(
                static_prediction, shape=[-1, Config.num_classes + 2])

            cost = fluid.layers.cross_entropy(
                input=static_prediction, label=static_label_out)
            static_avg_loss = fluid.layers.reduce_sum(cost)
            # param_grad_list = fluid.backward.append_backward(static_avg_loss)
            optimizer.minimize(static_avg_loss)

            static_param_init_value = {}
            static_param_name_list = []
            static_grad_name_list = []
            for param in ocr_attention.parameters():
                static_param_name_list.append(param.name)
                if param.trainable:
                    static_grad_name_list.append(param.name +
                                                 core.grad_var_suffix())

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            fetch_list = [static_avg_loss.name]
            fetch_list.extend(static_param_name_list)
            fetch_list.extend(static_grad_name_list)
            for epoch in range(epoch_num):
                for batch_id in range(batch_num):
                    static_label_in = label_in_np
                    static_label_out = label_out_np
                    static_label_out = static_label_out.reshape((-1, 1))
                    out = exe.run(fluid.default_main_program(),
                                  feed={
                                      "pixel": image_np,
                                      "label_in": static_label_in,
                                      "label_out": static_label_out
                                  },
                                  fetch_list=fetch_list)
                    static_param_value = {}
                    static_grad_value = {}
                    static_out = out[0]
                    for i in range(1, len(static_param_name_list) + 1):
                        static_param_value[static_param_name_list[i - 1]] = out[
                            i]
                    grad_start_pos = len(static_param_name_list) + 1
                    for i in range(grad_start_pos,
                                   len(static_grad_name_list) + grad_start_pos):
                        static_grad_value[static_grad_name_list[
                            i - grad_start_pos]] = out[i]

        self.assertTrue(np.allclose(static_out, dy_out))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.array_equal(value, dy_param_init_value[key]))

        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value[key], rtol=1e-05))
Example no. 29
def train(place):

    num_layers = 1
    batch_size = 4
    hidden_size = 10
    num_steps = 3
    init_scale = 0.1
    max_epoch = 1
    dropout = 0.0
    vocab_size = 1000
    batch_num = 200

    with fluid.dygraph.guard(place):
        paddle.seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale,
                             dropout=dropout)

        sgd = SGDOptimizer(learning_rate=1e-3,
                           parameter_list=ptb_model.parameters())

        for epoch_id in range(max_epoch):

            total_loss = 0.0
            iters = 0.0
            total_sample = 0

            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')
            init_cell_data = np.zeros((num_layers, batch_size, hidden_size),
                                      dtype='float32')

            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            for step_id in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))

                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, num_steps, 1))

                x = to_variable(x_data)
                y = to_variable(y_data)

                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)
                out_loss = dy_loss.numpy()

                dy_loss.backward()
                sgd.minimize(dy_loss)
                ptb_model.clear_gradients()

                total_loss += out_loss
                iters += num_steps
                total_sample += 1
                if step_id % PRINT_STEP == 0:
                    if step_id == 0:
                        logging.info(
                            "epoch %d | step %d, loss %0.3f" %
                            (epoch_id, step_id, total_loss / total_sample))
                        avg_batch_time = time.time()
                    else:
                        speed = PRINT_STEP / (time.time() - avg_batch_time)
                        logging.info(
                            "epoch %d | step %d, loss %0.3f, speed %.3f steps/s"
                            % (epoch_id, step_id, total_loss / total_sample,
                               speed))
                        avg_batch_time = time.time()

        return out_loss, last_hidden.numpy(), last_cell.numpy()
    def ptb_rnn_sort_gradient_cpu_float32(self, is_sparse):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with fluid.dygraph.guard():
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            paddle.manual_seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale,
                                 is_sparse=is_sparse)

            sgd = SGDOptimizer(learning_rate=1e-3,
                               parameter_list=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)
                if i == 0:
                    for param in ptb_model.parameters():
                        dy_param_init[param.name] = param.numpy()
                dy_loss.backward()
                sgd.minimize(dy_loss)
                ptb_model.clear_gradients()
                if i == batch_num - 1:
                    for param in ptb_model.parameters():
                        dy_param_updated[param.name] = param.numpy()

            dy_loss_value = dy_loss.numpy()
            dy_last_cell_value = last_cell.numpy()
            dy_last_hidden_value = last_hidden.numpy()
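        # Re-run the same model in static-graph mode under a fresh program
        # scope to compare numerically against the dygraph results above.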

        with new_program_scope():
            paddle.manual_seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale,
                                 is_sparse=is_sparse)

            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            sgd = SGDOptimizer(learning_rate=1e-3)
            x = fluid.layers.data(name="x",
                                  shape=[-1, num_steps, 1],
                                  dtype='int64')
            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
            init_hidden = fluid.layers.data(name="init_hidden",
                                            shape=[1],
                                            dtype='float32')
            init_cell = fluid.layers.data(name="init_cell",
                                          shape=[1],
                                          dtype='float32')

            static_loss, static_last_hidden, static_last_cell = ptb_model(
                x, y, init_hidden, init_cell)
            sgd.minimize(static_loss)
            static_param_updated = dict()
            static_param_init = dict()
            static_param_name_list = list()
            for param in ptb_model.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(framework.default_startup_program(),
                          fetch_list=static_param_name_list)
            for i in range(len(static_param_name_list)):
                static_param_init[static_param_name_list[i]] = out[i]
            static_loss_value = None
            static_last_cell_value = None
            static_last_hidden_value = None
            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                fetch_list = [
                    static_loss, static_last_hidden, static_last_cell
                ]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "x": x_data,
                                  "y": y_data,
                                  "init_hidden": init_hidden_data,
                                  "init_cell": init_cell_data
                              },
                              fetch_list=fetch_list)
                static_loss_value = out[0]
                static_last_hidden_value = out[1]
                static_last_cell_value = out[2]

                if i == batch_num - 1:
                    for k in range(3, len(out)):
                        static_param_updated[static_param_name_list[
                            k - 3]] = out[k]

        self.assertTrue(np.array_equal(static_loss_value, dy_loss_value))
        self.assertTrue(
            np.array_equal(static_last_cell_value, dy_last_cell_value))
        self.assertTrue(
            np.array_equal(static_last_hidden_value, dy_last_hidden_value))
        for key, value in six.iteritems(static_param_init):
            self.assertTrue(np.array_equal(value, dy_param_init[key]))
        for key, value in six.iteritems(static_param_updated):
            self.assertTrue(np.array_equal(value, dy_param_updated[key]))