Example #1
0
def train(args):
    """Train the ECO GoogLeNet model in dygraph mode.

    Args:
        args: parsed CLI namespace; uses use_gpu, config, pretrain,
            save_dir, model_name and epoch.

    Side effects: checkpoints to ``args.save_dir + '/ucf_model'`` and
    ``trained_model/best_model``, and writes accuracy curves to
    ``result_data/ucf_data.npz``.
    """
    all_train_rewards = []  # one training-accuracy sample per epoch
    all_test_rewards = []  # validation results collected mid-training
    prev_result = 0  # best validation result seen so far
    # parse config
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        print_configs(train_config, 'Train')

        train_model = ECO.GoogLeNet(train_config['MODEL']['num_classes'],
                                    train_config['MODEL']['seg_num'],
                                    train_config['MODEL']['seglen'], 'RGB')
        # SGD with Nesterov momentum and L2 weight decay
        opt = fluid.optimizer.Momentum(
            0.001,
            0.9,
            parameter_list=train_model.parameters(),
            use_nesterov=True,
            regularization=fluid.regularizer.L2Decay(
                regularization_coeff=0.0005))

        if args.pretrain:
            # warm-start from the best checkpoint of a previous run
            model, _ = fluid.dygraph.load_dygraph('trained_model/best_model')
            train_model.load_dict(model)

        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get reader
        train_reader = KineticsReader(args.model_name.upper(), 'train',
                                      train_config).create_reader()

        # CLI epoch count wins; fall back to the model's default
        epochs = args.epoch or train_model.epoch_num()

        train_model.train()

        for i in range(epochs):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc = train_model(img, label)

                # model may return None logits; skip the update in that case
                if out is not None:

                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)

                    avg_loss.backward()

                    opt.minimize(avg_loss)
                    train_model.clear_gradients()

                    # checkpoint + validate every 200 steps
                    if batch_id % 200 == 0:
                        print("Loss at epoch {} step {}: {}, acc: {}".format(
                            i, batch_id, avg_loss.numpy(), acc.numpy()))
                        fluid.dygraph.save_dygraph(
                            train_model.state_dict(),
                            args.save_dir + '/ucf_model')
                        # validate_model is defined elsewhere in this file
                        result = validate_model()

                        all_test_rewards.append(result)
                        if result > prev_result:
                            prev_result = result
                            print('The best result is ' + str(result))
                            fluid.save_dygraph(train_model.state_dict(),
                                               'trained_model/best_model')
                            np.savez('result_data/ucf_data.npz',
                                     all_train_rewards=all_train_rewards,
                                     all_test_rewards=all_test_rewards)

            # NOTE(review): records only the LAST batch's accuracy for the epoch
            all_train_rewards.append(acc.numpy())

        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))

        np.savez('result_data/ucf_data.npz',
                 all_train_rewards=all_train_rewards,
                 all_test_rewards=all_test_rewards)
def train(args):
    """Train a 3D-ResNet on UCF101 in dygraph mode and dump metrics to CSV.

    Args:
        args: parsed CLI namespace; uses use_gpu, config, pretrain,
            save_dir, model_name and epoch.

    Side effects: saves per-epoch checkpoints under ``args.save_dir`` and
    writes a timestamped ``train_result_*.csv`` with accuracy/loss curves.
    """
    # parse config
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        print_configs(train_config, 'Train')
        # train_model = TSN1.TSNResNet('TSN',train_config['MODEL']['num_layers'],
        #                             train_config['MODEL']['num_classes'],
        #                             train_config['MODEL']['seg_num'],0.00002)
        train_model = resnet_3d.generate_model(train_config['MODEL']['num_layers'])
        # instantiate train_model from the custom network definition
        # opt = fluid.optimizer.Momentum(learning_rate=train_config['MODEL']['learning_rate'],momentum = 0.9, parameter_list=train_model.parameters())
        # opt = fluid.optimizer.Momentum(0.001, 0.9, parameter_list=train_model.parameters())
        # opt=fluid.optimizer.SGDOptimizer(learning_rate=train_config['MODEL']['learning_rate'], parameter_list=train_model.parameters())
        opt = fluid.optimizer.AdamOptimizer(learning_rate=train_config['MODEL']['learning_rate'],
                                            parameter_list=train_model.parameters())
        if args.pretrain:
            # load a previously trained (Kinetics, 1039-class) model, then
            # swap the head for 101 UCF classes before continuing training
            train_model = resnet_3d.generate_model(train_config['MODEL']['num_layers'], n_classes=1039)
            # model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/tsn_model')
            model, _ = fluid.dygraph.load_dygraph('data/data51645/paddle_dy')

            train_model.load_dict(model)
            # NOTE(review): `opt` still holds the OLD model's parameter list
            # after this re-instantiation — confirm whether that is intended
            train_model.fc = fluid.dygraph.Linear(512 * 4, 101, act='softmax')
            print('pretrain is ok')

        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get reader
        # NOTE(review): self-assignment is a no-op — likely a leftover override
        train_config.TRAIN.batch_size = train_config.TRAIN.batch_size
        # train_reader = KineticsReader(args.model_name.upper(), 'train', train_config).create_reader()
        train_reader = Ucf101(args.model_name.upper(), 'train', train_config).create_reader()

        epochs = args.epoch or train_model.epoch_num()

        # test
        test_config = merge_configs(config, 'test', vars(args))
        # invert the label dict: index -> class name
        label_dic = np.load('label_dir.npy', allow_pickle=True).item()
        label_dic = {v: k for k, v in label_dic.items()}

        # get infer reader
        # test_reader = Ucf101(args.model_name.upper(), 'test', test_config).create_reader()
        t_acc = []  # per-epoch mean training accuracy
        v_acc = []  # per-epoch validation accuracy (validation loop disabled)
        t_loss = []  # per-epoch mean training loss
        for i in range(epochs):
            train_acc_list = []
            train_loss_list = []
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                # reorder to [batch, channels, depth, height, width] for 3D conv
                dy_x_data = np.transpose(dy_x_data, (0, 2, 1, 3, 4))
                y_data = np.array([[x[1]] for x in data]).astype('int64')
                # if batch_id ==0:
                #     print(dy_x_data.shape)
                #     print(y_data.shape)

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                # out, acc = train_model.forward(img, label)
                out, acc = train_model(img, label)
                train_acc_list.append(acc.numpy()[0])
                # print('shape',out.shape,label.shape)
                # print(out)
                # print(label)

                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                train_loss_list.append(avg_loss.numpy())

                avg_loss.backward()

                opt.minimize(avg_loss)
                train_model.clear_gradients()

                if batch_id % 10 == 0:
                    logger.info(
                        "Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))
                    print("Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))
            t_loss.append(np.mean(train_loss_list))
            t_acc.append(np.mean(train_acc_list))
            # val_acc_list = []
            # for batch_id, data in enumerate(test_reader()):
            #     dy_x_data = np.array([x[0] for x in data]).astype('float32')
            #     dy_x_data = np.transpose(dy_x_data,(0,2,1,3,4))
            #     y_data = np.array([[x[1]] for x in data]).astype('int64')

            #     img = fluid.dygraph.to_variable(dy_x_data)
            #     label = fluid.dygraph.to_variable(y_data)
            #     label.stop_gradient = True
            #     out, acc = train_model.forward(img, label)
            #     val_acc_list.append(acc.numpy()[0])
            # v_acc.append(np.mean(val_acc_list))
            # print("测试集准确率为:{}".format(np.mean(val_acc_list)))
            # one checkpoint per epoch, suffixed with the 1-based epoch index
            fluid.dygraph.save_dygraph(train_model.state_dict(), args.save_dir + '/res3d_model_' + str(i + 1))

        print('t_acc', t_acc)
        print('t_loss', t_loss)
        # print('v_acc',v_acc)
        # get infer reader
        # val_reader = KineticsReader(args.model_name.upper(), 'valid', val_config).create_reader()
        # logger.info("Final loss: {}".format(avg_loss.numpy()))
        # print("Final loss: {}".format(avg_loss.numpy()))
        # dump the curves as a timestamped CSV: columns train_acc, train_loss
        result_list = []
        result_list.append(t_acc)
        result_list.append(t_loss)
        np_list = np.array(result_list).T
        name = ['train_acc', 'train_loss']
        test = pd.DataFrame(columns=name, data=np_list)
        now = int(time.time())
        timeArray = time.localtime(now)
        today_time = time.strftime("%Y-%m-%d-%H-%M-%S", timeArray)
        test.to_csv('train_result_' + today_time + '_.csv')
Example #3
0
def parse_losses(losses):
    """Reduce each raw loss tensor to its mean and sum the loss terms.

    Args:
        losses: mapping of loss name -> loss tensor.

    Returns:
        (loss, log_vars): the scalar total of every entry whose name
        contains 'loss', and the dict of per-name means (with the total
        stored under the key 'loss').
    """
    log_vars = {name: fluid.layers.mean(value)
                for name, value in losses.items()}

    total = sum(value for name, value in log_vars.items() if 'loss' in name)
    log_vars['loss'] = total

    return total, log_vars


args = parse_args()
config = parse_config(args.config)
train_config = merge_configs(config, 'train', vars(args))
val_config = merge_configs(config, 'valid', vars(args))
train_reader = KineticsReader(args.model_name.upper(), 'train',
                              train_config).create_reader()
val_reader = KineticsReader(args.model_name.upper(), 'valid',
                            val_config).create_reader()

label = fluid.layers.data(name='label', shape=[1], dtype='int64')
data_shape = [1, 3, 32, 224, 224]
img = fluid.layers.data(name='images', shape=data_shape, dtype='float32')

network = TSN.TSN3D(
    backbone=train_config['MODEL']['backbone'],
    necks=train_config['MODEL']['necks'],
    spatial_temporal_module=train_config['MODEL']['spatial_temporal_module'],
    segmental_consensus=train_config['MODEL']['segmental_consensus'],
Example #4
0
def train(args):
    """Train a 50-layer ResNet3D with per-epoch validation in dygraph mode.

    Args:
        args: parsed CLI namespace; uses use_gpu, config, resume, pretrain,
            save_dir, model_name and epoch.

    Side effects: checkpoints every 10 epochs and a final model under
    ``args.save_dir``.
    """
    # parse config
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        val_config = merge_configs(config, 'test', vars(args))

        if not os.path.exists(args.save_dir):
            os.mkdir(args.save_dir)

        # instantiate train_model from the custom network definition
        train_model = ResNet3D.generate_model(50)

        if args.resume == True:
            # load the previously trained model and continue training from it
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/tsn_model')
            train_model.load_dict(model)
            print('Resume from ' + args.save_dir + '/tsn_model')
        elif args.pretrain:
            # copy pretrained weights by name, skipping the FC head so the
            # classifier is re-learned for the new label set
            pretrain_weights = fluid.io.load_program_state(args.pretrain)
            inner_state_dict = train_model.state_dict()
            print('Resume from' + args.pretrain)
            for name, para in inner_state_dict.items():
                if ((name in pretrain_weights) and (not ('fc' in para.name))):
                    para.set_value(pretrain_weights[name])
                else:
                    print('del ' + para.name)

        opt = fluid.optimizer.Momentum(train_config.TRAIN.learning_rate,
                                       train_config.TRAIN.learning_rate_decay,
                                       parameter_list=train_model.parameters())
        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get reader
        train_config.TRAIN.batch_size = train_config.TRAIN.batch_size
        train_reader = KineticsReader(args.model_name.upper(), 'train',
                                      train_config).create_reader()
        val_reader = KineticsReader(args.model_name.upper(), 'valid',
                                    val_config).create_reader()

        epochs = args.epoch or train_model.epoch_num()
        for i in range(epochs):
            for batch_id, data in enumerate(train_reader(
            )):  # data (list) (batch)[seg_num,3 * seglen,size,size]
                dy_x_data = np.array([x[0] for x in data]).astype(
                    'float32')  # [batch, seg_num, 3 * seglen, size, size]
                y_data = np.array([[x[1]] for x in data
                                   ]).astype('int64')  # [batch, 1]

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc = train_model(img, label)

                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()

                opt.minimize(avg_loss)
                train_model.clear_gradients()

                logger.info("Loss at epoch {} step {}: {}, acc: {}".format(
                    i, batch_id, avg_loss.numpy(), acc.numpy()))
                print("Loss at epoch {} step {}: {}, acc: {}".format(
                    i, batch_id, avg_loss.numpy(), acc.numpy()))

            # full validation pass after every training epoch
            # NOTE(review): the model is never switched to eval() here —
            # confirm whether BN/Dropout should run in training mode
            acc_list = []
            for batch_id, data in enumerate(val_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc = train_model(img, label)
                acc_list.append(acc.numpy()[0])

            logger.info("Val at epoch {}:  acc: {}".format(
                i, np.mean(acc_list)))
            print("Val at epoch {}:  acc: {}".format(i, np.mean(acc_list)) +
                  '\n')

            # periodic checkpoint every 10 epochs
            if i % 10 == 0:
                fluid.dygraph.save_dygraph(
                    train_model.state_dict(),
                    args.save_dir + '/tsn_model_' + str(i))
        fluid.dygraph.save_dygraph(train_model.state_dict(),
                                   args.save_dir + '/tsn_model')
        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))
Example #5
0
def train(args):
    """Train the ECO GoogLeNet model using the paddle 2.x (hapi) API.

    Args:
        args: parsed CLI namespace; uses config, pretrain, save_dir,
            model_name and epoch.

    Side effects: checkpoints to ``args.save_dir + '/ucf_model_hapi'`` and
    ``best_model/final_best_model_hapi``; writes curves to
    ``result/final_ucf_data_hapi.npz``.
    """
    all_train_rewards = []  # one training-accuracy sample per epoch
    all_test_rewards = []  # per-epoch validation results
    prev_result = 0  # best validation result seen so far

    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    print_configs(train_config, 'Train')

    train_model = ECO.GoogLeNet(train_config['MODEL']['num_classes'],
                                train_config['MODEL']['seg_num'],
                                train_config['MODEL']['seglen'], 'RGB',
                                0.00002)
    opt = paddle.optimizer.Momentum(0.001,
                                    0.9,
                                    parameters=train_model.parameters())

    if args.pretrain:
        # load the pretrained model
        model_dict = paddle.load('best_model/best_model_seg12')

        train_model.set_state_dict(model_dict)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    train_dataset = ECO_Dataset(args.model_name.upper(),
                                train_config,
                                mode='train')

    # batching is handled inside the dataset, hence batch_size=None
    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=paddle.CUDAPlace(0),
                                        batch_size=None,
                                        batch_sampler=None)

    epochs = args.epoch or train_model.epoch_num()

    train_model.train()

    for i in range(epochs):

        for batch_id, data in enumerate(train_loader()):

            img = data[0]
            label = data[1]

            out, acc = train_model(img, label)

            # model may return None logits; skip the update in that case
            if out is not None:

                loss = paddle.nn.functional.cross_entropy(out, label)
                avg_loss = paddle.mean(loss)

                avg_loss.backward()

                opt.minimize(avg_loss)
                train_model.clear_gradients()

                # log + checkpoint every 200 steps
                if batch_id % 200 == 0:
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
                    paddle.save(train_model.state_dict(),
                                args.save_dir + '/ucf_model_hapi')
        # NOTE(review): records only the LAST batch's accuracy for the epoch
        all_train_rewards.append(acc.numpy())

        # validate_model is defined elsewhere in this file
        result = validate_model()

        all_test_rewards.append(result)
        if result > prev_result:
            prev_result = result
            print('The best result is ' + str(result))
            paddle.save(train_model.state_dict(),
                        'best_model/final_best_model_hapi')  # save the best model
    logger.info("Final loss: {}".format(avg_loss.numpy()))
    print("Final loss: {}".format(avg_loss.numpy()))

    np.savez('result/final_ucf_data_hapi.npz',
             all_train_rewards=all_train_rewards,
             all_test_rewards=all_test_rewards)
Example #6
0
def train(args):
    """Train the I3D_TPN model in dygraph mode, optionally data-parallel.

    Args:
        args: parsed CLI namespace; uses use_gpu, use_data_parallel, config,
            pretrain, save_dir, model_name and epoch.

    Side effects: checkpoints to ``args.save_dir + '/I3D_tpn_model'`` during
    training and ``args.save_dir + '/tpn_best'`` whenever validation
    accuracy improves.
    """
    # parse config / choose the execution place
    if args.use_gpu:
        if args.use_data_parallel:
            place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
        else:
            place = fluid.CUDAPlace(0)
    else:
        # BUG FIX: the original called fluid.CPUPlace() without binding the
        # result, leaving `place` undefined on the CPU path (NameError below).
        place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        print_configs(train_config, 'Train')

        val_config = merge_configs(config, 'valid', vars(args))
        print_configs(val_config, "Valid")

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        # instantiate train_model from the custom network definition
        train_model = I3D_TPN(config)
        # steps per epoch: explicit config value, else derived from dataset size
        step = train_config.TRAIN.step if train_config.TRAIN.step is not None else int(
            train_config.TRAIN.all_num / train_config.TRAIN.batch_size)
        print("step for lr decay: %d" % step)
        # piecewise LR schedule: decay by learning_rate_decay at each boundary
        decay_epoch = train_config.TRAIN.learning_rate_decay_epoch
        learning_rate_decay = train_config.TRAIN.learning_rate_decay
        base_lr = train_config.TRAIN.learning_rate
        bd = [step * e for e in decay_epoch]
        lr = [base_lr * (learning_rate_decay**i) for i in range(len(bd) + 1)]
        if train_config.TRAIN.optimizer_type == 'SGD':
            opt = fluid.optimizer.Momentum(
                learning_rate=fluid.layers.piecewise_decay(boundaries=bd,
                                                           values=lr),
                momentum=train_config.TRAIN.momentum,
                parameter_list=train_model.parameters(),
                use_nesterov=train_config.TRAIN.use_nesterov,
                grad_clip=fluid.clip.GradientClipByNorm(clip_norm=40),
                regularization=fluid.regularizer.L2Decay(
                    regularization_coeff=train_config.TRAIN.l2_weight_decay))
        elif train_config.TRAIN.optimizer_type == 'Adam':
            opt = fluid.optimizer.Adam(
                learning_rate=fluid.layers.piecewise_decay(boundaries=bd,
                                                           values=lr),
                regularization=fluid.regularizer.L2Decay(
                    train_config['TRAIN']['l2_weight_decay']),
                parameter_list=train_model.parameters(),
            )

        if args.pretrain:
            # load the previously trained model and continue training from it
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/tsn_model')
            train_model.load_dict(model)

        if args.use_data_parallel:
            train_model = fluid.dygraph.parallel.DataParallel(
                train_model, strategy)

        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get reader
        train_config.TRAIN.batch_size = train_config.TRAIN.batch_size
        train_reader = KineticsReader(args.model_name.upper(), 'train',
                                      train_config).create_reader()

        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        val_reader = KineticsReader(args.model_name.upper(), 'valid',
                                    val_config).create_reader()
        print('go to training')
        epochs = args.epoch or train_model.epoch_num()
        acc_history = 0.0  # best validation accuracy so far
        for i in range(epochs):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')
                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                # TPN returns logits, accuracy, and an auxiliary head loss
                out, acc, loss_TPN = train_model(img, label)

                loss = fluid.layers.softmax_with_cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_TPN_loss = fluid.layers.mean(loss_TPN)

                # total objective = main CE loss + auxiliary TPN loss
                all_loss = avg_loss + avg_TPN_loss
                if args.use_data_parallel:
                    print(args.use_data_parallel)
                    all_loss = train_model.scale_loss(all_loss)
                    all_loss.backward()
                    train_model.apply_collective_grads()
                else:
                    all_loss.backward()

                opt.minimize(all_loss)
                train_model.clear_gradients()

                # periodic logging + checkpoint
                if batch_id % train_config.TRAIN.visual_step == 0:
                    current_lr = opt.current_step_lr()
                    logger.info(
                        "Loss at epoch {} step {}: {}, AUX loss: {} acc: {}, current_lr: {}"
                        .format(i, batch_id, avg_loss.numpy(),
                                avg_TPN_loss.numpy(), acc.numpy(), current_lr))
                    print(
                        "Loss at epoch {} step {}: {}, AUX loss: {}, acc: {}, current_lr: {}"
                        .format(i, batch_id, avg_loss.numpy(),
                                avg_TPN_loss.numpy(), acc.numpy(), current_lr))
                    fluid.dygraph.save_dygraph(
                        train_model.state_dict(),
                        args.save_dir + '/I3D_tpn_model')

            # validation pass in eval mode after every epoch
            print('go to eval')
            acc_list = []
            train_model.eval()
            for batch_id, data in enumerate(tqdm(val_reader())):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')
                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True
                out_jpg, acc_jpg, _ = train_model(img, label)
                out = out_jpg
                acc = fluid.layers.accuracy(input=out, label=label)
                acc_list.append(acc.numpy()[0])

            print("TPN验证集准确率为:%.6f" % (np.mean(acc_list)))
            print("BEST   TPN验证集准确率为:%.6f" % (acc_history))

            # keep a separate checkpoint of the best validation accuracy
            if np.mean(acc_list) > acc_history:
                acc_history = np.mean(acc_list)
                fluid.dygraph.save_dygraph(train_model.state_dict(),
                                           args.save_dir + '/tpn_best')
                print("TPN BEST验证集准确率为:{}".format(np.mean(acc_list)))

            train_model.train()

        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))
Example #7
0
def train(args):
    """Train the 3D-ResNet TSN model in dygraph mode.

    Args:
        args: parsed CLI namespace; uses use_gpu, config, pretrain,
            save_dir, model_name and epoch.

    Side effects: checkpoints to ``args.save_dir + '/3Dresnet'`` every step.
    """
    # parse config
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))

        print_configs(train_config, 'Train')
        train_model = Dresnet.TSNResNet(
            '3Dresnet',
            seglen=train_config['MODEL']['seglen'],
            seg_num=train_config['MODEL']['seg_num'],
            weight_decay=0.00002)

        opt = fluid.optimizer.Momentum(0.001,
                                       0.9,
                                       parameter_list=train_model.parameters())

        if args.pretrain:
            # load the previously trained model and continue training from it
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/3Dresnet')
            train_model.load_dict(model)

        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get reader
        train_config.TRAIN.batch_size = train_config.TRAIN.batch_size
        train_reader = KineticsReader(args.model_name.upper(), 'train',
                                      train_config).create_reader()

        epochs = args.epoch or train_model.epoch_num()
        for i in range(epochs):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc = train_model(img, label)

                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()

                opt.minimize(avg_loss)
                train_model.clear_gradients()

                # NOTE(review): `% 1` logs and checkpoints on EVERY batch
                if batch_id % 1 == 0:
                    logger.info("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
                    fluid.dygraph.save_dygraph(train_model.state_dict(),
                                               args.save_dir + '/3Dresnet')
        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))
def train(args):
    """Train ResNet_3d on UCF101 with validation every third epoch.

    Args:
        args: parsed CLI namespace; uses use_gpu, config, resume, pretrain,
            save_dir, model_name and epoch.

    Side effects: checkpoints to ``args.save_dir + '/resnet_3d_model'``
    after every epoch.
    """
    # parse config
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        valid_config = merge_configs(config, 'valid', vars(args))
        print_configs(train_config, 'train')

        # instantiate train_model from the custom network definition
        train_model = ResNet_3d()
        train_model.train()
        opt = fluid.optimizer.Momentum(
            config.TRAIN.learning_rate,
            0.9,
            parameter_list=train_model.parameters(),
            regularization=fluid.regularizer.L2Decay(
                config.TRAIN.l2_weight_decay))

        # load pretrained parameters,
        # or resume from the last trained model
        if args.resume == True:
            model, _ = fluid.dygraph.load_dygraph(args.save_dir +
                                                  '/resnet_3d_model.pdparams')
            train_model.load_dict(model)
            print('Resueme from ' + args.save_dir +
                  '/resnet_3d_model.pdparams')
        # elif args.pretrain:
        #     pretrain_weights = fluid.io.load_program_state(args.pretrain)
        #     inner_state_dict = train_model.state_dict()
        #     print('Pretrain with '+ args.pretrain)
        #     for name, para in inner_state_dict.items():
        #         if((para.name in pretrain_weights) and (not('fc' in para.name))):
        #             para.set_value(pretrain_weights[para.name])
        #         else:
        #             print('del '+ para.name)
        # initialize from 3D pretrained parameters, skipping the FC head
        elif args.pretrain:
            pretrain_weights = fluid.io.load_program_state(
                args.pretrain + '/resnet_3d_model1.pdparams')  # params converted from the pretrained model
            #print(a)
            inner_state_dict = train_model.state_dict()
            print('pretrain with' + args.pretrain)
            for name, para in inner_state_dict.items():
                if ((name in pretrain_weights) and (not ('fc' in para.name))):
                    para.set_value(pretrain_weights[name])
                else:
                    print('del' + para.name)
            #train_model.set_dict(a)
        else:
            pass

        # build model
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get reader
        train_config.TRAIN.batch_size = train_config.TRAIN.batch_size
        train_reader = Ucf101Reader(args.model_name.upper(), 'train',
                                    train_config).create_reader()
        valid_reader = Ucf101Reader(args.model_name.upper(), 'valid',
                                    valid_config).create_reader()
        epochs = args.epoch or train_config.TRAIN.epoch
        #print(epochs)
        for i in range(epochs):
            train_model.train()  # enable BatchNorm and Dropout (training mode)
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                #                out, acc = train_model(img, label)
                #print(img.shape)
                # model returns logits only; accuracy computed here
                out = train_model(img)
                acc = fluid.layers.accuracy(out, label)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()

                opt.minimize(avg_loss)
                train_model.clear_gradients()

                if batch_id % 10 == 0:
                    logger.info("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
                    print("Loss at epoch {} step {}: {}, acc: {}".format(
                        i, batch_id, avg_loss.numpy(), acc.numpy()))
            fluid.dygraph.save_dygraph(train_model.state_dict(),
                                       args.save_dir + '/resnet_3d_model')

            # validate every 3rd epoch (skipping epoch 0) in eval mode
            if ((i % 3) == 0 and i != 0):
                acc_list = []
                avg_loss_list = []
                train_model.eval()
                for batch_id, data in enumerate(valid_reader()):
                    dy_x_data = np.array([x[0]
                                          for x in data]).astype('float32')
                    y_data = np.array([[x[1]] for x in data]).astype('int64')

                    img = fluid.dygraph.to_variable(dy_x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    label.stop_gradient = True
                    out = train_model(img)
                    acc = fluid.layers.accuracy(out, label)
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                    acc_list.append(acc.numpy()[0])
                    avg_loss_list.append(avg_loss.numpy())
                    if batch_id % 20 == 0:
                        logger.info(
                            "valid Loss at step {}: {}, acc: {}".format(
                                batch_id, avg_loss.numpy(), acc.numpy()))
                        print("valid Loss at  step {}: {}, acc: {}".format(
                            batch_id, avg_loss.numpy(), acc.numpy()))
                print("验证集准确率为:{}".format(np.mean(acc_list)))
                print("验证集loss为:{}".format(np.mean(avg_loss_list)))
Example #9
0
def train(args):
    """Build and run static-graph training for an action-recognition model.

    Parses train/valid configs, adapts batch sizes to the number of visible
    GPUs, builds separate fluid Programs for train and valid, optionally
    restores resume or pretrain weights, then drives the loop through
    utils.train_with_pyreader.

    Args:
        args: parsed CLI namespace; fields used here include config,
            save_dir, use_gpu, model_name, resume, pretrain, epoch_num,
            log_interval and valid_interval.
    """
    logger.info("Start train program")
    # parse config
    config_info = config.parse_config(args.config)
    train_config = config.merge_configs(config_info, 'train', vars(args))
    valid_config = config.merge_configs(config_info, 'valid', vars(args))
    valid_config['MODEL']['save_dir'] = args.save_dir

    # Number of devices the global batch size is split across (1 on CPU).
    bs_denominator = 1
    if args.use_gpu:
        # check number of GPUs: CUDA_VISIBLE_DEVICES (when set) must agree
        # with the num_gpus declared in the config file.
        gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
        if gpus == "":
            pass
        else:
            gpus = gpus.split(",")
            num_gpus = len(gpus)
            assert num_gpus == train_config.TRAIN.num_gpus, \
                "num_gpus({}) set by CUDA_VISIBLE_DEVICES" \
                "shoud be the same as that" \
                "set in {}({})".format(
                    num_gpus, args.config, train_config.TRAIN.num_gpus)
        bs_denominator = train_config.TRAIN.num_gpus

    # adaptive batch size: cap at num_samples/10, round down to a multiple
    # of the device count, but never below one sample per device.
    train_batch_size_in = train_config.TRAIN.batch_size
    #  train_learning_rate_in = train_config.TRAIN.learning_rate
    train_config.TRAIN.batch_size = min(
        int(train_config.TRAIN.num_samples / 10), train_batch_size_in)
    train_config.TRAIN.batch_size = int(
        train_config.TRAIN.batch_size / bs_denominator) * bs_denominator
    train_config.TRAIN.batch_size = max(train_config.TRAIN.batch_size,
                                        bs_denominator)
    # train_config.TRAIN.learning_rate = float(train_learning_rate_in) / float(train_batch_size_in) \
    #     * train_config.TRAIN.batch_size

    # same adaptive-batch-size treatment for the validation split
    val_batch_size_in = valid_config.VALID.batch_size
    valid_config.VALID.batch_size = min(
        int(valid_config.VALID.num_samples / 10), val_batch_size_in)
    valid_config.VALID.batch_size = int(
        valid_config.VALID.batch_size / bs_denominator) * bs_denominator
    valid_config.VALID.batch_size = max(valid_config.VALID.batch_size,
                                        bs_denominator)

    # model remove bn when train every gpu batch_size is small: batch norm
    # statistics are unreliable below modelbn_min_everygpu_bs samples/GPU
    if int(train_config.TRAIN.batch_size /
           bs_denominator) < train_config.MODEL.modelbn_min_everygpu_bs:
        train_config.MODEL.with_bn = False
        valid_config.MODEL.with_bn = False
    else:
        train_config.MODEL.with_bn = True
        valid_config.MODEL.with_bn = True

    config.print_configs(train_config, 'Train')
    train_model = action_net.ActionNet(args.model_name,
                                       train_config,
                                       mode='train')
    valid_model = action_net.ActionNet(args.model_name,
                                       valid_config,
                                       mode='valid')

    # build model: separate Programs for train and valid sharing one startup
    # Program; unique_name.guard keeps parameter names aligned across both.
    startup = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup):
        with fluid.unique_name.guard():
            train_model.build_input(use_pyreader=True)
            train_model.build_model()
            # for the input, has the form [data1, data2,..., label], so train_feeds[-1] is label
            train_feeds = train_model.feeds()
            train_fetch_list = train_model.fetches()
            train_loss = train_fetch_list[0]
            # persistable so fetched vars survive across executor runs
            for item in train_fetch_list:
                item.persistable = True
            optimizer = train_model.optimizer()
            optimizer.minimize(train_loss)
            train_pyreader = train_model.pyreader()

    valid_prog = fluid.Program()
    with fluid.program_guard(valid_prog, startup):
        with fluid.unique_name.guard():
            valid_model.build_input(use_pyreader=True)
            valid_model.build_model()
            valid_feeds = valid_model.feeds()
            valid_fetch_list = valid_model.fetches()
            valid_pyreader = valid_model.pyreader()
            for item in valid_fetch_list:
                item.persistable = True

    # freeze the valid program for inference-only execution
    valid_prog = valid_prog.clone(for_test=True)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup)

    #print_prog(train_prog)
    #print_prog(valid_prog)

    if args.resume:
        # if resume weights is given, load resume weights directly
        assert os.path.exists(args.resume), \
            "Given resume weight dir {} not exist.".format(args.resume)

        def if_exist(var):
            # only load variables that have a matching file in the resume dir
            return os.path.exists(os.path.join(args.resume, var.name))

        fluid.io.load_vars(exe,
                           args.resume,
                           predicate=if_exist,
                           main_program=train_prog)
    else:
        # if not in resume mode, load pretrain weights
        if args.pretrain:
            assert os.path.exists(args.pretrain), \
                "Given pretrain weight dir {} not exist.".format(args.pretrain)
            # NOTE(review): args.pretrain is truthy here, so the fallback to
            # get_pretrain_weights() can never trigger — TODO confirm intent
            pretrain = args.pretrain or train_model.get_pretrain_weights()
            if pretrain:
                train_model.load_pretrain_params_file(exe, pretrain,
                                                      train_prog, place)

    build_strategy = fluid.BuildStrategy()
    build_strategy.enable_inplace = True

    # compile for multi-device execution; valid shares variables with train
    compiled_train_prog = fluid.compiler.CompiledProgram(
        train_prog).with_data_parallel(loss_name=train_loss.name,
                                       build_strategy=build_strategy)
    compiled_valid_prog = fluid.compiler.CompiledProgram(
        valid_prog).with_data_parallel(share_vars_from=compiled_train_prog,
                                       build_strategy=build_strategy)
    # get reader: readers yield per-device batches, so divide by device count
    train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size /
                                        bs_denominator)
    valid_config.VALID.batch_size = int(valid_config.VALID.batch_size /
                                        bs_denominator)
    print("config setting")
    train_dataload = feature_reader.FeatureReader(args.model_name.upper(),
                                                  'train', train_config,
                                                  bs_denominator)
    train_reader = train_dataload.create_reader()
    print("train reader")
    valid_dataload = feature_reader.FeatureReader(args.model_name.upper(),
                                                  'valid', valid_config,
                                                  bs_denominator)
    valid_reader = valid_dataload.create_reader()

    # get metrics
    train_metrics = accuracy_metrics.MetricsCalculator(args.model_name.upper(),
                                                       'train', train_config)
    valid_metrics = accuracy_metrics.MetricsCalculator(args.model_name.upper(),
                                                       'valid', valid_config)

    # CLI epoch count wins; otherwise fall back to the model's default
    epochs = args.epoch_num or train_model.epoch_num()
    print("epoch is ", epochs)

    # bind readers to all available devices
    exe_places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    train_pyreader.decorate_sample_list_generator(train_reader,
                                                  places=exe_places)
    valid_pyreader.decorate_sample_list_generator(valid_reader,
                                                  places=exe_places)

    # hand off to the shared training loop (logging, validation, checkpoints)
    utils.train_with_pyreader(
        exe,
        train_prog,
        compiled_train_prog,  # train_exe,
        train_pyreader,
        train_fetch_list,
        train_metrics,
        epochs=epochs,
        log_interval=args.log_interval,
        valid_interval=args.valid_interval,
        save_dir=args.save_dir,
        save_model_name=args.model_name,
        compiled_test_prog=compiled_valid_prog,  # test_exe=valid_exe,
        test_pyreader=valid_pyreader,
        test_fetch_list=valid_fetch_list,
        test_metrics=valid_metrics)

    logger.info("Finish program")
Пример #10
0
def train(args, distributed):
    """Train an ECO model in dygraph mode, optionally data-parallel.

    Implements gradient accumulation over 4 batches, gradient clipping,
    saturation-based learning-rate decay, and periodic validation with
    best-checkpoint saving.

    Args:
        args: parsed CLI namespace; fields used include config, gpu_num,
            gd (global-norm clip value or None), pretrain, save_dir,
            epoch, eval_freq and num_saturate.
        distributed: when True, run with dygraph DataParallel.
    """
    # ===================== GPU CONF =====================#
    if distributed:
        # parallel mode: each trainer uses the device assigned by the env
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    else:
        # single GPU mode: select gpu number explicitly
        args.use_gpu = True
        place = fluid.CUDAPlace(args.gpu_num) if args.use_gpu else fluid.CPUPlace()
    # ===================== Dygraph Mode =====================#
    with fluid.dygraph.guard(place):
        # leverage from TSN training script
        config = parse_config(args.config)
        train_config = merge_configs(config, 'train', vars(args))
        val_config = merge_configs(config, 'valid', vars(args))
        print_configs(train_config, 'Train')

        # ===================== Init ECO =====================#
        train_model = ECO.ECO(num_classes=train_config['MODEL']['num_classes'],
                              num_segments=train_config['MODEL']['seg_num'])
        if distributed:
            strategy = fluid.dygraph.parallel.prepare_context()
            train_model = fluid.dygraph.parallel.DataParallel(train_model, strategy)

        # trick 1: use clip gradient method to avoid gradient explosion
        # BUG FIX: default to None so the optimizer constructors below do
        # not raise NameError when args.gd is unset.
        clip = None
        if args.gd is not None:
            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=args.gd)
            print('clip:', clip)

        # ===================== Init Optimizer =====================#
        # optimizer config: use momentum, nesterov, weight decay, lr decay
        learning_rate = 0.001
        opt = fluid.optimizer.Momentum(learning_rate, 0.9,
                                       parameter_list=train_model.parameters(),
                                       use_nesterov=True,
                                       regularization=fluid.regularizer.L2Decay(regularization_coeff=5e-4),
                                       grad_clip=clip)
        # trick 2: Freezing BatchNorm2D except the first one.
        # trick 3: make all weight layer lr mult as 1, bias lr mult as 2.
        get_optim_policies(opt)
        print('get_optim_policies:--batch_norm_0.w_0', opt._parameter_list[2].optimize_attr, opt._parameter_list[2].stop_gradient)
        # BUG FIX: b_0 debug line now reads index [3] consistently
        # (was copy-pasted with [2] for stop_gradient).
        print('get_optim_policies:--batch_norm_0.b_0', opt._parameter_list[3].optimize_attr, opt._parameter_list[3].stop_gradient)

        # ===================== Use Pretrained Model =====================#
        # use pretrained model: ECO_Full_rgb_model_Kinetics.pth 2.tar(download from MZO git)
        # then transform it from torch to paddle weight except fc layer.
        if args.pretrain:
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/ECO_FULL_RGB_seg16')
            # also tried using pretrained model on torch, 32F-92.9%,16F-91.8% precision trained on torch
            # model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/eco_91.81_model_best')
            train_model.load_dict(model)

        # ensure the checkpoint directory exists
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # ===================== Init Data Reader =====================#
        # leverage from TSN training script
        train_reader = KineticsReader('ECO', 'train', train_config).create_reader()
        print('train_reader', train_reader)
        val_reader = KineticsReader('ECO', 'valid', val_config).create_reader()
        if distributed:
            train_reader = fluid.contrib.reader.distributed_batch_reader(train_reader)

        # ===================== Init Trick Params =====================#
        epochs = args.epoch or train_model.epoch_num()
        loss_summ = 0            # accumulated loss for 4-batch gradient accumulation
        saturate_cnt = 0         # epochs since last best prec@1
        exp_num = 0              # number of lr decays applied so far
        best_prec1 = 0

        for i in range(epochs):
            train_model.train()
            # trick 4: Saturate lr decay: different from lr piecewise decay or others
            # calculate prec every epoch, if prec1 does not rise for num_saturate
            # evaluations (model saturated), decay lr by 10x and rebuild the optimizer.
            if saturate_cnt == args.num_saturate:
                exp_num = exp_num + 1
                saturate_cnt = 0
                decay = 0.1 ** (exp_num)
                learning_rate = learning_rate * decay
                opt = fluid.optimizer.Momentum(learning_rate, 0.9,
                                               parameter_list=train_model.parameters(),
                                               use_nesterov=True,
                                               regularization=fluid.regularizer.L2Decay(regularization_coeff=5e-4),
                                               grad_clip=clip)
                print('get_optim_policies:--batch_norm_0.w_0', opt._parameter_list[2].optimize_attr,
                      opt._parameter_list[2].stop_gradient)
                print('get_optim_policies:--batch_norm_0.b_0', opt._parameter_list[3].optimize_attr,
                      opt._parameter_list[3].stop_gradient)
                print("- Learning rate decreases by a factor of '{}'".format(10 ** (exp_num)))

            for batch_id, data in enumerate(train_reader()):
                lr = opt.current_step_lr()
                print('lr:', lr)  # check lr every batch ids
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc = train_model(img, label)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                loss_summ += avg_loss
                if distributed:
                    avg_loss = train_model.scale_loss(avg_loss)
                avg_loss.backward()
                if distributed:
                    train_model.apply_collective_grads()

                if (batch_id + 1) % 4 == 0:
                    # trick 5: scale down gradients when iter size is functioning every 4 batches
                    opt.minimize(loss_summ)
                    opt.clear_gradients()
                    loss_summ = 0

                logger.info(
                    "Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))
                print("Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))

            # BUG FIX: compare against the local `epochs` — args exposes
            # `epoch` (used above), not `epochs`, so the old check raised
            # AttributeError on the last epoch.
            if (i + 1) % args.eval_freq == 0 or i == epochs - 1:
                train_model.eval()
                acc_list = []
                false_class = []   # labels of misclassified samples (diagnostic)

                for batch_id, data in enumerate(val_reader()):
                    dy_x_data = np.array([x[0] for x in data]).astype('float32')
                    y_data = np.array([[x[1]] for x in data]).astype('int64')

                    img = fluid.dygraph.to_variable(dy_x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    label.stop_gradient = True

                    out, acc = train_model(img, label)
                    if acc.numpy()[0] != 1:
                        false_class.append(label.numpy()[0][0])
                    acc_list.append(acc.numpy()[0])
                    print(batch_id, 'acc:', np.mean(acc_list))
                print("validate set acc:{}".format(np.mean(acc_list)))
                prec1 = np.mean(acc_list)
                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                if is_best:
                    saturate_cnt = 0
                    fluid.dygraph.save_dygraph(train_model.state_dict(),
                                               args.save_dir + '/ECO_FULL_1/' + str(i) + '_best_' + str(prec1))
                else:
                    saturate_cnt = saturate_cnt + 1

                print("- Validation Prec@1 saturates for {} epochs.".format(saturate_cnt), best_prec1)
                best_prec1 = max(prec1, best_prec1)

        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))
Пример #11
0
def train(args):
    """Train a ResNet3D model in dygraph mode, checkpointing every step.

    Args:
        args: parsed CLI namespace; fields used include use_gpu, config,
            model_name, pretrain, save_dir and epoch.
    """
    # select device
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        config = parse_config(args.config)
        # vars(args) lets CLI flags override config-file values
        train_config = merge_configs(config, 'train', vars(args))
        print_configs(train_config, 'Train configs : ')

        train_model = ResNet3D.ResNet3D('resnet', train_config['MODEL']['num_layers'],
                                        train_config['MODEL']['num_classes'],
                                        train_config['MODEL']['seg_num'],
                                        0.00002)

        # parameter_list tells the optimizer which parameters to update
        opt = fluid.optimizer.Momentum(0.001, 0.9, parameter_list=train_model.parameters())

        if args.pretrain:
            # resume from the previously saved checkpoint
            model, _ = fluid.dygraph.load_dygraph(args.save_dir + '/resnet_model')
            train_model.load_dict(model)

        # ensure the checkpoint directory exists
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)

        # get reader: create_reader() yields batches of (img, label) pairs
        # (removed a self-assignment of TRAIN.batch_size that had no effect)
        train_reader = KineticsReader(args.model_name.upper(), 'train', train_config).create_reader()

        epochs = args.epoch or train_model.epoch_num()
        for i in range(epochs):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32')
                y_data = np.array([[x[1]] for x in data]).astype('int64')

                # img is 5-D: presumably [batch_size, seg_num * seglen,
                # channels, height, width] — TODO confirm against the reader
                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)
                label.stop_gradient = True

                out, acc = train_model(img, label)

                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()
                opt.minimize(avg_loss)
                train_model.clear_gradients()

                # log and checkpoint every batch (the original guard
                # `batch_id % 1 == 0` was always true)
                logger.info("Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))
                print("Loss at epoch {} step {}: {}, acc: {}".format(i, batch_id, avg_loss.numpy(), acc.numpy()))
                fluid.dygraph.save_dygraph(train_model.state_dict(), args.save_dir + '/resnet_model')

        logger.info("Final loss: {}".format(avg_loss.numpy()))
        print("Final loss: {}".format(avg_loss.numpy()))