Example #1
    return lang_stats


####################################################################################
# Main
####################################################################################
# initialize the data holder.

if __name__ == '__main__':

    opt = opts.parse_opt()
    if opt.path_opt is not None:
        with open(opt.path_opt, 'r') as handle:
            options_yaml = yaml.load(handle, Loader=yaml.FullLoader)
        utils.update_values(options_yaml, vars(opt))
    print(opt)
    cudnn.benchmark = True

    if opt.dataset == 'flickr30k':
        from misc.dataloader_flickr30k import DataLoader
    else:
        from misc.dataloader_coco import DataLoader

    if not os.path.exists(opt.checkpoint_path):
        os.makedirs(opt.checkpoint_path)

    ####################################################################################
    # Data Loader
    ####################################################################################
    dataset = DataLoader(opt, split='train')
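
Both examples merge a YAML config over the argparse defaults via utils.update_values, whose definition is not shown in these snippets. A minimal sketch of the merge behavior they appear to rely on (recursive, with YAML values overriding the parsed defaults; the real project helper may differ) could look like:

def update_values(src_dict, dst_dict):
    # Hypothetical sketch: copy YAML values onto the argparse namespace dict.
    # Since vars(opt) returns the namespace's own __dict__, mutating
    # dst_dict in place also updates opt itself.
    for key, value in src_dict.items():
        if isinstance(value, dict) and isinstance(dst_dict.get(key), dict):
            update_values(value, dst_dict[key])  # merge nested sections
        else:
            dst_dict[key] = value  # YAML value wins over the default
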
Example #2
def main():
    opt = opts.parse_opt()
    if opt.path_opt is not None:
        with open(opt.path_opt, 'r') as handle:
            options_yaml = yaml.load(handle, Loader=yaml.FullLoader)
        utils.update_values(options_yaml, vars(opt))

    opt.checkpoint_path = opt.checkpoint_path + opt.exp_name
    print('=============')
    print(opt.exp_name)
    print('=============')

    opt.input_json = opt.data_path + opt.input_json
    opt.input_dic = opt.data_path + opt.input_dic
    opt.seg_feature_root = opt.data_path + opt.seg_feature_root
    opt.feature_root = opt.data_path + opt.feature_root
    opt.proposal_h5 = opt.data_path + opt.proposal_h5
    opt.densecap_references = [
        opt.data_path + reference for reference in opt.densecap_references
    ]

    opt.test_mode = (opt.val_split == 'testing')
    if opt.enable_BUTD:
        assert opt.att_input_mode == 'region', 'region attention only under the BUTD mode'

    cudnn.benchmark = True

    torch.manual_seed(opt.seed)
    np.random.seed(opt.seed)
    random.seed(opt.seed)
    if opt.cuda:
        torch.cuda.manual_seed_all(opt.seed)

    if opt.dataset == 'anet':
        from misc.dataloader_anet import DataLoader
    else:
        raise Exception('only the anet dataset is supported!')

    if not os.path.exists(opt.checkpoint_path):
        os.makedirs(opt.checkpoint_path)

    # open the detection json file.
    print('DataLoader loading proposal file: ', opt.proposal_h5)
    h5_proposal_file = h5py.File(opt.proposal_h5, 'r', driver='core')
    num_proposals = h5_proposal_file['dets_num'][:]
    label_proposals = h5_proposal_file['dets_labels'][:]
    h5_proposal_file.close()

    # Data Loader
    dataset = DataLoader(opt,
                         split=opt.train_split,
                         seq_per_img=opt.seq_per_img,
                         num_proposals=num_proposals,
                         label_proposals=label_proposals)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=True,
                                             num_workers=opt.num_workers)

    dataset_val = DataLoader(opt,
                             split=opt.val_split,
                             seq_per_img=opt.seq_per_img,
                             num_proposals=num_proposals,
                             label_proposals=label_proposals)
    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=opt.batch_size,
                                                 shuffle=False,
                                                 num_workers=opt.num_workers)
    # ======================================================================

    # Build the Model
    opt.vocab_size = dataset.vocab_size
    opt.detect_size = dataset.detect_size
    opt.seq_length = opt.seq_length  # NOTE: self-assignment (no-op); dataset.seq_length may be the intended source
    opt.glove_w = torch.from_numpy(dataset.glove_w).float()
    opt.glove_vg_cls = torch.from_numpy(dataset.glove_vg_cls).float()
    opt.glove_clss = torch.from_numpy(dataset.glove_clss).float()

    opt.wtoi = dataset.wtoi
    opt.itow = dataset.itow
    opt.itod = dataset.itod
    opt.ltow = dataset.ltow
    opt.itoc = dataset.itoc
    opt.wtol = dataset.wtol
    opt.wtod = dataset.wtod
    opt.vg_cls = dataset.vg_cls

    if opt.att_model == 'cyclical':
        model = build_model(opt, device)
    else:
        raise ValueError('Unknown captioning model: {}'.format(opt.att_model))

    infos = {}
    histories = {}
    # if opt.start_from is not None:
    if opt.resume:
        if opt.load_best_score == 1:
            model_path = os.path.join(opt.checkpoint_path, 'model-best.pth')
            info_path = os.path.join(opt.checkpoint_path,
                                     'infos_' + opt.id + '-best.pkl')
        else:
            model_path = os.path.join(opt.checkpoint_path, 'model.pth')
            info_path = os.path.join(opt.checkpoint_path,
                                     'infos_' + opt.id + '.pkl')

        # open old infos and check if models are compatible
        with open(info_path, 'rb') as f:
            infos = pickle.load(f)
            saved_model_opt = infos['opt']

        # opt.learning_rate = saved_model_opt.learning_rate
        print('========================================')
        print('Loading the model %s...' % (model_path))
        if opt.inference_only:
            print('Running Inference only ...')
        print('========================================')
        # model.load_state_dict(torch.load(model_path))
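        # map_location remaps tensors that were saved on GPU to CPU, so the
        # checkpoint also loads on development machines without CUDA.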
        if not is_code_development():
            model.load_state_dict(torch.load(model_path))
        else:
            model.load_state_dict(
                torch.load(model_path,
                           map_location=lambda storage, loc: storage))

        if os.path.isfile(
                os.path.join(opt.checkpoint_path,
                             'histories_' + opt.id + '.pkl')):
            with open(
                    os.path.join(opt.checkpoint_path,
                                 'histories_' + opt.id + '.pkl'), 'rb') as f:
                histories = pickle.load(f)

    best_val_score = infos.get('best_val_score', None)
    iteration = infos.get('iter', 0)

    if opt.resume_decoder_exp_name != '' and not opt.resume:
        start_epoch = opt.start_epoch
    else:
        start_epoch = infos.get('epoch', 0)

    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    lr_history = histories.get('lr_history', {})
    ss_prob_history = histories.get('ss_prob_history', {})

    model = nn.DataParallel(model).to(device)

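    # Build one optimizer parameter group per tensor; the grounding and
    # visual-embedding layers ('ctx2pool_grd', 'vis_embed') are finetuned
    # at 1/10 of the base learning rate.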
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if ('ctx2pool_grd' in key) or ('vis_embed' in key):
                print('Finetune param: {}'.format(key))
                params += [{
                    'params': [value],
                    'lr': opt.learning_rate * 0.1,  # finetune the fc7 layer
                    'weight_decay': opt.weight_decay,
                    'betas': (opt.optim_alpha, opt.optim_beta)
                }]
            else:
                params += [{
                    'params': [value],
                    'lr': opt.learning_rate,
                    'weight_decay': opt.weight_decay,
                    'betas': (opt.optim_alpha, opt.optim_beta)
                }]

    print("Use %s as optmization method" % (opt.optim))
    optimizer = None
    if opt.optim == 'sgd':
        optimizer = optim.SGD(params, lr=opt.learning_rate, momentum=0.9)
    elif opt.optim == 'adam':
        optimizer = optim.Adam(params)
    elif opt.optim == 'adamax':
        optimizer = optim.Adamax(params)
    else:
        raise ValueError('Unknown optimizer: {}'.format(opt.optim))

    # set up tensorboard logger
    tb_logger = utils.set_tb_logger(
        opt.tb_log_dir, opt.exp_name,
        opt.resume) if not opt.inference_only else None

    # set up trainer
    trainer = Trainer(opt, dataset, model, optimizer, dataloader,
                      dataloader_val)

    # set up LR scheduler
    scheduler = ReduceLROnPlateau(optimizer,
                                  'max',
                                  patience=opt.patience,
                                  min_lr=opt.min_lr)

    best_score = {
        "Bleu_1": 0.0,
        "Bleu_2": 0.0,
        "Bleu_3": 0.0,
        "Bleu_4": 0.0,
        "METEOR": 0.0,
        "ROUGE_L": 0.0,
        "CIDEr": 0.0,
        "SPICE": 0.0
    }

    for epoch in range(start_epoch, opt.max_epochs):
        if not opt.inference_only:
            trainer.train(epoch, tb_logger=tb_logger)

        if epoch % opt.val_every_epoch == 0:
            with torch.no_grad():
                lang_stats = trainer.eval(epoch, tb_logger=tb_logger)

            if opt.inference_only:
                break

            # update learning rate by monitoring CIDEr score
            scheduler.step(lang_stats['CIDEr'])  # the extra epoch argument is deprecated in ReduceLROnPlateau.step

            # Save model if is improving on validation result
            current_score = lang_stats['CIDEr']

            best_flag = False
            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                best_flag = True
            checkpoint_path = os.path.join(opt.checkpoint_path, 'model.pth')
            if opt.mGPUs:
                torch.save(model.module.state_dict(), checkpoint_path)
            else:
                torch.save(model.state_dict(), checkpoint_path)
            print("model saved to {}".format(checkpoint_path))

            # Dump miscellaneous information
            infos['iter'] = iteration
            infos['epoch'] = epoch
            infos['best_val_score'] = best_val_score
            infos['opt'] = opt
            infos['vocab'] = dataset.itow

            histories['val_result_history'] = val_result_history
            histories['loss_history'] = loss_history
            histories['lr_history'] = lr_history
            histories['ss_prob_history'] = ss_prob_history
            with open(
                    os.path.join(opt.checkpoint_path,
                                 'infos_' + opt.id + '.pkl'), 'wb') as f:
                pickle.dump(infos, f)
            with open(
                    os.path.join(opt.checkpoint_path,
                                 'histories_' + opt.id + '.pkl'), 'wb') as f:
                pickle.dump(histories, f)

            if best_flag:
                checkpoint_path = os.path.join(opt.checkpoint_path,
                                               'model-best.pth')
                if opt.mGPUs:
                    torch.save(model.module.state_dict(), checkpoint_path)
                else:
                    torch.save(model.state_dict(), checkpoint_path)

                print("model saved to {} with best cider score {:.3f}".format(
                    checkpoint_path, best_val_score))
                with open(
                        os.path.join(opt.checkpoint_path,
                                     'infos_' + opt.id + '-best.pkl'),
                        'wb') as f:
                    pickle.dump(infos, f)

                # update best scores
                for metric in best_score:
                    best_score[metric] = lang_stats[metric]

            print("===================================")
            print("--> Highest scores on {} set at epoch {}".format(
                opt.val_split, epoch))
            for metric, score in sorted(best_score.items()):
                print('{}: {:.4f}'.format(metric, score))
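
The scheduler usage above is the standard PyTorch ReduceLROnPlateau pattern: in 'max' mode the learning rate is cut once the monitored score (CIDEr here) stops improving for `patience` epochs. A self-contained sketch of the same mechanism, with made-up scores for illustration:

from torch import nn, optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = nn.Linear(8, 8)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# 'max' mode: higher is better; decay by `factor` after `patience` flat epochs.
scheduler = ReduceLROnPlateau(optimizer, 'max', factor=0.5, patience=2, min_lr=1e-6)

fake_cider = [0.10, 0.20, 0.20, 0.20, 0.20, 0.20]  # plateaus after epoch 1
for epoch, score in enumerate(fake_cider):
    scheduler.step(score)
    print(epoch, optimizer.param_groups[0]['lr'])  # LR halves once the plateau exceeds patience
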
Example #3
def parse_opts():
    parser = argparse.ArgumentParser()

    # ID of this run
    parser.add_argument('--cfg_path', type=str, default='cfgs/esgn.yml', help='')
    parser.add_argument('--id', type=str, default='default', help='id of this run')
    parser.add_argument('--gpu_id', type=str, nargs='+', default=['0'])
    parser.add_argument('--seed', type=int, default=777)
    parser.add_argument('--disable_cudnn', type=int, default=1, help='disabling cudnn may work around some unknown bugs')
    parser.add_argument('--debug', action='store_true', help='using mini-dataset for fast debugging')

    #  ***************************** INPUT DATA PATH *****************************
    parser.add_argument('--train_caption_file', type=str,
                        default='data/captiondata/train_modified.json', help='')
    parser.add_argument('--invalid_video_json', type=str, nargs='+', default=['data/DBG_invalid_videos.json'])
    parser.add_argument('--val_caption_file', type=str, default='data/captiondata/val_1.json')
    parser.add_argument('--visual_feature_folder', type=str, default='data/resnet_bn')
    parser.add_argument('--train_proposal_file', type=str, default='',
                        help='generated results on the training set from a Temporal Action Proposal model')
    parser.add_argument('--eval_proposal_file', type=str, default='',
                        help='generated results on the validation set from a Temporal Action Proposal model')
    parser.add_argument('--visual_feature_type', type=str, default='c3d', choices=['c3d', 'resnet_bn', 'resnet'])
    parser.add_argument('--feature_dim', type=int, default=500, help='dim of frame-level feature vector')
    # parser.add_argument('--dict_file', type=str, default='data/vocabulary_activitynet.json', help='')

    parser.add_argument('--start_from', type=str, default='', help='id of the run with incomplete training')
    parser.add_argument('--start_from_mode', type=str, choices=['best', 'last'], default="last")
    parser.add_argument('--pretrain', action='store_true')
    parser.add_argument('--pretrain_path', type=str, default='', help='path of .pth')

    #  ***************************** DATALOADER OPTION *****************************
    parser.add_argument('--nthreads', type=int, default=4)
    parser.add_argument('--feature_sample_rate', type=int, default=1)
    parser.add_argument('--train_proposal_sample_num', type=int,
                        default=24,
                        help='number of sampled proposals (or proposal sequences); a bigger value may be better')
    parser.add_argument('--train_proposal_type', type=str, default='', help='gt, learnt_seq, learnt')

    # ***************************** Event ENCODER  *****************************
    parser.add_argument('--event_encoder_type', type=str, choices=['basic', 'TSRM'], default='basic')
    parser.add_argument('--hidden_dim', type=int, default=512, help='hidden size of all fc layers')
    parser.add_argument('--position_encoding_size', type=int, default=100)

    #  ***************************** DECODER  *****************************
    parser.add_argument('--decoder_input_feats_type', type=str, default='C', choices=['C', 'E', 'C+E'],
                        help='C:clip-level features, E: event-level features, C+E: both')
    parser.add_argument('--decoder_type', type=str, default="show_attend_tell",
                        choices=['show_attend_tell', 'hrnn', 'cmg_hrnn'])
    parser.add_argument('--rnn_size', type=int, default=512,
                        help='size of the rnn in number of hidden nodes in each layer')
    parser.add_argument('--num_layers', type=int, default=1, help='number of layers in the decoderRNN')

    parser.add_argument('--att_hid_size', type=int, default=512, help='the hidden size of the attention MLP')
    parser.add_argument('--drop_prob', type=float, default=0.5, help='strength of dropout in the Language Model RNN')
    parser.add_argument('--max_decoding_len', type=int, default=30, help='')

    #  ***************************** OPTIMIZER *****************************

    # optimizer
    parser.add_argument('--epoch', type=int, default=25)
    parser.add_argument('--batch_size', type=int, default=1, help='batch_size must be 1 when using hrnn')
    parser.add_argument('--grad_clip', type=float, default=100., help='clip gradients at this value')
    parser.add_argument('--optimizer_type', type=str, default='adam')
    parser.add_argument('--weight_decay', type=float, default=0, help='weight_decay')

    # lr
    parser.add_argument('--lr', type=float, default=1e-4, help='1e-4 for resnet feature and 5e-5 for C3D feature')
    parser.add_argument('--learning_rate_decay_start', type=float, default=8)
    parser.add_argument('--learning_rate_decay_every', type=float, default=3)
    parser.add_argument('--learning_rate_decay_rate', type=float, default=0.5)

    # scheduled sampling
    parser.add_argument('--scheduled_sampling_start', type=int, default=-1,
                        help='at what iteration to start decaying the ground-truth input probability')
    parser.add_argument('--basic_ss_prob', type=float, default=0, help='initial ss prob')
    parser.add_argument('--scheduled_sampling_increase_every', type=int, default=1,
                        help='every how many epochs thereafter to increase the scheduled sampling probability')
    parser.add_argument('--scheduled_sampling_increase_prob', type=float, default=0.05,
                        help='How much to update the prob')
    parser.add_argument('--scheduled_sampling_max_prob', type=float, default=0.25,
                        help='Maximum scheduled sampling prob.')

    #  ***************************** SAVING AND LOGGING *****************************
    parser.add_argument('--min_epoch_when_save', type=int, default=-1)
    parser.add_argument('--save_checkpoint_every', type=int, default=1)
    parser.add_argument('--save_all_checkpoint', action='store_true')
    parser.add_argument('--save_dir', type=str, default='./save', help='directory to store checkpointed models')

    #  ***************************** Evaluation *************************************
    parser.add_argument('--eval_score_threshold', type=float, default=0.)
    parser.add_argument('--eval_nms_threshold', type=float, default=1)
    parser.add_argument('--eval_top_n', type=int, default=100)
    args = parser.parse_args()

    if args.cfg_path:
        with open(args.cfg_path, 'r') as handle:
            options_yaml = yaml.load(handle, Loader=yaml.FullLoader)
        utils.update_values(options_yaml, vars(args))

    args.raw_feature_dim = args.feature_dim

    if args.debug:
        args.id = 'debug_' + time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
        args.min_epoch_when_save = 0
        args.save_checkpoint_every = 1
        args.shuffle = 0
        args.tap_epochs = 10
        args.train_caption_file = 'data/captiondata/train_modified_small.json'
        args.val_caption_file = 'data/captiondata/val_1_small.json'

    return args
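
The learning_rate_decay_* flags above describe a step schedule: after epoch learning_rate_decay_start, the learning rate is multiplied by learning_rate_decay_rate every learning_rate_decay_every epochs. A hedged sketch of how such flags are commonly turned into a per-epoch learning rate (the project's exact rule is not shown in this snippet):

def decayed_lr(base_lr, epoch, start=8, every=3, rate=0.5):
    # Illustrative step schedule; assumes no decay before `start`,
    # then one decay per `every` epochs elapsed since `start`.
    if start < 0 or epoch < start:
        return base_lr
    num_decays = (epoch - start) // every
    return base_lr * (rate ** num_decays)

# With the defaults (lr=1e-4, start=8, every=3, rate=0.5):
# epochs 0-10 keep 1e-4, epochs 11-13 use 5e-5, epochs 14-16 use 2.5e-5, and so on.
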
Example #4
File: opts.py Project: cxqj/49-SYSU
def parse_opts():
    parser = argparse.ArgumentParser()

    # ID of this run
    parser.add_argument('--cfg_path', type=str, default='cfgs/basic_rnn.yml', help='')
    parser.add_argument('--id', type=str, default='default', help='id of this run')
    parser.add_argument('--gpu_id', type=str, nargs='+', default=['0'])
    parser.add_argument('--seed', type=int, default=777)
    parser.add_argument('--disable_cudnn', type=int, default=1, help='disabling cudnn may work around some unknown bugs')
    parser.add_argument('--debug', action='store_true', help='using mini-dataset for fast debugging')

    #  ***************************** INPUT DATA PATH *****************************
    parser.add_argument('--train_caption_file', type=str,
                        default='data/captiondata/train_modified.json', help='')
    parser.add_argument('--invalid_video_json', type=str, default='data/resnet_bn_invalid_videos.json')
    parser.add_argument('--val_caption_file', type=str, default='data/captiondata/val_1.json')
    parser.add_argument('--visual_feature_folder', type=str, default='data/resnet_bn')
    parser.add_argument('--train_proposal_file', type=str, default='',
                        help='generated results on the training set from a Temporal Action Proposal model')
    parser.add_argument('--visual_feature_type', type=str, default='c3d', choices=['c3d', 'resnet_bn', 'resnet'])
    parser.add_argument('--feature_dim', type=int, default=500, help='dim of frame-level feature vector')
    parser.add_argument('--dict_file', type=str, default='data/vocabulary_activitynet.json', help='')

    parser.add_argument('--start_from', type=str, default='', help='id of the run with incomplete training')
    parser.add_argument('--start_from_mode', type=str, choices=['best', 'best-RL', 'last'], default="last")
    parser.add_argument('--pretrain', action='store_true')
    parser.add_argument('--pretrain_path', type=str, default='', help='path of .pth')

    #  ***************************** DATALOADER OPTION *****************************
    parser.add_argument('--nthreads', type=int, default=4)
    parser.add_argument('--feature_sample_rate', type=int, default=1)
    parser.add_argument('--train_proposal_sample_num', type=int,
                        default=24,
                        help='number of sampled proposals (or proposal sequences); a bigger value may be better')
    parser.add_argument('--train_proposal_type', type=str, default='', help='gt, learnt_seq, learnt')

    # ***************************** Event ENCODER  *****************************
    parser.add_argument('--event_encoder_type', type=str, choices=['basic', 'TSRM'], default='basic')
    parser.add_argument('--hidden_dim', type=int, default=512, help='hidden size of all fc layers')
    parser.add_argument('--group_num', type=int, default=16, help='')
    parser.add_argument('--use_posit_branch', type=int, default=1, help='')

    #  ***************************** CAPTION DECODER  *****************************
    parser.add_argument('--wordRNN_input_feats_type', type=str, default='C', choices=['C', 'E', 'C+E'],
                        help='C:clip-level features, E: event-level features, C+E: both')
    parser.add_argument('--caption_decoder_type', type=str, default="show_attend_tell",
                        choices=['show_attend_tell', 'hrnn', 'cmg_hrnn'])
    parser.add_argument('--rnn_size', type=int, default=512,
                        help='size of the rnn in number of hidden nodes in each layer')
    parser.add_argument('--num_layers', type=int, default=1, help='number of layers in the RNN')
    parser.add_argument('--input_encoding_size', type=int, default=512,
                        help='the encoding size of each token in the vocabulary')
    parser.add_argument('--att_hid_size', type=int, default=512, help='the hidden size of the attention MLP')
    parser.add_argument('--drop_prob', type=float, default=0.5, help='strength of dropout in the Language Model RNN')
    parser.add_argument('--max_caption_len', type=int, default=30, help='')

    #  ***************************** OPTIMIZER *****************************

    # optimizer
    parser.add_argument('--epoch', type=int, default=25)
    parser.add_argument('--batch_size', type=int, default=1, help='batch_size must be 1 when using hrnn')
    parser.add_argument('--grad_clip', type=float, default=100., help='clip gradients at this value')
    parser.add_argument('--optimizer_type', type=str, default='adam')
    parser.add_argument('--weight_decay', type=float, default=0, help='weight_decay')

    # lr
    parser.add_argument('--lr', type=float, default=1e-4, help='1e-4 for resnet feature and 5e-5 for C3D feature')
    parser.add_argument('--learning_rate_decay_start', type=float, default=8)
    parser.add_argument('--learning_rate_decay_every', type=float, default=3)
    parser.add_argument('--learning_rate_decay_rate', type=float, default=0.5)

    # scheduled sampling
    """
    结果欠佳原因在这里
        (1)在训练阶段的decoder,是将目标样本["吃","兰州","拉面"]作为输入下一个预测分词的输入。
        (2)而在预测阶段的decoder,是将上一个预测结果,作为下一个预测值的输入。(注意查看预测多的箭头)
           这个差异导致了问题的产生,训练和预测的情景不同。在预测的时候,如果上一个词语预测错误,还后面全部都会跟着错误,蝴蝶效应。
   基础模型只会使用真实lable数据作为输入, 现在,train-decoder不再一直都是真实的lable数据作为下一个时刻的输入。train-decoder时以一个
   概率P选择模型自身的输出作为下一个预测的输入,以1-p选择真实标记作为下一个预测的输入。Secheduled sampling(计划采样),即采样率P在训练
   的过程中是变化的。一开始训练不充分,先让P小一些,尽量使用真实的label作为输入,随着训练的进行,将P增大,多采用自身的输出作为下一个
   预测的输入。随着训练的进行,P越来越大大,train-decoder模型最终变来和inference-decoder预测模型一样,消除了train-decoder与inference-decoder
   之间的差异
   总之:通过这个scheduled-samping方案,抹平了训练decoder和预测decoder之间的差异!让预测结果和训练时的结果一样。
   参考:https://www.cnblogs.com/panfengde/p/10315576.html
    """
    parser.add_argument('--scheduled_sampling_start', type=int, default=-1,
                        help='at what iteration to start decaying the ground-truth input probability')
    parser.add_argument('--basic_ss_prob', type=float, default=0, help='initial ss prob')
    parser.add_argument('--scheduled_sampling_increase_every', type=int, default=2,
                        help='every how many iterations thereafter to increase the scheduled sampling probability')
    parser.add_argument('--scheduled_sampling_increase_prob', type=float, default=0.05,
                        help='How much to update the prob')
    parser.add_argument('--scheduled_sampling_max_prob', type=float, default=0.25,
                        help='Maximum scheduled sampling prob.')

    # self critical learning
    parser.add_argument('--self_critical_after', type=int, default=-1)   # whether to use reinforcement learning to train the model

    #  ***************************** SAVING AND LOGGING *****************************
    parser.add_argument('--min_epoch_when_save', type=int, default=-1)
    parser.add_argument('--save_checkpoint_every', type=int, default=1)
    parser.add_argument('--save_all_checkpoint', action='store_true')
    parser.add_argument('--save_dir', type=str, default='./save', help='directory to store checkpointed models')

    #  ***************************** Evaluation *************************************
    parser.add_argument('--eval_score_threshold', type=float, default=0.)
    parser.add_argument('--eval_nms_threshold', type=float, default=1)
    parser.add_argument('--eval_top_n', type=int, default=100)
    args = parser.parse_args()

    if args.cfg_path:
        with open(args.cfg_path, 'r') as handle:
            options_yaml = yaml.load(handle, Loader=yaml.FullLoader)
        utils.update_values(options_yaml, vars(args))

    args.raw_feature_dim = args.feature_dim

    if args.debug:
        args.id = 'debug_' + time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
        args.min_epoch_when_save = 0
        args.save_checkpoint_every = 1
        args.shuffle = 0
        args.tap_epochs = 10
        args.train_caption_file = 'data/captiondata/train_modified_small.json'
        args.val_caption_file = 'data/captiondata/val_1_small.json'

    return args
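
As a companion to the scheduled-sampling explanation in the docstring above, here is a hedged sketch of how the scheduled_sampling_* flags are typically converted into a per-epoch probability of feeding the decoder its own prediction instead of the ground-truth token (the project's exact schedule is not shown in these snippets):

def scheduled_sample_prob(epoch, start=-1, every=2, increase=0.05,
                          max_prob=0.25, basic=0.0):
    # Illustrative ramp: disabled before `start`, then raised by
    # `increase` every `every` epochs, capped at `max_prob`.
    if start < 0 or epoch < start:
        return basic
    steps = (epoch - start) // every
    return min(basic + steps * increase, max_prob)

# Inside a training decoder step (sketch):
#   p = scheduled_sample_prob(epoch, start=0)
#   next_input = prev_prediction if random.random() < p else ground_truth_token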