Example #1

import argparse
import copy
import json
import os
import os.path as osp
import pickle
import subprocess
import sys
import time


def main(ens_opt):
    # setup gpu
    try:
        gpu_id = int(subprocess.check_output('gpu_getIDs.sh', shell=True))
    except (subprocess.CalledProcessError, ValueError):
        print("Failed to get gpu_id (falling back to gpu_id=%d)" % ens_opt.gpu_id)
        gpu_id = ens_opt.gpu_id
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    ens_opt.logger.warn('GPU ID: %s | available memory: %dM' \
                        % (os.environ['CUDA_VISIBLE_DEVICES'], get_gpu_memory(gpu_id)))
    # NB: import tensorflow before torch; the reverse order can segfault
    import tensorflow as tf
    import torch
    import models.setup as ms
    from models.ensemble import Ensemble, eval_ensemble, eval_external_ensemble
    import models.cnn as cnn
    from loader import DataLoader, DataLoaderRaw

    # each entry of ens_opt.models is a one-element list; flatten it
    ens_opt.models = [m[0] for m in ens_opt.models]
    print('Models:', ens_opt.models)
    if not ens_opt.output:
        if not ens_opt.image_folder:
            evaldir = '%s/evaluations/%s' % (ens_opt.ensemblename,
                                             ens_opt.split)
        else:
            ens_opt.split = ens_opt.image_list.split('/')[-1].split('.')[0]
            print('Split :: ', ens_opt.split)
            evaldir = '%s/evaluations/server_%s' % (ens_opt.ensemblename,
                                                    ens_opt.split)

        if not osp.exists(evaldir):
            os.makedirs(evaldir)
        ens_opt.output = '%s/bw%d' % (evaldir, ens_opt.beam_size)
    models_paths = []
    cnn_models = []
    rnn_models = []
    options = []
    # Reformat:
    for m in ens_opt.models:
        models_paths.append('save/%s/model-best.pth' % m)  # FIXME check that cnn-best is the one loaded
        infos_path = "save/%s/infos-best.pkl" % m
        with open(infos_path, 'rb') as f:
            print('Opening %s' % infos_path)
            infos = pickle.load(f, encoding="iso-8859-1")
        vocab = infos['vocab']
        iopt = infos['opt']
        # define single model options
        params = copy.copy(vars(ens_opt))
        params.update(vars(iopt))
        opt = argparse.Namespace(**params)
        opt.modelname = 'save/' + m
        opt.start_from_best = ens_opt.start_from_best
        opt.beam_size = ens_opt.beam_size
        opt.batch_size = ens_opt.batch_size
        opt.logger = ens_opt.logger
        if opt.start_from_best:
            flag = '-best'
            opt.logger.warn('Starting from the best saved model')
        else:
            flag = ''
        opt.cnn_start_from = osp.join(opt.modelname, 'model-cnn%s.pth' % flag)
        opt.infos_start_from = osp.join(opt.modelname, 'infos%s.pkl' % flag)
        opt.start_from = osp.join(opt.modelname, 'model%s.pth' % flag)
        opt.logger.warn('Starting from %s' % opt.start_from)

        # Load infos
        with open(opt.infos_start_from, 'rb') as f:
            print('Opening %s' % opt.infos_start_from)
            infos = pickle.load(f, encoding="iso-8859-1")
            infos['opt'].logger = None
        ignore = [
            "batch_size", "beam_size", "start_from", 'cnn_start_from',
            'infos_start_from', "start_from_best", "language_eval", "logger",
            "val_images_use", 'input_data', "loss_version", "region_size",
            "use_adaptive_pooling", "clip_reward", "gpu_id", "max_epochs",
            "modelname", "config", "sample_max", "temperature"
        ]
        for k in list(vars(infos['opt']).keys()):
            if k not in ignore and "learning" not in k:
                if k in vars(opt):
                    assert vars(opt)[k] == vars(infos['opt'])[k], (
                        '%s option not consistent: %s vs. %s' %
                        (k, vars(opt)[k], vars(infos['opt'])[k]))
                else:
                    # copy over options from the saved model
                    vars(opt)[k] = vars(infos['opt'])[k]

        opt.fliplr = 0
        opt.language_creativity = 0
        opt.seq_per_img = 5
        opt.bootstrap = 0
        opt.sample_cap = 0
        vocab = infos['vocab']  # ix -> word mapping
        # Build CNN model for single branch use
        if opt.cnn_model.startswith('resnet'):
            cnn_model = cnn.ResNetModel(opt)
        elif opt.cnn_model.startswith('vgg'):
            cnn_model = cnn.VggNetModel(opt)
        else:
            print('Unknown model %s' % opt.cnn_model)
            sys.exit(1)

        cnn_model.cuda()
        cnn_model.eval()
        model = ms.select_model(opt)
        model.load()
        model.cuda()
        model.eval()
        options.append(opt)
        cnn_models.append(cnn_model)
        rnn_models.append(model)

    # Create the Data Loader instance
    start = time.time()
    external = False
    if len(ens_opt.image_folder) == 0:
        loader = DataLoader(options[0])
    else:
        external = True
        loader = DataLoaderRaw({
            'folder_path': ens_opt.image_folder,
            'files_list': ens_opt.image_list,
            'batch_size': ens_opt.batch_size
        })
        loader.ix_to_word = vocab

    # Define the ensemble:
    ens_model = Ensemble(rnn_models, cnn_models, ens_opt)

    if external:
        preds = eval_external_ensemble(ens_model, loader, vars(ens_opt))
    else:
        preds, lang_stats = eval_ensemble(ens_model, loader, vars(ens_opt))
    print("Finished evaluation in ", (time.time() - start))
    if ens_opt.dump_json == 1:
        # dump the json
        json.dump(preds, open(ens_opt.output + ".json", 'w'))
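
For reference, main() expects ens_opt to be an argparse namespace assembled elsewhere. Below is a minimal sketch of the fields it reads; the option names are taken from the function body, but the defaults and the attached logger are illustrative assumptions, not the original script's values:

import argparse
import logging

parser = argparse.ArgumentParser()
# action='append' with nargs=1 yields a list of one-element lists,
# which explains the [m[0] for m in ens_opt.models] flattening in main()
parser.add_argument('--models', action='append', nargs=1, default=[])
parser.add_argument('--ensemblename', default='ensemble')
parser.add_argument('--split', default='val')
parser.add_argument('--image_folder', default='')
parser.add_argument('--image_list', default='')
parser.add_argument('--output', default='')
parser.add_argument('--beam_size', type=int, default=3)
parser.add_argument('--batch_size', type=int, default=10)
parser.add_argument('--start_from_best', type=int, default=1)
parser.add_argument('--gpu_id', type=int, default=0)
parser.add_argument('--dump_json', type=int, default=1)

ens_opt = parser.parse_args(['--models', 'my_model'])
ens_opt.logger = logging.getLogger('ensemble')
main(ens_opt)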
Example #2

import os
import random
import subprocess
import sys
import time
from math import exp

import numpy as np


def train(opt):
    """
    main training loop
    """
    # setup gpu
    try:
        gpu_id = int(subprocess.check_output('gpu_getIDs.sh', shell=True))
    except (subprocess.CalledProcessError, ValueError):
        print("Failed to get gpu_id (falling back to gpu_id=%d)" % opt.gpu_id)
        gpu_id = opt.gpu_id
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    opt.logger.warn('GPU ID: %s | available memory: %dM' \
                    % (os.environ['CUDA_VISIBLE_DEVICES'], get_gpu_memory(gpu_id)))
    import torch
    import torch.nn as nn
    from torch.autograd import Variable
    import torch.optim as optim
    from loader import DataLoader
    import models.eval_utils as evald
    import models.cnn as cnn
    import models.setup as ms
    import utils
    import utils.logging as lg
    from tensorboardX import SummaryWriter

    # reproducibility:
    torch.manual_seed(opt.seed)
    random.seed(opt.seed)
    np.random.seed(opt.seed)
    loader = DataLoader(opt)
    opt.vocab_size = loader.vocab_size + 1
    opt.seq_length = loader.seq_length
    opt.lr_wait = 0

    tb_writer = SummaryWriter(opt.eventname)
    # tb_writer = tf.summary.FileWriter(opt.eventname)
    iteration, epoch, opt, infos, history = ms.recover_infos(opt)
    if opt.shift_epoch:
        opt.logger.warn('Resetting epoch count (%d -> 0)' % epoch)
        epoch = 0
    opt.logger.warn('Starting from iteration %d (epoch %d)' %
                    (iteration, epoch))
    # Recover data iterator and best perf
    loader.iterators = infos.get('iterators', loader.iterators)
    best_val_score = None  # stays None unless recovered below
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)
    opt.logger.warn(
        'using single CNN branch with feature maps as regions embeddings')
    # Build CNN model for single branch use
    if opt.cnn_model.startswith('resnet'):
        cnn_model = cnn.ResNetModel(opt)
    elif opt.cnn_model.startswith('vgg'):
        cnn_model = cnn.VggNetModel(opt)
    else:
        opt.logger.error('Unknown model %s' % opt.cnn_model)
        sys.exit(1)
    try:
        cnn_model.cuda()
    except Exception:
        # fall back to an explicit device id
        cnn_model.cuda(gpu_id)
    # Build the captioning model
    opt.logger.error('-----------------------------SETUP')
    model = ms.select_model(opt)
    # model.define_loss(loader.get_vocab())
    model.load()
    opt.logger.error('-----------------------------/SETUP')
    model.cuda()
    update_lr_flag = True
    # Assure in training mode
    model.train()
    cnn_model.eval()
    model.define_loss(loader.get_vocab())
    optimizers = ms.set_optimizer(opt, epoch, model, cnn_model)
    lg.log_optimizer(opt, optimizers)
    # Main loop
    # Decrement so the first in-loop increment restores the recovered count
    # (allows an eval/save pass before any new training progress).
    iteration -= 1
    val_losses = []
    while True:
        if update_lr_flag:
            # Assign the learning rate
            opt = utils.manage_lr(epoch, opt, val_losses)
            utils.scale_lr(optimizers, opt.scale_lr)  # set the decayed rate
            lg.log_optimizer(opt, optimizers)
            # Assign the scheduled sampling prob
            if opt.scheduled_sampling_strategy == "step":
                if epoch >= opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
                    frac = (epoch - opt.scheduled_sampling_start
                            ) // opt.scheduled_sampling_increase_every
                    opt.ss_prob = min(
                        opt.scheduled_sampling_increase_prob * frac,
                        opt.scheduled_sampling_max_prob)
                    model.ss_prob = opt.ss_prob
                    opt.logger.warn('ss_prob= %.2e' % model.ss_prob)
            if opt.loss_version in ['word', 'seq'] and opt.alpha_strategy == "step":
                if epoch >= opt.alpha_increase_start:
                    # Update ncrit's alpha:
                    opt.logger.warn('Updating alpha')
                    frac = (epoch - opt.alpha_increase_start
                            ) // opt.alpha_increase_every
                    new_alpha = min(opt.alpha_increase_factor * frac,
                                    opt.alpha_max)
                    model.crit.alpha = new_alpha
                    opt.logger.warn('New alpha %.3e' % new_alpha)
            update_lr_flag = False

        if opt.scheduled_sampling_strategy == "sigmoid":
            if epoch >= opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
                opt.logger.warn("setting up the ss_prob")
                opt.ss_prob = 1 - opt.scheduled_sampling_speed / (
                    opt.scheduled_sampling_speed +
                    exp(iteration / opt.scheduled_sampling_speed))
                model.ss_prob = opt.ss_prob
                opt.logger.warn("ss_prob =  %.3e" % model.ss_prob)
        if opt.loss_version in ['word', 'seq'] and opt.alpha_strategy == "sigmoid":
            # Update crit's alpha:
            opt.logger.warn('Updating the loss scaling param alpha')
            new_alpha = 1 - opt.alpha_speed / (
                opt.alpha_speed + exp(iteration / opt.alpha_speed))
            new_alpha = min(new_alpha, opt.alpha_max)
            model.crit.alpha = new_alpha
            opt.logger.warn('New alpha %.3e' % new_alpha)

        # Load data from train split (0)
        data = loader.get_batch('train')
        torch.cuda.synchronize()
        start = time.time()
        images = data['images']
        images = Variable(torch.from_numpy(images), requires_grad=False).cuda()
        att_feats, fc_feats = cnn_model.forward_caps(images, opt.seq_per_img)
        ml_loss, loss, stats = model.step(data, att_feats, fc_feats, iteration,
                                          epoch)
        for optimizer in optimizers:
            optimizer.zero_grad()
        # // Move
        loss.backward()
        grad_norm = [utils.clip_gradient(optimizers, opt.grad_clip)]
        for optimizer in optimizers:
            optimizer.step()
        train_loss = loss.item()
        if np.isnan(train_loss):
            sys.exit('Loss is nan')
        train_ml_loss = ml_loss.item()
        # kld/recon losses exist only for model variants that define them
        try:
            train_kld_loss = kld_loss.item()
            train_recon_loss = recon_loss.item()
        except NameError:
            pass
        #  grad_norm = [utils.get_grad_norm(optimizer)]
        torch.cuda.synchronize()
        end = time.time()
        losses = {'train_loss': train_loss, 'train_ml_loss': train_ml_loss}

        lg.stderr_epoch(epoch, iteration, opt, losses, grad_norm, end - start)
        # Update the iteration and epoch
        iteration += 1
        if data['bounds']['wrapped']:
            epoch += 1
            update_lr_flag = True
        # Write the training loss summary
        if iteration % opt.losses_log_every == 0:
            lg.log_epoch(tb_writer, iteration, opt, losses, stats, grad_norm,
                         model)
            history['loss'][iteration] = float(losses['train_loss'])
            history['lr'][iteration] = opt.current_lr
            history['ss_prob'][iteration] = model.ss_prob
            history['scores_stats'][iteration] = stats

        # make evaluation on validation set, and save model
        if iteration % opt.save_checkpoint_every == 0:
            # eval model
            eval_kwargs = {'split': 'val', 'dataset': opt.input_data + '.json'}
            eval_kwargs.update(vars(opt))
            # eval_kwargs['batch_size'] = 5  # FIXME
            # print("eval kwargs: ", eval_kwargs)
            (val_ml_loss, val_loss, predictions,
             lang_stats) = evald.eval_split(cnn_model, model, loader,
                                            opt.logger, eval_kwargs)
            # Write validation result into summary
            lg.add_summary_value(tb_writer, 'validation_loss', val_loss,
                                 iteration)
            lg.add_summary_value(tb_writer, 'validation_ML_loss', val_ml_loss,
                                 iteration)

            if lang_stats:  # may be empty when language eval is disabled
                for k, v in lang_stats.items():
                    lg.add_summary_value(tb_writer, k, v, iteration)
            tb_writer.file_writer.flush()
            history['val_perf'][iteration] = {
                'loss': val_loss,
                'ml_loss': val_ml_loss,
                'lang_stats': lang_stats,
                'predictions': predictions
            }
            val_losses.insert(0, val_loss)
            # Save model if it improves CIDEr
            if opt.language_eval == 1:
                current_score = lang_stats['CIDEr']
            else:
                current_score = -val_loss
            best_flag = False
            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                best_flag = True
            lg.save_model(model, cnn_model, optimizers, opt, iteration, epoch,
                          loader, best_val_score, history, best_flag)
        # Stop if reaching max epochs
        if epoch > opt.max_epochs and opt.max_epochs != -1:
            opt.logger.info('Max epochs reached')
            break
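
Both examples call get_gpu_memory, which is not defined in these listings. Below is a plausible reconstruction (hypothetical; the original helper may differ) that reads free memory in MiB from nvidia-smi:

import subprocess

def get_gpu_memory(gpu_id):
    """Return free memory (MiB) of the given GPU, or -1 if the query fails."""
    try:
        out = subprocess.check_output(
            ['nvidia-smi', '--query-gpu=memory.free',
             '--format=csv,noheader,nounits'])
        free = [int(x) for x in out.decode().strip().splitlines()]
        return free[int(gpu_id)]
    except (OSError, subprocess.CalledProcessError, ValueError, IndexError):
        return -1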