示例#1
0
def eval_external_ensemble(ensemble, loader, eval_kwargs=None):
    """Sample captions from an ensemble on an external image split.

    Iterates over ``split`` of ``loader``, extracts CNN features with the
    ensemble's CNNs, samples one caption per image and collects them.

    Args:
        ensemble: object exposing ``cnn_models``, ``models``, ``get_feats``
            and ``sample``.
        loader: data loader with ``reset_iterator`` / ``get_batch`` /
            ``get_vocab``.
        eval_kwargs: optional dict of options; ``num_images`` (-1 = all),
            ``split`` and ``logger`` are read here, the full dict is also
            forwarded to ``ensemble.sample``.

    Returns:
        list of dicts ``{'image_id': <short path>, 'caption': <str>}``.
    """
    # None default avoids the shared mutable-default-argument pitfall.
    if eval_kwargs is None:
        eval_kwargs = {}
    num_images = eval_kwargs.get('num_images', -1)
    split = eval_kwargs.get('split', 'val')
    logger = eval_kwargs.get('logger')

    # Make sure every sub-model is in evaluation mode.
    for cnn_model in ensemble.cnn_models:
        cnn_model.eval()
    for model in ensemble.models:
        model.eval()

    loader.reset_iterator(split)

    n = 0
    predictions = []
    seq_per_img = 5
    while True:
        data = loader.get_batch(split, seq_per_img=seq_per_img)
        n = n + loader.batch_size

        # Forward the CNNs then sample captions from the ensemble.
        images = data['images']
        images = Variable(torch.from_numpy(images), volatile=True).cuda()

        att_feats_ens, fc_feats_ens = ensemble.get_feats(images)
        seq, probs = ensemble.sample(fc_feats_ens, att_feats_ens, eval_kwargs)
        sents = utils.decode_sequence(loader.get_vocab(), seq)

        for k, sent in enumerate(sents):
            spath = short_path(data['infos'][k]['file_path'])
            print_sampled(spath, sent)
            entry = {'image_id': spath, 'caption': sent}
            predictions.append(entry)
        ix1 = data['bounds']['it_max']
        if num_images != -1:
            ix1 = min(ix1, num_images)
        # Drop the overshoot from the last (possibly partial) batch.
        for i in range(n - ix1):
            predictions.pop()
        if data['bounds']['wrapped']:
            break
        if n >= ix1:
            logger.warn('Evaluated the required samples (%s)' % n)
            break
    return predictions
示例#2
0
def eval_external(cnn_model, model, loader, eval_kwargs=None):
    """Sample captions from a single model on an external image split.

    Args:
        cnn_model: feature extractor exposing ``forward_caps``.
        model: captioning model exposing ``sample``.
        loader: data loader with ``reset_iterator`` / ``get_batch`` /
            ``get_vocab``.
        eval_kwargs: optional dict of options (``num_images``, ``split``,
            ``beam_size``, ``sample_max``, ``temperature``, ``forbid_unk``,
            ``logger``, ``caption_model``).

    Returns:
        list of dicts ``{'image_id': <short path>, 'caption': <str>}``.
    """
    # None default avoids the shared mutable-default-argument pitfall.
    if eval_kwargs is None:
        eval_kwargs = {}
    num_images = eval_kwargs.get('num_images', -1)
    split = eval_kwargs.get('split', 'val')
    logger = eval_kwargs.get('logger')
    caption_model = eval_kwargs.get('caption_model')
    # Sampling options forwarded to model.sample (duplicate beam_size
    # lookup from the original removed).
    beam_size = eval_kwargs.get('beam_size', 1)
    sample_max = eval_kwargs.get('sample_max', 1)
    temperature = eval_kwargs.get('temperature', 0.5)
    forbid_unk = eval_kwargs.get('forbid_unk', 1)

    print("Eval %s" % caption_model)

    # Make sure in the evaluation mode
    cnn_model.eval()
    model.eval()
    loader.reset_iterator(split)

    n = 0
    predictions = []
    seq_per_img = 1
    while True:
        data = loader.get_batch(split, seq_per_img=seq_per_img)
        n = n + loader.batch_size
        images = data['images']
        images = Variable(torch.from_numpy(images), volatile=True).cuda()
        att_feats, fc_feats, att_unique, fc_unique = cnn_model.forward_caps(
            images, seq_per_img, return_unique=True)
        seq, _ = model.sample(
            fc_feats, att_feats, {
                'beam_size': beam_size,
                'forbid_unk': forbid_unk,
                "sample_max": sample_max,
                "temperature": temperature
            })
        sents = decode_sequence(loader.get_vocab(), seq)

        for k, sent in enumerate(sents):
            spath = short_path(data['infos'][k]['file_path'])
            entry = {'image_id': spath, 'caption': sent}
            print_sampled(spath, sent)
            predictions.append(entry)
        ix1 = data['bounds']['it_max']
        if num_images != -1:
            ix1 = min(ix1, num_images)
        # Drop the overshoot from the last (possibly partial) batch.
        for i in range(n - ix1):
            predictions.pop()
        if data['bounds']['wrapped']:
            break
        if n >= ix1:
            logger.warn('Evaluated the required samples (%s)' % n)
            break
    # Switch back to training mode
    model.train()
    return predictions
示例#3
0
def score_trads(preds, trg_loader, eval_kwargs):
    """Score translations against gold target sentences with corpus BLEU.

    Walks the requested split of ``trg_loader`` to collect the gold
    sentences, then computes moses corpus BLEU of ``preds`` against them.

    Args:
        preds: list of predicted sentences (strings).
        trg_loader: target-language loader.
        eval_kwargs: dict with ``split``, ``batch_size`` and ``verbose``.

    Returns:
        dict ``{'Bleu': <moses corpus bleu>}``.
    """
    split = eval_kwargs.get('split', 'val')
    batch_size = eval_kwargs.get('batch_size', 80)
    verbose = eval_kwargs.get('verbose', 0)
    trg_loader.reset_iterator(split)
    ground_truths = []
    seen = 0
    while True:
        batch = trg_loader.get_trg_batch(split,
                                         range(batch_size),
                                         batch_size)
        seen += batch_size
        # Decode the gold sentences of this minibatch.
        gold_sents = decode_sequence(trg_loader.get_vocab(),
                                     batch['out_labels'],
                                     eos=trg_loader.eos,
                                     bos=trg_loader.bos)
        # Without verbose, print a sample every 1000 items only.
        show = verbose if verbose else not (seen % 1000)
        for pred_sent, gold_sent in zip(preds, gold_sents):
            ground_truths.append(gold_sent)
            if show:
                lg.print_sampled("", gold_sent, pred_sent)
        limit = batch['bounds']['it_max']
        if batch['bounds']['wrapped']:
            break
        if seen >= limit:
            print('Evaluated the required samples (%s)' % seen)
            break
    bleu_moses, _ = corpus_bleu(preds, ground_truths)
    return {'Bleu': bleu_moses}
示例#4
0
def evaluate_model(model, src_loader, trg_loader, logger, eval_kwargs):
    """Evaluate a seq2seq model on one split.

    Runs ``model.step`` over the whole split to accumulate losses, samples
    translations with ``model.sample``, and scores them with corpus BLEU.

    Args:
        model: translation model exposing ``step`` and ``sample``.
        src_loader: source-language loader.
        trg_loader: target-language loader (also supplies BOS/EOS/PAD/UNK).
        logger: logger used for progress messages.
        eval_kwargs: dict of options; ``batch_size``, ``split`` and
            ``verbose`` are read here and the special-token ids are
            injected before it is forwarded to ``model.sample``.

    Returns:
        tuple ``(preds, mean ml loss, mean total loss, corpus bleu)``.
    """
    preds = []
    ground_truths = []
    batch_size = eval_kwargs.get('batch_size', 1)
    split = eval_kwargs.get('split', 'val')
    verbose = eval_kwargs.get('verbose', 0)
    # Expose the loader's special-token ids to the sampler.
    eval_kwargs['BOS'] = trg_loader.bos
    eval_kwargs['EOS'] = trg_loader.eos
    eval_kwargs['PAD'] = trg_loader.pad
    eval_kwargs['UNK'] = trg_loader.unk

    # Make sure to be in evaluation mode
    model.eval()
    src_loader.reset_iterator(split)
    trg_loader.reset_iterator(split)
    n = 0
    loss_sum = 0
    ml_loss_sum = 0
    loss_evals = 0
    while True:
        # get batch; `order` aligns the target batch with the source batch
        data_src, order = src_loader.get_src_batch(split, batch_size)
        tmp = [data_src['labels']]
        input_lines_src, = [Variable(torch.from_numpy(_),
                                    requires_grad=False).cuda()
                           for _ in tmp]
        src_lengths = data_src['lengths']
        data_trg = trg_loader.get_trg_batch(split, order, batch_size)
        tmp = [data_trg['labels'], data_trg['out_labels'], data_trg['mask']]
        input_lines_trg_gold, output_lines_trg_gold, mask = [Variable(torch.from_numpy(_),
                                                                      requires_grad=False).cuda()
                                                             for _ in tmp]
        trg_lengths = data_trg['lengths']
        n += batch_size

        # decoder_logit = model(input_lines_src, input_lines_trg_gold)
        # if model.opt.sample_reward:
            # ml_loss, loss, stats = model.crit(model, input_lines_src, input_lines_trg_gold,
                                              # output_lines_trg_gold, mask)
        # else:
            # ml_loss, loss, stats = model.crit(decoder_logit, output_lines_trg_gold, mask)

        # Teacher-forced step to get the evaluation losses.
        ml_loss, loss, _ = model.step(input_lines_src, src_lengths,
                                      input_lines_trg_gold, trg_lengths,
                                      output_lines_trg_gold,
                                      mask)
        loss_sum += loss.data.item()
        ml_loss_sum += ml_loss.data.item()
        loss_evals = loss_evals + 1
        # Initialize target with <BOS> for every sentence Index = 2
        # print('Sampling sentence')
        # print('GPU:', os.environ['CUDA_VISIBLE_DEVICES'])
        start = time.time()
        # print('>>> Sampling:')
        batch_preds, _ = model.sample(input_lines_src, src_lengths, opt=eval_kwargs)
        if isinstance(batch_preds, list):
            # with beam search the predictions come back as an unpadded
            # list, so decode them one sequence at a time
            sent_preds = [decode_sequence(trg_loader.get_vocab(),
                                          np.array(pred).reshape(1, -1),
                                          eos=trg_loader.eos,
                                          bos=trg_loader.bos)[0]
                          for pred in batch_preds]
        else:
            # decode the whole padded batch at once
            sent_preds = decode_sequence(trg_loader.get_vocab(), batch_preds,
                                         eos=trg_loader.eos,
                                         bos=trg_loader.bos)
        # Do the same for the source and gold sentences (for logging)
        sent_source = decode_sequence(src_loader.get_vocab(),
                                      input_lines_src.data.cpu().numpy(),
                                      eos=src_loader.eos, bos=src_loader.bos)
        sent_gold = decode_sequence(trg_loader.get_vocab(),
                                    output_lines_trg_gold.data.cpu().numpy(),
                                    eos=trg_loader.eos,
                                    bos=trg_loader.bos)
        # Without verbose, print a sample roughly every 300 items only.
        if not verbose:
            verb = not (n % 300)
        else:
            verb = verbose
        for (sl, l, gl) in zip(sent_source, sent_preds, sent_gold):
            preds.append(l)
            ground_truths.append(gl)
            if verb:
                lg.print_sampled(sl, gl, l)
        ix1 = data_src['bounds']['it_max']
        if data_src['bounds']['wrapped']:
            break
        if n >= ix1:
            logger.warn('Evaluated the required samples (%s)' % n)
            break
    # print('Predictions lenght:', len(preds), len(ground_truths))
    # assert(len(preds) == trg_loader.h5_file['labels_val'].shape[0])
    bleu_moses, _ = corpus_bleu(preds, ground_truths)
    return preds, ml_loss_sum / loss_evals, loss_sum / loss_evals, bleu_moses
示例#5
0
def eval_split(cnn_model, model, loader, logger, eval_kwargs=None):
    """Evaluate a captioning model on a loader split.

    Computes the evaluation losses with teacher forcing, samples captions,
    and optionally runs the language-metric evaluation.

    Args:
        cnn_model: feature extractor exposing ``forward_caps``.
        model: captioning model exposing ``step``, ``sample``,
            ``region_size`` and ``cnn_finetuning``.
        loader: data loader with ``reset_iterator`` / ``get_batch`` /
            ``get_vocab`` and a ``flip`` attribute.
        logger: logger used for progress messages.
        eval_kwargs: optional dict of options (split, beam_size,
            sample_max, temperature, language_eval, ...).

    Returns:
        tuple ``(mean ml loss, mean total loss, predictions, lang_stats,
        preds)`` where ``lang_stats``/``preds`` are None unless
        ``language_eval`` is truthy.
    """
    # None default avoids the shared mutable-default-argument pitfall.
    if eval_kwargs is None:
        eval_kwargs = {}
    dataset = eval_kwargs.get('dataset', 'coco')
    split = eval_kwargs.get('split', 'val')
    val_images_use = eval_kwargs.get('val_images_use', -1)
    lang_eval = eval_kwargs.get('language_eval', 1)
    language_creativity = eval_kwargs.get('language_creativity', 1)
    all_metrics = eval_kwargs.get('all_metrics', 0)
    single_metrics = eval_kwargs.get('single_metrics', 0)
    beam_size = eval_kwargs.get('beam_size', 1)
    sample_max = eval_kwargs.get('sample_max', 1)
    temperature = eval_kwargs.get('temperature', 0.5)
    forbid_unk = eval_kwargs.get('forbid_unk', 1)
    batch_size = eval_kwargs.get('batch_size', 1)
    seq_per_img = eval_kwargs.get('seq_per_img')
    # Make sure to be in the evaluation mode
    cnn_model.eval()
    model.eval()
    logger.warn('Evaluating the %s split (%d)' % (split, val_images_use))
    loader.reset_iterator(split)
    n = 0
    loss_sum = 0
    ml_loss_sum = 0
    loss_evals = 0
    predictions = []
    while True:
        data = loader.get_batch(split,
                                batch_size=batch_size,
                                seq_per_img=seq_per_img)
        n = n + loader.batch_size
        images = data['images']
        images = Variable(torch.from_numpy(images), requires_grad=False).cuda()
        att_feats, fc_feats, att_unique, fc_unique = cnn_model.forward_caps(
            images, seq_per_img, return_unique=True)
        # Teacher-forced step for the evaluation losses.
        ml_loss, loss, stats = model.step(data,
                                          att_feats,
                                          fc_feats,
                                          train=False)
        ml_loss_sum += ml_loss.item()
        loss_sum += loss.item()
        loss_evals = loss_evals + 1
        # Sample from the unique (de-duplicated) features only.
        seq, probs = model.sample(fc_unique,
                                  att_unique,
                                  opt={
                                      'beam_size': beam_size,
                                      "forbid_unk": forbid_unk,
                                      "sample_max": sample_max,
                                      "temperature": temperature
                                  })
        sent_scores = probs.cpu().numpy().sum(axis=1)
        sents = decode_sequence(loader.get_vocab(), seq)
        for k, sent in enumerate(sents):
            if loader.flip:
                # With flipping, consecutive samples are the original and
                # the flipped image; keep the higher-scoring caption.
                entry = {
                    'image_id': data['infos'][k // 2]['id'],
                    'caption': sent,
                    'score': sent_scores[k]
                }
                if not k % 2:
                    unflipped = entry
                else:
                    if entry['score'] > unflipped['score']:
                        del entry['score']
                        predictions.append(entry)
                    else:
                        del unflipped['score']
                        predictions.append(unflipped)
            else:
                entry = {'image_id': data['infos'][k]['id'], 'caption': sent}
                predictions.append(entry)
            print_sampled(entry['image_id'], entry['caption'])
        ix1 = data['bounds']['it_max']
        if val_images_use != -1:
            ix1 = min(ix1, val_images_use)
        # Drop the overshoot from the last (possibly partial) batch.
        for i in range(n - ix1):
            predictions.pop()
        if data['bounds']['wrapped']:
            break
        if n >= ix1:
            logger.warn('Evaluated the required samples (%s)' % n)
            break
    lang_stats = None
    # FIX: `preds` was previously unbound when lang_eval was falsy,
    # raising NameError at the return statement.
    preds = None
    if lang_eval:
        lang_stats, preds, _ = language_eval(dataset, predictions, logger,
                                             all_metrics, single_metrics,
                                             language_creativity)
        print('preds:', preds)
    # Back to training:
    model.train()
    if model.cnn_finetuning:
        logger.warn('Finetuning cnn ON, filtering the BN layers')
        cnn_model.train()
        cnn_model.filter_bn()
    return ml_loss_sum / loss_evals, loss_sum / loss_evals, predictions, lang_stats, preds
示例#6
0
def generate_caps(encoder, decoder, crit, loader, eval_kwargs=None):
    """Generate multiple caption samples per image with an autoencoder LM.

    For every (single-image) batch of the split, encodes the gold captions
    and decodes ``tries`` new samples, dumping everything to
    ``output_file`` as JSON and optionally running the LM language eval.

    Args:
        encoder: caption encoder (``sample`` / ``sample_group`` / call).
        decoder: caption decoder exposing ``sample`` and ``sample_ltd``.
        crit: unused here; kept for signature compatibility.
        loader: data loader with batch size 1.
        eval_kwargs: optional dict of options (split, lm_model,
            vocab_size, sample_max, temperature, tries,
            sample_limited_vocab, output_file, logger, ...).

    Returns:
        1 (legacy success flag).
    """
    # None default avoids the shared mutable-default-argument pitfall.
    if eval_kwargs is None:
        eval_kwargs = {}
    split = eval_kwargs.get('split', 'train')
    lang_eval = eval_kwargs.get('language_eval', 1)
    # Beam search is forced off here regardless of eval_kwargs.
    beam_size = 1
    logger = eval_kwargs.get('logger')
    lm_model = eval_kwargs.get('lm_model')
    vocab_size = eval_kwargs.get('vocab_size')
    sample_max = eval_kwargs.get('sample_max')
    temperature = eval_kwargs.get('temperature')
    tries = eval_kwargs.get('tries', 5)
    sample_limited_vocab = eval_kwargs.get('sample_limited_vocab', 0)
    output_file = eval_kwargs.get('output_file')

    print('Using sample_max = %d  ||  temperature %.2f' %
          (sample_max, temperature))

    # Make sure in the evaluation mode
    encoder.eval()
    decoder.eval()
    logger.warn('Generating captions for the full training set')
    loader.reset_iterator(split)
    n = 0
    blobs = []
    SENTS = []
    gen_SENTS = []
    while True:
        data = loader.get_batch(split)
        n = n + loader.batch_size
        infos = data['infos']
        ids = [inf['id'] for inf in infos]
        assert len(ids) == 1, "Batch size larger than 1"
        tmp = [data['labels'], data['masks']]
        tmp = [
            Variable(torch.from_numpy(_), volatile=True).cuda() for _ in tmp
        ]
        labels, masks = tmp
        tr = 0
        gt = decode_sequence(loader.get_vocab(), labels[:, 1:].data)
        SENTS += gt
        blob_batch = {"id": ids[0], "gt": gt, "sampled": []}
        for igt in gt:
            # FIX: was print_sampled(ids[0], gt) — printed the whole
            # list on every iteration instead of the current sentence.
            print_sampled(ids[0], igt)

        while tr < tries:
            if lm_model == "rnn_vae":
                codes = encoder.sample(labels)
            elif lm_model == "rnn_multi_vae":
                codes = encoder.sample_group(labels)
            else:
                codes = encoder(labels)
            if sample_limited_vocab:
                # Restrict sampling to the words of the gold captions.
                sample_vocab = np.unique(labels[:, 1:].cpu().data.numpy())
                print("sample_vocab:", sample_vocab.tolist())
                seq, _ = decoder.sample_ltd(
                    codes, sample_vocab, {
                        'beam_size': beam_size,
                        "vocab_size": vocab_size,
                        "sample_max": sample_max,
                        "temperature": temperature
                    })
            else:
                seq, _ = decoder.sample(
                    codes, {
                        'beam_size': beam_size,
                        "vocab_size": vocab_size,
                        "sample_max": sample_max,
                        "temperature": temperature
                    })

            sents = decode_sequence(loader.get_vocab(), seq)
            gen_SENTS += sents
            for isent in sents:
                print_sampled(0, isent, warn=True)
            print('----------------------------------------------------')

            blob_batch['sampled'] += sents
            tr += 1
        # FIX: append before the break checks so the final batch is not
        # silently dropped from the dumped JSON.
        blobs.append(blob_batch)
        ix1 = data['bounds']['it_max']
        if data['bounds']['wrapped']:
            break
        if n >= ix1:
            logger.warn('Evaluated the required samples (%s)' % n)
            break
    json.dump(blobs, open(output_file, 'w'))
    if lang_eval:
        lang_stats = language_lm_eval(SENTS, gen_SENTS)
        print(lang_stats)
    encoder.train()
    decoder.train()
    return 1