Example #1
def test_get_moses_multi_bleu():
    hypotheses = ["The brown fox jumps over the dog 笑", "The brown fox jumps over the dog 2 笑"]
    references = [
        "The quick brown fox jumps over the lazy dog 笑",
        "The quick brown fox jumps over the lazy dog 笑"
    ]
    result = get_moses_multi_bleu(hypotheses, references, lowercase=False)
    np.testing.assert_almost_equal(result, 46.51, decimal=2)
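For reference, a minimal sketch of how get_moses_multi_bleu is typically imported and called (this assumes the PyTorch-NLP package is installed; the function shells out to Moses' multi-bleu.perl and returns a score on a 0-100 scale):

# minimal usage sketch, assuming the torchnlp package is available
from torchnlp.metrics import get_moses_multi_bleu

hypotheses = ["The brown fox jumps over the dog"]
references = ["The quick brown fox jumps over the lazy dog"]
score = get_moses_multi_bleu(hypotheses, references, lowercase=True)
print(score)  # BLEU on a 0-100 scale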
Example #2
def calculate_bleu(src, trg, corpus_level=False, weights=(0.25, 0.25, 0.25, 0.25), use_torchnlp=True):
    # NLTK path: src = [[ref1 tokens], [ref2 tokens], ...] (references), trg = [hypothesis tokens]
    # torchnlp path: get_moses_multi_bleu expects lists of plain sentence strings
    if not use_torchnlp:
        if not corpus_level:
            score = bleu_score.sentence_bleu(src, trg, weights=weights)
        else:
            score = bleu_score.corpus_bleu(src, trg, weights=weights)
    else:
        score = get_moses_multi_bleu(src, trg, lowercase=True)
    return score
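A short usage sketch for the helper above, with hypothetical inputs. Note that the NLTK branch treats src as a list of reference token lists and trg as a hypothesis token list, while the torchnlp branch passes src and trg straight through as lists of sentence strings:

# hypothetical inputs for the two branches of calculate_bleu
from nltk.translate import bleu_score  # used when use_torchnlp=False

refs_tokens = [["the", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"]]
hyp_tokens = ["the", "brown", "fox", "jumps", "over", "the", "dog"]
nltk_score = calculate_bleu(refs_tokens, hyp_tokens, use_torchnlp=False)

hyps = ["the brown fox jumps over the dog"]              # list of hypothesis strings
refs = ["the quick brown fox jumps over the lazy dog"]   # list of reference strings
moses_score = calculate_bleu(hyps, refs, use_torchnlp=True)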
Example #3
def evaluateRandomly(encoder, decoder, n, data_pairs):
    bleu_score_total = 0
    for i in range(n):
        pair = random.choice(data_pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        bleu_score_total += get_moses_multi_bleu([output_sentence], [pair[1]],
                                                 lowercase=True)
        print('<', output_sentence)
        print('')
    bleu_score_avg = bleu_score_total / n
    return bleu_score_avg
Example #4
    def evaluate(self, model, data):
        """ Evaluate a model on given dataset and return performance.

        Args:
            model (seq2seq.models): model to evaluate
            data (seq2seq.dataset.dataset.Dataset): dataset to evaluate against

        Returns:
            loss (float): loss of the given model on the given dataset
            accuracy (float): token-level accuracy over non-padding targets
            bleu (float): corpus BLEU score computed with get_moses_multi_bleu
        """
        model.eval()

        loss = self.loss
        loss.reset()
        match = 0
        total = 0

        # legacy torchtext convention: device=None uses the current GPU, -1 forces CPU
        device = None if torch.cuda.is_available() else -1
        batch_iterator = torchtext.data.BucketIterator(
            dataset=data,
            batch_size=self.batch_size,
            sort=True,
            sort_key=lambda x: len(x.src),
            device=device,
            train=False)
        tgt_vocab = data.fields[seq2seq.tgt_field_name].vocab
        pad = tgt_vocab.stoi[data.fields[seq2seq.tgt_field_name].pad_token]

        hypotheses, references = [], []
        with torch.no_grad():
            for batch in batch_iterator:
                # obtain input variables and input lengths
                # shape: (bs, lens) and (bs,)
                input_variables, input_lengths = getattr(
                    batch, seq2seq.src_field_name)
                # obtain target sentences
                # shape: (bs, lens)
                target_variables = getattr(batch, seq2seq.tgt_field_name)

                decoder_outputs, decoder_hidden, other = model(
                    input_variables, input_lengths.tolist(), target_variables)

                # Evaluation
                seqlist = other['sequence']
                for step, step_output in enumerate(decoder_outputs):
                    target = target_variables[:, step + 1]
                    loss.eval_batch(
                        step_output.view(target_variables.size(0), -1), target)

                    non_padding = target.ne(pad)
                    correct = seqlist[step].view(-1).eq(target).masked_select(
                        non_padding).sum().item()
                    match += correct
                    total += non_padding.sum().item()

                    # accumulate per-step decoder outputs and target ids;
                    # get_moses_multi_bleu (below) expects lists of sentence strings
                    hypotheses.extend(
                        step_output.view(target_variables.size(0),
                                         -1).tolist())
                    references.extend(target.tolist())

        if total == 0:
            accuracy = float('nan')
        else:
            accuracy = match / total

        bleu = get_moses_multi_bleu(hypotheses, references)

        return loss.get_loss(), accuracy, bleu
Example #5
def bleu(tar, pred):
    """Calculate the Moses multi-BLEU score given two lists of string tokens."""
    tar, pred = ' '.join(tar), ' '.join(pred)
    return get_moses_multi_bleu([tar], [pred])
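A quick usage sketch for this helper, with hypothetical token lists; each argument is joined into a single sentence string before Moses BLEU is computed:

# hypothetical token lists; bleu() joins each into one sentence string
target_tokens = ["the", "quick", "brown", "fox", "jumps"]
predicted_tokens = ["the", "brown", "fox", "jumps"]
print(bleu(target_tokens, predicted_tokens))  # Moses multi-BLEU on a 0-100 scale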
Example #6
            model.zero_grad()
            #reference.append(" ".join(sentence))
            cap_target = prepare_sequence(sentence, word_to_ix)

            cap_pred = model(cap_target,features[i])
            cap_pred = cap_pred.view(-1,vocab_size)
            maxval, maxidx = torch.max(cap_pred,dim=-1)
            sent=[]
            key_list=list(word_to_ix.keys())    
            for k in range(maxidx.shape[0]):
                sent.append(key_list[maxidx[k]])

            #hypothesis.append(" ".join(sent))
            #print(" ".join(sent))
            #print(" ".join(sentence))
            # get_moses_multi_bleu expects lists of sentence strings
            te = get_moses_multi_bleu([" ".join(sent)], [" ".join(sentence)], lowercase=True)
            #print(te)
            if te > ma:
                ma = te
        to = to + ma
        # print(ma)
    to = to / 30
    print(to)
              
#print(get_moses_multi_bleu(hypothesis, reference, lowercase=True))

er=0
pr=0

hypothesis=[]
reference=[]
#             print(features.shape)
        avlad = vladmodel(features[i])
#             print(avlad.shape)
        avlad = avlad.view(-1)
        cap_pred = model(cap_target,avlad)
        cap_pred = cap_pred.view(-1,vocab_size)
        maxval, maxidx = torch.max(cap_pred,dim=-1)
        sent=[]
        key_list=list(word_to_ix.keys())    
        for k in range(maxidx.shape[0]):
            sent.append(key_list[maxidx[k]])
        
        hypothesis.append(" ".join(sent))
            
print('bleu-score')
print(get_moses_multi_bleu(hypothesis, reference, lowercase=True))


### Validation set evaluation ###
datapath = 'video_data/'
cap_val= np.load(datapath+'captionsDev.npy',encoding='bytes')
cap_val.shape

feat_val = np.load('vgg_feat_val.npy')
val_feat = feat_val[:,:,:500,:,:]
val_feat.shape
features_val = torch.Tensor(val_feat).cuda()

def prepare_sequence(seq, to_ix):
    idxs = []
    for w in seq:
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    track_blue = []

    # Build up dataset
    s_train, s_test = load_from_big_file('obama_speech', g_sequence_len)
    # idx_to_word: List of id to word
    # word_to_idx: Dictionary mapping word to id
    idx_to_word, word_to_idx = fetch_vocab(s_train, s_train, s_test)
    # input_seq, target_seq = prepare_data(DATA_GERMAN, DATA_ENGLISH, word_to_idx)

    global VOCAB_SIZE
    VOCAB_SIZE = len(idx_to_word)
    save_vocab(CHECKPOINT_PATH + 'metadata.data', idx_to_word, word_to_idx,
               VOCAB_SIZE, g_emb_dim, g_hidden_dim)

    print('VOCAB SIZE:', VOCAB_SIZE)
    # Define Networks
    generator = Generator(VOCAB_SIZE, g_emb_dim, g_hidden_dim, opt.cuda)

    if opt.cuda:
        generator = generator.cuda()

    # Generate toy data using target lstm
    print('Generating data ...')

    # Generate samples either from sentences file or lstm
    # Sentences file will be structured input sentences
    # LSTM based is BOG approach
    generate_real_data('obama_speech', BATCH_SIZE, GENERATED_NUM, idx_to_word,
                       word_to_idx, POSITIVE_FILE, TEST_FILE)
    # generate_samples(target_lstm, BATCH_SIZE, GENERATED_NUM, POSITIVE_FILE, idx_to_word)
    # generate_samples(target_lstm, BATCH_SIZE, 10, TEST_FILE, idx_to_word)
    # Create Test data iterator for testing
    test_iter = GenDataIter(TEST_FILE, BATCH_SIZE)
    #test_predict(generator, test_iter, idx_to_word, train_mode=True)

    # Load data from file
    gen_data_iter = GenDataIter(POSITIVE_FILE, BATCH_SIZE)
    lines = read_file(POSITIVE_FILE)

    refrences = []
    for line in lines:
        phrase = []
        for char in line:
            phrase.append(idx_to_word[char])

        refrences.append(' '.join(phrase))
        #refrences.append(phrase)

    # Pretrain Generator using MLE
    gen_criterion = nn.NLLLoss(size_average=False)
    gen_optimizer = optim.Adam(generator.parameters())
    if opt.cuda:
        gen_criterion = gen_criterion.cuda()
    print('Pretrain with MLE ...')
    for epoch in range(PRE_EPOCH_NUM):
        loss = train_epoch(generator, gen_data_iter, gen_criterion,
                           gen_optimizer)
        print('Epoch [%d] Model Loss: %f' % (epoch, loss))
        sys.stdout.flush()
        generate_samples(generator, BATCH_SIZE, GENERATED_NUM, EVAL_FILE)
        if track_training:
            lines = read_file(EVAL_FILE)
            hypotheses = []
            for line in lines:
                phrase = []
                for char in line:
                    phrase.append(idx_to_word[char])

                hypotheses.append(' '.join(phrase))
                #hypotheses.append(phrase)

            bleu_score = get_moses_multi_bleu(hypotheses,
                                              refrences,
                                              lowercase=True)
            track_blue.append(bleu_score)
            print(track_blue)

    torch.save(generator.state_dict(), CHECKPOINT_PATH + 'generator_mle.model')
    track_blue = np.array(track_blue)
    np.save(ROOT_PATH + 'track_blue_mle3.npy', track_blue)

    plt.plot(track_blue)
    plt.show()
Example #9
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    track_blue = []
    # Build up dataset
    s_train, s_test = load_from_big_file('obama_speech',g_sequence_len)
    # idx_to_word: List of id to word
    # word_to_idx: Dictionary mapping word to id
    idx_to_word, word_to_idx = fetch_vocab(s_train, s_train, s_test)
    # input_seq, target_seq = prepare_data(DATA_GERMAN, DATA_ENGLISH, word_to_idx)

    global VOCAB_SIZE
    VOCAB_SIZE = len(idx_to_word)
    save_vocab(CHECKPOINT_PATH + 'metadata.data', idx_to_word, word_to_idx, VOCAB_SIZE, g_emb_dim, g_hidden_dim)

    print('VOCAB SIZE:', VOCAB_SIZE)
    # Define Networks
    generator = Generator(VOCAB_SIZE, g_emb_dim, g_hidden_dim, opt.cuda)
    discriminator = Discriminator(d_num_class, VOCAB_SIZE, d_emb_dim, d_filter_sizes, d_num_filters, d_dropout)
    if opt.cuda:
        generator = generator.cuda()
        discriminator = discriminator.cuda()

    # Generate toy data using target lstm
    print('Generating data ...')

    # Generate samples either from sentences file or lstm
    # Sentences file will be structured input sentences
    # LSTM based is BOG approach
    generate_real_data('obama_speech', BATCH_SIZE, GENERATED_NUM, idx_to_word, word_to_idx,
                       POSITIVE_FILE, TEST_FILE)
    # generate_samples(target_lstm, BATCH_SIZE, GENERATED_NUM, POSITIVE_FILE, idx_to_word)
    # generate_samples(target_lstm, BATCH_SIZE, 10, TEST_FILE, idx_to_word)
    # Create Test data iterator for testing
    test_iter = GenDataIter(TEST_FILE, BATCH_SIZE)
    #test_predict(generator, test_iter, idx_to_word, train_mode=True)

    # Load data from file
    gen_data_iter = GenDataIter(POSITIVE_FILE, BATCH_SIZE)
    lines = read_file(POSITIVE_FILE)

    refrences = []
    for line in lines:
        phrase = []
        for char in line:
            phrase.append(idx_to_word[char])

        refrences.append(' '.join(phrase))
        #refrences.append(phrase)



    # Pretrain Generator using MLE
    gen_criterion = nn.NLLLoss(size_average=False)
    gen_optimizer = optim.Adam(generator.parameters())
    if opt.cuda:
        gen_criterion = gen_criterion.cuda()
    print('Pretrain with MLE ...')
    for epoch in range(PRE_EPOCH_NUM):
        loss = train_epoch(generator, gen_data_iter, gen_criterion, gen_optimizer)
        print('Epoch [%d] Model Loss: %f' % (epoch, loss))
        sys.stdout.flush()
        generate_samples(generator, BATCH_SIZE, GENERATED_NUM, EVAL_FILE)
        if track_training:
            lines = read_file(EVAL_FILE)
            hypotheses = []
            for line in lines:
                phrase = []
                for char in line:
                    phrase.append(idx_to_word[char])

                hypotheses.append(' '.join(phrase))
                #hypotheses.append(phrase)

            bleu_score=get_moses_multi_bleu(hypotheses, refrences, lowercase=True)
            track_blue.append(bleu_score)
            print(track_blue)

        # generate_samples(generator, BATCH_SIZE, GENERATED_NUM, EVAL_FILE)
        # eval_iter = GenDataIter(EVAL_FILE, BATCH_SIZE)
        # loss = eval_epoch(target_lstm, eval_iter, gen_criterion)
        # print('Epoch [%d] True Loss: %f' % (epoch, loss))

    # Pretrain Discriminator
    dis_criterion = nn.NLLLoss(size_average=False)
    dis_optimizer = optim.Adam(discriminator.parameters())
    if opt.cuda:
        dis_criterion = dis_criterion.cuda()
    print('Pretrain Discriminator ...')
    for epoch in range(5):
        generate_samples(generator, BATCH_SIZE, GENERATED_NUM, NEGATIVE_FILE)
        dis_data_iter = DisDataIter(POSITIVE_FILE, NEGATIVE_FILE, BATCH_SIZE)
        for _ in range(3):
            loss = train_epoch(discriminator, dis_data_iter, dis_criterion, dis_optimizer)
            print('Epoch [%d], loss: %f' % (epoch, loss))
            #sys.stdout.flush()

    # Adversarial Training
    rollout = Rollout(generator, 0.8)
    print('#####################################################')
    print('Start Adversarial Training...\n')
    gen_gan_loss = GANLoss()
    gen_gan_optm = optim.Adam(generator.parameters())
    if opt.cuda:
        gen_gan_loss = gen_gan_loss.cuda()
    gen_criterion = nn.NLLLoss(size_average=False)
    if opt.cuda:
        gen_criterion = gen_criterion.cuda()
    dis_criterion = nn.NLLLoss(size_average=False)
    dis_optimizer = optim.Adam(discriminator.parameters())
    if opt.cuda:
        dis_criterion = dis_criterion.cuda()
    for total_batch in range(TOTAL_BATCH):
        ## Train the generator for one step
        for it in range(1):
            samples = generator.sample(BATCH_SIZE, g_sequence_len)
            # construct the input to the generator: prepend zeros to the samples and drop the last column
            zeros = torch.zeros((BATCH_SIZE, 1)).type(torch.LongTensor)
            if samples.is_cuda:
                zeros = zeros.cuda()
            inputs = Variable(torch.cat([zeros, samples.data], dim=1)[:, :-1].contiguous())
            targets = Variable(samples.data).contiguous().view((-1,))
            # calculate the reward
            rewards = rollout.get_reward(samples, 16, discriminator)
            rewards = Variable(torch.Tensor(rewards))
            rewards = torch.exp(rewards).contiguous().view((-1,))
            if opt.cuda:
                rewards = rewards.cuda()
            prob = generator.forward(inputs)
            # print('SHAPE: ', prob.shape, targets.shape, rewards.shape)
            loss = gen_gan_loss(prob, targets, rewards)
            gen_gan_optm.zero_grad()
            loss.backward()
            gen_gan_optm.step()
            # print('GEN PRED DIM: ', prob.shape)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            # generate_samples(generator, BATCH_SIZE, GENERATED_NUM, EVAL_FILE)
            # eval_iter = GenDataIter(EVAL_FILE, BATCH_SIZE)
            # loss = eval_epoch(target_lstm, eval_iter, gen_criterion)
            # print('Batch [%d] True Loss: %f' % (total_batch, loss))

            # predictions = torch.max(prob, dim=1)[1]
            # predictions = predictions.view(BATCH_SIZE, -1)
            # # print('PRED SHAPE:' , predictions.shape)
            # for each_sen in list(predictions):
            #     print('Training Output:', generate_sentence_from_id(idx_to_word, each_sen, DEBUG_FILE))
            #
            # test_predict(generator, test_iter, idx_to_word, train_mode=True)
            loss_gen = eval_epoch(generator, gen_data_iter, gen_criterion)
            print('Epoch [%d] Model Loss: %f' % (total_batch, loss_gen))
            generate_samples(generator, BATCH_SIZE, GENERATED_NUM, EVAL_FILE)
            #show_some_generated_sequences(idx_to_word, 10, EVAL_FILE)
            sys.stdout.flush()
            if track_training:
                lines = read_file(EVAL_FILE)
                hypotheses = []
                for line in lines:
                    phrase = []
                    for char in line:
                        phrase.append(idx_to_word[char])

                    hypotheses.append(' '.join(phrase))
                    # hypotheses.append(phrase)

                bleu_score = get_moses_multi_bleu(hypotheses, refrences, lowercase=True)
                track_blue.append(bleu_score)
                print(track_blue)

            torch.save(generator.state_dict(), CHECKPOINT_PATH + 'generator_seqgan.model')
            torch.save(discriminator.state_dict(), CHECKPOINT_PATH + 'discriminator_seqgan.model')
        rollout.update_params()

        for _ in range(4):
            generate_samples(generator, BATCH_SIZE, GENERATED_NUM, NEGATIVE_FILE)
            dis_data_iter = DisDataIter(POSITIVE_FILE, NEGATIVE_FILE, BATCH_SIZE)
            for _ in range(2):
                loss = train_epoch(discriminator, dis_data_iter, dis_criterion, dis_optimizer)

    track_blue = np.array(track_blue)
    np.save(ROOT_PATH + 'track_blue_seqgan2.npy', track_blue)

    plt.plot(track_blue)
    plt.show()
Example #10
all_scores_bleu = []
all_stds_bleu = []

for iteration in range(1, 11):
    checkpoint_path = f'trial_1/checkpoint/text_gan_{str(iteration*10000).zfill(6)}_model.pth'
    checkpoint = torch.load(checkpoint_path)
    net_g.load_state_dict(checkpoint['tg'])

    net_g.cuda()
    net_g.eval()

    fake_texts = []
    for i in range(100):
        noise = torch.randn(32, 128).cuda()
        g_text_latent = net_g(noise)
        g_captions = net_t_ae.generate(g_text_latent)

        fake_texts += g_captions

    #p,r,f = b_score(fake_texts, real_texts, bert="bert-base-uncased", verbose=True)
    #print(f.mean().item())
    #all_scores_bert.append(f.mean().item())

    fake_whole_texts = ''
    for t in fake_texts:
        fake_whole_texts += t
    score = get_moses_multi_bleu([fake_whole_texts], [real_whole_texts],
                                 lowercase=True)
    all_scores_bleu.append(score)

# all_scores_bert is only populated if the commented-out BERTScore block above is enabled
print(all_scores_bleu)