Exemplo n.º 1
0
    def eval(self, epoch, train_iter):
        """Run one validation pass: decode the validation set, track BLEU,
        checkpoint on improvement, and log scalars to tensorboard.

        :param epoch: current epoch index (used for logging / checkpoint name)
        :param train_iter: current training iteration (used for logging)
        """
        self.model.eval()

        val_bleu = AverageMeter()
        start_time = time.time()

        # NOTE(review): this inference loop still tracks autograd state; on
        # torch>=0.4 wrapping it in torch.no_grad() would save memory —
        # confirm against the project's torch version.
        for i, batch in enumerate(tqdm(self.val_loader)):
            src_input = batch.src[0]
            src_length = batch.src[1]
            # Shift the target by one: predictions are scored against trg[1:].
            trg_output = batch.trg[0][:, 1:]
            batch_size = trg_output.size(0)

            # Decode without teacher forcing; output length is self.max_len.
            decoder_logit = self.model(src_input, src_length.tolist())
            pred = decoder_logit.view(batch_size, self.max_len, -1)

            # Convert id sequences back to sentences and score with BLEU.
            pred_sents = []
            trg_sents = []
            for j in range(batch_size):
                pred_sent = self.get_sentence(
                    tensor2np(pred[j]).argmax(axis=-1), 'trg')
                trg_sent = self.get_sentence(tensor2np(trg_output[j]), 'trg')
                pred_sents.append(pred_sent)
                trg_sents.append(trg_sent)
            bleu_value = get_bleu(pred_sents, trg_sents)
            val_bleu.update(bleu_value, 1)

        self.print_valid_result(epoch, train_iter, val_bleu.avg, start_time)

        # Save a checkpoint whenever validation BLEU improves on the best.
        if self.best_bleu < val_bleu.avg:
            self.best_bleu = val_bleu.avg
            checkpoint = {'model': self.model, 'epoch': epoch}
            torch.save(
                checkpoint, self.log_path + '/Model_e%d_i%d_%.3f.pt' %
                (epoch, train_iter, val_bleu.avg))

        # Log scalar summaries to tensorboard.
        info = {
            'epoch': epoch,
            'train_iter': train_iter,
            'train_loss': self.train_loss.avg,
            'train_bleu': self.train_bleu.avg,
            'bleu': val_bleu.avg
        }

        for tag, value in info.items():
            self.tf_log.scalar_summary(
                tag, value, (epoch * self.iter_per_epoch) + train_iter + 1)
Exemplo n.º 2
0
    def evaluate(self, dataset):
        """Print expected vs. actual translations for every example in
        ``dataset``, plot attention weights, and report a corpus BLEU score.

        NOTE(review): assumes ``dataset`` yields (sources, targets) batches of
        token-id tensors — confirm against the data pipeline.
        """
        # Running totals folded into the final BLEU computation.
        total_matches = 0
        total_possible = 0
        total_predicted_length = 0
        total_expected_length = 0
        for batch in dataset:
            for source, target in zip(batch[0], batch[1]):
                # Prepares input: add a leading batch dimension of size 1.
                source = tf.expand_dims(source, 0)
                # Prints expected translation: decode ids until '<end>'.
                words = []
                for word in target.numpy():
                    decoded = self.target_tokenizer.index_to_word[word].decode(
                    )
                    words.append(decoded)
                    if decoded == '<end>':
                        break
                # [1:-1] presumably strips '<start>'/'<end>' markers for
                # display — confirm tokenizer conventions.
                print('Expected:', ' '.join(words[1:-1]))
                # The BLEU reference keeps the trailing token ([1:], not [1:-1]).
                reference = np.array(words[1:], ndmin=2)
                # Prints actual translation
                words = []
                prediction, attention = self.translate(source,
                                                       return_attention=True)
                for word in prediction:
                    decoded = self.target_tokenizer.index_to_word[word].decode(
                    )
                    words.append(decoded)
                print('Translation:', ' '.join(words[:-1]), end='\n\n')
                # Updates data for BLEU score computation
                candidate = np.array(words, ndmin=2)
                matches, possible, predicted_length, expected_length = get_counts(
                    candidate, reference)
                total_matches += matches
                total_possible += possible
                total_predicted_length += predicted_length
                total_expected_length += expected_length
                # Plots attention weights for this example.
                plot_attention(attention,
                               tf.squeeze(source).numpy(), prediction,
                               self.source_tokenizer.index_to_word,
                               self.target_tokenizer.index_to_word)

        # Computes BLEU score from the accumulated counts.
        bleu = get_bleu(total_matches, total_possible, total_predicted_length,
                        total_expected_length)
        print('Bleu:', bleu)
Exemplo n.º 3
0
    def translate(self):
        """Decode the whole source dataset minibatch-by-minibatch and return
        the corpus BLEU score of the decoded hypotheses against the gold
        target sentences."""
        batch_size = self.config['data']['batch_size']
        predictions = []
        references = []
        for start in range(0, len(self.src['data']), batch_size):
            # Decode one minibatch starting at offset `start`.
            print('Decoding %d out of %d ' % (start, len(self.src['data'])))
            hypotheses, scores = self.decode_batch(start)
            # Each hypothesis step is a tuple whose first element is the id.
            hypothesis_ids = [[step[0] for step in hyp] for hyp in hypotheses]
            predictions.extend(
                ' '.join([self.trg['id2word'][token] for token in hyp])
                for hyp in hypothesis_ids
            )

            # Fetch the matching gold target minibatch.
            input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
                get_minibatch(
                    self.trg['data'], self.tgt_dict, start,
                    batch_size,
                    self.config['data']['max_trg_length'],
                    add_start=True, add_end=True, use_cuda=self.use_cuda
                )
            )

            gold_ids = output_lines_trg_gold.data.cpu().numpy()
            references.extend(
                ' '.join([self.trg['id2word'][token] for token in row])
                for row in gold_ids
            )

        print("investigate some preds and golds.....")
        print("trg_preds: ", predictions[0])
        print("trg_gold: ", references[0])
        return get_bleu(predictions, references)
Exemplo n.º 4
0
def evaluate_model(
    model, src, src_test, trg,
    trg_test, config, src_valid=None, trg_valid=None,
    verbose=True, metric='bleu', use_cuda=False
):
    """Greedily decode the test set and return its corpus BLEU score.

    :param model: seq2seq model passed to decode_minibatch
    :param src: source vocabulary dict with 'word2id'
    :param src_test: dict whose 'data' field holds source test sentences
    :param trg: target vocabulary dict with 'word2id' / 'id2word'
    :param trg_test: dict whose 'data' field holds target test sentences
    :param config: config dict providing data/batch_size and max lengths
    :param src_valid: unused; kept for interface compatibility
    :param trg_valid: unused; kept for interface compatibility
    :param verbose: unused; kept for interface compatibility
    :param metric: unused; kept for interface compatibility
    :param use_cuda: move tensors to GPU when True
    :return: corpus BLEU of greedy decodes vs. gold references
    """
    preds = []
    ground_truths = []
    for j in range(0, len(src_test['data']), config['data']['batch_size']):
        # Get source minibatch (lengths/masks are not needed here).
        input_lines_src, _, _, _ = get_minibatch(
            src_test['data'], src['word2id'], j, config['data']['batch_size'],
            config['data']['max_src_length'], add_start=True, add_end=True,
            use_cuda=use_cuda)

        # Get target minibatch; only the gold output side is used.
        _, output_lines_trg_gold, _, _ = (
            get_minibatch(
                trg_test['data'], trg['word2id'], j,
                config['data']['batch_size'], config['data']['max_trg_length'],
                add_start=True, add_end=True, use_cuda=use_cuda
            ))

        # Initialize target with <s> for every sentence
        input_lines_trg = Variable(torch.LongTensor(
            [
                [trg['word2id']['<s>']]
                for i in range(input_lines_src.size(0))
            ]
        ))
        if use_cuda:
            input_lines_trg = input_lines_trg.cuda()

        # Decode a minibatch greedily __TODO__ add beam search decoding
        input_lines_trg = decode_minibatch(
            config, model, input_lines_src,
            input_lines_trg, output_lines_trg_gold,
            use_cuda=use_cuda
        )

        # Drop the source batch reference; it is no longer needed after
        # decoding (the original converted it to numpy first, which was
        # wasted work before the delete).
        del input_lines_src

        # Copy minibatch outputs to cpu and convert ids to words
        input_lines_trg = input_lines_trg.data.cpu().numpy()
        input_lines_trg = [
            [trg['id2word'][x] for x in line]
            for line in input_lines_trg
        ]

        # Do the same for gold sentences
        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        output_lines_trg_gold = [
            [trg['id2word'][x] for x in line]
            for line in output_lines_trg_gold
        ]
        print("input_lines_trg: ", input_lines_trg[0])
        print("the length  of a sent", len(input_lines_trg[0]))

        # Strip the <s>/</s> markers so BLEU is computed on the sentence
        # proper; everything before <s> and after </s> is discarded.
        for sentence_pred, sentence_real in zip(
            input_lines_trg,
            output_lines_trg_gold
        ):
            if '<s>' in sentence_pred:
                sentence_pred = sentence_pred[sentence_pred.index('<s>') + 1:]
            if '</s>' in sentence_pred:
                sentence_pred = sentence_pred[:sentence_pred.index('</s>')]
            preds.append(sentence_pred)

            if '<s>' in sentence_real:
                sentence_real = sentence_real[sentence_real.index('<s>') + 1:]
            if '</s>' in sentence_real:
                sentence_real = sentence_real[:sentence_real.index('</s>')]
            ground_truths.append(sentence_real)

    print("call the get_bleu method to calc bleu score.....")
    print("preds: ", preds[0])
    print("ground_truths: ", ground_truths[0])
    return get_bleu(preds, ground_truths)
Exemplo n.º 5
0
def evaluate_autoencode_model(
    model, src, src_test,
    config, src_valid=None,
    verbose=True, metric='bleu'
):
    """Evaluate an autoencoding model by greedily re-decoding the test set
    and scoring the reconstructions against the inputs with BLEU.

    :param model: autoencoder; called as model(src_batch, trg_prefix)
    :param src: vocabulary dict with 'word2id' / 'id2word'
    :param src_test: dict whose 'data' field holds the test sentences
    :param config: config dict providing data/batch_size and max_src_length
    :param src_valid: unused; kept for interface compatibility
    :param verbose: when True, print each reconstruction and its reference
    :param metric: unused; kept for interface compatibility
    :return: corpus BLEU score of reconstructions vs. inputs

    NOTE(review): requires CUDA (.cuda() is called unconditionally) —
    confirm before running on CPU-only hosts.
    """
    preds = []
    ground_truths = []
    # Fix: use range — xrange is Python 2 only and raises NameError on 3.
    for j in range(0, len(src_test['data']), config['data']['batch_size']):

        print('Decoding batch : %d out of %d ' % (j, len(src_test['data'])))
        input_lines_src, lens_src, mask_src = get_autoencode_minibatch(
            src_test['data'], src['word2id'], j, config['data']['batch_size'],
            config['data']['max_src_length'], add_start=True, add_end=True
        )

        # Seed the decoder with a single <s> token per sentence.
        input_lines_trg = Variable(torch.LongTensor(
            [
                [src['word2id']['<s>']]
                for i in range(input_lines_src.size(0))
            ]
        )).cuda()

        # Greedy decoding: append the argmax of the last step each iteration.
        for i in range(config['data']['max_src_length']):

            decoder_logit = model(input_lines_src, input_lines_trg)
            word_probs = model.decode(decoder_logit)
            decoder_argmax = word_probs.data.cpu().numpy().argmax(axis=-1)
            next_preds = Variable(
                torch.from_numpy(decoder_argmax[:, -1])
            ).cuda()

            input_lines_trg = torch.cat(
                (input_lines_trg, next_preds.unsqueeze(1)),
                1
            )

        # Convert predicted ids back to words.
        input_lines_trg = input_lines_trg.data.cpu().numpy()

        input_lines_trg = [
            [src['id2word'][x] for x in line]
            for line in input_lines_trg
        ]

        # For an autoencoder the gold output is the input itself.
        output_lines_trg_gold = input_lines_src.data.cpu().numpy()
        output_lines_trg_gold = [
            [src['id2word'][x] for x in line]
            for line in output_lines_trg_gold
        ]

        # Truncate at </s> (inclusive) and collect prediction/reference pairs.
        for sentence_pred, sentence_real in zip(
            input_lines_trg,
            output_lines_trg_gold,
        ):
            if '</s>' in sentence_pred:
                index = sentence_pred.index('</s>')
            else:
                index = len(sentence_pred)
            preds.append(sentence_pred[:index + 1])

            if verbose:
                print(' '.join(sentence_pred[:index + 1]))

            if '</s>' in sentence_real:
                index = sentence_real.index('</s>')
            else:
                index = len(sentence_real)
            if verbose:
                print(' '.join(sentence_real[:index + 1]))
            if verbose:
                print('--------------------------------------')
            ground_truths.append(sentence_real[:index + 1])
    return get_bleu(preds, ground_truths)
Exemplo n.º 6
0
 # NOTE(review): fragment of a larger training script; it reads names bound
 # outside this view (i: presumably the epoch index, costs, BEST_BLEU,
 # params, batch_size, f_train, decode_dev, dev_tgt, src_word2ind,
 # tgt_word2ind) — confirm against the enclosing loop.
 # Reshuffle the parallel corpus (presumably once per epoch).
 train_src, train_tgt = shuffle_data(train_src, train_tgt)
 # xrange: this snippet is Python 2.
 for j in xrange(0, len(train_src), batch_size):
     # Build one padded/masked minibatch from the shuffled parallel data.
     batch_src, batch_tgt_inp, batch_tgt_op, batch_src_lens, batch_src_mask, batch_tgt_mask \
         = prepare_batch(
             train_src[j: j + batch_size],
             train_tgt[j: j + batch_size],
             src_word2ind,
             tgt_word2ind
         )
     # One training step; f_train returns the batch cross-entropy.
     entropy = f_train(batch_src, batch_tgt_inp, batch_tgt_op,
                       batch_src_lens, batch_tgt_mask)
     costs.append(entropy)
     logging.info('Epoch : %d Minibatch : %d Loss : %.3f' % (i, j, entropy))
     # Every 64000 examples: decode the dev set, checkpoint on best BLEU,
     # log the mean cost, and reset the cost accumulator.
     if j % 64000 == 0 and j != 0:
         dev_predictions = decode_dev()
         dev_bleu = get_bleu(dev_predictions, dev_tgt)
         if dev_bleu > BEST_BLEU:
             BEST_BLEU = dev_bleu
             print_decoded_dev(dev_predictions)
             save_model(i, j, params)
         logging.info('Epoch : %d Minibatch :%d dev BLEU : %.3f' %
                      (i, j, dev_bleu))
         logging.info('Mean Cost : %.3f' % (np.mean(costs)))
         costs = []
     # Every 6400 examples: dump sample generations for inspection.
     if j % 6400 == 0:
         generate_samples(batch_src, batch_tgt_inp, batch_src_lens)
 # Final dev evaluation after the epoch's minibatches are exhausted.
 dev_predictions = decode_dev()
 dev_bleu = get_bleu(dev_predictions, dev_tgt)
 if dev_bleu > BEST_BLEU:
     BEST_BLEU = dev_bleu
     print_decoded_dev(dev_predictions)
Exemplo n.º 7
0
    def train(self):
        """Run the full training loop over ``self.train_loader``.

        Tracks loss/BLEU with AverageMeters, periodically prints samples and
        runs validation, and logs scalar summaries to tensorboard.
        """
        self.best_bleu = .0

        criterion = nn.NLLLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        # NOTE(review): scheduler is created but never stepped in this
        # method — confirm whether scheduler.step() per epoch is intended.
        scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.8)

        for epoch in range(self.num_epoch):
            self.train_loss = AverageMeter()
            self.train_bleu = AverageMeter()

            start_time = time.time()

            for i, batch in enumerate(tqdm(self.train_loader)):
                self.model.train()

                src_input = batch.src[0]
                src_length = batch.src[1]

                # Teacher forcing: feed trg[:-1], score against trg[1:].
                trg_input = batch.trg[0][:, :-1]
                trg_output = batch.trg[0][:, 1:]

                batch_size, trg_len = trg_input.size(0), trg_input.size(1)

                decoder_logit = self.model(src_input, src_length.tolist(),
                                           trg_input)
                pred = decoder_logit.view(batch_size, trg_len, -1)

                optimizer.zero_grad()

                loss = criterion(decoder_logit,
                                 trg_output.contiguous().view(-1))
                loss.backward()

                # NOTE(review): clip_grad_norm is the pre-0.4 torch API;
                # newer torch renames it clip_grad_norm_ — confirm version.
                torch.nn.utils.clip_grad_norm(self.model.parameters(),
                                              self.grad_clip)

                optimizer.step()

                # Compute BLEU score and loss for this batch.
                pred_sents = []
                trg_sents = []

                for j in range(batch_size):
                    pred_sent = self.get_sentence(
                        tensor2np(pred[j]).argmax(axis=-1), 'trg')
                    trg_sent = self.get_sentence(tensor2np(trg_output[j]),
                                                 'trg')
                    pred_sents.append(pred_sent)
                    trg_sents.append(trg_sent)

                bleu_value = get_bleu(pred_sents, trg_sents)
                self.train_bleu.update(bleu_value, 1)
                self.train_loss.update(loss.data[0], batch_size)

                if i % 5000 == 0 and i != 0:
                    self.print_train_result(epoch, i, start_time)
                    self.print_sample(batch_size, epoch, i, src_input,
                                      trg_output, pred)
                    self.eval(epoch, i)
                    # Fix: reset the instance meters. The original assigned
                    # fresh AverageMeters to throwaway locals (train_loss /
                    # train_bleu), so the running stats were never reset.
                    self.train_loss = AverageMeter()
                    self.train_bleu = AverageMeter()
                    start_time = time.time()

                # Log scalar summaries to tensorboard.
                info = {
                    'epoch': epoch,
                    'train_iter': i,
                    'train_loss': self.train_loss.avg,
                    'train_bleu': self.train_bleu.avg
                }
                for tag, value in info.items():
                    self.tf_log.scalar_summary(
                        tag, value, (epoch * self.iter_per_epoch) + i + 1)

            # End-of-epoch reporting and validation.
            self.print_train_result(epoch, i, start_time)
            self.print_sample(batch_size, epoch, i, src_input, trg_output,
                              pred)
            self.eval(epoch, i)
Exemplo n.º 8
0
  def train(self, epochs: int, train: tf.data.Dataset, test=None) -> None:
    """
    Performs training of the translation model. It shows training/test loss and bleu score after each epoch.

    :param epochs: Number of epochs
    :param train: Training dataset
    :param test: Test dataset
    """
    for epoch in range(epochs):
      # Performing a training epoch
      start = time.perf_counter()
      train_loss = 0
      # Per-epoch accumulators for the BLEU count statistics.
      train_matches = 0
      train_possible = 0
      train_predicted_length = 0
      train_expected_length = 0
      for batch, (sources, targets) in enumerate(train):
        # Calls model
        batch_loss, expected, predicted = self.train_step(sources, targets)
        # Update loss for logging
        train_loss += batch_loss
        # Updates data for BLEU score computation
        # NOTE(review): 'expected' is passed before 'predicted' here —
        # confirm get_counts' parameter order against its other call sites.
        matches, possible, predicted_length, expected_length = get_counts(
          expected.numpy(), predicted.numpy(), ending_token=self.decoder.vocab[b'<end>']
        )
        train_matches += matches
        train_possible += possible
        train_predicted_length += predicted_length
        train_expected_length += expected_length
      # Computes BLEU score
      bleu = get_bleu(train_matches, train_possible, train_predicted_length, train_expected_length)

      # Logs training results; train_loss is averaged over batches seen.
      print('\nEpoch {} out of {} complete ({:.2f} secs) -- Train Loss: {:.4f} -- Train Bleu: {:.2f}'.format(
        epoch + 1,
        epochs,
        time.perf_counter() - start,
        train_loss / (batch + 1),
        bleu
      ), end='')

      if test is not None:
        # Evaluates performance on test set after epoch training
        test_loss = 0
        test_matches = 0
        test_possible = 0
        test_predicted_length = 0
        test_expected_length = 0
        for batch, (sources, targets) in enumerate(test):
          # Calls model
          batch_loss, expected, predicted = self.test_step(sources, targets)
          # Update loss for logging
          test_loss += batch_loss
          # Updates data for BLEU score computation
          # NOTE(review): same (expected, predicted) ordering question as in
          # the training loop above — confirm get_counts' signature.
          matches, possible, predicted_length, expected_length = get_counts(
            expected.numpy(), predicted.numpy(), ending_token=self.decoder.vocab[b'<end>']
          )
          test_matches += matches
          test_possible += possible
          test_predicted_length += predicted_length
          test_expected_length += expected_length
        # Computes BLEU score
        bleu = get_bleu(test_matches, test_possible, test_predicted_length, test_expected_length)
        # Logs test performance
        # NOTE(review): 'batch' carries over from the loops above, so this
        # guard is effectively always true once any batch has been seen;
        # with an empty test dataset it reads the training loop's 'batch'.
        if batch >= 0:
          print(' -- Test Loss: {:.4f} -- Test Bleu: {:.2f}'.format(
            test_loss / (batch + 1),
            bleu
          ), end='')

      # Save checkpoint every ten epochs
      if (epoch + 1) % 10 == 0:
        print('\nCreating intermediate checkpoint!')
        self.checkpoint.save(file_prefix=self.checkpoint_prefix)

    # Save weights after training is done
    print('\nCreating final checkpoint!')
    self.checkpoint.save(file_prefix=self.checkpoint_prefix)