def eval(self, epoch, train_iter):
    """Run validation: compute BLEU over the validation set, checkpoint
    the model when BLEU improves, and log metrics to tensorboard.

    :param epoch: current training epoch (used for logging/checkpoint name)
    :param train_iter: current training iteration within the epoch
    """
    self.model.eval()
    val_bleu = AverageMeter()
    start_time = time.time()
    # No gradients are needed during evaluation; no_grad avoids keeping
    # the whole computation graph in memory for every batch.
    with torch.no_grad():
        for batch in tqdm(self.val_loader):
            src_input = batch.src[0]
            src_length = batch.src[1]
            trg_input = batch.trg[0][:, :-1]
            trg_output = batch.trg[0][:, 1:]
            batch_size = trg_input.size(0)
            # NOTE(review): unlike train(), the model is called without
            # trg_input, so it presumably decodes up to self.max_len steps
            # on its own -- confirm against the model's forward().
            decoder_logit = self.model(src_input, src_length.tolist())
            pred = decoder_logit.view(batch_size, self.max_len, -1)
            # Convert ids back to sentences and score the minibatch.
            pred_sents = []
            trg_sents = []
            for j in range(batch_size):
                pred_sent = self.get_sentence(
                    tensor2np(pred[j]).argmax(axis=-1), 'trg')
                trg_sent = self.get_sentence(tensor2np(trg_output[j]), 'trg')
                pred_sents.append(pred_sent)
                trg_sents.append(trg_sent)
            bleu_value = get_bleu(pred_sents, trg_sents)
            val_bleu.update(bleu_value, 1)
    self.print_valid_result(epoch, train_iter, val_bleu.avg, start_time)
    # Save model if bleu score is higher than the best
    if self.best_bleu < val_bleu.avg:
        self.best_bleu = val_bleu.avg
        checkpoint = {'model': self.model, 'epoch': epoch}
        torch.save(
            checkpoint,
            self.log_path + '/Model_e%d_i%d_%.3f.pt' %
            (epoch, train_iter, val_bleu.avg))
    # Logging tensorboard
    info = {
        'epoch': epoch,
        'train_iter': train_iter,
        'train_loss': self.train_loss.avg,
        'train_bleu': self.train_bleu.avg,
        'bleu': val_bleu.avg
    }
    for tag, value in info.items():
        self.tf_log.scalar_summary(
            tag, value, (epoch * self.iter_per_epoch) + train_iter + 1)
def evaluate(self, dataset):
    """Translate every (source, target) pair in *dataset*, printing the
    expected and produced sentences, plotting attention, and reporting the
    BLEU score accumulated over all examples."""
    matches_sum = 0
    possible_sum = 0
    predicted_len_sum = 0
    expected_len_sum = 0
    for batch in dataset:
        for source, target in zip(batch[0], batch[1]):
            # The model expects a leading batch dimension.
            source = tf.expand_dims(source, 0)
            # Decode the gold target ids up to (and including) '<end>'.
            gold_words = []
            for token_id in target.numpy():
                token = self.target_tokenizer.index_to_word[token_id].decode()
                gold_words.append(token)
                if token == '<end>':
                    break
            print('Expected:', ' '.join(gold_words[1:-1]))
            reference = np.array(gold_words[1:], ndmin=2)
            # Decode the model's own translation.
            prediction, attention = self.translate(source,
                                                   return_attention=True)
            pred_words = [
                self.target_tokenizer.index_to_word[token_id].decode()
                for token_id in prediction
            ]
            print('Translation:', ' '.join(pred_words[:-1]), end='\n\n')
            candidate = np.array(pred_words, ndmin=2)
            # Accumulate n-gram statistics for the corpus-level BLEU.
            matches, possible, predicted_length, expected_length = get_counts(
                candidate, reference)
            matches_sum += matches
            possible_sum += possible
            predicted_len_sum += predicted_length
            expected_len_sum += expected_length
            # Visualize the attention weights for this example.
            plot_attention(attention, tf.squeeze(source).numpy(), prediction,
                           self.source_tokenizer.index_to_word,
                           self.target_tokenizer.index_to_word)
    # Computes BLEU score over the whole dataset.
    bleu = get_bleu(matches_sum, possible_sum,
                    predicted_len_sum, expected_len_sum)
    print('Bleu:', bleu)
def translate(self):
    """Translate the whole dataset and return its corpus BLEU score.

    Decodes one minibatch at a time with ``self.decode_batch`` and
    compares the detokenized hypotheses against the gold target side.

    :return: BLEU score of the decoded hypotheses vs. the gold targets
    """
    trg_preds = []
    trg_gold = []
    batch_size = self.config['data']['batch_size']
    num_examples = len(self.src['data'])
    for j in range(0, num_examples, batch_size):
        # Decode a single minibatch.
        print('Decoding %d out of %d ' % (j, num_examples))
        hypotheses, _scores = self.decode_batch(j)
        # Each hypothesis token is a tuple whose first element is the id.
        all_hyp_inds = [[x[0] for x in hyp] for hyp in hypotheses]
        all_preds = [
            ' '.join([self.trg['id2word'][x] for x in hyp])
            for hyp in all_hyp_inds
        ]
        # Get target minibatch; only the gold output side is used here.
        _input_gold, output_lines_trg_gold, _lens, _mask = get_minibatch(
            self.trg['data'], self.tgt_dict, j,
            batch_size,
            self.config['data']['max_trg_length'],
            add_start=True, add_end=True,
            use_cuda=self.use_cuda
        )
        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        all_gold = [
            ' '.join([self.trg['id2word'][x] for x in hyp])
            for hyp in output_lines_trg_gold
        ]
        trg_preds += all_preds
        trg_gold += all_gold
    return get_bleu(trg_preds, trg_gold)
def _strip_sentence_markers(sentence):
    """Return *sentence* (a list of tokens) without the sentence markers:
    everything up to and including '<s>' and everything from '</s>'
    onwards is dropped, so only real words remain."""
    if '<s>' in sentence:
        sentence = sentence[sentence.index('<s>') + 1:]
    if '</s>' in sentence:
        sentence = sentence[:sentence.index('</s>')]
    return sentence


def evaluate_model(
    model, src, src_test, trg,
    trg_test, config, src_valid=None, trg_valid=None,
    verbose=True, metric='bleu', use_cuda=False
):
    """Evaluate model.

    Greedily decodes the test set minibatch by minibatch and returns the
    BLEU score of the decoded sentences against the gold target side.

    :param model: the model object
    :param src: source-side vocab dict ('word2id'/'id2word')
    :param src_test: source-side test data
    :param trg: target-side vocab dict ('word2id'/'id2word')
    :param trg_test: target-side test data
    :param config: the config object
    :param src_valid: unused, kept for interface compatibility
    :param trg_valid: unused, kept for interface compatibility
    :param verbose: unused, kept for interface compatibility
    :param metric: unused, kept for interface compatibility
    :param use_cuda: move tensors to GPU when True
    :return: BLEU score of the greedy decodes
    """
    preds = []
    ground_truths = []
    for j in range(0, len(src_test['data']), config['data']['batch_size']):
        # Get source minibatch
        input_lines_src, output_lines_src, lens_src, _ = get_minibatch(
            src_test['data'], src['word2id'], j,
            config['data']['batch_size'], config['data']['max_src_length'],
            add_start=True, add_end=True, use_cuda=use_cuda)
        # Get target minibatch (these are *target* lengths; the original
        # reused the name lens_src for them).
        input_lines_trg_gold, output_lines_trg_gold, lens_trg, _ = (
            get_minibatch(
                trg_test['data'], trg['word2id'], j,
                config['data']['batch_size'],
                config['data']['max_trg_length'],
                add_start=True, add_end=True,
                use_cuda=use_cuda
            ))
        # Initialize target with <s> for every sentence
        input_lines_trg = Variable(torch.LongTensor(
            [
                [trg['word2id']['<s>']]
                for i in range(input_lines_src.size(0))
            ]
        ))
        if use_cuda:
            input_lines_trg = input_lines_trg.cuda()
        # Decode a minibatch greedily __TODO__ add beam search decoding
        input_lines_trg = decode_minibatch(
            config, model, input_lines_src,
            input_lines_trg, output_lines_trg_gold,
            use_cuda=use_cuda
        )
        # Free GPU tensors that are no longer needed (the original also
        # copied input_lines_src to numpy first, which was pointless).
        del input_lines_src
        del input_lines_trg_gold
        # Copy minibatch outputs to cpu and convert ids to words
        input_lines_trg = input_lines_trg.data.cpu().numpy()
        input_lines_trg = [
            [trg['id2word'][x] for x in line]
            for line in input_lines_trg
        ]
        # Do the same for gold sentences
        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        output_lines_trg_gold = [
            [trg['id2word'][x] for x in line]
            for line in output_lines_trg_gold
        ]
        # Strip the start/end markers so that only the actual words enter
        # the BLEU computation.
        for sentence_pred, sentence_real in zip(
            input_lines_trg, output_lines_trg_gold
        ):
            preds.append(_strip_sentence_markers(sentence_pred))
            ground_truths.append(_strip_sentence_markers(sentence_real))
    return get_bleu(preds, ground_truths)
def evaluate_autoencode_model(
    model, src, src_test,
    config, src_valid=None,
    verbose=True, metric='bleu'
):
    """Evaluate an autoencoding model.

    Greedily reconstructs the source sentences and returns the BLEU score
    of the reconstructions against the original inputs.

    :param model: the model object
    :param src: source vocab dict ('word2id'/'id2word')
    :param src_test: test data to reconstruct
    :param config: the config object
    :param src_valid: unused, kept for interface compatibility
    :param verbose: print each decoded/gold sentence pair when True
    :param metric: unused, kept for interface compatibility
    :return: BLEU score
    """
    preds = []
    ground_truths = []
    # range (not the Python-2-only xrange) keeps this working on Python 3;
    # the sibling evaluate_model already uses range.
    for j in range(0, len(src_test['data']), config['data']['batch_size']):
        print('Decoding batch : %d out of %d ' % (j, len(src_test['data'])))
        input_lines_src, lens_src, mask_src = get_autoencode_minibatch(
            src_test['data'], src['word2id'], j,
            config['data']['batch_size'],
            config['data']['max_src_length'],
            add_start=True, add_end=True
        )
        # Start every decode with the <s> token.
        input_lines_trg = Variable(torch.LongTensor(
            [
                [src['word2id']['<s>']]
                for i in range(input_lines_src.size(0))
            ]
        )).cuda()
        # Greedy decoding: repeatedly append the argmax of the last step.
        for i in range(config['data']['max_src_length']):
            decoder_logit = model(input_lines_src, input_lines_trg)
            word_probs = model.decode(decoder_logit)
            decoder_argmax = word_probs.data.cpu().numpy().argmax(axis=-1)
            next_preds = Variable(
                torch.from_numpy(decoder_argmax[:, -1])
            ).cuda()
            input_lines_trg = torch.cat(
                (input_lines_trg, next_preds.unsqueeze(1)),
                1
            )
        # Convert ids back to words for both the decodes and the inputs.
        input_lines_trg = input_lines_trg.data.cpu().numpy()
        input_lines_trg = [
            [src['id2word'][x] for x in line]
            for line in input_lines_trg
        ]
        output_lines_trg_gold = input_lines_src.data.cpu().numpy()
        output_lines_trg_gold = [
            [src['id2word'][x] for x in line]
            for line in output_lines_trg_gold
        ]
        # Truncate each sentence at '</s>' (kept inclusive, as before).
        for sentence_pred, sentence_real in zip(
            input_lines_trg, output_lines_trg_gold,
        ):
            if '</s>' in sentence_pred:
                index = sentence_pred.index('</s>')
            else:
                index = len(sentence_pred)
            preds.append(sentence_pred[:index + 1])
            if verbose:
                print(' '.join(sentence_pred[:index + 1]))
            if '</s>' in sentence_real:
                index = sentence_real.index('</s>')
            else:
                index = len(sentence_real)
            if verbose:
                print(' '.join(sentence_real[:index + 1]))
                print('--------------------------------------')
            ground_truths.append(sentence_real[:index + 1])
    return get_bleu(preds, ground_truths)
# NOTE(review): this fragment references names from an enclosing scope
# ('i' -- presumably the epoch index -- 'costs', 'params', 'BEST_BLEU',
# 'dev_tgt', 'batch_size') that are not visible here; it looks like the
# body of an epoch loop in a training script. Confirm against the caller.
# Reshuffle the training pairs at the start of the epoch.
train_src, train_tgt = shuffle_data(train_src, train_tgt)
for j in xrange(0, len(train_src), batch_size):
    # Build one padded/masked minibatch from the shuffled data.
    batch_src, batch_tgt_inp, batch_tgt_op, batch_src_lens, batch_src_mask, batch_tgt_mask \
        = prepare_batch(
            train_src[j: j + batch_size],
            train_tgt[j: j + batch_size],
            src_word2ind,
            tgt_word2ind
        )
    # One optimization step; f_train returns the minibatch loss.
    entropy = f_train(batch_src, batch_tgt_inp, batch_tgt_op, batch_src_lens, batch_tgt_mask)
    costs.append(entropy)
    logging.info('Epoch : %d Minibatch : %d Loss : %.3f' % (i, j, entropy))
    # Every 64000 examples: decode the dev set, track the best BLEU seen
    # so far, and save a checkpoint when it improves.
    if j % 64000 == 0 and j != 0:
        dev_predictions = decode_dev()
        dev_bleu = get_bleu(dev_predictions, dev_tgt)
        if dev_bleu > BEST_BLEU:
            BEST_BLEU = dev_bleu
            print_decoded_dev(dev_predictions)
            save_model(i, j, params)
        logging.info('Epoch : %d Minibatch :%d dev BLEU : %.3f' % (i, j, dev_bleu))
        logging.info('Mean Cost : %.3f' % (np.mean(costs)))
        # Reset the running cost window after reporting its mean.
        costs = []
    # More frequent qualitative check: sample translations from the model.
    if j % 6400 == 0:
        generate_samples(batch_src, batch_tgt_inp, batch_src_lens)
# End-of-epoch dev evaluation (nesting reconstructed -- verify).
dev_predictions = decode_dev()
dev_bleu = get_bleu(dev_predictions, dev_tgt)
if dev_bleu > BEST_BLEU:
    BEST_BLEU = dev_bleu
    print_decoded_dev(dev_predictions)
def train(self):
    """Train the model for self.num_epoch epochs.

    Runs the optimization loop, tracks running loss/BLEU meters,
    periodically prints samples and runs validation (self.eval), and
    logs epoch-level metrics to tensorboard.
    """
    self.best_bleu = .0
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
    # Decay the learning rate by 0.8 after every epoch.
    scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.8)
    for epoch in range(self.num_epoch):
        self.train_loss = AverageMeter()
        self.train_bleu = AverageMeter()
        start_time = time.time()
        for i, batch in enumerate(tqdm(self.train_loader)):
            self.model.train()
            src_input = batch.src[0]
            src_length = batch.src[1]
            trg_input = batch.trg[0][:, :-1]
            trg_output = batch.trg[0][:, 1:]
            batch_size, trg_len = trg_input.size(0), trg_input.size(1)
            decoder_logit = self.model(src_input, src_length.tolist(),
                                       trg_input)
            pred = decoder_logit.view(batch_size, trg_len, -1)
            optimizer.zero_grad()
            loss = criterion(decoder_logit, trg_output.contiguous().view(-1))
            loss.backward()
            torch.nn.utils.clip_grad_norm(self.model.parameters(),
                                          self.grad_clip)
            optimizer.step()
            # Compute BLEU score and Loss
            pred_sents = []
            trg_sents = []
            for j in range(batch_size):
                pred_sent = self.get_sentence(
                    tensor2np(pred[j]).argmax(axis=-1), 'trg')
                trg_sent = self.get_sentence(tensor2np(trg_output[j]), 'trg')
                pred_sents.append(pred_sent)
                trg_sents.append(trg_sent)
            bleu_value = get_bleu(pred_sents, trg_sents)
            self.train_bleu.update(bleu_value, 1)
            self.train_loss.update(loss.data[0], batch_size)
            if i % 5000 == 0 and i != 0:
                self.print_train_result(epoch, i, start_time)
                self.print_sample(batch_size, epoch, i, src_input,
                                  trg_output, pred)
                self.eval(epoch, i)
                # Bug fix: the original assigned fresh AverageMeters to
                # *unused locals* (train_loss/train_bleu), so the running
                # averages were never actually reset here.
                self.train_loss = AverageMeter()
                self.train_bleu = AverageMeter()
                start_time = time.time()
        # Logging tensorboard
        info = {
            'epoch': epoch,
            'train_iter': i,
            'train_loss': self.train_loss.avg,
            'train_bleu': self.train_bleu.avg
        }
        for tag, value in info.items():
            self.tf_log.scalar_summary(
                tag, value, (epoch * self.iter_per_epoch) + i + 1)
        self.print_train_result(epoch, i, start_time)
        self.print_sample(batch_size, epoch, i, src_input, trg_output, pred)
        self.eval(epoch, i)
        # Bug fix: the scheduler was created but never stepped, so the
        # intended per-epoch LR decay never happened.
        scheduler.step()
def train(self, epochs: int, train: tf.data.Dataset, test=None) -> None:
    """
    Performs training of the translation model.
    It shows training/test loss and bleu score after each epoch.

    :param epochs: Number of epochs
    :param train: Training dataset
    :param test: Test dataset
    """
    for epoch in range(epochs):
        # Performing a training epoch
        start = time.perf_counter()
        train_loss, train_batches, train_bleu = self._run_epoch(
            train, self.train_step)
        # Logs training results. max(..., 1) guards against an empty
        # dataset; the original read the leaked loop variable `batch`
        # after the loop, which raised NameError on an empty train set.
        print('\nEpoch {} out of {} complete ({:.2f} secs) -- Train Loss: {:.4f} -- Train Bleu: {:.2f}'.format(
            epoch + 1, epochs, time.perf_counter() - start,
            train_loss / max(train_batches, 1), train_bleu
        ), end='')
        if test is not None:
            # Evaluates performance on test set after epoch training.
            # Bug fix: the original divided by the *train* loop's leaked
            # `batch` index when the test set was empty, and guarded the
            # print with a dead `if batch >= 0` check.
            test_loss, test_batches, test_bleu = self._run_epoch(
                test, self.test_step)
            print(' -- Test Loss: {:.4f} -- Test Bleu: {:.2f}'.format(
                test_loss / max(test_batches, 1), test_bleu
            ), end='')
        # Save checkpoint every ten epochs
        if (epoch + 1) % 10 == 0:
            print('\nCreating intermediate checkpoint!')
            self.checkpoint.save(file_prefix=self.checkpoint_prefix)
    # Save weights after training is done
    print('\nCreating final checkpoint!')
    self.checkpoint.save(file_prefix=self.checkpoint_prefix)

def _run_epoch(self, dataset, step_fn):
    """Run *step_fn* (train_step or test_step) over every batch of
    *dataset* and return (total_loss, num_batches, bleu_score)."""
    total_loss = 0
    num_batches = 0
    matches_sum = 0
    possible_sum = 0
    predicted_len_sum = 0
    expected_len_sum = 0
    for sources, targets in dataset:
        # Calls model
        batch_loss, expected, predicted = step_fn(sources, targets)
        total_loss += batch_loss
        num_batches += 1
        # Updates data for BLEU score computation
        matches, possible, predicted_length, expected_length = get_counts(
            expected.numpy(), predicted.numpy(),
            ending_token=self.decoder.vocab[b'<end>']
        )
        matches_sum += matches
        possible_sum += possible
        predicted_len_sum += predicted_length
        expected_len_sum += expected_length
    # Computes BLEU score
    bleu = get_bleu(matches_sum, possible_sum,
                    predicted_len_sum, expected_len_sum)
    return total_loss, num_batches, bleu