def evaluate(self) -> None:
    """Evaluate the test dataset and report the corpus BLEU score.

    Generates translations for every batch in ``self.src_test_generator``,
    computes BLEU against ``self.tgt_test``, logs the score, and writes the
    hypotheses (one sentence per line) to a file in ``self.experiment_path``.
    """
    logging.info(
        f"Evaluating model with this configuration: \n {self.model.config}"
    )
    # generate predictions and calculate bleu score
    hyps = []
    self.model.eval()
    with torch.no_grad():
        for batch in tqdm(self.src_test_generator):
            batch = batch.to(self.device)
            translations = self.model.generate(input_ids=batch)
            decoded = [
                self.tokenizer.decode(
                    translation,
                    skip_special_tokens=True,
                    clean_up_tokenization_spaces=False,
                )
                for translation in translations
            ]
            # extend in place: the original `hyps = hyps + decoded` copied
            # the whole accumulated list on every batch (quadratic overall)
            hyps.extend(decoded)
    bleu_score = calculate_bleu_score(hyps, self.tgt_test)
    logging.info(f"BLEU score on test data is: {bleu_score:.2f}")

    # write hypothesis to file
    # NOTE(review): ':' is not a valid filename character on Windows —
    # confirm the target platform before relying on this path.
    hyps_path = os.path.join(
        self.experiment_path, f"model_test_hyps_bleu:{bleu_score:.2f}.txt"
    )
    with open(hyps_path, "w") as file:
        for sent in hyps:
            file.write(sent + " \n")
    logging.info(f"Model hypothesis saved in {hyps_path}")
def _evaluate_dev(
    self,
    dev_generator: Generator[torch.Tensor, None, None],
    tgt_dev: list,
    epoch: int,
) -> float:
    """Evaluate the parallel dev set after an epoch and return its BLEU score.

    Generates translations for every batch of ``dev_generator``, computes
    BLEU against ``tgt_dev``, writes the hypotheses (one per line) to an
    epoch-stamped file in ``self.experiment_path``, and logs the score.

    Args:
        dev_generator: yields batched source token tensors.
        tgt_dev: reference target sentences for the dev set.
        epoch: zero-based epoch index (reported as ``epoch + 1``).

    Returns:
        The corpus BLEU score on the dev set.
    """
    # evaluate parallel dev dataset
    hyps = []
    self.model.eval()
    with torch.no_grad():
        for batch in tqdm(dev_generator):
            batch = batch.to(self.device)
            translations = self.model.generate(input_ids=batch)
            decoded = [
                self.tokenizer.decode(
                    translation,
                    skip_special_tokens=True,
                    clean_up_tokenization_spaces=False,
                )
                for translation in translations
            ]
            # extend in place: the original `hyps = hyps + decoded` copied
            # the whole accumulated list on every batch (quadratic overall)
            hyps.extend(decoded)
    bleu_score = calculate_bleu_score(hyps, tgt_dev)

    # write hypothesis to file
    hyps_path = os.path.join(self.experiment_path,
                             f"epoch_{epoch+1}_dev_hyps.txt")
    with open(hyps_path, "w") as file:
        for sent in hyps:
            file.write(sent + " \n")
    self.logger.info(f"Model hypothesis saved in {hyps_path}")
    self.logger.info(
        f"BLEU score after epoch {epoch+1} is: {bleu_score:.2f}")
    return bleu_score
def calc_generative_metrics(self, preds, target) -> dict:
    """Compute generation metrics (BLEU) for *preds* against *target*."""
    metrics = calculate_bleu_score(preds, target)
    return metrics
checkpoint_dir='./checkpoints')
# Report whether a previous checkpoint exists before restoring.
if trainer.checkpoint_manager.latest_checkpoint:
    print("Restored from {}".format(
        trainer.checkpoint_manager.latest_checkpoint))
else:
    print("Initializing from scratch.")
# NOTE(review): restore() runs even when latest_checkpoint is None —
# presumably the checkpoint API treats None as a fresh start; confirm.
trainer.checkpoint.restore(trainer.checkpoint_manager.latest_checkpoint)


def do_translate(input_data):
    """Translate one (index, (source, target)) test example.

    Returns a dict with the source sentence, the reference target, and
    the model output produced by ``translate``.
    """
    index = input_data[0]
    source = input_data[1][0]
    target = input_data[1][1]
    print(index)  # progress indicator: which example is being translated
    output = translate(source, data_loader, trainer, SEQ_MAX_LEN_TARGET)
    return {'source': source, 'target': target, 'output': output}


# Translate every test example and collect only the model outputs.
translated_data = []
for test_data in data:
    res = do_translate(test_data)
    translated_data.append(res['output'])

# Write one hypothesis per line, then score against the reference file.
with open('translated_data', 'w') as f:
    f.write(str('\n'.join(translated_data)))

score, report = calculate_bleu_score(target_path='translated_data',
                                     ref_path=target_data_path)