# Language-model training script: seeds the RNGs, builds the model, datasets
# and trainer, optionally resumes from a checkpoint, trains for
# `config.num_epochs` epochs, then evaluates on the test set.
#
# NOTE(review): the line breaks of this script had been lost, which left
# everything after the two seed calls inside a single trailing comment. The
# nesting below (one checkpoint per epoch; final test evaluation inside
# `if train:`) is reconstructed from the statement order — confirm it.

# Seed both the PyTorch and NumPy random number generators from the config.
torch.manual_seed(config.seed)
np.random.seed(config.seed)

# Initialize model
model = LanguageModel(config=config)  # alternative: CTCModel(config=config)
print(model)

# Generate datasets
train_dataset, valid_dataset, test_dataset = get_text_datasets(config)

trainer = Trainer(model=model, config=config)
if restart:
    trainer.load_checkpoint()

# Train the final model
if train:
    for epoch in range(config.num_epochs):
        print("========= Epoch %d of %d =========" % (epoch + 1, config.num_epochs))
        train_loss = trainer.train(train_dataset)

        # Move the model to the CPU for the validation pass, then back to the
        # GPU (when one is available) before the next training epoch.
        model = model.cpu()
        valid_loss = trainer.test(valid_dataset, set="valid")
        if torch.cuda.is_available():
            model = model.cuda()

        print("========= Results: epoch %d of %d =========" % (epoch + 1, config.num_epochs))
        print("train loss: %.2f| valid loss: %.2f\n" % (train_loss, valid_loss))

        trainer.save_checkpoint()

    trainer.load_best_model()
    test_loss = trainer.test(test_dataset, set="test")
    print("========= Test results =========")
    # Only the loss is reported: this script never computes a test WER, and the
    # format string has a single %.2f placeholder. Passing (test_WER, test_loss)
    # here would raise before anything was printed.
    print("test loss: %.2f \n" % (test_loss,))
# NOTE(review): this fragment begins part-way through a script. The checkpoint
# call and the per-epoch report below read `epoch`, so they presumably belong
# to the body of an epoch loop whose header is not visible here — restore that
# indentation when splicing this back into the file.

# Save a checkpoint for the "surprisal" sampling method, passing along the
# validation WER obtained with surprisal sampling.
trainer.save_checkpoint(
    WER=valid_WER_surprisal,
    sampling_method="surprisal",
)

# Per-epoch report: training metrics first, then validation metrics for each
# of the two sampling methods. WER values are scaled by 100 for display.
epoch_number = epoch + 1
print("========= Results: epoch %d of %d =========" % (epoch_number, config.num_epochs))

train_report = (train_WER * 100, train_loss, train_FLOPs_mean)
print("train WER: %.2f| train loss: %.2f| train FLOPs: %d" % train_report)

valid_report_random = (valid_WER_random * 100, valid_loss_random, valid_FLOPs_mean_random)
print("valid WER: %.2f| valid loss: %.2f| valid FLOPs: %d (random sampling)" % valid_report_random)

valid_report_surprisal = (valid_WER_surprisal * 100, valid_loss_surprisal, valid_FLOPs_mean_surprisal)
print("valid WER: %.2f| valid loss: %.2f| valid FLOPs: %d (surprisal sampling)\n" % valid_report_surprisal)

# Test evaluation, pass 1: load the best model for the "random" sampling
# method and test with surprisal-based sampling switched off.
trainer.load_best_model(sampling_method="random")
model.sample_based_on_surprisal_during_testing = False
(
    test_WER_random,
    test_loss_random,
    test_FLOPs_mean_random,
    test_FLOPs_std_random,
) = trainer.test(test_dataset, set="test")

# Test evaluation, pass 2: load the best model for the "surprisal" sampling
# method and test with surprisal-based sampling switched on.
trainer.load_best_model(sampling_method="surprisal")
model.sample_based_on_surprisal_during_testing = True
(
    test_WER_surprisal,
    test_loss_surprisal,
    test_FLOPs_mean_surprisal,
    test_FLOPs_std_surprisal,
) = trainer.test(test_dataset, set="test")

# Final report. The FLOPs standard deviations are unpacked above but are not
# printed here.
print("========= Test results =========")

test_report_random = (test_WER_random * 100, test_loss_random, test_FLOPs_mean_random)
print("test WER: %.2f| test loss: %.2f| test FLOPs: %d (random sampling)" % test_report_random)

test_report_surprisal = (test_WER_surprisal * 100, test_loss_surprisal, test_FLOPs_mean_surprisal)
print("test WER: %.2f| test loss: %.2f| test FLOPs: %d (surprisal sampling)\n" % test_report_surprisal)