import os

import numpy as np
import pandas as pd
import torch

# compare_truth_pred, simulator, time_keeper and mdn are repo-local helpers;
# their import paths depend on the project layout and are omitted here.


def RetrieveFeaturePredictionNMse(model_name):
    """
    Retrieve the feature and prediction values and place them in numpy arrays
    :param model_name: the name of the model
    :return: Xtruth, Xpred, Ytruth, Ypred, Ymae, Ymse
    """
    # Build the file names of the truth and prediction for both X and Y
    feature_file = os.path.join('data', 'test_Xtruth_{}.csv'.format(model_name))
    pred_file = os.path.join('data', 'test_Ypred_{}.csv'.format(model_name))
    truth_file = os.path.join('data', 'test_Ytruth_{}.csv'.format(model_name))
    feat_file = os.path.join('data', 'test_Xpred_{}.csv'.format(model_name))

    # Read the values from the files
    Xtruth = pd.read_csv(feature_file, header=None, delimiter=' ')
    Xpred = pd.read_csv(feat_file, header=None, delimiter=' ')
    Ytruth = pd.read_csv(truth_file, header=None, delimiter=' ')
    Ypred = pd.read_csv(pred_file, header=None, delimiter=' ')

    # Retrieve the MAE and MSE of Y
    Ymae, Ymse = compare_truth_pred(pred_file, truth_file)
    print(Xtruth.shape)
    return Xtruth.values, Xpred.values, Ytruth.values, Ypred.values, Ymae, Ymse
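# Usage sketch (hypothetical, not part of the original repo): assuming the four
# test_*.csv files for a model named 'MDN' already exist under data/, the call
# would look like this:
#
#   Xtruth, Xpred, Ytruth, Ypred, Ymae, Ymse = RetrieveFeaturePredictionNMse('MDN')
#   print('mean Y MSE =', np.mean(Ymse))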
def train(self):
    """
    The major training function. This starts training using the information given in the flags
    :return: None
    """
    cuda = True if torch.cuda.is_available() else False
    if cuda:
        self.model.cuda()

    # Construct the optimizer after the model has been moved to the GPU
    self.optm = self.make_optimizer()
    self.lr_scheduler = self.make_lr_scheduler(self.optm)

    # Time keeping
    tk = time_keeper(time_keeping_file=os.path.join(self.ckpt_dir, 'training time.txt'))

    for epoch in range(self.flags.train_step):
        # Set to training mode
        train_loss = 0
        # boundary_loss = 0  # Unnecessary during training since we provide geometries
        self.model.train()
        for j, (geometry, spectra) in enumerate(self.train_loader):
            if cuda:
                geometry = geometry.cuda()  # Put data onto GPU
                spectra = spectra.cuda()    # Put data onto GPU
            self.optm.zero_grad()                           # Zero the gradient first
            pi, sigma, mu = self.model(spectra)             # Get the output
            loss = self.make_loss(pi, sigma, mu, geometry)  # Get the loss tensor
            # Alternative: warm-up weighted loss
            # loss = self.make_loss(pi, sigma, mu, geometry, warmup=epoch)
            # Optional: track an extra simulator-based loss during training
            # Xpred = mdn.sample(pi, sigma, mu).detach().cpu().numpy()
            # Ypred = torch.tensor(simulator(self.flags.data_set, Xpred), requires_grad=False)
            # if cuda:
            #     Ypred = Ypred.cuda()
            # simulator_loss = nn.functional.mse_loss(Ypred, spectra).detach().cpu().numpy()
            loss.backward()  # Calculate the backward gradients
            # Gradient clipping to keep the MDN training stable
            torch.nn.utils.clip_grad_value_(self.model.parameters(), 1)
            self.optm.step()             # Move the optimizer one step
            train_loss += loss.detach()  # Aggregate the loss (detached so the graph is freed)
            # boundary_loss += self.Boundary_loss  # Aggregate the BDY loss

        # Calculate the avg loss of training
        train_avg_loss = train_loss.cpu().data.numpy() / (j + 1)
        # boundary_avg_loss = boundary_loss.cpu().data.numpy() / (j + 1)

        if epoch % self.flags.eval_step == 0:  # For eval steps, do the evaluations and tensorboard
            # Record the training loss to the tensorboard
            self.log.add_scalar('Loss/train', train_avg_loss, epoch)
            # self.log.add_scalar('Loss/simulator_train', simulator_loss, epoch)
            # self.log.add_scalar('Loss/BDY_train', boundary_avg_loss, epoch)

            # Set to evaluation mode
            self.model.eval()
            print("Doing evaluation on the model now")
            test_loss = 0
            for j, (geometry, spectra) in enumerate(self.test_loader):  # Loop through the eval set
                if cuda:
                    geometry = geometry.cuda()
                    spectra = spectra.cuda()
                pi, sigma, mu = self.model(spectra)  # Get the output
                if self.flags.data_set == 'meta_material':
                    loss = self.make_loss(pi, sigma, mu, geometry)  # Get the loss tensor
                    test_loss += loss.detach().cpu().numpy()
                else:
                    Xpred = mdn.sample(pi, sigma, mu).detach().cpu().numpy()  # Move to CPU before converting
                    Ypred_np = simulator(self.flags.data_set, Xpred)
                    mae, mse = compare_truth_pred(Ypred_np, spectra.cpu().numpy(),
                                                  cut_off_outlier_thres=10, quiet_mode=True)
                    test_loss += np.mean(mse)  # Aggregate the loss
                break  # Only the first batch is needed for evaluation

            # Record the testing loss to the tensorboard
            test_avg_loss = test_loss / (j + 1)
            self.log.add_scalar('Loss/test', test_avg_loss, epoch)
            print("This is Epoch %d, training loss %.5f, validation loss %.5f"
                  % (epoch, train_avg_loss, test_avg_loss))
            # print("This is Epoch %d, training loss %.5f, validation loss %.5f,"
            #       " training simulator loss %.5f"
            #       % (epoch, train_avg_loss, test_avg_loss, simulator_loss))

            # Plotting the first spectra prediction for validation
            # f = self.compare_spectra(Ypred=logit[0, :].cpu().data.numpy(),
            #                          Ytruth=spectra[0, :].cpu().data.numpy())
            # self.log.add_figure(tag='spectra compare', figure=f, global_step=epoch)

            # Model improving, save the model down
            if test_avg_loss < self.best_validation_loss:
                self.best_validation_loss = test_avg_loss
                self.save()
                print("Saving the model down...")

                if self.best_validation_loss < self.flags.stop_threshold:
                    print("Training finished EARLIER at epoch %d, reaching loss of %.5f"
                          % (epoch, self.best_validation_loss))
                    return None

        # Learning rate decay upon plateau
        self.lr_scheduler.step(train_avg_loss)

    self.log.close()
    tk.record(1)  # Record the total time of the training period
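# Usage sketch (hypothetical; the wrapper class and its constructor signature are
# assumptions, since only the train() method is shown here). A typical driver
# script would build the trainer with a model, flags and data loaders, then call:
#
#   ntwk = Network(model_fn, flags, train_loader, test_loader)
#   ntwk.train()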