def __init__(self, input_size, num_layers, kernel_size, in_channels,
             out_channels, dropout_rate, step):
    super(TemporalBlock, self).__init__()
    self.dropout_rate = dropout_rate
    self.conv_layers = nn.ModuleList()
    self.dropout_layers = nn.ModuleList()
    self.input_length = input_size[2]
    self.step = step
    kernel_size = Util.generate_list_from(kernel_size)
    # factorized kernel
    temporal_kernel_size = [kernel_size[0], 1, 1]
    self.temporal_padding_value = kernel_size[0] // 2
    temporal_padding = [self.temporal_padding_value, 0, 0]
    intermed_channels = out_channels
    for i in range(num_layers):
        intermed_channels *= 2
        if i == (num_layers - 1):
            intermed_channels = out_channels
        self.conv_layers.append(
            nn.Sequential(
                nn.Conv3d(in_channels, intermed_channels,
                          kernel_size=temporal_kernel_size,
                          padding=temporal_padding, bias=False),
                nn.BatchNorm3d(intermed_channels),
                nn.LeakyReLU(inplace=True)))
        self.dropout_layers.append(nn.Dropout(dropout_rate))
        in_channels = intermed_channels
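
# A minimal sketch of the channel schedule built by the loop above: each
# layer doubles intermed_channels, and the final layer snaps back to
# out_channels. The helper name and the example values (32 channels, 3
# layers) are illustrative assumptions, not part of the original code.
def channel_schedule(out_channels, num_layers):
    schedule, intermed = [], out_channels
    for i in range(num_layers):
        intermed *= 2
        if i == num_layers - 1:
            intermed = out_channels
        schedule.append(intermed)
    return schedule

assert channel_schedule(32, 3) == [64, 128, 32]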
def __init__(self, input_size, num_layers, hidden_dim, kernel_size, device,
             dropout_rate, step=5, *args, **kwargs):
    super(MIM, self).__init__()
    self.filter_size = kernel_size
    self.num_hidden_out = input_size[1]
    self.input_length = input_size[2]
    self.step = step
    self.num_layers = num_layers
    self.num_hidden = Util.generate_list_from(hidden_dim, num_layers)
    self.device = device
    self.stlstm_layer = nn.ModuleList()
    self.stlstm_layer_diff = nn.ModuleList()
    num_hidden_in = self.num_hidden_out
    for i in range(self.num_layers):
        if i < 1:
            self.stlstm_layer.append(
                SpatioTemporalLSTMCell(self.filter_size, num_hidden_in,
                                       self.num_hidden[i], input_size,
                                       device, dropout_rate))
        else:
            self.stlstm_layer.append(
                MIMBlock(self.filter_size, self.num_hidden[i], input_size,
                         device, dropout_rate))
    for i in range(self.num_layers - 1):
        self.stlstm_layer_diff.append(
            MIMS(self.filter_size, self.num_hidden[i + 1], input_size,
                 device, dropout_rate))
    self.conv_last = nn.Conv2d(self.num_hidden[num_layers - 1],
                               self.num_hidden_out, kernel_size=1, stride=1,
                               padding=0, bias=False)
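
# Hedged sketch of the stacking above: depth 0 holds a SpatioTemporalLSTMCell,
# every deeper layer a MIMBlock, and one MIMS module sits between each pair
# of adjacent layers (the stlstm_layer_diff path). The strings below only
# illustrate the layer counts; they are not the real modules.
num_layers = 4
main_stack = ['SpatioTemporalLSTMCell'] + ['MIMBlock'] * (num_layers - 1)
diff_stack = ['MIMS'] * (num_layers - 1)
assert (len(main_stack), len(diff_stack)) == (4, 3)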
def __init__(self, input_size, num_layers, hidden_dim, kernel_size, device,
             dropout_rate, step=5):
    super(PredRNN, self).__init__()
    self.frame_channel = input_size[1]
    self.num_layers = num_layers
    self.num_hidden = Util.generate_list_from(hidden_dim, num_layers)
    self.device = device
    cell_list = []
    self.input_length = input_size[2]
    self.step = step
    width = input_size[4]
    in_channel = self.frame_channel
    for i in range(self.num_layers):
        in_channel = self.frame_channel if i == 0 else self.num_hidden[i - 1]
        cell_list.append(
            SpatioTemporalLSTMCell(in_channel, self.num_hidden[i], width,
                                   kernel_size, 1, dropout_rate))
    self.cell_list = nn.ModuleList(cell_list)
    self.conv_last = nn.Conv2d(self.num_hidden[num_layers - 1],
                               self.frame_channel, kernel_size=1, stride=1,
                               padding=0, bias=False)
def __init__(self, num_layers, kernel_size, in_channels, out_channels,
             dropout_rate):
    super(SpatialBlock, self).__init__()
    self.padding = kernel_size // 2
    self.dropout_rate = dropout_rate
    self.conv_layers = nn.ModuleList()
    self.dropout_layers = nn.ModuleList()
    kernel_size = Util.generate_list_from(kernel_size)
    # factorized kernel
    spatial_kernel_size = [1, kernel_size[1], kernel_size[2]]
    spatial_padding_value = kernel_size[1] // 2
    spatial_padding = [0, spatial_padding_value, spatial_padding_value]
    intermed_channels = out_channels
    for i in range(num_layers):
        intermed_channels *= 2
        if i == (num_layers - 1):
            intermed_channels = out_channels
        self.conv_layers.append(
            nn.Sequential(
                nn.Conv3d(in_channels, intermed_channels,
                          kernel_size=spatial_kernel_size,
                          padding=spatial_padding, bias=False),
                nn.BatchNorm3d(intermed_channels),
                nn.LeakyReLU(inplace=True)))
        self.dropout_layers.append(nn.Dropout(dropout_rate))
        in_channels = intermed_channels
def __init__(self, input_size, num_layers, kernel_size, in_channels,
             out_channels, dropout_rate, step):
    super(TemporalCausalBlock_NoChannelIncrease, self).__init__()
    self.dropout_rate = dropout_rate
    self.conv_layers = nn.ModuleList()
    self.lrelu_layers = nn.ModuleList()
    self.batch_layers = nn.ModuleList()
    self.dropout_layers = nn.ModuleList()
    self.input_length = input_size[2]
    self.step = step
    kernel_size = Util.generate_list_from(kernel_size)
    # factorized kernel
    temporal_kernel_size = [kernel_size[0], 1, 1]
    self.temporal_padding_value = kernel_size[0] - 1
    temporal_padding = [self.temporal_padding_value, 0, 0]
    for i in range(num_layers):
        self.conv_layers.append(
            nn.Conv3d(in_channels, out_channels,
                      kernel_size=temporal_kernel_size,
                      padding=temporal_padding, bias=False))
        self.lrelu_layers.append(nn.LeakyReLU())
        self.batch_layers.append(nn.BatchNorm3d(out_channels))
        self.dropout_layers.append(nn.Dropout(dropout_rate))
        in_channels = out_channels
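
# Hedged sketch of the causal setup above: nn.Conv3d applies the (k - 1)
# padding to *both* temporal ends, so a causal variant must drop the extra
# trailing frames after each convolution. The forward pass is not shown in
# this file; the slicing below is an illustration, not its actual code.
import torch
import torch.nn as nn

k = 3
conv = nn.Conv3d(1, 1, kernel_size=[k, 1, 1], padding=[k - 1, 0, 0],
                 bias=False)
x = torch.randn(1, 1, 5, 4, 4)       # batch x channel x time x lat x lon
y = conv(x)                          # temporal length grows to 5 + (k - 1)
y_causal = y[:, :, :x.size(2)]       # keep only frames that see no future
assert y_causal.shape == x.shape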
def __init__(self, kernel_size, in_channels, out_channels, dropout_rate, bias):
    super(Conv2Plus1Block, self).__init__()
    kernel_size = Util.generate_list_from(kernel_size)
    # factorized kernel
    spatial_kernel_size = [1, kernel_size[1], kernel_size[2]]
    temporal_kernel_size = [kernel_size[0], 1, 1]
    spatial_padding_value = kernel_size[1] // 2
    temporal_padding_value = kernel_size[0] // 2
    spatial_padding = [0, spatial_padding_value, spatial_padding_value]
    temporal_padding = [temporal_padding_value, 0, 0]
    intermed_channels = int(
        math.floor((kernel_size[0] * kernel_size[1] * kernel_size[2] *
                    in_channels * out_channels) /
                   (kernel_size[1] * kernel_size[2] * in_channels +
                    kernel_size[0] * out_channels)))
    self.spatial_conv = nn.Sequential(
        nn.Conv3d(in_channels, intermed_channels, spatial_kernel_size,
                  padding=spatial_padding, bias=bias),
        nn.BatchNorm3d(intermed_channels),
        nn.LeakyReLU(inplace=True))
    self.temporal_conv = nn.Conv3d(intermed_channels, out_channels,
                                   temporal_kernel_size,
                                   padding=temporal_padding, bias=bias)
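
# Hedged worked example of the intermed_channels formula above. It is the
# (2+1)D parameter-matching rule: choose M so that the factorized
# spatial-then-temporal pair has roughly as many weights as one full
# t x d x d 3D kernel. The concrete sizes below are illustrative.
import math

t, d = 3, 3                      # temporal / spatial kernel extents
in_ch, out_ch = 16, 32
M = math.floor((t * d * d * in_ch * out_ch) /
               (d * d * in_ch + t * out_ch))
full_3d = t * d * d * in_ch * out_ch             # 13824 weights
factorized = d * d * in_ch * M + t * M * out_ch  # 240 * M = 13680 weights
assert M == 57 and abs(full_3d - factorized) < 240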
def __init__(self, kernel_size, in_channels, out_channels):
    super().__init__()
    kernel_size = Util.generate_list_from(kernel_size)
    spatial_value = kernel_size[1] // 2
    kernel_size = [1, spatial_value, spatial_value]
    stride = [1, spatial_value, spatial_value]
    self.conv = nn.Sequential(
        nn.ConvTranspose3d(in_channels, out_channels,
                           kernel_size=kernel_size, stride=stride),
        nn.LeakyReLU(inplace=True))
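
# Hedged shape check for the block above: with kernel == stride ==
# spatial_value and no padding, ConvTranspose3d scales lat/lon by exactly
# that factor and leaves the temporal axis untouched. All sizes below are
# illustrative assumptions.
import torch
import torch.nn as nn

s = 2
up = nn.ConvTranspose3d(8, 4, kernel_size=[1, s, s], stride=[1, s, s])
x = torch.randn(1, 8, 5, 16, 16)     # batch x channel x time x lat x lon
assert up(x).shape == (1, 4, 5, 32, 32)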
def __init__(self, kernel_size, in_channels, out_channels):
    super().__init__()
    kernel_size = Util.generate_list_from(kernel_size)
    temporal_value = kernel_size[0] // 2
    spatial_value = kernel_size[1] // 2
    padding = [temporal_value, spatial_value, spatial_value]
    stride = [1, spatial_value, spatial_value]
    self.down_block = nn.Sequential(
        nn.Conv3d(in_channels, out_channels, kernel_size=kernel_size,
                  padding=padding, stride=stride),
        nn.LeakyReLU(inplace=True))
def __init__(self, kernel_size, in_channels, out_channels, dropout_rate, bias):
    super(Conv3DBlock, self).__init__()
    kernel_size = Util.generate_list_from(kernel_size)
    spatial_padding_value = kernel_size[1] // 2
    temporal_padding_value = kernel_size[0] // 2
    padding = [temporal_padding_value, spatial_padding_value,
               spatial_padding_value]
    self.conv = nn.Conv3d(in_channels, out_channels, kernel_size,
                          padding=padding, bias=bias)
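
# Hedged shape check: padding each axis by kernel // 2 gives 'same' output
# sizes for odd kernels, so the Conv3d above preserves time, lat and lon.
# The concrete sizes are illustrative assumptions.
import torch
import torch.nn as nn

conv = nn.Conv3d(2, 4, kernel_size=[3, 5, 5], padding=[1, 2, 2])
x = torch.randn(1, 2, 5, 16, 16)
assert conv(x).shape == (1, 4, 5, 16, 16)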
def __init__(self, input_size, kernel_size, in_channels, out_channels,
             dropout_rate, step):
    super(TemporalGeneratorBlock, self).__init__()
    self.step = step
    self.input_length = input_size[2]
    self.tconv_layers = nn.ModuleList()
    self.conv_layers = nn.ModuleList()
    kernel_size = Util.generate_list_from(kernel_size)
    # factorized kernel
    spatial_kernel_size = [1, kernel_size[1], kernel_size[2]]
    spatial_padding_value = kernel_size[1] // 2
    spatial_padding = [0, spatial_padding_value, spatial_padding_value]
    num_layers = math.ceil((self.step - self.input_length) /
                           (2 * self.input_length))
    intermed_channels = out_channels
    for i in range(num_layers):
        intermed_channels *= 2
        if i == (num_layers - 1):
            intermed_channels = out_channels
        self.tconv_layers.append(
            nn.Sequential(
                nn.ConvTranspose3d(in_channels, intermed_channels, [4, 1, 1],
                                   stride=[2, 1, 1], padding=[1, 0, 0],
                                   bias=False),
                nn.BatchNorm3d(intermed_channels),
                nn.LeakyReLU(inplace=True)))
        in_channels = intermed_channels
    num_layers = self.step // self.input_length
    intermed_channels *= 2
    for i in range(num_layers):
        self.conv_layers.append(
            nn.Sequential(
                nn.Conv3d(in_channels, intermed_channels,
                          kernel_size=spatial_kernel_size,
                          padding=spatial_padding, bias=False),
                nn.BatchNorm3d(intermed_channels),
                nn.LeakyReLU(inplace=True)))
        in_channels = intermed_channels
        intermed_channels = out_channels
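
# Hedged arithmetic for the two layer counts above, assuming each
# ConvTranspose3d(kernel=[4,1,1], stride=[2,1,1], padding=[1,0,0]) exactly
# doubles the temporal length ((T - 1) * 2 - 2 + 4 = 2T). The example
# horizon (input_length=5 frames in, step=15 frames out) is illustrative.
import math

input_length, step = 5, 15
n_tconv = math.ceil((step - input_length) / (2 * input_length))
n_conv = step // input_length
assert (n_tconv, n_conv) == (1, 3)   # one doubling layer, three spatial convs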
def __init__(self, input_size, num_layers, kernel_size, in_channels,
             out_channels, dropout_rate, step):
    super(TemporalReversedBlock_NoChannelIncrease, self).__init__()
    self.dropout_rate = dropout_rate
    self.conv_layers = nn.ModuleList()
    self.lrelu_layers = nn.ModuleList()
    self.batch_layers = nn.ModuleList()
    self.dropout_layers = nn.ModuleList()
    self.input_length = input_size[2]
    self.step = step
    kernel_size = Util.generate_list_from(kernel_size)
    # factorized kernel
    temporal_kernel_size = [kernel_size[0], 1, 1]
    for i in range(num_layers):
        self.conv_layers.append(
            RNet(in_channels, out_channels,
                 kernel_size=temporal_kernel_size, bias=False))
        self.dropout_layers.append(nn.Dropout(dropout_rate))
        in_channels = out_channels
def __execute_learning(self, model, criterion, optimizer, train_loader,
                       val_loader, test_loader, dataset_name, filename_prefix,
                       dropout_rate):
    criterion_name = type(criterion).__name__
    filename_prefix += '_' + criterion_name
    util = Util(self.model_descr, self.dataset_type, self.version,
                filename_prefix)
    # Training the model
    checkpoint_filename = util.get_checkpoint_filename()
    trainer = Trainer(model, criterion, optimizer, train_loader, val_loader,
                      self.epochs, self.device, self.verbose, self.patience,
                      self.no_stop)
    start_timestamp = tm.time()
    train_losses, val_losses = trainer.fit(checkpoint_filename)
    end_timestamp = tm.time()
    # Error analysis
    util.save_loss(train_losses, val_losses)
    util.plot([train_losses, val_losses], ['Training', 'Validation'],
              'Epochs', 'Error', 'Error analysis', self.plot)
    # Load the checkpoint with the lowest validation loss seen during training
    model, _, best_epoch, val_loss = trainer.load_checkpoint(
        checkpoint_filename)
    # Evaluating the model
    evaluator = Evaluator(model, criterion, test_loader, self.device)
    test_loss = evaluator.eval()
    train_time = end_timestamp - start_timestamp
    print(f'Training time: {util.to_readable_time(train_time)}\n'
          f'{self.model_descr} {criterion_name}: {test_loss:.4f}\n')
    return {
        'best_epoch': best_epoch,
        'val_error': val_loss,
        'test_error': test_loss,
        'train_time': train_time,
        'loss_type': criterion_name,
        'dropout_rate': dropout_rate,
        'dataset': dataset_name
    }
if __name__ == '__main__':
    args = get_arguments()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.cuda)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    message, model_descr = None, None
    if args.convlstm:
        model_descr = 'ConvLSTM'
    else:
        model_descr = 'STConvS2s'
    model_builder = MLBuilder(model_descr, args.version, args.plot,
                              args.no_seed, args.verbose, args.small_dataset,
                              args.no_stop, args.epoch, args.patience, device,
                              args.workers, args.convlstm, args.mae,
                              args.chirps, args.step)
    print(f'RUN MODEL: {model_descr}')
    print(f'Device: {device}')
    # start time is saved when creating an instance of Util
    util = Util(model_descr, version=args.version)
    try:
        message = run(model_builder, args.iteration, util)
        message['step'] = args.step
        message['hostname'] = platform.node()
    except Exception as e:
        traceback.print_exc()
        message = '=> Error: ' + str(e)
    util.send_email(message, args.email)
def run_model(self, number):
    self.__define_seed(number)
    validation_split = 0.2
    test_split = 0.2
    # Loading the dataset
    ds = xr.open_mfdataset(self.dataset_file)
    if self.config.small_dataset:
        ds = ds[dict(sample=slice(0, 500))]
    train_dataset = NetCDFDataset(ds, test_split=test_split,
                                  validation_split=validation_split)
    val_dataset = NetCDFDataset(ds, test_split=test_split,
                                validation_split=validation_split,
                                is_validation=True)
    test_dataset = NetCDFDataset(ds, test_split=test_split,
                                 validation_split=validation_split,
                                 is_test=True)
    if self.config.verbose:
        print('[X_train] Shape:', train_dataset.X.shape)
        print('[y_train] Shape:', train_dataset.y.shape)
        print('[X_val] Shape:', val_dataset.X.shape)
        print('[y_val] Shape:', val_dataset.y.shape)
        print('[X_test] Shape:', test_dataset.X.shape)
        print('[y_test] Shape:', test_dataset.y.shape)
        print(f'Train on {len(train_dataset)} samples, '
              f'validate on {len(val_dataset)} samples')
    params = {
        'batch_size': self.config.batch,
        'num_workers': self.config.workers,
        'worker_init_fn': self.__init_seed
    }
    train_loader = DataLoader(dataset=train_dataset, shuffle=True, **params)
    val_loader = DataLoader(dataset=val_dataset, shuffle=False, **params)
    test_loader = DataLoader(dataset=test_dataset, shuffle=False, **params)
    models = {
        'stconvs2s-r': stconvs2s.STConvS2S_R,
        'stconvs2s-c': stconvs2s.STConvS2S_C,
        'convlstm': stconvlstm.STConvLSTM,
        'predrnn': predrnn.PredRNN,
        'mim': mim.MIM,
        'conv2plus1d': conv2plus1d.Conv2Plus1D,
        'conv3d': conv3d.Conv3D,
        'enc-dec3d': encoder_decoder3d.Endocer_Decoder3D,
        'vlstm': nosocial.VanillaLSTM_Downsample,
        'slstm': slstm.SocialLSTM_Downsample,
        'sclstm': sclstm.SocialConvLSTM,
    }
    if self.config.model not in models:
        raise ValueError(f'{self.config.model} is not a valid model name. '
                         f'Choose between: {list(models.keys())}')
    # Creating the model
    model_builder = models[self.config.model]
    model = model_builder(
        input_size=train_dataset.X.shape,
        num_layers=self.config.num_layers,
        hidden_dim=self.config.hidden_dim,
        kernel_size=self.config.kernel_size,
        device=self.device,
        dropout_rate=self.dropout_rate,
        step=int(self.step),
        share=self.config.share,
        lstms_shape=self.config.lstms_shape,
    )
    model.to(self.device)
    metrics = {
        'rmse': (RMSELoss, RMSEDownSample),
        'mae': (L1Loss, L1LossDownSample)
    }
    if self.config.loss not in metrics:
        raise ValueError(f'{self.config.loss} is not a valid loss function '
                         f'name. Choose between: {list(metrics.keys())}')
    # the downsampling models take the loss variant that needs the input shape
    if self.config.model in ['vlstm', 'slstm']:
        loss = metrics[self.config.loss][1](train_dataset.X.shape)
    else:
        loss = metrics[self.config.loss][0]()
    loss.to(self.device)
    opt_params = {'lr': 0.001, 'alpha': 0.9, 'eps': 1e-6}
    optimizer = torch.optim.RMSprop(model.parameters(), **opt_params)
    util = Util(self.config.model, self.dataset_type, self.config.version,
                self.filename_prefix)
    train_info = {'train_time': 0}
    if self.config.pre_trained is None:
        train_info = self.__execute_learning(model, loss, optimizer,
                                             train_loader, val_loader, util)
    eval_info = self.__load_and_evaluate(model, loss, optimizer, test_loader,
                                         train_info['train_time'], util)
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return {**train_info, **eval_info}
def run_model(self, number):
    self.__define_seed(number)
    validation_split = 0.2
    test_split = 0.2
    # Loading the dataset
    ds = xr.open_mfdataset(self.dataset_file)
    if self.config.small_dataset:
        ds = ds[dict(sample=slice(0, 500))]
    train_dataset = NetCDFDataset(ds, test_split=test_split,
                                  validation_split=validation_split,
                                  x_step=self.x_step)
    val_dataset = NetCDFDataset(ds, test_split=test_split,
                                validation_split=validation_split,
                                x_step=self.x_step, is_validation=True)
    test_dataset = NetCDFDataset(ds, test_split=test_split,
                                 validation_split=validation_split,
                                 x_step=self.x_step, is_test=True)
    util = Util(self.config.model, self.dataset_type, self.config.version,
                self.filename_prefix)
    # Normalizing the data: statistics are observed on the training set and
    # reused for the validation and test sets
    num_channels = train_dataset.X.shape[1]
    if num_channels > 1:
        normalizer_x = Normalizer()
        normalizer_x.observe(train_dataset.X)
        normalizer_y = Normalizer()
        normalizer_y.observe(train_dataset.y)
        train_dataset.X = normalizer_x.normalize(train_dataset.X)
        train_dataset.y = normalizer_y.normalize(train_dataset.y)
        val_dataset.X = normalizer_x.normalize(val_dataset.X)
        val_dataset.y = normalizer_y.normalize(val_dataset.y)
        test_dataset.X = normalizer_x.normalize(test_dataset.X)
        test_dataset.y = normalizer_y.normalize(test_dataset.y)
        util.save_normalization_parameters(normalizer_x, normalizer_y)
    # INITIAL STATE - batch x channel x time x latitude x longitude
    initial_state = torch.tensor(train_dataset.X)[:1, :, :1].to(self.device)
    if self.config.verbose:
        print('[X_train] Shape:', train_dataset.X.shape)
        print('[y_train] Shape:', train_dataset.y.shape)
        print('[X_val] Shape:', val_dataset.X.shape)
        print('[y_val] Shape:', val_dataset.y.shape)
        print('[X_test] Shape:', test_dataset.X.shape)
        print('[y_test] Shape:', test_dataset.y.shape)
        print(f'Train on {len(train_dataset)} samples, '
              f'validate on {len(val_dataset)} samples')
    params = {
        'batch_size': self.config.batch,
        'num_workers': self.config.workers,
        'worker_init_fn': self.__init_seed
    }
    train_loader = DataLoader(dataset=train_dataset, shuffle=True, **params)
    val_loader = DataLoader(dataset=val_dataset, shuffle=False, **params)
    test_loader = DataLoader(dataset=test_dataset, shuffle=False, **params)
    models = {
        'stconvs2s-r': STConvS2S_R,
        'stconvs2s-c': STConvS2S_C,
        'convlstm': STConvLSTM,
        'predrnn': PredRNN,
        'mim': MIM,
        'conv2plus1d': Conv2Plus1D,
        'conv3d': Conv3D,
        'enc-dec3d': Endocer_Decoder3D,
        'ablation-stconvs2s-nocausalconstraint': AblationSTConvS2S_NoCausalConstraint,
        'ablation-stconvs2s-notemporal': AblationSTConvS2S_NoTemporal,
        'ablation-stconvs2s-r-nochannelincrease': AblationSTConvS2S_R_NoChannelIncrease,
        'ablation-stconvs2s-c-nochannelincrease': AblationSTConvS2S_C_NoChannelIncrease,
        'ablation-stconvs2s-r-inverted': AblationSTConvS2S_R_Inverted,
        'ablation-stconvs2s-c-inverted': AblationSTConvS2S_C_Inverted,
        'ablation-stconvs2s-r-notfactorized': AblationSTConvS2S_R_NotFactorized,
        'ablation-stconvs2s-c-notfactorized': AblationSTConvS2S_C_NotFactorized
    }
    if self.config.model not in models:
        raise ValueError(f'{self.config.model} is not a valid model name. '
                         f'Choose between: {list(models.keys())}')
    # Creating the model
    model_builder = models[self.config.model]
    model = model_builder(train_dataset.X.shape, self.config.num_layers,
                          self.config.hidden_dim, self.config.kernel_size,
                          self.device, self.dropout_rate, self.y_step)
    # Use all available GPUs
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model.to(self.device)
    criterion = RMSELoss(reg=self.config.regularization,
                         initial_state=initial_state)
    opt_params = {
        'lr': self.config.learning_rate,
        'alpha': 0.9,
        'eps': 1e-6
    }
    optimizer = torch.optim.RMSprop(model.parameters(), **opt_params)
    train_info = {'train_time': 0}
    if self.config.pre_trained is None:
        train_info = self.__execute_learning(model, criterion, optimizer,
                                             train_loader, val_loader, util)
    eval_info = self.__load_and_evaluate(model, criterion, optimizer,
                                         test_loader,
                                         train_info['train_time'], util)
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return {**train_info, **eval_info}
if __name__ == '__main__':
    print('RUN MODEL: ARIMA')
    args = get_arguments()
    dataset_name, dataset_file = get_dataset_file(args.chirps)
    ds = xr.open_mfdataset(dataset_file)
    with Pool() as pool:
        i = range(ds.lat.size)
        index_list = list(itertools.product(i, i))
        # separate time series based on each grid location
        ds_list = [
            ds.isel(lat=index[0], lon=index[1]).to_dataframe()
            for index in index_list
        ]
        util = Util('ARIMA')
        results = pool.starmap(
            run_arima,
            zip(ds_list, itertools.repeat(args.chirps),
                itertools.repeat(int(args.step))))
        results = np.array(results)
        pool.close()
        pool.join()
    print('Elapsed time', util.get_time_info()['elapsed_time'])
    # RMSE/MAE are non-negative, so negative results are failure sentinels
    rmse_list = [result[0] for result in results if result[0] >= 0]
    mae_list = [result[1] for result in results if result[1] >= 0]
    rmse_mean, rmse_std = np.mean(rmse_list), np.std(rmse_list)
    mae_mean, mae_std = np.mean(mae_list), np.std(mae_list)
    print('\nRMSE: ', rmse_list)
    print(f'RMSE mean: {rmse_mean:.4f} (std: {rmse_std:.4f})')
    print('\nMAE: ', mae_list)
    print(f'MAE mean: {mae_mean:.4f} (std: {mae_std:.4f})')
def run_model(self, number):
    self.__define_seed(number)
    validation_split = 0.2
    test_split = 0.2
    # Loading the dataset
    ds = xr.open_mfdataset(self.dataset_file)
    if self.config.small_dataset:
        ds = ds[dict(sample=slice(0, 500))]
    train_dataset = NetCDFDataset(ds, test_split=test_split,
                                  validation_split=validation_split)
    val_dataset = NetCDFDataset(ds, test_split=test_split,
                                validation_split=validation_split,
                                is_validation=True)
    test_dataset = NetCDFDataset(ds, test_split=test_split,
                                 validation_split=validation_split,
                                 is_test=True)
    if self.config.verbose:
        print('[X_train] Shape:', train_dataset.X.shape)
        print('[y_train] Shape:', train_dataset.y.shape)
        print('[X_val] Shape:', val_dataset.X.shape)
        print('[y_val] Shape:', val_dataset.y.shape)
        print('[X_test] Shape:', test_dataset.X.shape)
        print('[y_test] Shape:', test_dataset.y.shape)
        print(f'Train on {len(train_dataset)} samples, '
              f'validate on {len(val_dataset)} samples')
    params = {
        'batch_size': self.config.batch,
        'num_workers': self.config.workers,
        'worker_init_fn': self.__init_seed
    }
    train_loader = DataLoader(dataset=train_dataset, shuffle=True, **params)
    val_loader = DataLoader(dataset=val_dataset, shuffle=False, **params)
    test_loader = DataLoader(dataset=test_dataset, shuffle=False, **params)
    models = {
        'stconvs2s-r': STConvS2S_R,
        'stconvs2s-c': STConvS2S_C,
        'convlstm': STConvLSTM,
        'predrnn': PredRNN,
        'mim': MIM,
        'conv2plus1d': Conv2Plus1D,
        'conv3d': Conv3D,
        'enc-dec3d': Endocer_Decoder3D,
        'ablation-stconvs2s-nocausalconstraint': AblationSTConvS2S_NoCausalConstraint,
        'ablation-stconvs2s-notemporal': AblationSTConvS2S_NoTemporal,
        'ablation-stconvs2s-r-nochannelincrease': AblationSTConvS2S_R_NoChannelIncrease,
        'ablation-stconvs2s-c-nochannelincrease': AblationSTConvS2S_C_NoChannelIncrease,
        'ablation-stconvs2s-r-inverted': AblationSTConvS2S_R_Inverted,
        'ablation-stconvs2s-c-inverted': AblationSTConvS2S_C_Inverted,
        'ablation-stconvs2s-r-notfactorized': AblationSTConvS2S_R_NotFactorized,
        'ablation-stconvs2s-c-notfactorized': AblationSTConvS2S_C_NotFactorized
    }
    if self.config.model not in models:
        raise ValueError(f'{self.config.model} is not a valid model name. '
                         f'Choose between: {list(models.keys())}')
    # Creating the model
    model_builder = models[self.config.model]
    model = model_builder(train_dataset.X.shape, self.config.num_layers,
                          self.config.hidden_dim, self.config.kernel_size,
                          self.device, self.dropout_rate, int(self.step))
    model.to(self.device)
    criterion = RMSELoss()
    opt_params = {'lr': 0.001, 'alpha': 0.9, 'eps': 1e-6}
    optimizer = torch.optim.RMSprop(model.parameters(), **opt_params)
    util = Util(self.config.model, self.dataset_type, self.config.version,
                self.filename_prefix)
    train_info = {'train_time': 0}
    if self.config.pre_trained is None:
        train_info = self.__execute_learning(model, criterion, optimizer,
                                             train_loader, val_loader, util)
    eval_info = self.__load_and_evaluate(model, criterion, optimizer,
                                         test_loader,
                                         train_info['train_time'], util)
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return {**train_info, **eval_info}
                                     train_times_epochs, iteration, util)
    new_model_info['dataset'] = model_info['dataset']
    return new_model_info


if __name__ == '__main__':
    args = get_arguments()
    # os.environ["CUDA_VISIBLE_DEVICES"] = str(args.cuda)
    device = torch.device('cpu')
    device_descr = 'CPU'
    if torch.cuda.is_available():
        device = torch.device('cuda')
        device_descr = 'GPU'
    message = None
    model_builder = MLBuilder(args, device)
    print(f'RUN MODEL: {args.model.upper()}')
    print(f'Device: {device_descr}')
    print(f'Settings: {args}')
    # start time is saved when creating an instance of Util
    util = Util(args.model, version=args.version)
    try:
        message = run(model_builder, args.iteration, util)
        message['x_step'] = args.x_step
        message['y_step'] = args.y_step
        message['hostname'] = platform.node()
    except Exception as e:
        traceback.print_exc()
        message = '=> Error: ' + str(e)
    util.send_email(message, args.email)