def train_pytorch(train_set, val_set, test_set, epochs=1000, mini_batch_size=50, lr=0.001):
    # Turn autograd on
    torch.set_grad_enabled(True)

    # Net definition
    model = torch_nn.Sequential(torch_nn.Linear(2, 25), torch_nn.ReLU(),
                                torch_nn.Linear(25, 25), torch_nn.ReLU(),
                                torch_nn.Linear(25, 25), torch_nn.ReLU(),
                                torch_nn.Linear(25, 2))

    # Training params
    opt = torch.optim.SGD(model.parameters(), lr=lr)
    criterion = torch_nn.MSELoss(reduction='sum')

    # Train and time the run
    start_time = time.perf_counter()
    history = train_model(model, Variable(train_set[0]), Variable(train_set[1]),
                          Variable(val_set[0]), Variable(val_set[1]),
                          criterion, opt, epochs, mini_batch_size,
                          pytorch=True, verbose=True)
    end_time = time.perf_counter()

    # Compute final accuracies
    train_acc = compute_accuracy(model, Variable(train_set[0]), Variable(train_set[1]), pytorch=True)
    test_acc = compute_accuracy(model, Variable(test_set[0]), Variable(test_set[1]), pytorch=True)

    print("\tTraining time : %s s" % (end_time - start_time))
    print("\tAccuracy : train_acc = %s \t test_acc = %s" % (train_acc, test_acc))

    return history, end_time - start_time, (train_acc, test_acc)
def train_framework(train_set, val_set, test_set, epochs=1000, mini_batch_size=50, lr=0.001):
    # Turn autograd off (the custom framework computes gradients itself)
    torch.set_grad_enabled(False)

    # Net definition
    model = nn.Sequential(nn.Linear(2, 25, activation="relu"), F.ReLU(),
                          nn.Linear(25, 25, activation="relu"), F.ReLU(),
                          nn.Linear(25, 25, activation="relu"), F.ReLU(),
                          nn.Linear(25, 2, activation="relu"))

    # Training params
    opt = optim.SGD(lr, model)
    criterion = losses.LossMSE()

    # Train and time the run
    start_time = time.perf_counter()
    history = train_model(model, train_set[0], train_set[1],
                          val_set[0], val_set[1],
                          criterion, opt, epochs, mini_batch_size,
                          pytorch=False, verbose=True)
    end_time = time.perf_counter()

    # Compute final accuracies
    train_acc = compute_accuracy(model, train_set[0], train_set[1], pytorch=False)
    test_acc = compute_accuracy(model, test_set[0], test_set[1], pytorch=False)

    print("\tTraining time : %s s" % (end_time - start_time))
    print("\tAccuracy : train_acc = %s \t test_acc = %s" % (train_acc, test_acc))

    return history, end_time - start_time, (train_acc, test_acc)
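
# Hypothetical usage sketch (not part of the original project): build a toy
# two-class disc dataset with plain torch ops and feed the same splits to both
# trainers for a side-by-side comparison. The helper below is an assumption,
# standing in for whatever data generator this project actually uses.
import math

def _toy_disc_set(n):
    inputs = torch.rand(n, 2)  # points uniform in [0, 1]^2
    # label 1 inside the disc of radius sqrt(1 / (2*pi)) centred at (0.5, 0.5)
    inside = (inputs - 0.5).pow(2).sum(dim=1) <= 1 / (2 * math.pi)
    # one-hot targets, matching the 2-unit output layer and the MSE criterion
    targets = torch.zeros(n, 2)
    targets[torch.arange(n), inside.long()] = 1.0
    return inputs, targets

# train_set, val_set, test_set = _toy_disc_set(1000), _toy_disc_set(1000), _toy_disc_set(1000)
# history_pt, time_pt, acc_pt = train_pytorch(train_set, val_set, test_set, epochs=100)
# history_fw, time_fw, acc_fw = train_framework(train_set, val_set, test_set, epochs=100)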
def main():
    args = parse_arguments()
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.use_cuda:
        torch.cuda.manual_seed_all(args.seed)
        cudnn.benchmark = True
    model_path = get_model_path(args.dataset, args.arch, args.seed)

    # Init logger
    log_file_name = os.path.join(model_path, 'log.txt')
    print("Log file: {}".format(log_file_name))
    log = open(log_file_name, 'w')
    print_log('model path : {}'.format(model_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    for key, value in state.items():
        print_log("{} : {}".format(key, value), log)
    print_log("Random Seed: {}".format(args.seed), log)
    print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("Torch version : {}".format(torch.__version__), log)
    print_log("Cudnn version : {}".format(torch.backends.cudnn.version()), log)

    # Data specifications for the websites dataset
    mean = [0., 0., 0.]
    std = [1., 1., 1.]
    input_size = 224
    num_classes = 4

    # Dataset
    traindir = os.path.join(WEBSITES_DATASET_PATH, 'train')
    valdir = os.path.join(WEBSITES_DATASET_PATH, 'val')
    train_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    test_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    data_train = dset.ImageFolder(root=traindir, transform=train_transform)
    data_test = dset.ImageFolder(root=valdir, transform=test_transform)

    # Dataloader
    data_train_loader = torch.utils.data.DataLoader(data_train,
                                                    batch_size=args.batch_size,
                                                    shuffle=True,
                                                    num_workers=args.workers,
                                                    pin_memory=True)
    data_test_loader = torch.utils.data.DataLoader(data_test,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=args.workers,
                                                   pin_memory=True)

    # Network
    if args.arch == "vgg16":
        net = models.vgg16(pretrained=True)
    elif args.arch == "vgg19":
        net = models.vgg19(pretrained=True)
    elif args.arch == "resnet18":
        net = models.resnet18(pretrained=True)
    elif args.arch == "resnet50":
        net = models.resnet50(pretrained=True)
    elif args.arch == "resnet101":
        net = models.resnet101(pretrained=True)
    elif args.arch == "resnet152":
        net = models.resnet152(pretrained=True)
    else:
        raise ValueError("Network {} not supported".format(args.arch))

    if num_classes != 1000:
        net = manipulate_net_architecture(model_arch=args.arch, net=net, num_classes=num_classes)

    # Loss function
    if args.loss_function == "ce":
        criterion = torch.nn.CrossEntropyLoss()
    else:
        raise ValueError("Loss function {} not supported".format(args.loss_function))

    # Cuda
    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    # Optimizer
    momentum = 0.9
    decay = 5e-4
    optimizer = torch.optim.SGD(net.parameters(), lr=args.learning_rate,
                                momentum=momentum, weight_decay=decay, nesterov=True)

    recorder = RecorderMeter(args.epochs)
    start_time = time.time()
    epoch_time = AverageMeter()

    # Main loop
    for epoch in range(args.epochs):
        current_learning_rate = adjust_learning_rate(args.learning_rate, momentum,
                                                     optimizer, epoch, args.gammas,
                                                     args.schedule)

        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)

        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(
            time_string(), epoch, args.epochs, need_time, current_learning_rate)
            + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
                recorder.max_accuracy(False), 100 - recorder.max_accuracy(False)), log)

        # train for one epoch
        train_acc, train_los = train_model(data_loader=data_train_loader, model=net,
                                           criterion=criterion, optimizer=optimizer,
                                           epoch=epoch, log=log, print_freq=200,
                                           use_cuda=args.use_cuda)

        # evaluate on test set
        print_log("Validation on test dataset:", log)
        val_acc, val_loss = validate(data_test_loader, net, criterion, log=log,
                                     use_cuda=args.use_cuda)

        recorder.update(epoch, train_los, train_acc, val_loss, val_acc)

        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict(),
            'args': copy.deepcopy(args),
        }, model_path, 'checkpoint.pth.tar')

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    recorder.plot_curve(os.path.join(model_path, 'curve.png'))
    log.close()
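
# For reference, a minimal sketch of the step schedule the loop above assumes
# (the project's own adjust_learning_rate may differ): the learning rate is
# multiplied by the matching gamma each time the epoch reaches a milestone in
# `schedule`, and the optimizer's parameter groups are updated in place.
def adjust_learning_rate_sketch(base_lr, momentum, optimizer, epoch, gammas, schedule):
    # momentum is accepted for signature parity with the call above but unused here
    assert len(gammas) == len(schedule), "each milestone needs a gamma"
    lr = base_lr
    for gamma, step in zip(gammas, schedule):
        if epoch >= step:
            lr = lr * gamma
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr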
os.makedirs(save_path, exist_ok=True)
print('Initiating training, models will be saved at {}'.format(save_path))
save_callback = SaveCallback(save_path)

# logs
with open(os.path.join(save_path, 'training.log'), 'w') as f:
    with redirect_stdout(f):
        print(model)

# training metrics
history, best_model, time_elapsed = train_model(model, criterion, optimizer,
                                                train_data, val_data,
                                                epochs=epochs,
                                                batch_size=batch_size,
                                                shuffle_dataset=shuffle_dataset,
                                                scheduler=scheduler,
                                                use_cuda=use_cuda,
                                                pin_memory=pin_memory,
                                                callbacks=[save_callback])

# model save
model_path = os.path.join(save_path, model_path)
print('Saving model {}'.format(model_path))
torch.save(best_model, model_path)

# metrics save
save_history(history, file_path=os.path.join(save_path, 'history.csv'))
plot_history(history, folder_path=save_path)
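
# Minimal sketch of a per-epoch checkpoint callback compatible with the
# `callbacks=[save_callback]` hook above; the real SaveCallback's interface is
# an assumption here (invoked once per epoch with the model and the epoch index).
class SaveCallbackSketch:
    def __init__(self, save_path):
        self.save_path = save_path

    def __call__(self, model, epoch):
        torch.save(model.state_dict(),
                   os.path.join(self.save_path, 'epoch_{:03d}.pth'.format(epoch)))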
def __call__(self, input_features, print_out, index=None, dataset_path=None):
    """Implemented method when CAM is called on a given input and its targeted index

    Parameters:
    -------
    input_features: A multivariate data input to the model
    print_out: Whether to print the class with the maximum likelihood when index is None
    index: Targeted output class
    dataset_path: Path of the dataset (the same one that has been used to train)
        on which to retrain the new model (if the original model does not have
        GAP right after the explaining conv)

    Returns:
    -------
    cam: The resulting weighted feature maps
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if index is not None and print_out:
        print_out = False

    if not self.has_gap:
        if dataset_path is None:
            raise AttributeError("Dataset path is not defined for retraining the new model")
        for param in self.model.parameters():
            param.requires_grad = False

        # infer the number of classes from the last (or second-to-last) linear layer
        if ("fc" not in list(dict(self.model._modules["linear_layers"].named_children()).keys())[-1]):
            n_classes = self.model._modules["linear_layers"][-2].out_features
        else:
            n_classes = self.model._modules["linear_layers"][-1].out_features

        # copy the convolutional layers up to (and including) the target layer
        new_cnn_layer_list = []
        for idx, layer in enumerate(self.feature_module):
            new_cnn_layer_list.append((
                list(dict(self.feature_module.named_children()).keys())[idx],
                layer,
            ))
            if (list(dict(self.feature_module.named_children()).keys())[idx]
                    == self.target_layer_names[0]):
                out_channels = layer.out_channels
                break
        new_cnn_layers = OrderedDict(new_cnn_layer_list)

        class TargetedModel(torch.nn.Module):
            def __init__(self, n_classes, out_channels):
                super().__init__()
                self.cnn_layers = torch.nn.Sequential(new_cnn_layers)
                self.linear_layers_1d = torch.nn.Sequential(
                    OrderedDict([
                        ("avg_pool", torch.nn.AdaptiveAvgPool1d(1)),
                        ("view", SwapLastDims()),
                        ("fc1", torch.nn.Linear(out_channels, n_classes)),
                        ("softmax", torch.nn.Softmax(dim=1)),
                    ]))
                self.linear_layers_2d = torch.nn.Sequential(
                    OrderedDict([
                        ("avg_pool", torch.nn.AdaptiveAvgPool2d(1)),
                        ("squeeze", Squeeze()),
                        ("fc1", torch.nn.Linear(out_channels, n_classes)),
                        ("softmax", torch.nn.Softmax(dim=1)),
                    ]))

            def forward(self, x):
                x = self.cnn_layers(x)
                if len(x.size()) == 4:
                    x = self.linear_layers_2d(x)
                else:
                    x = self.linear_layers_1d(x)
                x = torch.squeeze(x)
                return x

        new_model = TargetedModel(n_classes, out_channels).to(device)
        # only the new classification heads are trainable
        for param in new_model._modules["linear_layers_1d"].parameters():
            param.requires_grad = True
        for param in new_model._modules["linear_layers_2d"].parameters():
            param.requires_grad = True

        # retrain the new heads on the original dataset
        dataset = DatasetLoader(dataset_path)
        dataloaders, datasets_size = dataset.get_torch_dataset_loader_auto(4, 4)
        criterion = torch.nn.CrossEntropyLoss()
        optimizer_ft = torch.optim.Adam(new_model.parameters(), lr=1.5e-4)
        exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=10, gamma=0.1)
        train_model(new_model, criterion, optimizer_ft, exp_lr_scheduler,
                    dataloaders, datasets_size, 10)

        features, output, index = self.extract_features(input_features, print_out, index)
        target = features[-1]
        target = target.cpu().data.numpy()[0, :]

        try:
            print(new_model._modules["linear_layers_1d"][-1].weight.detach().cpu().numpy().shape)
            weights = new_model._modules["linear_layers_1d"][-1].weight.detach().cpu().numpy()[index, :]
        except AttributeError:
            print(new_model._modules["linear_layers_1d"][-2].weight.detach().cpu().numpy().shape)
            weights = new_model._modules["linear_layers_1d"][-2].weight.detach().cpu().numpy()[index, :]
        except KeyError:
            try:
                print(new_model._modules["linear_layers_2d"][-1].weight.detach().cpu().numpy().shape)
                weights = new_model._modules["linear_layers_2d"][-1].weight.detach().cpu().numpy()[index, :]
            except AttributeError:
                print(new_model._modules["linear_layers_2d"][-2].weight.detach().cpu().numpy().shape)
                weights = new_model._modules["linear_layers_2d"][-2].weight.detach().cpu().numpy()[index, :]

        cam = np.zeros(target.shape[1:], dtype=np.float32)
        target = np.squeeze(target)
        weights = np.squeeze(weights).T
        # assert (
        #     weights.shape[0] == target.shape[0]
        # ), "Weights and targets layer shapes are not compatible."
        cam = self.cam_weighted_sum(cam, weights, target, ReLU=False)
        return cam, output

    # model already has GAP after the explaining conv: use its own weights
    features, output, index = self.extract_features(input_features, print_out, index)
    target = features[-1]
    target = target.cpu().data.numpy()[0, :]
    try:
        weights = self.model._modules["linear_layers"][-1].weight.detach().cpu().numpy()[:, index]
    except AttributeError:
        weights = self.model._modules["linear_layers"][-2].weight.detach().cpu().numpy()[:, index]
    cam = np.zeros(target.shape[1:], dtype=np.float32)
    target = np.squeeze(target)
    weights = np.squeeze(weights).T
    assert (weights.shape[0] == target.shape[0]), \
        "Weights and targets layer shapes are not compatible."
    cam = self.cam_weighted_sum(cam, weights, target, ReLU=False)
    return cam, output
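
# For context, a minimal sketch of the weighted-sum step invoked above; the
# class's own cam_weighted_sum is assumed to be equivalent: each feature map is
# scaled by the weight of the targeted class and accumulated, with an optional
# ReLU to keep only positively contributing regions. Relies on the module's
# existing `numpy as np` import. The normalization at the end is an assumption.
def cam_weighted_sum_sketch(cam, weights, target, ReLU=True):
    for i, w in enumerate(weights):
        cam = cam + w * target[i, ...]
    if ReLU:
        cam = np.maximum(cam, 0)
    # normalize to [0, 1] for visualization
    cam = cam - np.min(cam)
    cam = cam / (np.max(cam) + 1e-12)
    return cam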
import os

import torch
from torch import optim
from torch.utils.data import DataLoader

from utils.models import Integrator
from utils.dataset import ROIDataset, get_mask, Reflection
from utils.training import train_model, iou, compute_iou, TwoChannelLoss

if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Current device is {}'.format(device))

    batch_size = 128
    model = Integrator().to(device)

    train_dataset = ROIDataset(path='data/train',
                               augmentation=[Reflection(p=0.5)],
                               key=get_mask,
                               mode='integration',
                               gen_p=0.7)
    train_loader = DataLoader(train_dataset, batch_size=batch_size)
    val_dataset = ROIDataset(path='data/val', key=get_mask, mode='integration', gen_p=0)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    optimizer = optim.Adam(params=model.parameters(), lr=1e-2)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15, eta_min=1e-6)
    criterion = TwoChannelLoss(weights_split=[0.1, 2], weights_area=[0.4, 0.2])

    result = train_model(model, train_loader, val_loader, criterion, iou,
                         optimizer, 150, device, scheduler)

    # reload the best weights saved by train_model before evaluating
    model.load_state_dict(torch.load(os.path.join('data', model.__class__.__name__),
                                     map_location=device))

    test_dataset = ROIDataset(path='data/test', key=get_mask, mode='integration', gen_p=0)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    IoU_splitter, IoU_integration = compute_iou(model, test_loader, device)
    print('test IoUs: {:.4f}, {:.4f}'.format(IoU_splitter, IoU_integration))
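
# Hypothetical sketch of the IoU metric passed to train_model above; the
# project's utils.training.iou may differ in thresholding and reduction.
def iou_sketch(pred, mask, eps=1e-7):
    pred = (pred > 0.5).float()
    mask = (mask > 0.5).float()
    inter = (pred * mask).sum(dim=(-2, -1))
    union = pred.sum(dim=(-2, -1)) + mask.sum(dim=(-2, -1)) - inter
    return ((inter + eps) / (union + eps)).mean()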
def validate_model(Net, seed, mini_batch_size=100, optimizer=optim.Adam,
                   criterion=nn.CrossEntropyLoss(), n_epochs=40, eta=1e-3,
                   lambda_l2=0, alpha=0.5, beta=0.5, plot=True, rotate=False,
                   translate=False, swap_channel=False, GPU=False):
    """
    General :

        - Train a network model whose weights have been initialized with a specific seed over n_epochs
        - Data is created with the same seed: train, validation and test sets are built by calling the prologue
        - Record the train and validation accuracy and loss and optionally display their evolution curves

    Input :

        - Net : a network dictionary from the <Nets> class
        - seed : seed for the pseudo random number generator used in weight initialization and data loading
        -> mini_batch_size, optimizer, criterion, n_epochs, eta, lambda_l2, alpha, beta : see training.py
        - plot : if True, plot the learning curve evolution over the epochs -> default True
        -> rotate, translate and swap_channel -> data augmentation, see loader.py

    Output :

        Printed loss and accuracy of the network on the test set after training,
        plus the learning curves if plot is True
    """
    # set the pytorch seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # set the seed for the random split of the dataset into training and validation
    random.seed(0)

    # create the dataset
    data = PairSetMNIST()
    train_data = Training_set(data)
    test_data = Test_set(data)
    train_data_split = Training_set_split(train_data, rotate, translate, swap_channel)
    validation_data = Validation_set(train_data)

    # construct the net type with default parameters
    if (Net['net_type'] == 'Net2c'):
        model = Net['net'](nb_hidden=Net['hidden_layers'], dropout_prob=Net['drop_prob'])
    if (Net['net_type'] == 'LeNet_sharing'):
        model = Net['net'](nb_hidden=Net['hidden_layers'],
                           dropout_ws=Net['drop_prob_ws'],
                           dropout_comp=Net['drop_prob_comp'])
    if (Net['net_type'] == 'LeNet_sharing_aux'):
        # check if any data augmentation has been requested:
        # if none, construct with the parameters tuned without data augmentation,
        # otherwise construct with the parameters tuned with data augmentation
        if (rotate == False and translate == False and swap_channel == False):
            model = Net['net'](nbhidden_aux=Net['hidden_layers_aux'],
                               nbhidden_comp=Net['hidden_layers_comp'],
                               drop_prob_aux=Net['drop_prob_aux'],
                               drop_prob_comp=Net['drop_prob_comp'])
        else:
            Net['learning rate'] = Net['learning rate augm']
            model = Net['net'](nbhidden_aux=Net['hidden_layers_aux'],
                               nbhidden_comp=Net['hidden_layers_comp'],
                               drop_prob_aux=Net['drop_prob_aux_augm'],
                               drop_prob_comp=Net['drop_prob_comp_augm'])
    if (Net['net_type'] == 'Google_Net'):
        model = Net['net'](channels_1x1=Net['channels_1x1'],
                           channels_3x3=Net['channels_3x3'],
                           channels_5x5=Net['channels_5x5'],
                           pool_channels=Net['pool_channels'],
                           nhidden=Net['hidden_layers'],
                           drop_prob_comp=Net['drop_prob_comp'],
                           drop_prob_aux=Net['drop_prob_aux'])

    if GPU and cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)

    # train the model on the train set and validate at each epoch
    train_losses, train_acc, valid_losses, valid_acc = train_model(
        model, train_data_split, validation_data, device, mini_batch_size,
        optimizer, criterion, n_epochs, Net['learning rate'], lambda_l2, alpha, beta)

    if plot:
        learning_curve(train_losses, train_acc, valid_losses, valid_acc)

    # loss and accuracy of the network on the test set
    test_loss, test_accuracy = compute_metrics(model, test_data, device)
    print('\nTest Set | Loss: {:.4f} | Accuracy: {:.2f}%\n'.format(test_loss, test_accuracy))
def evaluate_model(Net, seeds, mini_batch_size=100, optimizer=optim.Adam,
                   criterion=nn.CrossEntropyLoss(), n_epochs=40, eta=1e-3,
                   lambda_l2=0, alpha=0.5, beta=0.5, plot=True, statistics=True,
                   rotate=False, translate=False, swap_channel=False, GPU=False):
    """
    General : 10 rounds of network training / validation with statistics

        - Repeat the training/validation procedure ten times, once for each seed in seeds
          1) At every seed -> reinitialize a network and a dataset: train, validation and test
          2) Weight initialization and data loading use the seed
          3) Record the train and validation accuracy and loss and optionally display their evolution curves
          4) Compute the statistics at the end of each training for performance evaluation
             i)   Mean training accuracy for each seed -> value at the end of the last epoch
             ii)  Mean validation accuracy for each seed -> value at the end of the last epoch
             iii) Mean test accuracy for each seed -> computed on the test set after each training
          -> display a boxplot of the statistics if statistics is True and print the mean and standard deviation

    Input :

        - Net : a network dictionary from the <Nets> class
        - seeds : a list of seeds to iterate over for the pseudo random number generator used in weight initialization and data loading
        -> mini_batch_size, optimizer, criterion, n_epochs, eta, lambda_l2, alpha, beta : see training.py
        - plot : if True, plot the learning curve evolution over the epochs -> default True
        - statistics : if True, display the boxplot of the train, validation and test accuracies and print the mean and standard deviation statistics
        -> rotate, translate and swap_channel -> data augmentation, see loader.py

    Output :

        - train_results : a (10 x 4 x n_epochs) tensor
                          10 -> seed
                          4 -> train loss, train accuracy, validation loss, validation accuracy
                          n_epochs -> evolution during training
        - test_losses : a tensor of shape (10,) containing the test loss for each seed
        - test_accuracies : a tensor of shape (10,) containing the test accuracy for each seed
    """
    # tensor initialization to store the metrics
    train_results = torch.empty(len(seeds), 4, n_epochs)
    test_losses = []
    test_accuracies = []

    for n, seed in enumerate(seeds):
        # set the pytorch seed
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        # set the seed for the random split of the dataset into training and validation
        random.seed(0)

        # load the dataset: train, validation and test
        data = PairSetMNIST()
        train_data = Training_set(data)
        test_data = Test_set(data)
        train_data_split = Training_set_split(train_data, rotate, translate, swap_channel)
        validation_data = Validation_set(train_data)

        # construct the net type with default parameters
        if (Net['net_type'] == 'Net2c'):
            model = Net['net'](nb_hidden=Net['hidden_layers'], dropout_prob=Net['drop_prob'])
        if (Net['net_type'] == 'LeNet_sharing'):
            model = Net['net'](nb_hidden=Net['hidden_layers'],
                               dropout_ws=Net['drop_prob_ws'],
                               dropout_comp=Net['drop_prob_comp'])
        if (Net['net_type'] == 'LeNet_sharing_aux'):
            # check if any data augmentation has been requested:
            # if none, construct with the parameters tuned without data augmentation,
            # otherwise construct with the parameters tuned with data augmentation
            if (rotate == False and translate == False and swap_channel == False):
                model = Net['net'](nbhidden_aux=Net['hidden_layers_aux'],
                                   nbhidden_comp=Net['hidden_layers_comp'],
                                   drop_prob_aux=Net['drop_prob_aux'],
                                   drop_prob_comp=Net['drop_prob_comp'])
            else:
                Net['learning rate'] = Net['learning rate augm']
                model = Net['net'](nbhidden_aux=Net['hidden_layers_aux'],
                                   nbhidden_comp=Net['hidden_layers_comp'],
                                   drop_prob_aux=Net['drop_prob_aux_augm'],
                                   drop_prob_comp=Net['drop_prob_comp_augm'])
        if (Net['net_type'] == 'Google_Net'):
            model = Net['net'](channels_1x1=Net['channels_1x1'],
                               channels_3x3=Net['channels_3x3'],
                               channels_5x5=Net['channels_5x5'],
                               pool_channels=Net['pool_channels'],
                               nhidden=Net['hidden_layers'],
                               drop_prob_comp=Net['drop_prob_comp'],
                               drop_prob_aux=Net['drop_prob_aux'])

        if GPU and cuda.is_available():
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')
        model = model.to(device)

        # train the model on the train set and validate at each epoch
        train_losses, train_acc, valid_losses, valid_acc = train_model(
            model, train_data_split, validation_data, device, mini_batch_size,
            optimizer, criterion, n_epochs, Net['learning rate'], lambda_l2, alpha, beta)

        # store the training and validation accuracies and losses during the training
        train_results[n, ] = torch.tensor([train_losses, train_acc, valid_losses, valid_acc])

        # compute the loss and accuracy of the model on the test set
        test_loss, test_acc = compute_metrics(model, test_data, device)

        # store the test metrics in the lists
        test_losses.append(test_loss)
        test_accuracies.append(test_acc)

        # learning curve
        if plot:
            learning_curve(train_losses, train_acc, valid_losses, valid_acc)

        print('Seed {:d} | Test Loss: {:.4f} | Test Accuracy: {:.2f}%\n'.format(n, test_loss, test_acc))

    # store the train, validation and test accuracies in a tensor for the boxplot
    data = torch.stack([
        train_results[:, 1, (n_epochs - 1)],
        train_results[:, 3, (n_epochs - 1)],
        torch.tensor(test_accuracies)
    ])
    data = data.view(1, 3, len(seeds))

    # boxplot
    if statistics:
        Title = " Models accuracies"
        models = [Net['net_type']]
        boxplot(data, Title, models, True)

    return train_results, torch.tensor(test_losses), torch.tensor(test_accuracies)
train_dataset = ROIDataset(path='data/train',
                           key=get_label,
                           mode='classification',
                           gen_p=0.7)
train_loader = DataLoader(train_dataset, batch_size=batch_size)
val_dataset = ROIDataset(path='data/val', key=get_label, mode='classification', gen_p=0)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20, eta_min=1e-6)
criterion = nn.CrossEntropyLoss()

result = train_model(model, train_loader, val_loader, criterion, accuracy,
                     optimizer, 200, device, scheduler)

# reload the best weights saved by train_model before evaluating
model.load_state_dict(torch.load(os.path.join('data', model.__class__.__name__),
                                 map_location=device))

test_dataset = ROIDataset(path='data/test', key=get_label, mode='classification', gen_p=0)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
print('test_accuracy: {:.4f}'.format(compute_accuracy(model, test_loader, device)))
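
# Minimal sketch of the accuracy computation assumed by the script above;
# the project's own compute_accuracy may differ in details.
def compute_accuracy_sketch(model, loader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            correct += (model(x).argmax(dim=1) == y).sum().item()
            total += y.size(0)
    return correct / total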
def grid_search_aux(lrs, drop_prob_aux, drop_prob_comp, seeds, mini_batch_size=100,
                    optimizer=optim.Adam, criterion=nn.CrossEntropyLoss(), n_epochs=40,
                    lambda_l2=0, alpha=0.5, beta=0.5, rotate=False, translate=False,
                    swap_channel=False, GPU=False):
    """
    General : Iterate over the combinations of the parameter lists to optimize and repeat a
              10-round training/validation procedure for each combination
              -> select the combination with the highest validation accuracy for a LeNet_sharing_aux network
              => only called in the <Nets> class by the Tune_LeNet_sharing_aux function

    Input :

        - lrs : list of learning rates
        - drop_prob_aux : list of dropout rates for the CNN auxiliary part
        - drop_prob_comp : list of dropout rates for the FC layer part for binary classification
        - seeds : list of seeds for statistics
        -> mini_batch_size, optimizer, criterion, n_epochs, eta, lambda_l2, alpha, beta : see training.py
        -> rotate, translate and swap_channel -> data augmentation, see loader.py

    Output :

        - train_results : a (len(lrs) x len(drop_prob_aux) x len(drop_prob_comp) x len(seeds) x 4 x n_epochs) tensor
                          len() -> number of parameters or seeds
                          4 -> train loss, train accuracy, validation loss, validation accuracy
                          n_epochs -> evolution during training
        - test_losses : a tensor containing the test loss for each parameter combination and seed
        - test_accuracies : a tensor containing the test accuracy for each parameter combination and seed
        - opt_lr : tuned value for the learning rate
        - opt_prob_aux : tuned value for drop_prob_aux
        - opt_prob_comp : tuned value for drop_prob_comp
    """
    # tensors to record the metrics
    train_results = torch.empty(len(lrs), len(drop_prob_aux), len(drop_prob_comp), len(seeds), 4, n_epochs)
    test_losses = torch.empty(len(lrs), len(drop_prob_aux), len(drop_prob_comp), len(seeds))
    test_accuracies = torch.empty(len(lrs), len(drop_prob_aux), len(drop_prob_comp), len(seeds))

    # iterate over the parameter combinations for each seed in seeds
    for idz, eta in enumerate(lrs):
        for idx, prob_aux in enumerate(drop_prob_aux):
            for idy, prob_comp in enumerate(drop_prob_comp):
                for n, seed in enumerate(seeds):
                    print(' lr : {:.4f}, prob aux : {:.2f}, prob comp : {:.2f} (n= {:d})'
                          .format(eta, prob_aux, prob_comp, n))
                    # set the pytorch seed
                    torch.manual_seed(seed)
                    torch.cuda.manual_seed(seed)
                    # set the random seed
                    random.seed(0)

                    # create the data
                    data = PairSetMNIST()
                    train_data = Training_set(data)
                    test_data = Test_set(data)
                    train_data_split = Training_set_split(train_data, rotate, translate, swap_channel)
                    validation_data = Validation_set(train_data)

                    # create the network
                    model = LeNet_sharing_aux(drop_prob_aux=prob_aux, drop_prob_comp=prob_comp)

                    if GPU and cuda.is_available():
                        device = torch.device('cuda')
                    else:
                        device = torch.device('cpu')
                    model = model.to(device)

                    # train the network
                    train_losses, train_acc, valid_losses, valid_acc = train_model(
                        model, train_data_split, validation_data, device, mini_batch_size,
                        optimizer, criterion, n_epochs, eta, lambda_l2, alpha, beta)

                    # store the train and test results
                    train_results[idz, idx, idy, n, ] = torch.tensor(
                        [train_losses, train_acc, valid_losses, valid_acc])
                    test_loss, test_acc = compute_metrics(model, test_data, device)
                    test_losses[idz, idx, idy, n] = test_loss
                    test_accuracies[idz, idx, idy, n] = test_acc

    # compute the mean and standard deviation of the validation accuracy at the last epoch
    validation_grid_mean_acc = torch.mean(train_results[:, :, :, :, 3, n_epochs - 1], dim=3)
    validation_grid_std_acc = torch.std(train_results[:, :, :, :, 3, n_epochs - 1], dim=3)
    # compute the mean and standard deviation of the train accuracy at the last epoch
    train_grid_mean_acc = torch.mean(train_results[:, :, :, :, 1, n_epochs - 1], dim=3)
    train_grid_std_acc = torch.std(train_results[:, :, :, :, 1, n_epochs - 1], dim=3)

    # get the indices of the parameters with the highest mean validation accuracy;
    # if several combinations tie for the best accuracy, keep the first one
    idx = torch.where(validation_grid_mean_acc == validation_grid_mean_acc.max())
    idx = tuple(t[0] for t in idx)

    # get the tuned parameters
    opt_lr = lrs[idx[0].item()]
    opt_prob_aux = drop_prob_aux[idx[1].item()]
    opt_prob_comp = drop_prob_comp[idx[2].item()]

    print('Best mean validation accuracy on {:d} seeds : {:.2f}%, std = {:.2f} with: '
          'learning rate = {:.4f} dropout rate aux = {:.2f} and dropout rate comp = {:.2f}'
          .format(len(seeds),
                  validation_grid_mean_acc[idx[0].item(), idx[1].item(), idx[2].item()],
                  validation_grid_std_acc[idx[0].item(), idx[1].item(), idx[2].item()],
                  opt_lr, opt_prob_aux, opt_prob_comp))

    return train_results, test_losses, test_accuracies, opt_lr, opt_prob_aux, opt_prob_comp

###########################################################################################################################################
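
# Hypothetical invocation sketch for grid_search_aux (the values below are
# placeholders, not the project's tuned grid): a small grid over learning rates
# and the two dropout rates, repeated over ten seeds.
# lrs = [1e-3, 5e-3]
# drop_prob_aux = [0.1, 0.3]
# drop_prob_comp = [0.1, 0.3]
# seeds = list(range(10))
# (train_results, test_losses, test_accuracies,
#  opt_lr, opt_prob_aux, opt_prob_comp) = grid_search_aux(lrs, drop_prob_aux,
#                                                         drop_prob_comp, seeds)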