def main(path_continue_learning=None, total_epoch=0, new_name=None):
    """
    Prepare the parameters, the GridNet and the Cityscapes loaders, then call train().

    :param path_continue_learning: Path of the checkpoint to resume from. Keep None to start
    a new training from scratch.
    :param total_epoch: Value stored in parameters.epoch_total when resuming from a checkpoint
    (not used when the training starts from scratch).
    :param new_name: Optional new name for a resumed network; the existing CSV files are
    duplicated under that name (not used when the training starts from scratch).
    :return: Nothing, the training itself is delegated to train()
    """

    def write_message(path, message, mode='a'):
        # The text file at "path" is used as a log; mode 'w' restarts it, 'a' appends to it
        with open(path, mode) as txtfile:
            txtfile.write(message)

    # Fixed seed to keep the experiments reproducible
    torch.manual_seed(26542461)

    if path_continue_learning is None:
        # ---- Training from scratch ----
        # Raw weight of each of the 19 classes
        raw_weight = torch.FloatTensor([2.381681e+09, 3.856594e+08, 1.461642e+09, 4.291781e+07,
                                        5.597591e+07, 8.135516e+07, 1.328548e+07, 3.654657e+07,
                                        1.038652e+09, 7.157456e+07, 2.527450e+08, 7.923985e+07,
                                        9.438758e+06, 4.460595e+08, 1.753254e+07, 1.655341e+07,
                                        1.389560e+07, 6.178567e+06, 2.936571e+07])

        # Every value becomes (sum of all values) / value ...
        inverted_weight = raw_weight.sum() / raw_weight
        # ... then the result is normalized and multiplied by the number of classes
        weight_grad = (inverted_weight / inverted_weight.sum()) * inverted_weight.size(0)

        # Uniform weights replace the computed ones; comment the next line to keep the
        # weights computed above
        weight_grad = torch.FloatTensor([1] * 19)

        # All the parameters of the experiment
        parameters = Parameters.Parameters(
            nColumns=8,
            nFeatMaps=[16, 32, 64, 128, 256],
            nFeatureMaps_init=3,
            number_classes=20 - 1,
            label_DF=Label.create_label(),
            width_image_initial=2048,
            height_image_initial=1024,
            size_image_crop=401,
            dropFactor=0.1,
            learning_rate=0.01,
            learning_rate_decay=1 * (10 ** (-2)),
            weight_decay=0,
            beta1=0.9,
            beta2=0.999,
            epsilon=1 * 10 ** (-8),
            batch_size=5,
            batch_size_val=5,
            epoch_total=400,
            actual_epoch=0,
            ratio=(1, 1),
            weight_grad=weight_grad,
            loss="focal_loss",
            momentum_IoU=0,
            path_save_net="./Model/",
            name_network="focal_loss2",
            train_number=0,
            path_CSV="./CSV/",
            path_data="/home_expes/collections/Cityscapes/",
            path_print="./Python_print_focal_loss.txt",
            path_result="./Result",
            num_workers=2)

        # The GridNet itself
        network = GridNet_structure.gridNet(nInputs=parameters.nFeatureMaps_init,
                                            nOutputs=parameters.number_classes,
                                            nColumns=parameters.nColumns,
                                            nFeatMaps=parameters.nFeatMaps,
                                            dropFactor=parameters.dropFactor)

        write_message(parameters.path_print, '\n Start of the program \n', mode='w')

        # New CSV files that will store the error
        Save_import.init_csv(name_network=parameters.name_network,
                             train_number=parameters.train_number,
                             path_CSV=parameters.path_CSV,
                             path_print=parameters.path_print)
    else:
        # ---- Training resumed from a checkpoint ----
        parameters, network = Save_import.load_from_checkpoint(path_checkpoint=path_continue_learning)

        # Only epoch_total has to be updated. Other fields (learning_rate_decay, batch_size,
        # batch_size_val, learning_rate, momentum_IoU, loss) can be overridden here if needed.
        parameters.epoch_total = total_epoch

        # The class weights follow the network on GPU
        if torch.cuda.is_available():
            parameters.weight_grad = parameters.weight_grad.cuda()

        # With a new name, the existing error CSV files are copied under that name
        if new_name is not None:
            Save_import.duplicated_csv(path_CSV=parameters.path_CSV,
                                       old_name_network=parameters.name_network,
                                       new_name_network=new_name,
                                       train_number=parameters.train_number)
            parameters.name_network = new_name

        write_message(parameters.path_print, '\n The program will continue \n', mode='w')

    def build_dataset(mode):
        # Cityscapes "fine" annotations with the transformations stored in the parameters
        return Save_import.cityscapes_create_dataset(quality='fine',
                                                     mode=mode,
                                                     transform=parameters.transforms_input,
                                                     transform_target=parameters.transforms_output,
                                                     parameters=parameters)

    def build_loader(dataset, batch_size):
        # Shuffled Pytorch loader that keeps the last (possibly smaller) batch
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=parameters.num_workers,
                                           drop_last=False)

    # Both DataSets, then both loaders
    train_dataset = build_dataset('train')
    val_dataset = build_dataset('val')
    train_loader = build_loader(train_dataset, parameters.batch_size)
    val_loader = build_loader(val_dataset, parameters.batch_size_val)

    # Report how many GPUs are seen by torch
    gpu_count = torch.cuda.device_count()
    if gpu_count > 1:
        write_message(parameters.path_print, "\nLet's use " + str(gpu_count) + " GPUs! \n")
    else:
        write_message(parameters.path_print, "\nWe don t have more than one GPU \n")

    # The network is wrapped in DataParallel whatever the number of GPUs
    # TODO try without the wrapper on a single GPU to check if it is working
    network = torch.nn.DataParallel(network)

    # Put the network on GPU if possible
    if torch.cuda.is_available():
        network.cuda()
    else:
        write_message(parameters.path_print, "\nAccording to torch Cuda is not available \n")

    # Train the network
    train(network=network,
          parameters=parameters,
          train_loader=train_loader,
          val_loader=val_loader)
def main(path_continue_learning=None, total_epoch=0, new_name=None):
    """
    Prepare the parameters, the ResNet18 and the pretraining loaders, then call train().

    :param path_continue_learning: Path of the checkpoint to resume from. Keep None to start
    a new training from scratch.
    :param total_epoch: Value stored in parameters.epoch_total when resuming from a checkpoint
    (not used when the training starts from scratch).
    :param new_name: Optional new name for a resumed network; the existing CSV files are
    duplicated under that name (not used when the training starts from scratch).
    :return: Nothing, the training itself is delegated to train()
    """

    def write_message(path, message, mode='a'):
        # The text file at "path" is used as a log; mode 'w' restarts it, 'a' appends to it
        with open(path, mode) as txtfile:
            txtfile.write(message)

    # Fixed seed to keep the experiments reproducible
    torch.manual_seed(945682461)

    if path_continue_learning is None:
        # ---- Training from scratch ----
        # All the parameters of the experiment. path_data points to the imagenet folder; the
        # Cityscapes one is "/home_expes/collections/Cityscapes/".
        parameters = Parameters.Parameters(
            nColumns=6,
            nFeatMaps=[16, 32, 64, 128],
            nFeatureMaps_init=3,
            number_classes=20 - 1,
            label_DF=Label.create_label(),
            width_image_initial=2048,
            height_image_initial=1024,
            size_image_crop=401,
            dropFactor=0.1,
            learning_rate=0.0001,
            learning_rate_decay=1 * (10**(-2)),
            weight_decay=0,
            beta1=0.9,
            beta2=0.999,
            epsilon=1 * 10**(-8),
            batch_size=40,
            batch_size_val=40,
            epoch_total=100,
            actual_epoch=0,
            ratio=(1, 1),
            weight_grad=torch.FloatTensor([1] * 19),
            loss="cross_entropy_pretrain",
            momentum_IoU=0,
            pretrain=True,
            path_save_net="./Model/",
            name_network="resnet18_1000classes",
            train_number=0,
            path_CSV="./CSV/",
            path_data="/home_expes/collections/imagenet_1000_classes/",
            path_print="./Python_print_resnet18_1000classes.txt",
            path_result="./Result",
            num_workers=2)

        # NOTE(review): this GridNet is replaced by the ResNet18 right below. It is still
        # built here because removing it may change the random state used to initialise
        # the ResNet18 - confirm before deleting it.
        network = GridNet_structure.gridNet_imagenet(
            nInputs=parameters.nFeatureMaps_init,
            nOutputs=parameters.number_classes,
            nColumns=parameters.nColumns,
            nFeatMaps=parameters.nFeatMaps,
            dropFactor=parameters.dropFactor)

        # The network that is actually trained, with one output per imagenet class
        network = GridNet_structure.ResNet18(
            nOutputs=len(Label.create_imagenet_class()))

        write_message(parameters.path_print, '\n Start of the program \n', mode='w')

        # New CSV files that will store the error
        Save_import.init_csv(name_network=parameters.name_network,
                             train_number=parameters.train_number,
                             path_CSV=parameters.path_CSV,
                             path_print=parameters.path_print)
    else:
        # ---- Training resumed from a checkpoint ----
        parameters, network = Save_import.load_from_checkpoint(
            path_checkpoint=path_continue_learning)

        # epoch_total has to be updated and the learning rate is forced to 0.001. Other
        # fields (learning_rate_decay, batch_size, batch_size_val, momentum_IoU) can be
        # overridden here if needed.
        parameters.epoch_total = total_epoch
        parameters.learning_rate = 0.001

        # With a new name, the existing error CSV files are copied under that name
        if new_name is not None:
            Save_import.duplicated_csv(
                path_CSV=parameters.path_CSV,
                old_name_network=parameters.name_network,
                new_name_network=new_name,
                train_number=parameters.train_number)
            parameters.name_network = new_name

        write_message(parameters.path_print, '\n The program will continue \n', mode='w')

    # Per channel normalization shared by both DataSets
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Train images: random crop resized to 224 and random horizontal flip
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # NOTE(review): the validation images also go through a random crop, so the validation
    # score is presumably not deterministic - confirm that this is wanted.
    val_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    def build_dataset(mode, transform):
        # Pretraining DataSet built from parameters, without sliding crop
        return Save_import.cityscapes_create_dataset_pretrain(
            mode=mode,
            parameters=parameters,
            sliding_crop=None,
            transform=transform)

    def build_loader(dataset, batch_size):
        # Shuffled Pytorch loader that keeps the last (possibly smaller) batch
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=parameters.num_workers,
            drop_last=False)

    # Both DataSets, then both loaders
    train_dataset = build_dataset("train", train_transform)
    val_dataset = build_dataset("val", val_transform)
    train_loader = build_loader(train_dataset, parameters.batch_size)
    val_loader = build_loader(val_dataset, parameters.batch_size_val)

    # The network is wrapped in DataParallel only when several GPUs can be used
    gpu_count = torch.cuda.device_count()
    if gpu_count > 1:
        write_message(parameters.path_print, "\nLet's use " + str(gpu_count) + " GPUs! \n")
        network = torch.nn.DataParallel(network)
    else:
        # TODO check that the training is still working without the DataParallel wrapper
        write_message(parameters.path_print, "\nWe don t have more than one GPU \n")

    # Put the network on GPU if possible
    if torch.cuda.is_available():
        network.cuda()
    else:
        write_message(parameters.path_print, "\nAccording to torch Cuda is not even available \n")

    # Train the network
    train(network=network,
          parameters=parameters,
          train_loader=train_loader,
          val_loader=val_loader)