def super_main(adjustable):
    """Run main() for a specified number of iterations; useful for experiment running.

    Note: set `adjustable.iterations` to 1 if you want to save weights.

    Args:
        adjustable: experiment configuration object. Reads: datasets, use_gpu,
            iterations, cost_module_type, experiment_name.

    Side effects: sets CUDA_VISIBLE_DEVICES, prints progress, and (when
    pc.LOGGING is on) appends a summary to the experiment log.
    """
    # Load the datasets from h5.
    all_h5_datasets = ddl.load_datasets_from_h5(adjustable.datasets)

    # Select which GPU to use; necessary to start a TF session.
    os.environ["CUDA_VISIBLE_DEVICES"] = adjustable.use_gpu

    # Arrays for storing results.
    number_of_datasets = len(adjustable.datasets)
    # Placeholder only: main() reassigns `name` each iteration before it is
    # used for logging. (Previously the dataset loop index also clobbered it.)
    name = np.zeros(number_of_datasets)
    confusion_matrices = np.zeros(
        (adjustable.iterations, number_of_datasets, 4))
    ranking_matrices = np.zeros(
        (adjustable.iterations, number_of_datasets, pc.RANKING_NUMBER))

    start = time.time()
    # `iteration` (not `iter`): avoid shadowing the builtin.
    for iteration in range(adjustable.iterations):
        print('-----ITERATION %d' % iteration)

        # Lists for storing intermediate results.
        all_ranking, all_training_pos, all_training_neg = [], [], []

        # Create training and ranking set for all datasets.
        ss = time.time()
        # `dataset_index` (not `name`): the original loop index shadowed the
        # `name` result variable above.
        for dataset_index in range(number_of_datasets):
            ranking, training_pos, training_neg = ddl.create_training_and_ranking_set(
                adjustable.datasets[dataset_index])

            # Labels have different meanings in the `euclidean` case:
            # 0 for match and 1 for mismatch.
            if adjustable.cost_module_type == 'euclidean':
                ranking = pu.flip_labels(ranking)
                training_pos = pu.flip_labels(training_pos)
                training_neg = pu.flip_labels(training_neg)
            elif adjustable.cost_module_type == 'cosine':
                ranking = pu.zero_to_min_one_labels(ranking)
                training_pos = pu.zero_to_min_one_labels(training_pos)
                training_neg = pu.zero_to_min_one_labels(training_neg)

            # Data gets appended in order.
            all_ranking.append(ranking)
            all_training_pos.append(training_pos)
            all_training_neg.append(training_neg)

        st = time.time()
        print('%0.2f mins' % ((st - ss) / 60))

        # Put all the training data together.
        merged_training_pos, merged_training_neg = ddl.merge_datasets(
            adjustable, all_training_pos, all_training_neg)

        # Run main.
        name, confusion_matrix, ranking_matrix = main(
            adjustable, all_h5_datasets, all_ranking, merged_training_pos,
            merged_training_neg)

        # Store results.
        confusion_matrices[iteration] = confusion_matrix
        ranking_matrices[iteration] = ranking_matrix

    stop = time.time()
    total_time = stop - start

    # Mean and std over iterations, per dataset. Axis 0 is the iteration
    # axis, so this is equivalent to (and replaces) the original manual
    # per-dataset copy loop; it also matches how the newer super_main
    # variant computes its statistics.
    matrix_means = np.mean(confusion_matrices, axis=0)
    matrix_std = np.std(confusion_matrices, axis=0)
    ranking_means = np.mean(ranking_matrices, axis=0)
    ranking_std = np.std(ranking_matrices, axis=0)

    # Log the results.
    # note: TURN ON if you want to log results!!
    if pc.LOGGING:
        file_name = os.path.basename(__file__)
        pu.enter_in_log(adjustable.experiment_name, file_name, name,
                        matrix_means, matrix_std, ranking_means, ranking_std,
                        total_time)
def super_main(adjustable, get_data=False):
    """Run main() for a specified number of iterations; useful for experiment running.

    Note: set `adjustable.iterations` to 1 if you want to save weights.

    Args:
        adjustable: experiment configuration object. Reads: datasets_train,
            dataset_test, ranking_number_test, ranking_number_train,
            only_test, cost_module_type, iterations, log_experiment,
            experiment_name.
        get_data: if True, return (ranking_means, matrix_means, total_time);
            otherwise return None.

    Returns:
        (ranking_means, matrix_means, total_time) when get_data is True,
        else None. Returns early (None) on invalid configurations.
    """
    ################################################################################################################
    # Load datasets, note: always 1 dataset_test, but multiple datasets_train
    ################################################################################################################
    datasets_train_h5 = dp.load_datasets_from_h5(adjustable.datasets_train)
    dataset_test_h5 = dp.load_datasets_from_h5(adjustable.dataset_test)

    ################################################################################################################
    # Set the ranking number.
    ################################################################################################################
    if dataset_test_h5 is None:
        if datasets_train_h5 is not None:
            if adjustable.ranking_number_test is None:
                print('Note: Only training will be performed.')
                ranking_number = None
            else:
                print('Warning: No ranking number needed, ranking number defaults to `None`.')
                print('Note: Only training will be performed.')
                ranking_number = None
        else:
            print('Error: No training data specified.')
            return
    else:
        print('Note: Testing (Ranking) will also be performed.')
        if adjustable.ranking_number_test == 'half':
            # NOTE(review): debug leftover? prints the raw h5 handle — consider removing.
            print(dataset_test_h5)
            ranking_number = pc.RANKING_DICT[adjustable.dataset_test]
        elif isinstance(adjustable.ranking_number_test, int):
            ranking_number = adjustable.ranking_number_test
        else:
            print('Error: Unknown configuration.')
            return

    ################################################################################################################
    # [IF dataset_test_h5 is not None] Create arrays in which we store the results
    ################################################################################################################
    if dataset_test_h5 is not None:
        confusion_matrices = np.zeros((adjustable.iterations, 4))
        ranking_matrices = np.zeros((adjustable.iterations, ranking_number))
        gregor_matrices = np.zeros((adjustable.iterations, 4))
    else:
        confusion_matrices = None
        ranking_matrices = None
        gregor_matrices = None

    ################################################################################################################
    # Start a number of experiment iterations
    ################################################################################################################
    start = time.time()
    # `iteration` (not `iter`): avoid shadowing the builtin.
    for iteration in range(adjustable.iterations):
        print(
            '------------------------------------------------------------------------------------------------------\n'
            'EXPERIMENT ITERATION %d\n'
            '------------------------------------------------------------------------------------------------------'
            % iteration)

        # lists for storing intermediate results
        all_ranking, all_training_pos, all_training_neg = [], [], []

        # create training and ranking set for all datasets
        ss = time.time()
        if dataset_test_h5 is None:
            print('Training using all data in datasets_train.')
            ############################################################################################################
            # Prepare data for when we only train using all data
            ############################################################################################################
            if datasets_train_h5 is not None:
                for index in range(len(adjustable.datasets_train)):
                    ranking, training_pos, training_neg = dp.create_training_and_ranking_set(
                        adjustable.datasets_train[index], adjustable,
                        ranking_variable=None, do_ranking=False)
                    # euclidean/cosine: 0 means match, so flip the 0/1 labels.
                    if adjustable.cost_module_type in ['euclidean', 'cosine']:
                        training_pos = pu.flip_labels(training_pos)
                        training_neg = pu.flip_labels(training_neg)
                    all_training_pos.append(training_pos)
                    all_training_neg.append(training_neg)
                    del ranking
                all_ranking = None
            else:
                print('Error: no training data specified.')
                return
        else:
            if adjustable.only_test:
                print('Testing only using ranking set based on dataset_test.')
                ########################################################################################################
                # Prepare data for when we ONLY test. Randomly get the data or load from a file if file exists
                ########################################################################################################
                ranking, tmp1, tmp2 = dp.create_training_and_ranking_set(
                    adjustable.dataset_test, adjustable,
                    ranking_variable=adjustable.ranking_number_test,
                    do_training=False)
                del tmp1, tmp2
                if adjustable.cost_module_type in ['euclidean', 'cosine']:
                    ranking = pu.flip_labels(ranking)
                all_ranking.append(ranking)
            else:
                if datasets_train_h5 is not None:
                    print('Training and testing on multiple datasets.')
                    ####################################################################################################
                    # Prepare data for when we train on multiple datasets and test
                    ####################################################################################################
                    # note: remember that only the last ranking in the ranking matrix will be tested on.
                    for index in range(len(adjustable.datasets_train)):
                        ranking, training_pos, training_neg = dp.create_training_and_ranking_set(
                            adjustable.datasets_train[index], adjustable,
                            ranking_variable=adjustable.ranking_number_train[index])
                        if adjustable.cost_module_type in ['euclidean', 'cosine']:
                            ranking = pu.flip_labels(ranking)
                            training_pos = pu.flip_labels(training_pos)
                            training_neg = pu.flip_labels(training_neg)
                        all_ranking.append(ranking)
                        all_training_pos.append(training_pos)
                        all_training_neg.append(training_neg)
                    # The test dataset is appended last, so its ranking is the
                    # one that actually gets evaluated.
                    ranking, training_pos, training_neg = dp.create_training_and_ranking_set(
                        adjustable.dataset_test, adjustable,
                        ranking_variable=adjustable.ranking_number_test)
                    if adjustable.cost_module_type in ['euclidean', 'cosine']:
                        ranking = pu.flip_labels(ranking)
                        training_pos = pu.flip_labels(training_pos)
                        training_neg = pu.flip_labels(training_neg)
                    all_ranking.append(ranking)
                    all_training_pos.append(training_pos)
                    all_training_neg.append(training_neg)
                else:
                    print('Training and testing on a single dataset.')
                    ####################################################################################################
                    # Prepare data for when we train and test on a single dataset
                    ####################################################################################################
                    ranking, training_pos, training_neg = dp.create_training_and_ranking_set(
                        adjustable.dataset_test, adjustable,
                        ranking_variable=adjustable.ranking_number_test)
                    if adjustable.cost_module_type in ['euclidean', 'cosine']:
                        ranking = pu.flip_labels(ranking)
                        training_pos = pu.flip_labels(training_pos)
                        training_neg = pu.flip_labels(training_neg)
                    all_ranking.append(ranking)
                    all_training_pos.append(training_pos)
                    all_training_neg.append(training_neg)

        st = time.time()
        print('%0.2f mins' % ((st - ss) / 60))

        ################################################################################################################
        # Merge the training data.
        # Here we decide how to merge: to mix or to order by using adjustable.mix
        # Also for training on multiple datasets + testing: decide if we include test set in the training to be mixed:
        # by using adjustable.mix_with_test
        ################################################################################################################
        merged_training_pos, merged_training_neg = dp.merge_datasets(
            adjustable, all_training_pos, all_training_neg)

        ################################################################################################################
        # Run main()
        ################################################################################################################
        confusion_matrix, ranking_matrix, gregor_matrix = main(
            adjustable, datasets_train_h5, dataset_test_h5, all_ranking,
            merged_training_pos, merged_training_neg)

        if dataset_test_h5 is not None:
            # store results
            confusion_matrices[iteration] = confusion_matrix
            ranking_matrices[iteration] = ranking_matrix
            gregor_matrices[iteration] = gregor_matrix

    stop = time.time()
    total_time = stop - start

    ################################################################################################################
    # Calculate the means and standard deviations and log the results
    ################################################################################################################
    if dataset_test_h5 is not None:
        # Axis 0 is the iteration axis: statistics per metric over iterations.
        matrix_means = np.mean(confusion_matrices, axis=0)
        matrix_std = np.std(confusion_matrices, axis=0)
        ranking_means = np.mean(ranking_matrices, axis=0)
        ranking_std = np.std(ranking_matrices, axis=0)
        gregor_matrix_means = np.mean(gregor_matrices, axis=0)
        gregor_matrix_std = np.std(gregor_matrices, axis=0)
        name = adjustable.dataset_test
    else:
        matrix_means = None
        matrix_std = None
        ranking_means = None
        ranking_std = None
        gregor_matrix_means = None
        gregor_matrix_std = None
        name = None

    # log the results
    if adjustable.log_experiment:
        file_name = os.path.basename(__file__)
        pu.enter_in_log(adjustable, adjustable.experiment_name, file_name,
                        name, matrix_means, matrix_std, ranking_means,
                        ranking_std, total_time, gregor_matrix_means,
                        gregor_matrix_std)

    if get_data:
        return ranking_means, matrix_means, total_time