Exemplo n.º 1
0
def super_main(adjustable):
    """Runs main for a specified iterations. Useful for experiment running.
    Note: set iterations to 1 if you want to save weights

    :param adjustable:  experiment configuration object; fields read here:
                        datasets, use_gpu, iterations, cost_module_type,
                        experiment_name
    """
    # load the datasets from h5
    all_h5_datasets = ddl.load_datasets_from_h5(adjustable.datasets)
    # select which GPU to use, necessary to start tf session
    os.environ["CUDA_VISIBLE_DEVICES"] = adjustable.use_gpu
    # arrays for storing results
    number_of_datasets = len(adjustable.datasets)
    # `name` is overwritten by main() each iteration; this placeholder only
    # matters for logging when adjustable.iterations == 0
    name = np.zeros(number_of_datasets)
    confusion_matrices = np.zeros(
        (adjustable.iterations, number_of_datasets, 4))
    ranking_matrices = np.zeros(
        (adjustable.iterations, number_of_datasets, pc.RANKING_NUMBER))

    start = time.time()
    for iteration in range(adjustable.iterations):
        print('-----ITERATION %d' % iteration)
        # lists for storing intermediate results
        all_ranking, all_training_pos, all_training_neg = [], [], []
        # create training and ranking set for all datasets
        ss = time.time()
        # note: `index` (not `name`) — the original reused `name` as the loop
        # index, shadowing the result array above
        for index in range(number_of_datasets):
            ranking, training_pos, training_neg = ddl.create_training_and_ranking_set(
                adjustable.datasets[index])
            # labels have different meanings in `euclidean` case, 0 for match and 1 for mismatch
            if adjustable.cost_module_type == 'euclidean':
                ranking = pu.flip_labels(ranking)
                training_pos = pu.flip_labels(training_pos)
                training_neg = pu.flip_labels(training_neg)
            elif adjustable.cost_module_type == 'cosine':
                ranking = pu.zero_to_min_one_labels(ranking)
                training_pos = pu.zero_to_min_one_labels(training_pos)
                training_neg = pu.zero_to_min_one_labels(training_neg)

            # data gets appended in order
            all_ranking.append(ranking)
            all_training_pos.append(training_pos)
            all_training_neg.append(training_neg)
        # put all the training data together
        st = time.time()
        print('%0.2f mins' % ((st - ss) / 60))
        merged_training_pos, merged_training_neg = ddl.merge_datasets(
            adjustable, all_training_pos, all_training_neg)
        # run main
        name, confusion_matrix, ranking_matrix = main(adjustable,
                                                      all_h5_datasets,
                                                      all_ranking,
                                                      merged_training_pos,
                                                      merged_training_neg)
        # store results
        confusion_matrices[iteration] = confusion_matrix
        ranking_matrices[iteration] = ranking_matrix

    stop = time.time()
    total_time = stop - start

    # mean and std over iterations (axis 0), per dataset; replaces the
    # original per-dataset gather loops with equivalent vectorized reductions
    matrix_means = np.mean(confusion_matrices, axis=0)
    matrix_std = np.std(confusion_matrices, axis=0)
    ranking_means = np.mean(ranking_matrices, axis=0)
    ranking_std = np.std(ranking_matrices, axis=0)
    # log the results
    # note: TURN ON if you want to log results!!
    if pc.LOGGING:
        file_name = os.path.basename(__file__)
        pu.enter_in_log(adjustable.experiment_name, file_name, name,
                        matrix_means, matrix_std, ranking_means, ranking_std,
                        total_time)
Exemplo n.º 2
0
def super_main(adjustable, get_data=False):
    """Runs main for a specified iterations. Useful for experiment running.
    Note: set iterations to 1 if you want to save weights

    :param adjustable: experiment configuration object; fields read here:
                       datasets_train, dataset_test, ranking_number_test,
                       ranking_number_train, iterations, cost_module_type,
                       only_test, mix, mix_with_test, log_experiment,
                       experiment_name
    :param get_data:   when True, return (ranking_means, matrix_means,
                       total_time) instead of None
    """

    ################################################################################################################
    #   Load datasets, note: always 1 dataset_test, but multiple datasets_train
    ################################################################################################################
    datasets_train_h5 = dp.load_datasets_from_h5(adjustable.datasets_train)
    dataset_test_h5 = dp.load_datasets_from_h5(adjustable.dataset_test)

    ################################################################################################################
    #   Set the ranking number.
    ################################################################################################################
    if dataset_test_h5 is None:
        if datasets_train_h5 is not None:
            # training-only run: a ranking number is meaningless either way,
            # but warn if the user supplied one
            if adjustable.ranking_number_test is None:
                print('Note: Only training will be performed.')
                ranking_number = None
            else:
                print(
                    'Warning: No ranking number needed, ranking number defaults to `None`.'
                )
                print('Note: Only training will be performed.')
                ranking_number = None
        else:
            print('Error: No training data specified.')
            return
    else:
        print('Note: Testing (Ranking) will also be performed.')
        if adjustable.ranking_number_test == 'half':
            print(dataset_test_h5)
            ranking_number = pc.RANKING_DICT[adjustable.dataset_test]
        elif isinstance(adjustable.ranking_number_test, int):
            ranking_number = adjustable.ranking_number_test
        else:
            print('Error: Unknown configuration.')
            return

    ################################################################################################################
    #   [IF dataset_test_h5 is not None] Create arrays in which we store the results
    ################################################################################################################
    if dataset_test_h5 is not None:
        confusion_matrices = np.zeros((adjustable.iterations, 4))
        ranking_matrices = np.zeros((adjustable.iterations, ranking_number))
        gregor_matrices = np.zeros((adjustable.iterations, 4))
    else:
        confusion_matrices = None
        ranking_matrices = None
        gregor_matrices = None

    ################################################################################################################
    #   Start a number of experiment iterations
    ################################################################################################################
    start = time.time()
    # note: `iteration` (not `iter`) to avoid shadowing the builtin
    for iteration in range(adjustable.iterations):
        print(
            '------------------------------------------------------------------------------------------------------\n'
            'EXPERIMENT ITERATION %d\n'
            '------------------------------------------------------------------------------------------------------'
            % iteration)
        # lists for storing intermediate results
        all_ranking, all_training_pos, all_training_neg = [], [], []
        # create training and ranking set for all datasets
        ss = time.time()

        if dataset_test_h5 is None:
            print('Training using all data in datasets_train.')
            ############################################################################################################
            #   Prepare data for when we only train using all data
            ############################################################################################################
            if datasets_train_h5 is not None:
                for index in range(len(adjustable.datasets_train)):
                    ranking, training_pos, training_neg = dp.create_training_and_ranking_set(
                        adjustable.datasets_train[index],
                        adjustable,
                        ranking_variable=None,
                        do_ranking=False)
                    # euclidean/cosine cost modules use inverted label meaning
                    if adjustable.cost_module_type in ['euclidean', 'cosine']:
                        training_pos = pu.flip_labels(training_pos)
                        training_neg = pu.flip_labels(training_neg)

                    all_training_pos.append(training_pos)
                    all_training_neg.append(training_neg)
                    del ranking
                all_ranking = None
            else:
                print('Error: no training data specified.')
                return
        else:
            if adjustable.only_test:
                print('Testing only using ranking set based on dataset_test.')

                ########################################################################################################
                #   Prepare data for when we ONLY test. Randomly get the data or load from a file if file exists
                ########################################################################################################
                ranking, tmp1, tmp2 = dp.create_training_and_ranking_set(
                    adjustable.dataset_test,
                    adjustable,
                    ranking_variable=adjustable.ranking_number_test,
                    do_training=False)
                del tmp1, tmp2

                if adjustable.cost_module_type in ['euclidean', 'cosine']:
                    ranking = pu.flip_labels(ranking)
                all_ranking.append(ranking)
            else:
                if datasets_train_h5 is not None:
                    print('Training and testing on multiple datasets.')

                    ####################################################################################################
                    #   Prepare data for when we train on multiple datasets and test
                    ####################################################################################################
                    # note: remember that only the last ranking in the ranking matrix will be tested on.
                    for index in range(len(adjustable.datasets_train)):
                        ranking, training_pos, training_neg = dp.create_training_and_ranking_set(
                            adjustable.datasets_train[index],
                            adjustable,
                            ranking_variable=adjustable.
                            ranking_number_train[index])
                        if adjustable.cost_module_type in [
                                'euclidean', 'cosine'
                        ]:
                            ranking = pu.flip_labels(ranking)
                            training_pos = pu.flip_labels(training_pos)
                            training_neg = pu.flip_labels(training_neg)

                        all_ranking.append(ranking)
                        all_training_pos.append(training_pos)
                        all_training_neg.append(training_neg)

                    # the test dataset's data goes last, so its ranking set is
                    # the one evaluated
                    ranking, training_pos, training_neg = dp.create_training_and_ranking_set(
                        adjustable.dataset_test,
                        adjustable,
                        ranking_variable=adjustable.ranking_number_test)
                    if adjustable.cost_module_type in ['euclidean', 'cosine']:
                        ranking = pu.flip_labels(ranking)
                        training_pos = pu.flip_labels(training_pos)
                        training_neg = pu.flip_labels(training_neg)

                    all_ranking.append(ranking)
                    all_training_pos.append(training_pos)
                    all_training_neg.append(training_neg)

                else:
                    print('Training and testing on a single dataset.')

                    ####################################################################################################
                    #   Prepare data for when we train and test on a single dataset
                    ####################################################################################################
                    ranking, training_pos, training_neg = dp.create_training_and_ranking_set(
                        adjustable.dataset_test,
                        adjustable,
                        ranking_variable=adjustable.ranking_number_test)
                    if adjustable.cost_module_type in ['euclidean', 'cosine']:
                        ranking = pu.flip_labels(ranking)
                        training_pos = pu.flip_labels(training_pos)
                        training_neg = pu.flip_labels(training_neg)

                    all_ranking.append(ranking)
                    all_training_pos.append(training_pos)
                    all_training_neg.append(training_neg)

        st = time.time()
        print('%0.2f mins' % ((st - ss) / 60))

        ################################################################################################################
        #   Merge the training data.
        #   Here we decide how to merge: to mix or to order by using adjustable.mix
        #   Also for training on multiple datasets + testing: decide if we include test set in the training to be mixed:
        #   by using adjustable.mix_with_test
        ################################################################################################################
        merged_training_pos, merged_training_neg = dp.merge_datasets(
            adjustable, all_training_pos, all_training_neg)

        ################################################################################################################
        #   Run main()
        ################################################################################################################
        confusion_matrix, ranking_matrix, gregor_matrix = main(
            adjustable, datasets_train_h5, dataset_test_h5, all_ranking,
            merged_training_pos, merged_training_neg)

        if dataset_test_h5 is not None:
            # store results
            confusion_matrices[iteration] = confusion_matrix
            ranking_matrices[iteration] = ranking_matrix
            gregor_matrices[iteration] = gregor_matrix

    stop = time.time()
    total_time = stop - start

    ################################################################################################################
    #   Calculate the means and standard deviations and log the results
    ################################################################################################################

    if dataset_test_h5 is not None:
        matrix_means = np.mean(confusion_matrices, axis=0)
        matrix_std = np.std(confusion_matrices, axis=0)
        ranking_means = np.mean(ranking_matrices, axis=0)
        ranking_std = np.std(ranking_matrices, axis=0)
        gregor_matrix_means = np.mean(gregor_matrices, axis=0)
        gregor_matrix_std = np.std(gregor_matrices, axis=0)
        name = adjustable.dataset_test
    else:
        matrix_means = None
        matrix_std = None
        ranking_means = None
        ranking_std = None
        gregor_matrix_means = None
        gregor_matrix_std = None
        name = None

    # log the results
    if adjustable.log_experiment:
        file_name = os.path.basename(__file__)
        pu.enter_in_log(adjustable, adjustable.experiment_name, file_name,
                        name, matrix_means, matrix_std, ranking_means,
                        ranking_std, total_time, gregor_matrix_means,
                        gregor_matrix_std)

    if get_data:
        return ranking_means, matrix_means, total_time