Exemplos de data_extraction em Python, exemplos de tools.data_extraction em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: no_gender.py Projeto: varshakirani/brain_data_analysis

def main():
    """Run the no-gender classification experiments for the relevant contrasts.

    Options come from ``tools.parse_options()``. If ``options.input`` is an
    existing file, its scores are loaded and contrasts already present in it
    are skipped; otherwise an empty score table is created. For every selected
    contrast the pairwise class combinations (12, 23, 31) and then the
    three-class data (123) are passed to ``run_no_gender_ml``. The score table
    is written to ``options.output + "no_gender_individual.csv"`` after each
    contrast.
    """
    options = tools.parse_options()
    start = time.time()
    if os.path.isfile(options.input):
        # Results are already stored: use them as the starting point.
        scoresdf = pd.read_csv(options.input)
    else:
        # No stored results: create a new dataframe to collect them.
        scoresdf = pd.DataFrame(columns=[
            'Score', 'Type', 'Model', 'Classifier', 'Contrast_name',
            'Balanced_accuracy'
        ])

    mat_files = os.listdir(options.data)
    contrast_list = [
        name for name in mat_files if re.search('.*_.....mat', name)
    ]
    n_back_list = [
        name for name in contrast_list
        if 'nBack' in name and ('2' in name or '3' in name)
    ]
    faces_list = [
        name for name in contrast_list
        if 'Faces' in name and ('5' in name or '4' in name or '3' in name)
    ]
    # Extracted nBack 2,3 and Faces 3,4,5 contrasts.
    relevant_contrast_list = n_back_list + faces_list

    # Age and gender information along with subject id is extracted.
    # The context manager closes the file handle, which was previously leaked.
    with open(options.additional_data + "/subject_name.txt", "r") as id_file:
        ids = [int(float(token)) for token in id_file.read().split()]
    edf = pd.read_csv(options.additional_data + '/n300.csv')
    edf['subject_cont'] = ids
    edf = edf[['KJØNN', 'subject_cont', 'ALDER']]
    edf = edf.rename(columns={'KJØNN': 'gender', 'ALDER': 'age'})

    for contrast in relevant_contrast_list:
        contrast_name = contrast.split(".")[0]
        if len(scoresdf[scoresdf["Contrast_name"] == contrast_name]):
            # This contrast already has stored scores.
            continue

        # Combination of 2 classes: Bipolar-Schizo, Schizo-Control and
        # Control-Bipolar. Runs before the three-class case, as before.
        df1, df2, df3, contrast_name = tools.data_extraction(
            options.data, 2, contrast, options.data_type)

        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        pairs = [
            (12, pd.concat([df1, df2])),
            (23, pd.concat([df2, df3])),
            (31, pd.concat([df3, df1])),
        ]
        # Handle missing values.
        pairs = [(code, mlu.missing_values(frame)) for code, frame in pairs]
        # Adding age and gender data for standardization purposes. This
        # additional data will be removed in data preprocessing.
        pairs = [(code,
                  pd.merge(frame, edf, on=['subject_cont'], how='inner'))
                 for code, frame in pairs]
        for code, frame in pairs:
            scoresdf = run_no_gender_ml(frame, options, code, scoresdf,
                                        contrast_name)

        # Considering all classes: Bipolar, Schizo and Control.
        df, contrast_name = tools.data_extraction(
            options.data, 3, contrast, options.data_type)
        df = mlu.missing_values(df)
        df = pd.merge(df, edf, on=['subject_cont'], how='inner')
        scoresdf = run_no_gender_ml(df, options, 123, scoresdf,
                                    contrast_name)

        # Persist after every contrast so an interrupted run can resume.
        scoresdf.to_csv(options.output + "no_gender_individual.csv",
                        index=False)

    print(
        "It took %s seconds to run %s iterations for %s model after removing gender effect"
        % (time.time() - start, options.number_iterations, options.model))

Exemplo n.º 2

0

Exibir arquivo

    sns.heatmap(corr,
                xticklabels=corr.columns,
                yticklabels=corr.columns).set_title(title)

    plt.savefig("out/data_exploration/correlation_plots/heat_map_%s"%(title) )
    plt.show()


def missdata_plot(df1, title):
    """Plot the null mask of *df1* as a heatmap titled *title*.

    The figure is saved to ``out/data_exploration/missing_data.png`` and then
    shown.
    """
    null_mask = df1.isnull()
    heatmap_axes = sns.heatmap(null_mask,
                               yticklabels=False,
                               cbar=False,
                               cmap='viridis')
    heatmap_axes.set_title(title)
    plt.savefig("out/data_exploration/missing_data.png")
    plt.show()

if __name__ == "__main__":
    # Load the three-class data for the first two Faces contrasts.
    df1, c = tools.data_extraction("../Data", 3, "Faces_con_0001.mat")
    df2, c = tools.data_extraction("../Data", 3, "Faces_con_0002.mat")

    #df1 = mlu.missing_values(df1, 1)

    options = parse_options()

    # Each analysis is switched on by its own command-line option.
    if options.univariate:
        univariate_analysis(df1)

    if options.correlate:
        features_correlation(df1)

    if options.heatmap:
        # One correlation heatmap per value of the "label" column.
        heatmap_groups = (
            (1, "Bipolar Disorder Subjects"),
            (2, "Schizophrenia Subjects"),
            (3, "Control Subjects"),
        )
        for group_label, group_title in heatmap_groups:
            corr_heatmap(df1[df1["label"] == group_label], group_title)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: code_check.py Projeto: varshakirani/thesis

def main():
    """Sanity-check an RBF SVM on the first Faces contrast.

    Loads the three-class data for ``Faces_con_0001.mat``, fills missing
    values with column means, fits a fixed-parameter SVM and prints its
    confusion matrix and classification report. It then tunes ``C`` and
    ``gamma`` with a 10-fold grid search, refits with the best parameters,
    records the train/test scores and draws box plots of the test scores on a
    2x2 grid of axes.
    """
    # Renamed from `input` so the builtin is not shadowed.
    data_dir = "../Data"
    df, contrast_name = tools.data_extraction(data_dir, 3,
                                              "Faces_con_0001.mat")
    df.fillna(df.mean(), inplace=True)

    scoresdf = pd.DataFrame(columns=['Score', 'Type', 'Model', 'Classifier'])

    # Model : model name

    for _ in range(1):
        train, test = mlu.train_test_split(df)
        X, y = mlu.get_features_labels(train)
        tX, ty = mlu.get_features_labels(test)

        # Baseline with hand-picked hyperparameters.
        model = svm.SVC(kernel='rbf', C=4, gamma=2**-5)
        model.fit(X, y)
        predictions = model.predict(tX)
        print(len(ty))
        print(confusion_matrix(ty, predictions))
        print(classification_report(ty, predictions))

        param_grid = {
            'C': [0.1, 1, 10, 100, 1000],
            'gamma':
            [1, 0.1, 0.01, 0.001, 0.0001, 0.00001, 2**-5, 2**-10, 2**5],
            'kernel': ['rbf']
        }
        grid = GridSearchCV(svm.SVC(),
                            param_grid,
                            refit=True,
                            verbose=3,
                            cv=10)
        grid.fit(X, y)
        best_param = grid.best_params_
        print(best_param)
        grid_predictions = grid.predict(tX)
        print(confusion_matrix(ty, grid_predictions))
        print(classification_report(ty, grid_predictions))

        # Finding scores after hyperparameter tuning.
        model = svm.SVC(kernel=best_param['kernel'],
                        C=best_param['C'],
                        gamma=best_param['gamma'])
        model.fit(X, y)
        train_score = model.score(X, y)
        test_score = model.score(tX, ty)

        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        new_rows = pd.DataFrame([
            {
                'Score': train_score,
                'Type': 'train',
                'Model': 'svm_kernel',
                'Classifier': 123,
                'Contrast_name': contrast_name
            },
            {
                'Score': test_score,
                'Type': 'test',
                'Model': 'svm_kernel',
                'Classifier': 123,
                'Contrast_name': contrast_name
            },
        ])
        scoresdf = pd.concat([scoresdf, new_rows], ignore_index=True)

    fig, axes = plt.subplots(nrows=2, ncols=2)
    axs = axes.ravel()
    # The selection does not depend on the axis, so compute it once.
    test_scores = scoresdf[(scoresdf['Type'] == 'test')
                           & (scoresdf['Model'] == 'svm_kernel')]
    for j in range(4):
        sns.boxplot(x='Model', y='Score', data=test_scores, ax=axs[j])

Exemplo n.º 4

0

Exibir arquivo

Arquivo: logistic_lasso.py Projeto: varshakirani/thesis

    options = tools.parse_options()

    mat_files = os.listdir(options.data)
    contrast_list = list(
        filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    n_back_list = list(
        filter(lambda x: 'nBack' in x and ('2' in x or '3' in x),
               contrast_list))
    faces_list = list(
        filter(lambda x: 'Faces' in x and ('5' in x or '4' in x),
               contrast_list))
    relevant_mat_files = n_back_list + faces_list
    start = time()
    for mat_file in relevant_mat_files:
        print(mat_file)
        df1, df2, df3, contrast_name = tools.data_extraction(
            options.data, 2, mat_file, 'face_aal')
        df1 = shuffle(df1)
        df2 = shuffle(df2)
        df3 = shuffle(df3)

        # Combining two pairs off all combination
        df12 = df1.append(df2)
        df23 = df2.append(df3)
        df31 = df3.append(df1)

        # Handle missing values
        df12 = mlu.missing_values(df12)
        df23 = mlu.missing_values(df23)
        df31 = mlu.missing_values(df31)

        run_logistic_lasso(df12, contrast_name, 12, options.output)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: age_gender_effect.py Projeto: varshakirani/brain_data_analysis

def main():
    """Analyse how age and gender relate to the class labels.

    Reads subject ids, gender and age from ``options.additional_data``, picks
    the first nBack and the first Faces file found in ``options.data`` and,
    for each of them and for each of the variables ``age`` and ``gender``,
    runs ``run_glm_fit`` and ``t_test`` on the three pairwise class
    combinations. Age plots are produced along the way and both score tables
    are printed at the end.
    """
    options = tools.parse_options()

    data = options.data
    additional_data = options.additional_data

    # The context manager closes the file handle, which was previously leaked.
    with open(additional_data + "/subject_name.txt", "r") as id_file:
        ids = [int(float(token)) for token in id_file.read().split()]
    gdf = pd.read_csv(additional_data + '/n300.csv')
    gdf['subject_cont'] = ids
    gdf = gdf[['KJØNN', 'subject_cont', 'ALDER']].copy()
    gdf = gdf.rename(columns={'KJØNN': 'gender', 'ALDER': 'age'})

    mat_files = os.listdir(data)
    # Only the first matching file of each task is used.
    n_back_file = [name for name in mat_files if 'nBack' in name][0]
    face_file = [name for name in mat_files if 'Faces' in name][0]
    contrasts = [n_back_file, face_file]
    t_test_scores = pd.DataFrame(
        columns=['statistic', 'pvalue', 'user group', 'task_name'])
    scoresdf = pd.DataFrame(
        columns=['beta', 'pvalue', 'Labels', 'variable', 'task_name'])
    params = ['age', 'gender']
    print(contrasts)
    for mat_file in contrasts:
        for param in params:
            df1, df2, df3, contrast_name = tools.data_extraction(
                data, 2, mat_file)
            # Fill missing values with each class's own column means.
            df1.fillna(df1.mean(), inplace=True)
            df2.fillna(df2.mean(), inplace=True)
            df3.fillna(df3.mean(), inplace=True)

            df1 = pd.merge(df1, gdf, on=['subject_cont'], how='inner')
            df2 = pd.merge(df2, gdf, on=['subject_cont'], how='inner')
            df3 = pd.merge(df3, gdf, on=['subject_cont'], how='inner')

            # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
            df = pd.concat([df1, df2, df3])
            df = df.loc[:, df.columns.intersection([param, 'label'])]

            df12 = pd.concat([df1, df2])
            df23 = pd.concat([df2, df3])
            df31 = pd.concat([df3, df1])

            task_name = mat_file.split("_")[0]
            if param == "age":
                plot_age_box_plot(df1, df2, df3, df, task_name, options)

            scoresdf = run_glm_fit(df12, 12, scoresdf, param, task_name)
            scoresdf = run_glm_fit(df23, 23, scoresdf, param, task_name)
            scoresdf = run_glm_fit(df31, 31, scoresdf, param, task_name)

            t_test_scores = t_test(df12, "BD-Sc", t_test_scores, task_name)
            t_test_scores = t_test(df23, "Sc-Co", t_test_scores, task_name)
            t_test_scores = t_test(df31, "Co-BD", t_test_scores, task_name)

        # Uses the frames left over from the last `param` iteration.
        plot_age_dist(df1, df2, df3, task_name, options)
    plot_gender_dist(options)
    print("\nGLM fit with age and gender variable used individually\n")
    print(scoresdf)

    print("\n\nT-test scores to analyse age distribution\n")
    print(t_test_scores)

Exemplo n.º 6

0

Exibir arquivo

def main():
    """Run the basic classification experiments over every contrast.

    Options come from ``tools.parse_options()``. With ``options.combine`` set,
    pairs of contrasts from ``contrast_permutation`` are processed through
    ``tools.combine_contrast``; otherwise each contrast is processed on its
    own through ``tools.data_extraction``. For each entry the pairwise class
    combinations (12, 23, 31) and then the three-class data (123) are passed
    to ``run_basic_ml``. Scores are written to
    ``options.output + "<combined|individual>.csv"`` after each entry, and
    entries already present in the loaded score file are skipped.
    """
    print("NI Thesis")
    options = tools.parse_options()
    start = time.time()

    o_subtitle = 'combined' if options.combine else 'individual'

    if os.path.isfile(options.input):
        scoresdf = pd.read_csv(options.input)
    else:
        scoresdf = pd.DataFrame(columns=[
            'Score', 'Type', 'Model', 'Classifier', 'Contrast_name',
            'Balanced_accuracy'
        ])

    mat_files = os.listdir(options.data)
    print(mat_files)
    # Select only the minified mat files (not those ending with 389.mat or
    # 487.mat), e.g.
    # contrast_list = ['Faces_con_0003.mat', 'Faces_con_0002.mat', 'Faces_con_0001.mat', 'Faces_con_0005.mat',
    #                  'Faces_con_0004.mat', 'nBack_con_0001.mat', 'nBack_con_0002.mat', 'nBack_con_0003.mat']
    contrast_list = [
        name for name in mat_files if re.search('.*_.....mat', name)
    ]
    # TODO remove this for old Data
    contrast_list = mat_files
    combi_contrast = contrast_permutation(contrast_list)

    clist = combi_contrast if options.combine else contrast_list

    for entry in clist:

        # Getting the contrast name used for the "already trained" check.
        if options.combine:
            c1_name = entry[0].split(".")[0]
            # Was split(","), which left the ".mat" extension on the second
            # name, unlike the first one.
            c2_name = entry[1].split(".")[0]
            contrast_name = c1_name + '&' + c2_name
        else:
            contrast_name = entry.split(".")[0]

        # Skip contrasts that already have scores in the loaded file.
        if len(scoresdf[scoresdf['Contrast_name'] == contrast_name]):
            continue

        # Two-class case first, then three-class, as before.
        if options.combine:
            df1, df2, df3, contrast_name = tools.combine_contrast(
                options.data, 2, entry[0], entry[1], options.data_type)
        else:
            df1, df2, df3, contrast_name = tools.data_extraction(
                options.data, 2, entry, options.data_type)

        # Combine the classes into all three pairs. pd.concat replaces
        # DataFrame.append, which pandas 2.0 removed.
        pairs = [
            (12, pd.concat([df1, df2])),
            (23, pd.concat([df2, df3])),
            (31, pd.concat([df3, df1])),
        ]
        # Handle missing values.
        pairs = [(code, mlu.missing_values(frame)) for code, frame in pairs]
        for code, frame in pairs:
            scoresdf = run_basic_ml(frame, options, code, scoresdf,
                                    contrast_name)

        # Read data into a pandas data frame. Initially considering only means.
        if options.combine:
            df, contrast_name = tools.combine_contrast(
                options.data, 3, entry[0], entry[1], options.data_type)
        else:
            df, contrast_name = tools.data_extraction(
                options.data, 3, entry, options.data_type)
        df = mlu.missing_values(df)
        scoresdf = run_basic_ml(df, options, 123, scoresdf, contrast_name)

        # Persist after every entry so an interrupted run can resume.
        scoresdf.to_csv(options.output + "%s.csv" % (o_subtitle), index=False)

    print("It took %s seconds to run %s iterations for %s model" %
          (time.time() - start, options.number_iterations, options.model))

    logger.info(
        "It took %s seconds to run %s iterations for all models for not normalized"
        % (time.time() - start, options.number_iterations))

Exemplo n.º 7

0

Exibir arquivo

Arquivo: gender_age_cor.py Projeto: varshakirani/thesis

def main():
    """Run the age/gender experiments on the first two relevant contrasts.

    Options come from ``tools.parse_options()``; the target column name is
    taken from ``options.age_gender``. Gender and age are merged into the
    extracted data via ``subject_cont``. For each contrast,
    ``run_gender_cor`` is called on the three pairwise class combinations
    (12, 23, 31), on each single class (1, 2, 3) and then on the three-class
    data (123). Scores are written to
    ``options.output + "<combined|individual>.csv"`` after each contrast.
    """
    options = tools.parse_options()
    start = time.time()
    if os.path.isfile(options.input):
        scoresdf = pd.read_csv(options.input)
    else:
        scoresdf = pd.DataFrame(columns=['Score', 'Type', 'Model', 'Classifier', 'Contrast_name', 'Balanced_accuracy'])

    o_subtitle = 'combined' if options.combine else 'individual'

    # Gender information, added to the data by linking on subject_cont.
    # The context manager closes the file handle, which was previously leaked.
    with open(options.additional_data + "/subject_name.txt", "r") as id_file:
        ids = [int(float(token)) for token in id_file.read().split()]
    gdf = pd.read_csv(options.additional_data + '/n300.csv')
    gdf['subject_cont'] = ids
    gdf = gdf[['KJØNN', 'subject_cont', 'ALDER']].copy()
    gdf = gdf.rename(columns={'KJØNN': 'gender', 'ALDER': 'age'})

    # The hard-coded 'gender' / 'age' assignments were dead: the option value
    # always overwrote them.
    label = options.age_gender

    mat_files = os.listdir(options.data)
    contrast_list = [name for name in mat_files if re.search('.*_.....mat', name)]
    n_back_list = [name for name in contrast_list
                   if 'nBack' in name and ('2' in name or '3' in name)]
    faces_list = [name for name in contrast_list
                  if 'Faces' in name and ('5' in name or '4' in name or '3' in name)]
    relevant_mat_files = n_back_list + faces_list
    # Only the first two selected files are processed.
    relevant_mat_files = relevant_mat_files[0:2]
    print(relevant_mat_files)
    for mat_file in relevant_mat_files:
        print(mat_file)

        # Two-class case first, then three-class, as before.
        df1, df2, df3, contrast_name = tools.data_extraction(options.data, 2, mat_file, options.data_type)

        # Adding age and gender to the dataframes.
        df1 = pd.merge(df1, gdf, on=['subject_cont'], how='inner')
        df2 = pd.merge(df2, gdf, on=['subject_cont'], how='inner')
        df3 = pd.merge(df3, gdf, on=['subject_cont'], how='inner')

        # All pairwise combinations followed by the single classes.
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        groups = [
            (12, pd.concat([df1, df2])),
            (23, pd.concat([df2, df3])),
            (31, pd.concat([df3, df1])),
            (1, df1),
            (2, df2),
            (3, df3),
        ]
        # Handle missing values.
        groups = [(code, mlu.missing_values(frame)) for code, frame in groups]
        for code, frame in groups:
            scoresdf = run_gender_cor(frame, options, code, scoresdf, contrast_name, label)

        df, contrast_name = tools.data_extraction(options.data, 3, mat_file, options.data_type)
        # Adding age and gender to the dataframe.
        df = pd.merge(df, gdf, on=['subject_cont'], how='inner')
        df = mlu.missing_values(df)
        scoresdf = run_gender_cor(df, options, 123, scoresdf, contrast_name, label)

        # Persist after every contrast.
        scoresdf.to_csv(options.output + "%s.csv" % (o_subtitle), index=False)

    print("It took %s seconds to run %s iterations for %s model" % (time.time() - start, options.number_iterations,
                                                                    options.model))

Exemplo n.º 8

0

Exibir arquivo

    gdf = gdf[['KJØNN', 'subject_cont','ALDER']].copy()
    gdf = gdf.rename(columns={'KJØNN':'gender', 'ALDER':'age'})

    label = options.age_gender

    mat_files = os.listdir(options.data)
    contrast_list = list(filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    n_back_list = list(filter(lambda x: 'nBack' in x and ('2' in x or '3' in x), contrast_list))
    faces_list = list(filter(lambda x: 'Faces' in x and ('5' in x or '4' in x or '3' in x), contrast_list))
    relevant_mat_files = n_back_list + faces_list

    for mat_file in relevant_mat_files:
        print(mat_file)
        for nClass in range(2, 4, 1):
            if nClass == 3:
                df, contrast_name = tools.data_extraction(options.data, nClass, mat_file, options.data_type)
                # Adding Age and gender to the dataframe
                df = pd.merge(df, gdf, on=['subject_cont'], how='inner')
                df = mlu.missing_values(df)
                scoresdf = run_gender_cor(df, options, 123, scoresdf, contrast_name, label)

            elif nClass == 2:
                df1, df2, df3, contrast_name = tools.data_extraction(options.data, nClass, mat_file, options.data_type)

                #Adding Age and gender to the dataframe
                df1 = pd.merge(df1, gdf, on=['subject_cont'], how='inner')
                df2 = pd.merge(df2, gdf, on=['subject_cont'], how='inner')
                df3 = pd.merge(df3, gdf, on=['subject_cont'], how='inner')

                # Combining two pairs off all combination
                df12 = df1.append(df2)

Exemplo n.º 9

0

Exibir arquivo

def main():
    """Run the basic classification experiments over the minified contrasts.

    Options come from ``tools.parse_options()``. With ``options.combine`` set,
    pairs of contrasts from ``contrast_permutation`` are processed through
    ``tools.combine_contrast``; otherwise each contrast is processed on its
    own through ``tools.data_extraction``. For each entry the pairwise class
    combinations (12, 23, 31) and then the three-class data (123) are passed
    to ``run_basic_ml``. Scores are written to
    ``options.output + "basic_<combined|individual>.csv"`` after each entry,
    and entries already present in the loaded score file are skipped.
    """
    options = tools.parse_options()
    start = time.time()
    o_subtitle = 'combined' if options.combine else 'individual'

    if os.path.isfile(options.input):
        # Results are already stored: use them as the starting point.
        scoresdf = pd.read_csv(options.input)
    else:
        # No stored results: create a new dataframe to collect them.
        scoresdf = pd.DataFrame(columns=[
            'Score', 'Type', 'Model', 'Classifier', 'Contrast_name',
            'Balanced_accuracy'
        ])

    mat_files = os.listdir(options.data)
    contrast_list = [
        name for name in mat_files if re.search('.*_.....mat', name)
    ]
    combi_contrast = contrast_permutation(contrast_list)

    clist = combi_contrast if options.combine else contrast_list

    for entry in clist:

        # Getting the contrast name used for the "already trained" check.
        if options.combine:
            c1_name = entry[0].split(".")[0]
            # Was split(","), which left the ".mat" extension on the second
            # name, unlike the first one.
            c2_name = entry[1].split(".")[0]
            contrast_name = c1_name + '&' + c2_name
        else:
            contrast_name = entry.split(".")[0]

        # Skip contrasts that already have scores in the loaded file.
        if len(scoresdf[scoresdf['Contrast_name'] == contrast_name]):
            continue

        # Two-class case first, then three-class, as before.
        if options.combine:
            df1, df2, df3, contrast_name = tools.combine_contrast(
                options.data, 2, entry[0], entry[1], options.data_type)
        else:
            df1, df2, df3, contrast_name = tools.data_extraction(
                options.data, 2, entry, options.data_type)

        # Combine the classes into all three pairs. pd.concat replaces
        # DataFrame.append, which pandas 2.0 removed.
        pairs = [
            (12, pd.concat([df1, df2])),
            (23, pd.concat([df2, df3])),
            (31, pd.concat([df3, df1])),
        ]
        # Handle missing values.
        pairs = [(code, mlu.missing_values(frame)) for code, frame in pairs]
        for code, frame in pairs:
            scoresdf = run_basic_ml(frame, options, code, scoresdf,
                                    contrast_name)

        # Read data into a pandas data frame. Initially considering only means.
        if options.combine:
            df, contrast_name = tools.combine_contrast(
                options.data, 3, entry[0], entry[1], options.data_type)
        else:
            df, contrast_name = tools.data_extraction(
                options.data, 3, entry, options.data_type)
        df = mlu.missing_values(df)
        scoresdf = run_basic_ml(df, options, 123, scoresdf, contrast_name)

        # Persist after every entry so an interrupted run can resume.
        scoresdf.to_csv(options.output + "basic_%s.csv" % (o_subtitle),
                        index=False)

    print("It took %s seconds to run %s iterations for %s model" %
          (time.time() - start, options.number_iterations, options.model))

    print(
        "It took %s seconds to run %s iterations for %s model after removing gender effect"
        % (time.time() - start, options.number_iterations, options.model))

Exemplo n.º 10

0

Exibir arquivo

Arquivo: association_analysis.py Projeto: varshakirani/brain_data_analysis

def main():
    """Run the GLM association analysis for the relevant contrasts.

    Options come from ``tools.parse_options()``. Gender and age are read from
    ``options.additional_data`` and merged into the extracted data via
    ``subject_cont``. For each selected contrast not already present in the
    loaded score file, ``run_glm_fit`` is called on the three pairwise class
    combinations (12, 23, 31) and the scores are written to
    ``options.output + "individual.csv"``. ``fdr_analysis`` runs once at the
    end.
    """
    options = tools.parse_options()
    start = time.time()

    # Get age, gender and subject_cont information.
    # The context manager closes the file handle, which was previously leaked.
    # The paths are joined without a separator, exactly as before.
    with open(options.additional_data + "subject_name.txt", "r") as id_file:
        ids = [int(float(token)) for token in id_file.read().split()]
    gdf = pd.read_csv(options.additional_data + 'n300.csv')
    gdf.loc[:, 'subject_cont'] = ids
    gdf = gdf[['KJØNN', 'subject_cont', 'ALDER']]
    gdf = gdf.rename(columns={'KJØNN': 'gender', 'ALDER': 'age'})

    mat_files = os.listdir(options.data)
    contrast_list = [
        name for name in mat_files if re.search('.*_.....mat', name)
    ]
    n_back_list = [
        name for name in contrast_list
        if 'nBack' in name and ('2' in name or '3' in name)
    ]
    faces_list = [
        name for name in contrast_list
        if 'Faces' in name and ('5' in name or '4' in name or '3' in name)
    ]
    # Extracted nBack 2,3 and Faces 3,4,5 contrasts.
    relevant_contrast_list = n_back_list + faces_list

    if os.path.isfile(options.input):
        scoresdf = pd.read_csv(options.input)
    else:
        scoresdf = pd.DataFrame(columns=[
            'feature', 'beta_f', 'beta_a', 'beta_g', 'pvalue_f', 'pvalue_a',
            'pvalue_g', 'Contrast_name', 'Labels'
        ])

    for contrast in relevant_contrast_list:
        contrast_name = contrast.split(".")[0]
        if len(scoresdf[scoresdf["Contrast_name"] == contrast_name]):
            # This contrast already has stored scores.
            continue

        df1, df2, df3, contrast_name = tools.data_extraction(
            options.data, 2, contrast, options.data_type)

        # Combine the classes into all three pairs. pd.concat replaces
        # DataFrame.append, which pandas 2.0 removed.
        pairs = [
            (12, pd.concat([df1, df2])),
            (23, pd.concat([df2, df3])),
            (31, pd.concat([df3, df1])),
        ]
        # Handle missing values.
        pairs = [(code, mlu.missing_values(frame)) for code, frame in pairs]
        # Adding age and gender data for standardization purposes. This
        # additional data will be removed in data preprocessing.
        pairs = [(code,
                  pd.merge(frame, gdf, on=['subject_cont'], how='inner'))
                 for code, frame in pairs]

        for code, frame in pairs:
            scoresdf = run_glm_fit(frame, code, contrast_name, scoresdf)

        # Persist after every contrast so an interrupted run can resume.
        scoresdf.to_csv(options.output + "individual.csv", index=False)

    fdr_analysis(options)