Пример #1
def main():
    options = tools.parse_options()
    start = time.time()
    if os.path.isfile(
    ):  # if results are already stored then use that as input
        scoresdf = pd.read_csv(options.input)
    else:  # in previous experiments, if results are not stored then create new dataframe to store the results
        scoresdf = pd.DataFrame(columns=[
            'Score', 'Type', 'Model', 'Classifier', 'Contrast_name',

    mat_files = os.listdir(options.data)
    contrast_list = list(
        filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    n_back_list = list(
        filter(lambda x: 'nBack' in x and ('2' in x or '3' in x),
    faces_list = list(
        filter(lambda x: 'Faces' in x and ('5' in x or '4' in x or '3' in x),
    relevant_contrast_list = n_back_list + faces_list  # extracted nBack 2,3 and Faces 3,4,5 contrasts

    # Age and gender information along with subject id is extracted
    file = open(options.additional_data + "/subject_name.txt", "r")
    ids = file.read().split()
    ids = [int(float(id)) for id in ids]
    edf = pd.read_csv(options.additional_data + '/n300.csv')
    edf['subject_cont'] = ids
    edf = edf[['KJØNN', 'subject_cont', 'ALDER']]
    edf = edf.rename(columns={'KJØNN': 'gender', 'ALDER': 'age'})

    for contrast in relevant_contrast_list:
        contrast_name = contrast.split(".")[0]
        if len(scoresdf[scoresdf["Contrast_name"] == contrast_name]):

        for nClass in range(2, 4, 1):
            #  Considering all classes: Bipolar, Schizo and Control
            if nClass == 3:
                df, contrast_name = tools.data_extraction(
                    options.data, nClass, contrast, options.data_type)
                df = mlu.missing_values(df)
                df = pd.merge(df, edf, on=['subject_cont'], how='inner')
                scoresdf = run_no_gender_ml(df, options, 123, scoresdf,

            #  Considering combination of 2 classes: Bipolar-Schizo, Schizo-Control and Control-Bipolar
            elif nClass == 2:
                df1, df2, df3, contrast_name = tools.data_extraction(
                    options.data, nClass, contrast, options.data_type)

                # Combining two pairs off all combination
                df12 = df1.append(df2)
                df23 = df2.append(df3)
                df31 = df3.append(df1)

                # Handle missing values
                df12 = mlu.missing_values(df12)
                df23 = mlu.missing_values(df23)
                df31 = mlu.missing_values(df31)

                # Adding age and gender data for Standardization purpose. This additional data will be removed in
                # data preprocessing
                df12 = pd.merge(df12, edf, on=['subject_cont'], how='inner')
                df23 = pd.merge(df23, edf, on=['subject_cont'], how='inner')
                df31 = pd.merge(df31, edf, on=['subject_cont'], how='inner')

                scoresdf = run_no_gender_ml(df12, options, 12, scoresdf,
                scoresdf = run_no_gender_ml(df23, options, 23, scoresdf,
                scoresdf = run_no_gender_ml(df31, options, 31, scoresdf,

        scoresdf.to_csv(options.output + "no_gender_individual.csv",

        "It took %s seconds to run %s iterations for %s model after removing gender effect"
        % (time.time() - start, options.number_iterations, options.model))
Пример #2

    plt.savefig("out/data_exploration/correlation_plots/heat_map_%s"%(title) )

def missdata_plot(df1, title):
    """Draw a heatmap of the missing-value mask of ``df1`` and title it.

    Args:
        df1: Object exposing ``isnull()``; the resulting boolean mask is
            what gets plotted.
        title: Text set as the title of the heatmap axes.
    """
    null_mask = df1.isnull()
    # Row tick labels and the colour bar are switched off for the mask plot.
    heatmap_axes = sns.heatmap(
        null_mask,
        yticklabels=False,
        cbar=False,
        cmap='viridis',
    )
    heatmap_axes.set_title(title)

if __name__ == "__main__":

    df1,c = tools.data_extraction("../Data",3, "Faces_con_0001.mat" )
    df2,c = tools.data_extraction("../Data",3,"Faces_con_0002.mat")

    #df1 = mlu.missing_values(df1, 1)

    options = parse_options()
    if options.univariate:

    if options.correlate:

    if options.heatmap:
        corr_heatmap(df1[df1["label"] == 1], "Bipolar Disorder Subjects")
        corr_heatmap(df1[df1["label"] == 2], "Schizophrenia Subjects")
        corr_heatmap(df1[df1["label"] == 3], "Control Subjects")
Пример #3
def main():
    input = "../Data"
    df, contrast_name = tools.data_extraction(input, 3, "Faces_con_0001.mat")
    df.fillna(df.mean(), inplace=True)

    scoresdf = pd.DataFrame(columns=['Score', 'Type', 'Model', 'Classifier'])

    # Model : model name

    for i in range(1):
        train, test = mlu.train_test_split(df)
        X, y = mlu.get_features_labels(train)
        tX, ty = mlu.get_features_labels(test)
        model = svm.SVC(kernel='rbf', C=4, gamma=2**-5)
        model.fit(X, y)
        train_score = model.score(X, y)
        test_score = model.score(tX, ty)
        predictions = model.predict(tX)
        print(confusion_matrix(ty, predictions))
        print(classification_report(ty, predictions))
        param_grid = {
            'C': [0.1, 1, 10, 100, 1000],
            [1, 0.1, 0.01, 0.001, 0.0001, 0.00001, 2**-5, 2**-10, 2**5],
            'kernel': ['rbf']
        grid = GridSearchCV(svm.SVC(),
        grid.fit(X, y)
        best_param = grid.best_params_
        grid_predictions = grid.predict(tX)
        print(confusion_matrix(ty, grid_predictions))
        print(classification_report(ty, grid_predictions))

        ### finding scores after hyperparamter tuning
        model = svm.SVC(kernel=best_param['kernel'],
        model.fit(X, y)
        train_score = model.score(X, y)
        test_score = model.score(tX, ty)
        scoresdf = scoresdf.append(
                'Score': train_score,
                'Type': 'train',
                'Model': 'svm_kernel',
                'Classifier': 123,
                'Contrast_name': contrast_name
        scoresdf = scoresdf.append(
                'Score': test_score,
                'Type': 'test',
                'Model': 'svm_kernel',
                'Classifier': 123,
                'Contrast_name': contrast_name

    fig, axes = plt.subplots(nrows=2, ncols=2)
    axs = axes.ravel()
    for j in range(4):

        models = scoresdf['Model'].unique()
                    data=scoresdf[(scoresdf['Type'] == 'test')
                                  & (scoresdf['Model'] == 'svm_kernel')],
Пример #4
    options = tools.parse_options()

    mat_files = os.listdir(options.data)
    contrast_list = list(
        filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    n_back_list = list(
        filter(lambda x: 'nBack' in x and ('2' in x or '3' in x),
    faces_list = list(
        filter(lambda x: 'Faces' in x and ('5' in x or '4' in x),
    relevant_mat_files = n_back_list + faces_list
    start = time()
    for mat_file in relevant_mat_files:
        df1, df2, df3, contrast_name = tools.data_extraction(
            options.data, 2, mat_file, 'face_aal')
        df1 = shuffle(df1)
        df2 = shuffle(df2)
        df3 = shuffle(df3)

        # Combining two pairs off all combination
        df12 = df1.append(df2)
        df23 = df2.append(df3)
        df31 = df3.append(df1)

        # Handle missing values
        df12 = mlu.missing_values(df12)
        df23 = mlu.missing_values(df23)
        df31 = mlu.missing_values(df31)

        run_logistic_lasso(df12, contrast_name, 12, options.output)
def main():
    """Run GLM fits and t-tests of age/gender across diagnosis-group pairs.

    One ``nBack`` and one ``Faces`` contrast file from ``options.data`` are
    processed.  For each file and each of the variables ``age`` and
    ``gender``, the three per-group frames returned by
    ``tools.data_extraction`` are mean-imputed, joined with the demographics
    table on ``subject_cont``, combined pairwise (12, 23, 31) and handed to
    ``run_glm_fit`` and ``t_test``.  Age plots are produced through
    ``plot_age_box_plot`` and ``plot_age_dist``.

    Raises:
        IndexError: If the data directory contains no file name with
            ``nBack`` in it, or none with ``Faces`` in it.
    """
    options = tools.parse_options()

    data = options.data
    additional_data = options.additional_data

    # Read the subject ids inside a context manager so the file handle is
    # always closed; a bare open() without close() leaks it.
    with open(additional_data + "/subject_name.txt", "r") as subject_file:
        # int(float(...)) also accepts ids written in float notation.
        ids = [int(float(token)) for token in subject_file.read().split()]

    # Demographics table: keep gender (KJØNN), age (ALDER) and the join key.
    # NOTE(review): assumes the rows of n300.csv are in the same order as the
    # ids in subject_name.txt - confirm against the data files.
    gdf = pd.read_csv(additional_data + '/n300.csv')
    gdf['subject_cont'] = ids
    gdf = gdf[['KJØNN', 'subject_cont', 'ALDER']].copy()
    gdf = gdf.rename(columns={'KJØNN': 'gender', 'ALDER': 'age'})

    # os.listdir() returns entries in arbitrary order, so "first match" is
    # whichever matching file the OS happens to list first.
    mat_files = os.listdir(data)
    n_back_file = [name for name in mat_files if 'nBack' in name][0]
    face_file = [name for name in mat_files if 'Faces' in name][0]
    contrasts = [n_back_file, face_file]
    t_test_scores = pd.DataFrame(
        columns=['statistic', 'pvalue', 'user group', 'task_name'])
    scoresdf = pd.DataFrame(
        columns=['beta', 'pvalue', 'Labels', 'variable', 'task_name'])
    params = ['age', 'gender']
    for mat_file in contrasts:
        for param in params:
            df1, df2, df3, contrast_name = tools.data_extraction(
                data, 2, mat_file)
            # Impute missing values with each group's own column means.
            df1.fillna(df1.mean(), inplace=True)
            df2.fillna(df2.mean(), inplace=True)
            df3.fillna(df3.mean(), inplace=True)

            # Attach age and gender; the inner join drops subjects that have
            # no demographics row.
            df1 = pd.merge(df1, gdf, on=['subject_cont'], how='inner')
            df2 = pd.merge(df2, gdf, on=['subject_cont'], how='inner')
            df3 = pd.merge(df3, gdf, on=['subject_cont'], how='inner')

            # pd.concat replaces DataFrame.append, which was removed in
            # pandas 2.0; row order and index handling are the same.
            df = pd.concat([df1, df2, df3])
            # Keep only the studied variable and the group label.
            df = df.loc[:, df.columns.intersection([param, 'label'])]

            # All pairwise combinations of the three groups.
            df12 = pd.concat([df1, df2])
            df23 = pd.concat([df2, df3])
            df31 = pd.concat([df3, df1])

            # Task name is the file-name prefix before the first underscore.
            task_name = mat_file.split("_")[0]
            if param == "age":
                plot_age_box_plot(df1, df2, df3, df, task_name, options)

            scoresdf = run_glm_fit(df12, 12, scoresdf, param, task_name)
            scoresdf = run_glm_fit(df23, 23, scoresdf, param, task_name)
            scoresdf = run_glm_fit(df31, 31, scoresdf, param, task_name)

            t_test_scores = t_test(df12, "BD-Sc", t_test_scores, task_name)
            t_test_scores = t_test(df23, "Sc-Co", t_test_scores, task_name)
            t_test_scores = t_test(df31, "Co-BD", t_test_scores, task_name)

        # Uses the frames left over from the last ``param`` iteration.
        plot_age_dist(df1, df2, df3, task_name, options)
    # NOTE(review): only the headings are printed below; scoresdf and
    # t_test_scores themselves are never printed or saved in this function -
    # confirm whether that output step is missing.
    print("\nGLM fit with age and gender variable used individually\n")

    print("\n\nT-test scores to analyse age distribution\n")
Пример #6
def main():
    print("NI Thesis")
    options = tools.parse_options()
    start = time.time()

    if options.combine:
        o_subtitle = 'combined'
        o_subtitle = 'individual'

    if os.path.isfile(options.input):
        scoresdf = pd.read_csv(options.input)
        scoresdf = pd.DataFrame(columns=[
            'Score', 'Type', 'Model', 'Classifier', 'Contrast_name',

    mat_files = os.listdir(options.data)
    #To get matfiles which does not ends with 389.mat or 487.mat. Selecting only minified mat files like
    #contrast_list = ['Faces_con_0003.mat', 'Faces_con_0002.mat', 'Faces_con_0001.mat', 'Faces_con_0005.mat',
    #                 'Faces_con_0004.mat', 'nBack_con_0001.mat', 'nBack_con_0002.mat', 'nBack_con_0003.mat']

    contrast_list = list(
        filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    #TODO remove this for old Data
    contrast_list = mat_files
    combi_contrast = contrast_permutation(contrast_list)

    if options.combine:
        clist = combi_contrast
        clist = contrast_list

    for i in range(len(clist)):

        #Getting Contrast name
        if options.combine:
            c1_name = clist[i][0].split(".")[0]
            c2_name = clist[i][1].split(",")[0]
            contrast_name = c1_name + '&' + c2_name
            contrast_name = clist[i].split(".")[0]

        # Checking if the training is already made for the particular contrast
        # TODO Uncomment this for checking if contrast is present in the file
        if len(scoresdf[scoresdf['Contrast_name'] == contrast_name]):

        for nClass in range(2, 4, 1):

            if nClass == 3:

                # Read Data and put it into panda data frame. Initially considering only means
                if options.combine:
                    df, contrast_name = tools.combine_contrast(
                        options.data, nClass, clist[i][0], clist[i][1],
                    df, contrast_name = tools.data_extraction(
                        options.data, nClass, clist[i], options.data_type)
                df = mlu.missing_values(df)
                scoresdf = run_basic_ml(df, options, 123, scoresdf,

            elif nClass == 2:

                if options.combine:
                    df1, df2, df3, contrast_name = tools.combine_contrast(
                        options.data, nClass, clist[i][0], clist[i][1],

                    df1, df2, df3, contrast_name = tools.data_extraction(
                        options.data, nClass, clist[i], options.data_type)
                # Combining two pairs off all combination
                df12 = df1.append(df2)
                df23 = df2.append(df3)
                df31 = df3.append(df1)

                # Handle missing values
                df12 = mlu.missing_values(df12)
                df23 = mlu.missing_values(df23)
                df31 = mlu.missing_values(df31)

                scoresdf = run_basic_ml(df12, options, 12, scoresdf,
                scoresdf = run_basic_ml(df23, options, 23, scoresdf,
                scoresdf = run_basic_ml(df31, options, 31, scoresdf,

        scoresdf.to_csv(options.output + "%s.csv" % (o_subtitle), index=False)

    print("It took %s seconds to run %s iterations for %s model" %
          (time.time() - start, options.number_iterations, options.model))

        "It took %s seconds to run %s iterations for all models for not normalized"
        % (time.time() - start, options.number_iterations))
Пример #7
def main():
    options = tools.parse_options()
    start = time.time()
    if os.path.isfile(options.input):
        scoresdf = pd.read_csv(options.input)
        scoresdf = pd.DataFrame(columns=['Score', 'Type', 'Model', 'Classifier', 'Contrast_name', 'Balanced_accuracy'])

    if options.combine:
        o_subtitle = 'combined'
        o_subtitle = 'individual'

    ## Gender information and adding it as label to the data by linking the subject_cont
    file = open(options.additional_data + "/subject_name.txt", "r")
    ids = file.read().split()
    ids = [int(float(id)) for id in ids]
    gdf = pd.read_csv(options.additional_data + '/n300.csv')
    gdf['subject_cont'] = ids
    gdf = gdf[['KJØNN', 'subject_cont','ALDER']].copy()
    gdf = gdf.rename(columns={'KJØNN':'gender', 'ALDER':'age'})

    label = 'gender'
    label = 'age'
    label = options.age_gender

    mat_files = os.listdir(options.data)
    contrast_list = list(filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    n_back_list = list(filter(lambda x: 'nBack' in x and ('2' in x or '3' in x), contrast_list))
    faces_list = list(filter(lambda x: 'Faces' in x and ('5' in x or '4' in x or '3' in x), contrast_list))
    relevant_mat_files = n_back_list + faces_list
    relevant_mat_files = relevant_mat_files[0:2]
    for mat_file in relevant_mat_files:
        for nClass in range(2, 4, 1):
            if nClass == 3:
                df, contrast_name = tools.data_extraction(options.data, nClass, mat_file, options.data_type)
                # Adding Age and gender to the dataframe
                df = pd.merge(df, gdf, on=['subject_cont'], how='inner')
                df = mlu.missing_values(df)
                scoresdf = run_gender_cor(df, options, 123, scoresdf, contrast_name, label)

            elif nClass == 2:
                df1, df2, df3, contrast_name = tools.data_extraction(options.data, nClass, mat_file, options.data_type)

                #Adding Age and gender to the dataframe
                df1 = pd.merge(df1, gdf, on=['subject_cont'], how='inner')
                df2 = pd.merge(df2, gdf, on=['subject_cont'], how='inner')
                df3 = pd.merge(df3, gdf, on=['subject_cont'], how='inner')

                # Combining two pairs off all combination
                df12 = df1.append(df2)
                df23 = df2.append(df3)
                df31 = df3.append(df1)

                # Handle missing values
                df12 = mlu.missing_values(df12)
                df23 = mlu.missing_values(df23)
                df31 = mlu.missing_values(df31)

                df1 = mlu.missing_values(df1)
                df2 = mlu.missing_values(df2)
                df3 = mlu.missing_values(df3)

                scoresdf = run_gender_cor(df12, options, 12, scoresdf, contrast_name, label)
                scoresdf = run_gender_cor(df23, options, 23, scoresdf, contrast_name, label)
                scoresdf = run_gender_cor(df31, options, 31, scoresdf, contrast_name, label)

                scoresdf = run_gender_cor(df1, options, 1, scoresdf, contrast_name, label)
                scoresdf = run_gender_cor(df2, options, 2, scoresdf, contrast_name, label)
                scoresdf = run_gender_cor(df3, options, 3, scoresdf, contrast_name, label)
        scoresdf.to_csv(options.output + "%s.csv" % (o_subtitle), index=False)

    print("It took %s seconds to run %s iterations for %s model" % (time.time() - start, options.number_iterations,
Пример #8
    gdf = gdf[['KJØNN', 'subject_cont','ALDER']].copy()
    gdf = gdf.rename(columns={'KJØNN':'gender', 'ALDER':'age'})

    label = options.age_gender

    mat_files = os.listdir(options.data)
    contrast_list = list(filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    n_back_list = list(filter(lambda x: 'nBack' in x and ('2' in x or '3' in x), contrast_list))
    faces_list = list(filter(lambda x: 'Faces' in x and ('5' in x or '4' in x or '3' in x), contrast_list))
    relevant_mat_files = n_back_list + faces_list

    for mat_file in relevant_mat_files:
        for nClass in range(2, 4, 1):
            if nClass == 3:
                df, contrast_name = tools.data_extraction(options.data, nClass, mat_file, options.data_type)
                # Adding Age and gender to the dataframe
                df = pd.merge(df, gdf, on=['subject_cont'], how='inner')
                df = mlu.missing_values(df)
                scoresdf = run_gender_cor(df, options, 123, scoresdf, contrast_name, label)

            elif nClass == 2:
                df1, df2, df3, contrast_name = tools.data_extraction(options.data, nClass, mat_file, options.data_type)

                #Adding Age and gender to the dataframe
                df1 = pd.merge(df1, gdf, on=['subject_cont'], how='inner')
                df2 = pd.merge(df2, gdf, on=['subject_cont'], how='inner')
                df3 = pd.merge(df3, gdf, on=['subject_cont'], how='inner')

                # Combining two pairs off all combination
                df12 = df1.append(df2)
Пример #9
def main():
    options = tools.parse_options()
    start = time.time()
    if options.combine:
        o_subtitle = 'combined'
        o_subtitle = 'individual'

    if os.path.isfile(
    ):  # if results are already stored then use that as input
        scoresdf = pd.read_csv(options.input)
    else:  # in previous experiments, if results are not stored then create new dataframe to store the results
        scoresdf = pd.DataFrame(columns=[
            'Score', 'Type', 'Model', 'Classifier', 'Contrast_name',

    mat_files = os.listdir(options.data)
    contrast_list = list(
        filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    combi_contrast = contrast_permutation(contrast_list)

    if options.combine:
        clist = combi_contrast
        clist = contrast_list

    for i in range(len(clist)):

        #Getting Contrast name
        if options.combine:
            c1_name = clist[i][0].split(".")[0]
            c2_name = clist[i][1].split(",")[0]
            contrast_name = c1_name + '&' + c2_name
            contrast_name = clist[i].split(".")[0]

        # Checking if the training is already made for the particular contrast
        # TODO Uncomment this for checking if contrast is present in the file
        if len(scoresdf[scoresdf['Contrast_name'] == contrast_name]):

        for nClass in range(2, 4, 1):

            if nClass == 3:

                # Read Data and put it into panda data frame. Initially considering only means
                if options.combine:
                    df, contrast_name = tools.combine_contrast(
                        options.data, nClass, clist[i][0], clist[i][1],
                    df, contrast_name = tools.data_extraction(
                        options.data, nClass, clist[i], options.data_type)
                df = mlu.missing_values(df)
                scoresdf = run_basic_ml(df, options, 123, scoresdf,

            elif nClass == 2:

                if options.combine:
                    df1, df2, df3, contrast_name = tools.combine_contrast(
                        options.data, nClass, clist[i][0], clist[i][1],

                    df1, df2, df3, contrast_name = tools.data_extraction(
                        options.data, nClass, clist[i], options.data_type)
                # Combining two pairs off all combination
                df12 = df1.append(df2)
                df23 = df2.append(df3)
                df31 = df3.append(df1)

                # Handle missing values
                df12 = mlu.missing_values(df12)
                df23 = mlu.missing_values(df23)
                df31 = mlu.missing_values(df31)

                scoresdf = run_basic_ml(df12, options, 12, scoresdf,
                scoresdf = run_basic_ml(df23, options, 23, scoresdf,
                scoresdf = run_basic_ml(df31, options, 31, scoresdf,

        scoresdf.to_csv(options.output + "basic_%s.csv" % (o_subtitle),

    print("It took %s seconds to run %s iterations for %s model" %
          (time.time() - start, options.number_iterations, options.model))

        "It took %s seconds to run %s iterations for %s model after removing gender effect"
        % (time.time() - start, options.number_iterations, options.model))
def main():

    options = tools.parse_options()
    start = time.time()

    ## Get Age, Gender and Subject_cont information ###

    file = open(options.additional_data + "subject_name.txt", "r")
    ids = file.read().split()
    ids = [int(float(id)) for id in ids]
    gdf = pd.read_csv(options.additional_data + 'n300.csv')
    gdf.loc[:, 'subject_cont'] = ids
    gdf = gdf[['KJØNN', 'subject_cont', 'ALDER']]
    gdf = gdf.rename(columns={'KJØNN': 'gender', 'ALDER': 'age'})

    mat_files = os.listdir(options.data)
    contrast_list = list(
        filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    n_back_list = list(
        filter(lambda x: 'nBack' in x and ('2' in x or '3' in x),
    faces_list = list(
        filter(lambda x: 'Faces' in x and ('5' in x or '4' in x or '3' in x),
    relevant_contrast_list = n_back_list + faces_list  # extracted nBack 2,3 and Faces 3,4,5 contrasts

    if os.path.isfile(options.input):
        scoresdf = pd.read_csv(options.input)
        scoresdf = pd.DataFrame(columns=[
            'feature', 'beta_f', 'beta_a', 'beta_g', 'pvalue_f', 'pvalue_a',
            'pvalue_g', 'Contrast_name', 'Labels'

    for contrast in relevant_contrast_list:
        contrast_name = contrast.split(".")[0]
        if len(scoresdf[scoresdf["Contrast_name"] == contrast_name]):

        df1, df2, df3, contrast_name = tools.data_extraction(
            options.data, 2, contrast, options.data_type)

        # Combining two pairs off all combination
        df12 = df1.append(df2)
        df23 = df2.append(df3)
        df31 = df3.append(df1)

        # Handle missing values
        df12 = mlu.missing_values(df12)
        df23 = mlu.missing_values(df23)
        df31 = mlu.missing_values(df31)

        # Adding age and gender data for Standardization purpose. This additional data will be removed in
        # data preprocessing
        df12 = pd.merge(df12, gdf, on=['subject_cont'], how='inner')
        df23 = pd.merge(df23, gdf, on=['subject_cont'], how='inner')
        df31 = pd.merge(df31, gdf, on=['subject_cont'], how='inner')

        scoresdf = run_glm_fit(df12, 12, contrast_name, scoresdf)
        scoresdf = run_glm_fit(df23, 23, contrast_name, scoresdf)
        scoresdf = run_glm_fit(df31, 31, contrast_name, scoresdf)

        scoresdf.to_csv(options.output + "individual.csv", index=False)
