Пример #1
0
def box_plot_selected(df, starts, ends):
    columns = df.columns.values
    sub_col_pain_peak = pick_columns(columns=columns,
                                     word_starts_with=starts,
                                     word_ends_with=ends)
    # box plot on certain cols
    df_index = df.index.values
    sub_df = pd.DataFrame(df, columns=sub_col_pain_peak, index=df_index)
    label_list = list()
    for i in sub_col_pain_peak:
        label_list.append(i.replace(starts, '').replace(ends, ''))
    title = starts + '(' + ends + ')'
    data_list = list()
    for col in sub_col_pain_peak:
        data_list.append(sub_df[col])
    fig1, ax1 = plt.subplots()
    ax1.set_title(title)
    ax1.boxplot(data_list, labels=label_list)
    fig = plt.gcf()
    plt.show()
    return fig
Пример #2
0
from pandas_method.pandas_method import pick_columns
from data_plt.plt_method import box_plot_selected

# read in
file_name = 'data/data_formal/orig/txt_file/merged_data/result_final.txt'
df = pd.read_csv(filepath_or_buffer=file_name, sep=',', header=0, index_col=0)

# get columns
columns = df.columns.values
col = columns.reshape(columns.shape[0],1)

# rest
# rest_eeg_orig
# rest_eeg_norm
starts = 'rest_norm'
sub_col_rest = pick_columns(columns=columns, word_starts_with=starts)

# pain
# pain norm

starts = 'pain_erp_peak'
ends = 'N90'
fig = box_plot_selected(df=df, starts=starts,ends=ends)

starts = 'pain_eeg_norm'
ends = '_F_T0_F_10.0'
fig = box_plot_selected(df=df, starts=starts,ends=ends)


def plot_starts_ends(df, starts, ends, if_save=True):
    fig = box_plot_selected(df=df, starts=starts,ends=ends)
Пример #3
0
def prepare_hypothesis_on_erp2(file_name):
    """
        Prepare data frame, feature columns and target columns for future test
        This method also create a faked feature and target sets for testing whether the model worked
        current hypothesis is for testing resting states on erp components
        :param file_name: file where data saved
        :return: df, columns_dict_x, columns_dict_y
        """
    # data
    # file_name = 'data/data_formal/result_final.txt'
    df = pd.read_csv(filepath_or_buffer=file_name,
                     sep=',',
                     header=0,
                     index_col=0)
    # columns = df.columns.values
    # col_T = columns.reshape(columns.shape[0],1) # this is for easier reading
    indexs = df.index.values
    # fake data
    X, y = make_regression(n_features=50,
                           n_samples=df.shape[0],
                           n_informative=25,
                           bias=0.1,
                           noise=0.2)
    X_col = list()
    for i in range(0, X.shape[1]):
        col_name = 'test_X_' + str(i)
        X_col.append(col_name)
    Y_col = ['test_Y']
    df_testx = pd.DataFrame(X, columns=X_col, index=indexs)
    df_testy = pd.DataFrame(y.reshape(y.shape[0], 1),
                            columns=Y_col,
                            index=indexs)
    df = df.join(df_testx)
    df = df.join(df_testy)
    columns = df.columns.values
    # columns
    columns_dict_y = dict()

    # amplitude diff
    columns_picked_diff = pick_columns(
        columns=columns, word_starts_with='pain_erp_amplitude_norm')
    for key in columns_picked_diff:
        # compute mean of gender
        if is_column_contains(key, '_Pain_F_'):
            new_col = key.replace('_Pain_F_', '_diff_avg_')
            df[new_col] = df[key] / 2 + df[key.replace('_F_', '_M_')] / 2 - (
                df[key.replace('_Pain_F_', '_Neutral_F_')] / 2 +
                df[key.replace('_Pain_F_', '_Neutral_M_')] / 2)
            columns_dict_y[new_col] = [new_col]

    # amplitude diff orig
    columns_picked_diff = pick_columns(
        columns=columns, word_starts_with='pain_erp_amplitude_orig')
    for key in columns_picked_diff:
        # compute mean of gender
        if is_column_contains(key, '_Pain_F_'):
            new_col = key.replace('_Pain_F_', '_diff_avg_')
            df[new_col] = df[key] / 2 + df[key.replace('_F_', '_M_')] / 2 - (
                df[key.replace('_Pain_F_', '_Neutral_F_')] / 2 +
                df[key.replace('_Pain_F_', '_Neutral_M_')] / 2)
            columns_dict_y[new_col] = [new_col]

    # test target (for testing whether the method is useful)
    columns_dict_y['test_Y'] = pick_columns(columns=columns,
                                            word_starts_with='test_Y')
    # features
    columns_dict_x = dict()
    # rest total
    columns_dict_x['rest_norm'] = pick_columns(columns=columns,
                                               word_starts_with='rest_norm_')
    columns_dict_x['rest_orig'] = pick_columns(columns=columns,
                                               word_starts_with='rest_orig_')
    # below alpha
    columns_dict_x['rest_norm_alpha'] = pick_columns(
        columns=columns, word_starts_with='rest_norm_', word_ends_with='10.0')
    columns_dict_x['rest_orig_alpha'] = pick_columns(
        columns=columns, word_starts_with='rest_orig_', word_ends_with='10.0')
    # rest alpha
    columns_dict_x['rest_norm_alpha'] = pick_columns(
        columns=columns, word_starts_with='rest_norm_', word_ends_with='10.0')
    columns_dict_x['rest_orig_alpha'] = pick_columns(
        columns=columns, word_starts_with='rest_orig_', word_ends_with='10.0')
    # rest beta
    columns_dict_x['rest_norm_beta'] = pick_columns(
        columns=columns, word_starts_with='rest_norm_', word_ends_with='17.0')
    columns_dict_x['rest_orig_beta'] = pick_columns(
        columns=columns, word_starts_with='rest_orig_', word_ends_with='17.0')

    # testing features (for testing whether the method is useful)
    columns_dict_x['test_X'] = pick_columns(columns=columns,
                                            word_starts_with='test_X_')

    return df, columns_dict_x, columns_dict_y
Пример #4
0
def prepare_hypothesis(file_name):
    """
    Prepare data frame, feature columns and target columns for future test
    This method also create a faked feature and target sets for testing whether the model worked
    :param file_name: file where data saved
    :return: df, columns_dict_x, columns_dict_y
    """
    # data
    # file_name = 'data/data_formal/orig/txt_file/merged_data/result_final.txt'
    df = pd.read_csv(filepath_or_buffer=file_name,
                     sep=',',
                     header=0,
                     index_col=0)
    # columns = df.columns.values
    # col_T = columns.reshape(columns.shape[0],1) # this is for easier reading
    indexs = df.index.values
    # fake data
    X, y = make_regression(n_features=50,
                           n_samples=df.shape[0],
                           n_informative=25,
                           bias=0.1,
                           noise=0.2)
    X_col = list()
    for i in range(0, X.shape[1]):
        col_name = 'test_X_' + str(i)
        X_col.append(col_name)
    Y_col = ['test_Y']
    df_testx = pd.DataFrame(X, columns=X_col, index=indexs)
    df_testy = pd.DataFrame(y.reshape(y.shape[0], 1),
                            columns=Y_col,
                            index=indexs)
    df = df.join(df_testx)
    df = df.join(df_testy)
    columns = df.columns.values
    # columns
    columns_dict_y = dict()
    """
    The following is all hypothesises,  columns_dict_y contains targets, and columns_dict_x contains features combination
    """
    # target
    columns_dict_y['age'] = pick_columns(columns=columns,
                                         word_starts_with='age_response')
    columns_dict_y['race_identity'] = pick_columns(
        columns=columns, word_starts_with='RESULT_r1')
    columns_dict_y['face_recognition'] = pick_columns(
        columns=columns, word_starts_with='RESULT_r2')
    columns_dict_y['day_dream'] = pick_columns(columns=columns,
                                               word_starts_with='RESULT_r3')
    columns_dict_y['IRI_FS'] = pick_columns(columns=columns,
                                            word_starts_with='RESULT_r4_FS')
    columns_dict_y['IRI_EC'] = pick_columns(columns=columns,
                                            word_starts_with='RESULT_r4_EC')
    columns_dict_y['IRI_PT'] = pick_columns(columns=columns,
                                            word_starts_with='RESULT_r4_PT')
    columns_dict_y['IRI_PD'] = pick_columns(columns=columns,
                                            word_starts_with='RESULT_r4_PD')
    columns_dict_y['Interdependent'] = pick_columns(
        columns=columns, word_starts_with='RESULT_r5_Interdependent')
    columns_dict_y['Independent'] = pick_columns(
        columns=columns, word_starts_with='RESULT_r5_Independent')
    columns_dict_y['altruism'] = pick_columns(columns=columns,
                                              word_starts_with='RESULT_r6_')
    columns_dict_y['anxiety'] = pick_columns(columns=columns,
                                             word_starts_with='RESULT_r7_')
    columns_dict_y['depression'] = pick_columns(columns=columns,
                                                word_starts_with='RESULT_r8_')
    # test target (for testing whether the method is useful)
    columns_dict_y['test_Y'] = pick_columns(columns=columns,
                                            word_starts_with='test_Y')

    # features
    columns_dict_x = dict()
    # pain
    # pain eeg
    columns_dict_x['pain_eeg_norm'] = pick_columns(
        columns=columns, word_starts_with='pain_eeg_norm_')
    columns_dict_x['pain_eeg_norm_Pain_'] = pick_columns(
        columns=columns, word_starts_with='pain_eeg_norm_Pain_')
    columns_dict_x['pain_eeg_norm_Neutral_'] = pick_columns(
        columns=columns, word_starts_with='pain_eeg_norm_Neutral_')
    columns_dict_x['pain_eeg_orig'] = pick_columns(
        columns=columns, word_starts_with='pain_eeg_orig_')
    columns_dict_x['pain_eeg_orig_Pain_'] = pick_columns(
        columns=columns, word_starts_with='pain_eeg_orig_Pain_')
    columns_dict_x['pain_eeg_orig_Neutral_'] = pick_columns(
        columns=columns, word_starts_with='pain_eeg_orig_Neutral_')
    # pain erp
    # amplitude
    columns_dict_x['pain_erp_amplitude_norm'] = pick_columns(
        columns=columns, word_starts_with='pain_erp_amplitude_norm_')
    columns_dict_x['pain_erp_amplitude_norm_Pain_'] = pick_columns(
        columns=columns, word_starts_with='pain_erp_amplitude_norm_Pain_')
    columns_dict_x['pain_erp_amplitude_norm_Neutral_'] = pick_columns(
        columns=columns, word_starts_with='pain_erp_amplitude_norm_Neutral_')
    columns_dict_x['pain_erp_amplitude_orig'] = pick_columns(
        columns=columns, word_starts_with='pain_erp_amplitude_orig_')
    columns_dict_x['pain_erp_amplitude_orig_Pain_'] = pick_columns(
        columns=columns, word_starts_with='pain_erp_amplitude_orig_Pain_')
    columns_dict_x['pain_erp_amplitude_orig_Neutral_'] = pick_columns(
        columns=columns, word_starts_with='pain_erp_amplitude_orig_Neutral_')
    # peak
    columns_dict_x['pain_erp_peak'] = pick_columns(
        columns=columns, word_starts_with='pain_erp_peak_')
    # rest
    columns_dict_x['rest_norm'] = pick_columns(columns=columns,
                                               word_starts_with='rest_norm_')
    columns_dict_x['rest_orig'] = pick_columns(columns=columns,
                                               word_starts_with='rest_orig_')
    # testing features (for testing whether the method is useful)
    columns_dict_x['test_X'] = pick_columns(columns=columns,
                                            word_starts_with='test_X_')

    return df, columns_dict_x, columns_dict_y