def get_rmcorr_for_sensitivities_by_participant(
    sensitivities_by_participant: List[List[InterstimSensitivities]], ):
    """Relate ``dprime`` to ``distance`` across participants.

    The per-participant sensitivities are first flattened into a single
    data frame by ``_get_dataframe_for_sensitivities_by_participant``. Two
    analyses are then run on that frame:

    * an ``ols`` fit of ``dprime ~ C(subject) + distance``, from which the
      ``distance`` coefficient is taken;
    * ``pg.rm_corr`` with ``distance`` as x, ``dprime`` as y and
      ``subject`` as the grouping column.

    Returns:
        A 4-tuple ``(slope, r, ci95, pval)`` where ``slope`` is the fitted
        ``distance`` coefficient and the remaining items are the ``r``,
        ``CI95%`` and ``pval`` cells of the ``rm_corr`` result row.
    """
    frame = _get_dataframe_for_sensitivities_by_participant(
        sensitivities_by_participant)

    fitted = ols("dprime ~ C(subject) + distance", data=frame).fit()
    rm_stats = pg.rm_corr(frame, x="distance", y="dprime", subject="subject")

    # Pull the individual numbers out only after both analyses have run.
    slope = fitted.params["distance"]
    r_value, ci95, p_value = (
        rm_stats.at["rm_corr", column] for column in ("r", "CI95%", "pval"))
    return slope, r_value, ci95, p_value
# Example no. 2
# 0
            shapiro_samples.append([x for x in data_points.T])
            shapiro_names.append(['{} {} {} {}'.format(metric_type, fb_type, k, th) for k in range(1, 16)])

            fb_data_points.append(data_points[:, :])
            ax = (axes[fb_typs_axes[fb_type]]
                  if metric_type=='magnitude' else axes2[metric_types_ax[metric_type], fb_typs_axes[fb_type]])
            if th == threshold or metric_type=='magnitude':
                ax.errorbar(np.arange(len(unique_blocks))+1, data_points.mean(0),
                            2*data_points.std(0) / np.sqrt(data_points.shape[0]), color=fb_typs_colors[fb_type],
                            linewidth=0.75, elinewidth=0.75, linestyle='', marker='o', markersize=2, label=fb_type, zorder=100)
                ax.plot(np.arange(len(unique_blocks))+1, data_points.T, color=fb_typs_colors[fb_type],
                        linewidth=0.5, linestyle='-', markersize=2, label=fb_type, alpha=0.25)

                df = metrics_df.query('metric_type=="{}" & fb_type=="{}"'.format(metric_type, fb_type))
                df['k'] = df['k'].astype(int)
                s = r'$\rho_{CI95}$=' + rm_corr(df, 'k', 'metric', 'subj_id')['CI95%'].values[0]

                lim = metric_types_lims[metric_type]
                ax.text(1, lim[0] + (lim[1]-lim[0])*0.9, s, size=7)
                if fb_typs_axes[fb_type] == 0: ax.set_ylabel(metric_type)


                ax.set_ylim(*lim)
                ax.set_yticks([lim[0], 1., lim[1]])


        stats = []
        stats_extra = []
        z_scores = []
        ds = []
        ps = []
def _rm_corr_r_and_p(df):
    """Run ``pg.rm_corr`` on columns ``x``/``y``/``subject``; return ``(r, p)``.

    Two return shapes are accepted so the caller does not depend on one
    pingouin release: a DataFrame with ``r`` and ``pval`` columns, or a
    three-item ``(r, p, dof)`` sequence.
    """
    result = pg.rm_corr(data=df, x='x', y='y', subject='subject')
    if isinstance(result, pd.DataFrame):
        return result['r'].iloc[0], result['pval'].iloc[0]
    r, p_value, _dof = result
    return r, p_value


def _single_subject_stats(biometric_source, sample_source, biometric,
                          metabolite):
    """Return ``(title, biometric_max, intensities)`` for one subject.

    Correlates the biometric column with log2 of the metabolite column via
    ``pg.corr(..., method='skipped')`` and formats the plot title from the
    resulting ``r`` and ``p-val`` cells.
    """
    x = biometric_source.data[biometric]
    y = np.log2(biometric_source.data[metabolite])
    corr_df = pg.corr(x, y, method='skipped')
    coef = corr_df.iloc[0]['r']
    p_value = corr_df.iloc[0]['p-val']
    title = "Daily average {} vs. log2(Avg. Int.) {}; Spearman's Rho: {}, p = {}".format(
        biometric, metabolite, round(coef, 3), round(p_value, 5))
    return title, max(x), list(sample_source.data[metabolite])


def _add_subject_glyphs(fig, sample_source, biometric_source, view, color,
                        subject_label, biometric, metabolite, bar_width):
    """Draw one subject's metabolite line/markers and biometric step/bars."""
    # Metabolite time series on the primary y axis.
    fig.line('Datetime', metabolite, source=sample_source, color=color)
    fig.circle('Datetime',
               metabolite,
               source=sample_source,
               color=color,
               size=5,
               alpha=0.5,
               view=view,
               hover_color="black")

    # Biometric data on the secondary ("biometric_axis") y range.
    fig.step('Datetime',
             y=biometric,
             color=color,
             mode="center",
             line_dash="dashed",
             source=biometric_source,
             legend="{} Daily Total {}".format(subject_label, biometric),
             y_range_name="biometric_axis")
    fig.vbar(x='Datetime',
             top=biometric,
             fill_color=color,
             width=bar_width,
             line_color=None,
             alpha=0.3,
             source=biometric_source,
             y_range_name="biometric_axis")


def time_series_with_biometric_bar_plot(biometric_source_data_1,
                                        biometric_source_data_2,
                                        sample_source_data_1,
                                        sample_source_data_2, view_1, view_2,
                                        selection_dict):
    """Plot metabolite intensity over time with a biometric overlay.

    The metabolite is drawn as a line plus markers on the primary y axis;
    the biometric is drawn as a dashed step line plus translucent bars on a
    secondary y axis. The title carries a correlation summary: repeated
    measures correlation when both subjects are shown, otherwise
    ``pg.corr(..., method='skipped')`` for the selected subject.

    Args:
        biometric_source_data_1, biometric_source_data_2: per-subject
            sources whose ``.data`` mapping holds the biometric and
            metabolite columns plus ``'Datetime'`` (presumably Bokeh
            ``ColumnDataSource`` objects -- confirm against the caller).
        sample_source_data_1, sample_source_data_2: per-subject sources
            whose ``.data`` mapping holds the metabolite column and
            ``'Datetime'``.
        view_1, view_2: views passed to the marker glyphs of subject 1 / 2.
        selection_dict: mapping with keys ``'biometric'``, ``'metabolite'``,
            ``'user'`` (``"Both"``, ``"Subject1"`` or ``"Subject2"``) and
            ``'scale'`` (appended to the primary y-axis label).

    Returns:
        The configured figure.

    Raises:
        KeyError: if ``selection_dict`` lacks one of the keys above.
        ValueError: if ``selection_dict['user']`` is not a supported value.
    """
    # parse selections
    biometric = selection_dict['biometric']
    metabolite = selection_dict['metabolite']
    user = selection_dict['user']
    scale = selection_dict['scale']
    if user not in ("Both", "Subject1", "Subject2"):
        # Fail early and clearly instead of hitting an unbound local later.
        raise ValueError(
            "selection_dict['user'] must be 'Both', 'Subject1' or "
            "'Subject2', got {!r}".format(user))

    start_time = pd.to_datetime('8/22/2018')
    end_time = pd.to_datetime('9/1/2018')

    # set up data and relevant stats
    if user == "Both":
        # list(...) is used on purpose: .tolist() was causing a refresh error
        x = list(biometric_source_data_1.data[biometric]) + list(
            biometric_source_data_2.data[biometric])
        y = list(np.log2(biometric_source_data_1.data[metabolite])) \
            + list(np.log2(biometric_source_data_2.data[metabolite]))
        subject = ["Subject1"] * len(biometric_source_data_1.data[metabolite]) \
            + ["Subject2"] * len(biometric_source_data_2.data[metabolite])
        df = pd.DataFrame({
            'x': x,
            'y': y,
            'subject': subject,
        })
        r, p_value = _rm_corr_r_and_p(df)
        title = "Daily total/average: {} vs. log2 (Avg. Int.) {}; RM Corr : r = {}, p = {}".format(
            biometric, metabolite, round(r, 3), round(p_value, 3))

        biometric_max = max(x)
        metabolite_intensities = list(sample_source_data_1.data[metabolite]) \
            + list(sample_source_data_2.data[metabolite])
    elif user == "Subject1":
        title, biometric_max, metabolite_intensities = _single_subject_stats(
            biometric_source_data_1, sample_source_data_1, biometric,
            metabolite)
    else:  # user == "Subject2"
        title, biometric_max, metabolite_intensities = _single_subject_stats(
            biometric_source_data_2, sample_source_data_2, biometric,
            metabolite)

    intensity_min, intensity_max = min(metabolite_intensities), \
        max(metabolite_intensities)

    # Set up figure and formatting
    # NOTE(review): plot_width/plot_height and legend= are older Bokeh
    # spellings -- confirm the pinned Bokeh version before upgrading.
    fig = figure(
        title=title,
        tools=tools,
        x_axis_type="datetime",
        plot_width=800,
        plot_height=400,
        x_range=[start_time, end_time],
        y_range=[intensity_min, intensity_max],
    )

    # Secondary y range for the biometric: 10% .. 110% of its maximum.
    biometric_range_start = biometric_max * 0.10
    biometric_range_end = biometric_max * 1.10
    fig.extra_y_ranges = {
        "biometric_axis":
        Range1d(start=biometric_range_start, end=biometric_range_end)
    }

    # Adding the second axis to the plot.
    fig.add_layout(LinearAxis(y_range_name="biometric_axis"), 'right')

    fig.xaxis.ticker = DaysTicker(days=np.arange(1, 59))
    fig.xaxis.formatter = DatetimeTickFormatter(
        hours=["%d %B %Y"],
        days=["%d %B %Y"],
        months=["%d %B %Y"],
        years=["%d %B %Y"],
    )

    fig.output_backend = "svg"
    fig.xaxis.axis_label = None
    fig.toolbar.logo = None
    fig.xaxis.major_label_orientation = pi / 4
    fig.xgrid.grid_line_color = None
    fig.ygrid.grid_line_color = None
    fig.outline_line_color = None

    fig.yaxis.axis_label = metabolite + "  {}".format(scale)
    fig.yaxis[1].axis_label = biometric

    ### Now for actual data ###

    # Bar width has to be huge because a datetime axis is in milliseconds:
    # https://stackoverflow.com/questions/45711567/categorical-y-axis-and-datetime-x-axis-with-bokeh-vbar-plot
    # 3600000 ms per hour * 24 hours = one day
    millisecond_width = 3600000 * 24

    if user in ("Both", "Subject1"):
        _add_subject_glyphs(fig, sample_source_data_1,
                            biometric_source_data_1, view_1, "red",
                            "Subject 1", biometric, metabolite,
                            millisecond_width)

    if user in ("Both", "Subject2"):
        _add_subject_glyphs(fig, sample_source_data_2,
                            biometric_source_data_2, view_2, "blue",
                            "Subject 2", biometric, metabolite,
                            millisecond_width)

    # Light cycle formatting, this needs to come second for tool tips to render
    # One wide grey vertical band per day in the plotted window; the width is
    # intended to approximate a ~6 hr dark-cycle window around midnight.
    night_spans = [
        Span(location=day,
             dimension='height',
             line_color='grey',
             line_width=24,
             line_dash='solid',
             line_alpha=0.3)
        for day in pd.date_range(start=start_time, end=end_time)
    ]
    fig.renderers.extend(night_spans)

    return fig
# Example no. 4
# 0
def print_rm_corr(df_lab, df_wild):
    """
    Plot and print repeated-measures correlations for the MET comparisons.

    For each comparison, ``pg.plot_rm_corr`` is called and then the result of
    ``pg.rm_corr`` is printed, always grouping by the ``'Participant'``
    column. Four comparisons use ``df_lab`` (in-lab) and one uses
    ``df_wild`` (in-wild).
    """
    print("\n\nGETTING RM_CORR:")

    # (section heading, data frame, ((comparison heading, x column, y column), ...))
    sections = (
        ('\nIN-LAB:', df_lab, (
            ('ActiGraph VM3 vs Ainsworth:', 'MET (VM3)', 'MET (Ainsworth)'),
            ('\nWRIST vs Ainsworth:', 'estimation', 'MET (Ainsworth)'),
            ('\nGoogle Fit vs Ainsworth:', 'MET (Google Fit)',
             'MET (Ainsworth)'),
            ('\nWRIST vs ActiGraph VM3:', 'estimation', 'MET (VM3)'),
        )),
        ('\nIN-WILD:', df_wild, (
            ('WRIST vs ActiGraph VM3:', 'estimation', 'MET (VM3)'),
        )),
    )

    for section_heading, frame, comparisons in sections:
        print(section_heading)
        for heading, x_col, y_col in comparisons:
            print(heading)
            # Draw first, then compute and print the statistics table.
            pg.plot_rm_corr(data=frame,
                            x=x_col,
                            y=y_col,
                            subject='Participant')
            print(pg.rm_corr(data=frame,
                             x=x_col,
                             y=y_col,
                             subject='Participant'))
    # load data  into pandas dataframes
    data_samples, data_class, data_images = load_data(
        './plots/experiment_results.pickl')
    # with open('frames.pickl', 'wb') as f:
    #     pickle.dump((data_samples, data_class, data_images),f)
    #with open('frames.pickl', 'rb') as f:
    #    data_samples, data_class, data_images = pickle.load(f)

    print('\ncorrelation model uncertainty with error')
    for dataset in data_class['dataset'].unique():
        data_set = data_class[data_class['dataset'] == dataset]
        for model in data_set['model'].unique():
            model_data = data_set[data_set['model'] == model]
            corr_res = pg.rm_corr(model_data,
                                  x='correct',
                                  y='model_uncertainty',
                                  subject='image')

            print(dataset + "/" + model, float(corr_res['r']),
                  float(corr_res['pval']))

    print('\nged, p-values, each pair ged(model1) < ged(model2)')
    for dataset in data_images['dataset'].unique():
        data_set = data_images[data_images['dataset'] == dataset]
        for modeli in data_set['model'].unique():
            model_datai = data_set[data_set['model'] == modeli]
            for modelj in data_set['model'].unique():
                if modeli == modelj: continue
                model_dataj = data_set[data_set['model'] == modelj]
                ged_res = pg.ttest(model_datai['ged'],
                                   model_dataj['ged'],