def ANOVA_Tukey(variable):
    new = diagnoses["Overall"][["diagnosis", variable]]
    new = new.astype({variable: 'float64'})
    df_pivot = new.pivot(columns="diagnosis", values=variable)
    data = [df_pivot[diagnosis].dropna().values for diagnosis in df_pivot]
    f_val, p_val = stats.f_oneway(*data)
    anova_results = [f_val, p_val]
    tukey = pairwise_tukey(data=new, dv=variable, between="diagnosis")
    return anova_results, tukey
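A minimal, hypothetical harness for the function above; the diagnoses dictionary, its "Overall" key, and the "age" column are invented here purely to show the expected inputs:

import numpy as np
import pandas as pd
from scipy import stats
from pingouin import pairwise_tukey

rng = np.random.default_rng(42)
diagnoses = {
    "Overall": pd.DataFrame({
        "diagnosis": np.repeat(["healthy", "mild", "severe"], 20),
        "age": np.concatenate([rng.normal(loc, 5, 20) for loc in (40, 45, 50)]),
    })
}
anova_results, tukey = ANOVA_Tukey("age")
print(anova_results)  # [F statistic, p-value] from scipy's one-way ANOVA
print(tukey)          # pingouin's pairwise Tukey HSD table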
Example No. 2
def tukey_pairwise_ph(tidy_df,
                      hour_col: str = "Hour",
                      dep_var: str = "Value",
                      protocol_col: str = "Protocol"):
    """

    :type protocol_col: object
    """
    hours = tidy_df[hour_col].unique()
    ph_dict = {}
    for hour in hours:
        print(hour)
        hour_df = tidy_df.query("%s == '%s'" % (hour_col, hour))
        ph = pg.pairwise_tukey(dv=dep_var, between=protocol_col, data=hour_df)
        pg.print_table(ph)
        ph_dict[hour] = ph
    ph_df = pd.concat(ph_dict)

    return ph_df
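A small usage sketch for the helper above, assuming the default "Hour", "Protocol" and "Value" column names; the tidy dataframe below is made up purely to show the expected layout:

import numpy as np
import pandas as pd
import pingouin as pg

rng = np.random.default_rng(0)
tidy = pd.DataFrame({
    "Hour": np.tile(np.repeat(["ZT0", "ZT12"], 15), 2),
    "Protocol": np.repeat(["baseline", "restricted"], 30),
    "Value": rng.normal(10, 2, 60),
})
per_hour_posthocs = tukey_pairwise_ph(tidy)
print(per_hour_posthocs.head())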
Example No. 3
marker_ph_dict = {}
for marker_label, marker_df in marker_dict.items():
    
    print(marker_label)
    # run anova
    curr_anova_marker = pg.anova(
        dv=dep_var,
        between=condition_col,
        data=marker_df
    )
    pg.print_table(curr_anova_marker)
    
    curr_ph_marker = pg.pairwise_tukey(
        dv=dep_var,
        between=condition_col,
        data=marker_df
    )
    pg.print_table(curr_ph_marker)
    marker_ph_dict[marker_label] = curr_ph_marker
    
    # save the files
    label_test_dir = marker_test_dir / marker_label
    if not os.path.exists(label_test_dir):
        os.mkdir(label_test_dir)
    curr_anova_marker.to_csv(label_test_dir / anova_str)
    curr_ph_marker.to_csv(label_test_dir / ph_str)

marker_ph_df = pd.concat(marker_ph_dict)
marker_ph_df.set_index(["A", "B"], append=True, inplace=True)
Example No. 4
                                   between='Subject Group')
tdist_ph
dist_Welch.to_csv(
    '/Users/labc02/Documents/PDCB_data/Behavior/EPM/Stats/total_dist_Welch_s2.csv'
)
tdist_ph.to_csv(
    '/Users/labc02/Documents/PDCB_data/Behavior/EPM/Stats/total_dist_ph_s2.csv'
)
opa_anova = pg.anova(data=epm_s2,
                     dv='Time in Zone (%) - Open Arms',
                     between='Subject Group')
opa_anova
opa_anova.to_csv(
    '/Users/labc02/Documents/PDCB_data/Behavior/EPM/Stats/opa_anova_s2.csv')
opa_ph = pg.pairwise_tukey(data=epm_s2,
                           dv='Time in Zone (%) - Open Arms',
                           between='Subject Group')
opa_ph.to_csv(
    '/Users/labc02/Documents/PDCB_data/Behavior/EPM/Stats/opa_ph_s2.csv')
#%%
epms2_fig, epms2_ax = plt.subplots(nrows=1, ncols=3, figsize=(7, 4))
sns.boxplot(x='Subject Group',
            y='Entries in Zone - Center',
            data=epm_s2,
            palette=['forestgreen', 'forestgreen', 'royalblue', 'royalblue'],
            showmeans=True,
            meanprops={
                'marker': '+',
                'markeredgecolor': 'k'
            },
            ax=epms2_ax[0])
    def mixed_anova(self,
                    stat_key,
                    verbose=True,
                    group_tukey=True,
                    day_tukey=True):
        '''
        Run a mixed-design ANOVA (KO/control group x day) on the given summary statistic,
        with optional Tukey post hoc tests between groups and within each day.

        :param stat_key: name of the summary statistic to analyse
        :return: dict holding the ANOVA table and any post hoc results
        '''

        ko_sum_stat, ctrl_sum_stat = self.summary_stat_matrices(stat_key)

        df = {'ko_ctrl': [], 'day': [], stat_key: [], 'mouse': []}

        for m, mouse in enumerate(self.ko_mice):
            for day in self.days:
                df['ko_ctrl'].append(0)
                df['day'].append(day)
                df[stat_key].append(ko_sum_stat[m, day])
                df['mouse'].append(mouse)

        for m, mouse in enumerate(self.ctrl_mice):
            for day in self.days:
                df['ko_ctrl'].append(1)
                df['day'].append(day)
                df[stat_key].append(ctrl_sum_stat[m, day])
                df['mouse'].append(mouse)

        df = pd.DataFrame(df)
        results = {}
        aov = mixed_anova(data=df,
                          dv=stat_key,
                          between='ko_ctrl',
                          within='day',
                          subject='mouse')
        results['anova'] = aov
        if verbose:
            print('Mixed design ANOVA results')
            print(aov)

        if group_tukey:
            ko_ctrl_tukey = pairwise_tukey(data=df,
                                           dv=stat_key,
                                           between='ko_ctrl')
            results['ko_ctrl_tukey'] = ko_ctrl_tukey
            if verbose:
                print('PostHoc Tukey: KO vs Ctrl')
                print(ko_ctrl_tukey)

        if day_tukey:
            day_stats = []
            print('PostHoc Tukey on each day')
            for day in self.days:
                print('Day %d' % day)
                stats = pairwise_tukey(data=df[df['day'] == day],
                                       dv=stat_key,
                                       between='ko_ctrl')
                day_stats.append(stats)
                if verbose:
                    print(stats)
            results['day_tukey'] = day_stats

        return results
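For reference, a self-contained toy sketch of the long-format layout this method builds and the pingouin calls it relies on; the group sizes, day labels and "lick_rate" statistic below are invented for illustration:

import numpy as np
import pandas as pd
import pingouin as pg

rng = np.random.default_rng(1)
toy = pd.DataFrame({
    'ko_ctrl': np.repeat([0, 1], 4 * 3),        # 4 mice per group, 3 days each
    'day': np.tile([0, 1, 2], 8),
    'lick_rate': rng.normal(5, 1, 24),          # stands in for stat_key
    'mouse': np.repeat(np.arange(8), 3),
})
print(pg.mixed_anova(data=toy, dv='lick_rate', between='ko_ctrl',
                     within='day', subject='mouse'))
print(pg.pairwise_tukey(data=toy, dv='lick_rate', between='ko_ctrl'))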
Example No. 6
#%%
### STATISTICAL TESTS ###

# For subplots D and D we perform a one-way ANOVA to test the null hypothesis that
# two or more groups have the same population mean. Here all the samples are independent,
# as they come from different FCGR3A haplotypes and are each tested in only one condition

# !pip install openpyxl
ANOVA_top = anova(
    data=data_adcc,
    dv='top',  # dependent variable
    between='FCGR3A')  # between-subject identifier
ANOVA_top.to_excel('../stats/ANOVA_top_figure3.xlsx')

ANOVA_top_posthoc = pairwise_tukey(data=data_adcc, dv='top', between='FCGR3A')
ANOVA_top_posthoc.to_excel('../stats/ANOVA_top_posthoc_figure3.xlsx')

ANOVA_ec50 = anova(
    data=data_adcc,
    dv='EC50',  # dependent variable
    between='FCGR3A')  # between-subject identifier
ANOVA_ec50.to_excel('../stats/ANOVA_ec50_figure3.xlsx')

ANOVA_ec50_posthoc = pairwise_tukey(data=data_adcc,
                                    dv='EC50',
                                    between='FCGR3A')
ANOVA_ec50_posthoc.to_excel('../stats/ANOVA_ec50_posthoc_figure3.xlsx')

#%%
### FUNCTION FOR PLOTTING THE STARS ###
# The output from this command provides us with two things.  First, it shows us the result of a t-test for each of the dummy variables, which basically tell us whether each of the conditions separately differs from placebo; it appears that Drug 1 does whereas Drug 2 does not.  However, keep in mind that if we wanted to interpret these tests, we would need to correct the p-values to account for the fact that we have done multiple hypothesis tests; we will see an example of how to do this in the next chapter.
#
# Remember that the hypothesis that we started out wanting to test was whether there was any difference between any of the conditions; we refer to this as an *omnibus* hypothesis test, and it is the test that is provided by the F statistic. The F statistic basically tells us whether our model is better than a simple model that just includes an intercept.  In this case we see that the F test is highly significant, consistent with our impression that there did seem to be differences between the groups (which in fact we know there were, because we created the data).
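
# %% [markdown]
# A minimal sketch of the kind of p-value correction meant here, applied to made-up
# p-values for the two drug dummy variables; statsmodels' multipletests is one standard
# tool for this (a Bonferroni adjustment is shown, but other methods are available).

# %%
from statsmodels.stats.multitest import multipletests

raw_pvals = [0.01, 0.42]  # hypothetical uncorrected p-values for Drug 1 and Drug 2
reject, corrected, _, _ = multipletests(raw_pvals, method='bonferroni')
print(reject, corrected)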

# %%
ols_model = ols(formula='BPsys~ group', data=df)
ols_result = ols_model.fit()
aov_table = sm.stats.anova_lm(ols_result)
aov_table 

# %%
import pingouin as pg
pg.anova(data=df, dv='BPsys', between='group', effsize="np2")

# %%
pg.pairwise_tukey(data=df, dv='BPsys', between='group')

# %% [markdown]
# ## Learning objectives
#
# After reading this chapter, you should be able to:
#
# * Describe the rationale behind the sign test
# * Describe how the t-test can be used to compare a single mean to a hypothesized value
# * Compare the means for two paired or unpaired groups using a two-sample t-test
#
#
# ## Appendix
#
# ### The paired t-test as a linear model
#
Example No. 8
    def test_pandas(self):
        """Test pandas method.
        """
        # Test the ANOVA (Pandas)
        aov = df.anova(dv='Scores', between='Group', detailed=True)
        assert aov.equals(
            pg.anova(dv='Scores', between='Group', detailed=True, data=df))
        aov3_ss1 = df_aov3.anova(dv='Cholesterol',
                                 between=['Sex', 'Drug'],
                                 ss_type=1)
        aov3_ss2 = df_aov3.anova(dv='Cholesterol',
                                 between=['Sex', 'Drug'],
                                 ss_type=2)
        aov3_ss2_pg = pg.anova(dv='Cholesterol',
                               between=['Sex', 'Drug'],
                               data=df_aov3,
                               ss_type=2)
        assert not aov3_ss1.equals(aov3_ss2)
        assert aov3_ss2.round(3).equals(aov3_ss2_pg.round(3))

        # Test the Welch ANOVA (Pandas)
        aov = df.welch_anova(dv='Scores', between='Group')
        assert aov.equals(pg.welch_anova(dv='Scores', between='Group',
                                         data=df))

        # Test the ANCOVA
        aov = df_anc.ancova(dv='Scores', covar='Income',
                            between='Method').round(3)
        assert (aov.equals(
            pg.ancova(data=df_anc,
                      dv='Scores',
                      covar='Income',
                      between='Method').round(3)))

        # Test the repeated measures ANOVA (Pandas)
        aov = df.rm_anova(dv='Scores',
                          within='Time',
                          subject='Subject',
                          detailed=True)
        assert (aov.equals(
            pg.rm_anova(dv='Scores',
                        within='Time',
                        subject='Subject',
                        detailed=True,
                        data=df)))

        # FDR-corrected post hocs with Hedges'g effect size
        ttests = df.pairwise_tests(dv='Scores',
                                   within='Time',
                                   subject='Subject',
                                   padjust='fdr_bh',
                                   effsize='hedges')
        assert (ttests.equals(
            pg.pairwise_tests(dv='Scores',
                              within='Time',
                              subject='Subject',
                              padjust='fdr_bh',
                              effsize='hedges',
                              data=df)))

        # Pairwise Tukey
        tukey = df.pairwise_tukey(dv='Scores', between='Group')
        assert tukey.equals(
            pg.pairwise_tukey(data=df, dv='Scores', between='Group'))

        # Test two-way mixed ANOVA
        aov = df.mixed_anova(dv='Scores',
                             between='Group',
                             within='Time',
                             subject='Subject',
                             correction=False)
        assert (aov.equals(
            pg.mixed_anova(dv='Scores',
                           between='Group',
                           within='Time',
                           subject='Subject',
                           correction=False,
                           data=df)))

        # Test pairwise correlations
        corrs = data.pairwise_corr(columns=['X', 'M', 'Y'], method='spearman')
        corrs2 = pg.pairwise_corr(data=data,
                                  columns=['X', 'M', 'Y'],
                                  method='spearman')
        assert corrs['r'].equals(corrs2['r'])

        # Test partial correlation
        corrs = data.partial_corr(x='X', y='Y', covar='M', method='spearman')
        corrs2 = pg.partial_corr(x='X',
                                 y='Y',
                                 covar='M',
                                 method='spearman',
                                 data=data)
        assert corrs['r'].equals(corrs2['r'])

        # Test partial correlation matrix (compare with the ppcor package)
        corrs = data.iloc[:, :5].pcorr().round(3)
        np.testing.assert_array_equal(corrs.iloc[0, :].to_numpy(),
                                      [1, 0.392, 0.06, -0.014, -0.149])
        # Now compare against Pingouin's own partial_corr function
        corrs = data[['X', 'Y', 'M']].pcorr()
        corrs2 = data.partial_corr(x='X', y='Y', covar='M')
        assert np.isclose(corrs.at['X', 'Y'], corrs2.at['pearson', 'r'])

        # Test rcorr (correlation matrix with p-values)
        # We compare against Pingouin pairwise_corr function
        corrs = df_corr.rcorr(padjust='holm', decimals=4)
        corrs2 = df_corr.pairwise_corr(padjust='holm').round(4)
        assert corrs.at['Neuroticism', 'Agreeableness'] == '*'
        assert (corrs.at['Agreeableness',
                         'Neuroticism'] == str(corrs2.at[2, 'r']))
        corrs = df_corr.rcorr(padjust='holm', stars=False, decimals=4)
        assert (corrs.at['Neuroticism',
                         'Agreeableness'] == str(corrs2.at[2,
                                                           'p-corr'].round(4)))
        corrs = df_corr.rcorr(upper='n', decimals=5)
        corrs2 = df_corr.pairwise_corr().round(5)
        assert corrs.at['Extraversion', 'Openness'] == corrs2.at[4, 'n']
        assert corrs.at['Openness', 'Extraversion'] == str(corrs2.at[4, 'r'])
        # Method = spearman does not work with Python 3.5 on Travis?
        # Instead it seems to return the Pearson correlation!
        df_corr.rcorr(method='spearman')
        df_corr.rcorr()

        # Test mediation analysis
        med = data.mediation_analysis(x='X', m='M', y='Y', seed=42, n_boot=500)
        np.testing.assert_array_equal(med.loc[:, 'coef'].round(4).to_numpy(),
                                      [0.5610, 0.6542, 0.3961, 0.0396, 0.3565])
Example No. 9
names = ["No_Vibration", "Symmetric_Vibration", "Asymmetric_Vibration", "Randomized_Asymmetric_Vibration"]
df1.columns = names
df2.columns = names

import scipy.stats as stats
# scipy's stats.f_oneway function takes the groups as input and returns the F and p-value
# fvalue, pvalue = stats.f_oneway(df1["No_Vibration"], df1["Symmetric_Vibration"], df1["Asymmetric_Vibration"], df1["Randomized_Asymmetric_Vibration"])
fvalue, pvalue = stats.f_oneway(df2["No_Vibration"], df2["Symmetric_Vibration"], df2["Asymmetric_Vibration"], df2["Randomized_Asymmetric_Vibration"])
print(fvalue, pvalue)

# get ANOVA table as R like output
import statsmodels.api as sm
from statsmodels.formula.api import ols
# reshape the dataframe into the long format expected by statsmodels
# d_melt = pd.melt(df1.reset_index(), id_vars=['index'], value_vars=names)
d_melt = pd.melt(df2.reset_index(), id_vars=['index'], value_vars=names)
# replace column names
d_melt.columns = ['index', 'Vibration_Modes', 'value']
# Ordinary Least Squares (OLS) model
model = ols('value ~ C(Vibration_Modes)', data=d_melt).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)

from pingouin import pairwise_tukey
m_comp = pairwise_tukey(data=d_melt, dv='value', between='Vibration_Modes')
print(m_comp)

# Shapiro-Wilk test for normality of the model residuals
w, pvalue = stats.shapiro(model.resid)
print(w, pvalue)
Example No. 10
        print(key)

        # tidy data
        long_df = df.stack().reset_index()
        long_df.columns = stat_colnames
        part_df = long_df.query("%s == '%s'" % (time, part))

        # do anova
        part_rm = pg.rm_anova(dv=dep_var,
                              within=day,
                              subject=anim,
                              data=part_df)
        pg.print_table(part_rm)

        # do posthoc
        ph = pg.pairwise_tukey(dv=dep_var, between=day, data=part_df)
        pg.print_table(ph)
        ph_part_dict[key] = ph

        stage_test_dir = part_dir / key
        anova_file = stage_test_dir / "01_anova.csv"
        ph_file = stage_test_dir / "02_posthoc.csv"

        part_rm.to_csv(anova_file)
        ph.to_csv(ph_file)

    ph_part_df = pd.concat(ph_part_dict)
    ph_total_dict[part] = ph_part_df
ph_total_df = pd.concat(ph_total_dict)
ph_total_df = ph_total_df.reorder_levels([1, 0, 2])
Example No. 11
#data_merged.to_csv(r'C:\Users\user\Desktop\FOCUS\behavioral\P_Merged_var.csv', index = None, header=True)

# ANOVA - does correct reaction time differ between blocks?
aov_corr_rt = anova(dv='corr_rt', between='blocks', data=data_merged)

print(aov_corr_rt)

rep_anov_alarm = pg.rm_anova(data=data_merged,
                             dv='false_alarm',
                             within='blocks',
                             subject='participant',
                             detailed=True)

# follow-up pairwise comparison
pairs_corr_rt = pairwise_tukey(dv='corr_rt',
                               between='blocks',
                               data=data_merged)

print(pairs_corr_rt)

#### ANOVA - do false alarms differ between blocks?
aov_alarms = anova(dv='false_alarm', between='blocks', data=data_merged)

print(aov_alarms)

# follow-up pairwise comparison
pairs_alarms = pairwise_tukey(dv='false_alarm',
                              between='blocks',
                              data=data_merged)

print(pairs_alarms)
Example No. 12
d_melt = pd.melt(data0.reset_index(),
                 id_vars=['index'],
                 value_vars=['No_Up', 'Single_Up', 'Double_Up'])
# replace column names
d_melt.columns = ['index', 'treatments', 'value']
# Ordinary Least Squares (OLS) model
model = ols('value ~ C(treatments)', data=d_melt).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

# pairwise comparison significance with Tukey HSD: https://reneshbedre.github.io/blog/anova.html
from pingouin import pairwise_tukey
# perform multiple pairwise comparisons (Tukey HSD)
# for unbalanced (unequal sample size) data, pairwise_tukey uses the Tukey-Kramer test
# (a short sketch of the unbalanced case follows below)
print("Pairwise comparison with Tukey HSD")
m_comp = pairwise_tukey(data=d_melt, dv='value', between='treatments')
print(m_comp)
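To make the Tukey-Kramer note above concrete, a self-contained sketch with deliberately unbalanced, made-up groups; pingouin handles the unequal sample sizes automatically:

import numpy as np
import pandas as pd
from pingouin import pairwise_tukey

rng = np.random.default_rng(7)
unbalanced = pd.DataFrame({
    'treatments': ['No_Up'] * 10 + ['Single_Up'] * 15 + ['Double_Up'] * 8,
    'value': np.concatenate([rng.normal(m, 1, n)
                             for m, n in [(5, 10), (6, 15), (7, 8)]]),
})
print(pairwise_tukey(data=unbalanced, dv='value', between='treatments'))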

plt.title("Regulation: no change", fontsize=16)
plt.tight_layout()
pdf.savefig(fig)
##### For the runs with changes ###################

sigma_1 = 0.01875
sigma_1_list = sigma_1 * np.array([1 / 2, 2, 2**2, 2**3, 2**4])

mes = ["halved"] + ["times %d" % 2**i for i in (1, 2, 3, 4)]

for i in range(len(sigma_1_list)):
    data1 = pd.read_csv("MI_10000traj_shift30_%d.csv" % i)
Example No. 13
    count_dir = marker_test_dir / "01_count"
    mean_dir = marker_test_dir / "02_mean"
    hist_dir = marker_test_dir / "03_hist"
    for directory in [count_dir, mean_dir, hist_dir]:
        if not os.path.exists(directory):
            os.mkdir(directory)

    curr_count = count_data_dict[curr_label]
    curr_mean = mean_data_dict[curr_label]
    curr_hist = hist_data_dict[curr_label]

    count = count_cols[-1]
    count_anova = pg.anova(dv=count, between=condition_col, data=curr_count)
    pg.print_table(count_anova)
    count_ph = pg.pairwise_tukey(dv=count,
                                 between=condition_col,
                                 data=curr_count)
    pg.print_table(count_ph)
    count_anova.to_csv(count_dir / anova_str)
    count_ph.to_csv(count_dir / ph_str)
    count_stats_dict[curr_label] = count_ph

    mean = mean_cols[-1]
    mean_anova = pg.anova(dv=mean, between=condition_col, data=curr_mean)
    pg.print_table(mean_anova)
    mean_ph = pg.pairwise_tukey(dv=mean, between=condition_col, data=curr_mean)
    pg.print_table(mean_ph)
    mean_anova.to_csv(mean_dir / anova_str)
    mean_ph.to_csv(mean_dir / ph_str)
    mean_stats_dict[curr_label] = mean_ph
Example No. 14
                                   (HWK_errors['Block'] == 'D1_2')].item()
    d2a = HWK_errors['Error mean'][(HWK_errors['Subject'] == ii) &
                                   (HWK_errors['Block'] == 'D2_1')].item()
    d2b = HWK_errors['Error mean'][(HWK_errors['Subject'] == ii) &
                                   (HWK_errors['Block'] == 'D2_2')].item()
    d3a = HWK_errors['Error mean'][(HWK_errors['Subject'] == ii) &
                                   (HWK_errors['Block'] == 'D3_1')].item()

    index_dict['Acquisition'].append(((d1a - d1b) + (d2a - d2b)) / 2)
    index_dict['Retrival'].append(((d1b - d2a) + (d2b - d3a)) / 2)

kesner_index = pd.DataFrame(index_dict)
kesner_index
pg.anova(dv='Acquisition', between=['Genotype', 'Sex'],
         data=kesner_index, export_filename='kesneraov_acq')
acq_tk = pg.pairwise_tukey(dv='Acquisition', between='Group', data=kesner_index)
pg.anova(dv='Retrival', between=['Genotype', 'Sex'],
         data=kesner_index, export_filename='kesneraov_ret')

acq_tk
#  Kesner indexes figure
#%%
kesner_ind = plt.figure(figsize=(9, 5))
plt.subplot(1, 2, 1)
acq_ax = sns.barplot(x='Group', y='Acquisition', data=kesner_index,
                     ci=68, capsize=.3, palette=['g', 'g', 'b', 'b'])
plt.xticks(range(0, 4), ['Fem_KO', 'Male_KO', 'Fem_WT', 'Male_WT'])
acq_ax.annotate('*', xy=(0.5, .93), xytext=(0.5, .91), xycoords='axes fraction', fontsize=18, ha='center',
                va='bottom', fontweight='bold', arrowprops=dict(arrowstyle='-[, widthB=6, lengthB=.1', lw=2, color='black'))
acq_ax.annotate('**', xy=(0.25, .83), xytext=(0.25, .81), xycoords='axes fraction', fontsize=18, ha='center',
                va='bottom', fontweight='bold', arrowprops=dict(arrowstyle='-[, widthB=2, lengthB=.1', lw=2, color='black'))
Example No. 15
hourly_test_dir = save_test_dir / "hour_prop"

# prop 2 way rm
test_rm = pg.rm_anova2(dv=dep_var,
                       within=[day_col, hour_col],
                       subject=anim,
                       data=long_df)
pg.print_table(test_rm)

# prop post hoc
ph_dict = {}
for hour in hours:
    print(hour)
    hour_df = long_df.query("%s == '%s'" % (hour_col, hour))
    ph = pg.pairwise_tukey(dv=dep_var, between=day_col, data=hour_df)
    pg.print_table(ph)
    ph_dict[hour] = ph
hourly_ph_df = pd.concat(ph_dict)

hr_anova_file = hourly_test_dir / anova_csv
hr_ps_file = hourly_test_dir / ph_csv
test_rm.to_csv(hr_anova_file)
hourly_ph_df.to_csv(hr_ps_file)

# can't do repeated measures on swe since there are missing values
swe_test = delta_mean_masked.reset_index()
swe_test = swe_test.iloc[:, [0, 2, 1, 3]].copy()
swe_test.columns = stat_colnames

swa_test_dir = save_test_dir / "SWA"
Example No. 16
data2 = mean2 + np.random.randn(N2)*stdev
data3 = mean3 + np.random.randn(N3)*stdev

datacolumn = np.hstack((data1,data2,data3))

# group labels
groups = ['1']*N1 + ['2']*N2 + ['3']*N3

# convert to a pandas dataframe
df = pd.DataFrame({'TheData':datacolumn,'Group':groups})
df


# In[ ]:


pg.anova(data=df,dv='TheData',between='Group')


# In[ ]:


pg.pairwise_tukey(data=df,dv='TheData',between='Group')


# In[ ]:


df.boxplot('TheData',by='Group');