def anova_test(self, value_col, group_col, subject_col, condition=False, display_result=True):
    """Run ``rm_anova`` on the (optionally filtered) data and return its table.

    Args:
        value_col: Second positional argument handed to ``rm_anova``.
        group_col: Third positional argument handed to ``rm_anova``.
        subject_col: Fourth positional argument handed to ``rm_anova``.
        condition: Forwarded to ``self.__get_condition``. ``False`` (the
            default) suppresses the printed condition description.
        display_result: When truthy, print a banner, the condition text (if
            any) and the result table.

    Returns:
        The object returned by ``rm_anova``.
    """
    # NOTE(review): rm_anova is presumably pingouin's, i.e. the positional
    # arguments map to (data, dv, within, subject) -- confirm the import.
    subset = self.__get_condition(self.df, condition)
    result = rm_anova(subset, value_col, group_col, subject_col,
                      correction=True, effsize='n2')

    if not display_result:
        return result

    for banner_line in ("#############", "### ANOVA ###", "#############"):
        print(banner_line)
    if condition is not False:
        print(self.__condition_to_string(condition))
    display(result)
    print("")
    return result
def do_anova(meas, n_subjs, thetas, n_rules=2):
    """Run a two-way repeated-measures ANOVA (rule x theta) on ``meas``.

    Args:
        meas: Array indexed as ``meas[theta, rule, subject]``.
        n_subjs: Number of subjects to include (leading entries of the last
            axis of ``meas``).
        thetas: Sequence of theta levels; only its length is used.
        n_rules: Number of rule levels (leading entries of the second axis).

    Returns:
        Tuple ``(fs, pvals, etas)``: the ``F`` column as a NumPy array and the
        ``p-unc`` and ``n2`` columns as pandas Series.
    """
    # The previous implementation overwrote ``n_subjs`` with the module-level
    # ``n_reps_bundled``, silently ignoring the caller's argument (and raising
    # NameError when that global was absent). The parameter is now honoured.
    n_thetas = len(thetas)

    # One long-format row per (subject, theta, rule) observation, in the same
    # order as the original triple loop.
    rows = [
        (subj_i, rule_i, theta_i, meas[theta_i, rule_i, subj_i])
        for subj_i in range(n_subjs)
        for theta_i in range(n_thetas)
        for rule_i in range(n_rules)
    ]
    # Keep everything as float64 (as the original zero-initialised array did);
    # the reshape keeps the empty case a valid (0, 4) table.
    df2 = pd.DataFrame(np.array(rows, dtype=float).reshape(-1, 4),
                       columns=['obs', 'ruleFact', 'thetaFact', 'perf'])

    aov = pg.rm_anova(data=df2,
                      dv='perf',
                      subject='obs',
                      within=['ruleFact', 'thetaFact'],
                      detailed=True,
                      effsize='n2')
    return aov['F'].values, aov['p-unc'], aov['n2']
def pairwise_ttests_paired(self):
    """Compute a one-way RM ANOVA and Holm-adjusted paired post-hoc tests.

    ``self.df`` is melted to long format with ``ID`` as the identifier column.
    The results are stored on ``self.oneway_rm_aov`` and
    ``self.ttests_paired``; nothing is returned.
    """
    long_df = self.df.melt(id_vars="ID")
    # Both analyses share the same design specification.
    design = dict(data=long_df, dv="value", within="variable", subject='ID')

    self.oneway_rm_aov = pg.rm_anova(**design)
    self.ttests_paired = pg.pairwise_ttests(padjust="holm",
                                            effsize="hedges",
                                            parametric=True,
                                            **design)
def perform_anova(self, intensity):
    """Run and print an RM ANOVA plus Bonferroni post-hocs for one column.

    Args:
        intensity: Name of the dependent-variable column in
            ``self.df_percent``.

    The ANOVA table is stored on ``self.aov`` and the pairwise tests on
    ``self.posthoc``; both are printed. Nothing is returned.
    """
    # Design shared by the omnibus test and the post-hoc comparisons.
    design = {
        "data": self.df_percent,
        "dv": intensity,
        "within": "Model",
        "subject": "ID",
    }

    self.aov = pg.rm_anova(correction=True, detailed=True, **design)
    print(self.aov)

    self.posthoc = pg.pairwise_ttests(padjust="bonf",
                                      effsize="hedges",
                                      parametric=True,
                                      **design)
    print(self.posthoc)
def make_anova_2way(df, title):
    """Run a question x context RM ANOVA with post-hocs and write reports.

    Three files are written, all prefixed with ``FILENAME_ANOVAS + fn``:
    ``-anova.csv`` (ANOVA table), ``-posthocs.csv`` (Bonferroni-adjusted
    pairwise tests) and ``-anova.txt`` (``title`` followed by both tables as
    text).

    Args:
        df: Long-format data with ``value``, ``question``, ``context`` and
            ``ResponseId`` columns.
        title: Heading placed on the first line of the text report.

    Returns:
        None.
    """
    # NOTE(review): ``FILENAME_ANOVAS`` and ``fn`` are not defined in this
    # function; they are expected to exist at module level -- confirm.
    print("\tMAKING ANOVA")

    design = dict(dv='value',
                  within=['question', 'context'],
                  subject='ResponseId',
                  data=df)
    out_stem = FILENAME_ANOVAS + fn

    # NOTE: the previous code called ``aov.round(3)`` and discarded the
    # result, so the tables were always written unrounded. That output is
    # kept as-is; only the no-op call was dropped.
    aov = pg.rm_anova(**design)
    aov.to_csv(out_stem + '-anova.csv')

    posthocs = pg.pairwise_ttests(padjust='bonf', **design)
    posthocs.to_csv(out_stem + '-posthocs.csv')
    print()

    # The former "homogenous data" fallback was unreachable (its flag was
    # hard-coded to False) and referenced undefined names, so it was removed.
    anova_text = title + "\n" + str(aov) + "\n" + str(posthocs)

    # Context manager guarantees the handle is closed even if write() fails.
    with open(out_stem + "-anova.txt", "w") as report:
        report.write(anova_text)
def activity_stats(self, data_type='percent'):
    """Run a one-way RM ANOVA (within = Model) for every activity intensity.

    Args:
        data_type: ``'percent'`` to analyse the ``"...%"`` columns of
            ``self.df_activity``, ``'minutes'`` to analyse the plain columns.

    Returns:
        A DataFrame stacking the detailed ANOVA table of every intensity, with
        a leading ``Intensity`` column and a trailing ``Significant`` column
        (``"Yes"`` when ``p-unc`` < .05, otherwise ``"No"``).

    Raises:
        ValueError: If ``data_type`` is neither ``'percent'`` nor
            ``'minutes'``.
    """
    # Local import: this block did not previously reference ``pd`` directly.
    import pandas as pd

    base_names = ["Sedentary", "Light", "Moderate", "Vigorous", "MVPA"]
    if data_type == 'percent':
        intensity_list = [name + "%" for name in base_names]
    elif data_type == 'minutes':
        intensity_list = base_names
    else:
        # Previously this fell through and crashed with UnboundLocalError.
        raise ValueError(
            "data_type must be 'percent' or 'minutes', got {!r}".format(data_type))

    df = self.df_activity[["ID", "Model"] + intensity_list]

    tables = []
    for intensity in intensity_list:
        aov = pg.rm_anova(data=df, dv=intensity, within="Model", subject="ID",
                          correction=True, detailed=True)
        # A scalar is broadcast to every row, so no row count is hard-coded.
        aov.insert(0, "Intensity", intensity)
        tables.append(aov)

    # DataFrame.append was removed in pandas 2.0; concat keeps the same row
    # order and the same (repeating) index values.
    aov_df = pd.concat(tables)
    aov_df["Significant"] = ["Yes" if p < .05 else "No" for p in aov_df["p-unc"]]
    return aov_df
def test_friedman(df, ind_var, dep_var, is_non_normal=None):
    """Print descriptives, then run an RM ANOVA or a Friedman test.

    The RM ANOVA is used when the data are not flagged as non-normal and the
    sphericity check is truthy; otherwise a Friedman test is run and a
    ``Kendall`` column (Q / (N * (k - 1))) is added to its result.

    Args:
        df: Long-format data containing ``ID``, ``ind_var`` and ``dep_var``
            columns (and ``'Condition number'`` when ``is_non_normal`` is
            ``None``).
        ind_var: Name of the within-subject factor column.
        dep_var: Name of the dependent-variable column.
        is_non_normal: Optional override. When ``None`` it is derived from
            ``test_normality``: any returned p-value below 0.01 marks the data
            as non-normal.

    Returns:
        The rounded result table that was printed (previously nothing was
        returned).
    """
    print(f'\n{dep_var}:')

    if is_non_normal is None:
        normality_p = test_normality(df, dep_var,
                                     list(df['Condition number'].unique()))
        is_non_normal = any(p < 0.01 for p in normality_p)

    # NOTE(review): only the truthiness of this value is used below. If
    # test_sphericity returns a p-value rather than a boolean, almost any
    # value counts as "spherical" -- confirm against its definition.
    sphericity_p = test_sphericity(df, dep_var, ind_var)

    levels = list(df[ind_var].unique())
    for iv in levels:
        dv = list(df.loc[df[ind_var] == iv, dep_var])
        print(f'{iv}: mean={round(np.mean(dv), 2)}, SD={round(np.std(dv), 2)}')

    if not is_non_normal and sphericity_p:
        print('\nRM ANOVA')
        results = pg.rm_anova(data=df, dv=dep_var, within=ind_var,
                              subject='ID', correction=False, detailed=True)
        results = results.round(4)
    else:
        print('\nFriedman test')
        results = pg.friedman(data=df, dv=dep_var, within=ind_var,
                              subject='ID')
        chi_square = results['Q'].iloc[0]
        n_subjects = len(df['ID'].unique())
        n_levels = len(levels)
        # Kendall's W derived from the Friedman statistic.
        results['Kendall'] = [chi_square / (n_subjects * (n_levels - 1))]
        results = results.round(3)

    print(results)
    return results
def anova_onoff(on, off, subjects, columns):
    """Run a block x Triplet repeated-measures ANOVA on reaction times.

    Args:
        on: 2-D array of values for the "on" condition; it receives a subject
            index as a new first column and is labelled with ``columns``.
        off: Same for the "off" condition, labelled with ``columns[:-1]``.
        subjects: Sequence whose length gives the number of subjects.
        columns: Column labels; the first must be ``'sub'`` (it is used as the
            melt identifier), the rest name the blocks.

    Returns:
        The table produced by ``pg.rm_anova``.
    """
    subject_ids = np.arange(len(subjects))

    def _to_long(values, labels, make_flag):
        # Prepend the subject index, melt to long format, tag the condition.
        wide = pd.DataFrame(data=np.insert(values, 0, subject_ids, axis=1),
                            columns=labels)
        long_df = pd.melt(wide,
                          id_vars=['sub'],
                          value_vars=labels[1:],
                          var_name='block',
                          value_name='RT')
        long_df.insert(1, 'Triplet', make_flag(len(long_df)))
        return long_df

    off_long = _to_long(off, columns[:-1], np.zeros)  # Triplet == 0
    on_long = _to_long(on, columns, np.ones)  # Triplet == 1

    stacked = pd.concat([on_long, off_long])
    return pg.rm_anova(data=stacked,
                       dv='RT',
                       within=['block', 'Triplet'],
                       subject='sub')
def test_pandas(self):
    """Check that the pandas DataFrame methods mirror the ``pg`` functions.

    Uses the module-level fixtures ``df``, ``df_aov3``, ``df_anc``, ``data``
    and ``df_corr``.
    """
    # One-way ANOVA
    oneway_kw = dict(dv='Scores', between='Group', detailed=True)
    assert df.anova(**oneway_kw).equals(pg.anova(data=df, **oneway_kw))

    # Two-way ANOVA: type 1 and type 2 sums of squares must differ, and the
    # method must agree with the function for type 2.
    twoway_kw = dict(dv='Cholesterol', between=['Sex', 'Drug'])
    ss1_method = df_aov3.anova(ss_type=1, **twoway_kw)
    ss2_method = df_aov3.anova(ss_type=2, **twoway_kw)
    ss2_function = pg.anova(data=df_aov3, ss_type=2, **twoway_kw)
    assert not ss1_method.equals(ss2_method)
    assert ss2_method.round(3).equals(ss2_function.round(3))

    # Welch ANOVA
    welch_kw = dict(dv='Scores', between='Group')
    assert df.welch_anova(**welch_kw).equals(
        pg.welch_anova(data=df, **welch_kw))

    # ANCOVA
    ancova_kw = dict(dv='Scores', covar='Income', between='Method')
    assert df_anc.ancova(**ancova_kw).round(3).equals(
        pg.ancova(data=df_anc, **ancova_kw).round(3))

    # Repeated measures ANOVA
    rm_kw = dict(dv='Scores', within='Time', subject='Subject', detailed=True)
    assert df.rm_anova(**rm_kw).equals(pg.rm_anova(data=df, **rm_kw))

    # FDR-corrected post hocs with Hedges' g effect size
    posthoc_kw = dict(dv='Scores', within='Time', subject='Subject',
                      padjust='fdr_bh', effsize='hedges')
    assert df.pairwise_tests(**posthoc_kw).equals(
        pg.pairwise_tests(data=df, **posthoc_kw))

    # Pairwise Tukey
    tukey_kw = dict(dv='Scores', between='Group')
    assert df.pairwise_tukey(**tukey_kw).equals(
        pg.pairwise_tukey(data=df, **tukey_kw))

    # Two-way mixed ANOVA
    mixed_kw = dict(dv='Scores', between='Group', within='Time',
                    subject='Subject', correction=False)
    assert df.mixed_anova(**mixed_kw).equals(
        pg.mixed_anova(data=df, **mixed_kw))

    # Pairwise correlations
    pairwise_kw = dict(columns=['X', 'M', 'Y'], method='spearman')
    from_method = data.pairwise_corr(**pairwise_kw)
    from_function = pg.pairwise_corr(data=data, **pairwise_kw)
    assert from_method['r'].equals(from_function['r'])

    # Partial correlation
    partial_kw = dict(x='X', y='Y', covar='M', method='spearman')
    from_method = data.partial_corr(**partial_kw)
    from_function = pg.partial_corr(data=data, **partial_kw)
    assert from_method['r'].equals(from_function['r'])

    # Partial correlation matrix (compare with the ppcor package)
    pcorr_matrix = data.iloc[:, :5].pcorr().round(3)
    np.testing.assert_array_equal(pcorr_matrix.iloc[0, :].to_numpy(),
                                  [1, 0.392, 0.06, -0.014, -0.149])

    # Now compare against Pingouin's own partial_corr function
    pcorr_matrix = data[['X', 'Y', 'M']].pcorr()
    partial = data.partial_corr(x='X', y='Y', covar='M')
    assert np.isclose(pcorr_matrix.at['X', 'Y'], partial.at['pearson', 'r'])

    # rcorr (correlation matrix with p-values), compared against the
    # pairwise_corr output
    rmat = df_corr.rcorr(padjust='holm', decimals=4)
    pairs = df_corr.pairwise_corr(padjust='holm').round(4)
    assert rmat.at['Neuroticism', 'Agreeableness'] == '*'
    assert rmat.at['Agreeableness', 'Neuroticism'] == str(pairs.at[2, 'r'])

    rmat = df_corr.rcorr(padjust='holm', stars=False, decimals=4)
    assert (rmat.at['Neuroticism', 'Agreeableness'] ==
            str(pairs.at[2, 'p-corr'].round(4)))

    rmat = df_corr.rcorr(upper='n', decimals=5)
    pairs = df_corr.pairwise_corr().round(5)
    assert rmat.at['Extraversion', 'Openness'] == pairs.at[4, 'n']
    assert rmat.at['Openness', 'Extraversion'] == str(pairs.at[4, 'r'])

    # Method = spearman does not work with Python 3.5 on Travis?
    # Instead it seems to return the Pearson correlation!
    df_corr.rcorr(method='spearman')
    df_corr.rcorr()

    # Mediation analysis
    med = data.mediation_analysis(x='X', m='M', y='Y', seed=42, n_boot=500)
    np.testing.assert_array_equal(med.loc[:, 'coef'].round(4).to_numpy(),
                                  [0.5610, 0.6542, 0.3961, 0.0396, 0.3565])
else: error = "sd" if y_var2 == "None": st.write(df.groupby(y_var)[x_var].agg(['mean', 'std', 'sem']).round(2)) else: st.write( df.groupby([y_var, y_var2])[x_var].agg(['mean', 'std', 'sem']).round(2)) if y_var2 == "None": st.success("One-way repeated measures ANOVA results") st.write( pg.rm_anova(dv=x_var, within=y_var, subject=subject_var, data=df, detailed=True)) st.success("Post-hoc tests results") st.write( pg.pairwise_ttests(dv=x_var, within=y_var, subject=subject_var, data=df)) st.success("Plots are being generated") fig = plt.figure(figsize=(12, 6)) try: ax = sns.pointplot(data=df, x=y_var, y=x_var,
def stats_effect_weeks(self, excel_path):
    """Run an RM ANOVA and Holm-adjusted pairwise t-tests on weekly means.

    Each session (third column of the workbook) is assigned to a training
    week. ``Passing_Time`` is averaged per animal and week, and the results
    are written to ``Stats.xlsx`` next to ``excel_path`` with three sheets:
    ``Data`` (one row per animal, one column per week), ``ANOVA`` and
    ``Post Hoc``.

    Parameters
    ----------
    excel_path : str or path-like
        Excel file produced by ``analysis()``; must contain ``Animal`` and
        ``Passing_Time`` columns, with the session number in the third column.

    Returns
    -------
    None.
    """
    df_excel = pd.read_excel(excel_path)

    # Session -> week lookup. Bounds are handed to self.range1 exactly as
    # before; setdefault keeps the first week if ranges ever overlapped.
    week_bounds = {1: (1, 9), 2: (10, 14), 3: (15, 19), 4: (20, 24), 5: (25, 29)}
    session_to_week = {}
    for week_no, (first, last) in week_bounds.items():
        for session in self.range1(first, last):
            session_to_week.setdefault(session, week_no)

    # Sessions outside every range are labelled 'Error', as before.
    df_excel['Semaine'] = [
        session_to_week.get(session, 'Error') for session in df_excel.iloc[:, 2]
    ]

    # Mean passing time per animal and week.
    df_stats = (df_excel[['Animal', 'Passing_Time', 'Semaine']]
                .groupby(['Animal', 'Semaine'])
                .mean()
                .reset_index())

    # One row per animal, one column per week. The previous row-offset
    # arithmetic (iloc[i + 1] ... iloc[i + 4]) read another animal's values or
    # raised IndexError whenever a week was missing; pivoting leaves NaN
    # instead. As before, only animals with a week-1 mean are listed, in order
    # of first appearance in the workbook.
    week_labels = {week_no: 'Semaine {}'.format(week_no) for week_no in week_bounds}
    wide = (df_stats.pivot(index='Animal', columns='Semaine', values='Passing_Time')
            .reindex(columns=list(week_labels))
            .rename(columns=week_labels))
    has_week1 = set(df_stats.loc[df_stats['Semaine'] == 1, 'Animal'])
    animals = [a for a in dict.fromkeys(df_excel.Animal.tolist()) if a in has_week1]
    df_stats_arranged = wide.reindex(index=animals).reset_index()
    df_stats_arranged.columns.name = None

    # Repeated-measures ANOVA and Holm-adjusted post-hoc comparisons.
    df_result = pd.DataFrame(
        pg.rm_anova(dv='Passing_Time',
                    within='Semaine',
                    subject='Animal',
                    data=df_stats,
                    detailed=True))
    df_post_hocs = pd.DataFrame(
        pairwise_ttests(dv='Passing_Time',
                        within='Semaine',
                        subject='Animal',
                        data=df_stats,
                        padjust='holm'))

    # ExcelWriter.save() was removed in pandas 2.0; the context manager closes
    # (and therefore saves) the workbook even if a sheet fails to write.
    self.writer = pd.ExcelWriter('{}/Stats.xlsx'.format(Path(excel_path).parent),
                                 engine='xlsxwriter')
    with self.writer as writer:
        df_stats_arranged.to_excel(writer, sheet_name='Data')
        df_result.to_excel(writer, sheet_name='ANOVA')
        df_post_hocs.to_excel(writer, sheet_name='Post Hoc')
def test_pandas(self):
    """Check that the pandas DataFrame methods mirror the ``pg`` functions.

    Uses the module-level fixtures ``df`` and ``data``.
    """
    # One-way ANOVA
    oneway_kw = dict(dv='Scores', between='Group', detailed=True)
    assert df.anova(**oneway_kw).equals(pg.anova(data=df, **oneway_kw))

    # Welch ANOVA
    welch_kw = dict(dv='Scores', between='Group')
    assert df.welch_anova(**welch_kw).equals(
        pg.welch_anova(data=df, **welch_kw))

    # Repeated measures ANOVA
    rm_kw = dict(dv='Scores', within='Time', subject='Subject', detailed=True)
    assert df.rm_anova(**rm_kw).equals(pg.rm_anova(data=df, **rm_kw))

    # FDR-corrected post hocs with Hedges' g effect size
    posthoc_kw = dict(dv='Scores', within='Time', subject='Subject',
                      padjust='fdr_bh', effsize='hedges')
    assert df.pairwise_ttests(**posthoc_kw).equals(
        pg.pairwise_ttests(data=df, **posthoc_kw))

    # Two-way mixed ANOVA
    mixed_kw = dict(dv='Scores', between='Group', within='Time',
                    subject='Subject', correction=False)
    assert df.mixed_anova(**mixed_kw).equals(
        pg.mixed_anova(data=df, **mixed_kw))

    # Pairwise correlations
    pairwise_kw = dict(columns=['X', 'M', 'Y'], method='spearman')
    from_method = data.pairwise_corr(**pairwise_kw)
    from_function = pg.pairwise_corr(data=data, **pairwise_kw)
    assert from_method['r'].equals(from_function['r'])

    # Partial correlation
    partial_kw = dict(x='X', y='Y', covar='M', method='spearman')
    from_method = data.partial_corr(**partial_kw)
    from_function = pg.partial_corr(data=data, **partial_kw)
    assert from_method['r'].equals(from_function['r'])

    # Partial correlation matrix (compare with the ppcor package)
    pcorr_matrix = data.pcorr().round(3)
    np.testing.assert_array_equal(pcorr_matrix.iloc[0, :].values,
                                  [1, 0.392, 0.06, -0.014, -0.149])

    # Now compare against Pingouin's own partial_corr function
    pcorr_matrix = data[['X', 'Y', 'M']].pcorr()
    partial = data.partial_corr(x='X', y='Y', covar='M')
    assert round(pcorr_matrix.loc['X', 'Y'], 3) == partial.loc['pearson', 'r']

    # Mediation analysis
    med = data.mediation_analysis(x='X', m='M', y='Y', seed=42, n_boot=500)
    np.testing.assert_array_equal(med.loc[:, 'coef'].values,
                                  [0.5610, 0.6542, 0.3961, 0.0396, 0.3565])
columns=["winsize", "clustering"], values="deviation") pivot_t.to_csv("pt_exp2.csv") # mean values if cal_mean_std: crowdingcon = 0 cal_ds_mean(mydata, crowdingcon=crowdingcon, col_name="crowding") cal_ds_std(mydata, crowdingcon=crowdingcon, col_name="crowding") # 3 ways anova if see_clustering_level: aov_table = AnovaRM(data=mydata2test, depvar="deviation", subject="participantID", within=["crowding", "winsize", "clustering"]).fit() aov_table.summary() else: aov = pg.rm_anova(dv="deviation", within=["winsize", "crowding"], subject="participantID", data=mydata2test) posthocs = pg.pairwise_ttests(dv="deviation", within=["winsize", "crowding"], subject="participantID", data=mydata2test, padjust="fdr_bh", effsize="cohen")
def data(): # have this make a sigmoid with random number of correct trials each time its called. start_time = time.time() iterations = 500 trials = 60 track_lengths = np.array([50, 75, 112.5, 168.75, 253.125]) coef1 = 5 coef2 = -0.05 coef3 = 4 coef4 = -0.02 n_conditions = 2 n_subjects = np.array([2, 4, 5, 10, 20, 30, 40]) coef3s = np.linspace(1, 10, 5) coef4s = np.linspace(-0.01, -0.1, 5) parameters_powers_conditions = np.zeros( (len(n_subjects), len(coef3s), len(coef4s))) parameters_powers_track_length = np.zeros( (len(n_subjects), len(coef3s), len(coef4s))) parameters_powers_interaction = np.zeros( (len(n_subjects), len(coef3s), len(coef4s))) # consider condition 1 first group1_theo = (np.e**(coef1 + (coef2*track_lengths)))/ \ (np.e**(coef1 + (coef2*track_lengths))+1) z1 = coef1 + (coef2 * track_lengths) pr = 1 / (1 + np.e**(-z1)) # now consider condition 2 group2_theo = (np.e**(coef3 + (coef4*track_lengths)))/ \ (np.e**(coef3 + (coef4*track_lengths))+1) z2 = coef3 + (coef4 * track_lengths) pr2 = 1 / (1 + np.e**(-z2)) for counter3, coef3 in enumerate(coef3s): for counter4, coef4 in enumerate(coef4s): z1 = coef1 + (coef2 * track_lengths) pr = 1 / (1 + np.e**(-z1)) z2 = coef3 + (coef4 * track_lengths) pr2 = 1 / (1 + np.e**(-z2)) for n_counter, n in enumerate(n_subjects): condition_p = [] track_length_p = [] interaction_p = [] subject_id_long = np.tile( np.transpose( np.tile(np.linspace(1, n, n), (len(track_lengths), 1))).flatten(), n_conditions) conditions_long = np.append( np.ones(len(track_lengths) * n), np.ones(len(track_lengths) * n) * 2) # currently hardcoded for only 2 conditions track_lengths_long = np.tile(track_lengths, n_conditions * n) for i in range(iterations): y_percentage1 = ( (np.random.binomial(trials, pr, (n, len(track_lengths))) / trials) * 100).flatten() y_percentage2 = ( (np.random.binomial(trials, pr2, (n, len(track_lengths))) / trials) * 100).flatten() appended_correct = np.append( y_percentage1, y_percentage2 ) # currently hardcoded for 
only 2 conditions df = pd.DataFrame({ "subject": subject_id_long, "Condition": conditions_long, 'Track_length': track_lengths_long, 'percentage_corr_trials': appended_correct }) aov = pg.rm_anova(dv='percentage_corr_trials', within=['Condition', 'Track_length'], subject='subject', data=df, detailed=True) condition_p.append( np.nan_to_num( aov[aov.Source == "Condition"]['p-unc'].values[0])) track_length_p.append( np.nan_to_num(aov[aov.Source == "Track_length"] ['p-unc'].values[0])) interaction_p.append( np.nan_to_num( aov[aov.Source == "Condition * Track_length"] ['p-unc'].values[0])) condition_p = np.array(condition_p) track_length_p = np.array(track_length_p) interaction_p = np.array(interaction_p) power_condition = len( condition_p[condition_p < 0.05]) / iterations power_track_length = len( track_length_p[track_length_p < 0.05]) / iterations power_interaction = len( interaction_p[interaction_p < 0.05]) / iterations parameters_powers_conditions[n_counter, counter3, counter4] = power_condition parameters_powers_track_length[n_counter, counter3, counter4] = power_track_length parameters_powers_interaction[n_counter, counter3, counter4] = power_interaction print("it took ", time.time() - start_time, "for 1 simulated loop to run") print("currently on ", str(n), "n subjects, ", str(coef3), "coef3 and ", str(coef4), "coef4") start_time = time.time() #np.save('/mnt/datastore/Harry/OculusVR/Power_analysis/Harry_figs/conditions_assay.npy', parameters_powers_conditions) #np.save('/mnt/datastore/Harry/OculusVR/Power_analysis/Harry_figs/track_length.npy', parameters_powers_track_length) #np.save('/mnt/datastore/Harry/OculusVR/Power_analysis/Harry_figs/interaction.npy', parameters_powers_interaction) np.save( r'Z:\ActiveProjects\Harry\OculusVR\Power_analysis\Harry_figs\conditions_assay.npy', parameters_powers_conditions) np.save( r'Z:\ActiveProjects\Harry\OculusVR\Power_analysis\Harry_figs\track_length.npy', parameters_powers_track_length) np.save( 
r'Z:\ActiveProjects\Harry\OculusVR\Power_analysis\Harry_figs\interaction.npy', parameters_powers_interaction) '''
import pandas as pd from statsmodels.stats.anova import AnovaRM import pingouin as pg if __name__ == '__main__': PATH = "../../data/ms2_uniform_prolific_1_data/" DATA = "prolifc_data_combine_num_each_pp.xlsx" DATA2 = "prolifc_data_each_pp.xlsx" winsize = 0.6 # ANOVA within subject clustering (5) * type (2) for each winsize data = pd.read_excel(PATH + DATA) data = data[data["winsize"] == winsize] aov = pg.rm_anova(data=data, dv="mean_deviation_score", within=["percent_triplets", "protectzonetype"], subject="participant") posthocs = pg.pairwise_ttests( dv="mean_deviation_score", within=["percent_triplets", "protectzonetype"], subject="participant", data=data, padjust="fdr_bh", effsize="cohen") # ANOVA within subject data2 = pd.read_excel(PATH + DATA2) data2 = data2[data2["winsize"] == 0.4] # winsize 0.4 unblanced data aov_table = AnovaRM( data=data2,
df_PT = pd.DataFrame(data=d2) df_PT = df_PT[df_PT.AbsError.notnull()] # no nan assert (df_PT.PT.isnull().sum() == 0) df_mean_nn = df_mean[df_mean.AbsError.notnull()] # dropping null values #################################### # # Running Statistical Tests ###################################### # t-test for comparison to Sven's analysis ttestSNR = pingouin.ttest(loSNR, hiSNR, paired=True) # correction='auto' # rm_anova for SNR on Error rm_SNR = pingouin.rm_anova(data=df_mean_nn, dv='AbsError', within=['SNR'], subject='Sub') print(rm_SNR) # # MLM for SNR on Error # mlm_SNR = smf.mixedlm("AbsError ~ SNR", df_mean_nn, groups=df_mean_nn["Sub"]) # mdf_SNR = mlm_SNR.fit() # print(mdf_SNR.summary()) # # A = np.identity(len(mdf_SNR.params)) # A = A[1:,:] # print(mdf_SNR.f_test(A)) # MLM for PT on Error mlm_PT = smf.mixedlm("AbsError ~ PT", df_PT, groups=df_PT["Sub"]) mdf_PT = mlm_PT.fit()
import seaborn as sns import statsmodels from scipy.stats import spearmanr from pingouin import mixed_anova, anova, pairwise_tukey from pingouin import logistic_regression import pprint from statsmodels.multivariate.manova import MANOVA from pingouin import ancova #import data in long and wide format for different anlysis data_long = pd.read_csv( r'C:\Users\user\Desktop\FOCUS\behavioral\ready_to_stat\master_data_long_mDNA.csv' ) #Exclude participant 26 for ERP analysis as we know that he is way off with amplitudes in this paradigm. data_long = data_long[data_long.participant != 'P26'] data_long.describe() data_wide_mDNA = pd.read_csv( r'C:\Users\user\Desktop\FOCUS\behavioral\ready_to_stat\master_data_wide_mDNA.csv' ) #Exclude participant 26 for ERP analysis as we know that he is way off with amplitudes in this paradigm. data_wide_mDNA = data_wide_mDNA[data_wide_mDNA.participant != 'P26'] data_wide_mDNA.describe() aov_declog = pg.rm_anova(data=data_long, dv='parietal_AlphaPowerDecLog', within='blocks', subject='participant', detailed=True, correction=True) print(aov_declog)
# In[ ]: ## now to simulate the data data1 = mean1 + np.random.randn(N) * stdev data2 = mean2 + np.random.randn(N) * stdev data3 = mean3 + np.random.randn(N) * stdev datamat = np.vstack((data1, data2, data3)).T # convert to a pandas dataframe df = pd.DataFrame(data=datamat, columns=['d1', 'd2', 'd3']) df # In[ ]: pg.rm_anova(data=df, detailed=True) # In[ ]: df.boxplot() # In[ ]: ## example from SPSS website # https://www.spss-tutorials.com/repeated-measures-anova/ data = [[8, 7, 6, 7], [5, 8, 5, 6], [6, 5, 3, 4], [6, 6, 7, 3], [8, 10, 8, 6], [6, 5, 6, 3], [6, 5, 2, 3], [9, 9, 9, 6], [5, 4, 3, 7], [7, 6, 6, 5]] df = pd.DataFrame(data=data, columns=['1', '2', '3', '4'])
def efficiency(data):
    """Evaluate the ``time`` variable: plots, normality, ANOVAs, rank tests.

    For every ``config.CalcByType`` member (and once without grouping) plots
    and a ``shapiro``-based statistic are produced through ``create_plot`` /
    ``create_stat``. A log10-transformed copy with values at or above the 96 %
    quantile removed feeds the one-way RM ANOVA and the pairwise t-tests; the
    mixed ANOVA uses ``data.df``. All three tables are written as CSV files.
    Finally ``friedmanchisquare`` (by video) and ``kruskal`` (by tool) are run
    through ``create_var_stats``.

    Args:
        data: Project data container. It must provide ``deep_copy()``,
            item access by column name and a ``df`` attribute.

    Returns:
        dict with ``"success": True`` and a ``"message"`` dict holding the
        stringified normality results, test statistics, one-way ANOVA table
        and plot list.
    """
    var = "time"

    # Output path prefixes are derived from this function's own name.
    out_prefix = sys._getframe().f_code.co_name + "_"
    out_csv = config.OUT_EVALS_DIR + "/" + out_prefix
    out_png = config.OUT_PLOT_DIR + "/" + out_prefix

    def _csv_path(file_stem):
        # <out_csv><var>_<file_stem>.<csv extension>
        return out_csv + var + "_" + file_stem + "." + config.OUT_CSV_EXT

    plot_list = []
    norm_time_dict = {}

    for calc_type in config.CalcByType:
        # box plot, normality statistic, QQ plot -- in that order
        plot_list.append(
            create_plot(data, calc_type, var, plots.saveBoxPlot, out_png + "box"))
        norm_time_dict[calc_type.name] = create_stat(
            data, calc_type, var, shapiro, out_csv, config.OUT_NORM_FILE)
        plot_list.append(
            create_plot(data, calc_type, var, plots.saveQQPlot, out_png + "qq"))

    # Same statistic and QQ plot without any grouping.
    norm_time_dict["None"] = create_stat(data, None, var, shapiro, out_csv,
                                         config.OUT_NORM_FILE)
    plot_list.append(
        create_plot(data, None, var, plots.saveQQPlot, out_png + "qq"))

    # One-way repeated-measures ANOVA on log-transformed, trimmed data.
    out_one_way_anova = _csv_path(config.OUT_ONE_WAY_ANOVA_FILE)
    data_log = data.deep_copy()
    data_log[var] = np.log10(data_log[var])
    upper_cut = data_log['time'].quantile(0.96)  # outlier threshold
    data_log = data_log[data_log["time"] < upper_cut]
    one_way_anova_aov = pg.rm_anova(dv=var,
                                    data=data_log,
                                    subject='user',
                                    within='video',
                                    detailed=True)
    one_way_anova_aov.to_csv(out_one_way_anova, index=False)

    # Pairwise t-tests on the same trimmed data.
    out_ttest = _csv_path(config.OUT_TTEST_FILE)
    ttest_result = pg.pairwise_ttests(dv=var,
                                      within='video',
                                      subject='user',
                                      data=data_log,
                                      padjust='bonferroni',
                                      effsize='hedges',
                                      tail='one-sided',
                                      return_desc=True)
    ttest_result.to_csv(out_ttest, index=False)

    # Mixed ANOVA on the untransformed frame.
    out_mixed_anova = _csv_path(config.OUT_MIXED_ANOVA_FILE)
    m_anova = pg.mixed_anova(dv=var,
                             within='video',
                             between='tool',
                             subject='user',
                             data=data.df)
    m_anova.to_csv(out_mixed_anova, index=False)

    # Friedman (by video) and Kruskal (by tool).
    rank_tests = (
        (config.CalcByType.VIDEO, friedmanchisquare, config.OUT_FRIEDMAN_FILE),
        (config.CalcByType.TOOL, kruskal, config.OUT_KRUSKAL_FILE),
    )
    stat_dict = {}
    for calc_type, test_func, file_stem in rank_tests:
        res, new_plots = create_var_stats(data, [var], calc_type, test_func,
                                          out_prefix + file_stem, False)
        plot_list += new_plots
        stat_dict[calc_type.name] = res

    message = {
        'norm': str(norm_time_dict),
        'stats': str(stat_dict),
        'one_way_anova': str(one_way_anova_aov),
        'plots': str(plot_list),
    }
    return {"success": True, "message": message}
def _concat_frames(frames):
    """Stack per-target result tables; an empty list yields an empty frame."""
    if not frames:
        return pandas.DataFrame()
    return pandas.concat(frames, ignore_index=True, sort=False)


def stats(model, quantity, data, targets, tw, rm, nd):
    """Run group-comparison statistics for every target in ``targets``.

    Parameters
    ----------
    model : str
        ``'absolute'`` analyses the ``NormMean`` column (``NormQuant`` is
        dropped); any other value analyses ``rqMean`` (``rq`` is dropped).
    quantity : int
        Selects the test family: ``2`` runs two-sample tests, ``>= 3`` runs
        ANOVA-type tests (presumably the number of groups -- confirm with
        the caller).
    data : pandas.DataFrame
        Must contain ``Outliers``, ``Target Name``, ``Group`` and the mean
        column; the ANOVA branches also read ``Sample Name`` and, for
        two-way designs, ``Group1``/``Group2``. The caller's frame is not
        modified.
    targets : list
        Values of ``Target Name`` to analyse, one test per target.
    tw : str
        ``'False'`` for one-way designs, anything else for two-way.
    rm : str
        ``'True'`` for repeated (dependent) measures.
    nd : str
        ``'True'`` when the data are treated as normally distributed
        (parametric tests); otherwise non-parametric tests are used.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(stats_dfs, posthoc_dfs)``; ``posthoc_dfs`` is empty when
        ``quantity == 2``.
    """
    if model == 'absolute':
        data = data.drop(['NormQuant'], axis=1)
        data['NormMean'] = data['NormMean'].astype(float)
        mean = 'NormMean'
    else:
        data = data.drop(['rq'], axis=1)
        data['rqMean'] = data['rqMean'].astype(float)
        mean = 'rqMean'

    # prepare data from intermediate dataframe
    data = data[data['Outliers'].eq(False)]
    data = data.drop_duplicates(keep='first')

    # The flags arrive as strings, so compare explicitly: bool('False') is
    # True and would silently turn every test into a paired one.
    repeated = rm == 'True'
    one_way = tw == 'False'

    # t-test and anova for normally distributed data
    if nd == 'True':
        if quantity == 2:
            # T-Test between 2 groups
            posthoc_dfs = pandas.DataFrame()
            group = data['Group'].dropna()
            group = group.drop_duplicates(keep='first').values.tolist()
            frames = []
            for item in targets:
                df = data[data['Target Name'].eq(item)]
                group1 = df[df['Group'].eq(group[0])][mean]
                group2 = df[df['Group'].eq(group[1])][mean]
                t_test = ttest(group1, group2, paired=repeated)
                t_test['paired'] = 'TRUE' if repeated else 'FALSE'
                t_test['Target Name'] = item
                frames.append(t_test)
            stats_dfs = _concat_frames(frames)

            # reformat output table
            stats_dfs = stats_dfs.rename(columns={
                'cohen-d': 'effect size',
                'BF10': 'Bayes factor',
                'dof': 'DF'
            })
            cols = [
                'Target Name', 'DF', 'T', 'tail', 'paired', 'p-val',
                'effect size', 'power', 'Bayes factor'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)

        elif quantity >= 3:
            # ANOVA test
            aov_frames = []
            ph_frames = []
            pvals = []
            for item in targets:
                subset = data[data['Target Name'].eq(item)]
                if repeated:
                    if one_way:
                        # one-way repeated measure anova
                        aov = pg.rm_anova(dv=mean,
                                          data=subset,
                                          within='Group',
                                          subject='Sample Name',
                                          detailed=True)
                        pvals.append(aov['p-unc'].iloc[0])
                        # row 1 is the error term of the detailed table
                        aov = aov.drop([1])
                        aov['measures'] = ['dependent']
                        aov['Target Name'] = item
                    else:
                        # two-way repeated measure anova
                        aov = pg.rm_anova(dv=mean,
                                          data=subset,
                                          within=['Group1', 'Group2'],
                                          subject='Sample Name',
                                          detailed=True)
                        _, pval_corr_tw = pg.multicomp(list(aov['p-unc']),
                                                       alpha=0.05,
                                                       method='bonf')
                        aov['p-value corrected'] = pval_corr_tw
                        aov['measures'] = ['dependent'] * 3
                        aov['Target Name'] = [item] * 3
                        # The original discarded the result of this drop.
                        aov = aov.drop(columns=['eps'], errors='ignore')
                    # NOTE(review): the post-hoc uses within='Group' for both
                    # the one-way and two-way repeated designs -- confirm
                    # that a 'Group' column exists in two-way data.
                    ph = pairwise_ttests(data=subset,
                                         dv=mean,
                                         within='Group',
                                         subject='Sample Name',
                                         padjust='fdr_bh')
                    ph['Target Name'] = item
                    ph['Test'] = 'T-Test'
                else:
                    if one_way:
                        aov = pg.anova(dv=mean,
                                       between='Group',
                                       data=subset,
                                       detailed=True)
                        pvals.append(aov['p-unc'].iloc[0])
                        aov = aov.drop([1])
                        aov['measures'] = ['independent']
                        aov['Target Name'] = item
                        ph = pairwise_ttests(data=subset,
                                             dv=mean,
                                             between='Group',
                                             padjust='fdr_bh')
                    else:
                        aov = pg.anova(dv=mean,
                                       between=['Group1', 'Group2'],
                                       data=subset,
                                       detailed=False)
                        # row 3 is the residual row
                        aov = aov.drop([3])
                        _, pval_corr_tw = pg.multicomp(list(aov['p-unc']),
                                                       alpha=0.05,
                                                       method='bonf')
                        aov['p-value corrected'] = pval_corr_tw
                        aov['measures'] = ['independent'] * 3
                        aov['Target Name'] = [item] * 3
                        ph = pairwise_ttests(data=subset,
                                             dv=mean,
                                             between=['Group1', 'Group2'],
                                             padjust='fdr_bh')
                    ph['Test'] = 'T-Test'
                    ph['Target Name'] = item

                aov_frames.append(aov)
                ph_frames.append(ph)

            stats_dfs = _concat_frames(aov_frames)
            posthoc_dfs = _concat_frames(ph_frames)
            _, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf')

            # reformat output tables
            stats_dfs = stats_dfs.rename(columns={
                'p-unc': 'p-value',
                'np2': 'effect size'
            })
            if one_way:
                n_rows = len(targets)
                stats_dfs['p-value corrected'] = pvals_corr
            else:
                n_rows = len(targets) * 3
            stats_dfs['distribution'] = ['parametric'] * n_rows
            stats_dfs['test'] = ['ANOVA'] * n_rows
            stats_dfs['statistic'] = ['NA'] * n_rows
            cols = [
                'Target Name', 'Source', 'DF', 'F', 'MS', 'SS', 'p-value',
                'p-value corrected', 'measures', 'distribution', 'test',
                'statistic', 'effect size'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)

            if one_way:
                posthoc_dfs = posthoc_dfs.drop(['Contrast', 'T'], axis=1)
            else:
                posthoc_dfs = posthoc_dfs.drop(['T'], axis=1)
            posthoc_dfs = posthoc_dfs.rename(
                columns={
                    'hedges': 'effect size',
                    'p-corr': 'p-value corrected',
                    'p-unc': 'p-value',
                    'p-adjust': 'correction method',
                    'BF10': 'Bayes factor',
                    'dof': 'DF'
                })
            if one_way:
                cols2 = [
                    'Target Name', 'A', 'B', 'DF', 'p-value corrected',
                    'p-value', 'correction method', 'Paired', 'Parametric',
                    'Test', 'effect size', 'Bayes factor'
                ]
            else:
                cols2 = [
                    'Target Name', 'Contrast', 'Group1', 'A', 'B', 'DF',
                    'p-value corrected', 'p-value', 'correction method',
                    'Paired', 'Parametric', 'Test', 'effect size',
                    'Bayes factor'
                ]
            posthoc_dfs = posthoc_dfs.reindex(columns=cols2)

    # nonparametric tests for not normally distributed data
    else:
        if quantity == 2:
            posthoc_dfs = pandas.DataFrame()
            group = data['Group'].dropna()
            group = group.drop_duplicates(keep='first').values.tolist()
            frames = []
            for item in targets:
                df = data[data['Target Name'].eq(item)]
                group1 = df[df['Group'].eq(group[0])][mean].to_numpy()
                group2 = df[df['Group'].eq(group[1])][mean].to_numpy()
                if repeated:
                    # Wilcoxon signed-rank: the paired test, matching the
                    # Wilcoxon post-hoc used after Friedman below.
                    test = wilcoxon(group1, group2)
                else:
                    # Mann-Whitney U: the independent-samples test, matching
                    # the post-hoc used after Kruskal-Wallis below.
                    test = mannwhitneyu(group1, group2)
                frames.append(
                    pandas.DataFrame(
                        {
                            'Target Name': item,
                            'pvalue': test.pvalue,
                            'statistic': test.statistic
                        },
                        index=[0]))
            stats_dfs = _concat_frames(frames)

        elif quantity >= 3:
            test_frames = []
            ph_frames = []
            pvals = []
            for item in targets:
                subset = data[data['Target Name'].eq(item)]
                if repeated:
                    # friedman test for repeated measurements
                    df = pg.friedman(dv=mean,
                                     within='Group',
                                     subject='Sample Name',
                                     data=subset)
                    # positional access: the result row is labelled by name
                    pvals.append(df['p-unc'].iloc[0])
                    df['test'] = ['Friedman Q']
                    df['measures'] = ['dependent']
                    df = df.rename(columns={'Q': 'statistic'})
                    df['Target Name'] = item
                    df['DF'] = 'NA'
                    ph = pairwise_ttests(data=subset,
                                         dv=mean,
                                         within='Group',
                                         subject='Sample Name',
                                         padjust='fdr_bh',
                                         parametric=False)
                    ph['Target Name'] = item
                    ph['DF'] = 'NA'
                    ph['Bayes factor'] = 'NA'
                    ph['Test'] = 'Wilcoxon'
                else:
                    # Kruskal-Wallis H test
                    df = pg.kruskal(dv=mean, between='Group', data=subset)
                    pvals.append(df['p-unc'].iloc[0])
                    df['test'] = ['Kruskal-Wallis H']
                    df['measures'] = ['independent']
                    df = df.rename(columns={'H': 'statistic'})
                    df['Target Name'] = item
                    df['DF'] = 'NA'
                    ph = pairwise_ttests(data=subset,
                                         dv=mean,
                                         between='Group',
                                         padjust='fdr_bh',
                                         parametric=False)
                    ph['Target Name'] = item
                    ph['DF'] = 'NA'
                    ph['Bayes factor'] = 'NA'
                    ph['Test'] = 'Mann-Whitney U'

                test_frames.append(df)
                ph_frames.append(ph)

            stats_dfs = _concat_frames(test_frames)
            posthoc_dfs = _concat_frames(ph_frames)
            _, pvals_corr = pg.multicomp(pvals, alpha=0.05, method='bonf')

            # reformat output tables
            stats_dfs = stats_dfs.rename(columns={
                'dof': 'DF',
                'p-unc': 'p-value'
            })
            stats_dfs['p-value corrected'] = pvals_corr
            stats_dfs['distribution'] = ['non-parametric'] * len(targets)
            stats_dfs['MS'] = ['NA'] * len(targets)
            stats_dfs['SS'] = ['NA'] * len(targets)
            stats_dfs['effect size'] = ['NA'] * len(targets)
            cols = [
                'Target Name', 'DF', 'MS', 'SS', 'p-value',
                'p-value corrected', 'measures', 'distribution', 'test',
                'statistic', 'effect size'
            ]
            stats_dfs = stats_dfs.reindex(columns=cols)

            posthoc_dfs = posthoc_dfs.drop(['Contrast'], axis=1)
            posthoc_dfs = posthoc_dfs.rename(
                columns={
                    'hedges': 'effect size',
                    'p-corr': 'p-value corrected',
                    'p-unc': 'p-value',
                    'p-adjust': 'correction method',
                    'BF10': 'Bayes factor'
                })
            cols2 = [
                'Target Name', 'A', 'B', 'DF', 'p-value corrected', 'p-value',
                'correction method', 'Paired', 'Parametric', 'Test',
                'effect size', 'Bayes factor'
            ]
            posthoc_dfs = posthoc_dfs.reindex(columns=cols2)

    return stats_dfs, posthoc_dfs
# -*- coding: utf-8 -*-
"""
Created on Wed May 1 15:55:49 2019

@author: Antoine
"""

#%% anova pingouin
import pingouin as pg
import pandas as pd

# Load the aggregated data and keep only the rows with ISI below 6.
data = pd.read_csv("aggregated_data.txt")
data = data[data["ISI"] < 6]

# Two-way repeated-measures ANOVA on "d" (modulation_type x ISI).
aov = pg.rm_anova(
    dv="d",
    within=["modulation_type", "ISI"],
    subject="subject",
    data=data,
)
pg.print_table(aov)

# Keep a subset of the ANOVA columns and relabel them for the export.
column_labels = {
    "Source": "Variable",
    "ddof1": "ddl",
    "F": "F-value",
    "p-unc": "p-value",
    "p-GG-corr": "p-value corrigee",
    "np2": "partial eta-square",
}
clean_aov = aov[list(column_labels)]
clean_aov.columns = list(column_labels.values())
clean_aov.to_excel("resultats_anova.xlsx")

#%% anova stats model DOES NOT WORK
from statsmodels.stats.anova import AnovaRM
import pandas as pd
def analyse(self, parameter_list={"all"}, between_factor_list=["Subject_type"], within_factor_list=["Stimuli_type"], statistical_test="Mixed_anova", file_creation=True, ttest_type=1):
    """Carry out the requested statistical analysis.

    The analysis is run on the selected indicators/parameters using the
    data extracted from all subjects mentioned in the json file. The
    available tests are Mixed ANOVA, Repeated Measures ANOVA, pairwise
    t-test, Welch t-test and simple ANOVA (with and without interaction).

    The default arguments are mutable objects; they are kept for interface
    compatibility and are only ever read, never modified.

    Parameters
    ----------
    parameter_list: set (optional)
        Indicators/parameters (e.g. Pupil_size, Blink_rate) to analyse. By
        default it is {"all"}, so every parameter is considered.
    between_factor_list: list(str) (optional)
        Between group factors, by default only "Subject_type". To consider
        an additional factor (e.g. Gender) pass
        between_factor_list = ["Subject_type", "Gender"]. "Subject_type"
        must be listed explicitly to be used: ["factor_x"] alone no longer
        considers it. Factors other than "Subject_type" are read from the
        json file (see the README for the expected layout).
    within_factor_list: list(str) (optional)
        Within group factors, by default only "Stimuli_type". To consider
        an additional factor pass
        within_factor_list = ["Stimuli_type", "factor_X"]. "Stimuli_type"
        must be listed explicitly to be used. Factors other than
        "Stimuli_type" are read from the json file (see the README).
    statistical_test: str {"Mixed_anova", "RM_anova", "ttest", "welch_ttest", "anova"} or None (optional)
        Name of the statistical test to perform. NOTE:
        - ttest: three variants, selected by `ttest_type`.
        - welch_ttest: two variants, selected by `ttest_type`.
        - Mixed_anova: uses only the first between group factor and the
          first within group factor.
        - anova: any number of between group factors.
        - RM_anova: 1 or 2 within group factors.
    file_creation: bool (optional)
        Whether result files are created. When True, a "Results" directory
        (with a "Data" sub-directory holding one data csv per parameter) is
        created inside the path given in the json file, and the statistical
        results are written to "<statistical_test>.csv" in "Results". An
        existing file of the same name is overwritten. When False nothing
        is written to disk.
    ttest_type: int {1,2,3} (optional)
        Which factors are used by the t-tests.
        For ttest:
        - 1: the between group factors
        - 2: the within group factors
        - 3: both the within and the between group factors
        For welch_ttest:
        - 1: the first factor in `between_factor_list`
        - 2: the first factor in `within_factor_list`
        Any other value prints an error and skips the test.

    Examples
    --------
    Mixed ANOVA on all parameters with Subject_type and Stimuli_type as
    between and within group factors (these are the defaults):

    exp.analyse(parameter_list={"all"},
                between_factor_list=["Subject_type"],
                within_factor_list=["Stimuli_type"],
                statistical_test="Mixed_anova",
                file_creation=True)

    2-way ANOVA for "blink_rate" and "avg_blink_duration" with
    Subject_type and Gender as between group factors, without writing
    files:

    exp.analyse(parameter_list={"blink_rate", "avg_blink_duration"},
                between_factor_list=["Subject_type", "Gender"],
                statistical_test="anova",
                file_creation=False)
    """
    with open(self.json_file, "r") as json_f:
        json_data = json.load(json_f)

    csvFile = None
    writer = None
    directory_path = None
    if file_creation:
        directory_path = json_data["Path"] + "/Results"
        os.makedirs(directory_path + '/Data/', exist_ok=True)
        if statistical_test is not None:
            file_path = directory_path + "/" + statistical_test + ".csv"
            csvFile = open(file_path, 'w')
            writer = csv.writer(csvFile)

    meta_not_to_be_considered = ["pupil_size", "pupil_size_downsample"]
    # Event-based indicators: zero entries are skipped and the stimulus is
    # located through the summation array.
    event_metas = ["sacc_duration", "sacc_vel", "sacc_amplitude", "ms_duration", "ms_vel", "ms_amplitude"]

    try:
        for sen in self.sensors:
            for meta in Sensor.meta_cols[sen]:
                if meta in meta_not_to_be_considered:
                    continue
                if ('all' not in parameter_list) and (meta not in parameter_list):
                    continue

                print("\n\n")
                print("\t\t\t\tAnalysis for ",meta)

                # The statistical functions need one long-format dataframe
                # with the columns: indicator value, between group factors,
                # within group factors, subject name and stimulus name.
                column_list = [meta]
                column_list.extend(between_factor_list)
                column_list.extend(within_factor_list)
                column_list.append("subject")
                column_list.append("stimuli_name")
                data = pd.DataFrame(columns=column_list)

                # For each subject
                for sub_index, sub in enumerate(self.subjects):
                    # For each stimulus type
                    for stimuli_index, stimuli_type in enumerate(sub.aggregate_meta):
                        if meta in event_metas:
                            summation_array = self.summationArrayCalculation(meta, sub_index, stimuli_index)
                        value_array = self.meta_matrix_dict[1][meta][sub_index,stimuli_index]
                        index_extra = 0
                        for value_index, value in enumerate(value_array):
                            if meta in event_metas:
                                if value == 0:
                                    index_extra += 1
                                    continue
                                proper_index = self.return_index(value_index-index_extra, summation_array)
                                stimulus_name = self.stimuli[stimuli_type][proper_index]
                            else:
                                stimulus_name = self.stimuli[stimuli_type][value_index]

                            row = [value]
                            # Between group factors (defined in the json file)
                            for param in between_factor_list:
                                if param == "Subject_type":
                                    row.append(sub.subj_type)
                                    continue
                                try:
                                    row.append(json_data["Subjects"][sub.subj_type][sub.name][param])
                                except Exception:
                                    print("Between subject paramter: ", param, " not defined in the json file")
                            # Within group factors (defined in the json file)
                            for param in within_factor_list:
                                if param == "Stimuli_type":
                                    row.append(stimuli_type)
                                    continue
                                try:
                                    stimulus_name = self.stimuli[stimuli_type][value_index]
                                    row.append(json_data["Stimuli"][stimuli_type][stimulus_name][param])
                                except Exception:
                                    print("Within stimuli parameter: ", param, " not defined in the json file")
                            row.append(sub.name)
                            row.append(stimulus_name)

                            if np.isnan(value):
                                print("The data being read for analysis contains null value: ", row)

                            # Append the row to the pandas dataframe
                            data.loc[len(data)] = row

                # The Results directory only exists when file creation was
                # requested, so the data dump is tied to the same flag.
                if file_creation:
                    data.to_csv(directory_path + '/Data/' + meta + "_data.csv")

                # Run the selected statistical test
                if statistical_test == "Mixed_anova":
                    if len(within_factor_list)>1:
                        print("Error: Too many within group factors,\nMixed ANOVA can only accept 1 within group factor\n")
                    elif len(between_factor_list)>1:
                        print("Error: Too many between group factors,\nMixed ANOVA can only accept 1 between group factor\n")

                    print(meta, ":\tMixed ANOVA")
                    aov = pg.mixed_anova(dv=meta, within=within_factor_list[0], between=between_factor_list[0], subject='subject', data=data)
                    pg.print_table(aov)
                    if file_creation:
                        values_list = ["Mixed Anova: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, aov, values_list)

                    posthocs = pg.pairwise_ttests(dv=meta, within=within_factor_list[0], between=between_factor_list[0], subject='subject', data=data)
                    pg.print_table(posthocs)
                    if file_creation:
                        values_list = ["Post Hoc Analysis"]
                        self.fileWriting(writer, csvFile, posthocs, values_list)

                elif statistical_test == "RM_anova":
                    if len(within_factor_list)>2 or len(within_factor_list)<1:
                        print("Error: Too many or too few within group factors,\nRepeated Measures ANOVA can only accept 1 or 2 within group factors\n")

                    print(meta, ":\tRM ANOVA")
                    aov = pg.rm_anova(dv=meta, within= within_factor_list, subject = 'subject', data=data)
                    pg.print_table(aov)
                    if file_creation:
                        values_list = ["Repeated Measures Anova: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, aov, values_list)

                elif statistical_test == "anova":
                    print(meta, ":\tANOVA")
                    # Build "<meta> ~ C(f1)*C(f2)*..." from the between factors
                    length = len(between_factor_list)
                    model_equation = meta + " ~ C("
                    for factor_index, factor in enumerate(between_factor_list):
                        if factor_index < length-1:
                            model_equation = model_equation + factor + ")*C("
                        else:
                            model_equation = model_equation + factor + ")"

                    print("Including interaction effect")
                    print(model_equation)
                    model = ols(model_equation, data).fit()
                    res = sm.stats.anova_lm(model, typ= 2)
                    print(res)
                    if file_creation:
                        values_list = ["Anova including interaction effect: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, res, values_list)

                    print("\nExcluding interaction effect")
                    model_equation = model_equation.replace("*", "+")
                    print(model_equation)
                    model = ols(model_equation, data).fit()
                    res = sm.stats.anova_lm(model, typ= 2)
                    print(res)
                    if file_creation:
                        values_list = ["Anova excluding interaction effect: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, res, values_list)

                elif statistical_test == "ttest":
                    print(meta, ":\tt test")
                    if ttest_type==1:
                        aov = pg.pairwise_ttests(dv=meta, between=between_factor_list, subject='subject', data=data)
                    elif ttest_type==2:
                        aov = pg.pairwise_ttests(dv=meta, within=within_factor_list, subject='subject', data=data)
                    elif ttest_type==3:
                        aov = pg.pairwise_ttests(dv=meta, between=between_factor_list, within=within_factor_list, subject='subject', data=data)
                    else:
                        print("The value given to ttest_type is not acceptable, it must be either 1 or 2 or 3")
                        # No result exists for this parameter, so there is
                        # nothing to print or write.
                        continue
                    pg.print_table(aov)
                    if file_creation:
                        values_list = ["Pairwise ttest: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, aov, values_list)

                elif statistical_test == "welch_ttest":
                    print(meta, ":\tWelch t test")
                    if ttest_type==1:
                        normality,aov = self.welch_ttest(dv=meta, factor=between_factor_list[0], subject='subject', data=data)
                    elif ttest_type==2:
                        normality,aov = self.welch_ttest(dv=meta, factor=within_factor_list[0], subject='subject', data=data)
                    else:
                        print("The value given to ttest_type for welch test is not acceptable, it must be either 1 or 2")
                        # No result exists for this parameter, so there is
                        # nothing to print or write.
                        continue
                    pg.print_table(normality)
                    pg.print_table(aov)
                    if file_creation:
                        values_list = ["Welch Pairwise ttest: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, normality, values_list)
                        self.fileWriting(writer, csvFile, aov, values_list)
    finally:
        # Close the results file even when a test raises.
        if csvFile is not None:
            csvFile.close()
# ***This one approaches significance
# data = df[["small_size","small_color","small_colorAndSize"]]
# Select large graph
# data = df[["large_size","large_color","large_colorAndSize"]]

# Read in total interactions time dataset
# ***These all fail significance
# df = pd.read_csv('ANOVA_interactions.csv')
# Select just small graph
# This one approaches significance
# data = df[["small_size","small_color","small_colorAndSize"]]
# Select large graph
# data = df[["large_size","large_color","large_colorAndSize"]]

# Within-subjects design, hence a repeated-measures ANOVA on the selected
# wide-format columns (one column per condition).
aov = pg.rm_anova(data=data, detailed=True)
pg.print_table(aov)
#print(aov)

# The pairwise t-tests need long format: one row per participant and
# condition, with the measurement in the default "value" column.
melted = pd.melt(
    data_post,
    id_vars=['Participant'],
    value_vars=["small_size", "small_color", "small_colorAndSize"],
    var_name='condition',
)
post_hocs = pg.pairwise_ttests(
    data=melted,
    dv='value',
    within='condition',
    subject='Participant',
)
# Trailing expression: shows the rounded table when run interactively.
post_hocs.round(3)
# Load the merged behavioural data; missing false alarms and omission
# errors are counted as 0.
data_merged = pd.read_csv(
    r'C:\Users\user\Desktop\FOCUS\behavioral\P_Merged_var.csv'
).fillna({'false_alarm': 0, 'om_err': 0})
#data_merged.to_csv(r'C:\Users\user\Desktop\FOCUS\behavioral\P_Merged_var.csv', index = None, header=True)

# ANOVA - does correct reaction time differ between blocks?
aov_corr_rt = anova(data=data_merged, dv='corr_rt', between='blocks')
print(aov_corr_rt)

# Repeated-measures ANOVA of false alarms across blocks, per participant.
rep_anov_alarm = pg.rm_anova(
    data=data_merged,
    dv='false_alarm',
    within='blocks',
    subject='participant',
    detailed=True,
)

# Follow-up pairwise comparison of correct reaction time between blocks.
pairs_corr_rt = pairwise_tukey(data=data_merged, dv='corr_rt', between='blocks')
print(pairs_corr_rt)

#### ANOVA - do false alarms differ between blocks?
aov_alarms = anova(data=data_merged, dv='false_alarm', between='blocks')
print(aov_alarms)
# NOTE(review): eval() is applied to cell contents of the data frame. This is
# only safe while the underlying file is trusted.
dfExpTrail['hasAvoidPoint'] = dfExpTrail.apply(
    lambda x: hasAvoidPoints(
        eval(x['aimPlayerGridList']),
        eval(x['avoidCommitPoint']),
    ),
    axis=1,
)

# Mean rate of avoiding the commitment point, per participant, decision step
# and condition; its complement is the "show commitment" rate.
statDF = pd.DataFrame()
# statDF['avoidCommitPercent'] = dfExpTrail.groupby(['name', 'decisionSteps'])["hasAvoidPoint"].mean()
statDF['avoidCommitPercent'] = (
    dfExpTrail
    .groupby(['name', 'decisionSteps', 'conditionName'])["hasAvoidPoint"]
    .mean()
)
statDF['ShowCommitmentPercent'] = statDF.apply(
    lambda x: 1 - x['avoidCommitPercent'], axis=1)
statDF = statDF.reset_index()

# Participants whose name contains 'max' are labelled as RL agents.
statDF['participantsType'] = [
    'RL Agent' if 'max' in name else 'Human' for name in statDF['name']
]
# statDF['avoidCommitPercentSE'] = statDF["avoidCommitPercent"].apply(calculateSE)

import pingouin as pg

# Two-way repeated-measures ANOVA followed by pairwise post-hoc tests.
aov = pg.rm_anova(
    data=statDF,
    dv='avoidCommitPercent',
    within=['decisionSteps', 'conditionName'],
    subject='name',
)
# pg.print_table(aov)
posthocs = pg.pairwise_ttests(
    data=statDF,
    dv='avoidCommitPercent',
    within=['decisionSteps', 'conditionName'],
    subject='name',
)
# pg.print_table(posthocs)

import seaborn as sns

ax = sns.barplot(
    data=statDF,
    x="decisionSteps",
    y="ShowCommitmentPercent",
    hue="conditionName",
    ci=68,
)
# ax.set(xlabel='Decision Step', ylabel='Show Commitment Ratio', title='Commitment with Deliberation')
handles, labels = ax.get_legend_handles_labels()
# labels.get_texts()[0].set_text('1 obstacle at crossroad')
# labels.get_texts()[1].set_text('2 obstacles at crossroad')
plt.xticks(color='black', fontsize=16)
plt.yticks(color='black', fontsize=10)
# One pass per value in time_vals: run the tests for every stage table and
# save the ANOVA and post-hoc results under <save_test_dir>/<part>/<stage>/.
for part in time_vals:
    print(part)
    part_dir = save_test_dir / part

    # post-hoc tables of this part, keyed by stage type
    ph_part_dict = {}
    for key, df in totals_dict.items():
        print(key)

        # tidy data: long format, restricted to the current part
        long_df = df.stack().reset_index()
        long_df.columns = stat_colnames
        part_df = long_df.query(f"{time!s} == '{part!s}'")

        # repeated-measures anova across days
        part_rm = pg.rm_anova(data=part_df, dv=dep_var, within=day,
                              subject=anim)
        pg.print_table(part_rm)

        # Tukey post-hoc between days
        ph = pg.pairwise_tukey(data=part_df, dv=dep_var, between=day)
        pg.print_table(ph)
        ph_part_dict[key] = ph

        # write both tables into the stage directory
        stage_test_dir = part_dir / key
        anova_file = stage_test_dir / "01_anova.csv"
        ph_file = stage_test_dir / "02_posthoc.csv"
        part_rm.to_csv(anova_file)
        ph.to_csv(ph_file)
# break into long format and groupby evaluation/cutoff melted = pd.melt(df.reset_index(), value_vars=[ c for c in df.columns if 'cutoff' in c ], id_vars='participant_id', value_name='ld_rate') melted['eval'], melted['cutoff'] = zip(*melted['variable'].str.split('-')) avgs = melted.groupby(['eval','cutoff'] )['ld_rate'].agg(['mean','sem']) # replace sem for the binary case bc it's meaningless avgs.loc['binary_ld','sem'] = pd.NA anova = pg.rm_anova(data=melted[melted['eval']!='binary_ld'], dv='ld_rate',within=['eval','cutoff'], subject='participant_id',detailed=True) avgs.to_csv(EXPORT_FNAME_DATA,float_format=FLOAT_FMT,index=True,na_rep='NA') anova.to_csv(EXPORT_FNAME_STAT,float_format=FLOAT_FMT,index=False) #################################### ######### draw plot ######### fig, ax = plt.subplots(figsize=(FIG_WIDTH,FIG_HEIGHT)) # draw lines and points separately to have diff colored points for ev, subdf in avgs.groupby('eval'):
sorted(df.Animal.unique())): #Query animal data trimmed_result = df.loc[df.Notes.isin(['Hab4','Hab5'])\ & (df['Animal'] == animal)] #Run ANOVA across bottles bottle_stats = [] for day in sorted(trimmed_result.Notes.unique()): stat_query = trimmed_result.loc[(trimmed_result['Notes'] == day)] b_stats = stat_query.anova(dv='LICKS', between=['TUBE']) bottle_stats.append(np.round(b_stats.iloc[0,4],2)) #Run Repeated Measures ANOVA across days stats = pg.rm_anova(dv='LICKS', within=['Notes'], subject='TUBE', data=trimmed_result, detailed=True) pval = np.format_float_scientific(stats.iloc[0,5],1, exp_digits=2) rm_stats.append(pval) #Establish plot location ax = axes_list.pop(0) #Plot sns.barplot(x='Notes',\ y='LICKS',\ hue='TUBE',\ data=trimmed_result, order =['Hab4','Hab5'], palette=sns.color_palette("PuBu_r", len(trimmed_result.Notes.unique())+1),\
# The flag is replaced by the pivot table itself once it has been built.
pivot_t = True
if pivot_t:
    pivot_t = pd.pivot_table(
        data,
        values="deviation_score",
        index=["crowdingcons", "participant_N"],
        columns=["winsize"],
    )
    pivot_t.to_csv("pt_exp1.csv")

# Per participant / window size / crowding condition: mean and std of dv,
# with the three grouping keys moved back into ordinary columns.
data_1 = (
    data
    .groupby(["participant_N", "winsize", "crowdingcons"])[dv]
    .agg(["mean", "std"])
    .reset_index(level=["participant_N", "winsize", "crowdingcons"])
)
rename_df_col(df=data_1, old_col_name="mean", new_col_name=dv)

# mean crowding vs. no-crowding
crowdingcon = 1
cal_ds_mean(data, crowdingcon=crowdingcon)
cal_ds_std(data, crowdingcon=crowdingcon)

# 2 way anova (repeated measures: winsize x crowdingcons)
aov = pg.rm_anova(
    data=data_1,
    dv=dv,
    within=["winsize", "crowdingcons"],
    subject="participant_N",
)

# post hoc
posthocs = pg.pairwise_ttests(
    data=data_1,
    dv=dv,
    within=["winsize", "crowdingcons"],
    subject="participant_N",
    padjust="fdr_bh",
    effsize="cohen",
)
.dropna() \ .sort_values(by=["cond", "sub"]) \ .reset_index() data_stats["index"] = \ np.array([list(range(int(data_stats["cond"].shape[0]/len(conds))))] \ *len(conds)).flatten() # Perform repeated measures anova ind_col = "index" lab_col = "cond" val_col = "instability" anova_output = pg.rm_anova(data=data_stats, dv=val_col, within=lab_col, subject=ind_col, detailed=True) \ if anova_output["p-unc"][0] < 0.05: res_an = anova_output.loc[:, idx["F", "p-unc"]] res_pwc = pd.DataFrame(None, columns=["A", "B", "T", "p-corr"]) # Protected post hoc t-test (LSD) degf = anova_output.loc[1, "DF"] SSE = anova_output.loc[1, "SS"] MSE = SSE / degf n = len(conds) combos_labels = list(itertools.combinations(conds, 2))