def perform_kappa_anova(self):
    """Run the Group x Comparison mixed ANOVA on Kappa plus post-hoc tests.

    Reads ``self.df_kappa_long``. The ANOVA table is stored in
    ``self.kappa_aov`` and printed; the Bonferroni-adjusted pairwise
    comparisons (Hedges' g effect size) are stored in ``self.kappa_posthoc``.
    """
    print("\nPerforming Group x Comparison mixed ANOVA on Cohen's Kappa values.")

    # Design shared by the omnibus test and the post-hoc comparisons.
    design = dict(dv="Kappa", within="Comparison", between="Group", subject="ID")
    long_data = self.df_kappa_long

    # Omnibus mixed ANOVA.
    anova_table = pg.mixed_anova(data=long_data, correction=True, **design)
    self.kappa_aov = anova_table
    pg.print_table(anova_table)

    # Post-hoc pairwise comparisons.
    self.kappa_posthoc = pg.pairwise_ttests(data=long_data,
                                            padjust="bonf",
                                            effsize="hedges",
                                            parametric=True,
                                            **design)
def calculate_kruskalwallish(dataframe, col_name="", p_adj_method="none", stats_id="", save_files=False,
                             save_dir="/Users/kyleweber/Desktop/"):
    """Run ``pg.kruskal`` across "Cohort" followed by non-parametric pairwise tests.

    :param dataframe: data containing ``col_name``, "Cohort" and "Participant" columns
    :param col_name: name of the dependent-variable column
    :param p_adj_method: p-value adjustment method forwarded to ``pg.pairwise_ttests``
    :param stats_id: prefix for the output file names
    :param save_files: when True, both result tables are written as CSV
    :param save_dir: directory prefix the file names are appended to
    :returns: (omnibus table, pairwise table); both get a "Sig." column that
        is "*" where the uncorrected p-value is below .05 and " " otherwise
    """
    stats_main = pg.kruskal(data=dataframe, dv=col_name, between="Cohort", detailed=True)
    omnibus_p = stats_main["p-unc"].iloc[0]
    stats_main["Sig."] = ["*"] if omnibus_p < .05 else [" "]

    # The pairwise comparisons run regardless of the omnibus result.
    stats_pairwise_t = pg.pairwise_ttests(data=dataframe, dv=col_name,
                                          between="Cohort", within=None, subject="Participant",
                                          parametric=False, marginal=True, alpha=.05,
                                          tail="two-sided", padjust=p_adj_method,
                                          effsize="cohen", correction='auto')
    stats_pairwise_t["Sig."] = ["*" if p_value < .05 else " "
                                for p_value in stats_pairwise_t["p-unc"]]

    if not save_files:
        return stats_main, stats_pairwise_t

    print("Saving files to {}".format(save_dir))
    output_prefix = save_dir + stats_id
    stats_main.to_csv(output_prefix + "_Main.csv")
    stats_pairwise_t.to_csv(output_prefix + "_PairwiseT.csv", index=False)
    return stats_main, stats_pairwise_t
def comparison_by_group_anova(self, dependent_var):
    """Perform a Group x Comparison mixed ANOVA on the given dependent variable.

    Runs pairwise t-test comparisons for post-hoc analysis and plots the
    group means with Seaborn.

    :argument
    -dependent_var: name of column in self.df to use as dependent variable

    :returns
    -data objects from pingouin ANOVA and posthoc objects
    """
    # The trailing space after "for" matters: the two literals are joined as-is.
    print("\nPerforming Group x Comparison mixed ANOVA for "
          "dependent variable {}.".format(dependent_var.capitalize()))

    aov = pg.mixed_anova(dv=dependent_var, within="COMPARISON", between="GROUP",
                         subject="ID", data=self.df)

    # The table is printed in two halves.
    # NOTE(review): the slices 0:8 and 9: leave out column index 8 - confirm
    # that omitting it is intentional.
    pg.print_table(aov.iloc[:, 0:8])
    print()
    pg.print_table(aov.iloc[:, 9:])

    sns.pointplot(data=self.df, x='GROUP', y=dependent_var, hue='COMPARISON',
                  dodge=False, markers='o', capsize=.1, errwidth=1, palette='Set1')
    plt.title("Group x Comparison Mixed ANOVA: {}".format(dependent_var.capitalize()))

    posthoc = pg.pairwise_ttests(dv=dependent_var, within="COMPARISON", between='GROUP',
                                 subject='ID', data=self.df)
    pg.print_table(posthoc)

    return aov, posthoc
def pairwise_ttests_paired(self):
    """Run a one-way repeated-measures ANOVA and paired post-hoc t-tests.

    ``self.df`` is melted on "ID", so every other column becomes a level of
    the within factor "variable". Results are stored in ``self.oneway_rm_aov``
    and ``self.ttests_paired`` (Holm-adjusted, Hedges' g effect size).
    """
    long_df = self.df.melt(id_vars="ID")
    design = {"data": long_df, "dv": "value", "within": "variable", "subject": "ID"}

    self.oneway_rm_aov = pg.rm_anova(**design)
    self.ttests_paired = pg.pairwise_ttests(padjust="holm",
                                            effsize="hedges",
                                            parametric=True,
                                            **design)
def getstats(aggDict):
    """Compute the statistics derived from the aggregated sequence tables.

    ``aggDict`` maps sequence names ('0NP', '1NP', '(0,1,~) D', '(0,-1,~)')
    to DataFrames with 'sub_id', 'treat', 'phen' and phase columns
    ('1L' ... '8D').

    Returns a 6-tuple:
    - corrDict: per sequence and animal, the 'pearson' row of ``pg.corr``
      between the manual (Excel) and automatic values over '1L':'8D'
    - meanCorrs: per sequence, the mean Pearson r over animals
    - compDict: corrDict reshaped to one DataFrame per sequence (for export)
    - betDict: pairwise t-tests per between factor ('treat', 'phen') and sequence
    - phenotype: Wilcoxon 6D vs 7L result per phenotype for '(0,-1,~)'
    - cStats: one (mean, sem) tuple for '(0,1,~) D'
    """
    reference = aggDict['0NP']
    animals = list(reference['sub_id'])
    phaseCols = list(reference.columns.values)[3:19]  # Getting phase names
    sequences = ['0NP', '1NP', '(0,1,~) D', '(0,-1,~)']

    # Loading results of manual analysis
    mxl = pd.read_excel('Impulsivity strategies.xlsx', sheet_name=sequences, index_col=0,
                        nrows=25, usecols=np.arange(0, 19), keep_default_na=False)

    # Auto. vs Man. correlation
    corrDict = {}
    meanCorrs = {}  # Just the mean
    n_animals = len(animals)
    for seq in sequences:
        per_animal = {}
        running_mean = 0
        automatic = aggDict[seq].set_index('sub_id')
        for animal in animals:
            pearson_row = pg.corr(mxl[seq].loc[animal, '1L':'8D'].astype('float64'),
                                  automatic.loc[animal, '1L':'8D'].astype('float64'),
                                  method='pearson').loc['pearson']
            per_animal[animal] = pearson_row
            running_mean += pearson_row['r'] / n_animals
        corrDict[seq] = per_animal
        meanCorrs[seq] = running_mean

    # For export
    compDict = {sheetname: pd.DataFrame.from_dict(corrDict[sheetname]).T
                for sheetname in sequences}

    # Between subjects pairwise t-tests
    betDict = {}
    for bet in ('treat', 'phen'):
        per_sequence = {}
        for key in sequences:
            # Converting to long format
            long_df = aggDict[key].loc[:, :'8D'].melt(id_vars=['sub_id', 'treat', 'phen'],
                                                      value_vars=phaseCols,
                                                      var_name='phase', value_name='dv')
            long_df['dv'] = pd.to_numeric(long_df['dv'])
            per_sequence[key] = pg.pairwise_ttests(data=long_df, subject='sub_id', dv='dv',
                                                   within='phase', between=[bet],
                                                   return_desc=True).round(6)
        betDict[bet] = per_sequence

    # Wilcoxon for impulsive sequence, for phases 6D:7L
    impulsive = aggDict['(0,-1,~)'].loc[:, :'8D']
    subsets = {'epi': impulsive[impulsive.phen == 'epi'],
               'non': impulsive[impulsive.phen == 'non']}
    phenotype = {}
    for label, subset in subsets.items():
        w, p = stats.wilcoxon(subset.loc[:, '6D'], subset.loc[:, '7L'], mode='approx')
        phenotype[label] = pd.DataFrame.from_dict({'w': [w], 'p': [p]})

    # Descriptive stats for self-control
    cDrink = aggDict['(0,1,~) D'].loc[:, :'8D']
    cStats = [(cDrink.mean(axis=0), cDrink.sem(axis=0))]

    return corrDict, meanCorrs, compDict, betDict, phenotype, cStats
def perform_anova(self, intensity):
    """Run a repeated-measures ANOVA over "Model" and its post-hoc tests.

    ``intensity`` names the dependent-variable column of ``self.df_percent``.
    The ANOVA table is stored in ``self.aov`` and the Bonferroni-adjusted
    pairwise comparisons in ``self.posthoc``; both are printed.
    """
    design = dict(dv=intensity, within="Model", subject="ID", data=self.df_percent)

    self.aov = pg.rm_anova(correction=True, detailed=True, **design)
    print(self.aov)

    self.posthoc = pg.pairwise_ttests(padjust="bonf", effsize="hedges",
                                      parametric=True, **design)
    print(self.posthoc)
def calculate_friedman(dataframe, usetest="friedman", save_files=False, stats_id="",
                       save_dir="/Users/kyleweber/Desktop/"):
    """Run a Friedman test with post-hoc comparisons on long-format data.

    :param dataframe: long-format data with a "Values" column, a subject
        column ("Participant", or "ID" when "Participant" is absent) and the
        within factor as its second column
    :param usetest: "friedman" (``method='f'``) or "chisq" (``method='chisq'``)
    :param save_files: when True, every computed table is written as CSV
    :param stats_id: label inserted into the output file names
    :param save_dir: directory prefix the file names are appended to
    :returns: (stats_main, stats_nemenyi, stats_conover, wilcoxon); the
        Nemenyi/Conover tables are None unless the omnibus test is significant
    :raises ValueError: if ``usetest`` is not one of the supported values
    """
    friedman_methods = {"friedman": "f", "chisq": "chisq"}
    # Validate first: an unknown value used to leave stats_main unbound.
    if usetest not in friedman_methods:
        raise ValueError(
            "usetest must be 'friedman' or 'chisq', got {!r}".format(usetest))

    # Resolve the column names once so every test below uses the same ones.
    subject_col = "Participant" if "Participant" in dataframe.columns else "ID"
    within_col = dataframe.columns[1]

    stats_nemenyi = None
    stats_conover = None

    if usetest == "friedman":
        print("\nRunning Friedman test...")
    else:
        print("\nRunning Chi squared test.")

    stats_main = pg.friedman(data=dataframe, dv="Values", within=within_col,
                             subject=subject_col, method=friedman_methods[usetest])

    significant = stats_main["p-unc"].iloc[0] < .05
    stats_main["Sig."] = ["*" if significant else ""]

    if significant:
        print("-{} result was significant".format(usetest.capitalize()))
        print(" -Running Nemenyi and Conover post-hoc tests.")
        print(" -Nemenyi accounts for multiple comparisons while Conover does not.")

        stats_nemenyi = nemenyi(a=dataframe, y_col="Values", block_col=subject_col,
                                group_col=within_col, melted=True)
        stats_conover = conover(a=dataframe, y_col="Values", block_col=subject_col,
                                group_col=within_col, melted=True)

    print("MAKE SURE DEFAULT VALUES FOR PG.PAIRWISE_TTESTS ARE WHAT YOU WANT!")
    # NOTE(review): the within factor here is the literal "Location" column,
    # not the second column used above - confirm they are meant to coincide.
    wilcoxon = pg.pairwise_ttests(data=dataframe, dv="Values", within="Location",
                                  subject=subject_col, parametric=False,
                                  return_desc=True, effsize="CLES", padjust="bonf")
    wilcoxon["Sig_Uncorr"] = ["*" if i < .05 else "" for i in wilcoxon["p-unc"]]
    wilcoxon["Sig_Corr"] = ["*" if i < .05 else "" for i in wilcoxon["p-corr"]]

    # WRITING FILES
    if save_files:
        print("-Saving output dataframes to {}".format(save_dir))

        stats_main.to_csv(save_dir + "Stats{}_Main.csv".format(stats_id))
        if stats_nemenyi is not None:
            stats_nemenyi.to_csv(save_dir + "Stats{}_Nemenyi.csv".format(stats_id))
        if stats_conover is not None:
            stats_conover.to_csv(save_dir + "Stats{}_Conover.csv".format(stats_id))
        wilcoxon.to_csv(save_dir + "Stats{}_PairwiseWilcoxon.csv".format(stats_id))

    return stats_main, stats_nemenyi, stats_conover, wilcoxon
def make_anova_2way(df, title):
    """Run a two-way repeated-measures ANOVA with post-hocs and save the results.

    The within factors are 'question' and 'context', the dependent variable
    is 'value' and the subject identifier is 'ResponseId'. Three files are
    written with the prefix ``FILENAME_ANOVAS + fn``: '-anova.csv',
    '-posthocs.csv' and '-anova.txt' (title, ANOVA table and post-hoc table
    as text).

    NOTE(review): ``FILENAME_ANOVAS`` and ``fn`` are not defined in this
    function and are expected to exist at module level - confirm.

    :param df: long-format data with the columns named above
    :param title: heading written on the first line of the text report
    """
    print("\tMAKING ANOVA")

    aov = pg.rm_anova(dv='value', within=['question', 'context'],
                      subject='ResponseId', data=df)
    # NOTE(review): the original called aov.round(3) and discarded the result,
    # so the unrounded table has always been saved; that is kept here.
    anova_text = title + "\n" + str(aov)
    aov.to_csv(FILENAME_ANOVAS + fn + '-anova.csv')

    posthocs = pg.pairwise_ttests(dv='value', within=['question', 'context'],
                                  subject='ResponseId', data=df, padjust='bonf')
    anova_text = anova_text + "\n" + str(posthocs)
    posthocs.to_csv(FILENAME_ANOVAS + fn + '-posthocs.csv')
    print()

    # The context manager closes the report even if the write fails.
    with open(FILENAME_ANOVAS + fn + "-anova.txt", "w") as f:
        f.write(anova_text)
def btn_clk14(self):
    """Show FDR-corrected pairwise t-tests on 'AUC' in the table view.

    Comparisons are made across the levels of 'groupe' in ``self.df4`` and
    rounded to 6 decimals before being handed to ``self.tableView``.
    """
    pairwise = pg.pairwise_ttests(data=self.df4, dv='AUC', within='groupe',
                                  parametric=True, padjust='fdr_bh',
                                  effsize='hedges')
    rounded = pairwise.round(6)

    # The transpose round-trip is kept exactly as before.
    # NOTE(review): it may change column dtypes - confirm before simplifying.
    table = pd.DataFrame(rounded.T).transpose()

    view = self.tableView
    view.setModel(DataFrameModel(table))
    view.resizeColumnsToContents()
def posthoc_ttests(dataframe, var_='dVz'):
    """ Run pairwise post-hoc t-tests on one variable of a mixed design.

    The between factor is 'condition', the within factor is 'block' and the
    subject identifier is 'user'. Tests are one-sided, parametric and
    adjusted with 'fdr_by'.

    :param dataframe: Aggregated data in long format containing the variable to test.
    :type dataframe: pandas.DataFrame
    :param var_: The variable to test. One of the column names in dataframe.
    :type var_: str
    :return: Pairwise T-tests results.
    :rtype: pandas.DataFrame
    """
    design = {
        'within': 'block',
        'between': 'condition',
        'subject': 'user',
        'within_first': False,
    }
    options = {
        'alpha': 0.05,
        'padjust': 'fdr_by',
        'marginal': True,
        'return_desc': True,
        'tail': 'one-sided',
        'parametric': True,
    }
    return pg.pairwise_ttests(data=dataframe, dv=var_, **design, **options)
def computeAnovas(dv, between_var, data, adjust_type, effect_size_type, save_dir):
    """Run a between-subject ANOVA and pairwise t-tests, saving both as CSV.

    The tables are written to ``<save_dir>/<dv>_anova.csv`` and
    ``<save_dir>/<dv>_ttests.csv`` (without the index) and printed.
    Nothing is returned.
    """
    import numpy as np
    import pandas as pd
    import pingouin as pg
    from pingouin import pairwise_ttests

    output_prefix = save_dir + '/' + dv

    # Omnibus ANOVA.
    anova_table = pg.anova(dv=dv, between=between_var, data=data)
    anova_table.to_csv(output_prefix + '_anova.csv', index=False)
    print(anova_table)

    # Pairwise follow-up comparisons.
    pairwise_table = pg.pairwise_ttests(dv=dv, between=between_var, data=data,
                                        padjust=adjust_type,
                                        effsize=effect_size_type)
    pairwise_table.to_csv(output_prefix + '_ttests.csv', index=False)
    print(pairwise_table)
np.r_[np.tile(np.arange(n), 3), np.tile(np.arange(n, n + n), 3)] }) #%% import seaborn as sns sns.pointplot(data=df, x='Time', y='Scores', hue='Group', dodge=True, markers=['o', 's'], size=.1, errwidth=1, palette='colorblind') #%% import pingouin as pg # Compute the two-way mixed-design ANOVA aov = pg.mixed_anova(dv='Scores', within='Time', between='Group', subject='Subject', data=df) # Pretty printing of ANOVA summary pg.print_table(aov) #%% posthocs = pg.pairwise_ttests(dv='Scores', within='Time', between='Group', subject='Subject', data=df) pg.print_table(posthocs)
print('brain structures left: ' + str(len(pd.unique(dane['abbrev'])))) #%% anova to check whether significant differences exist aov = pg.mixed_anova(data=dane, dv='signal_density', between='group_label', within='abbrev', subject='case_id', correction=True) print(aov[['Source', 'DF1', 'DF2', 'p-unc']]) #%% pairwise t tests #see https://pingouin-stats.org/generated/pingouin.pairwise_ttests.html#pingouin.pairwise_ttests post_hocs = pg.pairwise_ttests(data=dane, dv='signal_density', within='abbrev', between='group_label', subject='case_id', parametric=True, marginal=True, tail='two-sided', padjust='fdr_bh', effsize='cohen', return_desc=True) post_hocs_interaction = post_hocs[ post_hocs['Contrast'] == 'abbrev * group_label'].reset_index(drop=True) #%% saving export_csv = post_hocs_interaction.to_csv('pairwise_' + input_data_file, header=True)
for x in ax.get_children(): if isinstance(x, patches): x.set_alpha(0.25) grid.map_dataframe( sns.swarmplot, data=res_a.melt(id_vars="severity_group", value_name="% cells"), x="severity_group", y="% cells", palette=palettes.get("severity_group"), ) for ax in grid.axes.flat: ax.set_xticklabels(ax.get_xticklabels(), rotation=90) grid.fig.savefig( output_dir / "Tfol.CD185_vs_PD1.population_quantification.swarm_boxenplot.svg", **figkws, ) res = pd.concat([ pg.pairwise_ttests(data=res_a, parametric=False, dv=v, between="severity_group").assign(var=v) for v in res_a.columns[:-1] ]).drop("Contrast", axis=1) # the relevant population for the main figure is "f" res["p-cor"] = pg.multicomp(res["p-unc"].values, method="fdr_bh")[1] res.to_csv("diff.detailed4.csv", index=False)
# data = df[["small_size","small_color","small_colorAndSize"]] # Select large graph # data = df[["large_size","large_color","large_colorAndSize"]] # Read in total interactions time dataset # ***These all fail significance # df = pd.read_csv('ANOVA_interactions.csv') # Select just small graph # This one approaches significance # data = df[["small_size","small_color","small_colorAndSize"]] # Select large graph # data = df[["large_size","large_color","large_colorAndSize"]] # Run the repeated-measures ANOVA (because this is within-subjects) aov = pg.rm_anova(data, detailed=True) pg.print_table(aov) #print(aov) # Dataset must be expressed in long format for the pairwise t-tests: melted = pd.melt( data_post, id_vars=['Participant'], value_vars=["small_size", "small_color", "small_colorAndSize"], var_name='condition') post_hocs = pg.pairwise_ttests(dv='value', within='condition', subject='Participant', data=melted) post_hocs.round(3) pg.print_table(post_hocs)
# Input workbooks, read relative to PATH (defined elsewhere in the file).
DATA = "prolifc_data_combine_num_each_pp.xlsx"
DATA2 = "prolifc_data_each_pp.xlsx"
winsize = 0.6

# ANOVA within subject clustering (5) * type (2) for each winsize
data = pd.read_excel(PATH + DATA)
# Keep only the rows of the selected window size.
data = data[data["winsize"] == winsize]
aov = pg.rm_anova(data=data, dv="mean_deviation_score",
                  within=["percent_triplets", "protectzonetype"],
                  subject="participant")
# FDR-adjusted post-hoc comparisons over the same two within factors.
posthocs = pg.pairwise_ttests(
    dv="mean_deviation_score",
    within=["percent_triplets", "protectzonetype"],
    subject="participant",
    data=data,
    padjust="fdr_bh",
    effsize="cohen")

# ANOVA within subject
data2 = pd.read_excel(PATH + DATA2)
data2 = data2[data2["winsize"] == 0.4]
# winsize 0.4 unbalanced data
aov_table = AnovaRM(
    data=data2,
    depvar="mean_deviation_score",
    subject="participant",
    within=["protectzonetype", "numerosity", "percent_triplets"]).fit()
# NOTE(review): the summary is neither printed nor stored here.
aov_table.summary()
# add stats to title for ax in grid.axes.flat: ax.set(yscale="symlog") ax.set_ylim(bottom=0) var = ax.get_title().replace("population = ", "") try: child, parent = re.findall(r"(.*)/(.*)", var)[0] ax.set_title(child) ax.set_ylabel("Cells / uL") except IndexError: ax.set_title(var) grid.savefig(figfile) plt.close(grid.fig) import pingouin as pg m = matrix.join(meta[["severity_group"]]) m["severity_group"] = m["severity_group"].cat.remove_unused_categories() res = pd.concat( [ pg.pairwise_ttests( data=m, dv=var, between="severity_group", parametric=False ).assign(variable=var) for var in m.columns[:-1] ] ).drop(["Contrast"], axis=1) res["p-cor"] = pg.multicomp(res["p-unc"].values, method="fdr_bh")[1] res.to_csv("diff.absolute.csv", index=False)
"component": states_controls_df, "weight_level": weight_lvl_df, }) df_stats = df_stats[df_stats["component"] == "force"] df_stats = df_stats[df_stats["weight_level"] == "high"] df_stats = df_stats[df_stats["RMSE"].notna()] df_stats.to_pickle("stats_df_1.pkl") aov = pg.anova(dv="RMSE", between=["EMG_objective", "co_contraction_level"], data=df_stats) ptt = pg.pairwise_ttests( dv="RMSE", between=[ "co_contraction_level", "EMG_objective", ], data=df_stats, padjust="bonf", ) pg.print_table(aov.round(3)) pg.print_table(ptt.round(3)) # Figure of RMSE on force function of co-contraction level (Fig. 7) import matplotlib matplotlib.rcParams["legend.handlelength"] = 4 matplotlib.rcParams["legend.handleheight"] = 2.25 seaborn.set_style("whitegrid") cp = seaborn.color_palette("YlOrRd", 5) cp[-1] = (0, 102 / 255, 153 / 255)
plt.figure() ######################## #####ONE-WAY ANOVA##### ####################### ANOVA_1W = ols(source + " ~ C(condition)", data=df).fit() ANOVA_1W_table = sm.stats.anova_lm(ANOVA_1W, typ=2) ######################## ###perform post-hocs### ####################### # perform multiple pairwise comparison t_test_PP = pg.pairwise_ttests(dv=source, within='condition', subject='Id', data=df) t_test_df = t_test_PP.round(3) t_test_df = t_test_df.drop( ['Contrast', 'BF10', "Parametric", "Paired", "hedges"], axis=1) t_test_df["p-unc"] = 1.5 * t_test_df["p-unc"] t_test_df['Tail'] = t_test_df['Tail'].str.replace('two', 'one') t_test_df = t_test_df.loc[((t_test_df['A'] == "general") & (t_test_df['B'] == "rule") | (t_test_df['A'] == "rule") & (t_test_df['B'] == "subrule") | (t_test_df['A'] == "spec") & (t_test_df['B'] == "subrule"))] t_test_df.reset_index(inplace=True, drop=True) plt.figure()
rm_df = pd.melt( rm_df, id_vars=['Subject', 'Group', 'Phase'], value_vars=['Time Object/New Cons Chamber', 'Time Conspecific Chamber'], var_name='Side', value_name='Time') # Run ANOVA samp_anova = pg.anova(data=rm_df[rm_df['Phase'] == 'Sample'], dv='Time', between=['Side', 'Group']) #Save to csv samp_anova.to_csv( '/Users/labc02/Documents/PDCB_data/Behavior/Stats/sample_time_anova.csv') # post hoc test, pairwise_ttests, holm-bonf correction samp_posthoc = pg.pairwise_ttests(data=rm_df[rm_df['Phase'] == 'Sample'], dv='Time', between=['Group', 'Side'], padjust='holm') samp_posthoc.to_csv( '/Users/labc02/Documents/PDCB_data/Behavior/Stats/sample_time_posthoc.csv') samp_posthoc[['Group', 'p-corr']] ph_val = zip(['***', '***', '***', '*'], [6, 55, 102, 155], [120, 100, 120, 120]) for ii, jj, kk in ph_val: print(ii, jj, kk) test_anova = pg.anova(data=rm_df[rm_df['Phase'] == 'Test'], dv='Time', between=['Side', 'Group']) test_anova.to_csv( '/Users/labc02/Documents/PDCB_data/Behavior/Stats/test_time_anova.csv') test_posthoc = pg.pairwise_ttests(data=rm_df[rm_df['Phase'] == 'Test'],
# statDF['avoidCommitPercent'] = dfExpTrail.groupby(['name', 'decisionSteps'])["hasAvoidPoint"].mean() statDF['avoidCommitPercent'] = dfExpTrail.groupby(['name', 'decisionSteps', 'conditionName'])["hasAvoidPoint"].mean() statDF['ShowCommitmentPercent'] = statDF.apply(lambda x: 1 - x['avoidCommitPercent'], axis=1) statDF = statDF.reset_index() statDF['participantsType'] = ['RL Agent' if 'max' in name else 'Human' for name in statDF['name']] # statDF['avoidCommitPercentSE'] = statDF["avoidCommitPercent"].apply(calculateSE) import pingouin as pg aov = pg.rm_anova(dv='avoidCommitPercent', within=['decisionSteps', 'conditionName'], subject='name', data=statDF) # pg.print_table(aov) posthocs = pg.pairwise_ttests(dv='avoidCommitPercent', within=['decisionSteps', 'conditionName'], subject='name', data=statDF) # pg.print_table(posthocs) import seaborn as sns ax = sns.barplot(x="decisionSteps", y="ShowCommitmentPercent", hue="conditionName", data=statDF, ci=68) # ax.set(xlabel='Decision Step', ylabel='Show Commitment Ratio', title='Commitment with Deliberation') handles, labels = ax.get_legend_handles_labels() # labels.get_texts()[0].set_text('1 obstacle at crossroad') # labels.get_texts()[1].set_text('2 obstacles at crossroad') plt.xticks(fontsize=16, color='black') plt.yticks(fontsize=10, color='black') plt.xlabel('Steps-to-crossroad Condition', fontsize=16, color='black') plt.ylabel('Show Commitment Ratio', fontsize=16, color='black')
def test_pandas(self):
    """Test the pandas DataFrame methods against the matching ``pg`` functions.

    Each section calls a statistical method on a DataFrame and checks the
    result against the corresponding ``pg.<function>`` call or against
    fixed reference values.
    """
    # Test the ANOVA (Pandas)
    aov = df.anova(dv='Scores', between='Group', detailed=True)
    assert aov.equals(
        pg.anova(dv='Scores', between='Group', detailed=True, data=df))
    aov3_ss1 = df_aov3.anova(dv='Cholesterol', between=['Sex', 'Drug'],
                             ss_type=1)
    aov3_ss2 = df_aov3.anova(dv='Cholesterol', between=['Sex', 'Drug'],
                             ss_type=2)
    aov3_ss2_pg = pg.anova(dv='Cholesterol', between=['Sex', 'Drug'],
                           data=df_aov3, ss_type=2)
    # The two sums-of-squares types must give different tables.
    assert not aov3_ss1.equals(aov3_ss2)
    assert aov3_ss2.equals(aov3_ss2_pg)

    # Test the Welch ANOVA (Pandas)
    aov = df.welch_anova(dv='Scores', between='Group')
    assert aov.equals(pg.welch_anova(dv='Scores', between='Group', data=df))

    # Test the repeated measures ANOVA (Pandas)
    aov = df.rm_anova(dv='Scores', within='Time', subject='Subject',
                      detailed=True)
    assert aov.equals(
        pg.rm_anova(dv='Scores', within='Time', subject='Subject',
                    detailed=True, data=df))

    # FDR-corrected post hocs with Hedges'g effect size
    ttests = df.pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                                padjust='fdr_bh', effsize='hedges')
    assert ttests.equals(
        pg.pairwise_ttests(dv='Scores', within='Time', subject='Subject',
                           padjust='fdr_bh', effsize='hedges', data=df))

    # Test two-way mixed ANOVA
    aov = df.mixed_anova(dv='Scores', between='Group', within='Time',
                         subject='Subject', correction=False)
    assert aov.equals(
        pg.mixed_anova(dv='Scores', between='Group', within='Time',
                       subject='Subject', correction=False, data=df))

    # Test pairwise correlations
    corrs = data.pairwise_corr(columns=['X', 'M', 'Y'], method='spearman')
    corrs2 = pg.pairwise_corr(data=data, columns=['X', 'M', 'Y'],
                              method='spearman')
    assert corrs['r'].equals(corrs2['r'])

    # Test partial correlation
    corrs = data.partial_corr(x='X', y='Y', covar='M', method='spearman')
    corrs2 = pg.partial_corr(x='X', y='Y', covar='M', method='spearman',
                             data=data)
    assert corrs['r'].equals(corrs2['r'])

    # Test partial correlation matrix (compare with the ppcor package)
    corrs = data.pcorr().round(3)
    np.testing.assert_array_equal(corrs.iloc[0, :].values,
                                  [1, 0.392, 0.06, -0.014, -0.149])
    # Now compare against Pingouin's own partial_corr function
    corrs = data[['X', 'Y', 'M']].pcorr()
    corrs2 = data.partial_corr(x='X', y='Y', covar='M')
    assert round(corrs.loc['X', 'Y'], 3) == corrs2.loc['pearson', 'r']

    # Test rcorr (correlation matrix with p-values)
    # We compare against Pingouin pairwise_corr function
    corrs = df_corr.rcorr(padjust='holm')
    corrs2 = df_corr.pairwise_corr(padjust='holm')
    assert corrs.loc['Neuroticism', 'Agreeableness'] == '*'
    assert (corrs.loc['Agreeableness', 'Neuroticism'] ==
            str(corrs2.loc[2, 'r']))
    corrs = df_corr.rcorr(padjust='holm', stars=False, decimals=4)
    assert (corrs.loc['Neuroticism', 'Agreeableness'] == str(
        corrs2.loc[2, 'p-corr'].round(4)))
    corrs = df_corr.rcorr(upper='n')
    corrs2 = df_corr.pairwise_corr()
    assert corrs.loc['Extraversion', 'Openness'] == corrs2.loc[4, 'n']
    assert corrs.loc['Openness', 'Extraversion'] == str(corrs2.loc[4, 'r'])
    # Method = spearman does not work with Python 3.5 on Travis?
    # Instead it seems to return the Pearson correlation!
    # The two calls below only check that rcorr runs without raising.
    df_corr.rcorr(method='spearman')
    df_corr.rcorr()

    # Test mediation analysis
    med = data.mediation_analysis(x='X', m='M', y='Y', seed=42, n_boot=500)
    np.testing.assert_array_equal(med.loc[:, 'coef'].values,
                                  [0.5610, 0.6542, 0.3961, 0.0396, 0.3565])
ax.set_xticklabels(ax.get_xticklabels(), rotation=90) # better title for ax in grid.axes.flat: var = ax.get_title().replace("population = ", "") try: child, parent = re.findall(r"(.*)/(.*)", var)[0] ax.set_title(child) ax.set_ylabel(f"% {parent}") except IndexError: ax.set_title(var) grid.savefig(figfile) plt.close(grid.fig) data = (matrix.loc[:, v].join(meta[[cat_var]])).dropna() groups = (data["group"].value_counts()[ data["group"].value_counts() > 1].index.tolist()) data = data.loc[data["group"].isin(groups), :] data["group"] = data["group"].cat.remove_unused_categories() res = pd.concat([ pg.pairwise_ttests(data=data, parametric=False, dv=v, between="group").assign(var=v) for v in data.columns[:-1] ]).drop("Contrast", axis=1) res["p-cor"] = pg.multicomp(res["p-unc"].values, method="fdr_bh")[1] res = res.merge( pd.Series(panel, name="panel").rename_axis("var").reset_index()) res.to_csv("diff.detailed2.csv", index=False)
sns.set()
# NOTE(review): fig_name is not used in the lines visible here.
fig_name = f'{band}.png'
fig, ax = plt.subplots(1, 1, figsize=(12, 10))
# Point plot of Power per Condition, one line per Group.
sns.pointplot(data=df_tfr, x='Condition', y='Power', hue='Group', dodge=True,
              markers=['o', 's'], capsize=1, errwidth=1, palette='colorblind',
              ax=ax)
ax.set_title(f'Mixed ANOVA for {band} {freq[band]}')
filename = f'/Users/senthilp/Desktop/{band}_Mixed_ANOVA'
plt.savefig(filename, dpi=300)

# Mean and standard deviation of Power per Condition x Group cell.
sd = df_tfr.groupby(['Condition', 'Group'])['Power'].agg(['mean', 'std']).round(2)

# Mixed ANOVA and pairwise follow-up; 'Electrode' is passed as the subject identifier.
aov = pg.mixed_anova(dv='Power', within='Condition', between='Group',
                     subject='Electrode', data=df_tfr)
posthocs = pg.pairwise_ttests(dv='Power', within='Condition', between='Group',
                              subject='Electrode', data=df_tfr)
print(crosstab) # Compute the two-way mixed-design ANOVA calAnova = 0 if calAnova: import pingouin as pg aov = pg.mixed_anova(dv='ShowCommitmentPercent', within='decisionSteps', between='participantsType', subject='name', data=statDF) pg.print_table(aov) posthocs = pg.pairwise_ttests(dv='ShowCommitmentPercent', within='decisionSteps', between='participantsType', subject='name', data=statDF, within_first=0) pg.print_table(posthocs) VIZ = 0 if VIZ: import seaborn as sns ax = sns.barplot(x="decisionSteps", y="ShowCommitmentPercent", hue="participantsType", data=statDF, ci=68) # ax = sns.barplot(x="decisionSteps", y="ShowCommitmentPercent", hue="name", data=statDF, ci=68) # ax = sns.boxplot(x="decisionSteps", y="avoidCommitPercent", hue="participantsType", data=statDF, palette="Set1", showmeans=True)
def stats_effect_weeks(self, excel_path):
    """
    Perform RM ANOVA and pairwise T Test (Holm adjustment) on the mean of
    each week of training for each animal.

    The results are written to ``Stats.xlsx`` next to ``excel_path`` in
    three sheets: 'Data' (one row per animal, one column per week),
    'ANOVA' and 'Post Hoc'.

    Parameters
    ----------
    excel_path : str or path-like
        Excel file output from analysis(). It must contain 'Animal' and
        'Passing_Time' columns; the session number is read from the third
        column.

    Returns
    -------
    None.

    """
    df_excel = pd.read_excel(excel_path)  # read excel file output from analysis()

    # Classify sessions in weeks.
    # NOTE(review): self.range1 is presumably an inclusive range - confirm.
    week_sessions = {
        1: list(self.range1(1, 9)),
        2: list(self.range1(10, 14)),
        3: list(self.range1(15, 19)),
        4: list(self.range1(20, 24)),
        5: list(self.range1(25, 29)),
    }

    def week_of(session):
        """Return the week number of a session, or 'Error' if none matches."""
        for number, sessions in week_sessions.items():
            if session in sessions:
                return number
        return 'Error'

    # Add a column week
    df_excel['Semaine'] = [week_of(session) for session in df_excel.iloc[:, 2]]

    # Group in a new dataframe by animal and week and calculate the mean
    df_stats = df_excel[['Animal', 'Passing_Time',
                         'Semaine']].groupby(['Animal', 'Semaine']).mean().reset_index()

    # Rearrange in a new dataframe with a column for each week mean.
    # Values are looked up by (animal, week) rather than by row offset, so a
    # missing week yields NaN instead of shifting another row's value in.
    weekly_mean = dict(zip(zip(df_stats['Animal'].tolist(), df_stats['Semaine'].tolist()),
                           df_stats['Passing_Time'].tolist()))
    week_numbers = (1, 2, 3, 4, 5)
    rows = []
    for animal in dict.fromkeys(df_excel.Animal.tolist()):
        # As before, only animals that have a week-1 mean get a row.
        if (animal, 1) not in weekly_mean:
            continue
        row = {'Animal': animal}
        for number in week_numbers:
            row['Semaine {}'.format(number)] = weekly_mean.get((animal, number), float('nan'))
        rows.append(row)
    # Build the frame once; DataFrame.append was removed in pandas 2.0.
    df_stats_arranged = pd.DataFrame(rows, columns=[
        'Animal', 'Semaine 1', 'Semaine 2', 'Semaine 3', 'Semaine 4', 'Semaine 5'
    ])

    # create a dataframe with a repeated measure anova
    df_result = pd.DataFrame(
        pg.rm_anova(dv='Passing_Time', within='Semaine', subject='Animal',
                    data=df_stats, detailed=True))

    # create a dataframe with pairwise t tests (Holm adjustment)
    df_post_hocs = pd.DataFrame(
        pairwise_ttests(dv='Passing_Time', within='Semaine', subject='Animal',
                        data=df_stats, padjust='holm'))

    # Save in an excel file containing different sheets. Leaving the with
    # block saves and closes the workbook; ExcelWriter.save() was removed in
    # pandas 2.0.
    self.writer = pd.ExcelWriter('{}/Stats.xlsx'.format(Path(excel_path).parent),
                                 engine='xlsxwriter')
    with self.writer:
        df_stats_arranged.to_excel(self.writer, sheet_name='Data')
        df_result.to_excel(self.writer, sheet_name='ANOVA')
        df_post_hocs.to_excel(self.writer, sheet_name='Post Hoc')
plt.figure()
# Regression plot of RT against Cond_L, with the mean RT drawn per Cond_L value.
ax = sns.regplot(x='Cond_L', y='RT', data=df, x_estimator=np.mean)
ax.set_title(diffdf)
ax.set(ylabel='Reaction Times (RT)', xlabel='Distance')
ax.set(xticks=np.arange(1, 5, 1)) #limit the number of ticks to 4
ax.set_xticklabels(['spec','sub','rule', 'gen'])
#plt.savefig('saving-a-seaborn-plot-as-pdf-file-300dpi.pdf', dpi = 300)

########################
###perform post-hocs###
#######################
# perform multiple pairwise comparison
t_test_PP = pg.pairwise_ttests(dv='RT', between='condition', data=df)#.round(3)
print(t_test_PP)

#check for a normal distribution
s_array = df[["RT"]].to_numpy()
shapiro_test, p_shapiro = stats.shapiro(s_array)
print("\nshapiro test results:", shapiro_test, ",", p_shapiro)
# p > 0.05 is reported as "normal distribution".
if p_shapiro > 0.05:
    print("p>0.05: normal distribution\n__________________________________\n__________________________________")
else:
    print("not normal distribution\n__________________________________\n__________________________________")

#########################
#### Analysis for OT ####
#########################
st.write( df.groupby([y_var, y_var2])[x_var].agg(['mean', 'std', 'sem']).round(2)) if y_var2 == "None": st.success("One-way repeated measures ANOVA results") st.write( pg.rm_anova(dv=x_var, within=y_var, subject=subject_var, data=df, detailed=True)) st.success("Post-hoc tests results") st.write( pg.pairwise_ttests(dv=x_var, within=y_var, subject=subject_var, data=df)) st.success("Plots are being generated") fig = plt.figure(figsize=(12, 6)) try: ax = sns.pointplot(data=df, x=y_var, y=x_var, capsize=.06, errwidth=0.7, ci=error, order=groups_selection) st.pyplot(fig) except: st.error("Please specify at least one within level!")
# Toggle for exporting the pivot table.
# NOTE(review): the flag is overwritten by the table itself inside the branch.
pivot_t = True
if pivot_t:
    pivot_t = pd.pivot_table(data, index = ["crowdingcons", "participant_N"], columns = ["winsize"], values = "deviation_score")
    pivot_t.to_csv("pt_exp1.csv")

# Mean and std of the dependent variable per participant, winsize and crowding condition.
data_1 = data.groupby(["participant_N", "winsize", "crowdingcons"])[dv].agg(
    ["mean", "std"]).reset_index(level = ["participant_N", "winsize", "crowdingcons"])
# Name the "mean" column after the dependent variable so it can be used as dv below.
rename_df_col(df = data_1, old_col_name = "mean", new_col_name = dv)

# mean crowding vs. no-crowding
crowdingcon = 1
cal_ds_mean(data, crowdingcon = crowdingcon)
cal_ds_std(data, crowdingcon = crowdingcon)

# 2 way anova
aov = pg.rm_anova(dv = dv, within = ["winsize", "crowdingcons"], subject = "participant_N", data = data_1)

# post hoc
posthocs = pg.pairwise_ttests(dv = dv, within = ["winsize", "crowdingcons"], subject = "participant_N",
                              data = data_1, padjust = "fdr_bh", effsize = "cohen")
def analyse(self, parameter_list={"all"}, between_factor_list=["Subject_type"], within_factor_list=["Stimuli_type"], statistical_test="Mixed_anova", file_creation=True, ttest_type=1):
    """Run the selected statistical test on every requested indicator.

    For each sensor in ``self.sensors`` and each indicator listed for it in
    ``Sensor.meta_cols``, a long-format data frame is assembled (one row per
    value, holding the value, the between and within factors, the subject
    name and the stimulus name) and handed to the chosen test. Results are
    printed and, when ``file_creation`` is true, also written to csv.

    Parameters
    ----------
    parameter_list : set, optional
        Indicators to analyse, e.g. ``{"blink_rate", "avg_blink_duration"}``.
        The default ``{"all"}`` analyses every indicator. ``pupil_size`` and
        ``pupil_size_downsample`` are always skipped.
    between_factor_list : list of str, optional
        Between-group factors, default ``["Subject_type"]``.
        ``"Subject_type"`` is read from the subject object; any other factor
        is looked up in the json file under
        ``["Subjects"][<subject type>][<subject name>][<factor>]``.
        ``"Subject_type"`` must be listed explicitly to be used, e.g.
        ``between_factor_list=["Subject_type", "Gender"]``.
    within_factor_list : list of str, optional
        Within-group factors, default ``["Stimuli_type"]``.
        ``"Stimuli_type"`` is the stimulus type being iterated; any other
        factor is looked up in the json file under
        ``["Stimuli"][<stimuli type>][<stimulus name>][<factor>]``.
        ``"Stimuli_type"`` must be listed explicitly to be used, e.g.
        ``within_factor_list=["Stimuli_type", "factor_X"]``.
    statistical_test : {"Mixed_anova", "RM_anova", "anova", "ttest", "welch_ttest", None}, optional
        Test to perform.

        - ``"Mixed_anova"``: uses the first between and the first within
          factor; a message is printed if more than one of either is given.
        - ``"RM_anova"``: passes all within factors; a message is printed
          unless there are 1 or 2 of them.
        - ``"anova"``: OLS ANOVA (type 2) over all between factors, once
          with and once without interaction terms.
        - ``"ttest"`` / ``"welch_ttest"``: see ``ttest_type``.

        Any other value runs no test. If it is not ``None`` and
        ``file_creation`` is true, an (empty) csv named after it is still
        created.
    file_creation : bool, optional
        When true, a ``Results`` directory is created under the ``"Path"``
        entry of the json file. It receives ``<statistical_test>.csv`` with
        the test output (overwriting any existing file) and
        ``Data/<indicator>_data.csv`` with the assembled data frame of each
        indicator. When false, nothing is written to disk.
    ttest_type : int, optional
        For ``"ttest"``: 1 uses the between factors, 2 the within factors,
        3 both. For ``"welch_ttest"``: 1 uses the first between factor, 2
        the first within factor. Any other value prints a message and
        skips the indicator.

    Notes
    -----
    A factor that cannot be found in the json file is reported and recorded
    as NaN so that the row stays aligned with the data frame columns.
    The default containers in the signature are never mutated here.

    Examples
    --------
    Mixed ANOVA on all indicators with the default factors::

        exp.analyse()

    2-way ANOVA on two indicators without creating result files::

        exp.analyse(parameter_list={"blink_rate", "avg_blink_duration"},
                    between_factor_list=["Subject_type", "Gender"],
                    statistical_test="anova",
                    file_creation=False)
    """
    # Indicators that are never analysed by this method.
    meta_not_to_be_considered = ("pupil_size", "pupil_size_downsample")
    # Indicators whose stimulus is resolved through summationArrayCalculation
    # and return_index rather than by the position of the value.
    event_metas = ("sacc_duration", "sacc_vel", "sacc_amplitude",
                   "ms_duration", "ms_vel", "ms_amplitude")
    # Errors expected when a factor is missing from the json structure.
    lookup_errors = (KeyError, IndexError, TypeError)

    with open(self.json_file, "r") as json_f:
        json_data = json.load(json_f)

    directory_path = None
    csvFile = None
    writer = None

    if file_creation:
        directory_path = json_data["Path"] + "/Results"
        if not os.path.isdir(directory_path):
            os.mkdir(directory_path)
        if not os.path.isdir(directory_path + '/Data/'):
            os.mkdir(directory_path + '/Data/')

        if statistical_test is not None:
            file_path = directory_path + "/" + statistical_test + ".csv"
            csvFile = open(file_path, 'w')
            writer = csv.writer(csvFile)

    # try/finally so the results file is closed even if a test raises.
    try:
        for sen in self.sensors:
            for meta in Sensor.meta_cols[sen]:
                if meta in meta_not_to_be_considered:
                    continue

                if ('all' not in parameter_list) and (meta not in parameter_list):
                    continue

                print("\n\n")
                print("\t\t\t\tAnalysis for ",meta)

                # Long-format frame fed to the statistical functions:
                # indicator value, between factors, within factors,
                # subject name and stimulus name.
                column_list = [meta]
                column_list.extend(between_factor_list)
                column_list.extend(within_factor_list)
                column_list.append("subject")
                column_list.append("stimuli_name")
                data = pd.DataFrame(columns=column_list)

                is_event_meta = meta in event_metas

                # For each subject
                for sub_index, sub in enumerate(self.subjects):
                    # For each stimulus type
                    for stimuli_index, stimuli_type in enumerate(sub.aggregate_meta):
                        if is_event_meta:
                            summation_array = self.summationArrayCalculation(meta, sub_index, stimuli_index)

                        value_array = self.meta_matrix_dict[1][meta][sub_index,stimuli_index]

                        # Number of zero entries skipped so far; they are
                        # discounted from the index given to return_index.
                        index_extra = 0

                        for value_index, value in enumerate(value_array):
                            if is_event_meta:
                                if value == 0:
                                    index_extra += 1
                                    continue
                                proper_index = self.return_index(value_index-index_extra, summation_array)
                                stimulus_name = self.stimuli[stimuli_type][proper_index]
                            else:
                                stimulus_name = self.stimuli[stimuli_type][value_index]

                            row = [value]

                            # Between group factors (defined in the json file)
                            for param in between_factor_list:
                                if param == "Subject_type":
                                    row.append(sub.subj_type)
                                    continue
                                try:
                                    row.append(json_data["Subjects"][sub.subj_type][sub.name][param])
                                except lookup_errors:
                                    print("Between subject paramter: ", param, " not defined in the json file")
                                    # Placeholder keeps the row aligned with
                                    # the columns of `data`.
                                    row.append(np.nan)

                            # Within group factors (defined in the json file).
                            # The stimulus name resolved above is reused: it
                            # is the only correct one for event indicators,
                            # where value_index is not a stimulus position.
                            for param in within_factor_list:
                                if param == "Stimuli_type":
                                    row.append(stimuli_type)
                                    continue
                                try:
                                    row.append(json_data["Stimuli"][stimuli_type][stimulus_name][param])
                                except lookup_errors:
                                    print("Within stimuli parameter: ", param, " not defined in the json file")
                                    row.append(np.nan)

                            row.append(sub.name)
                            row.append(stimulus_name)

                            if np.isnan(value):
                                print("The data being read for analysis contains null value: ", row)

                            # Append the row to the data frame
                            data.loc[len(data)] = row

                # The Data directory only exists when file creation was
                # requested, so the dump is skipped otherwise.
                if file_creation:
                    data.to_csv(directory_path + '/Data/' + meta + "_data.csv")

                # Depending on the parameter, run the requested test
                if statistical_test == "Mixed_anova":
                    # The messages are warnings only: the analysis goes on
                    # with the first factor of each list.
                    if len(within_factor_list)>1:
                        print("Error: Too many within group factors,\nMixed ANOVA can only accept 1 within group factor\n")
                    elif len(between_factor_list)>1:
                        print("Error: Too many between group factors,\nMixed ANOVA can only accept 1 between group factor\n")

                    print(meta, ":\tMixed ANOVA")
                    aov = pg.mixed_anova(dv=meta, within=within_factor_list[0], between=between_factor_list[0], subject='subject', data=data)
                    pg.print_table(aov)

                    if file_creation:
                        values_list = ["Mixed Anova: ", meta]
                        self.fileWriting(writer, csvFile, aov, values_list)

                    posthocs = pg.pairwise_ttests(dv=meta, within=within_factor_list[0], between=between_factor_list[0], subject='subject', data=data)
                    pg.print_table(posthocs)

                    if file_creation:
                        values_list = ["Post Hoc Analysis"]
                        self.fileWriting(writer, csvFile, posthocs, values_list)

                elif statistical_test == "RM_anova":
                    if len(within_factor_list)>2 or len(within_factor_list)<1:
                        print("Error: Too many or too few within group factors,\nRepeated Measures ANOVA can only accept 1 or 2 within group factors\n")

                    print(meta, ":\tRM ANOVA")
                    aov = pg.rm_anova(dv=meta, within=within_factor_list, subject='subject', data=data)
                    pg.print_table(aov)

                    if file_creation:
                        values_list = ["Repeated Measures Anova: ", meta]
                        self.fileWriting(writer, csvFile, aov, values_list)

                elif statistical_test == "anova":
                    print(meta, ":\tANOVA")

                    # "<meta> ~ C(f1)*C(f2)*..." - full factorial model
                    model_equation = meta + " ~ C(" + ")*C(".join(between_factor_list) + ")" if between_factor_list else meta + " ~ C("

                    print("Including interaction effect")
                    print(model_equation)
                    model = ols(model_equation, data).fit()
                    res = sm.stats.anova_lm(model, typ=2)
                    print(res)

                    if file_creation:
                        values_list = ["Anova including interaction effect: ", meta]
                        self.fileWriting(writer, csvFile, res, values_list)

                    print("\nExcluding interaction effect")
                    # Same terms, main effects only
                    model_equation = model_equation.replace("*", "+")
                    print(model_equation)
                    model = ols(model_equation, data).fit()
                    res = sm.stats.anova_lm(model, typ=2)
                    print(res)

                    if file_creation:
                        values_list = ["Anova excluding interaction effect: ", meta]
                        self.fileWriting(writer, csvFile, res, values_list)

                elif statistical_test == "ttest":
                    print(meta, ":\tt test")

                    # Stays None for an invalid ttest_type so that nothing
                    # stale or undefined is written to the results file.
                    aov = None
                    if ttest_type==1:
                        aov = pg.pairwise_ttests(dv=meta, between=between_factor_list, subject='subject', data=data)
                        pg.print_table(aov)
                    elif ttest_type==2:
                        aov = pg.pairwise_ttests(dv=meta, within=within_factor_list, subject='subject', data=data)
                        pg.print_table(aov)
                    elif ttest_type==3:
                        aov = pg.pairwise_ttests(dv=meta, between=between_factor_list, within=within_factor_list, subject='subject', data=data)
                        pg.print_table(aov)
                    else:
                        print("The value given to ttest_type is not acceptable, it must be either 1 or 2 or 3")

                    if file_creation and aov is not None:
                        values_list = ["Pairwise ttest: ", meta]
                        self.fileWriting(writer, csvFile, aov, values_list)

                elif statistical_test == "welch_ttest":
                    print(meta, ":\tWelch t test")

                    # Same guard as for the plain t test.
                    normality = None
                    aov = None
                    if ttest_type==1:
                        normality,aov = self.welch_ttest(dv=meta, factor=between_factor_list[0], subject='subject', data=data)
                        pg.print_table(normality)
                        pg.print_table(aov)
                    elif ttest_type==2:
                        normality,aov = self.welch_ttest(dv=meta, factor=within_factor_list[0], subject='subject', data=data)
                        pg.print_table(normality)
                        pg.print_table(aov)
                    else:
                        print("The value given to ttest_type for welch test is not acceptable, it must be either 1 or 2")

                    if file_creation and aov is not None:
                        values_list = ["Welch Pairwise ttest: ", meta]
                        self.fileWriting(writer, csvFile, normality, values_list)
                        self.fileWriting(writer, csvFile, aov, values_list)
    finally:
        if csvFile is not None:
            csvFile.close()