def perform_kappa_anova(self):
    """Run the Group x Comparison mixed ANOVA on Cohen's Kappa plus post-hoc tests.

    Reads ``self.df_kappa_long``; the columns "Kappa", "Comparison", "Group"
    and "ID" are handed to pingouin by name.

    Side effects:
        - prints a banner and the ANOVA table;
        - sets ``self.kappa_aov`` to the table returned by ``pg.mixed_anova``;
        - sets ``self.kappa_posthoc`` to the table returned by
          ``pg.pairwise_ttests`` (Bonferroni adjustment, Hedges' g effect size,
          parametric tests).
    """
    print("\nPerforming Group x Comparison mixed ANOVA on Cohen's Kappa values.")

    # The omnibus test and the post-hoc comparisons share the same design.
    design = dict(dv="Kappa", within="Comparison", between="Group", subject="ID")

    # MIXED ANOVA: Group (between) x Comparison (within)
    self.kappa_aov = pg.mixed_anova(data=self.df_kappa_long, correction=True, **design)
    pg.print_table(self.kappa_aov)

    # POST HOC
    self.kappa_posthoc = pg.pairwise_ttests(
        data=self.df_kappa_long,
        padjust="bonf",
        effsize="hedges",
        parametric=True,
        **design,
    )
def twoMixANOVA(adaptation, var):
    """Run a two-way mixed ANOVA per light intensity for one adaptation series.

    Group is the between-subject factor: male wildtype (mWT), female wildtype
    (fWT), male knockout (mKO), female knockout (fKO), male heterozygous (mHT),
    female heterozygous (fHT).
    Timepoint is the within-subject factor: the different time points at which
    the ERG was recorded for the same animal (TP1, TP2, TP3).

    The rows of the module-level ``depvar`` table whose 'Adaptation' equals
    ``adaptation`` are split by 'Light_intensity'; one ANOVA table is computed
    and printed per intensity. All tables are stacked, written to
    ``savestatsto + adaptation + '__' + var + '.xlsx'`` and returned.

    Parameters
    ----------
    adaptation : str
        Adaptation condition in which the ERG was recorded: dark-adapted
        ('DA'), light-adapted ('LA') or mesopic-adapted ('MA').
    var : str
        Dependent variable to analyse ('a_amp', 'b_amp', 'a_time', 'b_time').

    Returns
    -------
    pandas.DataFrame
        The per-intensity ANOVA tables concatenated in group order (empty if
        no row matches ``adaptation``).
    """
    df_adaptation = depvar.loc[depvar['Adaptation'] == adaptation]

    # Collect the tables and concatenate once: DataFrame.append was removed
    # in pandas 2.0.
    tables = []
    for _, light_df in df_adaptation.groupby('Light_intensity'):
        # correction true/false depends on whether the design is balanced
        aov = pg.mixed_anova(
            data=light_df,
            dv=var,
            between='Group',
            within='Timepoint',
            subject='Animal',
            correction=False,
        )
        tables.append(aov)
        pg.print_table(aov)

    # pd.concat refuses an empty list; keep the empty-table result instead.
    results = pd.concat(tables) if tables else pd.DataFrame()
    results.to_excel(savestatsto + adaptation + '_' + '_' + var + '.xlsx')
    return results
def tukey_pairwise_ph(tidy_df, hour_col: str = "Hour", dep_var: str = "Value", protocol_col: str = "Protocol"):
    """Run a Tukey post-hoc test between protocols separately for each hour.

    :param tidy_df: long-format DataFrame containing ``hour_col``, ``dep_var``
        and ``protocol_col`` columns
    :param hour_col: column whose unique values define the subsets to test
    :param dep_var: column holding the dependent variable
    :param protocol_col: column holding the between-group factor
    :returns: the per-hour ``pg.pairwise_tukey`` tables concatenated with the
        hour value as the outer index level
    """
    ph_dict = {}
    for hour in tidy_df[hour_col].unique():
        print(hour)
        # Boolean mask rather than a string-built query: the query form quoted
        # every value as a string, so it matched nothing for numeric hours and
        # failed on column names that are not valid identifiers.
        hour_df = tidy_df[tidy_df[hour_col] == hour]
        ph = pg.pairwise_tukey(dv=dep_var, between=protocol_col, data=hour_df)
        pg.print_table(ph)
        ph_dict[hour] = ph

    ph_df = pd.concat(ph_dict)
    return ph_df
def comparison_by_group_anova(self, dependent_var):
    """Performs a Group x Comparison mixed ANOVA on the dependent variable that is passed in.
    Performs pairwise T-test comparisons for post-hoc analysis.
    Plots group means using Seaborn package.

    :argument
    -dependent_var: name of column in self.df to use as dependent variable

    :returns
    -tuple (aov, posthoc): tables returned by pingouin's mixed_anova and
     pairwise_ttests
    """

    # The two literals are concatenated; the trailing space keeps "for" and
    # "dependent" from running together in the printed banner.
    print("\nPerforming Group x Comparison mixed ANOVA for "
          "dependent variable {}.".format(dependent_var.capitalize()))

    aov = pg.mixed_anova(dv=dependent_var, within="COMPARISON", between="GROUP",
                         subject="ID", data=self.df)

    # The table is printed in two parts to keep it readable.
    # NOTE(review): column index 8 is in neither slice - confirm that skipping
    # it is intentional.
    pg.print_table(aov.iloc[:, 0:8])
    print()
    pg.print_table(aov.iloc[:, 9:])

    sns.pointplot(data=self.df, x='GROUP', y=dependent_var, hue='COMPARISON',
                  dodge=False, markers='o', capsize=.1, errwidth=1, palette='Set1')
    plt.title("Group x Comparison Mixed ANOVA: {}".format(dependent_var.capitalize()))

    posthoc = pg.pairwise_ttests(dv=dependent_var, within="COMPARISON", between='GROUP',
                                 subject='ID', data=self.df)
    pg.print_table(posthoc)

    return aov, posthoc
'Pca100_ridge','Pca300_ridge','Pca300_cca30'] subjects_ = [np.tile(el,pnts4subj) for el in subjects] subjects_= np.concatenate(subjects_) subjects_=subjects_.tolist()*len(models) models_=[np.tile(el, pnts4subj*len(subjects)) for el in models] models_=np.concatenate(models_) model_types_=[np.tile(el,pnts4subj*len(subjects)) for el in model_types] model_types_=np.concatenate(model_types_) data= {'model':models_, 'model_type':model_types_, 'subject':subjects_,'data':data} df = pd.DataFrame.from_dict(data) df.to_csv('/data/akitaitsev/decoding_model_bids/decoding_data/statistics/df_long_cor_spectr.csv',\ index=False) # violin plot fig, ax = plt.subplots(figsize=(16,9)) sea.violinplot(ax=ax, x='model',y='data', hue='model_type', kind='violin',inner='quartile',hue_order=['SM','STM'],data=df) plt.show() fig.savefig('/data/akitaitsev/decoding_model_bids/decoding_data/violinplots_spectr.png', dpi=300) ### statistical analyis aov=pg.mixed_anova(dv='data', between='model_type',within='model',subject='subject', data=df) aov.to_csv('/data/akitaitsev/decoding_model_bids/decoding_data/statistics/anova_rep_measures_spectr.csv') print(aov) pg.print_table(aov) model=ols('data~C(model)+C(model_type)+C(subject)', data=df).fit() anova=sm.stats.anova_lm(model, typ=2) anova.to_csv('/data/akitaitsev/decoding_model_bids/decoding_data/statistics/anova_3way_spect.csv') print(anova)
def analyse(self, parameter_list={"all"}, between_factor_list=["Subject_type"],
            within_factor_list=["Stimuli_type"], statistical_test="Mixed_anova",
            file_creation=True, ttest_type=1):
    """This function carries out the required statistical analysis.

    The analysis is carried out on the specified indicators/parameters using the data
    extracted from all the subjects that were mentioned in the json file. For every
    indicator a long-format DataFrame is built (one row per recorded value, with the
    between/within factors, the subject name and the stimulus name) and handed to the
    chosen test. The tests available are Mixed ANOVA, Repeated Measures ANOVA,
    pairwise T test, Welch T test and simple ANOVA (any number of between factors).

    Parameters
    ----------
    parameter_list: set (optional)
        Set of the different indicators/parameters (Pupil_size, Blink_rate) on which
        statistical analysis is to be performed, by default it will be "all" so that all
        the parameters are considered.
    between_factor_list: list(str) (optional)
        List of between group factors, by default it will only contain "Subject_type".
        If any additional parameter (eg: Gender) needs to be considered, then the list
        will be: between_factor_list = ["Subject_type", "Gender"].
        DO NOT FORGET TO INCLUDE "Subject_type", if you wish to consider "Subject_type"
        as a between group factor.
        Eg: between_factor_list = ["factor_x"] will no longer consider "Subject_type" as
        a factor.
        Please go through the README FILE to understand how the JSON FILE is to be
        written for between group factors to be considered.
    within_factor_list: list(str) (optional)
        List of within group factors, by default it will only contain "Stimuli_type".
        If any additional parameter needs to be considered, then the list will be:
        within_factor_list = ["Stimuli_type", "factor_X"].
        DO NOT FORGET TO INCLUDE "Stimuli_type", if you wish to consider "Stimuli_type"
        as a within group factor.
        Eg: within_factor_list = ["factor_x"] will no longer consider "Stimuli_type" as
        a factor.
        Please go through the README FILE to understand how the JSON FILE is to be
        written for within group factors to be considered.
    statistical_test: str {"Mixed_anova","RM_anova","ttest","welch_ttest","anova","None"} (optional)
        Name of the statistical test that has to be performed.
            NOTE:
            - ttest: There are 3 options for ttest, and your choice of factors must
              comply with one of those options, see `ttest_type` below.
            - welch_ttest: There are 2 options for Welch ttest, and your choice of
              factors must comply with one of those options, see `ttest_type` below.
            - Mixed_anova: Only 1 between group factor and 1 within group factor can be
              considered at any point of time
            - anova: Any number of between group factors can be considered for analysis
            - RM_anova: Upto 2 within group factors can be considered at any point of
              time
    file_creation: bool (optional)
        Indicates whether csv files should be created. When True, the statistical
        results are written to "<statistical_test>.csv" inside a "Results" directory
        created within the directory whose path is mentioned in the json file, and the
        per-indicator input table is written to "Results/Data/<indicator>_data.csv".
        If any previous file by the same name exists, it will be overwritten.
        When False, nothing is written to disk.
    ttest_type: int {1,2,3} (optional)
        Indicates what type of parameters will be considered for the ttest and
        Welch ttest
            NOTE:
            For ttest-
            - 1: Upto 2 between group factors will be considered for ttest
            - 2: 1 within group factor will be considered for ttest
            - 3: 1 within group and 1 between group factor will be considered for ttest

            For Welch ttest-
            - 1: Will consider the first factor in 'between_factor_list'
            - 2: Will consider the first factor in 'within_factor_list'

    Notes
    -----
    The mutable default arguments are kept for interface compatibility; this function
    does not modify them itself.

    Examples
    --------
    For calculating Mixed ANOVA, on all the parameters, considering Subject_type and
    Stimuli_type as between and within group factors respectively

    >>> analyse(self, parameter_list={"all"}, between_factor_list=["Subject_type"],
    ...         within_factor_list=["Stimuli_type"], statistical_test="Mixed_anova",
    ...         file_creation=True)
    OR
    >>> analyse(self)   (as all of these options are present by default)

    For calculating 2-way ANOVA, for "blink_rate" and "avg_blink_duration", considering
    Subject_type and Gender as the between group factors while NOT creating a new csv
    file with the results

    >>> analyse(self, parameter_list={"blink_rate", "avg_blink_duration"},
    ...         between_factor_list=["Subject_type", "Gender"],
    ...         statistical_test="anova", file_creation=False)
    """

    with open(self.json_file, "r") as json_f:
        json_data = json.load(json_f)

    csvFile = None
    writer = None
    directory_path = None

    if file_creation:
        directory_path = json_data["Path"] + "/Results"
        if not os.path.isdir(directory_path):
            os.mkdir(directory_path)

        if not os.path.isdir(directory_path + '/Data/'):
            os.mkdir(directory_path + '/Data/')

        if statistical_test is not None:
            file_path = directory_path + "/" + statistical_test + ".csv"
            csvFile = open(file_path, 'w')
            writer = csv.writer(csvFile)

    meta_not_to_be_considered = ["pupil_size", "pupil_size_downsample"]

    # Indicators whose value arrays are mapped back to a stimulus through
    # summationArrayCalculation / return_index instead of by position.
    event_metas = ["sacc_duration", "sacc_vel", "sacc_amplitude",
                   "ms_duration", "ms_vel", "ms_amplitude"]

    # try/finally so the results file is closed even when a test raises.
    try:
        for sen in self.sensors:
            for meta in Sensor.meta_cols[sen]:
                if meta in meta_not_to_be_considered:
                    continue

                if ('all' not in parameter_list) and (meta not in parameter_list):
                    continue

                print("\n\n")
                print("\t\t\t\tAnalysis for ",meta)

                # For the purpose of statistical analysis, a pandas dataframe needs to be
                # created that can be fed into the statistical functions.
                # The columns required are - meta (indicator), the between factors
                # (eg: Subject type or Gender), the within group factor (eg: Stimuli Type),
                # Subject name/id and stimulus name.
                column_list = [meta]
                column_list.extend(between_factor_list)
                column_list.extend(within_factor_list)
                column_list.append("subject")
                column_list.append("stimuli_name")

                data = pd.DataFrame(columns=column_list)

                # For each subject
                for sub_index, sub in enumerate(self.subjects):
                    # For each Question Type
                    for stimuli_index, stimuli_type in enumerate(sub.aggregate_meta):

                        if meta in event_metas:
                            summation_array = self.summationArrayCalculation(meta, sub_index, stimuli_index)

                        value_array = self.meta_matrix_dict[1][meta][sub_index,stimuli_index]

                        # Number of zero entries skipped so far in this value_array.
                        index_extra = 0

                        for value_index, value in enumerate(value_array):

                            if meta in event_metas:
                                if value == 0:
                                    index_extra += 1
                                    continue

                                proper_index = self.return_index(value_index-index_extra, summation_array)
                                stimulus_name = self.stimuli[stimuli_type][proper_index]
                            else:
                                stimulus_name = self.stimuli[stimuli_type][value_index]

                            row = []
                            row.append(value)

                            # Add the between group factors (need to be defined in the json file)
                            for param in between_factor_list:
                                if param == "Subject_type":
                                    row.append(sub.subj_type)
                                    continue

                                try:
                                    row.append(json_data["Subjects"][sub.subj_type][sub.name][param])
                                except (KeyError, IndexError, TypeError):
                                    print("Between subject paramter: ", param, " not defined in the json file")

                            for param in within_factor_list:
                                if param == "Stimuli_type":
                                    row.append(stimuli_type)
                                    continue

                                try:
                                    # NOTE(review): this re-derives stimulus_name from
                                    # value_index and so overrides the proper_index-based
                                    # name chosen above for event indicators - confirm
                                    # this is intended.
                                    stimulus_name = self.stimuli[stimuli_type][value_index]
                                    row.append(json_data["Stimuli"][stimuli_type][stimulus_name][param])
                                except (KeyError, IndexError, TypeError):
                                    print("Within stimuli parameter: ", param, " not defined in the json file")

                            row.append(sub.name)
                            row.append(stimulus_name)

                            if np.isnan(value):
                                print("The data being read for analysis contains null value: ", row)

                            # Instantiate into the pandas dataframe
                            data.loc[len(data)] = row

                # directory_path only exists when files are being created; writing
                # unconditionally raised NameError for file_creation=False.
                if file_creation:
                    data.to_csv(directory_path + '/Data/' + meta + "_data.csv")

                # Depending on the parameter, choose the statistical test to be done
                if statistical_test == "Mixed_anova":

                    if len(within_factor_list)>1:
                        print("Error: Too many within group factors,\nMixed ANOVA can only accept 1 within group factor\n")
                    elif len(between_factor_list)>1:
                        print("Error: Too many between group factors,\nMixed ANOVA can only accept 1 between group factor\n")

                    print(meta, ":\tMixed ANOVA")
                    aov = pg.mixed_anova(dv=meta, within=within_factor_list[0], between=between_factor_list[0], subject='subject', data=data)
                    pg.print_table(aov)

                    if file_creation:
                        values_list = ["Mixed Anova: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, aov, values_list)

                    posthocs = pg.pairwise_ttests(dv=meta, within=within_factor_list[0], between=between_factor_list[0], subject='subject', data=data)
                    pg.print_table(posthocs)

                    if file_creation:
                        values_list = ["Post Hoc Analysis"]
                        self.fileWriting(writer, csvFile, posthocs, values_list)

                elif statistical_test == "RM_anova":

                    if len(within_factor_list)>2 or len(within_factor_list)<1:
                        print("Error: Too many or too few within group factors,\nRepeated Measures ANOVA can only accept 1 or 2 within group factors\n")

                    print(meta, ":\tRM ANOVA")
                    aov = pg.rm_anova(dv=meta, within= within_factor_list, subject = 'subject', data=data)
                    pg.print_table(aov)

                    if file_creation:
                        values_list = ["Repeated Measures Anova: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, aov, values_list)

                elif statistical_test == "anova":

                    print(meta, ":\tANOVA")
                    length = len(between_factor_list)
                    model_equation = meta + " ~ C("

                    # Builds "meta ~ C(f1)*C(f2)*...*C(fn)"
                    for factor_index, _ in enumerate(between_factor_list):
                        if(factor_index<length-1):
                            model_equation = model_equation + between_factor_list[factor_index] + ")*C("
                        else:
                            model_equation = model_equation + between_factor_list[factor_index] + ")"

                    print("Including interaction effect")
                    print(model_equation)
                    model = ols(model_equation, data).fit()
                    res = sm.stats.anova_lm(model, typ= 2)
                    print(res)

                    if file_creation:
                        values_list = ["Anova including interaction effect: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, res, values_list)

                    print("\nExcluding interaction effect")
                    model_equation = model_equation.replace("*", "+")
                    print(model_equation)
                    model = ols(model_equation, data).fit()
                    res = sm.stats.anova_lm(model, typ= 2)
                    print(res)

                    if file_creation:
                        values_list = ["Anova excluding interaction effect: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, res, values_list)

                elif statistical_test == "ttest":

                    print(meta, ":\tt test")

                    # Reset per indicator so an invalid ttest_type can neither raise
                    # NameError nor re-write the previous indicator's table.
                    aov = None

                    if ttest_type==1:
                        aov = pg.pairwise_ttests(dv=meta, between=between_factor_list, subject='subject', data=data)
                        pg.print_table(aov)
                    elif ttest_type==2:
                        aov = pg.pairwise_ttests(dv=meta, within=within_factor_list, subject='subject', data=data)
                        pg.print_table(aov)
                    elif ttest_type==3:
                        aov = pg.pairwise_ttests(dv=meta, between=between_factor_list, within=within_factor_list, subject='subject', data=data)
                        pg.print_table(aov)
                    else:
                        print("The value given to ttest_type is not acceptable, it must be either 1 or 2 or 3")

                    if file_creation and aov is not None:
                        values_list = ["Pairwise ttest: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, aov, values_list)

                elif statistical_test == "welch_ttest":

                    print(meta, ":\tWelch t test")

                    # Same guard as for the plain t test.
                    normality = None
                    aov = None

                    if ttest_type==1:
                        normality,aov = self.welch_ttest(dv=meta, factor=between_factor_list[0], subject='subject', data=data)
                        pg.print_table(normality)
                        pg.print_table(aov)
                    elif ttest_type==2:
                        normality,aov = self.welch_ttest(dv=meta, factor=within_factor_list[0], subject='subject', data=data)
                        pg.print_table(normality)
                        pg.print_table(aov)
                    else:
                        print("The value given to ttest_type for welch test is not acceptable, it must be either 1 or 2")

                    if file_creation and aov is not None:
                        values_list = ["Welch Pairwise ttest: "]
                        values_list.append(meta)
                        self.fileWriting(writer, csvFile, normality, values_list)
                        self.fileWriting(writer, csvFile, aov, values_list)
    finally:
        if csvFile is not None:
            csvFile.close()
# perform rm anova for each stage type ph_part_dict = {} for key, df in zip(totals_dict.keys(), totals_dict.values()): print(key) # tidy data long_df = df.stack().reset_index() long_df.columns = stat_colnames part_df = long_df.query("%s == '%s'" % (time, part)) # do anova part_rm = pg.rm_anova(dv=dep_var, within=day, subject=anim, data=part_df) pg.print_table(part_rm) # do posthoc ph = pg.pairwise_tukey(dv=dep_var, between=day, data=part_df) pg.print_table(ph) ph_part_dict[key] = ph stage_test_dir = part_dir / key anova_file = stage_test_dir / "01_anova.csv" ph_file = stage_test_dir / "02_posthoc.csv" part_rm.to_csv(anova_file) ph.to_csv(ph_file) ph_part_df = pd.concat(ph_part_dict) ph_total_dict[part] = ph_part_df
def perform_activity_anova(self, activity_intensity, data_type="percent"):
    """Plot and test a Group x Model mixed ANOVA for one activity intensity.

    :argument
    -activity_intensity: name of the intensity column to use as dependent
     variable; "%" is appended to it when data_type is "percent"
    -data_type: "percent" (uses self.df_percent) or "minutes" (uses self.df_mins)

    :raises
    -ValueError: if data_type is neither "percent" nor "minutes"

    Side effects: draws a seaborn point plot of the group means, prints the
    ANOVA table, a quick significance summary (threshold p <= 0.05) and the
    parametric post-hoc table; stores the ANOVA table in self.aov and the
    Bonferroni-adjusted parametric / non-parametric pairwise tests in
    self.posthoc_para / self.posthoc_nonpara.
    """

    # Select the data first so an unsupported data_type fails with a clear
    # message instead of an unbound local further down.
    if data_type == "percent":
        df = self.df_percent
        activity_intensity = activity_intensity + "%"
    elif data_type == "minutes":
        df = self.df_mins
    else:
        raise ValueError(
            "data_type must be 'percent' or 'minutes', got {!r}".format(data_type))

    # PLOTTING ---------------------------------------------------------------
    plt.title("Group x Model Mixed ANOVA: {} Activity".format(activity_intensity))

    # One line per model, groups on the x-axis
    sns.pointplot(data=df, x="Group", y=activity_intensity, hue="Model", ci=95,
                  dodge=False, markers='o', capsize=.1, errwidth=1, palette='Set1')
    plt.ylabel("{}".format(data_type.capitalize()))

    # STATISTICAL ANALYSIS ---------------------------------------------------
    print("\nPerforming Group x Model mixed ANOVA on {} activity.".format(activity_intensity))

    # Group (between) x Model (within) mixed ANOVA
    self.aov = pg.mixed_anova(dv=activity_intensity, within="Model", between="Group",
                              subject="ID", data=df, correction='auto')
    pg.print_table(self.aov)

    # Uncorrected p-value of each effect, looked up by its "Source" label.
    group_p = self.aov.loc[self.aov["Source"] == "Group"]["p-unc"]
    group_sig = group_p.values[0] <= 0.05

    model_p = self.aov.loc[self.aov["Source"] == "Model"]["p-unc"]
    model_sig = model_p.values[0] <= 0.05

    interaction_p = self.aov.loc[self.aov["Source"] == "Interaction"]["p-unc"]
    interaction_sig = interaction_p.values[0] <= 0.05

    print("ANOVA quick summary:")
    if model_sig:
        print("-Main effect of Model (p = {})".format(round(model_p.values[0], 3)))
    else:
        print("-No main effect of Model")

    if group_sig:
        print("-Main effect of Group (p = {})".format(round(group_p.values[0], 3)))
    else:
        print("-No main effect of Group")

    if interaction_sig:
        print("-Signficiant Group x Model interaction (p = {})".format(
            round(interaction_p.values[0], 3)))
    else:
        print("-No Group x Model interaction")

    # POST HOC ---------------------------------------------------------------
    posthoc_para = pg.pairwise_ttests(dv=activity_intensity, subject='ID',
                                      within="Model", between='Group', data=df,
                                      padjust="bonf", effsize="hedges", parametric=True)
    posthoc_nonpara = pg.pairwise_ttests(dv=activity_intensity, subject='ID',
                                         within="Model", between='Group', data=df,
                                         padjust="bonf", effsize="hedges", parametric=False)

    self.posthoc_para = posthoc_para
    self.posthoc_nonpara = posthoc_nonpara

    pg.print_table(posthoc_para)
# Q1: ANOVA on the log10-transformed spectrum values in `nrem_mean`.
stats_spec_df = np.log10(nrem_mean)
# drop the third index level, then reshape wide -> long
stats_spec_df.index = stats_spec_df.index.droplevel(2)
stats_spec_df = stats_spec_df.stack().reset_index()

# Columns are addressed by position after reset_index().
# NOTE(review): assumes the order is animal, day, frequency, power - confirm.
anim_col = stats_spec_df.columns[0]
day_col = stats_spec_df.columns[1]
freq_col = stats_spec_df.columns[2]
power_col = stats_spec_df.columns[3]

# NOTE(review): frequency is passed as the *between* factor; if every animal
# contributes all frequencies this may need a two-way repeated-measures design
# instead - confirm.
spec_rm = pg.mixed_anova(dv=power_col, within=day_col, between=freq_col, subject=anim_col, data=stats_spec_df)
pg.print_table(spec_rm)

spec_name = save_test_dir / "01_spec_anova.csv"
spec_rm.to_csv(spec_name)

# Q2 Does the Number of episodes change between day?
# Repeated two way anova of Count ~ Time*day | anim
count_stats_df = long_frag.copy()

# Positional column names of the copied table, for the analysis that follows.
anim_col = count_stats_df.columns[0]
time_col = count_stats_df.columns[1]
day_col = count_stats_df.columns[2]
count_col = count_stats_df.columns[3]
mean_col = count_stats_df.columns[4]
os.mkdir(marker_test_dir) count_dir = marker_test_dir / "01_count" mean_dir = marker_test_dir / "02_mean" hist_dir = marker_test_dir / "03_hist" for dir in [count_dir, mean_dir, hist_dir]: if not os.path.exists(dir): os.mkdir(dir) curr_count = count_data_dict[curr_label] curr_mean = mean_data_dict[curr_label] curr_hist = hist_data_dict[curr_label] count = count_cols[-1] count_anova = pg.anova(dv=count, between=condition_col, data=curr_count) pg.print_table(count_anova) count_ph = pg.pairwise_tukey(dv=count, between=condition_col, data=curr_count) pg.print_table(count_ph) count_anova.to_csv(count_dir / anova_str) count_ph.to_csv(count_dir / ph_str) count_stats_dict[curr_label] = count_ph mean = mean_cols[-1] mean_anova = pg.anova(dv=mean, between=condition_col, data=curr_mean) pg.print_table(mean_anova) mean_ph = pg.pairwise_tukey(dv=mean, between=condition_col, data=curr_mean) pg.print_table(mean_ph) mean_anova.to_csv(mean_dir / anova_str) mean_ph.to_csv(mean_dir / ph_str)
import researchpy as rp
import statsmodels.api as sm
from statsmodels.formula.api import ols
import numpy as np
import pingouin as pg
import seaborn as sns
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# NOTE(review): `pd` and `plt` are used below but not imported in this view -
# presumably imported earlier in the file; confirm.

# Load the data and replace "Average" with its log(1 + x) transform.
df = pd.read_csv("Matrix.csv", index_col=None )
logX = np.log1p(df["Average"])
df = df.assign(media_log=logX.values)
df.drop(["Average"], axis= 1, inplace= True)
# Bare column access: has no effect other than raising KeyError if the column is missing.
df["Generator"]

# Factors to analyse, one one-way ANOVA + Tukey HSD per factor.
factores=["Generator"]
plt.figure(figsize=(8, 6))
for i in factores:
    # descriptive statistics per level of the factor
    print(rp.summary_cont(df['media_log'].groupby(df[i])))
    ANV=pg.anova (dv='media_log', between=i, data=df, detailed=True)
    pg.print_table (ANV)
    ax=sns.boxplot(x=df["media_log"], y=df[i], data=df, palette="Set1")
    tukey = pairwise_tukeyhsd(endog = df["media_log"],    # Data
                              groups= df[i],              # Groups
                              alpha=0.05)                 # Significance level
    # fig1: the box plot drawn above
    plt.savefig('fig1.jpeg', bbox_inches='tight')
    tukey.plot_simultaneous(xlabel='Time', ylabel=i)    # Plot group confidence intervals
    # NOTE(review): x=49.57 and the y-limits are hard-coded; their meaning is not
    # visible here.
    plt.vlines(x=49.57,ymin=-0.5,ymax=4.5, color="red")
    # fig2: the Tukey confidence-interval plot
    plt.savefig('fig2.jpeg', bbox_inches='tight')
print(tukey.summary())
plt.show()
# Collect the per-(year, level) value lists. Grouping by the plain column name
# (not a one-element list) keeps `year` a scalar on every pandas version; with
# a list key pandas >= 2.0 yields 1-tuples and the column names would become
# "(2019,)_1".
for year, df in metrics.groupby('Year'):
    for level, data in df.groupby('type_cat'):
        pubs_list[f'{year}_{level}'] = list(data['pubs_awarded'])
        fwci_list[f'{year}_{level}'] = list(data['fwci_awarded'])

# Generate separate dataframes; wrapping each list in a Series lets columns of
# unequal length be padded with NaN.
pubs = pd.DataFrame({k: pd.Series(v) for k, v in pubs_list.items()})
fwci = pd.DataFrame({k: pd.Series(v) for k, v in fwci_list.items()})

FileHandling.df_to_excel(data_frames=[pubs, fwci], sheetnames=['pubs', 'fwci'],
                         output_path=f'{output_folder}metrics_per_year.xlsx')

# Collect cols for each level
levels = ['_1', '_2', '_3']

for level in levels:
    cols = [col for col in pubs.columns.tolist() if level in col]
    test_df = pubs[cols].melt(value_name='publications', var_name='Group')

    # One-way ANOVA across the "<year>_<level>" groups
    aov = pg.anova(data=test_df, dv='publications', between='Group',
                   export_filename=f'{output_folder}anova_pubs{level}.csv')
    pg.print_table(aov)

    # Bonferroni-adjusted pairwise post hocs (no effect size requested)
    posthoc = pg.pairwise_ttests(data=test_df, dv='publications', between='Group',
                                 within=None, parametric=True, alpha=.05,
                                 tail='two-sided', padjust='bonf', effsize='none',
                                 return_desc=False,
                                 export_filename=f'{output_folder}bonf_pubs{level}.csv')

    # Pretty printing of table
    pg.print_table(posthoc, floatfmt='.3f')
import pandas as pd
import pingouin as pg

## Load data
datafile = "tmp.csv"
df = pd.read_csv(datafile)

## Compute ANOVA
# Two-way repeated-measures ANOVA: both factors are within-subject.
paovm = pg.rm_anova(data=df, dv='fraction_correct',
                    within=['presentation_condition', 'source_condition'],
                    subject='subject', correction='auto', detailed=True,
                    export_filename='tmpStts')

# NOTE(review): pg.sphericity is handed the ANOVA *result table* here, not the
# measurement data - presumably the data frame `df` (with dv/within/subject)
# was intended; confirm against the pingouin sphericity documentation.
print("=== Pingouin ANOVA === sphericity: ", pg.sphericity(paovm))
pg.print_table(paovm)
color="blue", label="AM", linestyle="none") plt.xlabel("sujet") plt.ylabel("différence de cadence de modulation (%)") plt.xticks(subject) plt.legend(loc=0) plt.savefig(os.path.join(path_fig, "seuils_discrimination.png")) plt.show() #%% anova des seuils adaptatifs import pingouin as pg data_adapt = pd.read_csv("seuils_adaptatifs.txt", index_col=0) data_discr = pd.read_csv("seuils_discrimination.txt", index_col=0) adapt_am = data_adapt[data_adapt.modulation_type == "AM"] adapt_fm = data_adapt[data_adapt.modulation_type == "FM"] aov_adapt_am = pg.anova(data=adapt_am, dv="seuil", between="subject") aov_adapt_fm = pg.anova(data=adapt_fm, dv="seuil", between="subject") pg.print_table(aov_adapt_am) pg.print_table(aov_adapt_fm) #%% t-test des seuils de discrimination discr_t_test = pg.ttest(x=am_discr, y=fm_discr, paired=True, tail="one-sided") pg.print_table(discr_t_test) #discr_t_test.to_excel("t_test_seuils_discrimination.xlsx")
# Output directory for the per-marker tests.
marker_test_dir = save_test_dir / "01_markers"
if not os.path.exists(marker_test_dir):
    os.mkdir(marker_test_dir)

# For every marker: one-way ANOVA plus Tukey post-hoc on `condition_col`.
# `dep_var`, `condition_col`, `anova_str` and `ph_str` come from the enclosing
# scope (not visible here).
marker_ph_dict = {}
for marker_label, marker_df in zip(marker_dict.keys(), marker_dict.values()):
    print(marker_label)

    # run anova
    curr_anova_marker = pg.anova(
        dv=dep_var,
        between=condition_col,
        data=marker_df
    )
    pg.print_table(curr_anova_marker)

    # post-hoc comparisons between the conditions
    curr_ph_marker = pg.pairwise_tukey(
        dv=dep_var,
        between=condition_col,
        data=marker_df
    )
    pg.print_table(curr_ph_marker)
    # keep the post-hoc table, keyed by marker
    marker_ph_dict[marker_label] = curr_ph_marker

    # save the files under <marker_test_dir>/<marker_label>/
    label_test_dir = marker_test_dir / marker_label
    if not os.path.exists(label_test_dir):
        os.mkdir(label_test_dir)
    curr_anova_marker.to_csv(label_test_dir / anova_str)
    curr_ph_marker.to_csv(label_test_dir / ph_str)
crosstab, res = researchpy.crosstab(dfExpTrail['hasAvoidPoint'], dfExpTrail['decisionSteps'], test="chi-square") print(crosstab) # Compute the two-way mixed-design ANOVA calAnova = 0 if calAnova: import pingouin as pg aov = pg.mixed_anova(dv='ShowCommitmentPercent', within='decisionSteps', between='participantsType', subject='name', data=statDF) pg.print_table(aov) posthocs = pg.pairwise_ttests(dv='ShowCommitmentPercent', within='decisionSteps', between='participantsType', subject='name', data=statDF, within_first=0) pg.print_table(posthocs) VIZ = 0 if VIZ: import seaborn as sns ax = sns.barplot(x="decisionSteps", y="ShowCommitmentPercent", hue="participantsType",
def group_by_intensity_anova(self, model_comparison, data_type="percent", use_normed=False):
    """Performs a Group x Intensity mixed ANOVA for one model comparison.
    Performs pairwise T-test comparisons for post-hoc analysis.
    Plots group means using Seaborn package.

    :argument
    -model_comparison: one of "Wrist-Ankle", "Wrist-HR", "Wrist-HRAcc",
     "Ankle-HR", "Ankle-HRAcc", "HR-HRAcc"; selects which rows of the data
     are analysed
    -data_type: 'minutes' or 'percent'; type of data to use
    -use_normed: whether to read self.norm_df instead of self.df

    :raises
    -ValueError: if data_type or model_comparison is not one of the values above

    :returns
    -tuple (aov, posthoc): tables returned by pingouin's mixed_anova and
     pairwise_ttests
    """

    # Validate first so a bad data_type fails clearly instead of silently
    # running on the unfiltered data.
    if data_type not in ("percent", "minutes"):
        raise ValueError(
            "data_type must be 'percent' or 'minutes', got {!r}".format(data_type))

    # DATA FORMATTING --------------------------------------------------------
    source_df = self.norm_df if use_normed else self.df

    # Pulls rows from the source table for the desired model comparison.
    # NOTE(review): assumes the rows cycle through comp_names in this order,
    # one block of len(comp_names) rows per participant - confirm.
    comp_names = ["Wrist-Ankle", "Wrist-HR", "Wrist-HRAcc",
                  "Ankle-HR", "Ankle-HRAcc", "HR-HRAcc"]
    row_int = comp_names.index(model_comparison)

    # Start at the requested comparison's offset; the previous fixed offset of
    # 0 always selected the first comparison.
    df2 = source_df.iloc[row_int::len(comp_names)]

    if data_type == "percent":
        intensity_cols = ["SEDENTARY%", "LIGHT%", "MODERATE%", "VIGOROUS%"]
    else:
        intensity_cols = ["SEDENTARY", "LIGHT", "MODERATE", "VIGOROUS"]

    # Copy so that adding the ID column does not write into a slice of the
    # instance's table.
    df = df2[intensity_cols].copy()

    # Creates column in df of IDs (high-active participants first)
    df["ID"] = self.high_active_ids + self.low_active_ids

    df_long = pd.melt(frame=df, id_vars="ID", var_name="INTENSITY", value_name="VALUE")

    # melt stacks the intensity columns one after another, so the per-participant
    # group labels repeat once per intensity column.
    group_list = ["HIGH"] * len(self.high_active_ids) + ["LOW"] * len(self.low_active_ids)
    df_long["GROUP"] = group_list * len(intensity_cols)

    print(df_long)

    # DATA VISUALIZATION -----------------------------------------------------

    # Creates 1x2 subplots of group means
    plt.subplots(1, 2, figsize=(12, 7))
    plt.subplots_adjust(wspace=0.20)
    plt.suptitle("Group x Intensity Mixed ANOVA: {} "
                 "(normalized={})".format(model_comparison.capitalize(), use_normed))

    # Two activity level groups: one line for each intensity
    plt.subplot(1, 2, 1)
    sns.pointplot(data=df_long, x="GROUP", y="VALUE", hue="INTENSITY",
                  dodge=False, markers='o', capsize=.1, errwidth=1, palette='Set1')
    plt.ylabel("Difference ({})".format(data_type))
    plt.axhline(y=0, linestyle="dashed", color='black')

    # Four intensity groups: one line for each activity level group
    plt.subplot(1, 2, 2)
    sns.pointplot(data=df_long, x="INTENSITY", y="VALUE", hue="GROUP",
                  dodge=False, markers='o', capsize=.1, errwidth=1, palette='Set1')
    plt.ylabel("")
    plt.axhline(y=0, linestyle="dashed", color='black')

    # STATISTICAL ANALYSIS ---------------------------------------------------
    print("\nPerforming Group x Comparison mixed ANOVA using {} data "
          "for the {} model.".format(data_type, model_comparison))

    # Group x Intensity mixed ANOVA
    aov = pg.mixed_anova(dv="VALUE", within="INTENSITY", between="GROUP",
                         subject="ID", data=df_long)

    # NOTE(review): column index 8 is in neither slice - confirm that skipping
    # it is intentional.
    pg.print_table(aov.iloc[:, 0:8])
    pg.print_table(aov.iloc[:, 9:])

    posthoc = pg.pairwise_ttests(dv="VALUE", within="INTENSITY", between='GROUP',
                                 subject='ID', data=df_long)
    pg.print_table(posthoc)

    return aov, posthoc
df_stats = df_stats[df_stats["RMSE"].notna()] df_stats.to_pickle("stats_df_1.pkl") aov = pg.anova(dv="RMSE", between=["EMG_objective", "co_contraction_level"], data=df_stats) ptt = pg.pairwise_ttests( dv="RMSE", between=[ "co_contraction_level", "EMG_objective", ], data=df_stats, padjust="bonf", ) pg.print_table(aov.round(3)) pg.print_table(ptt.round(3)) # Figure of RMSE on force function of co-contraction level (Fig. 7) import matplotlib matplotlib.rcParams["legend.handlelength"] = 4 matplotlib.rcParams["legend.handleheight"] = 2.25 seaborn.set_style("whitegrid") cp = seaborn.color_palette("YlOrRd", 5) cp[-1] = (0, 102 / 255, 153 / 255) plotpd = RMSEtrack_pd[RMSEtrack_pd["component"] == "force"] plotpd = plotpd[plotpd["weight_level"] == "high"] ax = seaborn.boxplot( y=plotpd["RMSE"],
"03_analysis_outputs/05_figures/00_csvs/03_fig3") anova_csv = "01_anova.csv" ph_csv = "02_posthoc.csv" test_df = hourly_sleep_prop long_df = test_df.stack().reset_index() long_df.columns = stat_colnames hourly_test_dir = save_test_dir / "hour_prop" # prop 2 way rm test_rm = pg.rm_anova2(dv=dep_var, within=[day_col, hour_col], subject=anim, data=long_df) pg.print_table(test_rm) # prop post hoc ph_dict = {} for hour in hours: print(hour) hour_df = long_df.query("%s == '%s'" % (hour_col, hour)) ph = pg.pairwise_tukey(dv=dep_var, between=day_col, data=hour_df) pg.print_table(ph) ph_dict[hour] = ph hourly_ph_df = pd.concat(ph_dict) hr_anova_file = hourly_test_dir / anova_csv hr_ps_file = hourly_test_dir / ph_csv test_rm.to_csv(hr_anova_file) hourly_ph_df.to_csv(hr_ps_file)