def permtest_ANOVA_paired(data_panda, behavMeasure, Conds, reps):
    """Permutation test on one F value of a repeated-measures ANOVA.

    The observed statistic is the F value in the third row of the
    ``AnovaRM`` table fitted on ``data_panda`` (the original comments call
    this the condition-task interaction). The null distribution is built by
    shuffling ``behavMeasure`` within each ``Subject_ID`` and refitting the
    model ``reps`` times.

    Parameters
    ----------
    data_panda : pandas.DataFrame
        Long-format data containing ``'Subject_ID'``, the ``behavMeasure``
        column and every column named in ``Conds``.
    behavMeasure : str
        Name of the dependent-variable column.
    Conds : list of str
        Within-subject factors handed to ``AnovaRM``.
        NOTE(review): reading row 2 as the interaction presumably assumes
        exactly two factors - confirm with callers.
    reps : int
        Number of permutations.

    Returns
    -------
    tuple
        ``(obs_stat, prob)``: the observed F value and the fraction of
        permuted F values strictly greater than it.
    """
    # Position of the F value of interest inside the ANOVA table.
    interaction_row = 2

    observed_table = AnovaRM(data_panda, behavMeasure, 'Subject_ID',
                             within=Conds).fit().anova_table
    # The table is indexed by term name, so positional access must use .iloc.
    obs_stat = observed_table['F Value'].iloc[interaction_row]

    # Work on a copy so the caller's frame does not gain the shuffled column.
    shuffled_panda = data_panda.copy()

    rand_vals = []
    for ii in range(reps):
        print('\r{} of {}'.format(ii, reps), end='')

        # Shuffle the measure across conditions, but only within a subject.
        shuffled_panda["behavMeasure_shuffled"] = shuffled_panda.groupby(
            "Subject_ID")[behavMeasure].transform(np.random.permutation)

        shuffled_table = AnovaRM(shuffled_panda, "behavMeasure_shuffled",
                                 'Subject_ID', within=Conds).fit().anova_table
        rand_vals.append(shuffled_table['F Value'].iloc[interaction_row])

    rand_vals = np.array(rand_vals)

    # One-sided (upper-tail) p-value: share of permuted F values that exceed
    # the observed one.
    prob = np.mean(rand_vals > obs_stat)

    _ = plt.hist(rand_vals, bins='auto')  # arguments are passed to np.histogram
    plt.show()

    print(f'p = {prob}')
    print(f'obs_stat = {obs_stat}')

    return obs_stat, prob
def test_repeated_measures_aggregate_func():
    """Check AnovaRM's handling of duplicated observations.

    The module-level ``data`` frame is stacked on top of itself so every
    subject/cell combination appears twice. Without ``aggregate_func`` the
    constructor must raise ``ValueError``; with ``np.mean`` or ``np.median``
    the two models keep different aggregation functions but must produce the
    same ANOVA table.
    """
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    double_data = pd.concat([data, data], axis=0)

    assert_raises(ValueError, AnovaRM, double_data, 'DV', 'id',
                  within=['A', 'B', 'D'])

    m1 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.mean)
    m2 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
                 aggregate_func=np.median)

    # The two models must not share the same aggregation function ...
    assert_raises(AssertionError, assert_equal,
                  m1.aggregate_func, m2.aggregate_func)
    # ... yet their results must agree.
    assert_frame_equal(m1.fit().anova_table, m2.fit().anova_table)
def three_sample_test(sample1, sample2, sample3, test):
    """Run one of four three-sample significance tests.

    Parameters
    ----------
    sample1, sample2, sample3 : sequence of numbers
        The three samples. For ``"rm-anova"`` they are treated as paired:
        position ``i`` in each sample belongs to subject ``i``.
    test : str
        ``"anova"`` (parametric, between-subjects),
        ``"rm-anova"`` (parametric, within-subjects),
        ``"kruskal-wallis"`` (nonparametric, between-subjects) or
        ``"friedman"`` (nonparametric, within-subjects).

    Returns
    -------
    tuple
        ``(test_stat, p_val)``.

    Raises
    ------
    ValueError
        If ``test`` is not one of the supported names, or if the samples
        passed to ``"rm-anova"`` differ in length.
    """
    if test == "anova":
        test_stat, p_val = scipy.stats.f_oneway(sample1, sample2, sample3)
    elif test == "rm-anova":
        if not (len(sample1) == len(sample2) == len(sample3)):
            raise ValueError(
                "rm-anova requires three samples of equal length")
        # Long format: one row per (subject, group) observation.
        rows = [
            (response, subject_id, group)
            for subject_id, triple in enumerate(
                zip(sample1, sample2, sample3))
            for group, response in zip("ABC", triple)
        ]
        df = pd.DataFrame(rows, columns=["response", "id", "group"])
        res = AnovaRM(df, depvar="response", subject="id",
                      within=["group"]).fit()
        # The table is indexed by term name, so take the first row by position.
        test_stat = res.anova_table['F Value'].iloc[0]
        p_val = res.anova_table['Pr > F'].iloc[0]
    elif test == "kruskal-wallis":
        test_stat, p_val = scipy.stats.kruskal(sample1, sample2, sample3)
    elif test == "friedman":
        test_stat, p_val = scipy.stats.friedmanchisquare(
            sample1, sample2, sample3)
    else:
        # Previously this fell through to an UnboundLocalError on return.
        raise ValueError("unknown test: {!r}".format(test))
    return test_stat, p_val
def rm_one_way_anova(dataset: Dataset, design, combined_data: CombinedData):
    """Fit a repeated-measures ANOVA on the dataset's within-subject factors.

    Parameters
    ----------
    dataset : Dataset
        Supplies the data frame (``dataset.data``) and the participant-id
        column name (``dataset.pid_col_name``).
    design : dict
        Study design. Only the ``"within subjects"`` entry is used: an
        explanatory variable is included when its name equals that entry.
    combined_data : CombinedData
        Supplies the explanatory variables and exactly one explained
        variable.

    Returns
    -------
    The object returned by ``AnovaRM(...).fit()``. Earlier versions computed
    this result but discarded it and returned ``None``.

    Notes
    -----
    Variable names are read through ``x.metadata[name]``; ``name`` is not
    defined in this function and is presumably a module-level key - confirm.
    Between-subject factors are ignored: the original code notes that
    ``AnovaRM`` does not implement them.
    """
    data = dataset.data
    xs = combined_data.get_explanatory_variables()
    ys = combined_data.get_explained_variables()
    assert (len(ys) == 1)
    y = ys[0]

    within_subjs = [
        x.metadata[name]
        for x in xs
        if "within subjects" in design
        and design["within subjects"] == x.metadata[name]
    ]

    # Avoid shadowing the builtin ``id``.
    subject_col = dataset.pid_col_name
    aovrm2way = AnovaRM(data, depvar=y.metadata[name], subject=subject_col,
                        within=within_subjs)
    return aovrm2way.fit()
def continuous_paired_group_repeated_measures_anova(**kwargs):
    """Run a repeated-measures ANOVA over a single combined condition.

    Keyword Arguments
    -----------------
    data_frame : pandas.DataFrame
        Must contain ``'test_index'`` (used as the subject id), the dependent
        variable and the condition column(s). A ``'condition'`` column is
        written into this frame.
    dependable_variable : str
        Name of the dependent-variable column.
    conditions : str or list of str
        Condition column(s). Several columns are merged into one factor by
        joining their values with ``"_"``; a single column is used as is.

    Returns
    -------
    tuple
        Always ``(True, 100)``. These are placeholders: the fitted result is
        only printed (see the TODO below).

    Raises
    ------
    ValueError
        If ``conditions`` is empty.
    """
    data_frame = kwargs["data_frame"]
    dependable_variable = kwargs["dependable_variable"]
    conditions = kwargs["conditions"]

    condition_columns = (
        [conditions] if isinstance(conditions, str) else list(conditions))
    if not condition_columns:
        raise ValueError("at least one condition column is required")

    if len(condition_columns) > 1:
        # The original comment says AnovaRM does not support several
        # conditions here, so they are collapsed into one factor whose levels
        # are the "_"-joined value combinations.
        combined = data_frame[condition_columns].astype(str).agg(
            "_".join, axis=1)
    else:
        # Previously this case left the column filled with uninitialised
        # np.empty values, which were then used as factor levels.
        combined = data_frame[condition_columns[0]]
    data_frame['condition'] = combined

    # Only subjects with test_index below 1000 enter the model (the original
    # TODO mentions reducing the subject count); np.mean aggregates repeated
    # observations within a cell.
    subset = data_frame[data_frame["test_index"] < 1000]
    aovrm = AnovaRM(subset, dependable_variable, 'test_index',
                    within=["condition"], aggregate_func=np.mean)
    res = aovrm.fit()
    print(res)

    # TODO: extract the p-value from ``res`` instead of returning placeholders.
    return True, 100
def rm(self, data, dep_var, subject, within, aggregate_func=None):
    """Run a repeated-measures ANOVA and return its table.

    Parameters
    ----------
    data : DataFrame
        Holds at least the dependent-variable, subject and factor columns.
    dep_var : str
        Name of the dependent-variable column.
    subject : str
        Name of the subject-identifier column.
    within : list of str
        Names of one or more within-subject factor columns.
    aggregate_func : optional
        Forwarded unchanged to ``AnovaRM``.

    Returns
    -------
    DataFrame
        The ``anova_table`` of the fitted model.
    """
    fitted = AnovaRM(data, dep_var, subject, within,
                     aggregate_func=aggregate_func).fit()
    return fitted.anova_table
def anova(diff1, diff2, recall, within_factors):
    """Run a repeated-measures ANOVA on two sets of performance differences.

    Both inputs are reshaped with ``rearange(..., 'short', ...)``,
    concatenated, and analysed with ``'performance'`` as the dependent
    variable and ``'sub_id'`` as the subject, averaging repeated observations
    per cell. The fitted result is printed.

    Parameters
    ----------
    diff1, diff2
        Inputs accepted by ``rearange``.
    recall : bool
        Forwarded to ``rearange``; also selects the label in the printed
        header (``'recall'`` when truthy, ``'recognition'`` otherwise).
    within_factors : list of str
        Within-subject factors.

    Returns
    -------
    tuple
        ``(diff1, diff2, diffs_for_anova)``: the two reshaped inputs and
        their concatenation.
    """
    r = 'recall' if recall else 'recognition'

    diff1 = rearange(diff1, 'short', within_factors=within_factors,
                     recall=recall)
    diff2 = rearange(diff2, 'short', within_factors=within_factors,
                     recall=recall)
    diffs_for_anova = pd.concat([diff1, diff2])

    res = AnovaRM(diffs_for_anova, 'performance', 'sub_id',
                  within=within_factors, aggregate_func='mean').fit()

    # The rounded p-value the old code computed here was never used (and
    # relied on deprecated positional Series indexing), so it was dropped.
    print(F'ANOVA ON DIFFERENCES in memory performance - {r}', res)
    return diff1, diff2, diffs_for_anova
def rm_one_way(xs, y, key, df):
    """Return the text rendering of a repeated-measures ANOVA.

    ``df`` is analysed with ``y`` as the dependent variable, ``key`` as the
    subject column and ``xs`` as the within-subject factors; repeated
    observations in a cell are averaged.
    """
    model = AnovaRM(df, depvar=y, subject=key, within=xs,
                    aggregate_func='mean')
    return str(model.fit())
def rm_one_way(xs, y, key, df):
    """Return the text rendering of a repeated-measures ANOVA.

    Parameters
    ----------
    xs : list of str
        Within-subject factor columns.
    y : str
        Dependent-variable column.
    key : str
        Subject-identifier column.
    df : pandas.DataFrame
        Long-format data.

    Returns
    -------
    str
        ``str()`` of the fitted ``AnovaRM`` result. Repeated observations in
        a cell are averaged (``aggregate_func='mean'``).
    """
    # The unused between/within bookkeeping lists and the leftover debugger
    # comment were removed.
    aovrm2way = AnovaRM(df, depvar=y, subject=key, within=xs,
                        aggregate_func='mean')
    res2way = aovrm2way.fit()
    return str(res2way)
def AnovaRM_with_post_hoc(data, dep_var, subject, within, only_significant=False):
    """Print a repeated-measures ANOVA, then run paired post-hoc t-tests.

    The ANOVA uses ``dep_var`` as the dependent variable, ``subject`` as the
    subject column and ``within`` as the within-subject factors. Afterwards
    ``pairwise_ttest_rel`` is called on the same data; ``only_significant``
    is forwarded to it unchanged.
    """
    # Omnibus test.
    fitted = AnovaRM(data, dep_var, subject, within).fit()
    print(fitted)

    # Post-hoc comparisons.
    pairwise_ttest_rel(data, dep_var, within=within,
                       only_significant=only_significant)
def rm_one_way_anova(dataset: Dataset, predictions, design, combined_data: CombinedData):
    """Run a repeated-measures ANOVA and wrap one table row in a TestResult.

    Parameters
    ----------
    dataset : Dataset
        Supplies the data frame and the participant-id column name.
    predictions : list
        Optional predictions. The first entry is used; if that entry is
        itself a list, its first element is used. Falsy input means no
        prediction.
    design : dict
        Only the ``"within subjects"`` entry is used to pick factors.
    combined_data : CombinedData
        Supplies the explanatory variables, exactly one explained variable,
        and ``alpha``.

    Returns
    -------
    TestResult
        Built from the ANOVA-table row whose label equals the name of the
        last explanatory variable.

    Raises
    ------
    ValueError
        If there are no explanatory variables, or if the ANOVA table has no
        row for the last one. Both cases used to surface as a
        NameError/UnboundLocalError.

    Notes
    -----
    ``name`` (the metadata key) and ``rm_one_way_anova_name`` are not defined
    in this function; presumably module-level constants - confirm.
    Between-subject factors are ignored: the original code notes that
    ``AnovaRM`` does not implement them.
    """
    data = dataset.data
    xs = combined_data.get_explanatory_variables()
    ys = combined_data.get_explained_variables()
    assert (len(ys) == 1)
    y = ys[0]

    within_subjs = []
    x = None
    for x in xs:
        if ("within subjects" in design
                and design["within subjects"] == x.metadata[name]):
            within_subjs.append(x.metadata[name])
    if x is None:
        raise ValueError(
            "rm_one_way_anova requires at least one explanatory variable")

    if predictions:
        first = predictions[0]
        prediction = first[0] if isinstance(first, list) else first
    else:
        prediction = None

    key = dataset.pid_col_name
    aovrm2way = AnovaRM(data, depvar=y.metadata[name], subject=key,
                        within=within_subjs, aggregate_func='mean')
    result_df = aovrm2way.fit().anova_table

    # The reported row is the one for the last explanatory variable, which is
    # also the variable attached to the TestResult (as before).
    col_name = x.metadata[name]
    if col_name not in result_df.index:
        raise ValueError(
            "ANOVA table has no row for {!r}".format(col_name))
    row_data = result_df.loc[col_name]

    test_statistic = row_data['F Value']
    p_val = row_data['Pr > F']
    dof = (row_data['Num DF'], row_data['Den DF'])

    test_result = TestResult(name=rm_one_way_anova_name,
                             test_statistic=test_statistic,
                             p_value=p_val,
                             prediction=prediction,
                             dof=dof,
                             alpha=combined_data.alpha,
                             table=result_df,
                             x=x,
                             y=y)
    return test_result
def rm_anova(data=None, subject=None, within=None, between=None, dv=None):
    """Fit a repeated-measures ANOVA and return its table as a dataframe.

    ``dv`` is passed to ``AnovaRM`` as ``depvar``; every other argument is
    forwarded under its own name.
    """
    model = AnovaRM(data=data, depvar=dv, subject=subject, within=within,
                    between=between)
    return model.fit().anova_table
def getRMAnova(dataSet, labels, verbose=False):
    """Run a one-factor repeated-measures ANOVA over several samples.

    Parameters
    ----------
    dataSet : sequence of sequences
        One sample per condition. Position ``k`` inside a sample is used as
        the subject id of that observation.
    labels : sequence
        ``labels[j]`` is the condition label for ``dataSet[j]``.
    verbose : bool
        When true, print the result summary.

    Returns
    -------
    The fitted ``AnovaRM`` result.
    """
    # One label per observation, in the same order as the flattened data.
    cond_column = np.concatenate(
        [[labels[j] for _ in sample] for j, sample in enumerate(dataSet)])
    rt_column = np.concatenate(dataSet)
    id_column = np.concatenate([np.arange(len(sample)) for sample in dataSet])

    df = pd.DataFrame({'id': id_column, 'rt': rt_column, 'cond': cond_column})
    res = AnovaRM(df, 'rt', 'id', within=['cond']).fit()

    if verbose:
        print(res.summary())
    return res
def rlrlRMANOVA(mes):
    """Print repeated-measures ANOVAs of three controller metrics by model.

    ``mes`` is expanded with ``expandEvals``. A subject id column ``'s_id'``
    (row index + 1) is added, and one ANOVA with ``'model'`` as the
    within-subject factor is printed for each of the ``'error'``,
    ``'rise_time'`` and ``'energy'`` columns.

    Returns
    -------
    None
    """
    aexps = expandEvals(mes)
    # The index does not change between analyses, so the id column is built
    # once instead of before every fit.
    aexps['s_id'] = (np.array(aexps.index.values.tolist()) + 1).tolist()

    # Each banner now names the metric it precedes; previously all three said
    # "Error".
    analyses = (
        ('error', 'Error'),
        ('rise_time', 'Rise Time'),
        ('energy', 'Energy'),
    )
    for column, label in analyses:
        print('********** RL Controller {} RMANOVA **********'.format(label))
        print(AnovaRM(aexps, column, 's_id', within=['model']).fit())
def rank_multiple_normal_homoscedastic(data, alpha, verbose, order, effect_size, force_mode):
    """
    Analyzes data using repeated measures ANOVA and Tukey HSD.

    ``data`` is stacked into long format (row label -> ``id``, column label
    -> ``treatment``, value -> ``result``). The omnibus p-value comes from
    ``AnovaRM``; the confidence interval of each population is its column
    mean plus/minus the Tukey HSD half-width.

    Returns a ``_ComparisonResult`` carrying the ranking frame, the omnibus
    p-value and the effect-size / reorder information produced by
    ``_create_result_df_skeleton``.
    """
    long_format = data.stack().reset_index().rename(
        columns={'level_0': 'id', 'level_1': 'treatment', 0: 'result'})

    rm_fit = AnovaRM(long_format, 'result', 'id', within=['treatment']).fit()
    pval = rm_fit.anova_table['Pr > F'].iat[0]

    if verbose:
        if pval >= alpha:
            verdict = "Fail to reject null hypothesis that there is no difference between the distributions (p=%f)"
        else:
            verdict = "Rejecting null hypothesis that there is no difference between the distributions (p=%f)"
        print(verdict % pval)
        print(
            "Using Tukey HSD post hoc test.",
            "Differences are significant if the confidence intervals of the mean values are not overlapping."
        )

    tukey = MultiComparison(long_format['result'],
                            long_format['treatment']).tukeyhsd()
    # The half-widths only become available once the plot has been created,
    # so draw it and close it again without showing it.
    tukey.plot_simultaneous()
    plt.close()

    rankdf, effsize_method, reorder_pos = _create_result_df_skeleton(
        data, None, True, order, effect_size=effect_size,
        force_mode=force_mode)

    for population in rankdf.index:
        center = data.loc[:, population].mean()
        # NOTE(review): half-widths are looked up by the column position in
        # ``data``; this presumably assumes Tukey's group order matches the
        # column order - confirm.
        half_width = tukey.halfwidths[data.columns.get_loc(population)]
        rankdf.at[population, 'ci_lower'] = center - half_width
        rankdf.at[population, 'ci_upper'] = center + half_width

    return _ComparisonResult(rankdf, pval, None, 'anova', 'tukeyhsd',
                             effsize_method, reorder_pos)
def test_repeated_measures_aggregate_func():
    """AnovaRM must demand, and then honour, an aggregation function.

    The module-level ``data`` is duplicated so each cell holds two identical
    observations. Construction without ``aggregate_func`` must raise
    ``ValueError``; mean- and median-aggregated models must keep distinct
    functions yet yield equal ANOVA tables.
    """
    duplicated = pd.concat([data, data], axis=0)
    factors = ['A', 'B', 'D']

    # No aggregation function given -> construction is rejected.
    assert_raises(ValueError, AnovaRM, duplicated, 'DV', 'id', within=factors)

    by_mean = AnovaRM(duplicated, 'DV', 'id', within=factors,
                      aggregate_func=np.mean)
    by_median = AnovaRM(duplicated, 'DV', 'id', within=factors,
                        aggregate_func=np.median)

    # assert_equal on the two functions has to fail, i.e. they differ.
    assert_raises(AssertionError, assert_equal,
                  by_mean.aggregate_func, by_median.aggregate_func)
    assert_frame_equal(by_mean.fit().anova_table,
                       by_median.fit().anova_table)
def anovaRM(self, depvar, subject, within=None, between=None, aggregate_func=None):
    """
    Repeated measures Anova using least squares regression

    The full model regression residual sum of squares is used to compare
    with the reduced model for calculating the within-subject effect sum of
    squares.

    Currently, only fully balanced within-subject designs are supported.
    Calculation of between-subject effects and corrections for violation of
    sphericity are not yet implemented.

    The model is fitted on the data held by this object, the result is
    printed, and the result is returned.

    Parameters
    ----------
    depvar : str
        The dependent variable in data
    subject : str
        Specify the subject id
    within : list[str]
        The within-subject factors
    between : list[str]
        The between-subject factors, this is not yet implemented
    aggregate_func : {None, 'mean', callable}
        If the data set contains more than a single observation per subject
        and cell of the specified model, this function will be used to
        aggregate the data before running the Anova. None (the default) will
        not perform any aggregation; 'mean' is a shortcut to numpy.mean.
        An exception will be raised if aggregation is required, but no
        aggregation function was specified.

    Returns
    -------
    AnovaResults instance

    Notes
    -----
    This implementation currently only supports fully balanced designs. If
    the data contain more than one observation per subject and cell of the
    design, these observations need to be aggregated into a single
    observation before the Anova is calculated, either manually or by
    passing an aggregation function via the aggregate_func keyword argument.
    Note that if the input data set was not balanced before performing the
    aggregation, the implied heteroscedasticity of the data is ignored.

    References
    ----------
    Rutherford, Andrew. Anova and ANCOVA: a GLM approach. John Wiley & Sons,
    2011.
    """
    res = AnovaRM(self.__data, depvar, subject, within, between,
                  aggregate_func).fit()
    print(res)
    # The docstring always promised the results object; hand it back instead
    # of only printing it.
    return res
def fit(self, data, depvar, subject, within=None, between=None, aggregate_func=None):
    """Estimate the model and compute ANOVA table.

    The result is stored on ``self._results`` and also returned.

    Parameters
    ----------
    data : DataFrame
    depvar : str
        The dependent variable in `data`
    subject : str
        Specify the subject id
    within : list[str]
        The within-subject factors
    between : list[str]
        The between-subject factors, this is not yet implemented
    aggregate_func : {None, 'mean', callable}
        If the data set contains more than a single observation per subject
        and cell of the specified model, this function will be used to
        aggregate the data before running the Anova. `None` (the default)
        will not perform any aggregation; 'mean' is a shortcut to
        `numpy.mean`. An exception will be raised if aggregation is
        required, but no aggregation function was specified.

    Returns
    -------
    results : AnovaResults instance

    Raises
    ------
    ValueError
        If the data need to be aggregated, but `aggregate_func` was not
        specified.
    """
    anova = AnovaRM(data=data, depvar=depvar, subject=subject, within=within,
                    between=between, aggregate_func=aggregate_func)
    self._results = anova.fit()
    # Return what the docstring documents; previously the result was only
    # stored on the instance.
    return self._results
def calculate_anova(df):
    """Collect one ANOVA p-value per element of ``df``.

    Each element is passed through ``calculate_avg_across_models``, given a
    trailing subject-number column (1..9), labelled with the columns
    ``bert``, ``baseline``, ``opennmt`` and ``subject``, and melted to long
    format (``subject``, ``model``, ``corr``). The p-value of the
    ``"subject"`` row of the fitted table is collected.

    NOTE(review): ``"model"`` is passed to AnovaRM as the subject column and
    ``"subject"`` as the within factor. That looks swapped for a
    repeated-measures design over subjects - confirm the intent.

    Returns
    -------
    list
        The p-values in iteration order.
    """
    num_subjs = 9
    # Column vector 1..num_subjs; appended to every element as subject id.
    subject_column = np.arange(1, num_subjs + 1).reshape(num_subjs, 1)

    pvals = []
    for vox in tqdm(df):
        averaged = calculate_avg_across_models(vox)
        with_subject = np.append(averaged, subject_column, 1)
        wide = pd.DataFrame(
            with_subject,
            columns=['bert', 'baseline', 'opennmt', 'subject'])
        long_form = wide.melt(id_vars=["subject"], var_name="model",
                              value_name="corr")

        fitted = AnovaRM(long_form, "corr", "model",
                         within=["subject"]).fit()
        pvals.append(fitted.summary().tables[0]["Pr > F"]["subject"])
    return pvals
def anova(data):
    """Run a one-factor repeated-measures ANOVA on wide-format data.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide format: a ``'sub_id'`` column plus one column per condition.

    Returns
    -------
    list
        ``[[F, p]]`` - the F value and p-value of the first table row, each
        rounded to two decimals. The pair is also printed.
    """
    long_data = pd.melt(data, id_vars='sub_id', var_name='cond',
                        value_name='performance')

    res = AnovaRM(long_data, 'performance', 'sub_id', within=['cond']).fit()
    table = res.anova_table

    # The table is indexed by term name; use .iloc for first-row access
    # rather than the deprecated positional ``[0]``.
    p = round(table['Pr > F'].iloc[0], 2)
    F = round(table['F Value'].iloc[0], 2)

    print(F, p)
    return [[F, p]]
def calculate_anova(args, all_corrs):
    """Compute one ANOVA p-value per voxel.

    Parameters
    ----------
    args
        Unused.
    all_corrs
        Indexed as ``all_corrs[subject][layer][i][j][k]`` for 9 subjects and
        12 layers; ``all_corrs[0][0].shape`` gives the voxel grid size.

    Returns
    -------
    numpy.ndarray
        Array of the voxel-grid shape holding the ``"subject:layer"``
        p-value for every voxel.

    Notes
    -----
    NOTE(review): AnovaRM is called with the constant ``'voxel'`` column as
    the dependent variable and ``'corr'`` as the subject id. That looks
    unintended, but the right design cannot be determined from this
    function, so the call is left unchanged - confirm with the author.
    """
    dims = all_corrs[0][0].shape
    pvals = np.zeros((dims[0], dims[1], dims[2]))
    num_layers = 12
    num_subjs = 9
    print("LEN: " + str(len(all_corrs)))
    print("DIMS: " + str(all_corrs[0][0].shape))

    # The design columns are the same for every voxel: build them once
    # instead of inside the innermost loop.
    voxel_col = np.ones(num_subjs * num_layers)
    subject_col = np.repeat(list(range(1, num_subjs + 1)), num_layers)
    layer_col = np.tile(list(range(1, num_layers + 1)), num_subjs)

    for i in tqdm(range(dims[0])):
        for j in range(dims[1]):
            for k in range(dims[2]):
                # Subject-major order, matching subject_col / layer_col.
                vals_across_subjs_and_layers = [
                    all_corrs[subj][layer][i][j][k]
                    for subj in range(num_subjs)
                    for layer in range(num_layers)
                ]

                df = pd.DataFrame({
                    'voxel': voxel_col,
                    'corr': vals_across_subjs_and_layers,
                    'subject': subject_col,
                    'layer': layer_col,
                })

                aovrm2way = AnovaRM(df, 'voxel', 'corr',
                                    within=['subject', 'layer'])
                mod = aovrm2way.fit()
                pvals[i, j, k] = (
                    mod.summary().tables[0]["Pr > F"]["subject:layer"])
    return pvals
def anova_group(means, recog):
    """Run a one-factor repeated-measures ANOVA on per-subject means.

    Parameters
    ----------
    means : pandas.DataFrame
        Wide format: a ``'sub_id'`` column plus one column per condition.
    recog : bool
        Selects the label used in the printed header (``'recog'`` when
        truthy, ``'recall'`` otherwise).

    Returns
    -------
    The p-value of the first table row, rounded to four decimals. The fitted
    result is also printed.
    """
    t = 'recog' if recog else 'recall'

    # Melt to long format: one row per (subject, condition).
    long_means = pd.melt(means, id_vars='sub_id', var_name='cond',
                         value_name='performance')
    res = AnovaRM(long_means, 'performance', 'sub_id', within=['cond']).fit()

    # The table is indexed by term name; use .iloc for first-row access
    # rather than the deprecated positional ``[0]``.
    p = round(res.anova_table['Pr > F'].iloc[0], 4)

    print(F'reaction times anova ({t})', res)
    return p
# Label every row of each dataset with the condition it belongs to.
# A constant column assignment replaces the former row-by-row
# iterrows() / .at loops.
dataset_spec['condition'] = "spec"
dataset_sub['condition'] = "sub"
dataset_rule['condition'] = "rule"
dataset_gen['condition'] = "gen"

# concatenate all the dataframes
frames = [dataset_spec, dataset_sub, dataset_rule, dataset_gen]
result_df = pd.concat(frames)

# perform the ANOVA: 'OT' by 'condition', with 'Subj_tr' as the subject
aovrm = AnovaRM(result_df, 'OT', 'Subj_tr', within=['condition'])
res = aovrm.fit()
print(res)

################
##### use ######
### pingouin ###
################
import pingouin as pg
from pingouin import mixed_anova, read_dataset

# Same analysis through the rm_anova method that is available on the
# DataFrame once pingouin has been imported.
df_ANOVA = result_df.rm_anova(dv='OT',
                              within='condition',
                              subject='Subj_tr',
                              detailed=True)
print('') print('Valores atípicos (outliers): tan sólo tenemos uno') print('') print('Igualdad de varianzas de las diferencias ente niveles de tratamiento:') h**o = stats.levene(facebook['Visitantes'], pixel['Visitantes'], wTienda['Visitantes']) print(h**o) print( "El test de Levene para la prueba de igualdad de varianzas me da un p-valor = %f " % h**o.pvalue) print('Se cumple la hipótesis de homocedasticidad (hipótesis fuerte)') print('') print('Realizo ahora la prueba ANOVA-MR') aovrm = AnovaRM(longRM, 'Visitantes', 'Mes', ['Procedencia']) ajuste = aovrm.fit() print(ajuste.summary()) print( 'Obtengo un p-valor = 0.8436. Por tanto no rechazo la hipótesis nula de igualdad de medias' ) print( 'CONCLUSIÓN: en este subconjunto no importa la procedencia de la variable Visitantes, pues me proporcionan la misma información' ) print('') print('2º) Prueba estadística para muestras pareadas') print('Breve estudio descriptivo') print('Gráfico de cajas y bigotes:') bp2 = plt.boxplot([ gaCatalogo['Usuarios'], wCatalogo['Visitantes'][(len(wCatalogo['Visitantes']) -
# Simulated reaction times: N subjects for each level in P, drawn around the
# two means below.
values = [998, 511]
sub_id = list(range(1, N + 1)) * len(P)
mus = np.repeat(values, N).tolist()
rt = np.random.normal(mus, scale=112.0, size=N * len(P)).tolist()
iv = np.repeat(P, N).tolist()
df = pd.DataFrame({"id": sub_id, "rt": rt, "iv": iv})

# %% [markdown] {"slideshow": {"slide_type": "subslide"}}
# Do the repeated measures ANOVA.

# %%
aovrm = AnovaRM(df, depvar="rt", subject="id", within=["iv"])
fit = aovrm.fit()
fit.summary()

# %% [markdown] {"slideshow": {"slide_type": "slide"}}
# # dfply

# %% [markdown]
# For those of you who are familiar with R and the tidyverse, the [dfply package](https://github.com/kieferk/dfply) allows you to have dplyr-like piping in Python. The pipe operator for this package is `>>`, while the result of each computation step is given by `X`. `>>=` is used for in-place assignment. All the documentation is available at the link; I'm just going to go over some useful basics here.

# %%
from dfply import *

diamonds >> head()

# %% [markdown] {"slideshow": {"slide_type": "subslide"}}
# ## Selection
def _rm_anova_p_value(per_subject, decimals=3):
    """Return the rounded repeated-measures ANOVA p-value for a 2-D array.

    ``per_subject`` is indexed ``[subject, task]``. It is flattened
    task-major (all subjects of task 0, then all subjects of task 1, ...)
    and analysed with the task as the single within-subject factor.
    """
    per_subject = np.asarray(per_subject)
    n_subjects, n_tasks = per_subject.shape
    frame = pd.DataFrame({
        'Data': per_subject.T.ravel(),
        # Subject and condition ids follow the task-major layout above.
        'Sub_id': np.tile(np.arange(n_subjects), n_tasks),
        'cond': np.repeat(np.arange(n_tasks), n_subjects),
    })
    res = AnovaRM(frame, depvar='Data', subject='Sub_id',
                  within=['cond']).fit()
    # First (only) row of the table, accessed by position.
    return np.around(res.anova_table['Pr > F'].iloc[0], decimals)


def plotting_functions(m484, m479, m483, m478, m486, m480, m481, exp, n=5):
    """Plot regression coefficients across tasks with ANOVA p-values.

    Coefficients come from ``runs_length(exp, subject_IDs='all', n=n)`` and
    are indexed ``[subject, task, coefficient]``: the first ``n``
    coefficients are rewards, the next ``n`` choices, and the rest (minus
    the last one) choice x reward terms. Five panels show the across-subject
    mean +/- standard error per task, each annotated with the p-value of a
    repeated-measures ANOVA over tasks.

    Parameters
    ----------
    m484, m479, m483, m478, m486, m480, m481
        Unused; kept so existing callers continue to work.
    exp
        Passed to ``runs_length``.
    n : int
        Number of trials back, passed to ``runs_length`` and used to slice
        the coefficient axis.

    Notes
    -----
    Changes from the earlier version:

    * p-values are rounded to three decimals; ``np.around`` with its default
      of zero decimals turned every p-value into 0 or 1.
    * Subject and condition ids are derived from the data shape. The old
      hard-coded ids (10 subject ids cycling through 10 blocks of 9
      condition ids) did not line up with each other.
    * The number of subjects and tasks is taken from the data instead of the
      constants 9 and 10.
    """
    coef_subj = np.asarray(runs_length(exp, subject_IDs='all', n=n))
    n_subjects, n_tasks = coef_subj.shape[:2]

    rewards = coef_subj[:, :, :n]
    choices = coef_subj[:, :, n:n * 2]
    choices_X_reward = coef_subj[:, :, n * 2:-1]

    isl = wes.Royal2_5.mpl_colors

    # (per-subject values [subject, task], title, colour, draw y-label?)
    panels = [
        (np.mean(rewards, 2), ' N Rewards Back', isl[0], False),
        (choices[:, :, 0], ' 1 Choice Back', isl[3], True),
        (np.mean(choices[:, :, 1:], 2), ' 2+ Choices Back', isl[3], True),
        (choices_X_reward[:, :, 0], ' 1 Choice x Reward Back', isl[4], True),
        (np.mean(choices_X_reward[:, :, 1:], 2),
         ' 2 Choices x Rewards Back', isl[4], True),
    ]

    # Run every ANOVA before any figure is created, as before.
    p_values = [_rm_anova_p_value(values) for values, _, _, _ in panels]

    plt.figure(figsize=(10, 4))
    task_positions = np.arange(n_tasks)
    for position, ((values, title, color, label_y), p_value) in enumerate(
            zip(panels, p_values), start=1):
        mean = np.mean(values, 0)
        sem = np.std(values, 0) / np.sqrt(n_subjects)

        plt.subplot(1, len(panels), position)
        plt.errorbar(np.arange(len(mean)), mean, yerr=sem, fmt='o',
                     color=color)
        # p-value is placed at the right edge, just above the highest mean.
        plt.annotate(p_value, xy=(n_tasks, np.max(mean) + 0.01))
        plt.xlim(-1, n_tasks)
        plt.title(title)
        plt.xticks(task_positions, task_positions + 1)
        plt.xlabel('Task')
        if label_y:
            plt.ylabel('Coefficient')

    sns.despine()
    plt.tight_layout()
ANOVA_list.append([ID, 'nogo', 'congruent', individ_acc[1]]) ANOVA_list.append([ID, 'go', 'incongruent', individ_acc[2]]) ANOVA_list.append([ID, 'nogo', 'incongruent', individ_acc[3]]) for i in range(4): all_accuracies[i].append(individ_acc[i]) #ANOVA data = pd.DataFrame(ANOVA_list, columns=['pid', 'response', 'congruency', 'SbjACC']) gpResult = data.groupby(['response', 'congruency']).SbjACC.mean().reset_index() print(gpResult) curr_ANOVA = AnovaRM(data, 'SbjACC', 'pid', within=['response', 'congruency']) curr_ANOVA = curr_ANOVA.fit() print(curr_ANOVA) #Overall Analysis all_accuracies_average = [] for i in range(len(all_accuracies)): all_accuracies_average.append( sum(all_accuracies[i]) / len(all_accuracies[i])) print(all_accuracies_average) all_accuracies_average = [ all_accuracies_average[0], all_accuracies_average[2], all_accuracies_average[1], all_accuracies_average[3] ]
(g.set_axis_labels('Awareness', 'Probability')
  .set(ylim=(0, 0.85))
  .set_titles("{row_name} | {col_name}")
  .despine(left=True))

# Label the first axis of each row with its attention condition.
for ii, (target, df_sub) in enumerate(df_plot.groupby('attention')):
    g.axes[ii][0].set(ylabel=f'Probability | {target}')

g.savefig(os.path.join(figure_dir, 'att.png'),
          dpi=400,
          bbox_inches='tight')

# One combined factor level per correctness / awareness / confidence cell.
df_plot['level'] = (df_plot['correctness'] + ', ' + df_plot['awareness']
                    + ', ' + df_plot['confidence'])

# Group by the column name, not a one-element list: with a list, pandas >= 2
# yields 1-tuples as group keys, which would end up in the file names and as
# tuple column labels.
for target, df_sub in df_plot.groupby('attention'):
    temp = {}
    df_sub = df_sub.sort_values(['sub', 'window', 'level'])
    for level, df_sub_sub in df_sub.groupby('level'):
        temp[level] = df_sub_sub['prob'].values

    # Wide table: one column per level.
    for_j = pd.DataFrame(temp)
    for_j.to_csv(os.path.join(saving_dir, f'{target} for jsp.csv'))

    aovrm = AnovaRM(df_sub,
                    'prob',
                    'sub',
                    within=['awareness', 'confidence', 'correctness'])
    res = aovrm.fit().summary().tables[0]
    res.to_csv(os.path.join(saving_dir, f'ANVOA report {target}.csv'))
spark.sparkContext.setLogLevel("ERROR")
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("\nspark session created sucessfully:: \n")

# The pandas read below parses this same file with ';' as the delimiter, so
# Spark needs the separator too; with the default ',' each line would be read
# as a single column and the 'age' / 'housing' columns used below would not
# exist.
dataset = spark.read.csv("/home/fidel/mltest/bank.csv",
                         header=True,
                         inferSchema=True,
                         sep=";")
dataset.printSchema()

df = pd.read_csv("/home/fidel/mltest/bank.csv", delimiter=";")
print(df.describe())

######creating the box plot
# boxplot = df.boxplot('age', by='marital', figsize=(12, 8))

df_anova = dataset.toPandas()
mod = ols("age ~ housing", data=df_anova).fit()
aov_table = sm.stats.anova_lm(mod, typ=2)
print(aov_table)

# using 1st test
anovarm = AnovaRM(df_anova, "age", "default", within=["marital"])
fit = anovarm.fit()
# The summary used to be computed and discarded; print it like the other
# results.
print(fit.summary())
# 2nd method
ANOVA_list.append([ID, 'nogo', 'congruent', individ_acc[1]]) ANOVA_list.append([ID, 'go', 'incongruent', individ_acc[2]]) ANOVA_list.append([ID, 'nogo', 'incongruent', individ_acc[3]]) for i in range(4): all_accuracies[i].append(individ_acc[i]) #ANOVA data = pd.DataFrame(ANOVA_list, columns=['pid', 'response', 'congruency', 'SbjACC']) gpResult = data.groupby(['response', 'congruency']).SbjACC.mean().reset_index() print(gpResult) prev_ANOVA = AnovaRM(data, 'SbjACC', 'pid', within=['response', 'congruency']) prev_ANOVA = prev_ANOVA.fit() print(prev_ANOVA) all_accuracies_average = [] for i in range(len(all_accuracies)): all_accuracies_average.append( sum(all_accuracies[i]) / len(all_accuracies[i])) print(all_accuracies_average) #plot the relationship labels = ('congruent go', 'congruent nogo', 'incongruent go', 'incongruent nogo') y_pos = np.arange(len(labels))