def test_normality(df, dep_var, ind_vars):
    """Run a Shapiro-Wilk test on dep_var for each condition level in ind_vars."""
    p_values = []
    for iv in ind_vars:
        df_iv = df.loc[df['Condition number'] == iv]
        results = pg.normality(df_iv[dep_var])
        p = results['pval'].iloc[0]
        p_values.append(p)
    return p_values
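# A minimal usage sketch for the helper above. The DataFrame, column values and
# the 'Reaction time' dependent variable are hypothetical stand-ins (only the
# 'Condition number' column name comes from the function itself): each
# condition level gets its own Shapiro-Wilk test, and the p-values come back
# in the same order as ind_vars.
import numpy as np
import pandas as pd
import pingouin as pg

rng = np.random.default_rng(0)
demo_df = pd.DataFrame({
    'Condition number': np.repeat([1, 2, 3], 20),
    'Reaction time': rng.normal(loc=0.5, scale=0.1, size=60),
})
demo_pvals = test_normality(demo_df, 'Reaction time', [1, 2, 3])
print(demo_pvals)  # one Shapiro-Wilk p-value per condition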
samp_time = pd.melt(
    si_samp,
    id_vars=['Subject', 'Group'],
    value_vars=['Time Object/New Cons Chamber', 'Time Conspecific Chamber'],
    var_name='Side',
    value_name='Time')
vars_time = [
    'Total Exploration',
    'Time Conspecific Chamber',
    'Time Object/New Cons Chamber',
]
# Test for normality
for var_ in vars_time:
    print(f'Normality test (Shapiro), {var_}')
    print(pg.normality(si_samp, dv=var_, group='Group'))

# Get test-phase data
test_df = si_raw[si_raw['Phase'] == 'Test']
test_df = detec_outlier(test_df, 'Total Exploration', 'Group')
test_time = pd.melt(
    test_df,
    id_vars=['Subject', 'Group'],
    value_vars=['Time Object/New Cons Chamber', 'Time Conspecific Chamber'],
    var_name='Side',
    value_name='Time')

# Test for normality
for var_ in vars_time:
    print(f'Normality test (Shapiro), {var_}')
    print(pg.normality(test_df, dv=var_, group='Group'))
preds = df_preds.iloc[fold][col]
f1 = metrics.f1_score(y_true=trues, y_pred=preds, average="micro")
f1FoldDict[f"{num}_{col}"] = f1
namesList.append(col)

# Add entries to the per-fold, per-variant F1-score data frame
for name in namesList:
    foldList = list()
    for i in range(5):
        foldList.append(f1FoldDict[f"{i}_{name}"])
    dfCompare[name] = foldList

#######################################
# start testing

# Normality test
_normality = open("_normality.txt", "w")
print(pg.normality(dfCompare), file=_normality)

# ANOVA is computed here
df_melt = pd.melt(dfCompare.reset_index(), id_vars=["index"],
                  value_vars=namesList)
df_melt.columns = ["index", "treatments", "value"]
model = ols('value ~ C(treatments)', data=df_melt).fit()
# print(model.summary())
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)

# Pairwise comparisons
tukey_ = open(f"./results/statistics/{target}_statistics.txt", "w+")
if len(namesList) > 2:
print('The average age was 25 years. Let us create a histogram and examine '
      'the distribution of the age variable.')

# Histogram
df_nba['Age'].plot.hist(bins=12, alpha=0.5)
plt.show()

# Box plot
ax = sns.boxplot(x=df_nba['Age'], palette="Set2", orient="h")
plt.ylabel('\nAthletes')
plt.xlabel('\nAge')
plt.show()

# Normality test with Pingouin
x = df_nba['Age']
print(pg.normality(x))

# Group the data by player and total points
df_nba_top10 = df_nba.groupby(
    ['Player'])['PTS'].sum().reset_index().rename(
        columns={'PTS': 'Total_Pontos'})

# Keep the top 10
df_nba_top10 = df_nba_top10.nlargest(10, 'Total_Pontos')

# Inspect the data
print(df_nba_top10)


# How many games did players aged 35 or older start (GS variable)?
def lista_jogadores35():
data_path = "../../data/combined_sample_data.xlsx"
combined_df = pd.read_excel(data_path, sheet_name="SampleData",
                            index_col="SampleID")
metabolite_list = combined_df.columns[8:]  # Get list of metabolites

# Subset to the current subject, dropping TimeOfDay == 4
subject_df = combined_df[combined_df['Subject'] == subject]
subject_df = subject_df[subject_df['TimeOfDay'] != 4]

alpha = 0.05
subject_diurnal_list = []
for metabolite in metabolite_list:
    log2_metabolite = np.log2(subject_df[metabolite])
    # pg.normality returns a one-row DataFrame; read the fields by column
    norm_res = pg.normality(log2_metabolite)
    normal = norm_res['normal'].iloc[0]
    shapiro_wilk_pval = norm_res['pval'].iloc[0]
    lr_info = pg.linear_regression(subject_df['TimeOfDay'], log2_metabolite)
    lr_pval = lr_info.iloc[1]['pval']
    if filter_by_normal_dist:
        if lr_pval < alpha and normal:
            subject_diurnal_list.append(metabolite)
    else:
        if lr_pval < alpha:
            subject_diurnal_list.append(metabolite)
print(len(subject_diurnal_list))

# Calculate z-scores
# subject_zscore_df = pd.DataFrame(
#     zscore(np.log2(subject_df[subject_df.columns[9:]])),
#     index=subject_df.index, columns=subject_df.columns[9:])
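# For reference, a minimal sketch of what pg.normality() returns for a 1-D
# sample, on synthetic data (all names below are hypothetical). The result is
# always a DataFrame, so the statistic, p-value and boolean verdict are read
# from its 'W', 'pval' and 'normal' columns rather than by tuple unpacking.
import numpy as np
import pandas as pd
import pingouin as pg

demo_sample = pd.Series(np.random.default_rng(1).normal(size=50))
demo_res = pg.normality(demo_sample)   # one row, columns: 'W', 'pval', 'normal'
print(demo_res)
is_normal = bool(demo_res['normal'].iloc[0])
pval = float(demo_res['pval'].iloc[0])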
if not analyzed_set.natat:
    tukey_ = open(
        f"./results/statistics/TREC6_{param}_{analyzed_set.xlabel}.txt", "w+")
else:
    tukey_ = open(
        f"./results/statistics/MPD_{param}_{analyzed_set.xlabel}.txt", "w+")

if len(file_list) > 2:
    m_comp = pairwise_tukeyhsd(endog=df_melt['value'],
                               groups=df_melt['treatments'],
                               alpha=0.05)
    print(f"{param}, NORMALITY:", file=tukey_)
    print(pg.normality(df_temp), file=tukey_)
    print("\n ANOVA:", file=tukey_)
    print(anova_table, file=tukey_)
    print("\n HSD Tukey:", file=tukey_)
    print(m_comp, file=tukey_)
    print(m_comp)
elif len(file_list) < 3:
    print(f"{param}, NORMALITY:", file=tukey_)
    print(pg.normality(df_temp), file=tukey_)
    print("\n ANOVA:", file=tukey_)
    print(anova_table, file=tukey_)
    print("\n Wilcoxon:", file=tukey_)
    print(stats.wilcoxon(df_temp[analyzed_set.columns[0]],
                         df_temp[analyzed_set.columns[1]]),
          file=tukey_)
    print(
st.header("Difference in means between groups results")
st.success("Descriptive statistics are being calculated")
function_dict = {x_var: ["mean", "std", "sem", "count"]}
new = pd.DataFrame(df.groupby(y_var).aggregate(function_dict))
st.write(new)

if normality_selected == "Shapiro-Wilk":
    message = "Shapiro-Wilk normality test is being performed"
else:
    message = "Omnibus test of normality is being performed"
st.success(message)
normality = pg.normality(
    df,
    dv=x_var,
    group=y_var,
    method="normaltest"
    if normality_selected == "Omnibus test of normality" else "shapiro")
st.write(normality)

x1, x2 = df.groupby(y_var)[x_var].apply(list)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4))
ax1 = pg.qqplot(x1, ax=ax1)
ax2 = pg.qqplot(x2, ax=ax2)
st.pyplot(fig)

st.success("Levene test for homoscedasticity of variances")
homoscedasticity = pg.homoscedasticity(df, dv=x_var, group=y_var)
st.write(homoscedasticity)

if param_vs_nonparam == "Parametric tests (Student, Welch)":
corr = pg.corr(pred_angle_data.reshape(-1), gt_angle_data.reshape(-1))
print(corr.to_string())

plt.figure()
# sns.distplot(D[:, 0], label="Proximal-thoracic")
# sns.distplot(D[:, 1], label="Main thoracic")
# sns.distplot(D[:, 2], label="Lumbar")
sns.distplot(D.reshape(-1))
plt.xlabel("Difference in Cobb Angle (Degrees)")
plt.ylabel("Density")
# plt.legend()
plt.title("Difference between Predicted and Ground-truth Cobb Angles")
plt.show()

########## Shapiro-Wilk test
ShapiroWilk = pg.normality(data=D.reshape(-1))
print(ShapiroWilk.to_string())
pg.qqplot(D.reshape(-1), dist='norm', sparams=(), confidence=0.95,
          figsize=(5, 4), ax=None)

plt.figure()
# sns.scatterplot(x=gt_angle_data[:, 0], y=pred_angle_data[:, 0], label="Proximal-thoracic")
# sns.scatterplot(x=gt_angle_data[:, 1], y=pred_angle_data[:, 1], label="Main thoracic")
# sns.scatterplot(x=gt_angle_data[:, 2], y=pred_angle_data[:, 2], label="Lumbar")
sns.scatterplot(x=gt_angle_data.reshape(-1), y=pred_angle_data.reshape(-1))
plt.xlabel("Ground-truth Angle (Degrees)")
plt.ylabel("Predicted Angle (Degrees)")
fig2.savefig('{}/Comparison.pdf'.format(savedir))
fig2.savefig('{}/Comparison.png'.format(savedir))

import pingouin as pg
from scipy import stats

# Now stats and group data
mainfig, mainplot = plt.subplots(1, 1)
mainfig.suptitle('Average Amplitudes distributions')
mainplot.set_ylabel('Average Amplitudes (pA)')
sn.boxplot(data=flattened_maps, ax=mainplot, palette=colors)
sn.swarmplot(data=flattened_maps, ax=mainplot, color='black', size=2)

# pg.normality tests each column of flattened_maps; the first row is the first group
normality = pg.normality(flattened_maps)
if not normality['normal'].iloc[0]:
    groupStat = stats.kruskal(flattened_maps.values[:, 0],
                              flattened_maps.values[:, 1],
                              flattened_maps.values[:, 2],
                              flattened_maps.values[:, 3])
    print('Normality failed, KW test (pvalue)={}'.format(groupStat[1]))
    if groupStat[1] < 0.05:
        for group in groups:
            controlGroup = flattened_maps['P30P40'].values
            compareGroup = flattened_maps[group].values
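# The snippet above breaks off inside the per-group loop. A self-contained
# sketch of one way such a follow-up is often written -- pairwise Mann-Whitney
# U tests against the control column with a Bonferroni correction. The data
# and the non-control group names below are synthetic stand-ins, and this is
# not necessarily how the original script proceeds.
import numpy as np
import pandas as pd
import pingouin as pg

rng = np.random.default_rng(2)
demo_maps = pd.DataFrame({
    'P30P40': rng.normal(10, 2, 40),   # control group (stand-in values)
    'P50P60': rng.normal(12, 2, 40),
    'P70P80': rng.normal(14, 2, 40),
})
demo_groups = ['P50P60', 'P70P80']
for g in demo_groups:
    res = pg.mwu(demo_maps['P30P40'], demo_maps[g])
    p_corr = min(1.0, float(res['p-val'].iloc[0]) * len(demo_groups))
    print(f'P30P40 vs {g}: Bonferroni-corrected p = {p_corr:.4f}')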
kind="point", dodge=True, height=4, aspect=1.333) fig_filepath = figures_path / 'line-plot-dVz.pdf' plt.savefig(str(fig_filepath)) logging.info(f"Written figure to {fig_filepath.resolve()}") # %% [markdown] # ### Normality of Fisher-z-transformed Synergy Index # Make sure the tranformation worked. # %% norm_dVz = df.groupby('task')['dVz'].apply( lambda x: pg.normality(x).iloc[0]).unstack(level=1) # %% [markdown] # ### Mixed ANOVA # # %% anova_dVz = analysis.mixed_anova_synergy_index_z(df) # %% [markdown] # ## Posthoc Testing # %% posthoc_comparisons = analysis.posthoc_ttests(df) # %%
            width=.5)
plt.legend(frameon=False, loc='lower right')
plt.xlabel('Treatment')
plt.tight_layout()
#%%
nest_fig.savefig(
    '/Users/labc02/Documents/PDCB_data/MK-project/Figures/nesting_fig.png',
    dpi=600)
burrow_raw = pd.read_csv(
    '/Users/labc02/Documents/PDCB_data/MK-project/Burrowing.csv')
burrow_raw
burrow_raw['Group'] = burrow_raw['Genotype'] + '_' + burrow_raw['Tx']
# Normality per genotype within each treatment
for tx in burrow_raw['Tx'].unique():
    print(tx)
    print(pg.normality(data=burrow_raw[burrow_raw['Tx'] == tx],
                       dv='% Test (12 h)', group='Genotype'))
# Check homoscedasticity
pg.homoscedasticity(data=burrow_raw, dv='% Test (12 h)', group='Group')
burr_kw = pg.kruskal(data=burrow_raw, dv='% Test (12 h)', between='Group')
burr_kw
#%%
burr_fig = plt.figure(figsize=(4, 4))
sns.boxplot(x='Tx', y='% Test (12 h)', hue='Genotype',
            data=burrow_raw,
            palette=['forestgreen', 'royalblue'],
            showmeans=True,
            meanprops={
                'marker': '+',
print('\n******** %s vs %s **********' % (Yname, Xname))
plot = sns.boxplot(x=Xname, y=Yname, data=Ble, hue='phyto')

# Homoscedasticity
Homo = pg.homoscedasticity(data=Ble, dv=Yname, group=Xname, method="levene")
print(Homo)
# print(Ble[Ble['variete'] == 'V1'].var())
# print(Ble[Ble['variete'] == 'V2'].var())
# print(Ble[Ble['variete'] == 'V3'].var())
# print(Ble[Ble['variete'] == 'V4'].var())

# Normality
# Norm = pg.normality(data=Ble, dv=Yname, group=Xname, method="shapiro")
# print(Norm)

# Normality of residuals
lm = pg.linear_regression(Ble[Xname].cat.codes, Ble[Yname])
Normall = pg.normality(lm.residuals_)
print(Normall)
plot = pg.qqplot(lm.residuals_, dist='norm')

# One-way ANOVA
aov = Ble.anova(dv=Yname, between=Xname, detailed=True)
print(aov)

# Analysis of rdt vs phyto
Yname = 'rdt'
Xname = 'phyto'
print('\n******** %s vs %s **********' % (Yname, Xname))

# Homoscedasticity
Homo = pg.homoscedasticity(data=Ble, dv=Yname, group=Xname)
print(Homo)
                             sheet_name='io_STATS_foram_only',
                             usecols="B:Q")
io_2050rcp8p5 = pd.read_excel('genie_outpout.xlsx',
                              sheet_name='io_STATS_foram_only',
                              usecols="T:AI")
io_2100rcp8p5 = pd.read_excel('genie_outpout.xlsx',
                              sheet_name='io_STATS_foram_only',
                              usecols="AL:BA")
io_2100rcp6 = pd.read_excel('genie_outpout.xlsx',
                            sheet_name='io_STATS_foram_only',
                            usecols="BD:BS")

#################################################################
# Check the normality of each dataset

# Subpolar
nor_subpolar_present = pg.normality(subpolar_present)
nor_subpolar_2050rcp8p5 = pg.normality(subpolar_2050rcp8p5)
nor_subpolar_2100rcp8p5 = pg.normality(subpolar_2100rcp8p5)
# nor_subpolar_2100rcp6 = pg.normality(subpolar_2100rcp6)

# Temperate
nor_temp_present = pg.normality(temp_present)
nor_temp_2050rcp8p5 = pg.normality(temp_2050rcp8p5)
nor_temp_2100rcp8p5 = pg.normality(temp_2100rcp8p5)
nor_temp_2100rcp6 = pg.normality(temp_2100rcp6)

# Tropics
nor_trop_present = pg.normality(trop_present)
nor_trop_2050rcp8p5 = pg.normality(trop_2050rcp8p5)
nor_trop_2100rcp8p5 = pg.normality(trop_2100rcp8p5)
nor_trop_2100rcp6 = pg.normality(trop_2100rcp6)

# Indian Ocean
# Temperate
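# The block above repeats the same call for every region/scenario pair, which
# is how copy-paste slips creep in. A sketch of an alternative layout that
# keeps all results in one dict keyed by (region, scenario); it lists only the
# pairs visible in this excerpt and assumes the corresponding DataFrames are
# already loaded earlier in the script.
region_scenarios = {
    ('subpolar', 'present'): subpolar_present,
    ('subpolar', '2050rcp8p5'): subpolar_2050rcp8p5,
    ('subpolar', '2100rcp8p5'): subpolar_2100rcp8p5,
    ('temp', 'present'): temp_present,
    ('temp', '2050rcp8p5'): temp_2050rcp8p5,
    ('temp', '2100rcp8p5'): temp_2100rcp8p5,
    ('temp', '2100rcp6'): temp_2100rcp6,
    ('trop', 'present'): trop_present,
    ('trop', '2050rcp8p5'): trop_2050rcp8p5,
    ('trop', '2100rcp8p5'): trop_2100rcp8p5,
    ('trop', '2100rcp6'): trop_2100rcp6,
}
normality_results = {key: pg.normality(df_)
                     for key, df_ in region_scenarios.items()}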