def alpha_analysis(alpha, idx_onsets):
    """Compare alpha power across the four rhythm-density bins.

    Draws a boxplot of the per-bin alpha-power distributions, then runs a
    Kruskal-Wallis omnibus test followed by pairwise post-hoc t-tests.

    Parameters
    ----------
    alpha : array-like
        Alpha-power values, indexable by the onset indices below.
    idx_onsets : mapping
        Maps each bin name ('silence', 'low density', 'medium density',
        'high density') to the indices of its onsets.
    """
    # Fixed bin order: used consistently for the plot and both tests.
    bins = ['silence', 'low density', 'medium density', 'high density']
    dict_alpha = {name: alpha[idx_onsets[name]] for name in bins}

    # Boxplot of the four distributions.
    fig, ax = subplots()
    ax.boxplot(dict_alpha.values())
    ax.set_xticklabels(dict_alpha.keys())
    title("Distribution of alpha's power for four rythms")
    # savefig('results_44100/alpha_power_perf3')
    show()

    samples = [dict_alpha[name] for name in bins]
    # Kruskal-Wallis omnibus test across the four bins.
    print('Alpha Kruskal Test\n', kruskal_test(*samples))
    # Post-hoc pairwise t-tests (only meaningful if the omnibus test rejects).
    print('Alpha Posthoc t-test\n', posthoc_ttest(samples))
def sign_barplot(df, val_col, group_col, test="HSD"):
    """Plot a barplot of the data next to a significance heatmap.

    Runs the selected post-hoc test on *df* and renders two panels:
    the raw group means as a barplot (left) and the pairwise
    significance matrix as a sign plot (right).

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data.
    val_col : str
        Column holding the measured values.
    group_col : str
        Column holding the group labels.
    test : str
        One of "HSD", "tukey", "ttest", "scheffe", "dscf", "conover".

    Raises
    ------
    ValueError
        If *test* is not a recognized test name.  (The original code left
        ``result_df`` unbound in that case, raising a confusing NameError.)
    """
    # Dispatch table instead of a chain of independent `if`s.
    tests = {
        "HSD": tukey_hsd,
        "tukey": sp.posthoc_tukey,
        "ttest": sp.posthoc_ttest,
        "scheffe": sp.posthoc_scheffe,
        "dscf": sp.posthoc_dscf,
        "conover": sp.posthoc_conover,
    }
    if test not in tests:
        raise ValueError(
            "unknown test %r; expected one of %s" % (test, sorted(tests)))
    result_df = tests[test](df, val_col, group_col)

    # Profile of the significance mapping.
    fig, ax = plt.subplots(1, 2, figsize=(10, 6))
    cmap = ['1', '#fb6a4a', '#08306b', '#4292c6', '#c6dbef']
    heatmap_args = {
        'cmap': cmap,
        'linewidths': 0.25,
        'linecolor': '0.5',
        'clip_on': False,
        'square': True
    }
    # Draw the test results as a significance heatmap.
    sp.sign_plot(result_df, ax=ax[1], **heatmap_args)
    # Draw the underlying data.
    sns.barplot(data=df, x=group_col, y=val_col, capsize=0.1, ax=ax[0])
    plt.show()
def anova_posthoc_tests(benchmark_snapshot_df):
    """Returns p-value tables for various ANOVA posthoc tests.

    Results should be considered only if the ANOVA test rejects the null
    hypothesis.
    """
    # Shared arguments: compare fuzzers on edge coverage, sorted groups.
    common_args = {
        'a': benchmark_snapshot_df,
        'group_col': 'fuzzer',
        'val_col': 'edges_covered',
        'sort': True
    }
    p_adjust = 'holm'

    posthoc_tests = {}
    # Welch's t-test (equal_var=False) with Holm multiplicity adjustment.
    posthoc_tests['student'] = sp.posthoc_ttest(**common_args,
                                                equal_var=False,
                                                p_adjust=p_adjust)
    # NOTE(review): key is misspelled ('turkey', should be 'tukey'), but it is
    # part of the returned dict's interface — confirm no caller depends on it
    # before renaming.
    posthoc_tests['turkey'] = sp.posthoc_tukey(**common_args)
    return posthoc_tests
# ANOVA table (type II sums of squares) for the fitted model, appended to the
# results file held by `tracker` (model/df/tracker are defined upstream).
res = sm.stats.anova_lm(model, typ=2)
res.to_csv(tracker, mode="a")

tracker.write("\nPost-hoc Tukey Tests\n")
# Tukey HSD on sentiment score grouped by year.
mc = statsmodels.stats.multicomp.MultiComparison(df['total_score'], df['year'])
mc_results = mc.tukeyhsd()
tracker.write(str(mc_results))
# Tukey HSD on sentiment score grouped by Kashmir-relatedness.
mc = statsmodels.stats.multicomp.MultiComparison(df['total_score'], df['is_kashmir'])
mc_results = mc.tukeyhsd()
tracker.write(str(mc_results))

# Combined factor (is_kashmir + year) for the pairwise t-tests below.
df['kashyear'] = df["is_kashmir"].astype(str) + df['year'].astype(str)
# NOTE(review): "t-yTesting" looks like a typo in the output label — it is
# written to the results file, so confirm before changing it.
tracker.write("\nPost Hoc Student t-yTesting\n")
sp.posthoc_ttest(df, val_col='total_score', group_col='kashyear').to_csv(tracker, mode="a")

#HYPOTHESIS 2 Kashmir conflict to Kashmir non conflict
tracker.write(
    "\n\n\n\nHYPOTHESIS 2: Kashmir-related headlines will have more negative sentiment scores on average in conflict periods than Kashmir-related headlines in non-conflict periods\r\n"
)
# Descriptive summary per (is_kashmir, conflict) cell.
rp.summary_cont(df.groupby(['is_kashmir', 'conflict'])['total_score']).to_csv(tracker, mode="a")
# Levene's test for homogeneity of variances between conflict periods
# (Kashmir-related rows only).  The call continues past this chunk.
levene = stats.levene(
    df['total_score'][(df['conflict'] == "Standoff") & (df['is_kashmir'] == True)],
    df['total_score'][(df['conflict'] == "Mumbai") & (df['is_kashmir'] == True)],
# omega_0 is assembled earlier (outside this chunk); omega_1 mirrors it —
# presumably 50 fitted w1 values per model class (CSI, SSI, SSD); verify
# against the upstream fit.
omega_1 = list(CSI.w1) + list(SSI.w1) + list(SSD.w1)
model = (['CSI'] * 50) + (['SSI'] * 50) + (['SSD'] * 50)
data = pd.DataFrame({'model': model, 'omega_0': omega_0, 'omega_1': omega_1})

# check if any of the means is significantly different from the rest
lm = ols('omega_0 ~ model', data=data).fit()
table = sm.stats.anova_lm(lm)
print(table)

# post-hoc test to see if CSI mean is significantly higher
print('omega_0')
print(
    sp.posthoc_ttest(data,
                     val_col='omega_0',
                     group_col='model',
                     p_adjust='bonferroni'))
print('\nomega_1')
print(
    sp.posthoc_ttest(data,
                     val_col='omega_1',
                     group_col='model',
                     p_adjust='bonferroni'))

#%% Load number of amino acids per site
CSI, SSI, SSD = num_aa_per_site(protein)
# Column 0: sites with one amino acid; columns 4+ summed: sites with five or
# more (assumes columns index amino-acid counts per site — TODO confirm).
one = list(CSI[:, 0]) + list(SSI[:, 0]) + list(SSD[:, 0])
five = list(np.sum(CSI[:, 4:], axis=1)) + list(np.sum(
    SSI[:, 4:], axis=1)) + list(np.sum(SSD[:, 4:], axis=1))
def calculate_p_values_groups(self, measure, merged_df, figures_dir, affix, plot=True):
    """Pairwise t-test p-values of *measure* between the groups in *merged_df*.

    Rows with a null *measure* are dropped first.  When *plot* is true the
    resulting p-value matrix is also written to
    ``<figures_dir>/p_values_<affix>.csv``.

    Returns the p-value DataFrame produced by ``sp.posthoc_ttest``.
    """
    valid_rows = merged_df[merged_df[measure].notnull()]
    p_values = sp.posthoc_ttest(valid_rows, val_col=measure, group_col="group")
    if plot:
        p_values.to_csv(f"{figures_dir}/p_values_{affix}.csv")
    return p_values
#print(curr_engine_and_device) #print(total_runtimes.shape) #print(total_runtimes) # test if data is normal res_normal = np.zeros((len(total_runtimes)), dtype=np.float32) for i in range(len(total_runtimes)): res_normal[i] = np.round(shapiro(total_runtimes[i])[1], 4) print(res_normal) print(multipletests(res_normal, method="fdr_bh")[1]) df = pd.DataFrame(total_runtimes, curr_engine_and_device) # test significance between total runtime between IEs (assuming normal) res = np.round(sp.posthoc_ttest(total_runtimes, p_adjust="fdr_by"), 4) print(total_runtimes.shape) print(curr_engine_and_device) # Use Tukey's method for multiple testing instead (works well with groups of the same number of samples) print(total_runtimes.flatten().dtype) tmp = np.repeat(range(len(curr_engine_and_device)), 10) print(tmp.dtype) #tukey = pairwise_tukeyhsd(total_runtimes.flatten(), groups=tmp, alpha=0.05) #print(df) #print(res) #print(tukey) '''
import statsmodels.api as sa
import statsmodels.formula.api as sfa
import scikit_posthocs as sp
import numpy as np
import pandas as pd

# Demo: pairwise post-hoc comparisons of sepal width across iris species.
df = sa.datasets.get_rdataset('iris').data
print(df.head())

# Pairwise t-tests, Holm-adjusted for multiple comparisons.
ttest = sp.posthoc_ttest(df, val_col='Sepal.Width', group_col='Species',
                         p_adjust='holm')

# BUG FIX: posthoc_tukey_hsd() was called with no arguments, which raises a
# TypeError — it requires the value vector and the group vector.
tukey = sp.posthoc_tukey_hsd(df['Sepal.Width'], df['Species'])
import scikit_posthocs as sp
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
import statsmodels.api as sm

# Reaction-time data: one row per subject with a group label and an 'rt' value.
data = pd.read_table(
    "https://www.krigolsonteaching.com/uploads/4/3/8/4/43848243/sampleanovadata2.txt",
    header=None)
data.columns = ['Subject', 'Group', 'rt']

# Groupings
G1 = data[data['Group'] == 1]['rt']
G2 = data[data['Group'] == 2]['rt']
G3 = data[data['Group'] == 3]['rt']
G4 = data[data['Group'] == 4]['rt']

# One-way ANOVA across the four groups.
A1 = stats.f_oneway(G1, G2, G3, G4)

# Post-hoc analysis
# BUG FIX: posthoc_ttest was given the f_oneway result (an (F, p) tuple)
# instead of the data; pass the DataFrame with value/group columns.
tt = sp.posthoc_ttest(data, val_col='rt', group_col='Group')
# BUG FIX: posthoc_tukey_hsd was given the column *names*; it takes the value
# vector and the group vector.  alpha=0.04 kept from the original — confirm
# it was intentional (0.05 is conventional).
Tuk = sp.posthoc_tukey_hsd(data['rt'], data['Group'], alpha=0.04)

# Principal component analysis
# Support Vector Machine learning
# K-means cluster learning