def nemenyi(self, metric_name=None):
    """Run the Nemenyi post-hoc test on the evaluated metric values.

    Intended as the follow-up to `friedman_test` once that test reports
    statistical significance. For more information see
    `Nemenyi test <https://en.wikipedia.org/wiki/Nemenyi_test>`_.
    The computation is delegated to
    `scikit-posthocs <https://github.com/maximtrp/scikit-posthocs>`_.

    Parameters
    ----------
    metric_name : str, optional
        Metric to test; it is resolved through `_validate_metric_name`.

    Returns
    -------
    The object returned by `scikit_posthocs.posthoc_nemenyi` for the
    melted metric table (one group per column of that table).
    """
    # Imported lazily so scikit-posthocs does not become a hard dependency.
    from scikit_posthocs import posthoc_nemenyi

    self._check_is_evaluated()
    metric_name = self._validate_metric_name(metric_name)
    per_estimator = self._get_metrics_per_estimator_dataset(metric_name)

    # Every column of the table becomes one group in long format.
    # NOTE(review): scikit-posthocs also ships `posthoc_nemenyi_friedman`
    # for blocked designs -- confirm which variant is intended here.
    long_table = pd.DataFrame(per_estimator).melt(
        var_name="groups", value_name="values")
    return posthoc_nemenyi(long_table, val_col="values", group_col="groups")
def friedman_test(results_df, score, higher_is_better):
    """Run a Friedman test over algorithms and print the outcome.

    The per-fold rows of ``results_df`` are averaged per (dataset, algorithm)
    run, arranged as a datasets x algorithms table and passed to
    ``friedmanchisquare``. When the null hypothesis is rejected at the 0.05
    level, the pairwise Nemenyi p-values are reported as "significantly
    better" / "not significant" statements. A table with the rank of every
    algorithm on every dataset (rank 1 = best) is always printed last.

    Assumes the rows are ordered dataset by dataset, with the folds of one
    algorithm stored consecutively and the algorithms appearing in the same
    order for every dataset -- the reshape below depends on it.

    Parameters
    ----------
    results_df : pandas.DataFrame
        Must contain the columns 'Dataset Name', 'Algorithm Name' and
        ``score``.
    score : str
        Name of the column holding the metric to compare.
    higher_is_better : bool
        True when larger ``score`` values are better. It decides both the
        direction of the "better than" statements and the rank direction.

    Returns
    -------
    None
        All results are printed.
    """
    alpha = 0.05
    algo_col = results_df['Algorithm Name']

    # A new run starts whenever the algorithm name changes from one row to
    # the next; averaging per run collapses the k folds into one value.
    run_ids = (algo_col != algo_col.shift()).cumsum()
    # Pick the score column before averaging so that the string columns
    # never reach mean() (that raises on recent pandas versions).
    folds_mean = results_df[score].groupby(run_ids).mean()

    algorithm_names = algo_col.drop_duplicates().values
    n_algorithms = len(algorithm_names)

    # Datasets as rows, algorithms as columns.
    mean_reshape = pd.DataFrame(
        np.reshape(folds_mean.values, (-1, n_algorithms)),
        columns=algorithm_names)

    stat, p_value = friedmanchisquare(*mean_reshape.T.values)

    if p_value < alpha:
        print('Rejected (different distributions)')
        # NOTE(review): this is the unblocked Nemenyi variant; scikit-posthocs
        # also offers `posthoc_nemenyi_friedman` for Friedman designs --
        # confirm which one is intended.
        nemenyi_p_values = posthoc_nemenyi(mean_reshape.T.values).values
        algo_means = mean_reshape.mean().values
        for first in range(n_algorithms):
            algo1 = algorithm_names[first]
            algo1_mean = algo_means[first]
            for second in range(first + 1, n_algorithms):
                algo2 = algorithm_names[second]
                algo2_mean = algo_means[second]
                if nemenyi_p_values[first][second] >= alpha:
                    print(algo1 + ' and ' + algo2 + ' are not significant')
                    continue
                first_wins = ((algo1_mean > algo2_mean and higher_is_better)
                              or (algo1_mean < algo2_mean
                                  and not higher_is_better))
                if first_wins:
                    winner, loser = algo1, algo2
                else:
                    winner, loser = algo2, algo1
                print(winner + ' is significantly better than ' + loser,
                      '(Nemenyi test with 0.05)')
    else:
        print('Fail to reject (same distributions)')

    # Rank table: one line per dataset, tab separated, rank 1 = best.
    print('\nDataset-Algorithm ranks:')
    print('\t' + ''.join(name + '\t' for name in algorithm_names))
    score_table = mean_reshape.values
    datasets = results_df['Dataset Name'].drop_duplicates().values
    for dataset_index, dataset in enumerate(datasets):
        scores = score_table[dataset_index]
        # rankdata gives rank 1 to the smallest value, so negate the scores
        # when larger is better and use them as they are otherwise.
        ranks = rankdata(-scores if higher_is_better else scores)
        # '%g' prints whole ranks as integers and keeps tied ranks (e.g. 1.5)
        # visible instead of truncating them.
        print(dataset + '\t' + ''.join('%g\t' % rank for rank in ranks))
def compare_classifiers(data_set, view1, view2, classes, labels, ks):
    """Compare Bayesian, k-NN and combined classifiers on three views.

    For the full data set and the two views, the rates returned by
    `bayesian_classifier`, `test_knn` and `max_rule` are collected into one
    table with a column per classifier. A Friedman test is run on the
    columns; when its p-value is below 0.05 the Nemenyi post-hoc test is run
    as well and its result is printed row by row.

    Args:
        data_set: Complete data set, passed through to the classifiers.
        view1: First view of the data.
        view2: Second view of the data.
        classes: Passed unchanged to every classifier helper.
        labels: Passed unchanged to every classifier helper.
        ks: Indexable with positions 0, 1 and 2; entry ``i`` is handed to
            `test_knn` for the i-th view, and the whole object to `max_rule`.

    Returns:
        None. Progress messages and test results are printed.
    """
    views = (data_set, view1, view2)

    # Rates of the Bayesian classifier on each view.
    bayes_rates = []
    for position, view in enumerate(views):
        print('classificador bayesiano %d' % (position + 1))
        bayes_rates.append(bayesian_classifier(view, classes, labels))

    # Distance matrices needed by the k-NN classifiers.
    dists = []
    for position, view in enumerate(views):
        print('matriz de distancias %d' % (position + 1))
        dists.append(knn.calculate_distance_matrix(view))

    # Rates of the k-NN classifier on each view.
    knn_rates = []
    for position, view in enumerate(views):
        print('knn %d' % (position + 1))
        knn_rates.append(
            test_knn(view, dists[position], classes, labels, ks[position]))

    # Rates of the combined (max rule) classifier.
    print('classificador combinado')
    max_rule_rates = max_rule(data_set, view1, view2, dists, classes, labels,
                              ks)

    print('teste de friedman')
    column_names = ("bayes1", "bayes2", "bayes3",
                    "knn1", "knn2", "knn3", "combined")
    rate_matrix = pandas.DataFrame(
        dict(zip(column_names, bayes_rates + knn_rates + [max_rule_rates])))
    statistic, pvalue = friedmanchisquare(
        *[rate_matrix[name] for name in column_names])
    print('friedman statistic: ', statistic)
    print('pvalue: ', pvalue)

    if pvalue >= 0.05:
        return

    # The hypothesis that the classifiers do not differ is rejected, so the
    # pairwise post-hoc test is run on the long-format table.
    long_rates = rate_matrix.melt(var_name='groups', value_name='values')
    nemenyi_results = posthoc_nemenyi(long_rates, val_col='values',
                                      group_col='groups')
    print('teste de nemenyi')
    for row_position in range(nemenyi_results.shape[0]):
        print(nemenyi_results.iloc[row_position])
def calculate_test(self):
    """Apply the Nemenyi test to ``self.df`` column-wise and row-wise.

    Nemenyi is a multiple-comparison method for non-parametric data; see
    https://www.pydoc.io/pypi/scikit-posthocs-0.3.7/autoapi/_posthocs/index.html .

    The test is run twice: once on the transposed values (result labelled
    with the columns of ``self.df``) and once on the values as they are
    (result labelled with the index). Both tables are printed and written to
    ``nem_columns_<metric>_<run>.csv`` and ``nem_rows_<metric>_<run>.csv``
    in the current working directory.

    Returns:
        tuple: ``(column_table, row_table)`` as returned by
        ``sp.posthoc_nemenyi``, relabelled as described above.
    """
    # Shared "<metric>_<run>.csv" tail of both output file names.
    file_tail = "_" + self.metric + "_" + self.run + ".csv"

    by_columns = sp.posthoc_nemenyi(self.df.T.values)
    by_columns.index = self.df.columns
    by_columns.columns = self.df.columns
    print("\nNemenyi test for columns (p-value): \n")
    print(by_columns)
    by_columns.to_csv("nem_columns" + file_tail)

    by_rows = sp.posthoc_nemenyi(self.df.values)
    by_rows.index = self.df.index
    by_rows.columns = self.df.index
    print("\nNemenyi test for rows (p-value):\n")
    by_rows.to_csv("nem_rows" + file_tail)
    print(by_rows)

    return (by_columns, by_rows)
def data_prep(infile):
    """Bin the pairwise Nemenyi p-values between the columns of a CSV.

    The CSV is read with its first column as index; the rows labelled
    'average' and 'std' are dropped. Every remaining column is one group
    (its values are called 'AUROC' in the original long-format code).
    Groups are ordered by descending mean, compared pairwise with
    ``sp.posthoc_nemenyi`` and each p-value is mapped to a level:

        p >= 0.10          -> 5
        0.05  <= p < 0.10  -> 4
        0.01  <= p < 0.05  -> 3
        0.001 <= p < 0.01  -> 2
        0     <= p < 0.001 -> 1
        p < 0              -> 0

    The number of groups is taken from the data instead of being fixed at
    42, so files with any number of columns are supported.

    Parameters
    ----------
    infile : str or file-like
        Anything ``pandas.read_csv`` accepts.

    Returns
    -------
    pandas.DataFrame
        Square object-dtype table of integer levels, with rows and columns
        labelled by group name in descending order of mean. Cells whose
        p-value matches no bin (NaN) keep the placeholder string '1.0', as
        in the original implementation.

    Raises
    ------
    KeyError
        If the file has no rows labelled 'average' and 'std'.
    """
    df = pd.read_csv(infile, header=0, index_col=0)
    df = df.drop(['average', 'std'])

    # Order the groups by descending mean. The original grouped a long-format
    # copy by name (alphabetical) before sorting; sort_index() mirrors that
    # starting order.
    group_means = df.mean().sort_index().sort_values(ascending=False)
    names = list(group_means.index)

    # One array per group, passed as a list as in the library's documented
    # usage, so each element is treated as its own group.
    groups = [df[name].to_numpy() for name in names]
    p_values = np.asarray(sp.posthoc_nemenyi(groups), dtype=float)

    # Start from the placeholder and overwrite from the lowest bin upwards;
    # comparisons with NaN are False, so such cells keep the placeholder.
    levels = np.full(p_values.shape, '1.0', dtype=object)
    # presumably only older scikit-posthocs releases produce negative values
    # (diagonal marker) -- TODO confirm
    levels[p_values < 0.0] = 0
    levels[p_values >= 0.0] = 1
    levels[p_values >= 0.001] = 2
    levels[p_values >= 0.01] = 3
    levels[p_values >= 0.05] = 4
    levels[p_values >= 0.10] = 5

    return pd.DataFrame(levels, index=names, columns=names)
def nemenyi(self, obeservations):
    """
    Run the Nemenyi post-hoc test and round the result.

    Meant to be used once the `friedman_test` reveals statistical
    significance. For more information see
    `Nemenyi test <https://en.wikipedia.org/wiki/Nemenyi_test>`_.
    Implementation used
    `scikit-posthocs <https://github.com/maximtrp/scikit-posthocs>`_.

    Args:
        obeservations(dictionary): Dictionary with errors on test sets
            achieved by estimators. The parameter name is spelled exactly
            as in the signature and is kept for keyword callers.

    Returns:
        pandas DataFrame: the `posthoc_nemenyi` result rounded to three
        decimals.
    """
    # Long format: every key of the dictionary becomes one group.
    long_table = pd.DataFrame(obeservations).melt(var_name='groups',
                                                  value_name='values')
    pairwise = sp.posthoc_nemenyi(long_table,
                                  val_col='values',
                                  group_col='groups')
    return pairwise.round(3)
def nemenyi(self, strategy_dict):
    """
    Run the Nemenyi post-hoc test on per-estimator errors.

    Meant to be used once the `friedman_test` reveals statistical
    significance. For more information see
    `Nemenyi test <https://en.wikipedia.org/wiki/Nemenyi_test>`_.
    Implementation used
    `scikit-posthocs <https://github.com/maximtrp/scikit-posthocs>`_.

    Parameters
    ----------
    strategy_dict : dict
        Dictionary with errors on test sets achieved by estimators.

    Returns
    -------
    pandas DataFrame
        Results of the Nemenyi test
    """
    errors = pd.DataFrame(strategy_dict)
    # Reshape to long format so each dictionary key forms one group.
    stacked = errors.melt(var_name='groups', value_name='values')
    return sp.posthoc_nemenyi(stacked, val_col='values', group_col='groups')
def kruskal_posthoc_tests(benchmark_snapshot_df):
    """Returns p-value tables for various Kruskal posthoc tests.

    Every test groups the rows of |benchmark_snapshot_df| by the 'fuzzer'
    column and compares the 'edges_covered' values. All tests except
    Nemenyi are called with Holm p-value adjustment.

    Results should be considered only if the Kruskal test rejects the null
    hypothesis.
    """
    shared_kwargs = dict(a=benchmark_snapshot_df,
                         group_col='fuzzer',
                         val_col='edges_covered',
                         sort=True)

    # Tests that are called with a p-value adjustment method.
    adjusted_tests = (
        ('mann_whitney', sp.posthoc_mannwhitney),
        ('conover', sp.posthoc_conover),
        ('wilcoxon', sp.posthoc_wilcoxon),
        ('dunn', sp.posthoc_dunn),
    )
    posthoc_tests = {
        test_name: test_function(**shared_kwargs, p_adjust='holm')
        for test_name, test_function in adjusted_tests
    }
    # The Nemenyi test is called without a p_adjust argument.
    posthoc_tests['nemenyi'] = sp.posthoc_nemenyi(**shared_kwargs)
    return posthoc_tests
results.rf, results.lf, results.rt, results.lt, results.rs, results.ls ] data_act = [ results.walking, results.running, results.standing, results.sitting, results.bicycling ] f_comp = stats.friedmanchisquare(*data_seg) print(f_comp) #data = [proposed_dist.rice, proposed_dist.spline, proposed_dist.fifth, proposed_dist.fourth, proposed_dist.third, proposed_dist.second, proposed_dist.linear, proposed_dist.diff] data = [ proposed_dist.fifth, proposed_dist.fourth, proposed_dist.third, proposed_dist.second, proposed_dist.spline, proposed_dist.linear, proposed_dist.diff ] n_comp = sp.posthoc_nemenyi(data) print(n_comp) #n_comp = sp.posthoc_nemenyi_friedman(data) #print(n_comp) num_cat = len(data) rankings = [0 for _ in range(num_cat)] print(rankings) for index, _ in enumerate(data[0]): values = [data[i][index] for i in range(len(data))] for order, value in enumerate(values): rankings[order] += sorted(values).index(value) rankings = [num_cat - a / len(data[0]) for a in rankings] print(rankings)
# Report the Kruskal-Wallis statistic and p-value for each metric
# (accuracy, F1 score, MCC) for the current number of folds.
print('Kruskal-Wallis test for cross validation with k=%d'%fold)
print()
print('Accuracy: statistics=%.3f, p=%.20f' % (stat_acc, p_acc))
print('F1 score: statistics=%.3f, p=%.20f' % (stat_f1, p_f1))
print('MCC: statistics=%.3f, p=%.20f' % (stat_mcc, p_mcc))
print()
# interpret
# Only the accuracy p-value is compared against alpha here.
alpha = 0.05
if p_acc > alpha:
    print('Same distributions (fail to reject H0)')
else:
    print('Different distributions (reject H0)')
print()
# Pairwise Nemenyi post-hoc tables, one per metric.
# NOTE(review): the nesting under the if/else above was reconstructed from
# a collapsed line -- confirm these calls are meant to run unconditionally.
# presumably clf_acc / clf_f1 / clf_mcc hold one group of scores per
# classifier -- verify against the code that builds them.
posthoc_acc = posthoc_nemenyi(clf_acc)
posthoc_f1 = posthoc_nemenyi(clf_f1)
posthoc_mcc = posthoc_nemenyi(clf_mcc)
print('Posthoc Nemenyi for accuracy\n', posthoc_acc)
print()
print('Posthoc Nemenyi for F1 score\n', posthoc_f1)
print()
print('Posthoc Nemenyi for MCC\n', posthoc_mcc)
print()
#%% Application for the final test
# Load the CSV used by the final test section.
datasetTest = pd.read_csv("Dataset_processado/dataset_teste_processado.csv")
# coding=utf-8
"""Friedman test plus Nemenyi post-hoc comparison of three techniques.

Reads 'csv/comparativo_tecnicas.csv' and compares the 'dt', 'nb' and 'mlp'
columns. The Friedman p-value and a significance message are printed first,
followed by the table returned by the Nemenyi post-hoc test.
"""
from scipy.stats import friedmanchisquare
import scikit_posthocs as sp  #pip3 install scikit-posthocs
import pandas as pd

df = pd.read_csv('csv/comparativo_tecnicas.csv', encoding='utf-8')

print('\nFriedman')
# One list of measurements per technique, in a fixed order.
samples = [df[column].tolist() for column in ('dt', 'nb', 'mlp')]
stat, p = friedmanchisquare(*samples)
print('p=%.3f' % (p))
# Significance threshold of 0.05.
print('Não há diferença significativa' if p > 0.05
      else 'Há diferença significativa')

print('\n Posthoc')
# The post-hoc table is printed whatever the Friedman outcome was.
posthoc = sp.posthoc_nemenyi(samples)
print(posthoc)
rating_matriz['RandomForestClassifier(3_estimators)'], rating_matriz['DecisionTreeClassifier()'], rating_matriz['SVM'], rating_matriz['KNeighborsClassifier(3)'], rating_matriz['GaussianNB'], rating_matriz['KNeighborsClassifier(2)'], rating_matriz['KNeighborsClassifier(5)'], rating_matriz['BernoulliNB'], rating_matriz['KNeighborsClassifier(8)']) alpha = 0.05 if p > alpha: print('Same distributions (fail to reject H0)') else: print('Different distributions (reject H0)') import scikit_posthocs as sp df_nemenyi = sp.posthoc_nemenyi(data) #df_nemenyi = sp.posthoc_nemenyi_friedman(data) name_col = {} #for i in range(len(list_rating)): # name_col[i+1] = list_rating[i][0] for i in range(len(real)): name_col[i + 1] = real[i] df_nemenyi = df_nemenyi.rename(columns=name_col, index=name_col) df_nemenyi.to_csv('Real_clf_nemenyi.csv') import seaborn as sns
# PESO values for the rows whose RACACOR code is 3, 4 and 5; the groups for
# the other codes (peso_branca, peso_preta) are built before this point.
peso_amarela = df.PESO[df.RACACOR == 3].values
peso_parda = df.PESO[df.RACACOR == 4].values
peso_indigena = df.PESO[df.RACACOR == 5].values
# All five groups together, in the order used for the post-hoc test below.
pesos_racas = [
    peso_branca, peso_preta, peso_amarela, peso_parda, peso_indigena
]
# Kruskal-Wallis test, which compares the whole set of groups at once
stat_peso_raca, p_peso_raca = kruskal(peso_branca, peso_preta, peso_amarela, peso_parda, peso_indigena)
print('Teste de Kruskal-Wallis para variação de raça/cor:')
print('Estatística do teste = %.2f, p-valor = %.7f' % (stat_peso_raca, p_peso_raca))
# If the Kruskal test rejects the null hypothesis (same distributions), the
# Nemenyi post-hoc test is run, comparing the groups pair by pair
alpha = 0.05
if p_peso_raca > alpha:
    print(
        'Mesmas distribuições de peso para as raças/cores (falha em rejeitar H0)'
    )
    print()
else:
    print('Diferentes distribuições segundo a raça/cor (rejeita H0)')
    print()
    print('Pós-teste de Nemenyi:')
    print(posthoc_nemenyi(pesos_racas))