def run_cochran_q_test(y_test, *model_predictions, output_name):
    """
    Runs Cochran's Q test to determine if there is a statistically significant difference in more than two models'
    class predictions. Any number of prediction sets (three or more) can be supplied. The chi2 statistic and
    p-value are written as a one-row CSV to modeling/comparison_files/<output_name>_cochrans_q_test.csv.

    :param y_test: y_test series (its ``.values`` array is passed to the test)
    :param model_predictions: three or more series of model predictions, one per model
    :param output_name: name to append to file to identify models used in the test
    :raises ValueError: if fewer than three sets of predictions are supplied
    """
    n_models = len(model_predictions)
    if n_models < 3:
        # Validate before touching any input so the caller gets an accurate message.
        raise ValueError(
            f'function requires at least three sets of predictions, got {n_models}')

    # cochrans_q takes a variable number of prediction arrays, so unpack them
    # instead of branching on every supported count.
    chi2, p = cochrans_q(
        y_test.values,
        *(predictions.values for predictions in model_predictions))

    pd.DataFrame({
        'chi2': [chi2],
        'p': [p]
    }).to_csv(
        os.path.join('modeling', 'comparison_files', f'{output_name}_cochrans_q_test.csv'))
def test_compare_to_mcnemar_on_2_models():
    """Check that Cochran's Q on two models agrees with uncorrected McNemar.

    The fixture has 100 samples whose true label is always 0. Model 1
    predicts 1 for the first 16 samples; model 2 predicts 1 for the first 6
    samples and for samples 20-21. The statistic and p-value returned by
    ``cochrans_q`` are compared against ``mcnemar`` called with
    ``corrected=False, exact=False``.
    """
    y_true = np.array([0] * 100)
    ym1 = np.array([1] * 16 + [0] * 84)
    ym2 = np.array([1] * 6 + [0] * 14 + [1] * 2 + [0] * 78)

    q, p = cochrans_q(y_true, ym1, ym2)
    mcn_q, mcn_p = mcnemar(mcnemar_table(y_true, ym1, ym2),
                           corrected=False,
                           exact=False)

    # The two code paths compute the same quantity independently, so compare
    # with a floating-point tolerance rather than bitwise equality.
    assert np.isclose(q, mcn_q)
    assert np.isclose(p, mcn_p)
def test_on_dataset():
    """Regression check of ``cochrans_q`` on a fixed three-model fixture.

    All 100 true labels are 0. Each model's predictions are 0 except at the
    positions spelled out below. The test pins the statistic and p-value,
    rounded to three decimals.
    """
    truth = np.array([0] * 100)

    # Model A: ones at positions 0-15.
    preds_a = np.array([1] * 16 + [0] * 84)
    # Model B: ones at positions 0-5 and 20-21.
    preds_b = np.array([1] * 6 + [0] * 14 + [1] * 2 + [0] * 78)
    # Model C: ones at positions 0-2, 6, 20-21 and 98-99.
    preds_c = np.array(
        [1] * 3 + [0] * 3 + [1] + [0] * 13 + [1] * 2 + [0] * 76 + [1] * 2
    )

    statistic, p_val = cochrans_q(truth, preds_a, preds_b, preds_c)

    assert round(statistic, 3) == 7.529
    assert round(p_val, 3) == 0.023
def summarize_feature_comparisons(
    base_clf: BaseEstimator,
    comparison_clfs: Dict[str, BaseEstimator],
    X_test,
    y_test,
):
    """Compare a base classifier with alternatives via Cochran's Q and McNemar.

    Every classifier predicts on ``X_test``. Cochran's Q is run once over the
    base predictions plus all comparison predictions, and McNemar's test
    (``exact=True``) is run for each comparison classifier against the base.

    Parameters
    ----------
    base_clf : BaseEstimator
        Fitted reference classifier; only ``predict`` is called.
    comparison_clfs : Dict[str, BaseEstimator]
        Fitted classifiers keyed by a display name; only ``predict`` is
        called on each.
    X_test
        Feature data passed to each classifier's ``predict``.
    y_test
        True labels passed to ``cochrans_q`` and ``mcnemar_table``.

    Returns
    -------
    collections.OrderedDict
        ``"cochrans_q"`` and ``"cochrans_q_pval"``, followed by
        ``"mcnemar_base vs <name>_chi2stat"`` and
        ``"mcnemar_base vs <name>_pval"`` for every comparison classifier.
    """
    from mlxtend.evaluate import mcnemar, cochrans_q, mcnemar_table

    summary_dict = collections.OrderedDict()
    mcnemar_tbs = dict()

    # Predictions of every classifier, base first, for the omnibus test.
    base_y_predict = base_clf.predict(X_test)
    y_predictions = [base_y_predict]

    for name, clf in comparison_clfs.items():
        y_predict = clf.predict(X_test)

        # mcnemar_table returns the 2x2 contingency table as an array, which
        # is exactly what mcnemar() consumes, so store it unchanged.
        mcnemar_tbs[f"base vs {name}"] = mcnemar_table(
            y_test, base_y_predict, y_predict
        )

        y_predictions.append(y_predict)

    # Omnibus test across all classifiers.
    qstat, pval = cochrans_q(y_test, *y_predictions)
    summary_dict["cochrans_q"] = qstat
    summary_dict["cochrans_q_pval"] = pval

    # Pairwise tests of each comparison classifier against the base.
    for name, table in mcnemar_tbs.items():
        chi2stat, pval = mcnemar(table, exact=True)
        summary_dict[f"mcnemar_{name}_chi2stat"] = chi2stat
        summary_dict[f"mcnemar_{name}_pval"] = pval

    return summary_dict
# Implementing GridSearchCV on Logistic Regression model model2 = GridSearchCV(lr, parameters_lr, cv = cv) model2.fit(X_train, y_train) # Fitting on training data # In[16]: # Implementing GridSearchCV on MLP classifier model3 = GridSearchCV(mlp, parameters_mlp, cv = cv) model3.fit(X_train, y_train) # Fitting on training data # In[17]: y_model1 = model1.predict(X_test) y_model2 = model2.predict(X_test) y_model3 = model3.predict(X_test) y_test = np.array(y_test) # In[18]: q, p_value = cochrans_q(y_test, y_model1, y_model2, y_model3) print('Q: %.3f' % q) print('p-value: %.3f' % p_value)
# One-way ANOVA across the uni / bi / unibi score columns of df_result.
# NOTE(review): f_oneway is presumably scipy.stats.f_oneway - confirm import.
# The result is unpacked by the two-float format string below.
result = f_oneway(df_result['uni_R'].to_numpy(),
                  df_result['bi_R'].to_numpy(),
                  df_result['unibi_R'].to_numpy())
print("ANNOVA R : %0.5f, %0.5f" % result)
result = f_oneway(df_result['uni_F1'].to_numpy(),
                  df_result['bi_F1'].to_numpy(),
                  df_result['unibi_F1'].to_numpy())
print("ANNOVA F1 : %0.5f, %0.5f" % result)

# Cochran's Q analysis
# sr_uni / sr_bi / sr_unibi and the target y come from outside this excerpt.
y_uni = sr_uni.to_numpy()
y_bi = sr_bi.to_numpy()
y_unibi = sr_unibi.to_numpy()
q, p_value = cochrans_q(y, y_uni, y_bi, y_unibi)
print("COHRAN Q-Test: q: %0.5f, p_value: %0.5f" % (q, p_value))

# Pair each n-gram label with its prediction array (same order in both lists).
l_grams = ['uni', 'bi', 'unibi']
l_rslt = [y_uni, y_bi, y_unibi]
l_pair = list(zip(l_grams, l_rslt))
l_mcnemar_rslt = []
# Visit every unordered pair once: the inner loop only walks entries after i.
# Note j indexes the slice l_pair[i + 1:], not l_pair itself.
for i, t0 in enumerate(l_pair):
    for j, t1 in enumerate(l_pair[i + 1:]):
        k0 = t0[0]  # label of the first configuration
        k1 = t1[0]  # label of the second configuration
        v0 = t0[1]  # predictions of the first configuration
        v1 = t1[1]  # predictions of the second configuration
        # McNemar contingency table for this pair against the target y.
        tb = mcnemar_table(y_target=y, y_model1=v0, y_model2=v1)
# `result` here was computed before this excerpt begins.
print("ANNOVA R : %0.5f, %0.5f" % result)
# One-way ANOVA across the F1 columns of the four tc_* configurations.
# NOTE(review): f_oneway is presumably scipy.stats.f_oneway - confirm import.
result = f_oneway(
    df_result['tc_lower_F1'].to_numpy(),
    df_result['tc_swrem_F1'].to_numpy(),
    df_result['tc_stem_F1'].to_numpy(),
    df_result['tc_swrem_stem_F1'].to_numpy(),
)
print("ANNOVA F1 : %0.5f, %0.5f" % result)

# Cochran's Q test
# The sr_* series and the target y come from outside this excerpt.
y_tc_lower = sr_tc_lower.to_numpy()
y_tc_swrem = sr_tc_swrem.to_numpy()
y_tc_stem = sr_tc_stem.to_numpy()
y_tc_swrem_stem = sr_tc_swrem_stem.to_numpy()
q, p_value = cochrans_q(y, y_tc_lower, y_tc_swrem, y_tc_stem, y_tc_swrem_stem)
print("COHRAN Q-Test: q: %0.5f, p_value: %0.5f" % (q, p_value))

# Pair each configuration label with its prediction array (same order in both lists).
l_repr = ['tc_lower', 'tc_swrem', 'tc_stem', 'tc_swrem_stem']
l_rslt = [y_tc_lower, y_tc_swrem, y_tc_stem, y_tc_swrem_stem]
l_pair = list(zip(l_repr, l_rslt))
l_mcnemar_rslt = []
# Visit every unordered pair once: the inner loop only walks entries after i.
# Note j indexes the slice l_pair[i + 1:], not l_pair itself.
for i, t0 in enumerate(l_pair):
    for j, t1 in enumerate(l_pair[i + 1:]):
        k0 = t0[0]  # label of the first configuration
        k1 = t1[0]  # label of the second configuration
        v0 = t0[1]  # predictions of the first configuration
        v1 = t1[1]  # predictions of the second configuration
        # McNemar contingency table for this pair against the target y.
        tb = mcnemar_table(y_target=y, y_model1=v0, y_model2=v1)
# `result` here was computed before this excerpt begins.
print("ANNOVA R : %0.5f, %0.5f" % result)
# One-way ANOVA across the F1 columns of the six algorithms.
# NOTE(review): f_oneway is presumably scipy.stats.f_oneway - confirm import.
result = f_oneway(df_result['multi_nb_F1'].to_numpy(),
                  df_result['svc_F1'].to_numpy(),
                  df_result['lsvc_F1'].to_numpy(),
                  df_result['rf_F1'].to_numpy(),
                  df_result['lr_F1'].to_numpy(),
                  df_result['ada_F1'].to_numpy())
print("ANNOVA F1 : %0.5f, %0.5f" % result)

# Cochran's Q analysis
# The sr_* series and the target y come from outside this excerpt.
q, p_value = cochrans_q(
    y,
    sr_multi_nb.to_numpy(),
    sr_svc.to_numpy(),
    sr_lsvc.to_numpy(),
    sr_rf.to_numpy(),
    sr_lr.to_numpy(),
    sr_ada.to_numpy(),
)
print("COHRAN Q-Test: q: %0.5f, p_value: %0.5f" % (q, p_value))

# f_out is not used within this excerpt.
f_out = basename(f_in)
# Write the result table next to the input, swapping '.json' for '_RESULT.xlsx'.
df_result.to_excel(f_in.replace('.json', '_RESULT.xlsx'))

# Algorithm labels, in the same order as the prediction arrays listed below.
l_algo = ['multi_nb', 'svc', 'lsvc', 'rf', 'lr', 'ada']
l_rslt = [
    sr_multi_nb.to_numpy(),
    sr_svc.to_numpy(),
    sr_lsvc.to_numpy(),
# `result`, str_annova_f and str_annova_p were set up before this excerpt begins.
print("ANNOVA R : %0.5f, %0.5f" % result)
# Append the first and second elements of the result to the running summary strings.
str_annova_f += "%0.5f " % (result[0])
str_annova_p += "%0.5f " % (result[1])
# One-way ANOVA across the F1 columns of the t / c / tc configurations.
# NOTE(review): f_oneway is presumably scipy.stats.f_oneway - confirm import.
result = f_oneway(df_result['t_lower_F1'].to_numpy(),
                  df_result['c_lower_F1'].to_numpy(),
                  df_result['tc_lower_F1'].to_numpy())
print("ANNOVA F1 : %0.5f, %0.5f" % result)
str_annova_f += "%0.5f " % (result[0])
str_annova_p += "%0.5f " % (result[1])

# Cochran's Q test
# The sr_* series and the target y come from outside this excerpt.
y_tc_lower = sr_tc_lower.to_numpy()
y_t_lower = sr_t_lower.to_numpy()
y_c_lower = sr_c_lower.to_numpy()
q, p_value = cochrans_q(y, y_tc_lower, y_t_lower, y_c_lower)
print("COHRAN Q-Test: q: %0.5f, p_value: %0.5f" % (q, p_value))

# Pair each configuration label with its prediction array (same order in both lists).
l_repr = ['t_lower', 'c_lower', 'tc_lower']
l_rslt = [y_t_lower, y_c_lower, y_tc_lower]
l_pair = list(zip(l_repr, l_rslt))
str_result = "MCNEMAR RESULT: "
# Visit every unordered pair once: the inner loop only walks entries after i.
# Note j indexes the slice l_pair[i + 1:], not l_pair itself.
for i, t0 in enumerate(l_pair):
    for j, t1 in enumerate(l_pair[i + 1:]):
        k0 = t0[0]  # label of the first configuration
        k1 = t1[0]  # label of the second configuration
        v0 = t0[1]  # predictions of the first configuration
        v1 = t1[1]  # predictions of the second configuration