import os
import pickle
from os.path import join

import numpy as np
import pandas as pd
from mlxtend.evaluate import bootstrap


def test_defaults():
    """Sanity-check mlxtend's bootstrap on a 1-D sample with default settings."""
    rng = np.random.RandomState(123)
    x = rng.normal(loc=5., size=100)
    original, std_err, ci_bounds = bootstrap(x, func=np.mean, seed=123)
    assert round(original, 2) == 5.03
    assert round(std_err, 2) == 0.11
    assert round(ci_bounds[0], 2) == 4.80
    assert round(ci_bounds[1], 2) == 5.26
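

# A minimal sketch (illustrative only, not part of the pipeline; the data is
# synthetic): mlxtend's bootstrap resamples *rows* of its input, so a metric
# over (label, probabilities) pairs can be bootstrapped by stacking both into
# one matrix whose rows are resampled together. classification_report below
# relies on exactly this pattern.
def _bootstrap_matrix_sketch():
    from sklearn.metrics import accuracy_score
    rng = np.random.RandomState(0)
    y_true = rng.randint(0, 2, size=(200, 1)).astype(float)
    probas = rng.dirichlet(np.ones(2), size=200)  # fake class probabilities
    Y = np.hstack((y_true, probas))               # column 0: label, rest: probas

    def acc(Y):
        return accuracy_score(Y[:, 0], np.argmax(Y[:, 1:], axis=1))

    # Returns (point estimate, standard error, (CI low, CI high)).
    return bootstrap(Y, num_rounds=200, func=acc, ci=0.95, seed=0)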


def classification_report(results_pickle, output_dir, categorical_encoder,
                          average_mechanism):
    """Write per-dataset classification reports to output_dir.

    For every dataset stored in results_pickle this produces a confusion
    matrix CSV, a support-weighted ROC curve, and bootstrapped estimates
    (score, standard error, 95% CI) of accuracy, recall, precision, F1,
    and AUC. results_pickle maps dataset names to dicts holding a one-hot
    'y_true' array and raw 'y_pred' scores; categorical_encoder is the
    path to a pickled label encoder used to recover class names (ignored
    if the file does not exist); average_mechanism is the averaging
    scheme passed to sklearn's recall/precision/F1 (e.g. 'weighted').
    """
    from sklearn.metrics import (accuracy_score, recall_score,
                                 precision_score, f1_score, roc_curve,
                                 roc_auc_score, confusion_matrix)

    os.makedirs(output_dir, exist_ok=True)

    # Metric closures all take a single matrix Y so they can be handed to
    # bootstrap(), which resamples rows: column 0 is the true label and
    # the remaining columns are class probabilities.
    def extract_ys(Y):
        return Y[:, 0], Y[:, 1:]

    def accuracy(Y):
        y_true, y_pred = extract_ys(Y)
        return accuracy_score(y_true, np.argmax(y_pred, axis=1))

    def recall(Y):
        y_true, y_pred = extract_ys(Y)
        return recall_score(y_true, np.argmax(y_pred, axis=1),
                            average=average_mechanism)

    def precision(Y):
        y_true, y_pred = extract_ys(Y)
        return precision_score(y_true, np.argmax(y_pred, axis=1),
                               average=average_mechanism)

    def f1(Y):
        y_true, y_pred = extract_ys(Y)
        return f1_score(y_true, np.argmax(y_pred, axis=1),
                        average=average_mechanism)

    def auc(Y):
        # Support-weighted mean of the one-vs-rest AUCs, where support is
        # the number of true instances of each class.
        y_true, y_pred = extract_ys(Y)
        supports = {i: int((y_true == i).sum()) for i in np.unique(y_true)}
        final_auc_score = 0.
        for i in supports:
            final_auc_score += supports[i] * roc_auc_score(
                (y_true == i).astype(int), y_pred[:, int(i)])
        return final_auc_score / sum(supports.values())

    def to_y_probas(y_pred):
        # Softmax over raw scores; subtracting the row max avoids overflow.
        e = np.exp(y_pred - y_pred.max(axis=1, keepdims=True))
        return e / e.sum(axis=1, keepdims=True)

    with open(results_pickle, 'rb') as f:
        results_dict = pickle.load(f)
    if os.path.exists(categorical_encoder):
        with open(categorical_encoder, 'rb') as f:
            categorical_encoder = pickle.load(f)
    else:
        categorical_encoder = None

    df_roc = []
    final_results = []
    for k in results_dict:
        y_true = results_dict[k]['y_true']
        y_true_labels = np.argmax(y_true, axis=1)[:, np.newaxis]
        classes = np.unique(y_true_labels)
        y_pred = to_y_probas(results_dict[k]['y_pred'])
        y_pred_labels = np.argmax(y_pred, axis=1).reshape((-1, 1))
        out_classes = classes.astype(int)
        class_labels = (out_classes if categorical_encoder is None else
                        categorical_encoder.inverse_transform(out_classes))

        pd.DataFrame(confusion_matrix(y_true_labels.astype(int).flatten(),
                                      y_pred_labels.astype(int).flatten(),
                                      labels=out_classes),
                     index=class_labels,
                     columns=class_labels).to_csv(
                         join(output_dir, '{}_confusion_mat.csv'.format(k)))

        # Labels and probabilities travel together so bootstrap resampling
        # keeps each row's (label, prediction) pair intact.
        Y = np.hstack((y_true_labels, y_pred))

        # Support-weighted ROC: interpolate every per-class curve onto a
        # common FPR grid, then average the TPRs weighted by class support.
        supports = {i: int((y_true_labels == i).sum()) for i in classes}
        fpr, tpr = dict(), dict()
        for i in supports:
            fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_pred[:, i])
        all_fpr = np.unique(np.concatenate([fpr[i] for i in supports]))
        mean_tpr = np.zeros_like(all_fpr)
        for i in supports:
            mean_tpr += supports[i] * np.interp(all_fpr, fpr[i], tpr[i])
        mean_tpr /= sum(supports.values())
        df = pd.DataFrame({'fpr': all_fpr, 'tpr': mean_tpr})
        df['Legend'] = "{} Weighted ROC, AUC={}".format(k, round(auc(Y), 2))
        df_roc.append(df)

        # Bootstrap each metric (1000 rounds) for a standard error and
        # 95% confidence interval.
        fns = dict(accuracy=accuracy, recall=recall, precision=precision,
                   f1=f1, auc=auc)
        for fn in fns:
            print(k, fn)
            original, std_err, ci_bounds = bootstrap(
                Y, num_rounds=1000, func=fns[fn], ci=0.95, seed=123)
            low, high = ci_bounds
            final_results.append([k, fn, original, std_err, low, high])

    final_results = pd.DataFrame(final_results,
                                 columns=['DataSet', 'Metric', 'Score',
                                          'Error', '95% CI Low',
                                          '95% CI High'])
    df_roc = pd.concat(df_roc)
    final_results.to_csv(join(output_dir, 'final_classification_results.csv'))
    df_roc.to_csv(join(output_dir, 'Weighted_ROC.csv'))
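

# Example invocation (a sketch; the file names here are hypothetical). The
# pickle layout is inferred from how classification_report reads it:
# {'dataset_name': {'y_true': one-hot array, 'y_pred': raw score array}}.
if __name__ == '__main__':
    classification_report(results_pickle='predictions.pkl',
                          output_dir='reports',
                          categorical_encoder='label_encoder.pkl',
                          average_mechanism='weighted')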