import numpy as np
import pytest
from scipy.sparse import csr_matrix

from sklearn.preprocessing import LabelBinarizer, label_binarize
from sklearn.preprocessing._label import _inverse_binarize_thresholding


def test_label_binarizer_errors():
    # Check that invalid arguments yield ValueError
    one_class = np.array([0, 0, 0, 0])
    lb = LabelBinarizer().fit(one_class)

    multi_label = [(2, 3), (0,), (0, 2)]
    err_msg = "You appear to be using a legacy multi-label data representation."
    with pytest.raises(ValueError, match=err_msg):
        lb.transform(multi_label)

    lb = LabelBinarizer()
    err_msg = "This LabelBinarizer instance is not fitted yet"
    with pytest.raises(ValueError, match=err_msg):
        lb.transform([])
    with pytest.raises(ValueError, match=err_msg):
        lb.inverse_transform([])

    input_labels = [0, 1, 0, 1]
    err_msg = "neg_label=2 must be strictly less than pos_label=1."
    lb = LabelBinarizer(neg_label=2, pos_label=1)
    with pytest.raises(ValueError, match=err_msg):
        lb.fit(input_labels)

    err_msg = "neg_label=2 must be strictly less than pos_label=2."
    lb = LabelBinarizer(neg_label=2, pos_label=2)
    with pytest.raises(ValueError, match=err_msg):
        lb.fit(input_labels)

    err_msg = (
        "Sparse binarization is only supported with non zero pos_label and zero "
        "neg_label, got pos_label=2 and neg_label=1"
    )
    lb = LabelBinarizer(neg_label=1, pos_label=2, sparse_output=True)
    with pytest.raises(ValueError, match=err_msg):
        lb.fit(input_labels)

    # Fail on y_type
    err_msg = "foo format is not supported"
    with pytest.raises(ValueError, match=err_msg):
        _inverse_binarize_thresholding(
            y=csr_matrix([[1, 2], [2, 1]]),
            output_type="foo",
            classes=[1, 2],
            threshold=0,
        )

    # Sequence of seq type should raise ValueError
    y_seq_of_seqs = [[], [1, 2], [3], [0, 1, 3], [2]]
    err_msg = "You appear to be using a legacy multi-label data representation"
    with pytest.raises(ValueError, match=err_msg):
        LabelBinarizer().fit_transform(y_seq_of_seqs)

    # Fail on the number of classes
    err_msg = "The number of class is not equal to the number of dimension of y."
    with pytest.raises(ValueError, match=err_msg):
        _inverse_binarize_thresholding(
            y=csr_matrix([[1, 2], [2, 1]]),
            output_type="foo",
            classes=[1, 2, 3],
            threshold=0,
        )

    # Fail on the dimension of 'binary'
    err_msg = "output_type='binary', but y.shape"
    with pytest.raises(ValueError, match=err_msg):
        _inverse_binarize_thresholding(
            y=np.array([[1, 2, 3], [2, 1, 3]]),
            output_type="binary",
            classes=[1, 2, 3],
            threshold=0,
        )

    # Fail on multioutput data
    err_msg = "Multioutput target data is not supported with label binarization"
    with pytest.raises(ValueError, match=err_msg):
        LabelBinarizer().fit(np.array([[1, 3], [2, 1]]))
    with pytest.raises(ValueError, match=err_msg):
        label_binarize(np.array([[1, 3], [2, 1]]), classes=[1, 2, 3])
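
# For contrast with the error cases tested above, a minimal sketch of valid
# LabelBinarizer usage on a 1-D multiclass target. The toy labels below are
# illustrative choices, not part of the test suite; the expected outputs in
# the comments follow the scikit-learn docstring example.
demo_lb = LabelBinarizer()
encoded = demo_lb.fit_transform([1, 2, 6, 4, 2])
print(demo_lb.classes_)  # [1 2 4 6]
print(encoded)
# [[1 0 0 0]
#  [0 1 0 0]
#  [0 0 0 1]
#  [0 0 1 0]
#  [0 1 0 0]]
print(demo_lb.inverse_transform(encoded))  # [1 2 6 4 2]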
import numpy as np
import pandas as pd

from sklearn.metrics import auc, precision_recall_fscore_support, roc_curve
from sklearn.preprocessing import LabelBinarizer


def class_report(y_true, y_pred, y_score=None, average='micro'):
    """Per-class precision/recall/F1/support with prediction counts and,
    if scores are given, one-vs-rest ROC AUC per class plus an averaged AUC."""
    if y_true.shape != y_pred.shape:
        print("Error! y_true %s is not the same shape as y_pred %s"
              % (y_true.shape, y_pred.shape))
        return

    lb = LabelBinarizer()
    if len(y_true.shape) == 1:
        lb.fit(y_true)

    # Value counts of predictions
    labels, cnt = np.unique(y_pred, return_counts=True)
    n_classes = len(labels)
    pred_cnt = pd.Series(cnt, index=labels)

    metrics_summary = precision_recall_fscore_support(
        y_true=y_true, y_pred=y_pred, labels=labels)

    avg = list(precision_recall_fscore_support(
        y_true=y_true, y_pred=y_pred, average='weighted'))

    metrics_sum_index = ['precision', 'recall', 'f1-score', 'support']
    class_report_df = pd.DataFrame(
        list(metrics_summary), index=metrics_sum_index, columns=labels)

    support = class_report_df.loc['support']
    total = support.sum()
    class_report_df['avg / total'] = avg[:-1] + [total]

    class_report_df = class_report_df.T
    class_report_df['pred'] = pred_cnt
    # Assign via .loc to avoid pandas chained-assignment warnings
    class_report_df.loc['avg / total', 'pred'] = total

    if y_score is not None:
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for label_it, label in enumerate(labels):
            # One-vs-rest ROC curve for each class
            fpr[label], tpr[label], _ = roc_curve(
                (y_true == label).astype(int), y_score[:, label_it])
            roc_auc[label] = auc(fpr[label], tpr[label])

        if average == 'micro':
            if n_classes <= 2:
                # Binary case: LabelBinarizer yields a single column,
                # so pair it with the positive-class score column
                fpr["avg / total"], tpr["avg / total"], _ = roc_curve(
                    lb.transform(y_true).ravel(), y_score[:, 1].ravel())
            else:
                fpr["avg / total"], tpr["avg / total"], _ = roc_curve(
                    lb.transform(y_true).ravel(), y_score.ravel())
            roc_auc["avg / total"] = auc(
                fpr["avg / total"], tpr["avg / total"])

        elif average == 'macro':
            # First aggregate all false positive rates
            all_fpr = np.unique(np.concatenate([fpr[i] for i in labels]))

            # Then interpolate all ROC curves at these points
            # (np.interp replaces the deprecated scipy.interp)
            mean_tpr = np.zeros_like(all_fpr)
            for i in labels:
                mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

            # Finally average it and compute AUC
            mean_tpr /= n_classes
            fpr["macro"] = all_fpr
            tpr["macro"] = mean_tpr
            roc_auc["avg / total"] = auc(fpr["macro"], tpr["macro"])

        class_report_df['AUC'] = pd.Series(roc_auc)

    return class_report_df
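
# A minimal usage sketch for class_report, assuming a scikit-learn classifier
# that exposes predict_proba. The dataset and model below are illustrative
# choices, not part of the function above. Note the implicit assumption:
# the columns of y_score must line up with np.unique(y_pred), which holds for
# predict_proba as long as every class actually appears in the predictions.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_classes=3, n_informative=5,
                           random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
report = class_report(
    y_true=y_test,
    y_pred=clf.predict(X_test),
    y_score=clf.predict_proba(X_test),
    average='micro',
)
print(report)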