def binary_classifier_quality(model, X_test, Y_test):
    """Print and plot quality diagnostics of a fitted binary classifier.

    Meant for binary classification with labels encoded as 0 (negative)
    and 1 (positive). If `model` is a `GridSearchCV`, a summary of the
    cross-validation results is printed first and the predictions are
    obtained through `model.predict` (i.e. the best model of the grid).

    Printed to stdout: the confusion matrix, sensitivity, specificity,
    accuracy and F1 score on the test data. A confusion-matrix plot and a
    ROC-curve plot are created as a side effect.

    Args:
        model: Fitted estimator (or fitted `GridSearchCV`) exposing `predict`.
        X_test: Test features, passed unchanged to `model.predict`.
        Y_test: True 0/1 labels of the test samples.

    Returns:
        None.
    """
    # Local import keeps the block self-contained without touching the
    # file-level import section.
    import numpy as np

    if isinstance(model, GridSearchCV):
        summary_keys = [
            'params',
            'mean_test_score',
            'std_test_score',
            'rank_test_score',
        ]
        result = pd.DataFrame({k: model.cv_results_[k] for k in summary_keys})
        print(result)
        print()
        print(f"best params: {model.best_params_}")
        print("best score: {:f}".format(model.best_score_))
        print()

    Y_hat = model.predict(X_test)

    print("Confusion matrix (true x pred):")
    print(confusion_matrix(Y_test, Y_hat))

    # Work on plain arrays so the boolean masks are positional regardless of
    # whether the caller passed lists, arrays or pandas objects.
    y_true = np.asarray(Y_test)
    y_pred = np.asarray(Y_hat)
    positive_mask = y_true == 1
    negative_mask = y_true == 0
    n_positive = positive_mask.sum()
    n_negative = negative_mask.sum()

    # With 0/1 predictions, summing the predictions over the true positives
    # counts the true positives; summing (1 - prediction) over the true
    # negatives counts the true negatives. A class that is absent from the
    # test set makes the rate undefined, reported as nan instead of
    # triggering a division-by-zero warning.
    sensitivity = (
        y_pred[positive_mask].sum() / n_positive
        if n_positive else float("nan")
    )
    specificity = (
        (1 - y_pred[negative_mask]).sum() / n_negative
        if n_negative else float("nan")
    )

    print("Sensitivity: {:f}".format(sensitivity))
    print("Specificity: {:f}".format(specificity))
    print("Accuracy score on test data: {:f}".format(
        accuracy_score(Y_test, Y_hat)))
    print("F1 score on test data: {:f}".format(f1_score(Y_test, Y_hat)))

    ConfusionMatrixDisplay.from_predictions(Y_test, Y_hat)
    RocCurveDisplay.from_estimator(model, X_test, Y_test)
def test_confusion_matrix_with_unknown_labels(pyplot, constructor_name):
    """Verify that `labels=None` falls back to the labels found in the data.

    The display must show the union of the unique values of `y_true` and
    `y_pred`, even when `y_true` holds a label the classifier never saw.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/18405
    """
    n_classes = 5
    features, target = make_classification(
        n_samples=100,
        n_informative=5,
        n_classes=n_classes,
        random_state=0,
    )
    svc = SVC().fit(features, target)
    predictions = svc.predict(features)

    # Shifting the targets by one introduces a label that was neither seen
    # while fitting nor listed in `svc.classes_`.
    shifted_target = target + 1

    # The branch below is a plain if/else, so reject anything unexpected.
    assert constructor_name in ("from_estimator", "from_predictions")

    if constructor_name == "from_predictions":
        disp = ConfusionMatrixDisplay.from_predictions(
            shifted_target, predictions, labels=None
        )
    else:
        disp = ConfusionMatrixDisplay.from_estimator(
            svc, features, shifted_target, labels=None
        )

    display_labels = []
    for tick in disp.ax_.get_xticklabels():
        display_labels.append(tick.get_text())

    expected_labels = list(map(str, range(n_classes + 1)))
    assert_array_equal(expected_labels, display_labels)
def test_confusion_matrix_display_invalid_option(pyplot, constructor_name):
    """An unsupported `normalize` value must raise a `ValueError`."""
    features, target = make_classification(
        n_samples=100, n_informative=5, n_classes=5, random_state=0
    )
    svc = SVC().fit(features, target)
    predictions = svc.predict(features)

    # The branch below is a plain if/else, so reject anything unexpected.
    assert constructor_name in ("from_estimator", "from_predictions")

    err_msg = r"normalize must be one of \{'true', 'pred', 'all', None\}"
    with pytest.raises(ValueError, match=err_msg):
        if constructor_name == "from_predictions":
            ConfusionMatrixDisplay.from_predictions(
                target, predictions, normalize="invalid"
            )
        else:
            ConfusionMatrixDisplay.from_estimator(
                svc, features, target, normalize="invalid"
            )
def test_confusion_matrix_display_validation(pyplot):
    """Each kind of invalid input must trigger its dedicated error message."""
    features, target = make_classification(
        n_samples=100, n_informative=5, n_classes=5, random_state=0
    )

    svr = SVR().fit(features, target)
    regressor_predictions = svr.predict(features)
    classifier_predictions = SVC().fit(features, target).predict(features)

    # A regressor is not an acceptable estimator.
    with pytest.raises(
        ValueError,
        match="ConfusionMatrixDisplay.from_estimator only supports classifiers",
    ):
        ConfusionMatrixDisplay.from_estimator(svr, features, target)

    mixed_type_msg = "Mix type of y not allowed, got types"

    # Adding 0.5 forces `y_true` to be seen as a regression target.
    with pytest.raises(ValueError, match=mixed_type_msg):
        ConfusionMatrixDisplay.from_predictions(
            target + 0.5, classifier_predictions
        )

    # Same check with the continuous values on the prediction side.
    with pytest.raises(ValueError, match=mixed_type_msg):
        ConfusionMatrixDisplay.from_predictions(target, regressor_predictions)

    # Dropping every other prediction makes the sample counts disagree.
    with pytest.raises(
        ValueError,
        match="Found input variables with inconsistent numbers of samples",
    ):
        ConfusionMatrixDisplay.from_predictions(
            target, classifier_predictions[::2]
        )
def test_confusion_matrix_display(pyplot, constructor_name):
    """Check a display built by either constructor and re-plotted via `plot`.

    The display is created with `from_estimator` or `from_predictions`, then
    `plot` is called repeatedly with different options to verify that the
    colormap, the cell texts, the tick rotation and the value format are
    updated accordingly.
    """
    n_classes = 5
    features, target = make_classification(
        n_samples=100,
        n_informative=5,
        n_classes=n_classes,
        random_state=0,
    )
    svc = SVC().fit(features, target)
    predictions = svc.predict(features)

    # The branch below is a plain if/else, so reject anything unexpected.
    assert constructor_name in ("from_estimator", "from_predictions")

    cm = confusion_matrix(target, predictions)
    plot_options = dict(
        normalize=None,
        include_values=True,
        cmap="viridis",
        xticks_rotation=45.0,
    )
    if constructor_name == "from_predictions":
        disp = ConfusionMatrixDisplay.from_predictions(
            target, predictions, **plot_options
        )
    else:
        disp = ConfusionMatrixDisplay.from_estimator(
            svc, features, target, **plot_options
        )

    def xtick_rotations():
        # Read the rotation of every x tick label currently on the axes.
        return [tick.get_rotation() for tick in disp.ax_.get_xticklabels()]

    # State right after construction.
    assert_allclose(disp.confusion_matrix, cm)
    assert disp.text_.shape == (n_classes, n_classes)
    assert_allclose(xtick_rotations(), 45.0)
    assert_allclose(disp.im_.get_array().data, cm)

    # Re-plotting with another colormap replaces the image colormap.
    disp.plot(cmap="plasma")
    assert disp.im_.get_cmap().name == "plasma"

    # Without values there are no text artists.
    disp.plot(include_values=False)
    assert disp.text_ is None

    # The tick rotation follows the requested value.
    disp.plot(xticks_rotation=90.0)
    assert_allclose(xtick_rotations(), 90.0)

    # Cell texts use the requested format, compared in row-major order.
    disp.plot(values_format="e")
    expected_text = np.array(
        [format(value, "e") for value in cm.ravel(order="C")]
    )
    text_text = np.array(
        [artist.get_text() for artist in disp.text_.ravel(order="C")]
    )
    assert_array_equal(expected_text, text_text)
def runClassifier(clf, X, y, n_splits=10):
    """Cross-validate `clf` with K-fold and report the pooled results.

    For every fold the classifier is refitted on the training part and used
    to predict the held-out part. The held-out predictions and targets of
    all folds are pooled, then a classification report is printed and a
    row-normalized confusion matrix is shown.

    Args:
        clf: Estimator whose `fit` returns the fitted estimator and which
            exposes `predict`. It is refitted in place for every fold.
        X: Feature array indexable by an integer index array (`X[train]`).
        y: Target array indexable the same way as `X`.
        n_splits: Number of folds given to `KFold`. Defaults to 10.

    Returns:
        None.
    """
    kf = KFold(n_splits)

    # Collect the per-fold arrays and join them once at the end. This keeps
    # the original label dtype (appending to an empty float array turned
    # integer labels into floats, e.g. "1.0" in the report) and avoids
    # recopying the growing buffer on every fold.
    fold_predictions = []
    fold_targets = []
    for train, test in kf.split(X):
        fitted = clf.fit(X[train], y[train])
        fold_predictions.append(fitted.predict(X[test]))
        fold_targets.append(y[test])

    predictions = numpy.concatenate(fold_predictions)
    targets = numpy.concatenate(fold_targets)

    print(classification_report(targets, predictions))
    ConfusionMatrixDisplay.from_predictions(
        targets, predictions, normalize='true'
    )
    plt.show()
def test_confusion_matrix_display_custom_labels(
    pyplot, constructor_name, with_labels, with_display_labels
):
    """Verify the matrix and the tick labels when labels are supplied."""
    n_classes = 5
    features, target = make_classification(
        n_samples=100,
        n_informative=5,
        n_classes=n_classes,
        random_state=0,
    )
    svc = SVC().fit(features, target)
    predictions = svc.predict(features)

    # The branch below is a plain if/else, so reject anything unexpected.
    assert constructor_name in ("from_estimator", "from_predictions")

    ax = pyplot.gca()

    labels = None
    if with_labels:
        labels = [2, 1, 0, 3, 4]

    display_labels = None
    if with_display_labels:
        display_labels = ["b", "d", "a", "e", "f"]

    cm = confusion_matrix(target, predictions, labels=labels)

    if constructor_name == "from_predictions":
        disp = ConfusionMatrixDisplay.from_predictions(
            target,
            predictions,
            ax=ax,
            display_labels=display_labels,
            labels=labels,
        )
    else:
        disp = ConfusionMatrixDisplay.from_estimator(
            svc,
            features,
            target,
            ax=ax,
            display_labels=display_labels,
            labels=labels,
        )

    assert_allclose(disp.confusion_matrix, cm)

    # Explicit display labels win over `labels`; with neither, the class
    # indices are expected.
    expected_display_labels = (
        display_labels
        if with_display_labels
        else labels
        if with_labels
        else list(range(n_classes))
    )
    expected_display_labels_str = list(map(str, expected_display_labels))

    x_ticks = [tick.get_text() for tick in disp.ax_.get_xticklabels()]
    y_ticks = [tick.get_text() for tick in disp.ax_.get_yticklabels()]

    assert_array_equal(disp.display_labels, expected_display_labels)
    assert_array_equal(x_ticks, expected_display_labels_str)
    assert_array_equal(y_ticks, expected_display_labels_str)
def plot_confusion_matrix(labels: np.ndarray,
                          predictions: np.ndarray,
                          class_label_names: Optional[Dict[Union[str, int],
                                                           Union[str, int]]] = None,
                          normalize: Optional[str] = None,
                          title_fontsize: Optional[int] = 12,
                          x_label_fontsize: Optional[int] = 12,
                          y_label_fontsize: Optional[int] = 12,
                          heatmap_color: Optional[str] = 'Greens') -> None:
  """Plot confusion matrix for a multiclass classification model.

  Args:
    labels: An array of true labels containing multiclass labels.
    predictions: An array of predictions containing multiclass labels.
    class_label_names: Dictionary of multiclass labels and corresponding target
      names. The type of both class label and target names can be either 'int'
      or 'str'. E.g. {0: 'low_value', 1: 'mid_value', 2: 'high_value'}. The
      dictionary order defines the row/column order of the matrix. When
      omitted, the sorted unique values of `labels` are used, each displayed as
      its string representation.
    normalize: A parameter controlling whether to normalize the counts in the
      matrix.
    title_fontsize: Font size of the figure title.
    x_label_fontsize: Font size of the x axis labels.
    y_label_fontsize: Font size of the y axis labels.
    heatmap_color: Color of the heatmap plot.

  Returns:
    None. The heatmap of the confusion matrix is shown with `plt.show()`.
  """
  utils.assert_label_and_prediction_length_match(labels, predictions)

  if class_label_names is None:
    # Sorted unique labels give a deterministic class order.
    class_labels = np.unique(labels)
    target_names = [str(label) for label in class_labels]
  else:
    class_labels = list(class_label_names.keys())
    target_names = list(class_label_names.values())

  # The matrix order (`labels=`) and the tick names (`display_labels=`) are
  # derived from the same sequence, so every tick is guaranteed to name the
  # class of its own row/column.
  plot = ConfusionMatrixDisplay.from_predictions(y_true=labels,
                                                 y_pred=predictions,
                                                 labels=class_labels,
                                                 display_labels=target_names,
                                                 normalize=normalize,
                                                 include_values=True,
                                                 cmap=heatmap_color)
  plot.ax_.set_title('Confusion matrix', fontsize=title_fontsize)
  plot.ax_.set_xlabel('Predicted label', fontsize=x_label_fontsize)
  plot.ax_.set_ylabel('Actual label', fontsize=y_label_fontsize)
  plt.show()
def test_confusion_matrix_display_plotting(
    pyplot,
    constructor_name,
    normalize,
    include_values,
):
    """Verify every rendered element of the confusion-matrix plot."""
    n_classes = 5
    features, target = make_classification(
        n_samples=100,
        n_informative=5,
        n_classes=n_classes,
        random_state=0,
    )
    svc = SVC().fit(features, target)
    predictions = svc.predict(features)

    # The branch below is a plain if/else, so reject anything unexpected.
    assert constructor_name in ("from_estimator", "from_predictions")

    ax = pyplot.gca()
    cmap = "plasma"

    cm = confusion_matrix(target, predictions)
    plot_options = dict(
        normalize=normalize,
        cmap=cmap,
        ax=ax,
        include_values=include_values,
    )
    if constructor_name == "from_predictions":
        disp = ConfusionMatrixDisplay.from_predictions(
            target, predictions, **plot_options
        )
    else:
        disp = ConfusionMatrixDisplay.from_estimator(
            svc, features, target, **plot_options
        )

    assert disp.ax_ == ax

    # Build the reference matrix with the same normalization as requested.
    if normalize == "true":
        cm = cm / cm.sum(axis=1, keepdims=True)
    elif normalize == "pred":
        cm = cm / cm.sum(axis=0, keepdims=True)
    elif normalize == "all":
        cm = cm / cm.sum()

    assert_allclose(disp.confusion_matrix, cm)

    import matplotlib as mpl

    # Artists and containers exposed by the display.
    assert isinstance(disp.im_, mpl.image.AxesImage)
    assert disp.im_.get_cmap().name == cmap
    assert isinstance(disp.ax_, pyplot.Axes)
    assert isinstance(disp.figure_, pyplot.Figure)

    # Axis titles.
    assert disp.ax_.get_ylabel() == "True label"
    assert disp.ax_.get_xlabel() == "Predicted label"

    # Tick labels on both axes must be the class indices.
    x_ticks = [tick.get_text() for tick in disp.ax_.get_xticklabels()]
    y_ticks = [tick.get_text() for tick in disp.ax_.get_yticklabels()]

    expected_display_labels = list(range(n_classes))
    expected_display_labels_str = list(map(str, expected_display_labels))

    assert_array_equal(disp.display_labels, expected_display_labels)
    assert_array_equal(x_ticks, expected_display_labels_str)
    assert_array_equal(y_ticks, expected_display_labels_str)

    assert_allclose(disp.im_.get_array().data, cm)

    if not include_values:
        assert disp.text_ is None
    else:
        assert disp.text_.shape == (n_classes, n_classes)
        fmt = ".2g"
        # Compare the cell texts with the formatted values in row-major order.
        expected_text = np.array(
            [format(value, fmt) for value in cm.ravel(order="C")]
        )
        text_text = np.array(
            [artist.get_text() for artist in disp.text_.ravel(order="C")]
        )
        assert_array_equal(expected_text, text_text)
# Compare the entries of `lp_model.transduction_` selected by `unlabeled_set`
# with the entries of `y` at the same positions.
# NOTE(review): presumably `transduction_` holds the label the fitted model
# assigned to every sample -- confirm against the estimator documentation.
predicted_labels = lp_model.transduction_[unlabeled_set]
true_labels = y[unlabeled_set]

# The unlabeled count is derived as total minus labeled.
print(
    "Label Spreading model: %d labeled & %d unlabeled points (%d total)"
    % (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples)
)

# %%
# Classification report
print(classification_report(true_labels, predicted_labels))

# %%
# Confusion matrix
from sklearn.metrics import ConfusionMatrixDisplay

# `labels=lp_model.classes_` fixes the row/column order of the matrix to the
# order of the model's `classes_` attribute.
ConfusionMatrixDisplay.from_predictions(
    true_labels, predicted_labels, labels=lp_model.classes_
)

# %%
# Plot the most uncertain predictions
# -----------------------------------
#
# Here, we will pick and show the 10 most uncertain predictions.
from scipy import stats

# The label distributions are transposed before computing the entropy.
# NOTE(review): presumably this yields one entropy value per sample (entropy
# taken over the classes) -- confirm the shape of `label_distributions_`.
pred_entropies = stats.distributions.entropy(lp_model.label_distributions_.T)

# %%
# Pick the top 10 most uncertain labels
# `np.argsort` sorts in ascending order, so the last 10 indices are those of
# the 10 largest entropies.
uncertainty_index = np.argsort(pred_entropies)[-10:]
from sklearn.linear_model import RidgeClassifier

# Fit a ridge classifier on the training split and predict the test split.
clf = RidgeClassifier(tol=1e-2, solver="sparse_cg")
clf.fit(X_train, y_train)
pred = clf.predict(X_test)

# %%
# We plot the confusion matrix of this classifier to find if there is a pattern
# in the classification errors.

import matplotlib.pyplot as plt

from sklearn.metrics import ConfusionMatrixDisplay

fig, ax = plt.subplots(figsize=(10, 5))
ConfusionMatrixDisplay.from_predictions(y_test, pred, ax=ax)
# Replace the default tick labels of both axes with the entries of
# `target_names`.
# NOTE(review): this assumes `target_names` is ordered like the classes of the
# plotted matrix -- confirm where `target_names` is defined.
ax.xaxis.set_ticklabels(target_names)
ax.yaxis.set_ticklabels(target_names)
_ = ax.set_title(
    f"Confusion Matrix for {clf.__class__.__name__}\non the original documents"
)

# %%
# The confusion matrix highlights that documents of the `alt.atheism` class are
# often confused with documents of the `talk.religion.misc` class and
# vice-versa which is expected since the topics are semantically related.
#
# We also observe that some documents of the `sci.space` class can be misclassified as
# `comp.graphics` while the converse is much rarer. A manual inspection of those
# badly classified documents would be required to get some insights on this
# asymmetry. It could be the case that the vocabulary of the space topic could