def test_confusion_matrix_with_unknown_labels(pyplot, constructor_name):
    """Ensure the labels are inferred from the data when `labels=None`.

    The tick labels of the display must cover the values found in both
    `y_true` and `y_pred`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/18405
    """
    # guard: the branching below only handles these two constructors
    assert constructor_name in ("from_estimator", "from_predictions")

    n_classes = 5
    X, y = make_classification(
        n_samples=100, n_informative=5, n_classes=n_classes, random_state=0
    )
    classifier = SVC().fit(X, y)
    y_pred = classifier.predict(X)
    # shift the targets so that `y_true` holds a label that was not seen
    # while fitting and is therefore absent from `classifier.classes_`
    y_shifted = y + 1

    shared_kwargs = {"labels": None}
    if constructor_name == "from_estimator":
        disp = ConfusionMatrixDisplay.from_estimator(
            classifier, X, y_shifted, **shared_kwargs
        )
    else:
        disp = ConfusionMatrixDisplay.from_predictions(
            y_shifted, y_pred, **shared_kwargs
        )

    shown_labels = [tick.get_text() for tick in disp.ax_.get_xticklabels()]
    wanted_labels = [str(label) for label in range(n_classes + 1)]
    assert_array_equal(wanted_labels, shown_labels)
def test_confusion_matrix_pipeline(pyplot, clf):
    """Check the display built from `clf`, before and after fitting it.

    An unfitted `clf` must be rejected with `NotFittedError`; once fitted, the
    display must expose the confusion matrix of its predictions on `X`.
    """
    n_classes = 5
    X, y = make_classification(
        n_samples=100, n_informative=5, n_classes=n_classes, random_state=0
    )

    # the estimator has not been fitted yet
    with pytest.raises(NotFittedError):
        ConfusionMatrixDisplay.from_estimator(clf, X, y)

    clf.fit(X, y)
    expected_cm = confusion_matrix(y, clf.predict(X))
    disp = ConfusionMatrixDisplay.from_estimator(clf, X, y)

    assert_allclose(disp.confusion_matrix, expected_cm)
    assert disp.text_.shape == (n_classes, n_classes)
def test_confusion_matrix_display_invalid_option(pyplot, constructor_name):
    """Check that an unsupported `normalize` value raises a `ValueError`."""
    # guard: the branching below only handles these two constructors
    assert constructor_name in ("from_estimator", "from_predictions")

    X, y = make_classification(
        n_samples=100, n_informative=5, n_classes=5, random_state=0
    )
    classifier = SVC().fit(X, y)
    y_pred = classifier.predict(X)

    # pick the constructor and its positional arguments up front so that the
    # `raises` context wraps a single call
    if constructor_name == "from_estimator":
        build = ConfusionMatrixDisplay.from_estimator
        positional = (classifier, X, y)
    else:
        build = ConfusionMatrixDisplay.from_predictions
        positional = (y, y_pred)

    bad_params = {"normalize": "invalid"}
    err_msg = r"normalize must be one of \{'true', 'pred', 'all', None\}"
    with pytest.raises(ValueError, match=err_msg):
        build(*positional, **bad_params)
Exemplo n.º 4
0
def test_confusion_matrix_display(pyplot, constructor_name):
    """Check the attributes of the display and the effect of calling `plot`
    again with different options."""
    # guard: the branching below only handles these two constructors
    assert constructor_name in ("from_estimator", "from_predictions")

    n_classes = 5
    X, y = make_classification(
        n_samples=100, n_informative=5, n_classes=n_classes, random_state=0
    )
    classifier = SVC().fit(X, y)
    y_pred = classifier.predict(X)
    cm = confusion_matrix(y, y_pred)

    plot_options = {
        "normalize": None,
        "include_values": True,
        "cmap": "viridis",
        "xticks_rotation": 45.0,
    }
    if constructor_name == "from_estimator":
        disp = ConfusionMatrixDisplay.from_estimator(
            classifier, X, y, **plot_options
        )
    else:
        disp = ConfusionMatrixDisplay.from_predictions(
            y, y_pred, **plot_options
        )

    def xtick_rotations():
        # `disp.ax_` is looked up at call time, i.e. after the latest `plot`
        return [tick.get_rotation() for tick in disp.ax_.get_xticklabels()]

    # state of the display right after construction
    assert_allclose(disp.confusion_matrix, cm)
    assert disp.text_.shape == (n_classes, n_classes)
    assert_allclose(xtick_rotations(), 45.0)
    assert_allclose(disp.im_.get_array().data, cm)

    # each `plot` call below passes a single option
    disp.plot(cmap="plasma")
    assert disp.im_.get_cmap().name == "plasma"

    disp.plot(include_values=False)
    assert disp.text_ is None

    disp.plot(xticks_rotation=90.0)
    assert_allclose(xtick_rotations(), 90.0)

    disp.plot(values_format="e")
    wanted_text = np.array(
        [format(value, "e") for value in cm.ravel(order="C")]
    )
    shown_text = np.array(
        [cell.get_text() for cell in disp.text_.ravel(order="C")]
    )
    assert_array_equal(wanted_text, shown_text)
Exemplo n.º 5
0
def test_confusion_matrix_display_custom_labels(
    pyplot, constructor_name, with_labels, with_display_labels
):
    """Check the matrix, `display_labels` and the tick labels when `labels`
    and/or `display_labels` are passed."""
    # guard: the branching below only handles these two constructors
    assert constructor_name in ("from_estimator", "from_predictions")

    n_classes = 5
    X, y = make_classification(
        n_samples=100, n_informative=5, n_classes=n_classes, random_state=0
    )
    classifier = SVC().fit(X, y)
    y_pred = classifier.predict(X)

    ax = pyplot.gca()
    labels = [2, 1, 0, 3, 4] if with_labels else None
    display_labels = ["b", "d", "a", "e", "f"] if with_display_labels else None

    expected_cm = confusion_matrix(y, y_pred, labels=labels)
    plot_options = {
        "ax": ax,
        "display_labels": display_labels,
        "labels": labels,
    }
    if constructor_name == "from_estimator":
        disp = ConfusionMatrixDisplay.from_estimator(
            classifier, X, y, **plot_options
        )
    else:
        disp = ConfusionMatrixDisplay.from_predictions(
            y, y_pred, **plot_options
        )
    assert_allclose(disp.confusion_matrix, expected_cm)

    # precedence expected by this test: explicit display labels first, then
    # the `labels` ordering, then the class indices
    if display_labels is not None:
        wanted_labels = display_labels
    elif labels is not None:
        wanted_labels = labels
    else:
        wanted_labels = list(range(n_classes))
    wanted_labels_str = [str(name) for name in wanted_labels]

    assert_array_equal(disp.display_labels, wanted_labels)
    for ticklabels in (disp.ax_.get_xticklabels(),
                       disp.ax_.get_yticklabels()):
        shown = [tick.get_text() for tick in ticklabels]
        assert_array_equal(shown, wanted_labels_str)
Exemplo n.º 6
0
def test_confusion_matrix_display_validation(pyplot):
    """Check the errors raised for a regressor, for mixed target types and
    for inputs whose lengths differ."""
    X, y = make_classification(
        n_samples=100, n_informative=5, n_classes=5, random_state=0
    )

    regressor = SVR().fit(X, y)
    y_pred_regressor = regressor.predict(X)
    y_pred_classifier = SVC().fit(X, y).predict(X)

    # a regressor is not accepted by `from_estimator`
    err_msg = "ConfusionMatrixDisplay.from_estimator only supports classifiers"
    with pytest.raises(ValueError, match=err_msg):
        ConfusionMatrixDisplay.from_estimator(regressor, X, y)

    err_msg = "Mix type of y not allowed, got types"
    mixed_type_inputs = [
        # force `y_true` to be seen as a regression problem
        (y + 0.5, y_pred_classifier),
        # predictions produced by the regressor
        (y, y_pred_regressor),
    ]
    for y_true, y_pred in mixed_type_inputs:
        with pytest.raises(ValueError, match=err_msg):
            ConfusionMatrixDisplay.from_predictions(y_true, y_pred)

    # every other prediction is dropped, so the lengths no longer match
    err_msg = "Found input variables with inconsistent numbers of samples"
    with pytest.raises(ValueError, match=err_msg):
        ConfusionMatrixDisplay.from_predictions(y, y_pred_classifier[::2])
Exemplo n.º 7
0
def test_confusion_matrix_display_plotting(
    pyplot, constructor_name, normalize, include_values,
):
    """Check the artists, axis labels, tick labels and cell texts of the
    rendered plot."""
    # guard: the branching below only handles these two constructors
    assert constructor_name in ("from_estimator", "from_predictions")

    n_classes = 5
    X, y = make_classification(
        n_samples=100, n_informative=5, n_classes=n_classes, random_state=0
    )
    classifier = SVC().fit(X, y)
    y_pred = classifier.predict(X)

    ax = pyplot.gca()
    cmap = "plasma"
    cm = confusion_matrix(y, y_pred)

    plot_options = {
        "normalize": normalize,
        "cmap": cmap,
        "ax": ax,
        "include_values": include_values,
    }
    if constructor_name == "from_estimator":
        disp = ConfusionMatrixDisplay.from_estimator(
            classifier, X, y, **plot_options
        )
    else:
        disp = ConfusionMatrixDisplay.from_predictions(
            y, y_pred, **plot_options
        )

    # the display must draw on the axes it was given
    assert disp.ax_ == ax

    # rescale the reference matrix the way `normalize` requests; any other
    # value (e.g. None) leaves the raw counts untouched
    rescale = {
        "true": lambda counts: counts / counts.sum(axis=1, keepdims=True),
        "pred": lambda counts: counts / counts.sum(axis=0, keepdims=True),
        "all": lambda counts: counts / counts.sum(),
    }
    if normalize in rescale:
        cm = rescale[normalize](cm)
    assert_allclose(disp.confusion_matrix, cm)

    # imported inside the test, as in the original code
    import matplotlib as mpl

    assert isinstance(disp.im_, mpl.image.AxesImage)
    assert disp.im_.get_cmap().name == cmap
    assert isinstance(disp.ax_, pyplot.Axes)
    assert isinstance(disp.figure_, pyplot.Figure)

    assert disp.ax_.get_ylabel() == "True label"
    assert disp.ax_.get_xlabel() == "Predicted label"

    shown_x = [tick.get_text() for tick in disp.ax_.get_xticklabels()]
    shown_y = [tick.get_text() for tick in disp.ax_.get_yticklabels()]

    wanted_labels = list(range(n_classes))
    wanted_labels_str = [str(name) for name in wanted_labels]

    assert_array_equal(disp.display_labels, wanted_labels)
    assert_array_equal(shown_x, wanted_labels_str)
    assert_array_equal(shown_y, wanted_labels_str)

    assert_allclose(disp.im_.get_array().data, cm)

    if not include_values:
        assert disp.text_ is None
        return

    # one text artist per cell, formatted with the ".2g" spec
    assert disp.text_.shape == (n_classes, n_classes)
    fmt = ".2g"
    wanted_text = np.array(
        [format(value, fmt) for value in cm.ravel(order="C")]
    )
    shown_text = np.array(
        [cell.get_text() for cell in disp.text_.ravel(order="C")]
    )
    assert_array_equal(wanted_text, shown_text)
# Load the iris data: features, targets and the class names shown on the plot
iris = datasets.load_iris()
X, y = iris.data, iris.target
class_names = iris.target_names

# Hold out part of the data as a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# The model is deliberately too regularized (C too low) so that the impact
# on the results is visible
classifier = svm.SVC(kernel="linear", C=0.01)
classifier.fit(X_train, y_train)

np.set_printoptions(precision=2)

# One figure per entry: (figure title, value passed as `normalize`)
titles_options = [
    ("Confusion matrix, without normalization", None),
    ("Normalized confusion matrix", "true"),
]
for title, normalize in titles_options:
    disp = ConfusionMatrixDisplay.from_estimator(
        classifier,
        X_test,
        y_test,
        display_labels=class_names,
        cmap=plt.cm.Blues,
        normalize=normalize,
    )
    disp.ax_.set_title(title)

    print(title)
    print(disp.confusion_matrix)

plt.show()
# %% [markdown]
# ## Confusion matrix and derived metrics
# The comparison that we did above and the accuracy that we calculated did not
# take into account the type of error our classifier was making. Accuracy
# is an aggregate of the errors made by the classifier. We may be interested
# in finer granularity - to know independently what the error is for each of
# the two following cases:
#
# - we predicted that a person will give blood but she/he did not;
# - we predicted that a person will not give blood but she/he did.

# %%
from sklearn.metrics import ConfusionMatrixDisplay

# Plot the confusion matrix of `classifier` on `data_test` / `target_test`
_ = ConfusionMatrixDisplay.from_estimator(
    estimator=classifier, X=data_test, y=target_test
)

# %% [markdown]
# The in-diagonal numbers are related to predictions that were correct
# while off-diagonal numbers are related to incorrect predictions
# (misclassifications). We now know the four types of correct and erroneous
# predictions:
#
# * the top left corner are true positives (TP) and correspond to people
#   who gave blood and were predicted as such by the classifier;
# * the bottom right corner are true negatives (TN) and correspond to
#   people who did not give blood and were predicted as such by the
#   classifier;
# * the top right corner are false negatives (FN) and correspond to
#   people who gave blood but were predicted to not have given blood;
# * the bottom left corner are false positives (FP) and correspond to
#   people who did not give blood but were predicted to have given blood.
# Report the time elapsed since `t0` (set earlier, outside this excerpt),
# then show the estimator selected by the grid search
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)

# %%
# Quantitative evaluation of the model quality on the test set

print("Predicting people's names on the test set")
# Reset the timer so that the next report only covers the prediction call
t0 = time()
y_pred = clf.predict(X_test_pca)
print("done in %0.3fs" % (time() - t0))

# Print the classification report for the predictions, then plot the
# confusion matrix computed from `clf` on the same test data
print(classification_report(y_test, y_pred, target_names=target_names))
ConfusionMatrixDisplay.from_estimator(clf,
                                      X_test_pca,
                                      y_test,
                                      display_labels=target_names,
                                      xticks_rotation="vertical")
# Adjust the layout before showing the figure
plt.tight_layout()
plt.show()

# %%
# Qualitative evaluation of the predictions using matplotlib


def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
    """Helper function to plot a gallery of portraits"""
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=0.01, right=0.99, top=0.90, hspace=0.35)
    for i in range(n_row * n_col):
        plt.subplot(n_row, n_col, i + 1)