def test_roc_curve_display_complex_pipeline(pyplot, data_binary, clf, constructor_name): """Check the behaviour with complex pipeline.""" X, y = data_binary if constructor_name == "from_estimator": with pytest.raises(NotFittedError): RocCurveDisplay.from_estimator(clf, X, y) clf.fit(X, y) if constructor_name == "from_estimator": display = RocCurveDisplay.from_estimator(clf, X, y) name = clf.__class__.__name__ else: display = RocCurveDisplay.from_predictions(y, y) name = "Classifier" assert name in display.line_.get_label() assert display.estimator_name == name
def test_roc_curve_display_plotting( pyplot, response_method, data_binary, with_sample_weight, drop_intermediate, with_strings, constructor_name, default_name, ): """Check the overall plotting behaviour.""" X, y = data_binary pos_label = None if with_strings: y = np.array(["c", "b"])[y] pos_label = "c" if with_sample_weight: rng = np.random.RandomState(42) sample_weight = rng.randint(1, 4, size=(X.shape[0])) else: sample_weight = None lr = LogisticRegression() lr.fit(X, y) y_pred = getattr(lr, response_method)(X) y_pred = y_pred if y_pred.ndim == 1 else y_pred[:, 1] if constructor_name == "from_estimator": display = RocCurveDisplay.from_estimator( lr, X, y, sample_weight=sample_weight, drop_intermediate=drop_intermediate, pos_label=pos_label, alpha=0.8, ) else: display = RocCurveDisplay.from_predictions( y, y_pred, sample_weight=sample_weight, drop_intermediate=drop_intermediate, pos_label=pos_label, alpha=0.8, ) fpr, tpr, _ = roc_curve( y, y_pred, sample_weight=sample_weight, drop_intermediate=drop_intermediate, pos_label=pos_label, ) assert_allclose(display.roc_auc, auc(fpr, tpr)) assert_allclose(display.fpr, fpr) assert_allclose(display.tpr, tpr) assert display.estimator_name == default_name import matplotlib as mpl # noqal assert isinstance(display.line_, mpl.lines.Line2D) assert display.line_.get_alpha() == 0.8 assert isinstance(display.ax_, mpl.axes.Axes) assert isinstance(display.figure_, mpl.figure.Figure) expected_label = f"{default_name} (AUC = {display.roc_auc:.2f})" assert display.line_.get_label() == expected_label expected_pos_label = 1 if pos_label is None else pos_label expected_ylabel = f"True Positive Rate (Positive label: {expected_pos_label})" expected_xlabel = f"False Positive Rate (Positive label: {expected_pos_label})" assert display.ax_.get_ylabel() == expected_ylabel assert display.ax_.get_xlabel() == expected_xlabel
def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name): # check that we can provide the positive label and display the proper # statistics X, y = load_breast_cancer(return_X_y=True) # create an highly imbalanced idx_positive = np.flatnonzero(y == 1) idx_negative = np.flatnonzero(y == 0) idx_selected = np.hstack([idx_negative, idx_positive[:25]]) X, y = X[idx_selected], y[idx_selected] X, y = shuffle(X, y, random_state=42) # only use 2 features to make the problem even harder X = X[:, :2] y = np.array(["cancer" if c == 1 else "not cancer" for c in y], dtype=object) X_train, X_test, y_train, y_test = train_test_split( X, y, stratify=y, random_state=0, ) classifier = LogisticRegression() classifier.fit(X_train, y_train) # sanity check to be sure the positive class is classes_[0] and that we # are betrayed by the class imbalance assert classifier.classes_.tolist() == ["cancer", "not cancer"] y_pred = getattr(classifier, response_method)(X_test) # we select the correcponding probability columns or reverse the decision # function otherwise y_pred_cancer = -1 * y_pred if y_pred.ndim == 1 else y_pred[:, 0] y_pred_not_cancer = y_pred if y_pred.ndim == 1 else y_pred[:, 1] if constructor_name == "from_estimator": display = RocCurveDisplay.from_estimator( classifier, X_test, y_test, pos_label="cancer", response_method=response_method, ) else: display = RocCurveDisplay.from_predictions( y_test, y_pred_cancer, pos_label="cancer", ) roc_auc_limit = 0.95679 assert display.roc_auc == pytest.approx(roc_auc_limit) assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit) if constructor_name == "from_estimator": display = RocCurveDisplay.from_estimator( classifier, X_test, y_test, response_method=response_method, pos_label="not cancer", ) else: display = RocCurveDisplay.from_predictions( y_test, y_pred_not_cancer, pos_label="not cancer", ) assert display.roc_auc == pytest.approx(roc_auc_limit) assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
# plotting parameters cols = 2 linewidth = 1 pos_label = 0 # mean 0 belongs to positive class rows = math.ceil(len(datasets_name) / cols) fig, axs = plt.subplots(rows, cols, figsize=(10, rows * 3)) for i, dataset_name in enumerate(datasets_name): (X, y) = preprocess_dataset(dataset_name=dataset_name) for model_name in models_name: y_pred = compute_prediction(X, model_name=model_name) display = RocCurveDisplay.from_predictions( y, y_pred, pos_label=pos_label, name=model_name, linewidth=linewidth, ax=axs[i // cols, i % cols], ) axs[i // cols, i % cols].plot([0, 1], [0, 1], linewidth=linewidth, linestyle=":") axs[i // cols, i % cols].set_title(dataset_name) axs[i // cols, i % cols].set_xlabel("False Positive Rate") axs[i // cols, i % cols].set_ylabel("True Positive Rate") plt.tight_layout(pad=2.0) # spacing between subplots plt.show()