def plot_results(mapies: Dict[int, Any], X_test: NDArray, X_test2: NDArray, y_test2: NDArray, alpha: float, method: str) -> None:
    """Visualize prediction-set sizes and coverage for fitted MAPIE estimators.

    One subplot per estimator: each test point of ``X_test`` is colored by the
    number of labels in its prediction set, and the subplot title reports the
    coverage achieved on ``(X_test2, y_test2)``.
    """
    # Discrete 4-level colormap: set sizes range from 0 to 3 labels.
    cmap = plt.cm.get_cmap('Purples', 4)
    _, axs = plt.subplots(1, len(mapies), figsize=(20, 4))
    for idx, estimator in enumerate(mapies.values()):
        # Size of each prediction set = number of True entries per row.
        set_sizes = (
            estimator
            .predict(X_test, alpha=alpha, include_last_label=True)[1][:, :, 0]
            .sum(axis=1)
        )
        axs[idx].scatter(
            X_test[:, 0],
            X_test[:, 1],
            c=set_sizes,
            marker='.',
            s=10,
            alpha=1,
            cmap=cmap,
            vmin=0,
            vmax=3,
        )
        pred_sets = estimator.predict(X_test2, alpha=alpha)[1][:, :, 0]
        coverage = classification_coverage_score(y_test2, pred_sets)
        axs[idx].set_title(f"coverage = {coverage:.3f}")
    plt.suptitle("Number of labels in prediction sets " f"for the {method} method")
    plt.show()
# Sweep a fine grid of alpha levels and, for every conformal method, record
# the effective coverage and the mean prediction-set width on the test data.
alpha_ = np.arange(0.02, 0.98, 0.02)
coverage, mean_width = {}, {}
mapie, y_ps_mapie = {}, {}
for method in methods:
    estimator_ = MapieClassifier(
        estimator=clf,
        method=method,
        cv="prefit",
        random_state=42,
    )
    estimator_.fit(X_cal, y_cal)
    mapie[method] = estimator_
    _, pred_sets = estimator_.predict(
        X, alpha=alpha_, include_last_label="randomized"
    )
    y_ps_mapie[method] = pred_sets
    # One coverage / width value per alpha level (third axis of pred_sets).
    coverage[method] = [
        classification_coverage_score(y, pred_sets[:, :, idx])
        for idx, _ in enumerate(alpha_)
    ]
    mean_width[method] = [
        classification_mean_width_score(pred_sets[:, :, idx])
        for idx, _ in enumerate(alpha_)
    ]
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
axs[0].set_xlabel("1 - alpha")
axs[0].set_ylabel("Quantile")
for method in methods:
    axs[0].scatter(1 - alpha_, mapie[method].quantiles_, label=method)
axs[0].legend()
for method in methods:
    axs[1].scatter(1 - alpha_, coverage[method], label=method)
mapie_clf2 = MapieClassifier(
    clf2, method="cumulated_score", cv="prefit", random_state=42
)
mapie_clf2.fit(X_calib2, y_calib2)
y_pred2, y_ps2 = mapie_clf2.predict(
    X_test2, alpha=alpha, include_last_label="randomized"
)

##############################################################################
# We can then estimate the marginal coverage for all alpha values in order
# to produce a so-called calibration plot, comparing the target coverage with
# the "real" coverage obtained on the test set.

# Effective coverage and mean set width per alpha level, for both splits.
coverages1 = [
    classification_coverage_score(y_test1, y_ps1[:, :, idx])
    for idx, _ in enumerate(alpha)
]
coverages2 = [
    classification_coverage_score(y_test2, y_ps2[:, :, idx])
    for idx, _ in enumerate(alpha)
]
widths1 = [
    classification_mean_width_score(y_ps1[:, :, idx])
    for idx, _ in enumerate(alpha)
]
widths2 = [
    classification_mean_width_score(y_ps2[:, :, idx])
    for idx, _ in enumerate(alpha)
]
def test_classification_toydata() -> None:
    """Check that coverage_score on the toy data equals 0.8."""
    score = classification_coverage_score(y_true_class, y_pred_set)
    assert score == 0.8
def test_classification_same_length() -> None:
    """Mismatched lengths of y_true and y_pred_set must raise IndexError."""
    truncated_sets = y_pred_set[:-1, :]
    with pytest.raises(IndexError, match=r".*shape mismatch*"):
        classification_coverage_score(y_true_class, truncated_sets)
def test_classification_y_pred_set_shape() -> None:
    """A 1D y_pred_set must be rejected with a ValueError."""
    one_dim_sets = y_pred_set[:, 0]
    with pytest.raises(ValueError, match=r".*Expected 2D array*"):
        classification_coverage_score(y_true_class, one_dim_sets)
def test_classification_y_true_shape() -> None:
    """A 2D y_true must be rejected with a ValueError."""
    stacked_y_true = np.tile(y_true_class, (2, 1))
    with pytest.raises(ValueError, match=r".*y should be a 1d array*"):
        classification_coverage_score(stacked_y_true, y_pred_set)
def test_classification_y_pred_set_type() -> None:
    """Passing y_pred_set as a plain list must still give 0.8 coverage."""
    assert classification_coverage_score(y_true_class, list(y_pred_set)) == 0.8
# Calibration plot: effective coverage (left) and mean set size (right)
# against the target confidence level 1 - alpha.
for i, coverage in enumerate(coverages):
    axes[0].plot(1 - alpha, coverage, label=legends[i])
# Diagonal = perfectly calibrated coverage.
axes[0].plot([0, 1], [0, 1], ls="--", color="k")
axes[0].legend()
axes[1].set_xlabel("1 - alpha")
axes[1].set_ylabel("Average of prediction set sizes")
for i, width in enumerate(widths):
    axes[1].plot(1 - alpha, width, label=legends[i])
axes[1].legend()
plt.suptitle("Effective coverage and prediction set size " f"for the {method} method")
plt.show()

# Shape: (method, split, alpha) — one coverage / width per combination.
split_coverages = np.array([
    [
        [
            classification_coverage_score(y_test_distrib, y_ps[:, :, ia])
            for ia, _ in enumerate(alpha)
        ]
        for y_ps in per_split.values()
    ]
    for per_split in y_ps_mapies.values()
])
split_widths = np.array([
    [
        [
            classification_mean_width_score(y_ps[:, :, ia])
            for ia, _ in enumerate(alpha)
        ]
        for y_ps in per_split.values()
    ]
    for per_split in y_ps_mapies.values()
])
plot_coverage_width(alpha, split_coverages[0], split_widths[0], "score")
plot_coverage_width(alpha, split_coverages[1], split_widths[1], "cumulated_score")

##############################################################################
# One can notice that the train/calibration indeed impacts the coverage and