示例#1
0
def plot_lift_curve(
    y_true: DataType,
    y_proba: DataType,
    title: str = None,
    ax: Axes = None,
    labels: List[str] = None,
    threshold: float = 0.5,
) -> Axes:
    """
    Plot a lift chart from results. Also calculates the lift score of every
    plotted curve, using ``threshold`` as the probability cutoff.

    Parameters
    ----------
    y_true: DataType
        True labels

    y_proba: DataType
        Model's predicted probability. Indexed as ``y_proba[:, column]``;
        columns are assumed to follow the order of ``np.unique(y_true)``.

    title: str
        Plot title. Defaults to "Lift Curve"

    ax: Axes
        Pass your own ax. A new figure is created when omitted

    labels: List of str
        Labels to use per class, in the same order as ``np.unique(y_true)``

    threshold: float
        Threshold to use when determining lift score

    Returns
    -------
    matplotlib.Axes

    Raises
    ------
    VizError
        If ``labels`` is given and its length differs from the number of
        classes found in ``y_true``
    """

    if ax is None:
        fig, ax = plt.subplots()

    title = "Lift Curve" if title is None else title
    classes = np.unique(y_true)
    binarized_labels = label_binarize(y_true, classes=classes)

    if labels and len(labels) != len(classes):
        raise VizError("Number of labels must match number of classes: "
                       f"got {len(labels)} labels and {len(classes)} classes")

    if binarized_labels.shape[1] == 1:
        # Binary classification case: column 1 holds the positive class
        percents, gains = _cum_gain_curve(binarized_labels, y_proba[:, 1])
        score = lift_score(binarized_labels.ravel(), y_proba[:, 1] > threshold)
        ax.plot(percents, gains / percents, label=f"$Lift = {score:.2f}$")
    else:
        # Multi-class case: one curve per class.
        # Index columns by position, not by class value, so that class
        # values that are not exactly 0..n-1 (strings, 1-based ids, gaps)
        # still select the matching column and label.
        for idx, class_ in enumerate(classes):
            class_truth = binarized_labels[:, idx]
            class_proba = y_proba[:, idx]
            percents, gains = _cum_gain_curve(class_truth, class_proba)
            score = lift_score(class_truth, class_proba > threshold)
            ax.plot(
                percents,
                gains / percents,
                label=f"Class {labels[idx] if labels else class_} "
                f"$Lift = {score:.2f}$ ",
            )

    # A lift of 1 means no improvement over random selection
    ax.axhline(y=1, color="grey", linestyle="--", label="Baseline")
    ax.set_title(title)
    ax.set_ylabel("Lift")
    ax.set_xlabel("% of Data")
    formatter = PercentFormatter(xmax=1)
    ax.xaxis.set_major_formatter(formatter)
    ax.legend()
    return ax
示例#2
0
 def test_lift_score_returns_correctly(self, y_targ, y_pred):
     """The lift score of the supplied targets/predictions is expected to be 2."""
     expected_lift = 2
     assert expected_lift == lift_score(y_targ, y_pred)
def test_lift_score_fails_if_passed_non_ndarray():
    """lift_score is expected to raise MetricError when given plain lists."""
    targets, predictions = [1, 2, 3], [4, 5, 6]
    with pytest.raises(MetricError):
        lift_score(targets, predictions)
示例#4
0
 def test_lift_score_fails_if_passed_non_ndarray_or_series(
         self, y_targ, y_pred
 ):
     """lift_score is expected to raise MetricError for the supplied inputs."""
     expected_error = MetricError
     with pytest.raises(expected_error):
         # noinspection PyTypeChecker
         lift_score(y_targ, y_pred)
def test_lift_score_returns_correctly():
    """lift_score is expected to return 2 for this hand-built sample."""
    targets = np.array([1, 1, 1, 0, 0, 2, 0, 3, 4])
    predictions = np.array([1, 0, 1, 0, 0, 2, 1, 3, 0])
    expected_lift = 2

    assert expected_lift == lift_score(targets, predictions)