Пример #1
0
def plot_cum_gain_chart(y_true, prob_y, prob_ysk=None):
    """Plot the cumulative gain chart of a model and display it.

    The chart contains the model curve, optionally a second (reference)
    model curve, the curve returned by ``tools.bestCurve(y_true)`` and the
    diagonal baseline. The figure is shown with ``plt.show()``.

    Args:
        y_true: Ground-truth labels; forwarded unchanged to
            ``cumulative_gain_curve`` and ``tools.bestCurve``.
        prob_y: 2-D array of model scores. Only column 1 is used
            (presumably the positive-class probability -- confirm with the
            caller).
        prob_ysk: Optional 2-D array of scores from a second model. When
            given, its column 1 is drawn dashed and labelled
            'model sklearn'.
    """
    lw = 2

    # The primary model curve is needed whether or not a reference model is
    # supplied, so it is computed once instead of once per branch.
    x_data, y_data = cumulative_gain_curve(y_true, prob_y[:, 1])
    reference = None
    if prob_ysk is not None:
        reference = cumulative_gain_curve(y_true, prob_ysk[:, 1])

    plt.plot(x_data, y_data, linewidth=lw, label='model')
    if reference is not None:
        x_data_sk, y_data_sk = reference
        plt.plot(x_data_sk,
                 y_data_sk,
                 '--',
                 linewidth=lw,
                 label='model sklearn')

    x_best, y_best = tools.bestCurve(y_true)
    x_base, y_base = np.array([0, 1]), np.array([0, 1])

    plt.plot(x_best, y_best, linewidth=lw, label='best curve')
    plt.plot(x_base, y_base, ':', linewidth=lw, label='baseline')
    plt.xlabel(r'Fraction of data', size=14)
    plt.ylabel(r'Cumulative gain', size=14)
    plt.legend(prop={'size': 12})
    # grid() expects a boolean; a string such as 'True' only works because
    # any non-empty string is truthy.
    plt.grid(True, linestyle='dashed')
    plt.tick_params(axis='both', labelsize=12)
    plt.tight_layout()
    plt.show()
Пример #2
0
def plot_cumulative_gain(y_true,
                         y_probas,
                         title='Cumulative Gains Curve',
                         ax=None,
                         figsize=None,
                         title_fontsize="large",
                         text_fontsize="medium"):
    """Plot cumulative gain curves for a binary problem and show the figure.

    Refactored from scikitplot's ``plot_cumulative_gain``. Three curves are
    drawn besides the diagonal baseline: the predicted curve of each class
    and the curve obtained by ranking the samples by their true label for
    the second class (labelled '(true)').

    Args:
        y_true: Array-like of ground-truth labels; must contain exactly two
            distinct values.
        y_probas: 2-D array-like of scores. Column 0 is used for
            ``classes[0]`` and column 1 for ``classes[1]``, where
            ``classes = np.unique(y_true)``.
        title: Title of the plot.
        ax: Axes to draw on. A new figure and axes are created when None.
        figsize: Figure size used only when a new figure is created.
        title_fontsize: Font size of the title.
        text_fontsize: Font size of axis labels, ticks and legend.

    Returns:
        The axes the curves were drawn on.

    Raises:
        ValueError: If ``y_true`` does not contain exactly two classes.
    """
    y_true = np.array(y_true)
    y_probas = np.array(y_probas)

    classes = np.unique(y_true)
    if len(classes) != 2:
        raise ValueError('Cannot calculate Cumulative Gains for data with '
                         '{} category/ies'.format(len(classes)))

    first_label, second_label = classes

    # Compute Cumulative Gain Curves. The "true" curve of the first class
    # was computed but never drawn, so it is no longer calculated.
    _, gains_first = cumulative_gain_curve(y_true, y_probas[:, 0],
                                           first_label)
    _, gains_second = cumulative_gain_curve(y_true, y_probas[:, 1],
                                            second_label)
    percentages, gains_true = cumulative_gain_curve(y_true, y_true,
                                                    second_label)

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)

    ax.set_title(title, fontsize=title_fontsize)

    ax.plot(percentages,
            gains_first,
            lw=3,
            label='Class {} (pred)'.format(first_label))
    ax.plot(percentages,
            gains_second,
            lw=3,
            label='Class {} (pred)'.format(second_label))
    ax.plot(percentages,
            gains_true,
            lw=3,
            label='Class {} (true)'.format(second_label))

    ax.set_xlim([0.0, 1.0])
    # Head-room above 1.0 so the curves do not touch the frame.
    ax.set_ylim([0.0, 1.1])

    ax.plot([0, 1], [0, 1], 'k--', lw=2, label='Baseline')

    ax.set_xlabel('Percentage of sample', fontsize=text_fontsize)
    ax.set_ylabel('Gain', fontsize=text_fontsize)
    ax.tick_params(labelsize=text_fontsize)
    # grid() expects a boolean; the string 'on' only works by being truthy.
    ax.grid(True)
    ax.legend(loc='lower right', fontsize=text_fontsize)
    plt.show()
    return ax
Пример #3
0
def main(config):
    """Train a random forest on the configured dataset and save its plots.

    The data loader is built from ``config``; 20% of the data is held out
    for testing. Accuracy, precision, recall and F1 are printed, and three
    figures are written to the working directory: ``RF_ROC.png``,
    ``RF_PRC.png`` and ``RF_LIFT.png`` (cumulative gain curve with the
    ideal curve and the diagonal baseline).

    Args:
        config: Object exposing ``init_obj(name, module)``; used to build
            the data loader whose ``dataset.datas`` and ``dataset.targets``
            are the features and labels.
    """
    # Local import: only needed for the ideal-curve knee below.
    import numpy as np

    data_loader = config.init_obj('data_loader', module_data)

    X_train, X_test, Y_train, Y_test = train_test_split(
        data_loader.dataset.datas,
        data_loader.dataset.targets,
        test_size=0.2)

    RF = RandomForestClassifier()
    RF.fit(X_train, Y_train)

    preds = RF.predict(X_test)
    preds_prob = RF.predict_proba(X_test)
    print(f'Accuracy score: {accuracy_score(Y_test, preds)}')
    print(f'Precision: {precision_score(Y_test, preds)}')
    print(f'Recall: {recall_score(Y_test, preds)}')
    print(f'F1-score: {f1_score(Y_test, preds)}')

    plot_roc_curve(RF, X_test, Y_test)
    plt.plot([0, 1], [0, 1], linestyle='--')
    plt.legend(loc='lower right')
    plt.savefig('RF_ROC.png')
    plt.close()

    plot_precision_recall_curve(RF, X_test, Y_test)
    plt.legend(loc='lower right')
    plt.savefig('RF_PRC.png')
    plt.close()

    p, g = cumulative_gain_curve(Y_test, preds_prob[:, 1])
    plt.plot(p, g)
    # The ideal model ranks every true positive first, so its curve reaches
    # 1 at the fraction of *actual* positives in the test set -- not at the
    # fraction the classifier happened to predict as positive. Column 1 of
    # predict_proba corresponds to RF.classes_[1].
    positive_fraction = np.mean(np.asarray(Y_test) == RF.classes_[1])
    plt.plot([0, positive_fraction, 1], [0, 1, 1])
    plt.plot([0, 1], [0, 1], linestyle='--')
    plt.savefig('RF_LIFT.png')
    plt.close()
Пример #4
0
def plot_cumulative_gain(y_true, y_proba, title_fontsize=15, text_fontsize=10):
    """Draw the cumulative gains chart of the ``True`` class on a new figure.

    Args:
        y_true: Ground-truth labels, forwarded to ``cumulative_gain_curve``
            with ``True`` as the positive label.
        y_proba: Scores used to rank the samples (presumably the
            probability of the ``True`` class -- confirm with the caller).
        title_fontsize: Font size of the title.
        text_fontsize: Font size of axis labels, ticks and legend.

    Returns:
        The newly created axes containing the gain curve and the baseline.
    """
    positive_label = True

    # Compute Cumulative Gain Curves
    percentages, gains = cumulative_gain_curve(y_true, y_proba,
                                               positive_label)

    fig, ax = plt.subplots(1, 1)

    ax.set_title('Cumulative gains chart', fontsize=title_fontsize)

    ax.plot(percentages, gains, lw=3,
            label='Class {}'.format(positive_label))

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.0])

    ax.plot([0, 1], [0, 1], 'k--', lw=2, label='Baseline')

    ax.set_xlabel('Percentage of sample', fontsize=text_fontsize)
    ax.set_ylabel('Gain', fontsize=text_fontsize)
    ax.tick_params(labelsize=text_fontsize)
    # grid() expects a boolean; the string 'on' only works by being truthy.
    ax.grid(True)
    ax.legend(loc='lower right', fontsize=text_fontsize)

    return ax
Пример #5
0
def lift_curve(output, target):
    """Render a cumulative gain chart for binary predictions as an image.

    The chart contains the ideal curve (reaching 1 at the fraction of
    positive targets), the model curve computed from column 1 of ``output``
    with positive label 1, and the diagonal baseline.

    Args:
        output: Tensor of model outputs with one row per sample; column 1
            is used as the ranking score.
        target: Tensor of ground-truth labels, one per row of ``output``
            (the ideal curve assumes 0/1 labels).

    Returns:
        A uint8 tensor holding the RGB channels of the rendered figure,
        permuted to channel-first order.

    Raises:
        RuntimeError: If scikitplot is not installed.
    """
    try:
        from scikitplot.helpers import cumulative_gain_curve
    except ImportError as err:
        raise RuntimeError(
            "This contrib module requires scikitplot to be installed") from err

    assert output.shape[0] == len(target)

    # detach() is required: Tensor.numpy() refuses tensors that require
    # grad, and torch.no_grad() does not lift that restriction.
    labels = target.detach().cpu().numpy()
    scores = output.detach().cpu().numpy()[:, 1]

    percentages, gains = cumulative_gain_curve(labels, scores, 1)

    fig = plt.figure()
    try:
        plt.plot([0, labels.sum() / len(labels), 1], [0, 1, 1])
        plt.plot(percentages, gains)
        plt.plot([0, 1], [0, 1], linestyle='--')
        fig.canvas.draw()

        # Drop the alpha channel, then move channels first.
        buf = np.asarray(fig.canvas.buffer_rgba(), dtype=np.uint8)[:, :, :3]
        image = torch.from_numpy(buf).permute(2, 0, 1)
    finally:
        # Always release the figure, even when plotting or rendering fails.
        plt.close(fig)

    return image
Пример #6
0
def auc_CGC(y_true, prob_y):
    """Calculate the area under the cumulative gain curve and its area ratio.

    Args:
        y_true: Ground-truth labels, forwarded to ``cumulative_gain_curve``
            and ``tools.bestCurve``.
        prob_y: Scores of the samples. May be 1-D, or 2-D with one column
            (that column is used) or several columns (column 1 is used).

    Returns:
        tuple: ``(auc_data, area_ratio)`` where ``auc_data`` is the
        trapezoidal area under the model's gain curve and ``area_ratio`` is
        ``(auc_data - auc_base) / (auc_best - auc_base)``, with ``auc_base``
        the area under the diagonal and ``auc_best`` the area under the
        curve returned by ``tools.bestCurve(y_true)``.
    """
    prob_y = np.asarray(prob_y)
    if prob_y.ndim > 1:
        # Reduce to a 1-D score vector; a single-column matrix must be
        # flattened too, otherwise a 2-D array reaches the curve helper.
        prob_y = prob_y[:, 1] if prob_y.shape[1] > 1 else prob_y[:, 0]

    # np.trapz is deprecated in NumPy 2 in favour of np.trapezoid; fall back
    # to the old name on older releases.
    integrate = getattr(np, 'trapezoid', None) or np.trapz

    x_data, y_data = cumulative_gain_curve(y_true, prob_y)
    x_best, y_best = tools.bestCurve(y_true)
    x_base, y_base = np.array([0, 1]), np.array([0, 1])

    auc_data = integrate(y_data, x_data)
    auc_best = integrate(y_best, x_best)
    auc_base = integrate(y_base, x_base)
    area_ratio = (auc_data - auc_base) / (auc_best - auc_base)
    return auc_data, area_ratio
Пример #7
0
def plot_cumulative_gain(y_true, y_probas, title='Cumulative Gains Curve',
                         ax=None, figsize=None, title_fontsize="large",
                         text_fontsize="medium"):
    """Generates the Cumulative Gains Plot from labels and scores/probabilities

    The cumulative gains chart is used to determine the effectiveness of a
    binary classifier. A detailed explanation can be found at
    http://mlwiki.org/index.php/Cumulative_Gain_Chart. The implementation
    here works only for binary classification.

    Only the curve of the second class (``np.unique(y_true)[1]``) is drawn;
    no baseline is plotted and no legend is shown.

    Args:
        y_true (array-like, shape (n_samples)):
            Ground truth (correct) target values.

        y_probas (array-like, shape (n_samples, n_classes)):
            Prediction probabilities for each class returned by a classifier.

        title (string, optional): Title of the generated plot. Defaults to
            "Cumulative Gains Curve".

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the learning curve. If None, the plot is drawn on a new set of
            axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        title_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "medium".

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Raises:
        ValueError: If ``y_true`` does not contain exactly two classes.

    Example:
        >>> import scikitplot as skplt
        >>> lr = LogisticRegression()
        >>> lr = lr.fit(X_train, y_train)
        >>> y_probas = lr.predict_proba(X_test)
        >>> skplt.metrics.plot_cumulative_gain(y_test, y_probas)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_cumulative_gain.png
           :align: center
           :alt: Cumulative Gains Plot
    """
    y_true = np.array(y_true)
    y_probas = np.array(y_probas)

    classes = np.unique(y_true)
    if len(classes) != 2:
        raise ValueError('Cannot calculate Cumulative Gains for data with '
                         '{} category/ies'.format(len(classes)))

    # Compute the Cumulative Gain Curve of the second class only.
    percentages, gains2 = cumulative_gain_curve(y_true, y_probas[:, 1],
                                                classes[1])

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)

    ax.set_title(title, fontsize=title_fontsize)

    # The label is attached to the line, but the legend is left to the caller.
    ax.plot(percentages, gains2, lw=3, label='Class {}'.format(classes[1]))

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.0])

    ax.set_xlabel('Percentage of sample', fontsize=text_fontsize)
    ax.set_ylabel('Gain', fontsize=text_fontsize)
    ax.tick_params(labelsize=text_fontsize)
    # grid() expects a boolean; the string 'on' only works by being truthy.
    ax.grid(True)
    return ax
Пример #8
0
def plot_lift_curve(y_true, y_probas, title='Lift Curve',
                    ax=None, figsize=None, title_fontsize="large",
                    text_fontsize="medium"):
    """Generates the Lift Curve from labels and scores/probabilities

    The lift curve is used to determine the effectiveness of a
    binary classifier. A detailed explanation can be found at
    http://www2.cs.uregina.ca/~dbd/cs831/notes/lift_chart/lift_chart.html.
    The implementation here works only for binary classification.

    Args:
        y_true (array-like, shape (n_samples)):
            Ground truth (correct) target values.

        y_probas (array-like, shape (n_samples, n_classes)):
            Prediction probabilities for each class returned by a classifier.

        title (string, optional): Title of the generated plot. Defaults to
            "Lift Curve".

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the learning curve. If None, the plot is drawn on a new set of
            axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        title_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "medium".

    Returns:
        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
            drawn.

    Raises:
        ValueError: If ``y_true`` does not contain exactly two classes.

    Example:
        >>> import scikitplot as skplt
        >>> lr = LogisticRegression()
        >>> lr = lr.fit(X_train, y_train)
        >>> y_probas = lr.predict_proba(X_test)
        >>> skplt.metrics.plot_lift_curve(y_test, y_probas)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_lift_curve.png
           :align: center
           :alt: Lift Curve
    """
    y_true = np.array(y_true)
    y_probas = np.array(y_probas)

    classes = np.unique(y_true)
    if len(classes) != 2:
        raise ValueError('Cannot calculate Lift Curve for data with '
                         '{} category/ies'.format(len(classes)))

    # Compute Cumulative Gain Curves
    percentages, gains1 = cumulative_gain_curve(y_true, y_probas[:, 0],
                                                classes[0])
    percentages, gains2 = cumulative_gain_curve(y_true, y_probas[:, 1],
                                                classes[1])

    # Drop the first point of every series before dividing (presumably the
    # origin, where the percentage is 0 -- confirm against
    # cumulative_gain_curve).
    percentages = percentages[1:]
    gains1 = gains1[1:]
    gains2 = gains2[1:]

    # Lift is the gain relative to the fraction of the sample inspected.
    gains1 = gains1 / percentages
    gains2 = gains2 / percentages

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)

    ax.set_title(title, fontsize=title_fontsize)

    ax.plot(percentages, gains1, lw=3, label='Class {}'.format(classes[0]))
    ax.plot(percentages, gains2, lw=3, label='Class {}'.format(classes[1]))

    # A lift of 1 means no improvement over random selection.
    ax.plot([0, 1], [1, 1], 'k--', lw=2, label='Baseline')

    ax.set_xlabel('Percentage of sample', fontsize=text_fontsize)
    ax.set_ylabel('Lift', fontsize=text_fontsize)
    ax.tick_params(labelsize=text_fontsize)
    # grid() expects a boolean; the string 'on' only works by being truthy.
    ax.grid(True)
    ax.legend(loc='lower right', fontsize=text_fontsize)

    return ax
Пример #9
0
def cumulative_gain_area_ratio(y_true,
                               y_probas,
                               onehot=False,
                               title='Cumulative Gains Curve',
                               ax=None,
                               figsize=None,
                               title_fontsize="large",
                               text_fontsize="large"):
    """
    Refactored code from scikit-plot's plot_cumulative_gain function.
    Plots the cumulative gain curve and calculates the area ratio.

    The area ratio is the area between the model curve and the diagonal
    baseline divided by the area between the best possible curve and the
    baseline. The figure is displayed with ``plt.show()``.

    Inputs:
    - y_true: vector of targets (must be binary). The larger of the two
      label values (``np.unique(y_true)[1]``) is the positive class.
    - y_probas: probability of classification.
    - onehot: binary, True: y_probas is of shape (n,2) and column 1 is used,
      False: shape (n,)
    - title: title of the plot.
    - ax: axes to draw on; a new figure and axes are created when None.
    - figsize: figure size used only when a new figure is created.
    - title_fontsize: font size of the title.
    - text_fontsize: font size of axis labels, ticks and legend.

    Returns:
    - area_ratio: the ratio described above.

    Raises:
    - ValueError: if y_true does not contain exactly two classes.
    """

    y_true = np.array(y_true)
    y_probas = np.array(y_probas)
    classes = np.unique(y_true)

    if len(classes) != 2:
        raise ValueError('Cannot calculate Cumulative Gains for data with '
                         '{} category/ies'.format(len(classes)))

    # Workaround: pad a zero column in front of a flat score vector so both
    # input layouts can be read through column 1 below.
    if not onehot:
        y_probas = y_probas.reshape((len(y_probas), 1))
        y_probas = np.concatenate((np.zeros((len(y_probas), 1)), y_probas),
                                  axis=1)

    # Compute Cumulative Gain Curves
    percentages, gains = cumulative_gain_curve(y_true, y_probas[:, 1],
                                               classes[1])

    # Optimal model curve: every positive sample is ranked first, so the
    # gain reaches 1 at the fraction of positive samples. Counting matches
    # against classes[1] keeps this correct for labels other than 0/1.
    positive_fraction = np.mean(y_true == classes[1])
    best_curve_x = [0, positive_fraction, 1]
    best_curve_y = [0, 1, 1]

    # Calculate area ratio; 0.5 is the area under the diagonal baseline.
    best_curve_area = auc(best_curve_x, best_curve_y) - 0.5
    model_curve_area = auc(percentages, gains) - 0.5
    area_ratio = model_curve_area / best_curve_area

    # Plotting
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)

    ax.set_title(title, fontsize=title_fontsize)

    ax.plot(percentages, gains, lw=2, label='Model')
    ax.plot(best_curve_x, best_curve_y, lw=2, label='Best curve')
    ax.plot([0, 1], [0, 1], 'k--', lw=2, label='Baseline')

    ax.set_xlim([0.0, 1.0])
    # Head-room above 1.0 so the best curve does not touch the frame.
    ax.set_ylim([0.0, 1.2])

    ax.set_xlabel('Percentage of data', fontsize=text_fontsize)
    ax.set_ylabel('Cumulative percentage of target data',
                  fontsize=text_fontsize)
    ax.tick_params(labelsize=text_fontsize)
    # grid() expects a boolean; the string 'on' only works by being truthy.
    ax.grid(True)
    ax.legend(loc='lower right', fontsize=text_fontsize)
    plt.show()

    return area_ratio