Example #1
 def test_cmap(self):
     np.random.seed(0)
     clf = LogisticRegression()
     clf.fit(self.X, self.y)
     probas = clf.predict_proba(self.X)
     plot_precision_recall(self.y, probas, cmap='nipy_spectral')
     plot_precision_recall(self.y, probas, cmap=plt.cm.nipy_spectral)
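The test methods in these examples rely on self.X and self.y fixtures that are not shown. A minimal self-contained sketch of the setup they assume (the breast-cancer dataset below is an arbitrary stand-in, not taken from the original test suite):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from scikitplot.metrics import plot_precision_recall

X, y = load_breast_cancer(return_X_y=True)     # stand-in for the self.X / self.y fixture
np.random.seed(0)
clf = LogisticRegression(max_iter=1000).fit(X, y)
probas = clf.predict_proba(X)                  # shape (n_samples, n_classes)
plot_precision_recall(y, probas, cmap='nipy_spectral')
plt.show()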
Example #2
 def test_classes_to_plot(self):
     np.random.seed(0)
     clf = LogisticRegression()
     clf.fit(self.X, self.y)
     probas = clf.predict_proba(self.X)
     plot_precision_recall(self.y, probas, classes_to_plot=[0, 1])
     plot_precision_recall(self.y, probas, classes_to_plot=np.array([0, 1]))
Example #3
 def test_plot_micro(self):
     np.random.seed(0)
     clf = LogisticRegression()
     clf.fit(self.X, self.y)
     probas = clf.predict_proba(self.X)
     plot_precision_recall(self.y, probas, plot_micro=True)
     plot_precision_recall(self.y, probas, plot_micro=False)
Example #4
 def prc(self, predicted_probs, plot_name, file_name):
     """
     A method that plots a precision recall curve for the given model.
     """
     plot_precision_recall(self.results_convert(self.y),
                           predicted_probs,
                           title=plot_name)
     plt.savefig(file_name)
Example #5
def create_precision_recall_chart(classifier,
                                  X_test,
                                  y_test,
                                  y_pred_proba=None):
    """Create precision recall chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_test (:obj:`ndarray`):
            | The classification target for testing
        y_pred_proba (:obj:`ndarray`, optional, default is ``None``):
            | Classifier's prediction probabilities on the test data.

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/precision_recall'] = npt_utils.create_precision_recall_chart(rfc, X_test, y_test)
    """
    assert is_classifier(
        classifier), 'classifier should be sklearn classifier.'

    chart = None

    if y_pred_proba is None:
        try:
            y_pred_proba = classifier.predict_proba(X_test)
        except Exception as e:
            print(
                'Did not log Precision-Recall chart: this classifier does not provide prediction probabilities. '
                'Error {}'.format(e))
            return chart

    try:
        fig, ax = plt.subplots()
        plot_precision_recall(y_test, y_pred_proba, ax=ax)
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log Precision-Recall chart. Error {}'.format(e))

    return chart
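The y_pred_proba argument is the escape hatch for classifiers without predict_proba. A hedged sketch (reusing the run/X_train/y_train/X_test/y_test names assumed by the docstring example, and wrapping a LinearSVC in CalibratedClassifierCV to obtain probabilities, which is one option among several):

from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC
import neptune.new.integrations.sklearn as npt_utils

svc = LinearSVC().fit(X_train, y_train)        # LinearSVC has no predict_proba
proba = CalibratedClassifierCV(LinearSVC()).fit(X_train, y_train).predict_proba(X_test)

run['visuals/precision_recall'] = npt_utils.create_precision_recall_chart(
    svc, X_test, y_test, y_pred_proba=proba)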
Example #6
def log_precision_recall_auc(y_true,
                             y_pred,
                             experiment=None,
                             channel_name='metric_charts',
                             prefix=''):
    """Creates and logs Precision Recall curve and Average precision score to Neptune.

    Args:
        y_true (array-like, shape (n_samples)): Ground truth (correct) target values.
        y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        channel_name(str): name of the neptune channel. Default is 'metric_charts'.
        prefix(str): Prefix that will be added before metric name when logged to Neptune.

    Examples:
        Train the model and make predictions on test::

            from sklearn.datasets import make_classification
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.model_selection import train_test_split
            from sklearn.metrics import classification_report

            X, y = make_classification(n_samples=2000)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

            model = RandomForestClassifier()
            model.fit(X_train, y_train)

            y_test_pred = model.predict_proba(X_test)

        Logs Precision Recall curve and Average precision score to Neptune::

            import neptune
            from neptunecontrib.monitoring.metrics import log_precision_recall_auc

            neptune.init()
            with neptune.create_experiment():
                log_precision_recall_auc(y_test, y_test_pred)

        Check out this experiment https://ui.neptune.ai/o/neptune-ai/org/binary-classification-metrics/e/BIN-101/logs.

    """
    assert len(
        y_pred.shape
    ) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it'

    _exp = experiment if experiment else neptune

    expect_not_a_run(_exp)

    avg_precision = sk_metrics.average_precision_score(y_true, y_pred[:, 1])
    _exp.log_metric(prefix + 'avg_precision', avg_precision)

    fig, ax = plt.subplots()
    plt_metrics.plot_precision_recall(y_true, y_pred, ax=ax)
    send_figure(fig, channel_name=prefix + channel_name, experiment=_exp)
    plt.close()
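The (n_samples, 2) requirement above matters when you only have a 1-D vector of positive-class probabilities. A small sketch of the conversion, presumably equivalent to what the expand_prediction helper mentioned in the assertion does (the model/X_test/y_test names come from the docstring example):

import numpy as np

pos_proba = model.predict_proba(X_test)[:, 1]               # 1-D positive-class probabilities
y_test_pred = np.stack((1 - pos_proba, pos_proba), axis=1)  # columns: class 0, class 1
log_precision_recall_auc(y_test, y_test_pred)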
Example #7
 def test_ax(self):
     np.random.seed(0)
     clf = LogisticRegression()
     clf.fit(self.X, self.y)
     probas = clf.predict_proba(self.X)
     fig, ax = plt.subplots(1, 1)
     out_ax = plot_precision_recall(self.y, probas)
     assert ax is not out_ax
     out_ax = plot_precision_recall(self.y, probas, ax=ax)
     assert ax is out_ax
Example #8
def log_precision_recall_chart(classifier, X_test, y_test, y_pred_proba=None, experiment=None):
    """Log precision recall chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted sklearn classifier object
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_test (:obj:`ndarray`):
            | The classification target for testing
        y_pred_proba (:obj:`ndarray`, optional, default is ``None``):
            | Classifier's prediction probabilities on the test data.
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, logs to the currently active (most recent) experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_precision_recall_chart(rfc, X_test, y_test)
    """
    assert is_classifier(classifier), 'classifier should be sklearn classifier.'
    exp = _validate_experiment(experiment)

    if y_pred_proba is None:
        try:
            y_pred_proba = classifier.predict_proba(X_test)
        except Exception as e:
            print('Did not log Precision-Recall chart: this classifier does not provide prediction probabilities. '
                  'Error {}'.format(e))
            return

    try:
        fig, ax = plt.subplots()
        plot_precision_recall(y_test, y_pred_proba, ax=ax)
        exp.log_image('charts_sklearn', fig, image_name='Precision Recall Curve')
        plt.close(fig)
    except Exception as e:
        print('Did not log Precision-Recall chart. Error {}'.format(e))
Example #9
    def plot_learning_curve(self, save_dir):
        # Plot learning curve for the classifier
        est = self.model
        est.set_params(**self.params)

        _, axes = plt.subplots(3,
                               3,
                               figsize=(20, 12),
                               dpi=200,
                               constrained_layout=True)
        # plt.tight_layout()
        _train_ax = [axes[0][0], axes[0][1], axes[0][2]]
        plot_learning_curve(est,
                            "{} - Learning curves (Train)".format(self.symbol),
                            self.X_train,
                            self.y_train,
                            axes=_train_ax,
                            cv=self.cv)

        n_classes = np.unique(self.y_test).shape[0]
        if hasattr(self.cv_estimator, 'predict_proba'):
            y_train_proba = self.cv_estimator.predict_proba(self.X_train)
            axes[1][0].set_title("{} - ROC (Train)".format(self.symbol))
            plot_roc(self.y_train, y_train_proba, n_classes, ax=axes[1][0])
            axes[1][1].set_title("{} - Precision/Recall (Train)".format(
                self.symbol))
            plot_precision_recall(self.y_train, y_train_proba, ax=axes[1][1])
        axes[1][2].set_title("{} - Confusion matrix (Train)".format(
            self.symbol))
        plot_confusion_matrix(self.cv_estimator,
                              self.X_train,
                              self.y_train,
                              cmap='Blues',
                              ax=axes[1][2])
        if hasattr(self.cv_estimator, 'predict_proba'):
            y_test_proba = self.cv_estimator.predict_proba(self.X_test)
            axes[2][0].set_title("{} - ROC (Test)".format(self.symbol))
            plot_roc(self.y_test, y_test_proba, ax=axes[2][0])
            axes[2][1].set_title("{} - Precision/Recall (Test)".format(
                self.symbol))
            plot_precision_recall(self.y_test, y_test_proba, ax=axes[2][1])
        axes[2][2].set_title("{} - Confusion matrix (Test)".format(
            self.symbol))
        plot_confusion_matrix(self.cv_estimator,
                              self.X_test,
                              self.y_test,
                              cmap='Oranges',
                              ax=axes[2][2])

        curve_path = '{}{}_learning_curve.png'.format(save_dir, self.symbol)
        plt.savefig(curve_path)
        plt.close()
Example #10
 def prc(self, predicted_probs, plot_name, file_name):
     plot_precision_recall(self.y_test, predicted_probs, title=plot_name)
     plt.savefig(file_name)
Example #11
 def test_array_like(self):
     plot_precision_recall([0, 1], [[0.8, 0.2], [0.2, 0.8]])
     plot_precision_recall([0, 'a'], [[0.8, 0.2], [0.2, 0.8]])
     plot_precision_recall(['b', 'a'], [[0.8, 0.2], [0.2, 0.8]])
Example #12
sorted(cancer.target_names)

print(classification_report(y_test, y_pred))

# Specify a threshold

y_pred_thresh = rf.predict_proba(X_test)[:, 1] > 0.85  # 0.85 as threshold

print(classification_report(y_test, y_pred_thresh))

### Precision-Recall curve

from scikitplot.metrics import plot_precision_recall

rf_probas = rf.predict_proba(X_test)  # plot_precision_recall needs the full (n_samples, n_classes) probability matrix
plot_precision_recall(y_test, rf_probas)

from yellowbrick.classifier import PrecisionRecallCurve

viz = PrecisionRecallCurve(rf)
viz.fit(X_train, y_train)
viz.score(X_test, y_test)
viz.poof()

# Discrimination Threshold - the probability or score at which the positive class is chosen over the negative class

from yellowbrick.classifier import DiscriminationThreshold

viz = DiscriminationThreshold(rf)
viz.fit(X_train, y_train)
viz.poof()
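The 0.85 cut-off above is picked by eye; an illustrative alternative (not from the original notebook) is to choose a threshold from the precision/recall trade-off returned by sklearn's precision_recall_curve:

import numpy as np
from sklearn.metrics import precision_recall_curve

precision, recall, thresholds = precision_recall_curve(y_test, rf.predict_proba(X_test)[:, 1])
# first threshold whose precision reaches 0.9 (assumes such a threshold exists)
idx = int(np.argmax(precision[:-1] >= 0.9))
print('threshold={:.2f}  precision={:.2f}  recall={:.2f}'.format(
    thresholds[idx], precision[idx], recall[idx]))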
Example #13
def plot_classification_metrics(y_true,
                                y_pred_proba,
                                classes_to_plot=None,
                                threshold=None,
                                plot_micro=True):
    """ Plot ROC Curve, Precision-Recall Curve and Confusion Matrix

    Parameters
    ----------
    y_true : array-like, shape (n_samples)
        Ground truth labels.

    y_pred_proba : array-like, shape (n_samples, n_classes)
        Prediction probabilities or decision scores for each class
        returned by a classifier.

    classes_to_plot : list-like, optional
        Classes for which the ROC and precision-recall curves should be plotted, e.g. [0, 'cold'].
        If the specified class does not exist, it will be ignored.
        If ``None``, all classes will be plotted.

    threshold : None or float
        If a float is given, it is used as the decision threshold for
        binary classification.

    plot_micro : bool
        Whether to plot the micro-averaged ROC and precision-recall
        curves.

    Returns
    -------
    fig : :class:`matplotlib.figure.Figure` object

    axs : Axes object or array of Axes objects.
    """

    fig = plt.figure(dpi=100, figsize=(10.5, 8))
    ax1 = plt.subplot2grid((4, 4), (0, 0), rowspan=2, colspan=2)
    ax2 = plt.subplot2grid((4, 4), (0, 2), rowspan=2, colspan=2)
    ax3 = plt.subplot2grid((4, 4), (2, 0), rowspan=2, colspan=2)

    # region Plot ROC Curve
    plot_roc(y_true,
             y_pred_proba,
             plot_macro=False,
             plot_micro=plot_micro,
             classes_to_plot=classes_to_plot,
             ax=ax1)

    # endregion

    # region plot Precision-Recall Curve
    plot_precision_recall(y_true,
                          y_pred_proba,
                          plot_micro=plot_micro,
                          classes_to_plot=classes_to_plot,
                          ax=ax2)
    ax2.legend(loc='lower right')
    # endregion

    # region Plot Confusion Matrix
    y_pred_idx = np.argmax(y_pred_proba, axis=-1)
    labels = np.sort(np.unique(y_true))
    y_pred = labels[y_pred_idx]
    plot_confusion_matrix(y_true, y_pred, normalize=True, ax=ax3)
    # endregion
    axs = [ax1, ax2, ax3]

    if threshold:
        # region Plot Confusion Matrix
        labels = np.sort(np.unique(y_true))
        assert len(labels) == 2, """Problem is not binary classification
        but decision threshold is set"""
        ax4 = plt.subplot2grid((4, 4), (2, 2), rowspan=2, colspan=2)
        is_positive = y_pred_proba[:, 1] > threshold
        y_pred = labels[is_positive.astype('int')]
        plot_confusion_matrix(y_true, y_pred, normalize=True, ax=ax4)
        ax4.set_title('Confusion Matrix with adjusted '
                      'decision threshold: {:.2}'.format(threshold))

        # update color limit
        im3 = ax3.get_images()[0]
        clim = im3.get_clim()
        im4 = ax4.get_images()[0]
        im4.set_clim(clim)
        axs.append(ax4)

        # endregion
    fig.tight_layout()
    return fig, axs
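A hedged usage sketch for the helper above (the clf, X_test and y_test names are assumed, not part of the original):

fig, axs = plot_classification_metrics(y_test,
                                       clf.predict_proba(X_test),
                                       classes_to_plot=None,
                                       plot_micro=True,
                                       threshold=0.3)   # extra confusion matrix at a custom cut-off (binary only)
fig.savefig('classification_metrics.png')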
Example #14
# (the start of this snippet is missing; a DataFrame of predictions is assumed)
df = pd.DataFrame({
    'y_pred': y_pred,
    'y_pred_probability': y_pred_proba.max(axis=1)
})
log_table('predictions', df)

# Log model performance visualizations

import matplotlib.pyplot as plt
from scikitplot.metrics import plot_roc, plot_precision_recall

fig, ax = plt.subplots()
plot_roc(y_test, y_pred_proba, ax=ax)
neptune.log_image('model-performance-visualizations', fig, image_name='ROC')

fig, ax = plt.subplots()
plot_precision_recall(y_test, y_pred_proba, ax=ax)
neptune.log_image('model-performance-visualizations',
                  fig,
                  image_name='precision recall')
plt.close('all')

# Log train data sample (images per class)

for j, class_name in enumerate(class_names):
    plt.figure(figsize=(10, 10))
    label_ = np.where(y_train == j)
    for i in range(9):
        plt.subplot(3, 3, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
Example #15
 def test_string_classes(self):
     np.random.seed(0)
     clf = LogisticRegression()
     clf.fit(self.X, convert_labels_into_string(self.y))
     probas = clf.predict_proba(self.X)
     plot_precision_recall(convert_labels_into_string(self.y), probas)
Example #16
def predict(model, data_loader, pos_loss_weight, verbose=0, tag='', xlfile_name=None, prefix=None): #DOC OK
    """
    This function is used to evaluate a binary classifier.

    Params:
    ------

    model: torch.nn.Module
        the model to be used for prediction
    data_loader: torch.utils.data.DataLoader
        the test data loader
    pos_loss_weight: int/float
        a number to be passed as the weight for the positive class to BCEWithLogitsLoss

    verbose: int
        if 0 : only metrics will be printed

        if 1 : verbose 0 + plotting precision-recall curve

        if 2 : verbose 1 + visualize 40 images of wrong and correct predictions.

        if 3: verbose 2 + visualize the confusion matrix

    tag: string
        a string to include in the save figure name.

    xlfile_name: str
        the path where the results Excel file is located.
    prefix: list
        the values the results are appended to, usually the parameters used to obtain these results.
    Returns:
    -------
        tuple(predicted labels, the loss value, F1 score)
    Notes:
    ------
    Sigmoid is applied on the output of the model, so no need to include it in the model.

    """

    # set the device
    device = Classifier.device

    matplotlib.use('Agg') #TODO disable when you want to show figures
    mode = model.training
    # set the mode to evaluation
    model.eval()

    np_pred_labels = np.array([])
    np_true_labels = np.array([])
    np_probs = np.array([])

    # for all batches
    for idx, (imgs, labels) in enumerate(data_loader):
        imgs, labels = imgs.to(device), labels.to(device).float()

        # start testing with no auto grad
        with torch.set_grad_enabled(False):
            output = model(imgs).view(-1) #get the output batch
            probs = torch.sigmoid(output)

            preds = torch.gt(probs, 0.5).float()
            loss = auto_weight_BCEWithLogitsLoss(output, labels, pos_loss_weight)

        # stack all the true labels, predicted labels, and probabilities for all batches together
        np_true_labels = np.hstack((np_true_labels, labels.data.cpu().numpy())).astype(int)  # use builtin int; np.int was removed from NumPy
        np_pred_labels = np.hstack((np_pred_labels, preds.data.cpu().numpy())).astype(int)
        np_probs = np.hstack((np_probs, probs.data.cpu().numpy()))

    if verbose >= 0: #print metrics
        Pr, R, F1, S = precision_recall_fscore_support(y_true=np_true_labels, y_pred=np_pred_labels, average=None)
        average_precision = average_precision_score(y_true=np_true_labels, y_score=np_probs)

        print('-' * 10 + '(class 0): ' + ' Loss= {:.5f} Precision = {:.4f} Recall = {:.4f} F1 = {:.4f} Support = {}'.format(loss.item(), Pr[0], R[0], F1[0], S[0]))
        print('-' * 10 + '(class 1): ' + ' Loss= {:.5f} Precision = {:.4f} Recall = {:.4f} F1 = {:.4f} Support = {}'.format(loss.item(), Pr[1], R[1], F1[1], S[1]))
        print('\naverage precision {:.4f}'.format(average_precision)) # area under precision-recall curve can be useful here

        if verbose >= 1: #plot precision recall curve
            two_class_probs = np.stack((1 - np_probs, np_probs), axis=1)  # stack negative- and positive-class probabilities into (n_samples, 2)
            plot_precision_recall(y_true=np_true_labels, y_probas= two_class_probs) # plot precision recall curve

            fpr, tpr, thresholds = roc_curve(y_true= np_true_labels, y_score= np_probs, pos_label=1)
            roc_auc = roc_auc_score(y_true=np_true_labels, y_score=np_probs)
            plt.figure()
            plt.plot(fpr, tpr)
            plt.xlabel('FPR')
            plt.ylabel('TPR')
            plt.title('ROC Curve for model\n' + os.path.split(model.path)[1])
            plt.legend(['auc = {:.4f}'.format(roc_auc)])
            plt.savefig(os.path.join(model.path, model.id + tag + '_ROC' ))
            if verbose >= 2: #show some images (20 correctly predicted and 20 mistakenly predicted)
                visualize_model(num_images=[20,20], data_loader=data_loader, true_labels=np_true_labels, pred_labels=np_pred_labels ,classes=['Tis', 'Les'])

                if verbose==3:
                    plot_confusion_matrix(y_true=np_true_labels, y_pred=np_pred_labels, classes=np.array(['Tis', 'Les']))
                    plt.savefig(os.path.join(model.path, model.id + tag+ '_confusion'))
                    plt.show()

    #return the model to the mode it came with
    model.train(mode)

    # if there is an excel file name, write the results there
    if xlfile_name:
        print('writing metrics to the excel file...')
        write_xl(xlfile_name, prefix+[Pr[1], R[1], F1[1], roc_auc])

    return np_pred_labels, loss.item(), F1
Example #17
def main():
    neptune.init(api_token=os.getenv('NEPTUNE_API_TOKEN'),
                 project_qualified_name=os.getenv('NEPTUNE_PROJECT'))

    train_idx = pd.read_csv(TRAIN_IDX_PATH, nrows=NROWS)
    valid_idx = pd.read_csv(VALID_IDX_PATH, nrows=NROWS)
    features = pd.read_csv(FEATURES_PATH, nrows=NROWS)

    train = pd.merge(train_idx, features, on='SK_ID_CURR')
    valid = pd.merge(valid_idx, features, on='SK_ID_CURR')

    all_params = {
        'num_boost_round': NUM_BOOST_ROUND,
        'early_stopping_rounds': EARLY_STOPPING_ROUNDS,
        **LGBM_PARAMS
    }

    with neptune.create_experiment(name='model training',
                                   params=all_params,
                                   tags=['lgbm'],
                                   upload_source_files=get_filepaths(),
                                   properties={
                                       'features_path':
                                       FEATURES_PATH,
                                       'features_version':
                                       md5_hash(FEATURES_PATH),
                                       'train_split_version':
                                       md5_hash(TRAIN_IDX_PATH),
                                       'valid_split_version':
                                       md5_hash(VALID_IDX_PATH),
                                   }):
        results = train_evaluate(train,
                                 valid,
                                 LGBM_PARAMS,
                                 callbacks=[neptune_monitor()])
        train_score, valid_score = results['train_score'], results[
            'valid_score']
        train_preds, valid_preds = results['train_preds'], results[
            'valid_preds']

        neptune.send_metric('train_auc', train_score)
        neptune.send_metric('valid_auc', valid_score)

        train_pred_path = os.path.join(PREDICTION_DIRPATH, 'train_preds.csv')
        train_preds.to_csv(train_pred_path, index=None)
        neptune.send_artifact(train_pred_path)

        valid_pred_path = os.path.join(PREDICTION_DIRPATH, 'valid_preds.csv')
        valid_preds.to_csv(valid_pred_path, index=None)
        neptune.send_artifact(valid_pred_path)

        model_path = os.path.join(MODEL_DIRPATH, 'model.pkl')
        joblib.dump(results['model'], model_path)
        neptune.set_property('model_path', model_path)
        neptune.set_property('model_version', md5_hash(model_path))
        neptune.send_artifact(model_path)

        if PACKAGE_TO_PROD:
            saved_path = CreditDefaultClassifier.pack(
                model=results['model']).save(PRODUCTION_DIRPATH)
            neptune.set_property('production_model_path', saved_path)

        fig, ax = plt.subplots(figsize=(16, 12))
        sk_metrics.plot_confusion_matrix(valid_preds['TARGET'],
                                         valid_preds['preds_pos'] > 0.5,
                                         ax=ax)
        plot_path = os.path.join(REPORTS_DIRPATH, 'conf_matrix.png')
        fig.savefig(plot_path)
        neptune.send_image('diagnostics', plot_path)

        fig, ax = plt.subplots(figsize=(16, 12))
        sk_metrics.plot_roc(valid_preds['TARGET'],
                            valid_preds[['preds_neg', 'preds_pos']],
                            ax=ax)
        plot_path = os.path.join(REPORTS_DIRPATH, 'roc_auc.png')
        fig.savefig(plot_path)
        neptune.send_image('diagnostics', plot_path)

        fig, ax = plt.subplots(figsize=(16, 12))
        sk_metrics.plot_precision_recall(
            valid_preds['TARGET'],
            valid_preds[['preds_neg', 'preds_pos']],
            ax=ax)
        plot_path = os.path.join(REPORTS_DIRPATH, 'prec_recall.png')
        fig.savefig(plot_path)
        neptune.send_image('diagnostics', plot_path)

        fig, ax = plt.subplots(figsize=(16, 12))
        plot_prediction_distribution(valid_preds['TARGET'],
                                     valid_preds['preds_pos'],
                                     ax=ax)
        plot_path = os.path.join(REPORTS_DIRPATH, 'preds_dist.png')
        fig.savefig(plot_path)
        neptune.send_image('diagnostics', plot_path)