def _get_net_benefit_curve(y_true, probs):
    """Compute net benefit at every threshold of a binary classifier.

    Wrapper function for sklearn's ``_binary_clf_curve``.  For each threshold
    ``t`` returned by that helper the net benefit is::

        tps / n - (t / (1 - t)) * (fps / n)

    where ``n`` is the number of predictions.

    Parameters
    ----------
    y_true : array-like
        True labels, forwarded unchanged to ``_binary_clf_curve``.
    probs : array-like
        Predicted scores, forwarded unchanged to ``_binary_clf_curve``.
        The odds term ``t / (1 - t)`` treats the thresholds as probabilities.

    Returns
    -------
    net_benefits : array
        Net benefit for each threshold.
    thresholds : array
        The thresholds as returned by ``_binary_clf_curve``.

    Notes
    -----
    A threshold of exactly 1 makes the odds term divide by zero.
    """
    fps, tps, thresholds = _binary_clf_curve(y_true, probs)
    n = len(probs)
    # Odds of the threshold weight the cost of a false positive.
    net_benefits = (tps / n) - (thresholds / (1 - thresholds)) * (fps / n)
    return net_benefits, thresholds
def precision_recall_curve(y_true, y_pred, pos_label=None,
                           sample_weight=None, pi0=None):
    """Compute precision-recall (with optional calibration) pairs for different probability thresholds

    This implementation is a modification of scikit-learn "precision_recall_curve" function that adds calibration

    NOTE(review): the parameter list after ``pos_label`` and the tail of the
    ``_binary_clf_curve`` call were truncated in the source.  ``sample_weight``
    is restored from this docstring and ``pi0`` from its use in the body;
    confirm their order and defaults against the original.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        True binary labels. If labels are not either {-1, 1} or {0, 1}, then
        pos_label should be explicitly given.
    y_pred : array, shape = [n_samples]
        Estimated probabilities or decision function.
    pos_label : int or str, default=None
        The label of the positive class.
        When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},
        ``pos_label`` is set to 1, otherwise an error will be raised.
    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.
    pi0 : float, default=None
        Reference positive-class ratio used for calibration (presumably in
        (0, 1) -- confirm).  When ``None`` the ordinary precision
        ``tps / (tps + fps)`` is computed.

    Returns
    -------
    calib_precision : array, shape = [n_thresholds + 1]
        Calibrated Precision values such that element i is the calibrated precision of
        predictions with score >= thresholds[i] and the last element is 1.
    recall : array, shape = [n_thresholds + 1]
        Decreasing recall values such that element i is the recall of
        predictions with score >= thresholds[i] and the last element is 0.
    thresholds : array, shape = [n_thresholds <= len(np.unique(y_pred))]
        Increasing thresholds on the decision function used to compute
        precision and recall.
    """
    fps, tps, thresholds = _binary_clf_curve(y_true, y_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)
    if pi0 is not None:
        # Observed positive ratio; computed from the raw labels, so it
        # ignores pos_label and sample_weight (assumes positives sum as 1).
        pi = np.sum(y_true)/float(np.array(y_true).shape[0])
        # Re-weight false positives so precision refers to the prior pi0.
        ratio = pi*(1-pi0)/(pi0*(1-pi))
        precision = tps / (tps + ratio*fps)
    else:
        precision = tps / (tps + fps)
    precision[np.isnan(precision)] = 0
    recall = tps / tps[-1]

    # stop when full recall attained
    # and reverse the outputs so recall is decreasing
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)
    return np.r_[precision[sl], 1], np.r_[recall[sl], 0], thresholds[sl]
# Example #3
def roc_curve(y_true, y_score):
    """Return false-positive rate, miss rate and thresholds of a binary scorer.

    Cumulative counts come from ``_binary_clf_curve``.  If the curve is empty
    or does not start at zero false positives, an extra leading point with
    zero counts is prepended, using a threshold slightly above the first one.

    Note that the second returned array is ``1 - tpr``, not ``tpr``.

    :param y_true: true labels, forwarded to ``_binary_clf_curve``
    :param y_score: predicted scores, forwarded to ``_binary_clf_curve``
    :return: tuple ``(fpr, 1 - tpr, thresholds)``
    """
    false_pos, true_pos, cutoffs = _binary_clf_curve(
        y_true, y_score, pos_label=None, sample_weight=None)

    starts_at_origin = true_pos.size != 0 and false_pos[0] == 0
    if not starts_at_origin:
        # Prepend the extra threshold position.
        cutoffs = np.r_[cutoffs[0] + 1e-2, cutoffs]
        false_pos = np.r_[0, false_pos]
        true_pos = np.r_[0, true_pos]

    # Normalise the cumulative counts by their totals (last entries).
    fall_out = false_pos / false_pos[-1]
    hit_rate = true_pos / true_pos[-1]
    miss_rate = 1 - hit_rate
    return fall_out, miss_rate, cutoffs
# Example #4
def roc(outcomes, prediction):
    """Build a table joining raw FP/TP counts with ROC rates per threshold.

    :param outcomes: true labels, forwarded to ``_binary_clf_curve`` and
        ``roc_curve``
    :param prediction: predicted scores, forwarded to the same two helpers
    :return: ``pandas.DataFrame`` with columns ``fps``, ``tps``,
        ``thresholds``, ``fpr`` and ``tpr``; only thresholds present in both
        helper outputs are kept (default inner merge)
    """
    fps, tps, thresholds = _binary_clf_curve(outcomes, prediction)
    clf = pd.DataFrame({'fps': fps, 'tps': tps, 'thresholds': thresholds})
    clf['fps'] = clf['fps'].astype(int)
    clf['tps'] = clf['tps'].astype(int)

    # NOTE(review): the tail of this call was truncated in the source; only
    # the second positional argument is restored.  If roc_curve is sklearn's,
    # confirm whether drop_intermediate=False was intended so that every
    # threshold survives the merge below.
    fpr, tpr, thresholds = roc_curve(outcomes, prediction)
    r = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds})

    df = pd.merge(clf, r, on='thresholds')
    return df
# Example #5
def fp_tp_curve(true_classes, scores, pos_label=1):
    """
    True positive and false positive counts for different classification thresholds.
    This is just a wrapper for sklearn.metrics.ranking._binary_clf_curve so far.

    :param true_classes: true binary labels
    :param scores: predicted scores
    :param pos_label: label considered as positive, everything else is considered negative
    :return: increasing false positive counts, increasing true positive counts, decreasing thresholds
    """
    # The call's continuation line was lost in the source; the remaining
    # arguments are restored from this function's own parameters.
    fps, tps, thresholds = _binary_clf_curve(true_classes, scores,
                                             pos_label=pos_label)
    return fps, tps, thresholds
# Example #6
def roc_curve(y_true, y_score):
    """Return false-positive rate, miss rate and thresholds of a binary scorer.

    Cumulative counts come from ``_binary_clf_curve``.  If the curve is empty
    or does not start at zero false positives, an extra leading point with
    zero counts is prepended.

    Note that the second returned array is ``1 - tpr``, not ``tpr``.

    :param y_true: true labels, forwarded to ``_binary_clf_curve``
    :param y_score: predicted scores, forwarded to ``_binary_clf_curve``
    :return: tuple ``(fpr, 1 - tpr, thresholds)``
    """
    # NOTE(review): the continuation of this call was truncated in the
    # source; the arguments are restored from the signature, with the
    # keyword arguments spelled out as None -- confirm.
    fps, tps, thresholds = _binary_clf_curve(
        y_true, y_score, pos_label=None, sample_weight=None)

    if tps.size == 0 or fps[0] != 0:
        # Add an extra threshold position if necessary
        tps = np.r_[0, tps]
        fps = np.r_[0, fps]
        thresholds = np.r_[thresholds[0] + 1e-2, thresholds]

    # Normalise the cumulative counts by their totals (last entries).
    fpr = fps / fps[-1]
    tpr = tps / tps[-1]
    return fpr, 1 - tpr, thresholds
def full_precision_recall_curve(y_true, y_score):
    """Compute precision and recall at every threshold, with no truncation.

    Helper function to implement precision-recall curve in a way that takes
    into account recall reaching 1.  The arrays are returned in the order
    produced by ``_binary_clf_curve`` and are neither cut at full recall nor
    reversed.

    y_true: an array of true outcome labels
    y_score: an array of predicted probabilities

    Returns a tuple ``(precision, recall, thresholds)``.
    """
    try:
        # Module path used by newer scikit-learn releases.
        from sklearn.metrics._ranking import _binary_clf_curve
    except ImportError:
        # Older scikit-learn releases expose the helper here.
        from sklearn.metrics.ranking import _binary_clf_curve
    fps, tps, thresholds = _binary_clf_curve(y_true, y_score)

    precision = tps / (tps + fps)
    # 0/0 yields NaN; report such points as zero precision.
    precision[np.isnan(precision)] = 0
    recall = tps / tps[-1]

    return precision, recall, thresholds
# Example #8
def prec_star(y_true, probas_pred, ss, rs, pos_label=None, sample_weight=None):
    """Compute precision values after rescaling the false-positive counts.

    False-positive counts from ``_binary_clf_curve`` are multiplied by
    ``rs / ss`` before precision is computed.  Output is cut where full
    recall is first attained, reversed, and terminated with 1.

    :param y_true: true labels
    :param probas_pred: predicted scores
    :param ss: divisor of the false-positive rescaling factor
        (presumably a sample size -- confirm)
    :param rs: multiplier of the false-positive rescaling factor
        (presumably a reference size -- confirm)
    :param pos_label: label of the positive class
    :param sample_weight: optional sample weights
    :return: array of precision values with a trailing 1
    """
    # The call's continuation lines were lost in the source; the remaining
    # arguments are restored from this function's own parameters.
    fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)

    fps = fps * rs / float(ss)

    precision = tps / (tps + fps)
    # 0/0 yields NaN; report such points as zero precision.
    precision[np.isnan(precision)] = 0

    # stop when full recall attained
    # and reverse the outputs so recall is decreasing
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)
    return np.r_[precision[sl], 1]
def precision_recall_curve_modified(y_true, probas_pred, pos_label=None,
                                    sample_weight=None):
    """Compute precision-recall pairs, tolerating data with no positives.

    Differs from the plain computation in one place: when there are no true
    positives at all (``tps[-1] == 0``) recall is set to all ones instead of
    dividing by zero.

    NOTE(review): everything after ``y_true`` in the signature and in the
    ``_binary_clf_curve`` call was truncated in the source.  The parameters
    below follow scikit-learn's ``precision_recall_curve`` -- confirm against
    the original.

    :param y_true: true binary labels
    :param probas_pred: estimated probabilities or decision function
    :param pos_label: label of the positive class
    :param sample_weight: optional sample weights
    :return: tuple ``(precision, recall, thresholds)``; precision ends with 1
        and recall ends with 0
    """
    fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)

    precision = tps / (tps + fps)
    # 0/0 yields NaN; report such points as zero precision.
    precision[np.isnan(precision)] = 0
    recall = np.ones(tps.size) if tps[-1] == 0 else tps / tps[-1]

    # stop when full recall attained
    # and reverse the outputs so recall is decreasing
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)
    return np.r_[precision[sl], 1], np.r_[recall[sl], 0], thresholds[sl]
# Example #10
def spec_sens(y_true, y_pred, pos_label=None, sample_weight=None):
    """Compute specificity and sensitivity per threshold and the best index.

    The best index is the threshold whose (specificity, sensitivity) point
    has the smallest squared distance to the ideal point (1, 1).

    :param y_true: true labels; positives are counted with ``sum(y_true)``,
        so this assumes 0/1 labels -- confirm against callers
    :param y_pred: predicted scores
    :param pos_label: label of the positive class
    :param sample_weight: optional sample weights
    :return: tuple ``(spec, sens, thresholds, index)``
    """
    # get false positive and true positive
    # (the call's continuation line was lost in the source; the remaining
    # arguments are restored from this function's own parameters)
    fps, tps, thresholds = _binary_clf_curve(y_true, y_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)
    # positive sample number
    actual_p = sum(y_true)
    # negative sample number
    actual_f = len(y_true) - actual_p
    fps = fps * 1.0 / actual_f
    tps = tps * 1.0 / actual_p
    # true negative
    tns = 1 - fps
    # specificity and sensitivity
    spec = tns / (fps + tns)
    sens = tps / tps[-1]

    # Squared distance of every point to (1, 1).  The original loop computed
    # each distance but never stored it, so argmin ran on an empty list.
    spec_gap = 1 - spec
    sens_gap = 1 - sens
    dis = spec_gap * spec_gap + sens_gap * sens_gap
    index = np.argmin(dis)

    return spec, sens, thresholds, index
# Example #11
    # ----------------------------------------- #
    # FPS / TPS TERMINOLOGY   (A = Fraud)       #
    #                                           #
    # TPS: test says A and sample is A          #
    # FPS: test says A and sample is not A      #
    # TNS: test says not A and sample is not A  #
    # FNS: test says not A and sample is A      #
    # ----------------------------------------- #

    now =

    saver.restore(sess, save_model)

    test_batch_mse =, feed_dict={X: test_x})
    fps, tps, thresholds = _binary_clf_curve(test_y, test_batch_mse)
    fpr, tpr, threshold = roc_curve(test_y, test_batch_mse)
    print("Test auc score: {:.6f}".format(auc(test_y, test_batch_mse)))

# Scan every threshold index and keep the largest fps/tps ratio seen.
# NOTE(review): `max` shadows the built-in of the same name from here on.
max = 0
for i in range(thresholds.shape[0]):
    # NOTE(review): presumably tps[i] can be 0 for some thresholds --
    # verify that this division is safe for the data in use.
    ratio = fps[i] / tps[i]
    if ratio > max:
        max = ratio


with tf.Session() as sess:

    # Build the graph and restore weights here ...
# Example #12
def nv_binary_clf_curve_test():
    """Randomised consistency test of the ``_nv_*`` curve helpers.

    Draws a small random labelled sample (optionally weighted), checks the
    structural invariants of ``_nv_binary_clf_curve``, ``_nv_roc_curve``,
    ``_nv_recall_precision_curve`` and ``_nv_prg_curve``, then compares them
    with the reference curve functions.

    NOTE(review): the source lost several lines.  Two ``else:`` lines, the
    tails of three reference calls and an early ``return`` are restored
    below; each restoration is marked where it occurs.
    """
    N = np.random.randint(low=1, high=10)

    y_bool = np.random.rand(N) <= 0.5
    y_pred = np.random.rand(N)

    # Randomly pick no weights, continuous weights, or integer-like weights.
    sample_weight = None
    if np.random.rand() <= 0.2:
        sample_weight = np.abs(np.random.randn(N))
    if np.random.rand() <= 0.2:
        sample_weight = 1 + np.random.multinomial(N, np.ones(N) / N)
    if np.random.rand() <= 0.2:
        sample_weight = np.maximum(np.random.multinomial(N,
                                                         np.ones(N) / N), 1e-6)

    # --- raw cumulative counts ---
    fps, tps, thresholds = _nv_binary_clf_curve(y_bool, y_pred, sample_weight)
    assert (fps.shape == tps.shape and fps.shape == thresholds.shape)
    assert (np.all(np.isfinite(fps)))
    assert (np.all(np.isfinite(tps)))
    assert (np.all(np.isfinite(thresholds[1:])))
    assert (fps[0] == 0 and tps[0] == 0 and thresholds[0] == np.inf)
    if sample_weight is None:
        assert (np.abs(fps[-1] - np.sum(~y_bool)) <= 1e-8)
        assert (np.abs(tps[-1] - np.sum(y_bool)) <= 1e-8)
    else:  # restored: the weighted totals only apply when weights exist
        assert (np.abs(fps[-1] - np.sum(sample_weight * ~y_bool)) <= 1e-8)
        assert (np.abs(tps[-1] - np.sum(sample_weight * y_bool)) <= 1e-8)
    assert (np.all((np.diff(fps) >= 0.0) & (np.diff(tps) >= 0.0)))
    assert (np.all((np.diff(fps) > 0) | (np.diff(tps) > 0)))
    assert (np.all(np.diff(thresholds) < 0.0))

    # --- ROC curve ---
    fpr, tpr, thresholds_roc = _nv_roc_curve(y_bool, y_pred, sample_weight)
    assert (fpr.shape == tpr.shape and fpr.shape == thresholds_roc.shape)
    assert (np.all(np.isfinite(fpr)))
    assert (np.all(np.isfinite(tpr)))
    assert (np.all(np.isfinite(thresholds_roc[1:])))
    assert (fpr[0] == 0.0 and tpr[0] == 0.0)
    assert (fpr[-1] == 1.0 and tpr[-1] == 1.0)
    assert (np.all((np.diff(fpr) >= 0.0) & (np.diff(tpr) >= 0.0)))
    assert (np.all((np.diff(fpr) > 0.0) | (np.diff(tpr) > 0.0)))
    assert (np.all(np.diff(thresholds_roc) < 0.0))

    # --- recall / precision curve ---
    rec, prec, thresholds_pr = _nv_recall_precision_curve(
        y_bool, y_pred, sample_weight)
    assert (rec.shape == prec.shape and rec.shape == thresholds_pr.shape)
    assert (np.all(np.isfinite(rec)))
    assert (np.all(np.isfinite(prec)))
    assert (np.all(np.isfinite(thresholds_pr[1:])))
    assert (rec[0] == 0.0 and rec[-1] == 1.0)
    assert (len(prec) >= 2 and prec[0] == prec[1])
    b_rate = np.mean(y_bool) if sample_weight is None else \
        np.true_divide(np.sum(sample_weight * y_bool), np.sum(sample_weight))
    assert (np.max(np.abs(prec[-1] - b_rate)) <= 1e-8)
    # Note: may have repeats in PR curve
    assert (np.all(np.diff(rec) >= 0.0))
    assert (np.all(np.diff(thresholds_pr) < 0.0))

    # --- precision-recall-gain curve ---
    rec_gain, prec_gain, thresholds_prg = _nv_prg_curve(
        y_bool, y_pred, sample_weight)
    assert (rec_gain.shape == prec_gain.shape)
    assert (rec_gain.shape == thresholds_prg.shape)
    assert (np.all(np.isfinite(thresholds_prg[1:])))
    assert (rec_gain[0] == 0.0 and rec_gain[-1] == 1.0)
    assert (np.all(rec_gain <= 1.0) and np.all(prec_gain <= 1.0))
    assert (np.all(np.diff(rec_gain) >= 0.0))
    assert (np.allclose(prec_gain[-1], 0.0))

    if np.all(y_bool) or (not np.any(y_bool)):
        assert (np.allclose(0.5, np.trapz(fpr, tpr)))
        assert (np.allclose(np.mean(y_bool), np.sum(prec[:-1] * np.diff(rec))))
        assert (np.allclose(0.0, np.sum(prec_gain[:-1] * np.diff(rec_gain))))
        # NOTE(review): restored early exit.  With a single class present the
        # reference curves below are presumably undefined (NaN rates), so the
        # cross-checks cannot hold -- confirm against the original test.
        return

    # --- cross-check against the reference implementations ---
    # NOTE(review): the keyword arguments of the next three calls were
    # truncated in the source and are restored from how their results are
    # compared; the references are presumably scikit-learn's -- confirm.
    fps2, tps2, thresholds2 = _binary_clf_curve(y_bool, y_pred,
                                                pos_label=True,
                                                sample_weight=sample_weight)
    assert (np.allclose(fps[1:], fps2))
    assert (np.allclose(tps[1:], tps2))
    assert (np.allclose(thresholds[1:], thresholds2))

    # drop_intermediate=False keeps every threshold so that the arrays can be
    # compared element by element.
    fpr2, tpr2, thresholds2 = roc_curve(y_bool, y_pred, pos_label=True,
                                        sample_weight=sample_weight,
                                        drop_intermediate=False)
    # sklearn inconsistent on including origin ==> need if statement
    if len(fpr) == len(fpr2):
        assert (np.allclose(fpr, fpr2))
        assert (np.allclose(tpr, tpr2))
        assert (np.allclose(thresholds_roc[1:], thresholds2[1:]))
    else:  # restored: reference curve has no origin point
        assert (np.allclose(fpr[1:], fpr2))
        assert (np.allclose(tpr[1:], tpr2))
        assert (np.allclose(thresholds_roc[1:], thresholds2))

    prec2, rec2, thresholds2 = \
        precision_recall_curve(y_bool, y_pred, pos_label=True,
                               sample_weight=sample_weight)
    # Reference output is ordered by decreasing recall; flip it to match.
    prec2, rec2, thresholds2 = prec2[::-1], rec2[::-1], thresholds2[::-1]
    prec2[0] = prec2[1]
    err = rec[len(rec2):] - 1.0
    assert (len(err) == 0 or np.max(np.abs(err)) <= 1e-8)
    assert (np.allclose(rec[:len(rec2)], rec2))
    assert (np.allclose(prec[:len(rec2)], prec2))
    assert (np.allclose(thresholds_pr[1:len(rec2)], thresholds2))

    # --- recompute the gains directly from recall / precision ---
    with np.errstate(divide='ignore', invalid='ignore'):
        rec_gain2 = (rec - b_rate) / ((1.0 - b_rate) * rec)
        prec_gain2 = (prec - b_rate) / ((1.0 - b_rate) * prec)
    idx = rec_gain2 > 0.0
    assert (np.allclose(rec_gain[1:], rec_gain2[idx]))
    assert (np.allclose(prec_gain[1:], prec_gain2[idx]))
    assert (np.allclose(thresholds_prg[1:], thresholds_pr[idx]))
    assert (np.allclose(rec_gain[0], 0.0))
    idx0 = np.where(~idx)[0][-1]
    assert (np.allclose(prec_gain[0], prec_gain2[idx0]))
    assert (np.allclose(thresholds_prg[0], thresholds_pr[idx0]))
# Example #13
def detection_error_tradeoff(y_true, probas_pred, pos_label=None,
                             sample_weight=None):
    """Compute error rates for different probability thresholds

    Note: this implementation is restricted to the binary classification task.

    NOTE(review): the signature after ``y_true`` and the tail of the
    ``_binary_clf_curve`` call were truncated in the source; both are
    restored from the parameter list documented below.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        True targets of binary classification in range {-1, 1} or {0, 1}.

    probas_pred : array, shape = [n_samples]
        Estimated probabilities or decision function.

    pos_label : int, optional (default=None)
        The label of the positive class

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.

    Returns
    -------
    fpr : array, shape = [n_thresholds]
        False positive rate: the number of negative samples assigned a
        score >= thresholds[i], divided by the total number of negative
        samples (``fps[-1]``).

    fnr : array, shape = [n_thresholds]
        False negative rate: the number of positive samples assigned a
        score < thresholds[i] (``tps[-1] - tps``), divided by the total
        number of positive samples (``tps[-1]``).

    thresholds : array, shape = [n_thresholds]
        Increasing score values.

    References
    ----------
    .. [1] Wikipedia entry for Detection error tradeoff
    .. [2] The DET Curve in Assessment of Detection Task Performance
    .. [3] 2008 NIST Speaker Recognition Evaluation Results
    .. [4] DET-Curve Plotting software for use with MATLAB

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import detection_error_tradeoff
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> fps, fns, thresholds = detection_error_tradeoff(y_true, y_scores)
    >>> fps
    array([ 0.5,  0.5,  0. ])
    >>> fns
    array([ 0. ,  0.5,  0.5])
    >>> thresholds
    array([ 0.35,  0.4 ,  0.8 ])
    """
    fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)
    fns = tps[-1] - tps
    # Totals are the last cumulative counts.  Previously false positives were
    # divided by the positive total and false negatives by a negative-derived
    # total, which is only right when the classes are balanced.
    p_count = tps[-1]
    n_count = fps[-1]

    # start with false positives is zero and stop with false negatives zero
    last_ind = tps.searchsorted(tps[-1]) + 1
    # Last index still holding the initial false-positive count.  Searching
    # the reversed (descending) array always returned 0, so the leading
    # points were never trimmed.
    first_ind = fps.searchsorted(fps[0], side='right') - 1
    sl = slice(first_ind, last_ind)

    # reverse the outputs so list of false positives is decreasing
    return (fps[sl][::-1] / n_count,
            fns[sl][::-1] / p_count,
            thresholds[sl][::-1])
# Example #14
    def plot_eval_metrics(self, plot_size=8, fname='ACC_PRE_REC_F1', table=True,
                          save_format_table='csv', metrics=('ACC', 'PRE', 'REC', 'F1'),
                          plot_format='.png'):

        """This method saves a plot of the requested metrics at different thresholds for the data
        used to create the object. It also saves a table of the values used to create the plot if
        it is requested.

        NOTE(review): the end of the signature was truncated in the source. ``plot_format`` is
        restored because it is documented below and used when saving; its default of '.png' is
        an assumption -- confirm.

        :param plot_size: int
            Dimensions of the plot, it is always a square plot

        :param fname: str
            the name of the file used for saving it to disk

        :param table: bool
            Whether a table of the metrics should be saved beside the plot or not

        :param save_format_table: str
            the format of the file to be saved: 'csv' writes a .csv file, any other value
            writes a .pkl file

        :param metrics: tuple ('ACC', 'PRE', 'REC', 'F1')
            Contains the different metrics to be plotted, you can only select from those 4.

        :param plot_format: str
            This defines the format used to save the plot '.png', '.jpg', '.pdf'

        :return None, It saves a plot of the requested metrics at different thresholds, these metrics are
        calculated with respect to the data used to initiate the instance of the class viz.
        """

        fps, tps, thr = _binary_clf_curve(self.true_label, self.pred_score)
        # Totals are the last cumulative counts, so the complements follow.
        tns, fns = fps[-1] - fps, tps[-1] - tps
        precision = tps / (tps + fps)
        recall = tps / tps[-1]
        accuracy = (tns + tps) / (fps[-1] + tps[-1])
        # Small epsilon avoids 0/0 when precision and recall are both zero.
        f1_score = 2 * (precision * recall) / (precision + recall + 1e-8)

        if table:
            metrics_df = pd.DataFrame({'Threshold': thr, 'Accuracy': accuracy,
                                       'Precision': precision, 'Recall': recall,
                                       'F1_Score': f1_score})
            if save_format_table == 'csv':
                metrics_df.to_csv(os.path.join(self.viz_dir, fname + '.csv'))
            else:
                # Restored branch: previously the pickle was written even when csv was chosen.
                metrics_df.to_pickle(os.path.join(self.viz_dir, fname + '.pkl'))

        fig, ax = plt.subplots(figsize=(plot_size, plot_size))
        if 'ACC' in metrics:
            ax.plot(thr, accuracy, color=Config.colors['RED'], lw=2, label='Accuracy')
        if 'PRE' in metrics:
            ax.plot(thr, precision, color=Config.colors['TRQ'], lw=2, label='Precision')
        if 'REC' in metrics:
            ax.plot(thr, recall, color=Config.colors['YEL'], lw=2, label='Recall')
        if 'F1' in metrics:
            ax.plot(thr, f1_score, color='black', lw=2, label='F1_Score', linestyle='-')

        ax.set_title('Model Evaluation Metrics', fontsize=Config.TIT_FS, fontweight='bold')
        ax.set_xlim([0.0, thr.max() + 0.01])
        ax.set_ylim([0.0, 1.05])
        ax.set_xlabel('Probability Threshold', fontsize=Config.AXS_FS)
        ax.set_ylabel('Evaluation Metrics Scores', fontsize=Config.AXS_FS)

        fig.savefig(os.path.join(self.viz_dir, fname + plot_format), bbox_inches='tight')