def _get_net_benefit_curve(y_true, probs):
    """ Wrapper function for sklearn's _binary_clf_curve """
    fps, tps, thresholds = _binary_clf_curve(y_true, probs)
    n = len(probs)
    # Net benefit at threshold p_t: TP/n - (p_t / (1 - p_t)) * FP/n
    net_benefits = (tps / n) - (thresholds / (1 - thresholds)) * (fps / n)
    return net_benefits, thresholds

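# --- Hedged usage sketch (not from the original source) --------------------
# Minimal example of calling _get_net_benefit_curve above on toy data and
# comparing it to the "treat all" baseline of a decision curve analysis.
# The toy arrays are made up for illustration; _binary_clf_curve lives in a
# private sklearn module whose path varies by version (sklearn.metrics.ranking
# in older releases, sklearn.metrics._ranking in newer ones).
import numpy as np
from sklearn.metrics._ranking import _binary_clf_curve  # sklearn.metrics.ranking on old versions

y_true = np.array([0, 0, 1, 1, 1, 0, 1, 0])
probs = np.array([0.1, 0.4, 0.35, 0.8, 0.7, 0.2, 0.9, 0.5])
net_benefits, thresholds = _get_net_benefit_curve(y_true, probs)

# "Treat all" baseline: prevalence - (p_t / (1 - p_t)) * (1 - prevalence)
prevalence = y_true.mean()
treat_all = prevalence - (thresholds / (1 - thresholds)) * (1 - prevalence)
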
def precision_recall_curve(y_true, y_pred, pos_label=None, sample_weight=None, pi0=None):
    """Compute precision-recall (with optional calibration) pairs for
    different probability thresholds.

    This implementation is a modification of scikit-learn's
    "precision_recall_curve" function that adds calibration.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        True binary labels. If labels are not either {-1, 1} or {0, 1}, then
        pos_label should be explicitly given.

    y_pred : array, shape = [n_samples]
        Estimated probabilities or decision function.

    pos_label : int or str, default=None
        The label of the positive class. When ``pos_label=None``, if y_true
        is in {-1, 1} or {0, 1}, ``pos_label`` is set to 1, otherwise an
        error will be raised.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    pi0 : float, default=None
        Reference prevalence of the positive class used for calibration.
        If None, the uncalibrated precision is returned.

    Returns
    -------
    calib_precision : array, shape = [n_thresholds + 1]
        Calibrated precision values such that element i is the calibrated
        precision of predictions with score >= thresholds[i] and the last
        element is 1.

    recall : array, shape = [n_thresholds + 1]
        Decreasing recall values such that element i is the recall of
        predictions with score >= thresholds[i] and the last element is 0.

    thresholds : array, shape = [n_thresholds <= len(np.unique(y_pred))]
        Increasing thresholds on the decision function used to compute
        precision and recall.
    """
    fps, tps, thresholds = _binary_clf_curve(y_true, y_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)

    if pi0 is not None:
        # Observed prevalence and the re-weighting ratio mapping it to the
        # reference prevalence pi0.
        pi = np.sum(y_true) / float(np.array(y_true).shape[0])
        ratio = pi * (1 - pi0) / (pi0 * (1 - pi))
        precision = tps / (tps + ratio * fps)
    else:
        precision = tps / (tps + fps)
    precision[np.isnan(precision)] = 0
    recall = tps / tps[-1]

    # stop when full recall attained
    # and reverse the outputs so recall is decreasing
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)
    return np.r_[precision[sl], 1], np.r_[recall[sl], 0], thresholds[sl]

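# --- Hedged usage sketch (not from the original source) --------------------
# Minimal example of the calibrated PR curve above: the same scores are
# evaluated at the observed prevalence (0.25 here) and re-calibrated to a
# hypothetical reference prevalence pi0 = 0.5. The toy arrays are made up for
# illustration, and _binary_clf_curve is assumed importable as in the earlier
# sketch.
import numpy as np

y_true = np.array([0, 0, 0, 0, 0, 0, 1, 1])
scores = np.array([0.05, 0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.9])

prec_raw, rec_raw, thr = precision_recall_curve(y_true, scores)
prec_cal, rec_cal, thr = precision_recall_curve(y_true, scores, pi0=0.5)
# prec_cal is what the precision would be if the positive class had
# prevalence 0.5 instead of the observed 0.25.
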
def roc_curve(y_true, y_score):
    fps, tps, thresholds = _binary_clf_curve(
        y_true, y_score, pos_label=None, sample_weight=None)

    if tps.size == 0 or fps[0] != 0:
        # Add an extra threshold position if necessary
        tps = np.r_[0, tps]
        fps = np.r_[0, fps]
        thresholds = np.r_[thresholds[0] + 1e-2, thresholds]

    fpr = fps / fps[-1]
    tpr = tps / tps[-1]
    # Note: the second return value is 1 - TPR (the miss / false negative
    # rate), not the TPR itself, so this traces a DET-style curve rather
    # than a conventional ROC curve.
    return fpr, 1 - tpr, thresholds

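# --- Hedged usage sketch (not from the original source) --------------------
# Small check of the roc_curve variant above: because it returns 1 - TPR, the
# second output is the false negative (miss) rate. Toy arrays are made up for
# illustration; _binary_clf_curve is assumed importable as in the first sketch.
import numpy as np

y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])
fpr, fnr, thr = roc_curve(y_true, y_score)
# A conventional TPR can be recovered as 1 - fnr.
tpr = 1 - fnr
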
def roc(outcomes, prediction):
    fps, tps, thresholds = _binary_clf_curve(outcomes, prediction)
    clf = pd.DataFrame([fps, tps, thresholds]).T
    clf.columns = ['fps', 'tps', 'thresholds']
    clf['fps'] = clf['fps'].astype(int)
    clf['tps'] = clf['tps'].astype(int)

    # drop_intermediate=False keeps all thresholds returned by
    # _binary_clf_curve so the merge on 'thresholds' below lines up.
    fpr, tpr, thresholds = roc_curve(outcomes, prediction, drop_intermediate=False)
    r = pd.DataFrame([fpr, tpr, thresholds]).T
    r.columns = ['fpr', 'tpr', 'thresholds']

    df = pd.merge(clf, r, on='thresholds')
    return df

def fp_tp_curve(true_classes, scores, pos_label=1):
    """
    True positive and false positive counts for different classification thresholds.
    This is just a wrapper for sklearn.metrics.ranking._binary_clf_curve so far.

    :param true_classes: true binary labels
    :param scores: predicted scores
    :param pos_label: label considered as positive, everything else is considered negative
    :return: increasing false positive counts, increasing true positive counts, decreasing thresholds
    """
    fps, tps, thresholds = _binary_clf_curve(true_classes, scores,
                                             pos_label=pos_label,
                                             sample_weight=None)
    return fps, tps, thresholds

def full_precision_recall_curve(y_true, y_score):
    '''
    Helper function to implement the precision-recall curve in a way that
    takes into account recall reaching 1.

    Input:
        y_true: an array of true outcome labels
        y_score: an array of predicted probabilities
    '''
    # Note: the module is named sklearn.metrics._ranking in newer
    # scikit-learn releases.
    from sklearn.metrics.ranking import _binary_clf_curve

    fps, tps, thresholds = _binary_clf_curve(y_true, y_score)
    precision = tps / (tps + fps)
    precision[np.isnan(precision)] = 0
    recall = tps / tps[-1]
    return precision, recall, thresholds

def prec_star(y_true, probas_pred, ss, rs, pos_label=None, sample_weight=None):
    fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)
    fps = fps * rs / float(ss)
    precision = tps / (tps + fps)
    precision[np.isnan(precision)] = 0

    # stop when full recall attained
    # and reverse the outputs so recall is decreasing
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)
    return np.r_[precision[sl], 1]

def precision_recall_curve_modified(y_true, probas_pred, pos_label=None, sample_weight=None):
    fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)
    precision = tps / (tps + fps)
    precision[np.isnan(precision)] = 0
    recall = np.ones(tps.size) if tps[-1] == 0 else tps / tps[-1]

    # stop when full recall attained
    # and reverse the outputs so recall is decreasing
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)
    return np.r_[precision[sl], 1], np.r_[recall[sl], 0], thresholds[sl]

def spec_sens(y_true, y_pred, pos_label=None, sample_weight=None):
    # get false positive and true positive counts at each threshold
    fps, tps, thresholds = _binary_clf_curve(y_true, y_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)
    # positive sample number
    actual_p = sum(y_true)
    # negative sample number
    actual_f = len(y_true) - sum(y_true)
    fps = fps * 1.0 / actual_f
    tps = tps * 1.0 / actual_p
    # true negative rate
    tns = 1 - fps
    # specificity and sensitivity
    spec = tns / (fps + tns)
    sens = tps / tps[-1]
    # squared distance of each (specificity, sensitivity) pair from the
    # ideal corner (1, 1); the threshold minimising it is returned as index
    dis = []
    for k in range(len(sens)):
        d = (1 - spec[k]) * (1 - spec[k]) + (1 - sens[k]) * (1 - sens[k])
        dis.append(d)
    index = np.argmin(dis)
    return spec, sens, thresholds, index

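# --- Hedged usage sketch (not from the original source) --------------------
# Minimal example of spec_sens above: the returned index picks the threshold
# whose (specificity, sensitivity) pair lies closest to the ideal corner
# (1, 1). Toy arrays are made up for illustration; 0/1 labels are assumed,
# and _binary_clf_curve is assumed importable as in the first sketch.
import numpy as np

y_true = np.array([0, 0, 0, 1, 1, 0, 1, 1])
y_pred = np.array([0.2, 0.3, 0.6, 0.4, 0.7, 0.1, 0.8, 0.9])
spec, sens, thresholds, index = spec_sens(y_true, y_pred)
best_threshold = thresholds[index]
# Classify as positive when the score is >= best_threshold.
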
# ------------------------------------------- #
# FPS / TPS TERMINOLOGY (A = Fraud)            #
#                                              #
# TPS: test says A and sample is A             #
# FPS: test says A and sample is not A         #
# TNS: test says not A and sample is not A     #
# FNS: test says not A and sample is A         #
# ------------------------------------------- #
now = datetime.now()

saver.restore(sess, save_model)
test_batch_mse = sess.run(batch_mse, feed_dict={X: test_x})

fps, tps, thresholds = _binary_clf_curve(test_y, test_batch_mse)
fpr, tpr, threshold = roc_curve(test_y, test_batch_mse)

# Note: if `auc` here is sklearn.metrics.auc, it expects curve coordinates
# such as auc(fpr, tpr) rather than labels and raw scores.
print("Test auc score: {:.6f}".format(auc(test_y, test_batch_mse)))

# largest ratio of false positives to true positives over all thresholds
max_ratio = 0
for i in range(thresholds.shape[0]):
    ratio = fps[i] / tps[i]
    if ratio > max_ratio:
        max_ratio = ratio

# FINDING THE THRESHOLD (TRAINING SET)
with tf.Session() as sess:
    # Build the graph and restore weights here
    ...

def nv_binary_clf_curve_test():
    N = np.random.randint(low=1, high=10)
    y_bool = np.random.rand(N) <= 0.5
    y_pred = np.random.rand(N)

    sample_weight = None
    if np.random.rand() <= 0.2:
        sample_weight = np.abs(np.random.randn(N))
    if np.random.rand() <= 0.2:
        sample_weight = 1 + np.random.multinomial(N, np.ones(N) / N)
    if np.random.rand() <= 0.2:
        sample_weight = np.maximum(np.random.multinomial(N, np.ones(N) / N), 1e-6)

    fps, tps, thresholds = _nv_binary_clf_curve(y_bool, y_pred, sample_weight)
    assert (fps.shape == tps.shape and fps.shape == thresholds.shape)
    assert (np.all(np.isfinite(fps)))
    assert (np.all(np.isfinite(tps)))
    assert (np.all(np.isfinite(thresholds[1:])))
    assert (fps[0] == 0 and tps[0] == 0 and thresholds[0] == np.inf)
    if sample_weight is None:
        assert (np.abs(fps[-1] - np.sum(~y_bool)) <= 1e-8)
        assert (np.abs(tps[-1] - np.sum(y_bool)) <= 1e-8)
    else:
        assert (np.abs(fps[-1] - np.sum(sample_weight * ~y_bool)) <= 1e-8)
        assert (np.abs(tps[-1] - np.sum(sample_weight * y_bool)) <= 1e-8)
    assert (np.all((np.diff(fps) >= 0.0) & (np.diff(tps) >= 0.0)))
    assert (np.all((np.diff(fps) > 0) | (np.diff(tps) > 0)))
    assert (np.all(np.diff(thresholds) < 0.0))

    fpr, tpr, thresholds_roc = _nv_roc_curve(y_bool, y_pred, sample_weight)
    assert (fpr.shape == tpr.shape and fpr.shape == thresholds_roc.shape)
    assert (np.all(np.isfinite(fpr)))
    assert (np.all(np.isfinite(tpr)))
    assert (np.all(np.isfinite(thresholds_roc[1:])))
    assert (fpr[0] == 0.0 and tpr[0] == 0.0)
    assert (fpr[-1] == 1.0 and tpr[-1] == 1.0)
    assert (np.all((np.diff(fpr) >= 0.0) & (np.diff(tpr) >= 0.0)))
    assert (np.all((np.diff(fpr) > 0.0) | (np.diff(tpr) > 0.0)))
    assert (np.all(np.diff(thresholds_roc) < 0.0))

    rec, prec, thresholds_pr = _nv_recall_precision_curve(
        y_bool, y_pred, sample_weight)
    assert (rec.shape == prec.shape and rec.shape == thresholds_pr.shape)
    assert (np.all(np.isfinite(rec)))
    assert (np.all(np.isfinite(prec)))
    assert (np.all(np.isfinite(thresholds_pr[1:])))
    assert (rec[0] == 0.0 and rec[-1] == 1.0)
    assert (len(prec) >= 2 and prec[0] == prec[1])
    b_rate = np.mean(y_bool) if sample_weight is None else \
        np.true_divide(np.sum(sample_weight * y_bool), np.sum(sample_weight))
    assert (np.max(np.abs(prec[-1] - b_rate)) <= 1e-8)
    # Note: may have repeats in PR curve
    assert (np.all(np.diff(rec) >= 0.0))
    assert (np.all(np.diff(thresholds_pr) < 0.0))

    rec_gain, prec_gain, thresholds_prg = _nv_prg_curve(
        y_bool, y_pred, sample_weight)
    assert (rec_gain.shape == prec_gain.shape)
    assert (rec_gain.shape == thresholds_prg.shape)
    assert (np.all(np.isfinite(thresholds_prg[1:])))
    assert (rec_gain[0] == 0.0 and rec_gain[-1] == 1.0)
    assert (np.all(rec_gain <= 1.0) and np.all(prec_gain <= 1.0))
    assert (np.all(np.diff(rec_gain) >= 0.0))
    assert (np.allclose(prec_gain[-1], 0.0))

    if np.all(y_bool) or (not np.any(y_bool)):
        assert (np.allclose(0.5, np.trapz(fpr, tpr)))
        assert (np.allclose(np.mean(y_bool), np.sum(prec[:-1] * np.diff(rec))))
        assert (np.allclose(0.0, np.sum(prec_gain[:-1] * np.diff(rec_gain))))
        return

    fps2, tps2, thresholds2 = _binary_clf_curve(y_bool, y_pred,
                                                pos_label=True,
                                                sample_weight=sample_weight)
    assert (np.allclose(fps[1:], fps2))
    assert (np.allclose(tps[1:], tps2))
    assert (np.allclose(thresholds[1:], thresholds2))

    fpr2, tpr2, thresholds2 = roc_curve(y_bool, y_pred, pos_label=True,
                                        sample_weight=sample_weight,
                                        drop_intermediate=False)
    # sklearn inconsistent on including origin ==> need if statement
    if len(fpr) == len(fpr2):
        assert (np.allclose(fpr, fpr2))
        assert (np.allclose(tpr, tpr2))
        assert (np.allclose(thresholds_roc[1:], thresholds2[1:]))
    else:
        assert (np.allclose(fpr[1:], fpr2))
        assert (np.allclose(tpr[1:], tpr2))
        assert (np.allclose(thresholds_roc[1:], thresholds2))

    prec2, rec2, thresholds2 = \
        precision_recall_curve(y_bool, y_pred, pos_label=True,
                               sample_weight=sample_weight)
    prec2, rec2, thresholds2 = prec2[::-1], rec2[::-1], thresholds2[::-1]
    prec2[0] = prec2[1]
    err = rec[len(rec2):] - 1.0
    assert (len(err) == 0 or np.max(np.abs(err)) <= 1e-8)
    assert (np.allclose(rec[:len(rec2)], rec2))
    assert (np.allclose(prec[:len(rec2)], prec2))
    assert (np.allclose(thresholds_pr[1:len(rec2)], thresholds2))

    with np.errstate(divide='ignore', invalid='ignore'):
        rec_gain2 = (rec - b_rate) / ((1.0 - b_rate) * rec)
        prec_gain2 = (prec - b_rate) / ((1.0 - b_rate) * prec)
    idx = rec_gain2 > 0.0
    assert (np.allclose(rec_gain[1:], rec_gain2[idx]))
    assert (np.allclose(prec_gain[1:], prec_gain2[idx]))
    assert (np.allclose(thresholds_prg[1:], thresholds_pr[idx]))
    assert (np.allclose(rec_gain[0], 0.0))
    idx0 = np.where(~idx)[0][-1]
    assert (np.allclose(prec_gain[0], prec_gain2[idx0]))
    assert (np.allclose(thresholds_prg[0], thresholds_pr[idx0]))

def detection_error_tradeoff(y_true, probas_pred, pos_label=None,
                             sample_weight=None):
    """Compute error rates for different probability thresholds.

    Note: this implementation is restricted to the binary classification task.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        True targets of binary classification in range {-1, 1} or {0, 1}.

    probas_pred : array, shape = [n_samples]
        Estimated probabilities or decision function.

    pos_label : int, optional (default=None)
        The label of the positive class.

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.

    Returns
    -------
    fpr : array, shape = [n_thresholds]
        False positive rate, element i being the fraction of negative
        samples assigned a score >= thresholds[i].

    fnr : array, shape = [n_thresholds]
        False negative rate, element i being the fraction of positive
        samples assigned a score < thresholds[i].

    thresholds : array, shape = [n_thresholds]
        Increasing score values.

    References
    ----------
    .. [1] `Wikipedia entry for Detection error tradeoff
           <https://en.wikipedia.org/wiki/Detection_error_tradeoff>`_
    .. [2] `The DET Curve in Assessment of Detection Task Performance
           <http://www.itl.nist.gov/iad/mig/publications/storage_paper/det.pdf>`_
    .. [3] `2008 NIST Speaker Recognition Evaluation Results
           <http://www.itl.nist.gov/iad/mig/tests/sre/2008/official_results/>`_
    .. [4] `DET-Curve Plotting software for use with MATLAB
           <http://www.itl.nist.gov/iad/mig/tools/DETware_v2.1.targz.htm>`_

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import detection_error_tradeoff
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> fps, fns, thresholds = detection_error_tradeoff(y_true, y_scores)
    >>> fps
    array([ 0.5,  0.5,  0. ])
    >>> fns
    array([ 0. ,  0.5,  0.5])
    >>> thresholds
    array([ 0.35,  0.4 ,  0.8 ])
    """
    fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)
    fns = tps[-1] - tps
    p_count = tps[-1]  # total (weighted) positives
    n_count = fps[-1]  # total (weighted) negatives

    # start where false positives are zero and stop where false negatives are
    # zero, and reverse the outputs so the false positive rate is decreasing
    last_ind = tps.searchsorted(tps[-1]) + 1
    first_ind = fps[::-1].searchsorted(fps[0])
    sl = range(first_ind, last_ind)[::-1]
    # false positive rate = FP / negatives, false negative rate = FN / positives
    return fps[sl] / n_count, fns[sl] / p_count, thresholds[sl]

def plot_eval_metrics(self, plot_size=8, fname='ACC_PRE_REC_F1', table=True,
                      save_format_table='csv', metrics=('ACC', 'PRE', 'REC', 'F1'),
                      plot_format='.pdf'):
    """This method saves a plot of the requested metrics at different thresholds
    for the data used to create the object. It also saves a table of the values
    used to create the plot, if requested.

    :param plot_size: int
        Dimensions of the plot; it is always a square plot.
    :param fname: str
        The name of the file used for saving it to disk.
    :param table: bool
        Whether a table of the metrics should be saved beside the plot or not.
    :param save_format_table: str
        The format of the file to be saved, either 'csv' or 'pkl'.
    :param metrics: tuple
        ('ACC', 'PRE', 'REC', 'F1') Contains the different metrics to be
        plotted; you can only select from those 4.
    :param plot_format: str
        This defines the format used to save the plot: '.png', '.jpg', '.pdf'.
    :return: None. It saves a plot of the requested metrics at different
        thresholds; these metrics are calculated with respect to the data used
        to initiate the instance of the class viz.
    """
    fps, tps, thr = _binary_clf_curve(self.true_label, self.pred_score)
    tns, fns = fps[-1] - fps, tps[-1] - tps
    precision = tps / (tps + fps)
    recall = tps / tps[-1]
    accuracy = (tns + tps) / (fps[-1] + tps[-1])
    f1_score = 2 * (precision * recall) / (precision + recall + 1e-8)

    if table:
        metrics_df = pd.DataFrame({'Threshold': thr, 'Accuracy': accuracy,
                                   'Precision': precision, 'Recall': recall,
                                   'F1_Score': f1_score})
        if save_format_table == 'csv':
            metrics_df.to_csv(os.path.join(self.viz_dir, fname + '.csv'))
        else:
            metrics_df.to_pickle(os.path.join(self.viz_dir, fname + '.pkl'))

    fig, ax = plt.subplots(figsize=(plot_size, plot_size))
    if 'ACC' in metrics:
        ax.plot(thr, accuracy, color=Config.colors['RED'], lw=2, label='Accuracy')
    if 'PRE' in metrics:
        ax.plot(thr, precision, color=Config.colors['TRQ'], lw=2, label='Precision')
    if 'REC' in metrics:
        ax.plot(thr, recall, color=Config.colors['YEL'], lw=2, label='Recall')
    if 'F1' in metrics:
        ax.plot(thr, f1_score, color='black', lw=2, label='F1_Score', linestyle='-')

    ax.set_title('Model Evaluation Metrics', fontsize=Config.TIT_FS, fontweight='bold')
    ax.set_xlim([0.0, thr.max() + 0.01])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('Probability Threshold', fontsize=Config.AXS_FS)
    ax.set_ylabel('Evaluation Metrics Scores', fontsize=Config.AXS_FS)
    ax.legend(loc="best")
    fig.savefig(os.path.join(self.viz_dir, fname + plot_format), bbox_inches='tight')
    plt.close()