def compare_binary(roc1: ROC,
                   roc2: ROC,
                   alt_hypothesis: float = 0.05,
                   seed: Optional[int] = None) -> Tuple[bool, float]:
    """Test roc1 < roc2 by counting bootstrapped AUC differences.

    Both ROC curves are bootstrapped with ``bootstrap_roc`` (the same seed is
    passed for both curves) and the AUC (Area Under the Curve) of every
    bootstrap is collected. Let

        aucs_diff = [auc21 - auc11, auc22 - auc12, ..., auc2n - auc1n],

    where auc1i is the AUC of the ith bootstrap of roc1, and auc2i is the AUC
    of the ith bootstrap of roc2. The p-value for rejecting the zero
    hypothesis roc1 >= roc2 is the fraction of bootstraps in which roc2 does
    not have the strictly larger AUC:

        p_value = sum(aucs_diff <= 0) / n.

    If p_value is smaller than alt_hypothesis we accept the alternative
    hypothesis roc1 < roc2.

    Parameters
    ----------
    roc1
        The "to be assumed" smaller ROC curve than roc2.
    roc2
        The "to be assumed" larger ROC curve than roc1.
    alt_hypothesis
        The density for which we reject the zero hypothesis, and for which we
        therefore accept roc1 < roc2.
    seed
        Seed used for bootstrapping both ROC curves. If no seed is given a
        random seed will be used, resulting in non-deterministic results.

    Raises
    ------
    ValueError
        If alt_hypothesis is not between 0 and 1.

    Returns
    -------
    Tuple of a boolean and the p-value. I.e. the boolean represents if we can
    accept the alternative hypothesis roc1 < roc2, and the p-value represents
    the strength with which we accept the alternative hypothesis roc1 < roc2.

    """
    if not 0 <= alt_hypothesis <= 1:
        raise ValueError('Alternative hypothesis must be between 0 and 1.')

    bootstrap_auc1 = np.array(
        [roc.auc for roc in bootstrap_roc(roc1, seed=seed)])
    bootstrap_auc2 = np.array(
        [roc.auc for roc in bootstrap_roc(roc2, seed=seed)])
    aucs_diff = bootstrap_auc2 - bootstrap_auc1

    # Count the non-favourable bootstraps in one vectorized call instead of
    # iterating over the array with the builtin sum().
    p_value = np.count_nonzero(aucs_diff <= 0) / aucs_diff.size

    # Return plain Python scalars so the result matches the declared
    # Tuple[bool, float] (a numpy.bool_ is not a Python bool).
    return bool(p_value < alt_hypothesis), float(p_value)
def test_bootstrap_roc_n_jobs(self):
    """Check how bootstrap_roc reacts to different n_jobs values.

    n_jobs=0 is expected to raise a RuntimeError; every other tested value
    is expected to produce 1000 bootstraps.
    """
    labels = [True, True, False, False]
    scores = [0.1, 0.3, 0.2, 0.4]
    curve = ROC(labels, scores)

    # Zero jobs must be rejected.
    with self.assertRaises(RuntimeError):
        bootstrap_roc(curve, n_jobs=0)

    # Negative and positive job counts must all give the same amount of
    # bootstraps.
    for job_count in (-2, -1, 1, 2, 4, 8, 16):
        bootstraps = bootstrap_roc(curve, n_jobs=job_count)
        assert len(bootstraps) == 1000
def test_bootstrap_roc_num_bootstraps(self):
    """Check the num_bootstraps argument of bootstrap_roc.

    Non-positive values are expected to raise a ValueError; positive values
    are expected to yield exactly that many bootstraps.
    """
    labels = [True, True, False, False]
    scores = [0.1, 0.3, 0.2, 0.4]
    curve = ROC(labels, scores)

    # Invalid amounts: negative numbers and zero.
    for invalid_count in (-1000, -1, 0):
        with self.assertRaises(ValueError):
            bootstrap_roc(curve, num_bootstraps=invalid_count)

    # Valid amounts: the result length must match the request.
    for requested in (1, 2, 8, 100, 1000, 10000):
        bootstraps = bootstrap_roc(curve, num_bootstraps=requested)
        assert len(bootstraps) == requested
def test_bootstrap_roc_ex1(self):
    """Check that bootstrap_roc without extra arguments gives 1000 results."""
    labels = [True, True, False, False]
    scores = [0.1, 0.3, 0.2, 0.4]

    bootstraps = bootstrap_roc(ROC(labels, scores))

    assert len(bootstraps) == 1000
def test_bootstrap_roc_ex2(self):
    """Check mean and spread of bootstrapped AUCs for seeded random data.

    Ground truth and estimates are drawn from a RandomState seeded with 37,
    and bootstrap_roc is seeded with 37 as well, so the expected values are
    fixed numbers.
    """
    rng = np.random.RandomState(37)
    sample_count = 10000

    # The draw order matters for reproducibility: labels first, scores second.
    labels = rng.binomial(1, 0.5, sample_count)
    scores = rng.rand(sample_count)
    curve = ROC(labels, scores)

    aucs = [bootstrap.auc for bootstrap in bootstrap_roc(curve, seed=37)]

    auc_mean = np.mean(aucs)
    auc_spread = np.var(aucs) ** .5
    assert np.isclose(auc_mean, 0.5042963196452369)
    assert np.isclose(auc_spread, 0.006105232099260582)
def bootstrap_confidence(self,
                         num_bootstraps: int = 1000,
                         num_bootstrap_jobs: int = 1,
                         show_min_max: bool = False,
                         mean_roc: bool = False,
                         p_value: float = 0.05,
                         seed: Optional[int] = None) -> BootstrapPlot:
    """Compute a confidence band for the ROC curve with bootstrapping.

    The ROC curve is bootstrapped, every bootstrapped curve is linearly
    interpolated on a fixed false positive rate grid running from 0 to 1 in
    steps of 0.01, and the statistics are taken per grid point over all
    bootstrapped curves.

    Parameters
    ----------
    num_bootstraps
        Number of bootstraps to apply on the ROC curve.
    num_bootstrap_jobs
        Number of jobs used to compute the bootstraps for the ROC curve in
        parallel. If num_bootstrap_jobs is set negative all available cpu
        threads will be used.
    show_min_max
        If set to True the minimum and maximum values obtained during
        bootstrapping are included in the result.
    mean_roc
        If set to True all bootstrapped ROC curves are used to create an
        averaged ROC curve. Usually this ROC curve looks more smooth than
        the original ROC curve, and therefore can be used for smoothing the
        original ROC curve.
    p_value
        Value in the range [0, 1). This value determines the confidence
        area of the ROC curve: the band runs from the ``p_value / 2``
        quantile to the ``1 - p_value / 2`` quantile.
    seed
        Seed used for bootstrapping the ROC curve. If seed is set to None
        a random seed will be chosen, which will lead to non-deterministic
        results.

    Raises
    ------
    ValueError
        If p_value is smaller than 0, or not smaller than 1.

    Returns
    -------
    BootstrapPlot
        A named tuple containing:
        `xrange`, the false positive rate grid;
        `min_quantile`, the smallest true positive rate values within the
        given confidence;
        `max_quantile`, the largest true positive rate values within the
        given confidence;
        `mean`, if mean_roc is set, the averaged true positive values over
        the bootstrapped ROC curves, otherwise None;
        `min`, if show_min_max is set, the smallest true positive rate
        values over the bootstrapped ROC curves, otherwise None;
        `max`, if show_min_max is set, the largest true positive rate
        values of the bootstrapped ROC curves, otherwise None.

    """
    if not 0 <= p_value < 1:
        raise ValueError('P-value should be between 0 and 1.')

    # Import bootstrap_roc locally to avoid cross reference imports.
    from pyroc import bootstrap_roc

    bootstrapped_rocs = bootstrap_roc(self,
                                      num_bootstraps=num_bootstraps,
                                      seed=seed,
                                      n_jobs=num_bootstrap_jobs)

    # Common false positive rate grid on which all curves are compared.
    fpr_grid = np.arange(0, 1.01, 0.01)

    def tpr_on_grid(curve):
        # Resample the true positive rates of one curve onto the grid.
        fps, tps, _ = curve.roc()
        return np.interp(fpr_grid, fps, tps)

    # One row per bootstrapped curve, one column per grid point.
    tpr_matrix = np.vstack([tpr_on_grid(curve)
                            for curve in bootstrapped_rocs])

    tpr_min = tpr_max = None
    if show_min_max:
        tpr_min = np.min(tpr_matrix, axis=0)
        tpr_max = np.max(tpr_matrix, axis=0)

    tpr_mean = None
    if mean_roc:
        tpr_mean = np.mean(tpr_matrix, axis=0)

    lower_band = np.quantile(tpr_matrix, p_value / 2, axis=0)
    upper_band = np.quantile(tpr_matrix, 1 - p_value / 2, axis=0)

    return BootstrapPlot(xrange=fpr_grid,
                         min=tpr_min,
                         max=tpr_max,
                         mean=tpr_mean,
                         min_quantile=lower_band,
                         max_quantile=upper_band)
"""Simple example to show how to use bootstrapping for ROC curves.""" import matplotlib.pyplot as plt import numpy as np from pyroc import ROC, bootstrap_roc # Simple example to test bootstrap ex_rng = np.random.RandomState(37) num = 100 ex_gt = ex_rng.binomial(1, 0.5, num) ex_est = ex_rng.rand((num)) ex_roc = ROC(ex_gt, ex_est) ex_roc_list = bootstrap_roc(ex_roc, seed=37) ex_roc_auc_list = [roc.auc for roc in ex_roc_list] print(f'Average ROC AUC: {np.mean(ex_roc_auc_list)}' f' +/- {np.var(ex_roc_auc_list)**.5}') ax = ex_roc.plot(bootstrap=True, num_bootstraps=1000, seed=37, num_bootstrap_jobs=-1, color='red', p_value=0.05, mean_roc=False, plot_roc_curve=True, show_min_max=False) ax = ex_roc.plot(bootstrap=True, num_bootstraps=1000, seed=37,
def compare_bootstrap(roc1: ROC,
                      roc2: ROC,
                      alt_hypothesis: float = 0.05,
                      seed: Optional[int] = None) -> Tuple[bool, float]:
    """Compute roc1 < roc2 with alternative hypothesis using DeLong
    bootstrapping.

    The idea behind this algorithm is to bootstrap roc1 and roc2 (the same
    seed is passed for both curves), and compute the AUC (Area Under the
    Curve) for each of the bootstraps of roc1 and roc2. For each pair of
    bootstraps we compute the difference of the AUCs. Let

        aucs_diff = [auc21 - auc11, auc22 - auc12, ..., auc2n - auc1n],

    where auc1i is the AUC of the ith bootstrap of roc1, and auc2i is the AUC
    of the ith bootstrap of roc2. We define a new stochast by

        Z = mean(aucs_diff) / std(aucs_diff).

    We assume that Z ~ N(0, 1), i.e. Z is drawn from a Gaussian distribution
    centered around 0 with standard deviation 1. Our zero hypothesis is that
    roc1 >= roc2, our alternative hypothesis is that roc1 < roc2. The p-value
    is 1 - gaussian_cdf(Z), and we reject the zero hypothesis if the p-value
    is smaller than alt_hypothesis.

    Parameters
    ----------
    roc1
        The "to be assumed" smaller ROC curve than roc2.
    roc2
        The "to be assumed" larger ROC curve than roc1.
    alt_hypothesis
        The density for which we reject the zero hypothesis, and for which we
        therefore accept roc1 < roc2.
    seed
        Seed used for DeLong bootstrapping. If no seed is given a random seed
        will be used, resulting in non-deterministic results.

    Raises
    ------
    ValueError
        If alt_hypothesis is not between 0 and 1.

    Returns
    -------
    Tuple of a boolean and the p-value. I.e. the boolean represents if we can
    accept the alternative hypothesis roc1 < roc2, and the p-value represents
    the strength with which we accept the alternative hypothesis roc1 < roc2.

    """
    if not 0 <= alt_hypothesis <= 1:
        raise ValueError('Alternative hypothesis must be between 0 and 1.')

    bootstrap_auc1 = np.array(
        [roc.auc for roc in bootstrap_roc(roc1, seed=seed)])
    bootstrap_auc2 = np.array(
        [roc.auc for roc in bootstrap_roc(roc2, seed=seed)])
    aucs_diff = bootstrap_auc2 - bootstrap_auc1

    sample = np.mean(aucs_diff)
    # Compute the spread once; it is needed for the guard and the division.
    spread = np.std(aucs_diff)
    # Only normalise when the spread is positive, to avoid dividing by zero.
    # NOTE(review): with zero spread the unnormalised mean is passed to
    # gaussian_cdf -- confirm that this fallback is intended.
    if spread > 0:
        sample = sample / spread

    p_value = 1 - gaussian_cdf(sample)

    # Return plain Python scalars so the result matches the declared
    # Tuple[bool, float] regardless of what gaussian_cdf returns.
    return bool(p_value < alt_hypothesis), float(p_value)