def test_bootstrap_vectorized_3samp(method, axis):
    def statistic(*data, axis=0):
        # an arbitrary, vectorized statistic
        return sum(sample.mean(axis) for sample in data)

    def statistic_1d(*data):
        # the same statistic, not vectorized
        for sample in data:
            assert sample.ndim == 1
        return statistic(*data, axis=0)

    np.random.seed(0)
    x = np.random.rand(4, 5)
    y = np.random.rand(4, 5)
    z = np.random.rand(4, 5)
    res1 = bootstrap((x, y, z), statistic, vectorized=True, axis=axis,
                     n_resamples=100, method=method, random_state=0)
    res2 = bootstrap((x, y, z), statistic_1d, vectorized=False, axis=axis,
                     n_resamples=100, method=method, random_state=0)
    assert_allclose(res1.confidence_interval, res2.confidence_interval)
    assert_allclose(res1.standard_error, res2.standard_error)
def test_bootstrap_vectorized_1samp(method, axis):
    def statistic(x, axis=0):
        # an arbitrary, vectorized statistic
        return x.mean(axis=axis)

    def statistic_1d(x):
        # the same statistic, not vectorized
        assert x.ndim == 1
        return statistic(x, axis=0)

    np.random.seed(0)
    x = np.random.rand(4, 5)
    res1 = bootstrap((x,), statistic, vectorized=True, axis=axis,
                     n_resamples=100, batch=None, method=method,
                     random_state=0)
    res2 = bootstrap((x,), statistic_1d, vectorized=False, axis=axis,
                     n_resamples=100, batch=10, method=method,
                     random_state=0)
    assert_allclose(res1.confidence_interval, res2.confidence_interval)
    assert_allclose(res1.standard_error, res2.standard_error)
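# Minimal usage sketch (illustrative only, separate from the test above):
# with `vectorized=True`, scipy.stats.bootstrap calls the statistic once per
# batch of resamples, passing an `axis` argument; with `vectorized=False`, it
# applies the statistic slice-by-slice. Both paths should agree for the same
# `random_state`.
import numpy as np
from scipy.stats import bootstrap

sample = np.random.default_rng(0).random(50)
res_vec = bootstrap((sample,), np.mean, vectorized=True,
                    n_resamples=999, random_state=1)
res_1d = bootstrap((sample,), lambda x: np.mean(x), vectorized=False,
                   n_resamples=999, random_state=1)
# res_vec.confidence_interval should match res_1d.confidence_interval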
def test_bootstrap_batch(method, axis):
    # for one-sample statistics, batch size shouldn't affect the result
    np.random.seed(0)
    x = np.random.rand(10, 11, 12)
    res1 = bootstrap((x,), np.mean, batch=None, method=method,
                     random_state=0, axis=axis, n_resamples=100)
    res2 = bootstrap((x,), np.mean, batch=10, method=method,
                     random_state=0, axis=axis, n_resamples=100)
    assert_equal(res2.confidence_interval.low, res1.confidence_interval.low)
    assert_equal(res2.confidence_interval.high, res1.confidence_interval.high)
    assert_equal(res2.standard_error, res1.standard_error)
def test_bootstrap_vectorized(method, axis, paired):
    # test that paired is vectorized as expected: when samples are tiled,
    # CI and standard_error of each axis-slice is the same as those of the
    # original 1d sample

    if not paired and method == 'BCa':
        # should re-assess when BCa is extended
        pytest.xfail(reason="BCa currently for 1-sample statistics only")

    np.random.seed(0)

    def my_statistic(x, y, z, axis=-1):
        return x.mean(axis=axis) + y.mean(axis=axis) + z.mean(axis=axis)

    shape = 10, 11, 12
    n_samples = shape[axis]

    x = np.random.rand(n_samples)
    y = np.random.rand(n_samples)
    z = np.random.rand(n_samples)
    res1 = bootstrap((x, y, z), my_statistic, paired=paired, method=method,
                     random_state=0, axis=0, n_resamples=100)

    reshape = [1, 1, 1]
    reshape[axis] = n_samples
    x = np.broadcast_to(x.reshape(reshape), shape)
    y = np.broadcast_to(y.reshape(reshape), shape)
    z = np.broadcast_to(z.reshape(reshape), shape)
    res2 = bootstrap((x, y, z), my_statistic, paired=paired, method=method,
                     random_state=0, axis=axis, n_resamples=100)

    assert_allclose(res2.confidence_interval.low,
                    res1.confidence_interval.low)
    assert_allclose(res2.confidence_interval.high,
                    res1.confidence_interval.high)
    assert_allclose(res2.standard_error, res1.standard_error)

    result_shape = list(shape)
    result_shape.pop(axis)

    assert_equal(res2.confidence_interval.low.shape, result_shape)
    assert_equal(res2.confidence_interval.high.shape, result_shape)
    assert_equal(res2.standard_error.shape, result_shape)
def test_bootstrap_degenerate(method):
    data = 35 * [10000.]
    if method == "BCa":
        with np.errstate(invalid='ignore'):
            with pytest.warns(BootstrapDegenerateDistributionWarning):
                res = bootstrap([data, ], np.mean, method=method)
                assert_equal(res.confidence_interval, (np.nan, np.nan))
    else:
        res = bootstrap([data, ], np.mean, method=method)
        assert_equal(res.confidence_interval, (10000., 10000.))
    assert_equal(res.standard_error, 0)
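# Hedged sketch (illustrative, separate from the test above): with constant
# data the bootstrap distribution is a point mass, so a percentile CI
# collapses to a single value and the standard error is zero.
import numpy as np
from scipy.stats import bootstrap

res = bootstrap(([10000.] * 35,), np.mean, method='percentile',
                random_state=0)
# res.confidence_interval -> (10000.0, 10000.0); res.standard_error -> 0.0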
def test_bootstrap_against_itself_1samp(method, expected):
    # The expected values in this test were generated using bootstrap
    # to check for unintended changes in behavior. The test also makes sure
    # that bootstrap works with multi-sample statistics and that the
    # `axis` argument works as expected / function is vectorized.
    np.random.seed(0)

    n = 100  # size of sample
    n_resamples = 999  # number of bootstrap resamples used to form each CI
    confidence_level = 0.9

    # The true mean is 5
    dist = stats.norm(loc=5, scale=1)
    stat_true = dist.mean()

    # Do the same thing 2000 times. (The code is fully vectorized.)
    n_replications = 2000
    data = dist.rvs(size=(n_replications, n))
    res = bootstrap((data,),
                    statistic=np.mean,
                    confidence_level=confidence_level,
                    n_resamples=n_resamples,
                    batch=50,
                    method=method,
                    axis=-1)
    ci = res.confidence_interval

    # ci contains vectors of lower and upper confidence interval bounds
    ci_contains_true = np.sum((ci[0] < stat_true) & (stat_true < ci[1]))
    assert ci_contains_true == expected

    # ci_contains_true is not inconsistent with confidence_level
    pvalue = stats.binomtest(ci_contains_true, n_replications,
                             confidence_level).pvalue
    assert pvalue > 0.1
def test_vector_valued_statistic(method):
    # Generate 95% confidence interval around MLE of normal distribution
    # parameters. Repeat 100 times, each time on a sample of size 100.
    # Check that the confidence interval contains the true parameters
    # ~95 times. Confidence intervals are estimated and stochastic; a test
    # failure does not necessarily indicate that something is wrong. More
    # important than the values of `counts` below is that the shapes of the
    # outputs are correct.
    rng = np.random.default_rng(2196847219)
    params = 1, 0.5
    sample = stats.norm.rvs(*params, size=(100, 100), random_state=rng)

    def statistic(data):
        return stats.norm.fit(data)

    res = bootstrap((sample,), statistic, method=method, axis=-1,
                    vectorized=False)

    counts = np.sum((res.confidence_interval.low.T < params)
                    & (res.confidence_interval.high.T > params),
                    axis=0)
    assert np.all(counts >= 90)
    assert np.all(counts <= 100)
    assert res.confidence_interval.low.shape == (2, 100)
    assert res.confidence_interval.high.shape == (2, 100)
    assert res.standard_error.shape == (2, 100)
def test_bootstrap_against_R(method, expected):
    # Compare against R's "boot" library
    # library(boot)
    #
    # stat <- function (x, a) {
    #     mean(x[a])
    # }
    #
    # x <- c(10, 12, 12.5, 12.5, 13.9, 15, 21, 22,
    #        23, 34, 50, 81, 89, 121, 134, 213)
    #
    # # Use a large value so we get a few significant digits for the CI.
    # n = 1000000
    # bootresult = boot(x, stat, n)
    # result <- boot.ci(bootresult)
    # print(result)
    x = np.array([10, 12, 12.5, 12.5, 13.9, 15, 21, 22,
                  23, 34, 50, 81, 89, 121, 134, 213])
    res = bootstrap((x,), np.mean, n_resamples=1000000, method=method,
                    random_state=0)
    assert_allclose(res.confidence_interval, expected, rtol=0.005)
def _grad_conf_int(forecasts, p_value) -> tuple[float, float]:
    forecasts = (forecasts, )
    interval = stats.bootstrap(
        forecasts,
        np.median,
        confidence_level=(1 - p_value),
        random_state=0,
    ).confidence_interval
    return interval.low, interval.high
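# Hedged usage sketch for _grad_conf_int (the forecast values are invented;
# assumes the module's scipy import, i.e. `from scipy import stats` and
# `import numpy as np`): a 95% bootstrap CI around the median of five
# point forecasts.
low, high = _grad_conf_int(np.array([1.2, 0.9, 1.5, 1.1, 1.3]), p_value=0.05)
# low and high bound the median forecast at confidence level 1 - p_value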
def species_accumulation(x, max_steps, n_iter=100):
    steps = np.arange(1, max_steps)
    interpolated = np.arange(1, max_steps) < x.sum()
    accumulation = stats.bootstrap(
        x,
        fn=partial(stats.rarefaction_extrapolation, max_steps=max_steps),
        n_iter=n_iter)
    accumulation['interpolated'] = interpolated
    accumulation['steps'] = steps
    return accumulation
def check_sample_var(sample, popvar):
    # check that the population variance lies within the CI bootstrapped
    # from the sample. This used to be a chi-squared test for variance,
    # but there were too many false positives
    res = stats.bootstrap(
        (sample,),
        lambda x, axis: x.var(ddof=1, axis=axis),
        confidence_level=0.995,
    )
    conf = res.confidence_interval
    low, high = conf.low, conf.high
    assert low <= popvar <= high
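# Hedged usage sketch for check_sample_var (values are invented): a normal
# sample with known population variance 4.0 should pass the coverage check
# with high probability at the wide 99.5% confidence level used above.
sample = stats.norm.rvs(scale=2., size=500, random_state=0)
check_sample_var(sample, popvar=4.)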
def table_bootpack(table, bin_size, n_bootstraps, seed=8472):
    new_table = data.Table()
    for head, subtable in misc.sorted_groupby(table,
                                              key=lambda r: r.corr.shape):
        subtable = data.Table(subtable)
        stacked_corrs = numpy.stack(subtable['corr'], axis=0)
        bootpacks = data.BootPack(
            stats.mean(stacked_corrs),
            stats.bootstrap(stats.bin_(stacked_corrs, bin_size),
                            n_bootstraps, seed=seed))
        new_table.extend(
            data.Record(record, corr=bootpack, bin_size=bin_size,
                        n_bootstraps=n_bootstraps)
            for record, bootpack in zip(subtable, bootpacks))
    return new_table
def lower_ci_bound_on_raw_rewards(
    actions_with_scores_list: List[ActionsWithScores],
    debug: bool = False
) -> Union[AggregatedScores, List[Tuple[PlayerAction, float, float, float]]]:
    """
    The aggregated score is the lower CI bound of the mean of all the
    individual rewards across all permutations (i.e., it doesn't compute
    averages for each permutation first). This requires
    MctsPlayerOptions.save_rewards to be True.
    If debug is True, the output contains the CI limits as well.

    WARNING: This is very slow.
    """
    # pylint: disable=too-many-branches
    is_fully_simulated = are_all_nodes_fully_simulated(
        actions_with_scores_list)
    if is_fully_simulated:
        return _average_ucb_for_fully_simulated_trees(actions_with_scores_list)
    stats = defaultdict(list)
    for actions_with_scores in actions_with_scores_list:
        for action, score in actions_with_scores.items():
            if score.fully_simulated:
                stats[action].extend([score.score for _ in range(score.n)])
            else:
                stats[action].extend(score.rewards)
    actions_and_scores = []
    for action, rewards in stats.items():
        if len(rewards) == 1:
            if debug:
                actions_and_scores.append(
                    (action, rewards[0], rewards[0], rewards[0]))
            else:
                actions_and_scores.append((action, rewards[0]))
        else:
            bootstrap_result = bootstrap((rewards, ), np.mean,
                                         method='percentile',
                                         n_resamples=1000)
            confidence_interval = bootstrap_result.confidence_interval
            if debug:
                actions_and_scores.append(
                    (action, confidence_interval.low, confidence_interval.low,
                     confidence_interval.high))
            else:
                actions_and_scores.append((action, confidence_interval.low))
    # noinspection PyUnreachableCode
    if __debug__:
        logging.debug(
            "MctsPlayer: Lower CI bounds on raw rewards:\n%s",
            pprint.pformat(
                sorted(actions_and_scores, key=lambda x: x[1], reverse=True)))
    return actions_and_scores
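# Hedged sketch (the reward values are invented): the aggregation above
# reduces each action's pooled rewards to the lower bound of a percentile
# bootstrap CI around their mean, for example:
_example_rewards = [0.2, -0.1, 0.4, 0.0, 0.3, 0.1]
_example_score = bootstrap((_example_rewards,), np.mean, method='percentile',
                           n_resamples=1000).confidence_interval.low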
def test_bootstrap_against_theory(method):
    # based on https://www.statology.org/confidence-intervals-python/
    data = stats.norm.rvs(loc=5, scale=2, size=5000, random_state=0)
    alpha = 0.95
    dist = stats.t(df=len(data)-1, loc=np.mean(data), scale=stats.sem(data))
    expected_interval = dist.interval(alpha=alpha)
    expected_se = dist.std()

    res = bootstrap((data,), np.mean, n_resamples=5000,
                    confidence_level=alpha, method=method,
                    random_state=0)
    assert_allclose(res.confidence_interval, expected_interval, rtol=5e-4)
    assert_allclose(res.standard_error, expected_se, atol=3e-4)
def test_bootstrap_gh15678(method):
    # Check that gh-15678 is fixed: when statistic function returned a Python
    # float, method="BCa" failed when trying to add a dimension to the float
    rng = np.random.default_rng(354645618886684)
    dist = stats.norm(loc=2, scale=4)
    data = dist.rvs(size=100, random_state=rng)
    data = (data,)
    res = bootstrap(data, stats.skew, method=method, n_resamples=100,
                    random_state=np.random.default_rng(9563))
    # this always worked because np.apply_along_axis returns NumPy data type
    ref = bootstrap(data, stats.skew, method=method, n_resamples=100,
                    random_state=np.random.default_rng(9563),
                    vectorized=False)
    assert_allclose(res.confidence_interval, ref.confidence_interval)
    assert_allclose(res.standard_error, ref.standard_error)
    assert isinstance(res.standard_error, np.float64)
def test_bootstrap_paired(method):
    # test that `paired` works as expected
    np.random.seed(0)
    n = 100
    x = np.random.rand(n)
    y = np.random.rand(n)

    def my_statistic(x, y, axis=-1):
        return ((x - y)**2).mean(axis=axis)

    def my_paired_statistic(i, axis=-1):
        a = x[i]
        b = y[i]
        res = my_statistic(a, b)
        return res

    i = np.arange(len(x))

    res1 = bootstrap((i,), my_paired_statistic, random_state=0)
    res2 = bootstrap((x, y), my_statistic, paired=True, random_state=0)

    assert_allclose(res1.confidence_interval, res2.confidence_interval)
    assert_allclose(res1.standard_error, res2.standard_error)
def compute_CI(data, metric=np.mean, confidence_level=0.95, axis=-1,
               n_resamples=999, eps=1e-8, **kwargs):
    """
    Parameters
    ----------
    data : np.array of shape (timesteps, sample_size)
        The second dimension is, e.g., the number of runs.

    Returns
    -------
    ci_l : np.array of shape (timesteps,)
    ci_u : np.array of shape (timesteps,)
    """
    from scipy.stats import bootstrap
    data = data + eps
    res = bootstrap((data,), metric, confidence_level=confidence_level,
                    axis=axis, n_resamples=n_resamples, **kwargs)
    ci_l, ci_u = res.confidence_interval
    return ci_l, ci_u
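# Hedged usage sketch for compute_CI (shapes and values are invented): 200
# timesteps of 10 runs each yields one CI of the mean per timestep.
import numpy as np

runs = np.random.default_rng(0).normal(size=(200, 10))
ci_l, ci_u = compute_CI(runs, metric=np.mean, confidence_level=0.95)
# ci_l.shape == ci_u.shape == (200,)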
def test_bootstrap_against_itself_2samp(method, expected):
    # The expected values in this test were generated using bootstrap
    # to check for unintended changes in behavior. The test also makes sure
    # that bootstrap works with multi-sample statistics and that the
    # `axis` argument works as expected / function is vectorized.
    np.random.seed(0)

    n1 = 100  # size of sample 1
    n2 = 120  # size of sample 2
    n_resamples = 999  # number of bootstrap resamples used to form each CI
    confidence_level = 0.9

    # The statistic we're interested in is the difference in means
    def my_stat(data1, data2, axis=-1):
        mean1 = np.mean(data1, axis=axis)
        mean2 = np.mean(data2, axis=axis)
        return mean1 - mean2

    # The true difference in the means is -0.1
    dist1 = stats.norm(loc=0, scale=1)
    dist2 = stats.norm(loc=0.1, scale=1)
    stat_true = dist1.mean() - dist2.mean()

    # Do the same thing 1000 times. (The code is fully vectorized.)
    n_replications = 1000
    data1 = dist1.rvs(size=(n_replications, n1))
    data2 = dist2.rvs(size=(n_replications, n2))
    res = bootstrap((data1, data2),
                    statistic=my_stat,
                    confidence_level=confidence_level,
                    n_resamples=n_resamples,
                    batch=50,
                    method=method,
                    axis=-1)
    ci = res.confidence_interval

    # ci contains vectors of lower and upper confidence interval bounds
    ci_contains_true = np.sum((ci[0] < stat_true) & (stat_true < ci[1]))
    assert ci_contains_true == expected

    # ci_contains_true is not inconsistent with confidence_level
    pvalue = stats.binomtest(ci_contains_true, n_replications,
                             confidence_level).pvalue
    assert pvalue > 0.1
def genStatsFunction(self,
                     fcn: Callable,
                     fcnkwargs: dict[str, Any] = None,
                     ) -> None:
    """
    A wrapper function to generate statistics via a generic function.

    Parameters
    ----------
    fcn : Callable
        The function used to generate the desired statistics.
    fcnkwargs : dict[str, Any]
        The keyword arguments for the function.
    """
    self.fcn = fcn
    if fcnkwargs is None:
        fcnkwargs = dict()
    self.fcnkwargs = fcnkwargs

    if self.bootstrap:
        self.bootstrap_n = order_stat_TI_n(self.bootstrap_k, p=0.5,
                                           c=self.conf)

    # Scalar Variables
    if self.var.isscalar:
        # Calculate nums and confidence interval for each point in the sequence
        self.nums = self.statsFunctionWrapper(self.var.nums)
        if self.bootstrap:
            # Switch to method='BCa' once
            # https://github.com/scipy/scipy/issues/15883 is resolved
            res = bootstrap((np.array(self.var.nums),),
                            self.statsFunctionWrapper,
                            confidence_level=self.conf,
                            n_resamples=self.bootstrap_n,
                            random_state=self.seed,
                            method='basic')
            self.confidence_interval_low_nums = res.confidence_interval.low
            self.confidence_interval_high_nums = res.confidence_interval.high

        # Calculate the corresponding vals based on the nummap
        self.vals = copy(self.nums)
        if self.bootstrap:
            self.confidence_interval_low_vals = copy(
                self.confidence_interval_low_nums)
            self.confidence_interval_high_vals = copy(
                self.confidence_interval_high_nums)
        if self.var.nummap is not None:
            self.vals = [self.var.nummap[num] for num in self.nums]
            if self.bootstrap:
                self.confidence_interval_low_vals = \
                    [self.var.nummap[num]
                     for num in self.confidence_interval_low_nums]
                self.confidence_interval_high_vals = \
                    [self.var.nummap[num]
                     for num in self.confidence_interval_high_nums]

    # 1-D Variables
    elif self.var.maxdim == 1:
        nums_list = get_list(self.var.nums)
        npoints = max(len(x) for x in nums_list)

        if self.bootstrap:
            confidence_interval_low_nums = []
            confidence_interval_high_nums = []

        # Calculate nums and confidence interval for each point in the sequence
        nums = []
        for i in range(npoints):
            numsatidx = np.array([x[i] for x in nums_list if len(x) > i])
            nums.append(self.statsFunctionWrapper(numsatidx))
            if self.bootstrap:
                # Switch to method='BCa' once
                # https://github.com/scipy/scipy/issues/15883 is resolved
                res = bootstrap((numsatidx,),
                                self.statsFunctionWrapper,
                                confidence_level=self.conf,
                                n_resamples=self.bootstrap_n,
                                random_state=self.seed,
                                method='basic')
                confidence_interval_low_nums.append(
                    res.confidence_interval.low)
                confidence_interval_high_nums.append(
                    res.confidence_interval.high)
        self.nums = nums
        if self.bootstrap:
            self.confidence_interval_low_nums = confidence_interval_low_nums
            self.confidence_interval_high_nums = confidence_interval_high_nums

        # Calculate the corresponding vals based on the nummap
        self.vals = copy(self.nums)
        if self.bootstrap:
            self.confidence_interval_low_vals = copy(
                self.confidence_interval_low_nums)
            self.confidence_interval_high_vals = copy(
                self.confidence_interval_high_nums)
        if self.var.nummap is not None:
            self.vals = [[self.var.nummap[x] for x in y] for y in self.nums]
            if self.bootstrap:
                self.confidence_interval_low_vals \
                    = [[self.var.nummap[x] for x in y]
                       for y in self.confidence_interval_low_nums]
                self.confidence_interval_high_vals \
                    = [[self.var.nummap[x] for x in y]
                       for y in self.confidence_interval_high_nums]

    else:
        # Suppress warning since this will become valid when Var is split
        # warn('VarStat only available for scalar or 1-D data')
        pass
    sm_l.append(float(sm[key]))
    c_l.append(float(cython[key]))
    r_l.append(float(r[key]))

gl_l = np.asarray(gl_l)
sm_l = np.asarray(sm_l)
c_l = np.asarray(c_l)
r_l = np.asarray(r_l)

cython_errors = np.asarray(np.abs(c_l - gl_l), dtype=float)
sm_errors = np.asarray(np.abs(sm_l - gl_l), dtype=float)
r_errors = np.asarray(np.abs(r_l - gl_l), dtype=float)

#%%
import scipy.stats as stats

CI = .99
res_cython = stats.bootstrap((cython_errors,), np.mean, confidence_level=CI)
ci_cython = res_cython.confidence_interval
print(f"""
Mean Absolute Error CI\n
Cython {CI*100}% Confidence Interval:
---------------------------------
    Lower     |     Upper
-----------------------------
{ci_cython.low:.3e}   |   {ci_cython.high:.3e}
""")

res_sm = stats.bootstrap((sm_errors,), np.mean, confidence_level=CI)
ci_sm = res_sm.confidence_interval
print(f"""\n\n
Statsmodels {CI*100}% Confidence Interval:
def test_bootstrap_iv():

    message = "`data` must be a sequence of samples."
    with pytest.raises(ValueError, match=message):
        bootstrap(1, np.mean)

    message = "`data` must contain at least one sample."
    with pytest.raises(ValueError, match=message):
        bootstrap(tuple(), np.mean)

    message = "each sample in `data` must contain two or more observations..."
    with pytest.raises(ValueError, match=message):
        bootstrap(([1, 2, 3], [1]), np.mean)

    message = ("When `paired is True`, all samples must have the same length ")
    with pytest.raises(ValueError, match=message):
        bootstrap(([1, 2, 3], [1, 2, 3, 4]), np.mean, paired=True)

    message = "`vectorized` must be `True` or `False`."
    with pytest.raises(ValueError, match=message):
        bootstrap(1, np.mean, vectorized='ekki')

    message = "`axis` must be an integer."
    with pytest.raises(ValueError, match=message):
        bootstrap(([1, 2, 3],), np.mean, axis=1.5)

    message = "could not convert string to float"
    with pytest.raises(ValueError, match=message):
        bootstrap(([1, 2, 3],), np.mean, confidence_level='ni')

    message = "`n_resamples` must be a positive integer."
    with pytest.raises(ValueError, match=message):
        bootstrap(([1, 2, 3],), np.mean, n_resamples=-1000)

    message = "`n_resamples` must be a positive integer."
    with pytest.raises(ValueError, match=message):
        bootstrap(([1, 2, 3],), np.mean, n_resamples=1000.5)

    message = "`batch` must be a positive integer or None."
    with pytest.raises(ValueError, match=message):
        bootstrap(([1, 2, 3],), np.mean, batch=-1000)

    message = "`batch` must be a positive integer or None."
    with pytest.raises(ValueError, match=message):
        bootstrap(([1, 2, 3],), np.mean, batch=1000.5)

    message = "`method` must be in"
    with pytest.raises(ValueError, match=message):
        bootstrap(([1, 2, 3],), np.mean, method='ekki')

    message = "`method = 'BCa' is only available for one-sample statistics"

    def statistic(x, y, axis):
        mean1 = np.mean(x, axis)
        mean2 = np.mean(y, axis)
        return mean1 - mean2

    with pytest.raises(ValueError, match=message):
        bootstrap(([.1, .2, .3], [.1, .2, .3]), statistic, method='BCa')

    message = "'herring' cannot be used to seed a"
    with pytest.raises(ValueError, match=message):
        bootstrap(([1, 2, 3],), np.mean, random_state='herring')
def _lower_ci_bound(ucbs: List[Tuple[float, int]]) -> float:
    scores = [q / n for q, n in ucbs]
    if len(scores) == 1:
        return scores[0]
    bootstrap_result = bootstrap((scores, ), np.mean, method='percentile')
    return bootstrap_result.confidence_interval.low
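# Hedged usage sketch for _lower_ci_bound (the (q, n) pairs are invented):
# each tuple is an accumulated score q over n visits, so the aggregate is
# the lower percentile-bootstrap bound on the mean per-visit score.
_example_bound = _lower_ci_bound([(3.0, 10), (5.0, 12), (2.5, 8), (4.0, 9)])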
def diversity(x, method=None, CI=False, conf=0.95,
              n_iter=1000, n_jobs=1, seed=None, disable_pb=False, **kwargs):
    r"""
    Wrapper for various bias-corrected richness functions

    Parameters
    ----------
    x : array-like, with shape (number of species,)
        An array representing the abundances (observed counts) for each
        individual species.
    method : str (default = None)
        One estimator of:
            - 'chao1'
            - 'egghe_proot'
            - 'jackknife'
            - 'minsample'
            - 'empirical' (same as None)
    **kwargs : additional parameters passed to the selected method

    Note
    ----
    If `CI` is True, a bootstrap procedure will be called on the specified
    method to compute the confidence intervals around the central estimate
    etc. For the jackknife procedure, the CI is calculated analytically and
    no bootstrap values will be included in the returned dict.

    Returns
    -------
    Consult the documentation of the selected method.
    """
    x = np.array(x, dtype=np.int64)
    if (x < 0).any():
        msg = "Elements of `x` should be strictly non-negative"
        raise ValueError(msg)
    if x.sum() <= 0:
        msg = "`x` appears to be empty"
        raise ValueError(msg)

    if method is not None and method.lower() not in ESTIMATORS:
        raise ValueError(f"Unknown estimation method `{method}`.")
    if method is None:
        method = "empirical"
    method = method.lower()

    if CI and method != 'jackknife':
        estimate = stats.bootstrap(x,
                                   fn=partial(ESTIMATORS[method], **kwargs),
                                   n_iter=n_iter, n_jobs=n_jobs, seed=seed,
                                   disable_pb=disable_pb)
    elif CI and method == 'jackknife':
        estimate = ESTIMATORS[method](x, CI=CI, conf=conf, **kwargs)
    else:
        estimate = ESTIMATORS[method](x, **kwargs)

    return estimate
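# Hedged usage sketch for diversity (abundance counts are invented; assumes
# the surrounding module's ESTIMATORS registry includes 'chao1' and that its
# bootstrap helper is importable as used above): a chao1 richness estimate
# with bootstrapped confidence intervals.
import numpy as np

counts = np.array([10, 5, 3, 2, 2, 1, 1, 1])
estimate = diversity(counts, method='chao1', CI=True, n_iter=200, seed=0)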