def test_bayesian_bootstrap_vs_bootstrap_geometric_quantiles(
    spark_context_or_none
):
    num_enrollments = 20000
    rs = np.random.RandomState(42)
    data = rs.geometric(p=0.1, size=num_enrollments)

    quantiles = [0.3, 0.5, 0.9]

    def calc_quantiles(x):
        return dict(zip(quantiles, np.quantile(x, quantiles)))

    bb_res = mabsbb.bootstrap_one_branch(
        data,
        stat_fn=mabsbb.make_bb_quantile_closure(quantiles),
        sc=spark_context_or_none)
    pboot_res = mafsb.bootstrap_one_branch(
        data, stat_fn=calc_quantiles, sc=spark_context_or_none)

    for q in bb_res.index:
        for l in bb_res.columns:
            assert bb_res.loc[q, l] == pytest.approx(
                pboot_res.loc[q, l], rel=5e-3), (q, l, bb_res, pboot_res)
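# Illustrative sketch (not the mozanalysis implementation of
# `make_bb_quantile_closure`): a Bayesian bootstrap quantile can be read off
# as a weighted quantile - the first sorted observation whose cumulative
# Dirichlet weight reaches q. The helper below is hypothetical.
def _weighted_quantile_sketch(values, weights, q):
    values = np.asarray(values)
    order = np.argsort(values)
    cum_weights = np.cumsum(np.asarray(weights)[order])
    # First index where the cumulative weight reaches the target quantile
    idx = np.searchsorted(cum_weights, q)
    return values[order][min(idx, len(values) - 1)]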
def test_bootstrap_vs_beta(spark_context_or_none):
    num_enrollments = 10000
    fake_data = pd.Series(np.zeros(num_enrollments))
    fake_data[:300] = 1

    boot_res = mafsb.bootstrap_one_branch(fake_data, sc=spark_context_or_none)
    beta_res = mabsbin.summarize_one_branch_from_agg(pd.Series({
        # `-1` to simulate a Beta(0, 0) improper prior, which is closer to
        # the bootstrap for quantiles (I think?)
        'num_enrollments': len(fake_data) - 1,
        'num_conversions': fake_data.sum() - 1
    }))

    for l in boot_res.index:
        # Bootstrapped quantiles are discretized based on the number of
        # enrollments, which sets `abs`.
        #
        # Is `num_samples` large enough to consistently achieve results that
        # match the beta model to within the accuracy of this discrete limit?
        # Not quite. So we backed it off a bit and ask for the bootstrapped
        # result to be within a few quanta of the beta result - 1.9 quanta
        # was enough for a percentile bootstrap.
        assert boot_res.loc[l] == pytest.approx(
            beta_res.loc[l],
            # abs=1.9 is usually good enough with a percentile bootstrap;
            # set abs=2.9 because there are lots of tests
            abs=2.9 / num_enrollments), l
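# Sketch of the analytic model this test compares against (assuming
# `summarize_one_branch_from_agg` applies a conjugate Beta update with a
# Beta(1, 1) prior - the `-1` adjustments above then roughly mimic an
# improper Beta(0, 0) prior). Hypothetical helper, not mozanalysis code.
def _beta_posterior_quantiles_sketch(num_conversions, num_enrollments,
                                     quantiles=(0.005, 0.025, 0.5,
                                                0.975, 0.995)):
    import scipy.stats as st

    # Conjugate update: Beta(1, 1) prior + k conversions out of n enrollments
    # gives a Beta(k + 1, n - k + 1) posterior on the conversion rate.
    posterior = st.beta(num_conversions + 1,
                        num_enrollments - num_conversions + 1)
    return {q: posterior.ppf(q) for q in quantiles}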
def test_bootstrap_one_branch(spark_context_or_none):
    data = np.concatenate([np.zeros(10000), np.ones(10000)])

    res = mafsb.bootstrap_one_branch(
        data, num_samples=100, summary_quantiles=(0.5, 0.61),
        sc=spark_context_or_none)

    assert res['mean'] == pytest.approx(0.5, rel=1e-1)
    assert res['0.5'] == pytest.approx(0.5, rel=1e-1)
    assert res['0.61'] == pytest.approx(0.5, rel=1e-1)
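# A minimal hand-rolled percentile bootstrap for a single branch, to show
# what the test above checks (an illustration only, not mozanalysis
# internals): resample with replacement, compute the statistic per replicate,
# then summarize the replicates with their mean and the requested quantiles.
def _percentile_bootstrap_sketch(data, num_samples=100,
                                 summary_quantiles=(0.5, 0.61), seed=0):
    rs = np.random.RandomState(seed)
    data = np.asarray(data)
    stats = np.array([
        rs.choice(data, size=len(data), replace=True).mean()
        for _ in range(num_samples)
    ])
    summary = {'mean': stats.mean()}
    summary.update({str(q): np.quantile(stats, q) for q in summary_quantiles})
    return pd.Series(summary)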
def test_bayesian_bootstrap_vs_bootstrap_geometric(spark_context_or_none):
    num_enrollments = 20000
    rs = np.random.RandomState(42)
    data = rs.geometric(p=0.1, size=num_enrollments)

    bb_res = mabsbb.bootstrap_one_branch(data, sc=spark_context_or_none)
    pboot_res = mafsb.bootstrap_one_branch(data, sc=spark_context_or_none)

    assert bb_res['mean'] == pytest.approx(10, rel=1e-2)
    assert bb_res['0.5'] == pytest.approx(10, rel=1e-2)

    for l in bb_res.index:
        assert bb_res.loc[l] == pytest.approx(
            pboot_res.loc[l], rel=5e-3), (l, bb_res, pboot_res)
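# Illustrative sketch of the Bayesian bootstrap (not mozanalysis internals):
# instead of integer resampling counts, each replicate draws
# Dirichlet(1, ..., 1) weights, so the replicate statistic is a weighted mean
# of the observed data. With 20k geometric(p=0.1) observations both
# approaches concentrate near the true mean 1/p = 10, which is why the two
# results above agree to within ~0.5%.
def _bayesian_bootstrap_means_sketch(data, num_samples=1000, seed=42):
    rs = np.random.RandomState(seed)
    data = np.asarray(data, dtype=float)
    # One row of Dirichlet weights per replicate; each row sums to 1
    weights = rs.dirichlet(np.ones(len(data)), size=num_samples)
    return weights @ data  # `num_samples` weighted means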
def test_bayesian_bootstrap_vs_bootstrap_poisson(spark_context):
    num_enrollments = 10001
    rs = np.random.RandomState(42)
    data = rs.poisson(lam=10, size=num_enrollments)

    bb_res = mabsbb.bootstrap_one_branch(spark_context, data)
    pboot_res = mafsb.bootstrap_one_branch(spark_context, data)

    assert bb_res['mean'] == pytest.approx(10, rel=1e-2)
    assert bb_res['0.5'] == pytest.approx(10, rel=1e-2)

    for l in bb_res.index:
        assert bb_res.loc[l] == pytest.approx(
            pboot_res.loc[l], rel=5e-3), (l, bb_res, pboot_res)
def bootstrap_data(exp_path, single_window_res, num_samples,
                   ref_branch_label='control', threshold_quantile=0.999,
                   ci_quantiles=(0.005, 0.025, 0.975, 0.995)):
    report = validate_schema(op.join(op.abspath(exp_path), "report.json"))
    metric_list = _make_metric_list(report)
    metric_names = [metric.name for metric in metric_list]
    branches = np.unique(single_window_res['branch']).tolist()

    res_metrics = list()
    if len(branches) == 1:
        for metric in metric_names:
            res_metric = mafsb.bootstrap_one_branch(
                single_window_res[metric],
                stat_fn=_decilize,
                num_samples=num_samples,
                summary_quantiles=list(ci_quantiles))
            res_metrics.append(res_metric)
    elif ref_branch_label in branches:
        for metric in metric_names:
            # TODO: figure out why columns aren't being typecast with numpy
            single_window_res[metric] = single_window_res[metric].astype(
                'float')
            res_metric = _res_to_df_nest(
                metric,
                mafsb.compare_branches(
                    single_window_res,
                    col_label=metric,
                    ref_branch_label=ref_branch_label,
                    stat_fn=_decilize,
                    threshold_quantile=threshold_quantile,
                    individual_summary_quantiles=list(ci_quantiles),
                    comparative_summary_quantiles=list(ci_quantiles),
                    num_samples=num_samples))
            res_metrics.append(res_metric)
    else:
        raise ValueError(
            "There are multiple branches present in this study, but "
            "`ref_branch_label` is either missing or incorrect.")

    res_metrics = pd.concat(res_metrics)
    return res_metrics
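# `_decilize` is not defined in this snippet. A minimal sketch of what a
# decile `stat_fn` compatible with `bootstrap_one_branch`/`compare_branches`
# could look like (an assumption about its behaviour, not the project's
# actual helper): it maps a sample to a dict of statistics, matching the
# multi-statistic dict convention used in the tests above.
def _decilize_sketch(values):
    deciles = np.arange(0.1, 1.0, 0.1)
    return dict(zip(np.round(deciles, 1), np.quantile(values, deciles)))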
def test_bootstrap_one_branch_multistat(spark_context):
    data = np.concatenate([np.zeros(10000), np.ones(10000), [1e20]])

    res = mafsb.bootstrap_one_branch(
        spark_context, data,
        stat_fn=lambda x: {
            'max': np.max(x),
            'mean': np.mean(x),
        },
        num_samples=5,
        summary_quantiles=(0.5, 0.61),
        threshold_quantile=0.9999)

    assert res.shape == (2, 3)

    assert res.loc['max', 'mean'] == 1
    assert res.loc['max', '0.5'] == 1
    assert res.loc['max', '0.61'] == 1

    assert res.loc['mean', 'mean'] == pytest.approx(0.5, rel=1e-1)
    assert res.loc['mean', '0.5'] == pytest.approx(0.5, rel=1e-1)
    assert res.loc['mean', '0.61'] == pytest.approx(0.5, rel=1e-1)
def test_bootstrap_one_branch_multistat(spark_context_or_none):
    data = np.concatenate([np.zeros(10000), np.ones(10000), [1e20]])

    res = mafsb.bootstrap_one_branch(
        data,
        stat_fn=lambda x: {
            "max": np.max(x),
            "mean": np.mean(x),
        },
        num_samples=5,
        summary_quantiles=(0.5, 0.61),
        threshold_quantile=0.9999,
        sc=spark_context_or_none,
    )

    assert res.shape == (2, 3)

    assert res.loc["max", "mean"] == 1
    assert res.loc["max", "0.5"] == 1
    assert res.loc["max", "0.61"] == 1

    assert res.loc["mean", "mean"] == pytest.approx(0.5, rel=1e-1)
    assert res.loc["mean", "0.5"] == pytest.approx(0.5, rel=1e-1)
    assert res.loc["mean", "0.61"] == pytest.approx(0.5, rel=1e-1)
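# Why `max` comes out as exactly 1 above: with `threshold_quantile=0.9999`
# the single 1e20 outlier is removed before resampling, leaving only zeros
# and ones. A minimal sketch of quantile-based thresholding (an illustration;
# mozanalysis may clip rather than drop values):
def _threshold_sketch(values, threshold_quantile=0.9999):
    values = np.asarray(values)
    cutoff = np.quantile(values, threshold_quantile)
    return values[values <= cutoff]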
def test_bayesian_bootstrap_vs_bootstrap_poisson_quantiles(spark_context):
    num_enrollments = 10001
    rs = np.random.RandomState(42)
    data = rs.poisson(lam=10, size=num_enrollments)

    quantiles = [0.1, 0.5, 0.95]

    def calc_quantiles(x):
        return dict(zip(quantiles, np.quantile(x, quantiles)))

    bb_res = mabsbb.bootstrap_one_branch(
        spark_context, data,
        stat_fn=mabsbb.make_bb_quantile_closure(quantiles))
    pboot_res = mafsb.bootstrap_one_branch(
        spark_context, data, stat_fn=calc_quantiles)

    for q in bb_res.index:
        for l in bb_res.columns:
            assert bb_res.loc[q, l] == pytest.approx(
                pboot_res.loc[q, l], rel=5e-3), (q, l, bb_res, pboot_res)