def test_compare_branches(spark_context):
    """Sanity-check compare_branches on a three-branch synthetic dataset."""
    df = pd.DataFrame(index=range(60000), columns=['branch', 'val'], dtype='float')
    # Branches assigned round-robin: control / same / bigger.
    df.iloc[::3, 0] = 'control'
    df.iloc[1::3, 0] = 'same'
    df.iloc[2::3, 0] = 'bigger'
    # Alternating 0/1 gives a mean of 0.5; the extra 1s every 12th row land
    # only on 'bigger' rows, lifting its mean to 0.75.
    df.iloc[::2, 1] = 0
    df.iloc[1::2, 1] = 1
    df.iloc[2::12, 1] = 1

    assert df.val[df.branch != 'bigger'].mean() == 0.5
    assert df.val[df.branch == 'bigger'].mean() == pytest.approx(0.75)

    results = mabsbb.compare_branches(spark_context, df, 'val', num_samples=2)

    individual = results['individual']
    assert individual['control']['mean'] == pytest.approx(0.5, rel=1e-1)
    assert individual['same']['mean'] == pytest.approx(0.5, rel=1e-1)
    assert individual['bigger']['mean'] == pytest.approx(0.75, rel=1e-1)

    comparative = results['comparative']
    assert 'control' not in comparative
    assert comparative['same'][('rel_uplift', 'exp')] == pytest.approx(0, abs=0.1)
    assert comparative['bigger'][('rel_uplift', 'exp')] == pytest.approx(0.5, abs=0.1)

    # num_samples=2 so only 3 possible outcomes
    assert comparative['same'][('prob_win', None)] in (0, 0.5, 1)
    assert comparative['bigger'][('prob_win', None)] == pytest.approx(1, abs=0.01)
def transform(self, df: DataFrame, metric: str) -> "StatisticResultCollection":
    """Run the bootstrap comparison and flatten its output into StatisticResults.

    Produces one per-branch mean estimate for every branch, plus an
    absolute-difference and a relative-uplift comparison for every
    non-reference branch.
    """
    results = StatisticResultCollection([])

    # Two-sided interval: alpha/2 probability mass in each tail.
    alpha = (1 - self.confidence_interval) / 2
    bootstrap = mabsbb.compare_branches(
        df,
        col_label=metric,
        ref_branch_label=self.ref_branch_label,
        num_samples=self.num_samples,
        individual_summary_quantiles=(alpha, 1 - alpha),
    )

    # Per-branch point estimates with credible intervals.
    for branch, summary in bootstrap["individual"].items():
        lo, hi = _extract_ci(summary, alpha)
        results.data.append(
            StatisticResult(
                metric=metric,
                statistic="mean",
                parameter=None,
                branch=branch,
                ci_width=self.confidence_interval,
                point=summary["mean"],
                lower=lo,
                upper=hi,
            )
        )

    # Comparisons vs. the reference branch: absolute difference first,
    # then relative uplift, for each branch (same order as before).
    for branch, summary in bootstrap["comparative"].items():
        for uplift_key, comparison in (
            ("abs_uplift", "difference"),
            ("rel_uplift", "relative_uplift"),
        ):
            lo, hi = _extract_ci(summary[uplift_key], alpha)
            results.data.append(
                StatisticResult(
                    metric=metric,
                    statistic="mean",
                    parameter=None,
                    branch=branch,
                    comparison_to_control=comparison,
                    ci_width=self.confidence_interval,
                    point=summary[uplift_key]["exp"],
                    lower=lo,
                    upper=hi,
                )
            )

    return results
def test_compare_branches(spark_context_or_none):
    """compare_branches smoke test; runs with or without a SparkContext."""
    frame = pd.DataFrame(index=range(60000), columns=["branch", "val"], dtype="float")
    # Three equally-sized branches assigned round-robin.
    frame.iloc[::3, 0] = "control"
    frame.iloc[1::3, 0] = "same"
    frame.iloc[2::3, 0] = "bigger"
    # Base pattern alternates 0/1 (mean 0.5); the 2::12 rows, which all fall
    # in 'bigger', are bumped to 1, raising that branch's mean to 0.75.
    frame.iloc[::2, 1] = 0
    frame.iloc[1::2, 1] = 1
    frame.iloc[2::12, 1] = 1

    assert frame.val[frame.branch != "bigger"].mean() == 0.5
    assert frame.val[frame.branch == "bigger"].mean() == pytest.approx(0.75)

    res = mabsbb.compare_branches(frame, "val", num_samples=2, sc=spark_context_or_none)

    for branch, expected in (("control", 0.5), ("same", 0.5), ("bigger", 0.75)):
        assert res["individual"][branch]["mean"] == pytest.approx(expected, rel=1e-1)

    assert "control" not in res["comparative"].keys()
    assert res["comparative"]["same"][("rel_uplift", "exp")] == pytest.approx(0, abs=0.1)
    assert res["comparative"]["bigger"][("rel_uplift", "exp")] == pytest.approx(0.5, abs=0.1)

    # num_samples=2 so only 3 possible outcomes
    assert res["comparative"]["same"][("prob_win", None)] in (0, 0.5, 1)
    assert res["comparative"]["bigger"][("prob_win", None)] == pytest.approx(1, abs=0.01)
def test_compare_branches_multistat(spark_context_or_none):
    """compare_branches with a stat_fn that returns several statistics at once."""
    frame = pd.DataFrame(index=range(60000), columns=['branch', 'val'], dtype='float')
    # Round-robin branch assignment over three branches.
    frame.iloc[::3, 0] = 'control'
    frame.iloc[1::3, 0] = 'same'
    frame.iloc[2::3, 0] = 'bigger'
    # Alternating 0/1 values (mean 0.5); every 12th row (all 'bigger') set to 1.
    frame.iloc[::2, 1] = 0
    frame.iloc[1::2, 1] = 1
    frame.iloc[2::12, 1] = 1

    assert frame.val[frame.branch != 'bigger'].mean() == 0.5
    assert frame.val[frame.branch == 'bigger'].mean() == pytest.approx(0.75)

    res = mabsbb.compare_branches(
        frame,
        'val',
        stat_fn=lambda x, y: {
            'max': np.max(x),
            'mean': np.dot(x, y),
        },
        num_samples=2,
        sc=spark_context_or_none,
    )

    # Results are indexed by statistic name ('mean' / 'max') per branch.
    for branch, expected in (('control', 0.5), ('same', 0.5), ('bigger', 0.75)):
        assert res['individual'][branch].loc['mean', 'mean'] \
            == pytest.approx(expected, rel=1e-1)

    assert 'control' not in res['comparative'].keys()
    assert res['comparative']['same'].loc['mean', ('rel_uplift', 'exp')] \
        == pytest.approx(0, abs=0.1)
    assert res['comparative']['bigger'].loc['mean', ('rel_uplift', 'exp')] \
        == pytest.approx(0.5, abs=0.1)

    # num_samples=2 so only 3 possible outcomes
    assert res['comparative']['same'].loc['mean', ('prob_win', None)] in (0, 0.5, 1)
    assert res['comparative']['bigger'].loc['mean', ('prob_win', None)] \
        == pytest.approx(1, abs=0.01)

    # All branches share the same max value, so the uplift on 'max' is zero.
    assert res['comparative']['same'].loc['max', ('rel_uplift', 'exp')] == 0
    assert res['comparative']['bigger'].loc['max', ('rel_uplift', 'exp')] == 0
def crunch_nums_ts(ts, col_label, stats_model, ref_branch_label='control', sc=None):
    """Run per-time-slice branch comparisons over a dict of DataFrames.

    Args:
        ts: dict mapping a time-slice key to a DataFrame with a 'branch'
            column and the metric column ``col_label``. All slices must
            have the same length and the same set of branches.
        col_label: name of the metric column to analyze.
        stats_model: 'beta' (mabin) or 'bootstrap' (mabb; requires ``sc``).
        ref_branch_label: branch treated as the reference/control.
        sc: SparkContext-like handle; required only for 'bootstrap'.

    Returns:
        dict with 'individual' stats per branch per time slice, and
        'comparative' stats (vs. the reference branch) per non-reference
        branch per time slice.

    Raises:
        NotImplementedError: if ``stats_model`` is not recognized.
    """
    # Every time slice must agree on length and on the set of branches.
    assert all_eq(len(v) for v in ts.values()), 'time slices differ in length'
    assert all_eq(
        set(tuple(sorted(v.branch.unique())) for v in ts.values())
    ), 'time slices differ in branch sets'
    branch_list = next(iter(ts.values())).branch.unique()

    # Build the full result schema up front so a missing entry shows up as
    # an explicit None instead of being silently absent (as a defaultdict
    # would allow).
    res = {
        'comparative': {
            b: {t: None for t in ts.keys()}
            for b in branch_list
            if b != ref_branch_label
        },
        'individual': {b: {t: None for t in ts.keys()} for b in branch_list},
    }

    for key, frame in ts.items():
        if stats_model == 'beta':
            model_result = mabin.compare_branches(
                frame, col_label, ref_branch_label=ref_branch_label
            )
        elif stats_model == 'bootstrap':
            assert sc is not None, 'bootstrap model requires a SparkContext'
            model_result = mabb.compare_branches(
                sc, frame, col_label,
                ref_branch_label=ref_branch_label,
                threshold_quantile=0.9999,
            )
        else:
            raise NotImplementedError(
                'unknown stats_model: {!r}'.format(stats_model)
            )

        for branch, data in model_result['comparative'].items():
            res['comparative'][branch][key] = data
        for branch, data in model_result['individual'].items():
            res['individual'][branch][key] = data

    return res