# Imports assumed by the snippets below; the module paths mirror
# mozanalysis's layout and the aliases match the ones the snippets use.
import numpy as np
import pandas as pd
import pytest

import mozanalysis.bayesian_stats.bayesian_bootstrap as mabsbb
import mozanalysis.bayesian_stats.bayesian_bootstrap as mabb  # alias used in Example #5
import mozanalysis.bayesian_stats.binary as mabin


def test_compare_branches(spark_context):
    data = pd.DataFrame(
        index=range(60000),
        columns=['branch', 'val'],
        dtype='float'
    )
    data.iloc[::3, 0] = 'control'
    data.iloc[1::3, 0] = 'same'
    data.iloc[2::3, 0] = 'bigger'

    data.iloc[::2, 1] = 0
    data.iloc[1::2, 1] = 1

    # rows 2 mod 12 are 'bigger' rows currently holding 0; setting them to 1
    # lifts the 'bigger' branch mean from 0.5 to 0.75
    data.iloc[2::12, 1] = 1

    assert data.val[data.branch != 'bigger'].mean() == 0.5
    assert data.val[data.branch == 'bigger'].mean() == pytest.approx(0.75)

    res = mabsbb.compare_branches(spark_context, data, 'val', num_samples=2)

    assert res['individual']['control']['mean'] == pytest.approx(0.5, rel=1e-1)
    assert res['individual']['same']['mean'] == pytest.approx(0.5, rel=1e-1)
    assert res['individual']['bigger']['mean'] == pytest.approx(0.75, rel=1e-1)

    assert 'control' not in res['comparative'].keys()
    assert res['comparative']['same'][('rel_uplift', 'exp')] == \
        pytest.approx(0, abs=0.1)
    assert res['comparative']['bigger'][('rel_uplift', 'exp')] == \
        pytest.approx(0.5, abs=0.1)

    # num_samples=2 so only 3 possible outcomes
    assert res['comparative']['same'][('prob_win', None)] in (0, 0.5, 1)
    assert res['comparative']['bigger'][('prob_win', None)] == \
        pytest.approx(1, abs=0.01)
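The `spark_context` argument is a pytest fixture. The pytest-spark plugin
provides one out of the box; failing that, a minimal conftest.py along these
lines (a sketch, not the project's actual fixture) would do:

import pytest
from pyspark import SparkContext


@pytest.fixture(scope='session')
def spark_context():
    # One local SparkContext shared by the whole test session.
    sc = SparkContext.getOrCreate()
    yield sc
    sc.stop()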
Example #2
    def transform(self, df: DataFrame, metric: str) -> "StatisticResultCollection":
        stats_results = StatisticResultCollection([])

        # For a central interval of width `confidence_interval`, each tail
        # holds (1 - width) / 2 of the probability mass, e.g. 0.025 for a
        # 95% interval.
        critical_point = (1 - self.confidence_interval) / 2
        summary_quantiles = (critical_point, 1 - critical_point)

        ma_result = mabsbb.compare_branches(
            df,
            col_label=metric,
            ref_branch_label=self.ref_branch_label,
            num_samples=self.num_samples,
            individual_summary_quantiles=summary_quantiles,
        )

        for branch, branch_result in ma_result["individual"].items():
            lower, upper = _extract_ci(branch_result, critical_point)
            result = StatisticResult(
                metric=metric,
                statistic="mean",
                parameter=None,
                branch=branch,
                ci_width=self.confidence_interval,
                point=branch_result["mean"],
                lower=lower,
                upper=upper,
            )
            stats_results.data.append(result)

        for branch, branch_result in ma_result["comparative"].items():
            lower_abs, upper_abs = _extract_ci(branch_result["abs_uplift"], critical_point)
            stats_results.data.append(
                StatisticResult(
                    metric=metric,
                    statistic="mean",
                    parameter=None,
                    branch=branch,
                    comparison_to_control="difference",
                    ci_width=self.confidence_interval,
                    point=branch_result["abs_uplift"]["exp"],
                    lower=lower_abs,
                    upper=upper_abs,
                )
            )

            lower_rel, upper_rel = _extract_ci(branch_result["rel_uplift"], critical_point)
            stats_results.data.append(
                StatisticResult(
                    metric=metric,
                    statistic="mean",
                    parameter=None,
                    branch=branch,
                    comparison_to_control="relative_uplift",
                    ci_width=self.confidence_interval,
                    point=branch_result["rel_uplift"]["exp"],
                    lower=lower_rel,
                    upper=upper_rel,
                )
            )

        return stats_results
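Two names above come from the surrounding project and are not shown:
`StatisticResult`/`StatisticResultCollection` (simple result containers) and
`_extract_ci`. mozanalysis labels its summary quantiles with their string
values (e.g. '0.025'), so a plausible sketch of `_extract_ci`, matching the
labels fuzzily to tolerate floating-point drift, might be:

import numpy as np


def _extract_ci(series, quantile, threshold=1e-5):
    # A sketch, not the project's actual helper: find the summary entries
    # whose labels parse to `quantile` and `1 - quantile`.
    lower_index, upper_index = None, None
    for label in series.index:
        try:
            value = float(label)
        except ValueError:
            continue  # skip non-quantile entries such as 'mean' or 'exp'
        if abs(value - quantile) < threshold:
            lower_index = label
        elif abs(value - (1 - quantile)) < threshold:
            upper_index = label
    return (
        series[lower_index] if lower_index is not None else np.nan,
        series[upper_index] if upper_index is not None else np.nan,
    )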
Example #3

def test_compare_branches(spark_context_or_none):
    data = pd.DataFrame(index=range(60000),
                        columns=["branch", "val"],
                        dtype="float")
    data.iloc[::3, 0] = "control"
    data.iloc[1::3, 0] = "same"
    data.iloc[2::3, 0] = "bigger"

    data.iloc[::2, 1] = 0
    data.iloc[1::2, 1] = 1

    data.iloc[2::12, 1] = 1

    assert data.val[data.branch != "bigger"].mean() == 0.5
    assert data.val[data.branch == "bigger"].mean() == pytest.approx(0.75)

    res = mabsbb.compare_branches(data,
                                  "val",
                                  num_samples=2,
                                  sc=spark_context_or_none)

    assert res["individual"]["control"]["mean"] == pytest.approx(0.5, rel=1e-1)
    assert res["individual"]["same"]["mean"] == pytest.approx(0.5, rel=1e-1)
    assert res["individual"]["bigger"]["mean"] == pytest.approx(0.75, rel=1e-1)

    assert "control" not in res["comparative"].keys()
    assert res["comparative"]["same"][("rel_uplift",
                                       "exp")] == pytest.approx(0, abs=0.1)
    assert res["comparative"]["bigger"][("rel_uplift",
                                         "exp")] == pytest.approx(0.5, abs=0.1)

    # num_samples=2 so only 3 possible outcomes
    assert res["comparative"]["same"][("prob_win", None)] in (0, 0.5, 1)
    assert res["comparative"]["bigger"][("prob_win",
                                         None)] == pytest.approx(1, abs=0.01)
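`spark_context_or_none` is a fixture that runs each test twice: once with
`sc=None` (plain in-process sampling) and once distributing the bootstrap
over Spark. A minimal sketch, assuming a `spark_context` fixture like the
one above:

import pytest


@pytest.fixture(params=['no spark', 'spark'])
def spark_context_or_none(request, spark_context):
    # Exercise both code paths in mabsbb.compare_branches.
    if request.param == 'no spark':
        return None
    return spark_context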
Example #4
def test_compare_branches_multistat(spark_context_or_none):
    data = pd.DataFrame(index=range(60000),
                        columns=['branch', 'val'],
                        dtype='float')
    data.iloc[::3, 0] = 'control'
    data.iloc[1::3, 0] = 'same'
    data.iloc[2::3, 0] = 'bigger'

    data.iloc[::2, 1] = 0
    data.iloc[1::2, 1] = 1

    data.iloc[2::12, 1] = 1

    assert data.val[data.branch != 'bigger'].mean() == 0.5
    assert data.val[data.branch == 'bigger'].mean() == pytest.approx(0.75)

    res = mabsbb.compare_branches(
        data,
        'val',
        stat_fn=lambda x, y: {
            'max': np.max(x),
            # np.dot with the bootstrap weights `y` gives a weighted mean
            'mean': np.dot(x, y),
        },
        num_samples=2,
        sc=spark_context_or_none,
    )

    assert res['individual']['control'].loc['mean', 'mean'] \
        == pytest.approx(0.5, rel=1e-1)
    assert res['individual']['same'].loc['mean', 'mean'] \
        == pytest.approx(0.5, rel=1e-1)
    assert res['individual']['bigger'].loc['mean', 'mean'] \
        == pytest.approx(0.75, rel=1e-1)

    assert 'control' not in res['comparative'].keys()

    assert res['comparative']['same'].loc['mean', ('rel_uplift', 'exp')] \
        == pytest.approx(0, abs=0.1)
    assert res['comparative']['bigger'].loc['mean', ('rel_uplift', 'exp')] \
        == pytest.approx(0.5, abs=0.1)

    # num_samples=2 so only 3 possible outcomes
    assert res['comparative']['same'].loc['mean',
                                          ('prob_win', None)] in (0, 0.5, 1)
    assert res['comparative']['bigger'].loc['mean', ('prob_win', None)] \
        == pytest.approx(1, abs=0.01)

    assert res['comparative']['same'].loc['max', ('rel_uplift', 'exp')] == 0
    assert res['comparative']['bigger'].loc['max', ('rel_uplift', 'exp')] == 0
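The `stat_fn` contract implied here: it receives the metric values `x` and a
vector of bootstrap weights `y` (summing to 1) and returns a dict of named
statistics, so `np.dot(x, y)` is a weighted mean. Because `np.max(x)` ignores
the weights, every bootstrap replicate produces the same 'max', which is why
its relative uplift above is exactly 0. Another statistic under the same
assumed contract:

import numpy as np


def weighted_mean_and_variance(x, y):
    # `y` holds the per-observation bootstrap weights.
    x = np.asarray(x)
    mean = np.dot(x, y)
    return {'mean': mean, 'var': np.dot(y, (x - mean) ** 2)}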
Example #5
def crunch_nums_ts(ts,
                   col_label,
                   stats_model,
                   ref_branch_label='control',
                   sc=None):
    # Every analysis period must contain the same number of rows and the
    # same set of branches.
    assert all_eq(len(v) for v in ts.values())
    assert all_eq(tuple(sorted(v.branch.unique())) for v in ts.values())

    branch_list = next(iter(ts.values())).branch.unique()
    # # Maybe defaultdicts are offensive because they hide the schema?
    # res = collections.defaultdict(lambda: collections.defaultdict(dict))
    res = {
        'comparative': {
            b: {t: None for t in ts.keys()}
            for b in branch_list if b != ref_branch_label
        },
        'individual': {
            b: {t: None for t in ts.keys()}
            for b in branch_list
        },
    }

    # TODO: this really smells like a map then a zip?
    for k, v in ts.items():
        if stats_model == 'beta':
            period_res = mabin.compare_branches(
                v, col_label, ref_branch_label=ref_branch_label)
        elif stats_model == 'bootstrap':
            assert sc is not None
            period_res = mabb.compare_branches(
                sc, v, col_label,
                ref_branch_label=ref_branch_label,
                threshold_quantile=0.9999)
        else:
            raise NotImplementedError

        for branch, data in period_res['comparative'].items():
            res['comparative'][branch][k] = data
        for branch, data in period_res['individual'].items():
            res['individual'][branch][k] = data

    return res
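`all_eq` is another undefined helper; a plausible definition is the standard
itertools all-equal recipe. A hypothetical call shape follows it, with `ts`
mapping a period label to a per-client DataFrame:

from itertools import groupby


def all_eq(iterable):
    # True iff all elements compare equal (vacuously True when empty).
    g = groupby(iterable)
    return next(g, True) and not next(g, False)


# Hypothetical usage -- names below are illustrative, not from the source:
# ts = {'week_1': week_1_df, 'week_2': week_2_df}
# res = crunch_nums_ts(ts, 'active_hours', stats_model='beta')
# res['comparative']['treatment']['week_1']  # uplift stats vs. 'control'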