Exemplo n.º 1
0
def compare_branches(
    df,
    col_label,
    ref_branch_label="control",
    stat_fn=np.mean,
    num_samples=10000,
    threshold_quantile=None,
    individual_summary_quantiles=mabs.DEFAULT_QUANTILES,
    comparative_summary_quantiles=mabs.DEFAULT_QUANTILES,
    sc=None,
):
    """Jointly sample bootstrapped statistics then compare them.

    Performs a percentile bootstrap, which, according to Efron,
    is not significantly more distasteful than a basic bootstrap,
    regardless of what you may read on Stack Overflow.

    Args:
        df: a pandas DataFrame of queried experiment data in the
            standard format (see ``mozanalysis.experiment``).
        col_label (str): Label for the df column contaning the metric
            to be analyzed.
        ref_branch_label (str, optional): String in ``df['branch']`` that
            identifies the branch with respect to which we want to
            calculate uplifts - usually the control branch.
        stat_fn (func, optional): A function that either:

            - Aggregates each resampled population to a scalar (e.g.
                the default, ``np.mean``), or
            - Aggregates each resampled population to a dict of
                scalars.

            In both cases, this function must accept a one-dimensional
            ndarray or pandas Series as its input.
        num_samples (int, optional): The number of bootstrap iterations
            to perform.
        threshold_quantile (float, optional): An optional threshold
            quantile, above which to discard outliers. E.g. `0.9999`.
        individual_summary_quantiles (list, optional): Quantiles to
            determine the confidence bands on individual branch
            statistics. Change these when making Bonferroni corrections.
        comparative_summary_quantiles (list, optional): Quantiles to
            determine the confidence bands on comparative branch
            statistics (i.e. the change relative to the reference
            branch, probably the control). Change these when making
            Bonferroni corrections.
        sc (optional): The Spark context, if available

    Returns a dictionary:
        If ``stat_fn`` returns a scalar (this is the default), then
        this function returns a dictionary has the following keys and
        values:

            'individual': dictionary mapping each branch name to a pandas
                Series that holds the expected value for the bootstrapped
                ``stat_fn``, and confidence intervals.
            'comparative': dictionary mapping each branch name to a pandas
                Series of summary statistics for the possible uplifts of
                the bootstrapped ``stat_fn`` relative to the reference branch.

        Otherwise, when ``stat_fn`` returns a dict, then this function
        returns a similar dictionary, except the Series are replaced with
        DataFrames. Each row in each DataFrame corresponds to one output
        of `stat_fn`, and is the Series that would be returned if ``stat_fn``
        computed only this statistic.
    """
    branch_list = df.branch.unique()

    if ref_branch_label not in branch_list:
        raise ValueError("Branch label '{b}' not in branch list '{bl}".format(
            b=ref_branch_label, bl=branch_list))

    samples = {
        # TODO: do we need to control seed_start? If so then we must be careful here
        b: get_bootstrap_samples(
            df[col_label][df.branch == b],
            stat_fn,
            num_samples,
            threshold_quantile=threshold_quantile,
            sc=sc,
        )
        for b in branch_list
    }

    return mabs.compare_samples(
        samples,
        ref_branch_label,
        individual_summary_quantiles,
        comparative_summary_quantiles,
    )
def compare_branches(sc,
                     df,
                     col_label,
                     ref_branch_label='control',
                     stat_fn=bb_mean,
                     num_samples=10000,
                     threshold_quantile=None,
                     individual_summary_quantiles=mabs.DEFAULT_QUANTILES,
                     comparative_summary_quantiles=mabs.DEFAULT_QUANTILES):
    """Jointly sample bootstrapped statistics then compare them.

    Args:
        sc: The Spark context
        df: a pandas DataFrame of queried experiment data in the
            standard format (see `mozanalysis.experiment`).
        col_label (str): Label for the df column contaning the metric
            to be analyzed.
        ref_branch_label (str, optional): String in ``df['branch']``
            that identifies the branch with respect to which we want to
            calculate uplifts - usually the control branch.
        stat_fn (callable, optional): A function that either:

            * Aggregates each resampled population to a scalar (e.g.
              the default, ``bb_mean``), or
            * Aggregates each resampled population to a dict of
              scalars (e.g. the func returned by
              ``make_bb_quantile_closure`` when given multiple
              quantiles.

            In both cases, this function must accept two parameters:

            * a one-dimensional ndarray or pandas Series of values,
            * an identically shaped object of weights for these values

        num_samples (int, optional): The number of bootstrap iterations
            to perform.
        threshold_quantile (float, optional): An optional threshold
            quantile, above which to discard outliers. E.g. `0.9999`.
        individual_summary_quantiles (list, optional): Quantiles to
            determine the credible intervals on individual branch
            statistics. Change these when making Bonferroni corrections.
        comparative_summary_quantiles (list, optional): Quantiles to
            determine the credible intervals on comparative branch
            statistics (i.e. the change relative to the reference
            branch, probably the control). Change these when making
            Bonferroni corrections.

    Returns:
        If ``stat_fn`` returns a scalar (this is the default), then
        this function returns a dictionary has the following keys and
        values:

            * 'individual': dictionary mapping each branch name to a pandas
              Series that holds the expected value for the bootstrapped
              ``stat_fn``, and credible intervals.
            * 'comparative': dictionary mapping each branch name to a pandas
              Series of summary statistics for the possible uplifts of
              the bootstrapped ``stat_fn`` relative to the reference branch.

        Otherwise, when ``stat_fn`` returns a dict, then this function
        returns a similar dictionary, except the Series are replaced with
        DataFrames. Each row in each DataFrame corresponds to one output
        of ``stat_fn``, and is the Series that would be returned if
        ``stat_fn`` computed only this statistic.
    """
    branch_list = df.branch.unique()

    if ref_branch_label not in branch_list:
        raise ValueError("Branch label '{b}' not in branch list '{bl}".format(
            b=ref_branch_label, bl=branch_list))

    samples = {
        # TODO: do we need to control seed_start? If so then we must be careful here
        b: get_bootstrap_samples(sc,
                                 df[col_label][df.branch == b],
                                 stat_fn,
                                 num_samples,
                                 threshold_quantile=threshold_quantile)
        for b in branch_list
    }

    return mabs.compare_samples(samples, ref_branch_label,
                                individual_summary_quantiles,
                                comparative_summary_quantiles)