示例#1
0
    def greedy_search(self):
        """Search the matched markets for a TBR experiment by hill climbing.

        Uses a greedy hill climbing algorithm to recommend 'matched markets'
        experimental group assignments that appear to lead to valid and
        effective TBR models relative to the pretest period. The algorithm
        alternates between two routines:

        1) Look for the best set of control geos given the current set of
           treatment geos.
        2) Add one new geo to the set of treatment geos given the current
           control group.

        See Au (2018) for more details.

        Side effects:
          Fills in `self.parameters.treatment_geos_range` and
          `self.parameters.control_geos_range` when they are None, and stores
          the heap of designs in `self._search_results`.

        Returns:
          The value of `self.search_results()`: the set of feasible designs
          found given the design parameters, with their corresponding
          treatment/control groups and score.
        """
        budget_range = self.parameters.budget_range
        results = heapdict.HeapDict(size=self.parameters.n_designs)

        # Default the treatment size range to (1, #geos assignable to
        # treatment), leaving at least one geo out when every geo is
        # assignable to treatment.
        if self.parameters.treatment_geos_range is None:
            n_treatment = len(self.geo_assignments.t)
            max_treatment_size = n_treatment
            n_remaining = len(self.geo_assignments.all) - n_treatment
            if n_remaining == 0:
                max_treatment_size = n_treatment - 1
            self.parameters.treatment_geos_range = (1, max_treatment_size)
        else:
            max_treatment_size = self.parameters.treatment_geos_range[1]

        # Same defaulting for the control size range.
        if self.parameters.control_geos_range is None:
            n_control = len(self.geo_assignments.c)
            max_control_size = n_control
            n_remaining = len(self.geo_assignments.all) - n_control
            if n_remaining == 0:
                max_control_size = n_control - 1
            self.parameters.control_geos_range = (1, max_control_size)

        # The search starts from the geos that are fixed to treatment. The
        # dictionaries below are keyed by treatment group size.
        kappa_0 = len(self.geo_assignments.t_fixed)
        group_star_trt = {kappa_0: self.geo_assignments.t_fixed}

        # Build a placeholder score whose listed fields are all zero. It is
        # the baseline any candidate must beat when a geo is added to
        # treatment, and the initial entry of score_star.
        tmp_diag = TBRMMDiagnostics(np.random.normal(range(100)),
                                    self.parameters)
        tmp_diag.x = list(range(len(tmp_diag.y)))
        tmp_score = TBRMMScore(tmp_diag)
        tmp_score.score = tmp_score.score._replace(corr_test=0,
                                                   aa_test=0,
                                                   bb_test=0,
                                                   dw_test=0,
                                                   corr=0,
                                                   inv_required_impact=0)
        score_star = {kappa_0: tmp_score}

        group_ctl = self.geo_assignments.c
        if kappa_0 == 0:
            # No fixed treatment geos: there is nothing to match against yet.
            group_star_ctl = {kappa_0: group_ctl}
            needs_matching = False
        else:
            group_star_ctl = {}
            needs_matching = True

        k = kappa_0
        while k < max_treatment_size or needs_matching:
            if needs_matching:
                # Find the best control group given the current treatment
                # group, by toggling one reassignable geo at a time.
                r_control = self.geo_assignments.c - (group_ctl
                                                      | group_star_trt[k])
                r_unassigned = (group_ctl
                                & self.geo_assignments.x) - group_star_trt[k]
                reassignable_geos = r_control | r_unassigned

                treatment_time_series = self.data.aggregate_time_series(
                    group_star_trt[k])
                current_design = TBRMMDiagnostics(treatment_time_series,
                                                  self.parameters)
                current_design.x = self.data.aggregate_time_series(group_ctl)
                current_score = TBRMMScore(current_design)

                group_ctl_tmp = group_ctl
                for geo in reassignable_geos:
                    # Adds the geo if absent from control, removes it if
                    # present.
                    neighboring_control_group = group_ctl.symmetric_difference(
                        [geo])
                    # We skip checking constraints for designs with less than
                    # the minimum number of treatment geos, or above the
                    # maximum number of control geos. Otherwise, we will never
                    # be able to augment the size of treatment (to reach a
                    # size which would pass the checks) or decrease the size
                    # of control.
                    if (k >= self.parameters.treatment_geos_range[0]) and (
                            len(neighboring_control_group) <=
                            self.parameters.control_geos_range[1]):
                        if (not neighboring_control_group
                            ) or (not self.design_within_constraints(
                                group_star_trt[k], neighboring_control_group)):  # pytype: disable=wrong-arg-types
                            continue

                    neighbor_design = TBRMMDiagnostics(treatment_time_series,
                                                       self.parameters)
                    neighbor_design.x = self.data.aggregate_time_series(
                        neighboring_control_group)
                    req_impact = neighbor_design.required_impact
                    req_budget = req_impact / self.parameters.iroas
                    if (budget_range
                            is not None) and (self._constraint_not_satisfied(
                                req_budget, budget_range[0], budget_range[1])):
                        continue

                    score = TBRMMScore(neighbor_design)
                    if score > current_score:
                        group_ctl_tmp = neighboring_control_group
                        current_score = score

                if current_score > TBRMMScore(current_design):
                    # A neighbor improved the score: move there and keep
                    # matching on the next pass of the while loop.
                    group_ctl = group_ctl_tmp
                else:
                    # Local optimum: record the control group and score for
                    # this treatment size.
                    group_star_ctl[k] = group_ctl_tmp
                    score_star[k] = current_score
                    needs_matching = False
            elif k < max_treatment_size:
                # Add one geo to treatment given the current control group.
                r_treatment = self.geo_assignments.t - group_star_trt[k]

                current_score = copy.deepcopy(tmp_score)
                group_trt = group_star_trt[k]
                for geo in r_treatment:
                    augmented_treatment_group = group_star_trt[k].union([geo])
                    updated_control_group = group_star_ctl[k] - {geo}
                    # As in the control-matching step, constraints are only
                    # checked once the treatment group has reached its
                    # minimum size and the control group is within its
                    # maximum size; otherwise the search could never grow
                    # treatment or shrink control. The size tested is that of
                    # the control group of the candidate design itself.
                    if (k >= self.parameters.treatment_geos_range[0]) and (
                            len(updated_control_group) <=
                            self.parameters.control_geos_range[1]):
                        if (not updated_control_group) or (
                                not self.design_within_constraints(
                                    augmented_treatment_group,
                                    updated_control_group)):
                            continue
                    treatment_time_series = self.data.aggregate_time_series(
                        augmented_treatment_group)
                    neighbor_design = TBRMMDiagnostics(treatment_time_series,
                                                       self.parameters)
                    neighbor_design.x = self.data.aggregate_time_series(
                        updated_control_group)
                    req_impact = neighbor_design.required_impact
                    req_budget = req_impact / self.parameters.iroas
                    if (budget_range
                            is not None) and (self._constraint_not_satisfied(
                                req_budget, budget_range[0], budget_range[1])):
                        continue
                    score = TBRMMScore(neighbor_design)
                    if score > current_score:
                        group_ctl = updated_control_group
                        group_trt = augmented_treatment_group
                        current_score = score

                # If no candidate beat the placeholder score, group_trt is
                # still the previous treatment group.
                group_star_trt[k + 1] = group_trt
                k = k + 1
                needs_matching = True

        # If some geos are fixed to treatment, we did not check that the
        # design with treatment group = {all geos fixed in treatment} and
        # control group = {all geos that can be assigned to control} passes
        # the diagnostic tests.
        if kappa_0 > 0:
            diagnostic = TBRMMDiagnostics(
                self.data.aggregate_time_series(group_star_trt[kappa_0]),
                self.parameters)
            diagnostic.x = self.data.aggregate_time_series(
                group_star_ctl[kappa_0])
            req_impact = diagnostic.required_impact
            req_budget = req_impact / self.parameters.iroas
            # NOTE(review): the design is dropped only when it fails the
            # constraint check AND the budget check; confirm that requiring
            # both (rather than either) is intended.
            if (not group_star_ctl[kappa_0]) or (
                    not self.design_within_constraints(
                        group_star_trt[kappa_0], group_star_ctl[kappa_0])):
                if (budget_range
                        is not None) and (self._constraint_not_satisfied(
                            req_budget, budget_range[0], budget_range[1])):
                    group_star_trt.pop(kappa_0, None)
                    group_star_ctl.pop(kappa_0, None)
                    score_star.pop(kappa_0, None)

        # A design with an empty treatment group is never reported.
        group_star_trt.pop(0, None)
        group_star_ctl.pop(0, None)
        score_star.pop(0, None)

        # Re-score every remaining design that is within constraints and
        # push it onto the result heap.
        for k in group_star_trt:
            if self.design_within_constraints(group_star_trt[k],
                                              group_star_ctl[k]):
                design_diag = TBRMMDiagnostics(
                    self.data.aggregate_time_series(group_star_trt[k]),
                    self.parameters)
                design_diag.x = self.data.aggregate_time_series(
                    group_star_ctl[k])
                design_score = TBRMMScore(design_diag)
                design = TBRMMDesign(design_score, group_star_trt[k],
                                     group_star_ctl[k],
                                     copy.deepcopy(design_diag))
                results.push(0, design)

        self._search_results = results
        return self.search_results()
示例#2
0
    def exhaustive_search(self) -> List[TBRMMDesign]:
        """Search the design space for acceptable designs, within constraints.

        Iterates over every treatment group size from
        `self.treatment_group_size_range()`, every treatment group from
        `self.treatment_group_generator`, and every control group from
        `self.control_group_generator`. Candidates that violate the treatment
        share range, the budget range or the volume ratio tolerance are
        skipped; the remaining designs are scored and pushed onto a heap.

        Side effects:
          Stores the heap of designs in `self._search_results`.

        Returns:
          The value of `self.search_results()`: the set of feasible designs
          found given the design parameters, with their corresponding
          treatment/control groups and score.
        """
        treatment_share_range = self.parameters.treatment_share_range
        budget_range = self.parameters.budget_range

        # Materialise the sizes once so that an empty range simply yields no
        # designs instead of failing when looking up the last size.
        treatment_group_sizes = list(self.treatment_group_size_range())

        # Do not store patterns when we have the last treatment pattern size:
        # there are no larger groups left to prune with them.
        skip_this_trt_group_size = (treatment_group_sizes[-1]
                                    if treatment_group_sizes else None)
        skip_treatment_geo_patterns = []

        results = heapdict.HeapDict(size=self.parameters.n_designs)

        def skip_if_subset(geos: Set[GeoIndex]) -> bool:
            """Check if one of the stored geo patterns is a subset of the geos.

            Args:
              geos: Set of geo indices.

            Returns:
              bool: True if one of the stored groups is a subset of the geos.
            """
            return any(
                set(p).issubset(geos) for p in skip_treatment_geo_patterns)

        # tol_min/tol_max are only defined, and only used, when a volume
        # ratio tolerance is given.
        volume_tol = self.parameters.volume_ratio_tolerance
        if volume_tol is not None:
            tol_min = 1.0 / (1.0 + volume_tol)
            tol_max = 1.0 + volume_tol

        for treatment_group_size in treatment_group_sizes:

            # Treatment groups are saved for the purpose of the inclusion
            # check.
            save_treatment_groups = (treatment_group_size !=
                                     skip_this_trt_group_size)

            treatment_groups = self.treatment_group_generator(
                treatment_group_size)
            for treatment_group in treatment_groups:
                treatment_share = self.data.aggregate_geo_share(
                    treatment_group)
                # NOTE(review): the superset check below only runs when no
                # treatment share range is set; confirm that is intended.
                if treatment_share_range is not None:
                    # Skip this treatment group if the group implies too low
                    # or high share of response volume.
                    if (treatment_share > treatment_share_range[1]
                            or treatment_share < treatment_share_range[0]):
                        continue
                elif skip_if_subset(treatment_group):
                    # If the group is a superset of a group that we already
                    # know has too high a share or budget, then skip this
                    # group too.
                    continue

                y = self.data.aggregate_time_series(treatment_group)
                diag = TBRMMDiagnostics(y, self.parameters)
                req_impact = diag.estimate_required_impact(
                    self.parameters.rho_max)
                req_budget = req_impact / self.parameters.iroas
                if budget_range is not None:
                    # If the budget is too high, skip this treatment group.
                    if req_budget > budget_range[1]:
                        if save_treatment_groups:
                            # We skip all treatment groups that are a superset
                            # of a treatment group that has too high an
                            # estimated budget.
                            skip_treatment_geo_patterns.append(treatment_group)
                        # Skip regardless of whether the pattern was stored,
                        # so groups of the last size are rejected as well.
                        continue
                    # If the budget is too low, skip this treatment group.
                    elif req_budget < budget_range[0]:
                        continue

                control_groups = self.control_group_generator(treatment_group)
                for control_group in control_groups:
                    if volume_tol is not None:
                        control_share = self.data.aggregate_geo_share(
                            control_group)
                        xy_share = control_share / treatment_share
                        if xy_share > tol_max or xy_share < tol_min:
                            continue
                    diag.x = self.data.aggregate_time_series(control_group)
                    # The value itself is not used; presumably the access
                    # makes diag compute its correlation before scoring --
                    # TODO confirm.
                    corr = diag.corr  # pylint: disable=unused-variable
                    req_impact = diag.required_impact
                    req_budget = req_impact / self.parameters.iroas
                    if (budget_range is not None and
                        (self._constraint_not_satisfied(
                            req_budget, budget_range[0], budget_range[1]))):
                        continue

                    # deepcopy is needed otherwise diag.corr gets overwritten,
                    # and so it will not be equal to diag.score.score.corr for
                    # some reason
                    design_score = TBRMMScore(copy.deepcopy(diag))
                    score = design_score.score
                    if budget_range is not None:
                        # If the budget was specified then we use the inverse
                        # of the minimum detectable iROAS for the max. budget
                        # as the last value in the scoring, instead of using
                        # the same for a budget of 1$
                        iroas = req_impact / budget_range[1]
                        design_score.score = score._replace(
                            inv_required_impact=1 / iroas)

                    # deepcopy is needed otherwise diag.corr gets overwritten,
                    # and so it will not be equal to diag.score.score.corr for
                    # some reason
                    design = TBRMMDesign(design_score, treatment_group,
                                         control_group, copy.deepcopy(diag))
                    results.push(0, design)

        self._search_results = results
        return self.search_results()