def greedy_search(self):
    """Searches the Matched Markets for a TBR experiment.

    Uses a greedy hill climbing algorithm to provide recommended 'matched
    markets' experimental group assignments that appear to lead to valid and
    effective TBR models relative to the pretest period. The algorithm
    alternates between two routines:
      1) Looks for the best set of control geos given the current set of
         treatment geos.
      2) Adds one new geo to the set of treatment geos given the current
         control group.

    See Au (2018) for more details.

    Side effects:
      If `self.parameters.treatment_geos_range` or
      `self.parameters.control_geos_range` is None, it is replaced by
      `(1, max_size)` derived from `self.geo_assignments`. The designs found
      are stored in `self._search_results`.

    Returns:
      the set of feasible designs found given the design parameters,
        with their corresponding treatment/control groups and score.
    """
    budget_range = self.parameters.budget_range
    results = heapdict.HeapDict(size=self.parameters.n_designs)

    # Fill in default group-size ranges when none were specified.
    if self.parameters.treatment_geos_range is None:
        n_treatment = len(self.geo_assignments.t)
        max_treatment_size = n_treatment
        n_remaining = len(self.geo_assignments.all) - n_treatment
        if n_remaining == 0:
            # All geos are assignable to treatment, so cap the size to keep
            # the treatment group from containing every geo.
            max_treatment_size = n_treatment - 1
        self.parameters.treatment_geos_range = (1, max_treatment_size)
    else:
        max_treatment_size = self.parameters.treatment_geos_range[1]

    if self.parameters.control_geos_range is None:
        n_control = len(self.geo_assignments.c)
        max_control_size = n_control
        n_remaining = len(self.geo_assignments.all) - n_control
        if n_remaining == 0:
            max_control_size = n_control - 1
        self.parameters.control_geos_range = (1, max_control_size)

    # kappa_0 is the number of geos fixed to treatment; the search starts
    # from that treatment group. The *_star dicts are keyed by treatment
    # group size.
    kappa_0 = len(self.geo_assignments.t_fixed)
    group_star_trt = {kappa_0: self.geo_assignments.t_fixed}

    # Placeholder score: built from dummy data only to obtain a TBRMMScore
    # object, then every listed component is overwritten with 0. It is the
    # baseline that candidate designs are compared against.
    tmp_diag = TBRMMDiagnostics(np.random.normal(range(100)), self.parameters)
    tmp_diag.x = list(range(len(tmp_diag.y)))
    tmp_score = TBRMMScore(tmp_diag)
    tmp_score.score = tmp_score.score._replace(
        corr_test=0,
        aa_test=0,
        bb_test=0,
        dw_test=0,
        corr=0,
        inv_required_impact=0)
    score_star = {kappa_0: tmp_score}

    group_ctl = self.geo_assignments.c
    if kappa_0 == 0:
        group_star_ctl = {kappa_0: group_ctl}
        needs_matching = False
    else:
        group_star_ctl = {}
        needs_matching = True

    k = kappa_0
    while (k < max_treatment_size) or needs_matching:
        if needs_matching:
            # Find the best control group given the current treatment group.
            r_control = self.geo_assignments.c - (
                group_ctl | group_star_trt[k])
            r_unassigned = (
                group_ctl & self.geo_assignments.x) - group_star_trt[k]
            reassignable_geos = r_control | r_unassigned

            treatment_time_series = self.data.aggregate_time_series(
                group_star_trt[k])
            current_design = TBRMMDiagnostics(treatment_time_series,
                                              self.parameters)
            current_design.x = self.data.aggregate_time_series(group_ctl)
            current_score = TBRMMScore(current_design)

            group_ctl_tmp = group_ctl
            for geo in reassignable_geos:
                # Toggle the geo: add it to control if absent, remove it if
                # present.
                neighboring_control_group = group_ctl.symmetric_difference(
                    [geo])
                # We skip checking constraints for designs with less than
                # the minimum number of treatment geos, or above the maximum
                # number of control geos. Otherwise, we will never be able
                # to augment the size of treatment (to reach a size which
                # would pass the checks) or decrease the size of control.
                if (k >= self.parameters.treatment_geos_range[0]) and (
                        len(neighboring_control_group) <=
                        self.parameters.control_geos_range[1]):
                    if (not neighboring_control_group) or (
                            not self.design_within_constraints(
                                group_star_trt[k],
                                neighboring_control_group)):  # pytype: disable=wrong-arg-types
                        continue

                neighbor_design = TBRMMDiagnostics(treatment_time_series,
                                                   self.parameters)
                neighbor_design.x = self.data.aggregate_time_series(
                    neighboring_control_group)
                req_impact = neighbor_design.required_impact
                req_budget = req_impact / self.parameters.iroas
                if (budget_range is not None) and (
                        self._constraint_not_satisfied(
                            req_budget, budget_range[0], budget_range[1])):
                    continue

                score = TBRMMScore(neighbor_design)
                if score > current_score:
                    group_ctl_tmp = neighboring_control_group
                    current_score = score

            if current_score > TBRMMScore(current_design):
                # A neighbor improved on the current control group: move
                # there and run another matching pass.
                group_ctl = group_ctl_tmp
            else:
                # No neighbor is better: record the control group and score
                # for this treatment size and stop matching.
                group_star_ctl[k] = group_ctl_tmp
                score_star[k] = current_score
                needs_matching = False

        elif k < max_treatment_size:
            # Add one geo to treatment given the current control group.
            r_treatment = self.geo_assignments.t - group_star_trt[k]

            current_score = copy.deepcopy(tmp_score)
            group_trt = group_star_trt[k]
            for geo in r_treatment:
                augmented_treatment_group = group_star_trt[k].union([geo])
                updated_control_group = group_star_ctl[k] - {geo}
                # Same gating as in the control-matching step above. The
                # size test must look at the candidate control group built
                # in this loop, not at a variable left over from the
                # matching loop (which may be stale or not yet bound).
                if (k >= self.parameters.treatment_geos_range[0]) and (
                        len(updated_control_group) <=
                        self.parameters.control_geos_range[1]):
                    if (not updated_control_group) or (
                            not self.design_within_constraints(
                                augmented_treatment_group,
                                updated_control_group)):
                        continue

                treatment_time_series = self.data.aggregate_time_series(
                    augmented_treatment_group)
                neighbor_design = TBRMMDiagnostics(treatment_time_series,
                                                   self.parameters)
                neighbor_design.x = self.data.aggregate_time_series(
                    updated_control_group)
                req_impact = neighbor_design.required_impact
                req_budget = req_impact / self.parameters.iroas
                if (budget_range is not None) and (
                        self._constraint_not_satisfied(
                            req_budget, budget_range[0], budget_range[1])):
                    continue

                score = TBRMMScore(neighbor_design)
                if score > current_score:
                    group_ctl = updated_control_group
                    group_trt = augmented_treatment_group
                    current_score = score

            # If no candidate beat the placeholder score, group_trt is still
            # the previous treatment group; k advances regardless.
            group_star_trt[k + 1] = group_trt
            k = k + 1
            needs_matching = True

    # If some geos are fixed to treatment, we did not check that the design
    # with treatment group = {all geos fixed in treatment} and control group =
    # {all geos that can be assigned to control} passes the diagnostic tests.
    if kappa_0 > 0:
        diagnostic = TBRMMDiagnostics(
            self.data.aggregate_time_series(group_star_trt[kappa_0]),
            self.parameters)
        diagnostic.x = self.data.aggregate_time_series(
            group_star_ctl[kappa_0])
        req_impact = diagnostic.required_impact
        req_budget = req_impact / self.parameters.iroas
        # NOTE(review): the initial design is dropped only when BOTH the
        # design constraints and the budget constraint fail. Behavior is
        # kept as-is; confirm whether an `or` was intended.
        if (not group_star_ctl[kappa_0]) or (
                not self.design_within_constraints(
                    group_star_trt[kappa_0], group_star_ctl[kappa_0])):
            if (budget_range is not None) and (
                    self._constraint_not_satisfied(
                        req_budget, budget_range[0], budget_range[1])):
                group_star_trt.pop(kappa_0, None)
                group_star_ctl.pop(kappa_0, None)
                score_star.pop(kappa_0, None)

    # Discard the entry with zero treatment geos, if any.
    group_star_trt.pop(0, None)
    group_star_ctl.pop(0, None)
    score_star.pop(0, None)

    # Re-score each retained design that satisfies the constraints and store
    # it in the results heap.
    for k in group_star_trt:
        if self.design_within_constraints(group_star_trt[k],
                                          group_star_ctl[k]):
            design_diag = TBRMMDiagnostics(
                self.data.aggregate_time_series(group_star_trt[k]),
                self.parameters)
            design_diag.x = self.data.aggregate_time_series(
                group_star_ctl[k])
            design_score = TBRMMScore(design_diag)
            design = TBRMMDesign(
                design_score,
                group_star_trt[k],
                group_star_ctl[k],
                copy.deepcopy(design_diag))
            results.push(0, design)

    self._search_results = results
    return self.search_results()
def exhaustive_search(self) -> List[TBRMMDesign]:
    """Enumerate treatment/control groups and keep the acceptable designs.

    Iterates over every treatment group produced by
    `self.treatment_group_generator` for each size in
    `self.treatment_group_size_range()`, and over every control group
    produced by `self.control_group_generator` for that treatment group.
    Candidates violating the share, volume-ratio or budget limits found in
    `self.parameters` are skipped; the rest are scored and pushed onto a
    heap stored in `self._search_results`.

    Returns:
      the set of feasible designs found given the design parameters,
        with their corresponding treatment/control groups and score.
    """
    share_limits = self.parameters.treatment_share_range
    budget_limits = self.parameters.budget_range

    # Patterns are not recorded for the last treatment group size.
    all_sizes = list(self.treatment_group_size_range())
    final_size = all_sizes.pop()
    over_budget_groups = []

    found = heapdict.HeapDict(size=self.parameters.n_designs)

    def contains_over_budget_group(geos: Set[GeoIndex]) -> bool:
        """Tell whether a recorded group is contained in `geos`.

        Args:
          geos: Set of geo indices.

        Returns:
          bool: True if one of the stored groups is a subset of the geos.
        """
        return any(
            set(known).issubset(geos) for known in over_budget_groups)

    volume_tol = self.parameters.volume_ratio_tolerance
    check_volume = volume_tol is not None
    if check_volume:
        ratio_upper = 1.0 + volume_tol
        ratio_lower = 1.0 / ratio_upper

    for n_treatment in self.treatment_group_size_range():
        # Over-budget groups are remembered for the inclusion check, except
        # at the last size.
        remember_over_budget = n_treatment != final_size

        for trt_geos in self.treatment_group_generator(n_treatment):
            trt_share = self.data.aggregate_geo_share(trt_geos)

            # NOTE(review): the superset shortcut is consulted only when no
            # treatment share range is configured -- confirm this pairing of
            # the two checks is intended.
            if share_limits is None:
                if contains_over_budget_group(trt_geos):
                    # A subset of this group is already known to need too
                    # high a budget, so skip this group too.
                    continue
            elif (trt_share < share_limits[0] or
                  trt_share > share_limits[1]):
                # The group implies too low or too high a share of response
                # volume.
                continue

            trt_series = self.data.aggregate_time_series(trt_geos)
            diag = TBRMMDiagnostics(trt_series, self.parameters)
            impact = diag.estimate_required_impact(self.parameters.rho_max)
            budget = impact / self.parameters.iroas

            if budget_limits is not None:
                if budget > budget_limits[1]:
                    # Budget too high: skip, and remember the group so that
                    # its supersets can be skipped as well.
                    if remember_over_budget:
                        over_budget_groups.append(trt_geos)
                    continue
                if budget < budget_limits[0]:
                    # Budget too low: skip.
                    continue

            for ctl_geos in self.control_group_generator(trt_geos):
                if check_volume:
                    ctl_share = self.data.aggregate_geo_share(ctl_geos)
                    share_ratio = ctl_share / trt_share
                    if share_ratio < ratio_lower or share_ratio > ratio_upper:
                        continue

                diag.x = self.data.aggregate_time_series(ctl_geos)
                # The attribute is read for its effect on `diag`; the value
                # itself is not used.
                _ = diag.corr
                impact = diag.required_impact
                budget = impact / self.parameters.iroas
                if budget_limits is not None and (
                        self._constraint_not_satisfied(
                            budget, budget_limits[0], budget_limits[1])):
                    continue

                # deepcopy is needed otherwise diag.corr gets overwritten,
                # and so it will not be equal to diag.score.score.corr for
                # some reason.
                design_score = TBRMMScore(copy.deepcopy(diag))
                base_score = design_score.score
                if budget_limits is not None:
                    # With a budget specified, the last scoring value is the
                    # inverse of the minimum detectable iROAS at the maximum
                    # budget, instead of the same for a budget of 1$.
                    min_iroas = impact / budget_limits[1]
                    design_score.score = base_score._replace(
                        inv_required_impact=1 / min_iroas)

                # A second deepcopy keeps the stored diagnostics independent
                # of later changes to `diag`.
                design = TBRMMDesign(
                    design_score,
                    trt_geos,
                    ctl_geos,
                    copy.deepcopy(diag))
                found.push(0, design)

    self._search_results = found
    return self.search_results()