def _load_incumbent(self, traj_file, runhistory_file, incumbent, predict_best=True) -> None:
    """
    Handles the loading of the incumbent according to the given parameters.
    Helper method to keep __init__ less cluttered.
    For parameter specifications, see __init__.
    """
    self.incumbent = (None, None)
    if incumbent is not None:
        self.incumbent = incumbent
    elif traj_file is not None:
        self.logger.info('Reading traj_file: %s' % traj_file)
        self.incumbent = self._read_traj_file(traj_file)[0]
        self.logger.debug('Incumbent %s' % str(self.incumbent))
    elif traj_file is None and runhistory_file is not None:
        traj_files = os.path.join(os.path.dirname(runhistory_file), 'traj_aclib2.json')
        traj_files = sorted(glob.glob(traj_files, recursive=True))
        incumbents = []
        for traj_ in traj_files:
            self.logger.info('Reading traj_file: %s' % traj_)
            # Each entry holds [incumbent, observed_cost]; append the model's
            # marginalized cost prediction as a third element
            incumbents.append(self._read_traj_file(traj_))
            incumbents[-1].extend(self._model.predict_marginalized_over_instances(
                np.array([impute_inactive_values(incumbents[-1][0]).get_array()])))
            self.logger.debug(incumbents[-1])
        # Rank by predicted cost (index 2) or observed cost (index 1)
        sort_idx = 2 if predict_best else 1
        incumbents = sorted(enumerate(incumbents), key=lambda x: x[1][sort_idx])
        self.best_dir = os.path.dirname(traj_files[incumbents[0][0]])
        self.incumbent = incumbents[0][1][0]
        self.logger.info('Incumbent %s' % str(self.incumbent))
    else:
        raise Exception('No method specified to load an incumbent. Either pass the incumbent directly or '
                        'specify a file to load it from!')
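
# A minimal stand-alone sketch of the trajectory-selection logic above:
# rank candidate incumbents either by observed cost (index 1) or by the
# model-predicted cost (index 2) and keep the best one. The entries and
# names below are hypothetical stand-ins, not part of this module.
def _select_incumbent_sketch(entries, predict_best=True):
    """entries: list of (incumbent, observed_cost, predicted_cost) tuples."""
    sort_idx = 2 if predict_best else 1
    ranked = sorted(enumerate(entries), key=lambda x: x[1][sort_idx])
    return ranked[0][0], ranked[0][1][0]  # (index of best traj_file, incumbent)

# _select_incumbent_sketch([('inc-a', 0.42, 0.40), ('inc-b', 0.38, 0.45)])
# -> (0, 'inc-a') by predicted cost; (1, 'inc-b') with predict_best=False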
def _preprocess(self, runhistory):
    """
    Method to marginalize over instances such that fANOVA can determine the parameter importance without
    having to deal with instance features.
    :param runhistory: RunHistory that knows all configurations that were run. For all these configurations
        we have to marginalize away the instance features with which fANOVA will make its predictions.
    """
    self.logger.info('Preprocessing: marginalizing away all instances!')
    configs = runhistory.get_all_configs()
    X_non_hyper, X_prime, y_prime = [], [], []
    for c_id, config in tqdm(enumerate(configs), ascii=True, desc='Completed: ', total=len(configs)):
        config = impute_inactive_values(config).get_array()
        X_prime.append(config)
        # Copy, so the in-place transform below does not mutate the
        # unit-cube array that was just stored in X_prime
        X_non_hyper.append(config.copy())
        y_prime.append(self.model.predict_marginalized_over_instances(np.array([X_prime[-1]]))[0].flatten())
        # Transform numerical hyperparameters from the unit cube back to their original ranges
        for idx, param in enumerate(self.scenario.cs.get_hyperparameters()):
            if not isinstance(param, CategoricalHyperparameter):
                X_non_hyper[-1][idx] = param._transform(X_non_hyper[-1][idx])
    X_non_hyper = np.array(X_non_hyper)
    X_prime = np.array(X_prime)
    y_prime = np.array(y_prime)
    self.X = X_prime
    self.X_fanova = X_non_hyper
    self.y_fanova = y_prime
    self.y = y_prime
    self.logger.info('Size of training X after preprocessing: %s' % str(self.X.shape))
    self.logger.info('Size of training y after preprocessing: %s' % str(self.y.shape))
    self.logger.info('Finished preprocessing')
    self._preprocessed = True
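
# A minimal sketch of what "marginalizing over instances" computes: for a
# fixed configuration, average the surrogate's predictions over all training
# instance feature vectors. `predict` is a hypothetical stand-in for the
# random-forest surrogate, not SMAC's actual API.
import numpy as np

def marginalized_cost_sketch(config_vector, instance_features, predict):
    """Mean predicted cost of one configuration across all instances."""
    costs = [predict(np.concatenate([config_vector, features]))
             for features in instance_features]
    return float(np.mean(costs))

# e.g. with a dummy linear "surrogate":
# features = np.random.rand(10, 3)  # 10 instances, 3 features each
# marginalized_cost_sketch(np.array([0.2, 0.7]), features, lambda z: z.sum())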
def run(self) -> OrderedDict:
    """
    Main function.

    Returns
    -------
    evaluated_parameter_importance: OrderedDict
        Parameter -> importance. The order is important as smaller indices indicate higher importance.
    """
    neighborhood_dict = self._get_one_exchange_neighborhood_by_parameter()  # sampled on a unit-hypercube!
    self.neighborhood_dict = neighborhood_dict
    incumbent_array = self.incumbent.get_array()
    def_perf, def_var = self._predict_over_instance_set(impute_inactive_values(self.cs.get_default_configuration()))
    inc_perf, inc_var = self._predict_over_instance_set(impute_inactive_values(self.incumbent))
    delta = def_perf - inc_perf
    evaluated_parameter_importance = {}

    # These are used for plotting and hold the predictions for each neighbor of each parameter.
    # performance_dict holds the mean, variance_dict the variance of the forest.
    performance_dict = {}
    variance_dict = {}
    # Holds the importance over neighbors; only used if NOT quantifying
    # importance via the performance-variance across neighbors
    overall_imp = {}
    # Nested list of values per tree in the random forest
    pred_per_tree = {}

    # Iterate over parameters
    pbar = tqdm(range(self._sampled_neighbors), ascii=True, disable=not self.verbose)
    for index, param in enumerate(self.incumbent.keys()):
        if param not in neighborhood_dict:
            pbar.set_description('{: >.70s}'.format('Parameter %s is inactive' % param))
            continue

        pbar.set_description('Predicting performances for neighbors of {: >.30s}'.format(param))
        performance_dict[param] = []
        variance_dict[param] = []
        pred_per_tree[param] = []
        added_inc = False
        inc_at = 0
        # Iterate over neighbors
        for unit_neighbor, neighbor in zip(neighborhood_dict[param][0], neighborhood_dict[param][1]):
            if not added_inc:
                # Detect incumbent
                if unit_neighbor > incumbent_array[index]:
                    performance_dict[param].append(inc_perf)
                    variance_dict[param].append(inc_var)
                    pbar.update(1)
                    added_inc = True
                else:
                    inc_at += 1
            # Create the neighbor-Configuration object
            new_array = incumbent_array.copy()
            new_array = change_hp_value(self.incumbent.configuration_space, new_array, param, unit_neighbor,
                                        index)
            new_configuration = impute_inactive_values(Configuration(self.incumbent.configuration_space,
                                                                     vector=new_array))
            # Predict performance
            x = np.array(new_configuration.get_array())
            pred_per_tree[param].append([np.mean(tree_pred) for tree_pred in self.model.rf.all_leaf_values(x)])
            performance_dict[param].append(np.mean(pred_per_tree[param][-1]))
            variance_dict[param].append(np.var(pred_per_tree[param][-1]))
            pbar.update(1)

        if len(neighborhood_dict[param][0]) > 0:
            neighborhood_dict[param][0] = np.insert(neighborhood_dict[param][0], inc_at, incumbent_array[index])
            neighborhood_dict[param][1] = np.insert(neighborhood_dict[param][1], inc_at, self.incumbent[param])
        else:
            neighborhood_dict[param][0] = np.array(incumbent_array[index])
            neighborhood_dict[param][1] = [self.incumbent[param]]

        if not added_inc:
            mean, var = self._predict_over_instance_set(impute_inactive_values(self.incumbent))
            performance_dict[param].append(mean)
            variance_dict[param].append(var)
            pbar.update(1)

        # After all neighbors are estimated, look at all performances except the incumbent
        tmp_perf = performance_dict[param][:inc_at] + performance_dict[param][inc_at + 1:]
        if delta == 0:
            delta = 1  # To avoid division by zero
        imp_over_mea = (np.mean(tmp_perf) - performance_dict[param][inc_at]) / delta
        imp_over_med = (np.median(tmp_perf) - performance_dict[param][inc_at]) / delta
        try:
            imp_over_max = (np.max(tmp_perf) - performance_dict[param][inc_at]) / delta
        except ValueError:
            imp_over_max = np.nan  # Hacky fix as this is never used anyway
        overall_imp[param] = np.array([imp_over_mea, imp_over_med, imp_over_max])

    # Create the actual importance values by normalizing over the sum of variances
    num_trees = len(list(pred_per_tree.values())[0][0])
    params = list(performance_dict.keys())
    overall_var_per_tree = {
        param: [np.var([neighbor[tree_idx] for neighbor in pred_per_tree[param]])
                for tree_idx in range(num_trees)]
        for param in params
    }
    # Sum up variances per tree across parameters
    sum_var_per_tree = [sum([overall_var_per_tree[param][tree_idx] for param in params])
                        for tree_idx in range(num_trees)]
    # Normalize
    overall_var_per_tree = {p: [t / sum_var_per_tree[idx] for idx, t in enumerate(trees)]
                            for p, trees in overall_var_per_tree.items()}
    self.logger.debug("overall_var_per_tree %s (%d trees)", str(overall_var_per_tree), num_trees)
    self.logger.debug("sum_var_per_tree %s (%d trees)", str(sum_var_per_tree), num_trees)

    for param in performance_dict.keys():
        if self.quantify_importance_via_variance:
            evaluated_parameter_importance[param] = np.mean(overall_var_per_tree[param])
        else:
            evaluated_parameter_importance[param] = overall_imp[param][0]

    # Sort descending so the most important parameters survive the cut-off
    only_show = sorted(list(evaluated_parameter_importance.keys()),
                       key=lambda p: evaluated_parameter_importance[p],
                       reverse=True)[:min(self.to_evaluate, len(evaluated_parameter_importance.keys()))]

    self.neighborhood_dict = neighborhood_dict
    self.performance_dict = performance_dict
    self.variance_dict = variance_dict
    self.evaluated_parameter_importance = OrderedDict([(p, evaluated_parameter_importance[p]) for p in only_show])
    if self.quantify_importance_via_variance:
        self.evaluated_parameter_importance_uncertainty = OrderedDict([(p, np.std(overall_var_per_tree[p]))
                                                                       for p in only_show])
    all_res = {'imp': self.evaluated_parameter_importance,
               'order': list(self.evaluated_parameter_importance.keys())}
    return all_res
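
# A self-contained toy version of the per-tree variance normalization above,
# with fabricated predictions (all names hypothetical): each parameter's
# importance is the variance its neighborhood induces in a tree's
# predictions, divided by the total variance across parameters, averaged
# over trees.
import numpy as np

toy_pred_per_tree = {                 # param -> [neighbor][tree] predictions
    'alpha': [[0.10, 0.12], [0.50, 0.55], [0.90, 0.80]],  # strong effect
    'beta':  [[0.48, 0.50], [0.50, 0.52], [0.52, 0.49]],  # weak effect
}
n_trees = 2
var_per_tree = {p: [np.var([nb[t] for nb in rows]) for t in range(n_trees)]
                for p, rows in toy_pred_per_tree.items()}
total_per_tree = [sum(var_per_tree[p][t] for p in toy_pred_per_tree)
                  for t in range(n_trees)]
fractions = {p: [v / total_per_tree[t] for t, v in enumerate(vs)]
             for p, vs in var_per_tree.items()}
toy_importance = {p: float(np.mean(f)) for p, f in fractions.items()}
# -> 'alpha' close to 1.0, 'beta' close to 0.0; fractions sum to 1 per tree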
def run(self) -> OrderedDict:
    """
    Main function.

    Returns
    -------
    evaluated_parameter_importance: OrderedDict
        Parameter -> importance. The order is important as smaller indices indicate higher importance.
    """
    # Minor setup
    modifiable_config_dict = copy.deepcopy(self.source.get_dictionary())
    prev_modifiable_config_dict = copy.deepcopy(self.source.get_dictionary())
    modified_so_far = []
    start_delta = len(self.delta)
    best_performance = -1

    # Predict source and target performance to later compute the %improvement a parameter causes
    source_mean, source_var = self._predict_over_instance_set(impute_inactive_values(self.source))
    prev_performance = source_mean
    target_mean, target_var = self._predict_over_instance_set(impute_inactive_values(self.target))
    improvement = prev_performance - target_mean
    self.predicted_parameter_performances['-source-'] = source_mean.flatten()[0]
    self.predicted_parameter_variances['-source-'] = source_var.flatten()[0]
    self.evaluated_parameter_importance['-source-'] = 0
    forbidden_name_value_pairs = self.determine_forbidden()
    length_ = len(self.delta) - min(len(self.delta), self.to_evaluate)

    while len(self.delta) > length_:  # Main loop. While parameters are still left ...
        modifiable_config_dict = copy.deepcopy(prev_modifiable_config_dict)
        self.logger.debug('Round %d of %d:' % (start_delta - len(self.delta) + 1,
                                               min(start_delta, self.to_evaluate)))
        for param_tuple in modified_so_far:  # necessary due to combined flips
            for parameter in param_tuple:
                modifiable_config_dict[parameter] = self.target[parameter]
        prev_modifiable_config_dict = copy.deepcopy(modifiable_config_dict)

        round_performances = []
        round_variances = []
        for candidate_tuple in self.delta:
            for candidate in candidate_tuple:
                modifiable_config_dict[candidate] = self.target[candidate]
            modifiable_config_dict = self._check_children(modifiable_config_dict, candidate_tuple)

            # Check if the current config is allowed, otherwise skip it
            not_forbidden = self.check_not_forbidden(forbidden_name_value_pairs, modifiable_config_dict)
            if not not_forbidden:
                self.logger.critical('Skipping forbidden configuration!')
                continue
            modifiable_config = Configuration(self.cs, modifiable_config_dict)

            # ... predict their performance
            mean, var = self._predict_over_instance_set(impute_inactive_values(modifiable_config))
            self.logger.debug('%s: %.6f' % (candidate_tuple, mean[0]))
            round_performances.append(mean)
            round_variances.append(var)
            modifiable_config_dict = copy.deepcopy(prev_modifiable_config_dict)

        best_idx = np.argmin(round_performances)
        assert 0 <= best_idx < len(round_performances), 'No improving parameter found!'
        best_performance = round_performances[best_idx]  # greedy choice of parameter to fix
        best_variance = round_variances[best_idx]
        improvement_in_percentage = (prev_performance - best_performance) / improvement
        prev_performance = best_performance
        modified_so_far.append(self.delta[best_idx])
        self.logger.info('Round %2d winner(s): (%s, %.4f)' % (start_delta - len(self.delta) + 1,
                                                              str(self.delta[best_idx]),
                                                              improvement_in_percentage * 100))
        param_str = '; '.join(self.delta[best_idx])
        self.evaluated_parameter_importance[param_str] = improvement_in_percentage.flatten()[0]
        self.predicted_parameter_performances[param_str] = best_performance.flatten()[0]
        self.predicted_parameter_variances[param_str] = best_variance.flatten()[0]

        # Delete parameters that were set to inactive by the last best parameter
        for winning_param in self.delta[best_idx]:
            prev_modifiable_config_dict[winning_param] = self.target[winning_param]
        self._check_children(prev_modifiable_config_dict, self.delta[best_idx], delete=True)
        self.delta.pop(best_idx)  # don't forget to remove already-tested parameters

    self.predicted_parameter_performances['-target-'] = target_mean.flatten()[0]
    self.predicted_parameter_variances['-target-'] = target_var.flatten()[0]
    self.evaluated_parameter_importance['-target-'] = 0
    return self.evaluated_parameter_importance
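
# A minimal sketch of one greedy ablation round, stripped of SMAC objects:
# flip each remaining parameter from its source value to its target value,
# score the candidate with a surrogate, and fix the single best flip.
# `predict` and all other names are hypothetical stand-ins.
import copy
import numpy as np

def ablation_round_sketch(current, target, remaining, predict):
    """Return (best_param, best_cost) for the flip that minimizes predicted cost."""
    scores = []
    for param in remaining:
        candidate = copy.deepcopy(current)
        candidate[param] = target[param]  # flip one parameter towards the target
        scores.append(predict(candidate))
    best_idx = int(np.argmin(scores))
    return remaining[best_idx], scores[best_idx]

# e.g. with a toy surrogate counting mismatches to the target (ties go to
# the first candidate):
# source, target = {'a': 0, 'b': 0}, {'a': 1, 'b': 1}
# ablation_round_sketch(source, target, ['a', 'b'],
#                       lambda cfg: sum(cfg[k] != target[k] for k in cfg))  # -> ('a', 1)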
def run(self) -> OrderedDict:
    """
    Main function.

    Returns
    -------
    evaluated_parameter_importance: OrderedDict
        Parameter -> importance. The order is important as smaller indices indicate higher importance.
    """
    neighborhood_dict = self._get_one_exchange_neighborhood_by_parameter()  # sampled on a unit-hypercube!
    self.neighborhood_dict = neighborhood_dict
    performance_dict = {}
    variance_dict = {}
    incumbent_array = self.incumbent.get_array()
    overall_var = {}
    overall_imp = {}
    all_preds = []
    def_perf, def_var = self._predict_over_instance_set(impute_inactive_values(self.cs.get_default_configuration()))
    inc_perf, inc_var = self._predict_over_instance_set(impute_inactive_values(self.incumbent))
    delta = def_perf - inc_perf
    pbar = tqdm(range(self._sampled_neighbors), ascii=True, disable=not self.verbose)
    sum_var = 0
    for index, param in enumerate(self.incumbent.keys()):  # Iterate over parameters
        if param in neighborhood_dict:
            pbar.set_description('Predicting performances for neighbors of {: >.30s}'.format(param))
            performance_dict[param] = []
            variance_dict[param] = []
            overall_var[param] = []
            added_inc = False
            inc_at = 0
            # Iterate over neighbors
            for unit_neighbor, neighbor in zip(neighborhood_dict[param][0], neighborhood_dict[param][1]):
                if not added_inc:
                    # Detect incumbent
                    if unit_neighbor > incumbent_array[index]:
                        performance_dict[param].append(inc_perf)
                        overall_var[param].append(inc_perf)
                        variance_dict[param].append(inc_var)
                        pbar.update(1)
                        added_inc = True
                    else:
                        inc_at += 1
                # Create the neighbor-Configuration object and predict its performance
                new_array = incumbent_array.copy()
                new_array = change_hp_value(self.incumbent.configuration_space, new_array, param, unit_neighbor,
                                            index)
                new_configuration = impute_inactive_values(Configuration(self.incumbent.configuration_space,
                                                                         vector=new_array))
                mean, var = self._predict_over_instance_set(new_configuration)
                performance_dict[param].append(mean)
                overall_var[param].append(mean)
                variance_dict[param].append(var)
                pbar.update(1)

            if len(neighborhood_dict[param][0]) > 0:
                neighborhood_dict[param][0] = np.insert(neighborhood_dict[param][0], inc_at,
                                                        incumbent_array[index])
                neighborhood_dict[param][1] = np.insert(neighborhood_dict[param][1], inc_at,
                                                        self.incumbent[param])
            else:
                neighborhood_dict[param][0] = np.array(incumbent_array[index])
                neighborhood_dict[param][1] = [self.incumbent[param]]

            if not added_inc:
                mean, var = self._predict_over_instance_set(impute_inactive_values(self.incumbent))
                performance_dict[param].append(mean)
                overall_var[param].append(mean)
                variance_dict[param].append(var)
                pbar.update(1)

            all_preds.extend(performance_dict[param])
            # Look at all performances except the incumbent
            tmp_perf = performance_dict[param][:inc_at] + performance_dict[param][inc_at + 1:]
            imp_over_mea = (np.mean(tmp_perf) - performance_dict[param][inc_at]) / delta
            imp_over_med = (np.median(tmp_perf) - performance_dict[param][inc_at]) / delta
            try:
                imp_over_max = (np.max(tmp_perf) - performance_dict[param][inc_at]) / delta
            except ValueError:
                imp_over_max = np.nan  # Hacky fix as this is never used anyway
            overall_imp[param] = np.array([imp_over_mea, imp_over_med, imp_over_max])
            overall_var[param] = np.var(overall_var[param])
            sum_var += overall_var[param]
        else:
            pbar.set_description('{: >.70s}'.format('Parameter %s is inactive' % param))

    tmp = []
    for param in sorted(list(overall_var.keys())):
        if self.quantify_importance_via_variance:
            tmp.append([param, overall_var[param] / sum_var])
        else:
            tmp.append([param, overall_imp[param][0]])
    # Sort descending so the most important parameters survive the cut-off
    tmp = sorted(tmp, key=lambda x: x[1], reverse=True)
    tmp = tmp[:min(self.to_evaluate, len(tmp))]

    self.neighborhood_dict = neighborhood_dict
    self.performance_dict = performance_dict
    self.variance_dict = variance_dict
    self.evaluated_parameter_importance = OrderedDict(tmp)
    # Estimate uncertainty using the law of total variance
    for param in self.evaluated_parameter_importance.keys():
        mean_over_vars = np.mean(variance_dict[param])
        var_over_means = np.var(performance_dict[param])
        self.logger.debug("Law of total variance for %s: mean_over_vars=%f, var_over_means=%f (sum=%f)",
                          param, mean_over_vars, var_over_means, mean_over_vars + var_over_means)
        self.evaluated_parameter_importance_uncertainty[param] = mean_over_vars + var_over_means
    all_res = {'imp': self.evaluated_parameter_importance,
               'order': list(self.evaluated_parameter_importance.keys())}
    return all_res
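
# A toy check of the uncertainty estimate above, which is the law of total
# variance: Var(Y) = E[Var(Y | X)] + Var(E[Y | X]) -- the mean of the
# per-neighbor predictive variances plus the variance of the per-neighbor
# means. The numbers below are fabricated.
import numpy as np

neighbor_means = np.array([0.40, 0.55, 0.60])    # E[Y | X = x_i] per neighbor
neighbor_vars = np.array([0.010, 0.020, 0.015])  # Var(Y | X = x_i) per neighbor
total_variance = np.mean(neighbor_vars) + np.var(neighbor_means)
# -> 0.015 + 0.00722... ~= 0.0222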