def _local_search(self, configuration, acquisition):
    """Greedy hill-climbing over the one-exchange neighbourhood.

    Starting from ``configuration`` (whose acquisition value is
    ``acquisition``), repeatedly move to the best-scoring neighbour for as
    long as at least one neighbour scores strictly higher than the current
    point.

    Parameters
    ----------
    configuration
        Start configuration of the search.
    acquisition
        Acquisition value of ``configuration``.

    Returns
    -------
    tuple
        ``(acquisition, configuration)`` of the last point reached.
    """
    while True:
        # Materialise the neighbourhood: it is scored first and indexed
        # afterwards, which would not work on a one-shot iterator.
        neighbors = list(
            get_one_exchange_neighbourhood(configuration, self.seed))
        if not neighbors:
            break
        acquisitions = np.asarray(self.map_acquisition(neighbors))
        if not np.any(acquisitions > acquisition):
            break
        # Select by index rather than max(zip(...)): on tied scores the tuple
        # comparison would fall through to comparing configurations.
        best = int(np.argmax(acquisitions))
        acquisition, configuration = acquisitions[best], neighbors[best]
    return acquisition, configuration
def mutate(config, b, N_node=None):
    """Return a random one-exchange neighbour of ``config`` that is a valid graph.

    Neighbours are drawn until ``config2data_A`` accepts one (returns data
    that is not ``None``) and its number of module operations satisfies the
    ``N_node`` constraint.

    Parameters
    ----------
    config
        Configuration to mutate.
    b
        Forwarded unchanged to ``config2data_A``.
    N_node : None, int, list or random_nodes, optional
        Constraint on the number of nodes: ``None`` accepts any count, an
        ``int`` requires that exact count, a ``list`` requires membership.
        A ``random_nodes`` instance is resolved once via ``.random()``.

    Returns
    -------
    The accepted neighbour configuration.

    Raises
    ------
    ValueError
        If ``N_node`` is none of the supported types.
    """
    if isinstance(N_node, random_nodes):
        N_node = N_node.random()
    print('Node={}, type={}'.format(N_node, N_node.__class__))

    while True:
        # random.randint needs integer bounds; a float such as 1e6 is
        # rejected with TypeError on Python 3.12+.
        seed = random.randint(1, 10 ** 6)
        neighbors = list(get_one_exchange_neighbourhood(config, seed=seed))
        neighbor_config = neighbors[random.randint(0, len(neighbors) - 1)]
        data, _ = config2data_A(neighbor_config, b)

        # Determine is valid graph
        if data is None:
            print('Invalid graph')
            # Nothing to count on an invalid graph; draw another neighbour.
            continue

        # Determine if the graph satisfy number of nodes constraint
        num_node = len(data['module_operations'])
        if N_node is None:
            satisfy_num_node_constraint = True
        elif isinstance(N_node, list):
            satisfy_num_node_constraint = num_node in N_node
        elif isinstance(N_node, int):
            satisfy_num_node_constraint = num_node == N_node
        else:
            raise ValueError('Unrecognized N_node')
        print('sampled {}'.format(num_node))

        if satisfy_num_node_constraint:
            break

    print('Architecture length is {}'.format(num_node))
    return neighbor_config
def maximize(self, batch_size=1):
    """
    Maximizes the given acquisition function.

    Candidates are the one-exchange neighbours of the incumbent
    (``self.objective_func.eta['config']``) followed by configurations
    sampled from ``self.config_space`` so that ``self.n_samples`` candidates
    are scored in total.

    Parameters
    ----------
    batch_size: number of maximizer returned.

    Returns
    -------
    list
        The ``batch_size`` candidate configurations with the highest
        acquisition value, best first.
    """
    incumbent = self.objective_func.eta['config']
    neighbour_seed = self.rng.randint(int(1e6))
    candidates = list(
        get_one_exchange_neighbourhood(incumbent, seed=neighbour_seed))
    neighbour_array = convert_configurations_to_array(candidates)

    # Fill the remaining budget with points sampled over the whole space.
    n_random = self.n_samples - neighbour_array.shape[0]
    random_configs = sample_configurations(self.config_space, n_random)
    random_array = convert_configurations_to_array(random_configs)
    candidates.extend(random_configs)

    stacked = np.concatenate((neighbour_array, random_array), axis=0)
    scores = self.objective_func(stacked).flatten()

    # Negate so that argsort orders from highest to lowest score.
    top_indices = np.argsort(-scores)[:batch_size]
    return [candidates[idx] for idx in top_indices]
def test_check_deterministic_rosenbrock(self, patch):
    """Two identically seeded SMAC runs on Rosenbrock find the same incumbent."""

    # Make SMAC a bit faster
    def cheap_neighbourhood(configuration, seed):
        return get_one_exchange_neighbourhood(
            configuration=configuration,
            stdev=0.05,
            num_neighbors=2,
            seed=seed,
        )

    patch.side_effect = cheap_neighbourhood

    def rosenbrock_2d(x):
        first, second = x["x1"], x["x2"]
        return 100.0 * (second - first**2.0) ** 2.0 + (1 - first) ** 2.0

    def opt_rosenbrock():
        cs = ConfigurationSpace()
        for name, default in (("x1", -3), ("x2", -4)):
            cs.add_hyperparameter(
                UniformFloatHyperparameter(name, -5, 5, default_value=default)
            )

        settings = {
            "run_obj": "quality",  # we optimize quality (alternatively runtime)
            "runcount-limit": 50,  # maximum function evaluations
            "cs": cs,  # configuration space
            "deterministic": True,
            "limit_resources": True,
            "intensification_percentage": 0.000000001,
        }
        smac = SMAC4AC(
            scenario=Scenario(settings),
            rng=np.random.RandomState(42),
            tae_runner=rosenbrock_2d,
        )
        incumbent = smac.optimize()
        return incumbent, smac.scenario.output_dir

    found = []
    for _ in range(2):
        incumbent, output_dir = opt_rosenbrock()
        # Remember the directory (presumably for cleanup by the test case).
        self.output_dirs.append(output_dir)
        found.append((incumbent.get("x1"), incumbent.get("x2")))

    (x1_first, x2_first), (x1_second, x2_second) = found
    self.assertAlmostEqual(x1_first, x1_second)
    self.assertAlmostEqual(x2_first, x2_second)
def run_experiment_get_one_exchange_neighbourhood():
    """Benchmark two one-exchange-neighbourhood implementations.

    Builds the configuration space of the full pipeline, then for ten rounds
    samples 1000 configurations per implementation and measures how long it
    takes to call ``get_one_exchange_neighbourhood`` (version 1) and
    ``get_one_exchange_neighbourhood_vector_checking`` (version 2) on each of
    them. Prints the mean round time of each version.
    """

    def _time_calls(neighbourhood_fn, configs):
        """Return the seconds spent calling ``neighbourhood_fn`` on every config."""
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        for config in configs:
            # NOTE(review): the result is discarded without being iterated. If
            # the function returns a lazy generator, only its creation is
            # timed - confirm against the ConfigSpace version in use.
            neighbourhood_fn(config, seed=1)
        return time.perf_counter() - start

    pipeline_space = PipelineSpace()
    pipeline_space.add_pipeline_steps([
        OneHotEncodingStep(),
        ImputationStep(),
        RescalingStep(),
        BalancingStep(),
        PreprocessingStep(),
        ClassificationStep(),
    ])
    config_space = ConfigSpaceBuilder(pipeline_space).build_config_space()

    timing_v_1 = []
    timing_v_2 = []
    for i in range(0, 10):
        print("Run: {}".format(i))

        # version 1: each version is timed on its own fresh sample of 1000
        sample_configs = config_space.sample_configuration(size=1000)
        timing_v_1.append(
            _time_calls(get_one_exchange_neighbourhood, sample_configs))

        # version 2
        print("VERSION2")
        sample_configs = config_space.sample_configuration(size=1000)
        timing_v_2.append(
            _time_calls(get_one_exchange_neighbourhood_vector_checking,
                        sample_configs))

    print(np.mean(timing_v_1))
    print(np.mean(timing_v_2))
def test_random_neigborhood_conditional(self):
    """No neighbour of the default configuration equals the default itself.

    Uses the search space read from ``mini_autosklearn_original.pcs`` and
    checks the neighbourhoods generated for seeds 0..99.
    """
    here = os.path.dirname(__file__)
    pcs_path = os.path.join(
        here, 'test_searchspaces', 'mini_autosklearn_original.pcs')
    with open(pcs_path) as fh:
        cs = read(fh)

    cs.seed(1)
    default = cs.get_default_configuration()
    for seed in range(100):
        for candidate in get_one_exchange_neighbourhood(default, seed):
            self.assertNotEqual(default, candidate)
def maximize(self, batch_size=1):
    """
    Maximizes the given acquisition function.

    Candidates are the one-exchange neighbours of the incumbent
    (``self.objective_func.eta['config']``) followed by configurations
    sampled from ``self.config_space`` so that ``self.n_samples`` candidates
    are scored in total.

    Parameters
    ----------
    batch_size: number of maximizer returned.

    Returns
    -------
    list
        The ``batch_size`` candidate configurations with the highest
        acquisition value, best first.
    """
    incs_configs = list(
        get_one_exchange_neighbourhood(
            self.objective_func.eta['config'],
            seed=self.rng.randint(int(1e6))))

    # TODO: need to implement - pad the neighbourhood with random neighbours
    # of the incumbent until it holds int(0.3 * self.n_samples) candidates.

    configs_list = list(incs_configs)
    rand_incs = convert_configurations_to_array(configs_list)

    # Sample random points uniformly over the whole space
    rand_configs = sample_configurations(
        self.config_space, self.n_samples - rand_incs.shape[0])
    rand = convert_configurations_to_array(rand_configs)
    configs_list.extend(rand_configs)

    # TODO: Put a Gaussian on the incumbent and sample from that
    # (support categorical feature)

    X = np.concatenate((rand_incs, rand), axis=0)
    # Flatten so that every index below addresses exactly one candidate, even
    # if the objective returns a column vector.
    y = np.asarray(self.objective_func(X)).flatten()

    if batch_size == 1:
        return [configs_list[np.argmax(y)]]

    # configs_list is a plain Python list and cannot be indexed with an index
    # array, so pick the top candidates one by one.
    top_indices = np.argsort(-y)[:batch_size]
    return [configs_list[idx] for idx in top_indices]
def test_get_one_exchange_neighbourhood(self):
    """Neighbours generated with ``fixed_dims`` keep the fixed values.

    Hyperparameters '0'..'3' are pinned, so only '4' can change: the iterator
    must yield exactly four neighbours and then be exhausted.
    """
    # test fixed_dims
    fixed_dims = {'0': 1, '1': 0, '2': 0, '3': 0}

    cs = ConfigurationSpace()
    for binary_name in ('0', '1', '2', '3'):
        cs.add_hyperparameter(
            CategoricalHyperparameter(binary_name, [0, 1], default_value=0))
    cs.add_hyperparameter(
        CategoricalHyperparameter('4', [0, 1, 2, 3, 4], default_value=0))

    conf = cs.sample_configuration(fixed_dims=fixed_dims)
    neighborhood_iter = get_one_exchange_neighbourhood(
        conf, seed=0, fixed_dims=fixed_dims)

    for _ in range(4):
        neighbor = next(neighborhood_iter)
        for fixed_name in fixed_dims:
            self.assertEqual(neighbor[fixed_name], fixed_dims[fixed_name])

    # StopIteration
    with self.assertRaises(StopIteration):
        next(neighborhood_iter)
def test_deterministic(self, patch):
    """
    Testing deterministic behaviour.

    Runs the SMAC command line three times: twice with seed 1 and once with
    seed 2. The two seed-1 run histories must match each other (ignoring
    timestamps) and must differ from the seed-2 run history.
    """

    # Make SMAC a bit faster
    patch.side_effect = lambda configuration, seed: get_one_exchange_neighbourhood(
        configuration=configuration,
        stdev=0.05,
        num_neighbors=2,
        seed=seed,
    )

    def run_cli(seed, output_dir):
        """Invoke the SMAC CLI on the test scenario with the given seed."""
        testargs = [
            "scripts/smac",
            "--scenario", self.scenario_file,
            "--verbose_level", "DEBUG",
            "--seed", seed,
            "--random_configuration_chooser",
            "test/test_cli/random_configuration_chooser_impl.py",
            "--output_dir", output_dir,
        ]
        # The first entry is the program name, which main_cli does not take.
        SMACCLI().main_cli(testargs[1:])

    def load_runhistory(output_dir, relative_path):
        """Read a run history; the context manager closes the file handle."""
        with open(output_dir + relative_path) as fh:
            return json.load(fh)

    run_cli("1", self.output_dir_1)
    run_cli("1", self.output_dir_2)
    run_cli("2", self.output_dir_3)

    # compare trajectories in output_dir_{1,2,3}
    h1 = load_runhistory(self.output_dir_1, '/run_1/runhistory.json')
    h2 = load_runhistory(self.output_dir_2, '/run_1/runhistory.json')
    h3 = load_runhistory(self.output_dir_3, '/run_2/runhistory.json')
    self.assertEqual(self.ignore_timestamps(h1), self.ignore_timestamps(h2))

    # As h1 is changed inplace in the line above we need to reload it
    h1 = load_runhistory(self.output_dir_1, '/run_1/runhistory.json')
    self.assertNotEqual(self.ignore_timestamps(h1), self.ignore_timestamps(h3))
def _test_get_one_exchange_neighbourhood(self, hp):
    """Collect the neighbours of the default configuration for seeds 0..99.

    Parameters
    ----------
    hp
        A single hyperparameter or a list of hyperparameters that make up the
        configuration space.

    Returns
    -------
    list
        Every neighbour generated over the 100 seeds; each one is asserted to
        differ from the default configuration.
    """
    hyperparameters = hp if isinstance(hp, list) else [hp]

    cs = ConfigurationSpace()
    # Running total of neighbours per hyperparameter, counting 4 for an
    # infinite neighbourhood. It is computed but not read again in this helper.
    num_neighbors = 0
    for hyperparameter in hyperparameters:
        cs.add_hyperparameter(hyperparameter)
        count = hyperparameter.get_num_neighbors()
        num_neighbors += 4 if np.isinf(count) else count

    cs.seed(1)
    config = cs.get_default_configuration()

    all_neighbors = []
    for seed in range(100):
        for new_config in get_one_exchange_neighbourhood(config, seed):
            self.assertNotEqual(config, new_config)
            all_neighbors.append(new_config)
    return all_neighbors
def test_random_neighborhood_int(self):
    """Check neighbour statistics and uniqueness for integer hyperparameters.

    The mean and variance of the neighbours of the default configuration are
    compared against reference values for a linear and a log-scaled integer
    hyperparameter. Afterwards, for every value 1..10 of the log-scaled one,
    the neighbourhood must contain no duplicates and never the value itself.
    """

    def neighbour_values(hyperparameter):
        found = self._test_get_one_exchange_neighbourhood(hyperparameter)
        return [neighbor['a'] for neighbor in found]

    linear_values = neighbour_values(UniformIntegerHyperparameter('a', 1, 10))
    self.assertAlmostEqual(5.8125, np.mean(linear_values), places=2)
    self.assertAlmostEqual(5.60234375, np.var(linear_values), places=2)

    hp = UniformIntegerHyperparameter('a', 1, 10, log=True)
    log_values = neighbour_values(hp)
    # Default value is 3.16
    self.assertAlmostEqual(3.9425, np.mean(log_values), places=2)
    self.assertAlmostEqual(5.91, np.var(log_values), places=2)

    cs = ConfigurationSpace()
    cs.add_hyperparameter(hp)
    for val in range(1, 11):
        config = Configuration(cs, values={'a': val})
        # The seed is fixed to 1, so every repetition uses the same seed.
        for _ in range(100):
            neighbors = [
                neighbor['a']
                for neighbor in get_one_exchange_neighbourhood(config, 1)
            ]
            self.assertEqual(len(neighbors), len(np.unique(neighbors)))
            self.assertNotIn(val, neighbors)
# Micro-benchmark: time configuration sampling and neighbour generation on a
# one-hyperparameter space in which half of the values are forbidden.
# The space is built in code here (alternative: cs = pcs.read(fh)).
cs = ConfigurationSpace()
hp1 = cs.add_hyperparameter(CategoricalHyperparameter("hp1", [0, 1, 2, 3, 4, 5]))
for forbidden_value in (1, 3, 5):
    cs.add_forbidden_clause(ForbiddenEqualsClause(hp1, forbidden_value))

# Part 1: 20 rounds of sampling 500000 configurations each.
times = []
for _ in range(20):
    start_time = time.time()
    configs = cs.sample_configuration(500000)
    end_time = time.time()
    times.append(end_time - start_time)
print("all times:", times)
print('Sampling 500000 configurations took on average:', np.mean(times))

# Part 2: for the first 100 configurations of the last sample, draw
# neighbours (stopping once more than 100 have been drawn) and record the
# time per neighbour.
max_neighbors = 100
times = []
for config in configs[:100]:
    start_time = time.time()
    n_drawn = 0
    for n_drawn, _ in enumerate(get_one_exchange_neighbourhood(config, 1), start=1):
        if n_drawn > max_neighbors:
            break
    end_time = time.time()
    # Normalise by the number of neighbours actually drawn; a fixed divisor
    # does not give a per-neighbour time. max() guards an empty neighbourhood.
    times.append((end_time - start_time) / max(n_drawn, 1))
print('Getting a nearest neighbor took on average:', np.mean(times))
def local_search(
        self,
        start_points: List[Configuration],
        budget,
) -> Tuple[np.ndarray, List[Configuration]]:
    """Run one local search per start point, evaluating neighbours in batches.

    Every start point becomes the incumbent of its own search. In each round
    a few one-exchange neighbours are drawn per active search, all of them are
    scored with a single ``self.evaluate`` call, and a search moves to the
    first neighbour whose value is strictly lower than its incumbent's.
    Neighbours with an equal value are kept for plateau walking. A search is
    switched off after ``self.n_steps_plateau_walk`` plateau walks; the method
    returns once all searches are switched off.

    Parameters
    ----------
    start_points : List[Configuration]
        Configurations to start from (deep-copied before evaluation).
    budget
        Forwarded to ``self.get_y_opt`` and ``self.evaluate``.

    Returns
    -------
    Tuple[np.ndarray, List[Configuration]]
        The value of each final incumbent and the incumbents themselves.
    """
    # Function-scope import: only needed to time the batched evaluation.
    import time

    y_opt = self.get_y_opt(budget)
    # Compute the acquisition value of the incumbents
    num_incumbents = len(start_points)
    acq_val_incumbents_, incumbents = self.evaluate(
        deepcopy(start_points), budget, y_opt, return_loss_config=True)
    acq_val_incumbents: list = acq_val_incumbents_.tolist()

    # Set up additional variables required to do vectorized local search:
    # whether the i-th local search is still running
    active = [True] * num_incumbents
    # number of plateau walks of the i-th local search. Reaching the maximum
    # number is the stopping criterion of the local search.
    n_no_plateau_walk = [0] * num_incumbents
    # tracking the number of steps for logging purposes
    local_search_steps = [0] * num_incumbents
    # tracking the number of neighbors looked at for logging purposes
    neighbors_looked_at = [0] * num_incumbents
    # tracking the number of neighbors generated (currently not reported)
    neighbors_generated = [0] * num_incumbents

    # todo: these settings are (re)assigned on the instance at every call
    self.vectorization_min_obtain = 2
    self.n_steps_plateau_walk = 10
    self.vectorization_max_obtain = 64

    # how many neighbors were obtained for the i-th local search. Important to
    # map the individual acquisition function values to the correct local
    # search run
    obtain_n = [self.vectorization_min_obtain] * num_incumbents
    # Tracking the time it takes to compute the acquisition function
    times = []

    # Set up the neighborhood generators
    neighborhood_iterators = []
    for i, inc in enumerate(incumbents):
        neighborhood_iterators.append(
            get_one_exchange_neighbourhood(
                inc, seed=self.rng.randint(low=0, high=100000)))
        local_search_steps[i] += 1

    # Keeping track of configurations with equal acquisition value for plateau
    # walking. One independent list per search: ``[[]] * n`` would make every
    # entry refer to the same list object.
    neighbors_w_equal_acq = [[] for _ in range(num_incumbents)]

    while np.any(active):
        # Whether the i-th local search improved. When a new neighborhood is
        # generated, this is used to determine whether a step was made
        # (improvement) or not (iterator exhausted)
        improved = [False] * num_incumbents
        # Used to request a new neighborhood for the incumbent of the i-th
        # local search
        new_neighborhood = [False] * num_incumbents

        # gather all neighbors
        neighbors = []
        for i, neighborhood_iterator in enumerate(neighborhood_iterators):
            if not active[i]:
                continue
            neighbors_for_i = []
            for _ in range(obtain_n[i]):
                try:
                    n = next(neighborhood_iterator)  # n : Configuration
                except StopIteration:
                    # Record how many neighbours were really obtained so the
                    # scores below are attributed to the right search.
                    obtain_n[i] = len(neighbors_for_i)
                    new_neighborhood[i] = True
                    break
                neighbors_generated[i] += 1
                neighbors_for_i.append(n)
            neighbors.extend(neighbors_for_i)

        if neighbors:
            start_time = time.perf_counter()
            acq_val = self.evaluate(neighbors, budget, return_loss=True)
            times.append(time.perf_counter() - start_time)
            # Wrap a scalar result so that it can be indexed below.
            if np.ndim(acq_val) == 0:
                acq_val = [acq_val]

            # Comparing the acquisition function of the neighbors with the
            # acquisition value of the incumbent
            acq_index = 0
            # Iterating the all i local searches
            for i in range(num_incumbents):
                if not active[i]:
                    continue
                # And for each local search we know how many neighbors we
                # obtained. range() is evaluated once, so resetting
                # obtain_n[i] inside the loop does not shorten it.
                for _ in range(obtain_n[i]):
                    # After an improvement the remaining neighbours of this
                    # search are skipped; only the index advances towards the
                    # i+1-th local search.
                    if improved[i]:
                        acq_index += 1
                        continue

                    neighbors_looked_at[i] += 1
                    # Found a better configuration
                    if acq_val[acq_index] < acq_val_incumbents[i]:
                        self.logger.debug(
                            "Local search %d: Switch to one of the neighbors (after %d configurations).",
                            i,
                            neighbors_looked_at[i],
                        )
                        incumbents[i] = neighbors[acq_index]
                        acq_val_incumbents[i] = acq_val[acq_index]
                        new_neighborhood[i] = True
                        improved[i] = True
                        local_search_steps[i] += 1
                        neighbors_w_equal_acq[i] = []
                        obtain_n[i] = 1
                    # Found an equally well performing configuration, keeping
                    # it for plateau walking
                    elif acq_val[acq_index] == acq_val_incumbents[i]:
                        neighbors_w_equal_acq[i].append(neighbors[acq_index])
                    acq_index += 1

        # Now we check whether we need to create new neighborhoods and whether
        # we need to increase the number of plateau walks for one of the local
        # searches. Also disables local searches if the number of plateau
        # walks is reached (and all being switched off is the termination
        # criterion).
        for i in range(num_incumbents):
            if not active[i]:
                continue

            if obtain_n[i] == 0 or improved[i]:
                obtain_n[i] = self.vectorization_min_obtain
            else:
                obtain_n[i] = min(obtain_n[i] * 2,
                                  self.vectorization_max_obtain)

            if not new_neighborhood[i]:
                continue

            if not improved[i] and n_no_plateau_walk[i] < self.n_steps_plateau_walk:
                if neighbors_w_equal_acq[i]:
                    incumbents[i] = neighbors_w_equal_acq[i][0]
                    neighbors_w_equal_acq[i] = []
                n_no_plateau_walk[i] += 1
            if n_no_plateau_walk[i] >= self.n_steps_plateau_walk:
                active[i] = False
                continue

            neighborhood_iterators[i] = get_one_exchange_neighbourhood(
                incumbents[i],
                seed=self.rng.randint(low=0, high=100000),
            )

    self.logger.debug(
        "Local searches took %s steps and looked at %s configurations. Computing the acquisition function in "
        "vectorized for took %f seconds on average.",
        local_search_steps,
        neighbors_looked_at,
        # No evaluation happens when no neighbour was ever generated.
        np.mean(times) if times else float("nan"),
    )
    # todo: tag the returned configurations with their origin (local search)
    return np.array(acq_val_incumbents), incumbents