def run(self):
    """Run the Bayesian optimization loop until the budget is exhausted.

    Every iteration optionally imports the run histories found in
    ``scenario.input_psmac_dirs`` (shared-model mode), transforms the run
    history into ``X, Y``, asks ``choose_next`` for challengers and lets
    the intensifier race them against the current incumbent.

    Returns
    -------
    incumbent
        The best found configuration, i.e. ``self.incumbent`` after the
        last intensification.
    """
    self.start()

    # Main BO loop
    while True:
        if self.scenario.shared_model:
            pSMAC.read(run_history=self.runhistory,
                       output_dirs=self.scenario.input_psmac_dirs,
                       configuration_space=self.config_space,
                       logger=self.logger)

        start_time = time.time()
        X, Y = self.rh2EPM.transform(self.runhistory)

        self.logger.debug("Search for next configuration")
        # get all found configurations sorted according to acq
        challengers = self.choose_next(X, Y)

        # The time needed to pick challengers determines how long the
        # intensifier is allowed to run.
        time_spent = time.time() - start_time
        time_left = self._get_timebound_for_intensification(time_spent)

        self.logger.debug("Intensify")
        self.incumbent, _ = self.intensifier.intensify(
            challengers=challengers,
            incumbent=self.incumbent,
            run_history=self.runhistory,
            aggregate_func=self.aggregate_func,
            time_bound=max(self.intensifier._min_time, time_left))

        if self.scenario.shared_model:
            pSMAC.write(
                run_history=self.runhistory,
                output_directory=self.scenario.output_dir_for_this_run,
                logger=self.logger)

        # Use the instance logger (not the root logger) so this message
        # honours the same configuration as the other messages above.
        self.logger.debug(
            "Remaining budget: %f (wallclock), %f (ta costs), "
            "%f (target runs)",
            self.stats.get_remaing_time_budget(),
            self.stats.get_remaining_ta_budget(),
            self.stats.get_remaining_ta_runs())

        if self.stats.is_budget_exhausted():
            break

        self.stats.print_stats(debug_out=True)

    return self.incumbent
def run(self):
    """Run the Bayesian optimization loop until the budget is exhausted.

    After the initial design has been evaluated, each iteration selects
    challengers via ``select_configuration.run`` and intensifies them
    against the incumbent. With ``self.double_intensification`` set, the
    selection yields two challenger lists: the random ones are raced
    first (for half the selection time), then the model-based ones (for
    as long as the first race took). Otherwise a single list is raced.

    Returns
    -------
    incumbent
        The best found configuration (``self.incumbent``).

    Raises
    ------
    FirstRunCrashedException
        Re-raised from the initial design when
        ``scenario.abort_on_first_run_crash`` is set.
    """
    self.stats.start_timing()
    try:
        self.incumbent = self.initial_design.run()
    except FirstRunCrashedException:
        if self.scenario.abort_on_first_run_crash:
            raise

    # Main BO loop
    while True:
        if self.scenario.shared_model:
            pSMAC.read(run_history=self.runhistory,
                       output_directory=self.scenario.output_dir,
                       configuration_space=self.config_space,
                       logger=self.logger)

        start_time = time.time()
        X, Y = self.rh2EPM.transform(self.runhistory)

        self.logger.debug("Search for next configuration")
        # get all found configurations sorted according to acq; the call
        # is identical for both modes, only the shape of the result differs
        selection = self.select_configuration.run(
            X, Y,
            incumbent=self.incumbent,
            num_configurations_by_random_search_sorted=100,
            num_configurations_by_local_search=10,
            double_intensification=self.double_intensification)

        if self.double_intensification:
            challengers_smac, challengers_random = selection

        time_spend = time.time() - start_time
        self.logger.debug(
            "Time spend to choose next configurations: %.2f sec", time_spend)

        self.logger.debug("Intensify")
        if self.double_intensification:
            start_time_random = time.time()
            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers_random,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(0.01, time_spend / 2.),
                min_number_of_runs=1)
            time_spend_random = time.time() - start_time_random

            # Give the model-based challengers as much time as the random
            # ones actually consumed.
            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers_smac,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(0.01, time_spend_random),
                min_number_of_runs=1)
        else:
            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=selection,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(0.01, time_spend),
                min_number_of_runs=2)

        print("Incumbent: {}, Performance: {}".format(self.incumbent,
                                                      inc_perf))

        if self.scenario.shared_model:
            pSMAC.write(run_history=self.runhistory,
                        output_directory=self.scenario.output_dir,
                        num_run=self.num_run)

        # Log through the instance logger, like every other message here.
        self.logger.debug(
            "Remaining budget: %f (wallclock), %f (ta costs), "
            "%f (target runs)",
            self.stats.get_remaing_time_budget(),
            self.stats.get_remaining_ta_budget(),
            self.stats.get_remaining_ta_runs())

        if self.stats.is_budget_exhausted():
            break

        self.stats.print_stats(debug_out=True)

    return self.incumbent
def run(self):
    """Run the model-guided local-search loop until the budget is exhausted.

    After the initial design, every iteration retrains the model on the
    run history, updates the acquisition function with the incumbent's
    cost, picks a start point (the incumbent in the first iteration,
    afterwards either a freshly sampled configuration with probability
    ``self.restart_prob`` or a perturbed incumbent), runs
    ``self.local_search`` from it and races the local incumbent against
    the global incumbent.

    Returns
    -------
    incumbent
        The best found configuration (``self.incumbent``).

    Raises
    ------
    FirstRunCrashedException
        Re-raised from the initial design when
        ``scenario.abort_on_first_run_crash`` is set.
    """
    self.stats.start_timing()
    try:
        self.incumbent = self.initial_design.run()
    except FirstRunCrashedException:
        if self.scenario.abort_on_first_run_crash:
            raise

    # Main loop
    iteration = 1
    while True:
        if self.scenario.shared_model:
            pSMAC.read(run_history=self.runhistory,
                       output_dirs=self.scenario.input_psmac_dirs,
                       configuration_space=self.config_space,
                       logger=self.logger)

        # model training
        self.logger.info("Model Training")
        X, Y = self.rh2EPM.transform(self.runhistory)
        self.model.train(X, Y)
        self.acquisition_func.update(
            model=self.model,
            eta=self.runhistory.get_cost(self.incumbent))

        if iteration == 1:
            start_point = self.incumbent
        elif self.rng.rand() < self.restart_prob:
            # Restart
            self.logger.info("Restart Search")
            start_point = self.scenario.cs.sample_configuration()
        else:
            # perturb the incumbent
            self.logger.info("Pertubate Incumbent")
            start_point = self.incumbent
            for _ in range(self.pertubation_steps):
                # Draw the neighbourhood seed *from* the RNG. Calling
                # ``self.rng.seed()`` instead would reseed the shared RNG
                # and hand ``None`` to the neighbourhood generator, which
                # makes runs irreproducible.
                neighbours = list(get_one_exchange_neighbourhood(
                    start_point, seed=self.rng.randint(0, 2 ** 31 - 1)))
                if not neighbours:
                    # nothing to move to; keep the current start point
                    break
                # Pick the neighbour with the seeded RNG as well, rather
                # than the unseeded global ``random`` module.
                start_point = neighbours[self.rng.randint(len(neighbours))]

        # SLS
        self.logger.info("SLS")
        local_inc = self.local_search(start_point=start_point)

        # decide global inc
        self.logger.info("Race local incumbent against global incumbent")
        # don't be too aggressive here
        self.intensifier.minR = self.slow_race_minR
        self.intensifier.Adaptive_Capping_Slackfactor = \
            self.slow_race_adaptive_capping_factor
        # log traj
        self.incumbent, _ = self.intensifier.intensify(
            challengers=[local_inc],
            incumbent=self.incumbent,
            run_history=self.runhistory,
            aggregate_func=self.aggregate_func,
            time_bound=0.01,
            log_traj=True)
        if self.incumbent == local_inc:
            self.logger.info("Changed global incumbent!")

        if self.scenario.shared_model:
            pSMAC.write(run_history=self.runhistory,
                        output_directory=self.stats.output_dir,
                        num_run=self.num_run)

        iteration += 1

        self.logger.debug(
            "Remaining budget: %f (wallclock), "
            "%f (ta costs), %f (target runs)",
            self.stats.get_remaing_time_budget(),
            self.stats.get_remaining_ta_budget(),
            self.stats.get_remaining_ta_runs())

        if self.stats.is_budget_exhausted():
            break

        self.stats.print_stats(debug_out=True)

    return self.incumbent
def run(self) -> Configuration:
    """Drive the optimization loop until the budget is spent or a stop is requested.

    Each pass asks the intensifier for the next run together with an
    intent. Depending on that intent the run is registered as RUNNING in
    the run history and submitted to the runner, skipped, or the loop
    waits on the runner. Finished runs are handed to
    ``_incorporate_run_results``. On termination all pending runs are
    awaited and incorporated before returning.

    Returns
    -------
    Configuration
        ``self.incumbent`` at the time the loop terminates.

    Raises
    ------
    NotImplementedError
        If the intensifier returns an intent other than RUN, SKIP or WAIT.
    """
    self.start()

    n_objectives = len(
        self.scenario.multi_objectives)  # type: ignore[attr-defined] # noqa F821

    # Main BO loop
    while True:
        if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
            pSMAC.read(
                run_history=self.runhistory,
                output_dirs=self.scenario.input_psmac_dirs,  # type: ignore[attr-defined] # noqa F821
                configuration_space=self.config_space,
                logger=self.logger,
            )

        tick = time.time()

        # Ask for the next configuration to intensify; the initial design
        # configurations are served through the same call.
        intent, next_run = self.intensifier.get_next_run(
            challengers=self.initial_design_configs,
            incumbent=self.incumbent,
            chooser=self.epm_chooser,
            run_history=self.runhistory,
            repeat_configs=self.intensifier.repeat_configs,
            num_workers=self.tae_runner.num_workers(),
        )

        # Drop the chosen config from the initial design so it is not
        # proposed a second time.
        self.initial_design_configs = [
            cfg for cfg in self.initial_design_configs
            if cfg != next_run.config
        ]

        # A fresh time bound is computed only when the intensifier starts
        # on a new challenger; otherwise the running bound is extended.
        if self.intensifier.num_run != 0:
            previous_bound = time_left
            time_spent = time_spent + (time.time() - tick)
            time_left = self._get_timebound_for_intensification(
                time_spent, update=True)
            self.logger.debug(
                "Updated intensification time bound from %f to %f",
                previous_bound,
                time_left,
            )
        else:
            time_spent = time.time() - tick
            time_left = self._get_timebound_for_intensification(
                time_spent, update=False)
            self.logger.debug("New intensification time bound: %f",
                              time_left)

        # Never launch anything new once the budget is gone.
        if self.stats.is_budget_exhausted():
            intent = RunInfoIntent.SKIP

        if intent == RunInfoIntent.RUN:
            # Record the launch in the run history with status RUNNING and
            # a placeholder cost; the entry is updated once the run has
            # finished and been processed.
            if n_objectives == 1:
                placeholder_cost = float(MAXINT)
            else:
                placeholder_cost = np.full(n_objectives, float(MAXINT))
            self.runhistory.add(
                config=next_run.config,
                cost=placeholder_cost,
                time=0.0,
                status=StatusType.RUNNING,
                instance_id=next_run.instance,
                seed=next_run.seed,
                budget=next_run.budget,
            )

            next_run.config.config_id = self.runhistory.config_ids[
                next_run.config]

            self.tae_runner.submit_run(run_info=next_run)

            # Budget exhaustion is judged on elapsed time (known only when
            # a run finishes) and on the number of target runs. Counting
            # the submission here keeps the loop from launching more runs
            # than the scenario allows.
            self.stats.submitted_ta_runs += 1
        elif intent == RunInfoIntent.WAIT:
            # No decision possible until more results arrive.
            self.tae_runner.wait()
        elif intent != RunInfoIntent.SKIP:
            # SKIP needs no action (the intensifier merely moves on to a
            # new iteration); anything else is unknown.
            raise NotImplementedError(
                "No other RunInfoIntent has been coded!")

        # Fold every finished run into the run history; this also checks
        # for a new incumbent.
        for done_info, done_result in self.tae_runner.get_finished_runs():
            self._incorporate_run_results(done_info, done_result, time_left)

        if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
            assert self.scenario.output_dir_for_this_run is not None  # please mypy
            pSMAC.write(
                run_history=self.runhistory,
                output_directory=self.scenario.output_dir_for_this_run,  # type: ignore[attr-defined] # noqa F821
                logger=self.logger,
            )

        budget_message = (
            "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
            % (
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs(),
            ))
        self.logger.debug(budget_message)

        if not (self.stats.is_budget_exhausted() or self._stop):
            # print stats at the end of each intensification iteration
            if self.intensifier.iteration_done:
                self.stats.print_stats(debug_out=True)
            continue

        if self.stats.is_budget_exhausted():
            self.logger.debug("Exhausted configuration budget")
        else:
            self.logger.debug(
                "Shutting down because a configuration or callback returned status STOP"
            )

        # The run-count budget may be used up while submitted runs are
        # still in flight: wait for them and incorporate their results.
        while self.tae_runner.pending_runs():
            self.tae_runner.wait()

            for done_info, done_result in self.tae_runner.get_finished_runs():
                self._incorporate_run_results(done_info, done_result,
                                              time_left)

        # No more resources: leave the intensification loop.
        break

    return self.incumbent
def test_write(self):
    """Write a run history with pSMAC, read it back and compare the JSON.

    Six runs over four configurations are expected in the written file;
    a fifth configuration is added with an external origin and must not
    show up in it. Reading the directory back must leave the number of
    entries in the run history unchanged.
    """
    # The nulls make sure that we correctly emit the python None value
    expected_json = (
        '{"data": [[[1, "branin", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[1, "branini", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[2, "branini", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[2, null, 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[3, "branin-hoo", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[4, null, 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]]],'
        '"config_origins": {},'
        '"configs": {'
        '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, '
        '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, '
        '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, '
        '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'
    )

    run_history = RunHistory(aggregate_func=average_cost)
    configuration_space = test_helpers.get_branin_config_space()
    configuration_space.seed(1)

    def add_run(cfg, **extra):
        # every run uses cost 1, time 1, status SUCCESS and seed 1
        run_history.add(cfg, 1, 1, StatusType.SUCCESS, seed=1, **extra)

    # Config on two instances
    first = configuration_space.sample_configuration()
    add_run(first, instance_id='branin')
    add_run(first, instance_id='branini')

    # Another config on a known instance, then the same config on no instance
    second = configuration_space.sample_configuration()
    add_run(second, instance_id='branini')
    add_run(second)

    # New config on new instance
    third = configuration_space.sample_configuration()
    add_run(third, instance_id='branin-hoo')

    # New config on no instance
    fourth = configuration_space.sample_configuration()
    add_run(fourth)

    # External configuration which will not be written to json file!
    external = configuration_space.sample_configuration()
    add_run(external, origin=DataOrigin.EXTERNAL_SAME_INSTANCES)

    logger = logging.getLogger("Test")
    pSMAC.write(run_history, self.tmp_dir, logger=logger)

    size_before_read = len(run_history.data)
    pSMAC.read(run_history=run_history,
               output_dirs=[self.tmp_dir],
               configuration_space=configuration_space,
               logger=logger)
    self.assertEqual(
        size_before_read, len(run_history.data),
        "Runhistory should be the same and not changed after reading")

    output_filename = os.path.join(self.tmp_dir, 'runhistory.json')
    self.assertTrue(os.path.exists(output_filename))

    expected = json.loads(expected_json, object_hook=StatusType.enum_hook)
    with open(output_filename) as fh:
        written = json.load(fh, object_hook=StatusType.enum_hook)
    self.assertEqual(written, expected)
def run(self) -> Configuration:
    """Drive the optimization loop one challenger at a time.

    Each pass obtains a challenger from the intensifier (initial design
    configurations are served through the same call), refreshes the
    intensification time bound when the challenger is new, evaluates the
    challenger against the incumbent, and stops once the budget is
    exhausted.

    Returns
    -------
    Configuration
        ``self.incumbent`` at the time the budget is exhausted.

    Raises
    ------
    FirstRunCrashedException
        Re-raised from the challenger evaluation when
        ``scenario.abort_on_first_run_crash`` is set.
    """
    self.start()

    # Main BO loop
    while True:
        if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
            pSMAC.read(
                run_history=self.runhistory,
                output_dirs=self.scenario.input_psmac_dirs,  # type: ignore[attr-defined] # noqa F821
                configuration_space=self.config_space,
                logger=self.logger,
            )

        tick = time.time()

        # Ask the intensifier which configuration to look at next.
        candidate, is_new = self.intensifier.get_next_challenger(
            challengers=self.initial_design_configs,
            chooser=self.epm_chooser,
            run_history=self.runhistory,
            repeat_configs=self.intensifier.repeat_configs,
        )

        # Drop the candidate from the initial design so it is not served
        # a second time.
        self.initial_design_configs = [
            cfg for cfg in self.initial_design_configs if cfg != candidate
        ]

        # The time bound changes only when a 'new' configuration was
        # sampled as the challenger.
        if is_new:
            time_spent = time.time() - tick
            time_left = self._get_timebound_for_intensification(time_spent)

        if candidate:
            self.logger.debug("Intensify - evaluate challenger")

            bound = max(self.intensifier._min_time, time_left)
            try:
                self.incumbent, _perf = self.intensifier.eval_challenger(
                    challenger=candidate,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    time_bound=bound,
                )
            except FirstRunCrashedException:
                if self.scenario.abort_on_first_run_crash:  # type: ignore[attr-defined] # noqa F821
                    raise

            if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                assert self.scenario.output_dir_for_this_run is not None  # please mypy
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run,  # type: ignore[attr-defined] # noqa F821
                    logger=self.logger,
                )

        budget_message = (
            "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
            % (
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs(),
            ))
        self.logger.debug(budget_message)

        if self.stats.is_budget_exhausted():
            break

        self.stats.print_stats(debug_out=True)

    return self.incumbent
def run_smbo(self):
    """Configure and run the SMAC/ROAR optimisation for the loaded dataset.

    The method proceeds in stages:

    1. reset the data manager and seed the configuration space;
    2. if ``num_metalearning_cfgs > 0`` and the meta-data directory
       exists, compute meta-features and collect metalearning
       suggestions;
    3. build the ``Scenario``, the target-algorithm evaluator and the
       model / run-history transformer for the chosen acquisition
       function (``'EI'`` or ``'EIPS'``);
    4. evaluate the initial design followed by the metalearning
       suggestions;
    5. loop over choose-next / intensify until the budget is exhausted.

    Returns
    -------
    tuple
        ``(runhistory, trajectory, fANOVA_input)``; the same three
        objects are also stored on ``self``.

    Raises
    ------
    ValueError
        If both an include and an exclude list are given for the same
        component type, or if the task, acquisition function or
        configuration mode is not one of the handled values.
    """
    self.watcher.start_task('SMBO')

    # == first things first: load the datamanager
    self.reset_data_manager()

    # == Initialize non-SMBO stuff
    # first create a scenario
    seed = self.seed
    self.config_space.seed(seed)
    num_params = len(self.config_space.get_hyperparameters())
    # allocate a run history
    num_run = self.start_num_run

    # Initialize some SMAC dependencies
    runhistory = RunHistory(aggregate_func=average_cost)

    # == METALEARNING suggestions
    # we start by evaluating the defaults on the full dataset again
    # and add the suggestions from metalearning behind it
    if self.num_metalearning_cfgs > 0:
        if self.metadata_directory is None:
            metalearning_directory = os.path.dirname(
                autosklearn.metalearning.__file__)
            # There is no multilabel data in OpenML
            if self.task == MULTILABEL_CLASSIFICATION:
                meta_task = BINARY_CLASSIFICATION
            else:
                meta_task = self.task
            self.metadata_directory = os.path.join(
                metalearning_directory, 'files',
                '%s_%s_%s' % (
                    self.metric,
                    TASK_TYPES_TO_STRING[meta_task],
                    'sparse' if self.datamanager.info['is_sparse']
                    else 'dense'))

        if os.path.exists(self.metadata_directory):
            self.logger.info('Metadata directory: %s',
                             self.metadata_directory)
            meta_base = MetaBase(self.config_space, self.metadata_directory)

            try:
                meta_base.remove_dataset(self.dataset_name)
            except Exception:
                # Best effort: a failure to remove the dataset is ignored
                # (presumably it simply is not part of the meta base -
                # TODO confirm). Only ``Exception`` is caught so that
                # KeyboardInterrupt / SystemExit still propagate.
                pass

            # A quarter of the total time is reserved for meta-features;
            # whatever the plain meta-features leave over goes to the
            # encoded ones.
            metafeature_calculation_time_limit = int(
                self.total_walltime_limit / 4)
            metafeature_calculation_start_time = time.time()
            meta_features = self._calculate_metafeatures_with_limits(
                metafeature_calculation_time_limit)
            metafeature_calculation_end_time = time.time()
            metafeature_calculation_time_limit = \
                metafeature_calculation_time_limit - (
                    metafeature_calculation_end_time -
                    metafeature_calculation_start_time)

            if metafeature_calculation_time_limit < 1:
                self.logger.warning(
                    'Time limit for metafeature calculation less '
                    'than 1 seconds (%f). Skipping calculation '
                    'of metafeatures for encoded dataset.',
                    metafeature_calculation_time_limit)
                meta_features_encoded = None
            else:
                with warnings.catch_warnings():
                    warnings.showwarning = self._send_warnings_to_log
                    self.datamanager.perform1HotEncoding()
                    meta_features_encoded = \
                        self._calculate_metafeatures_encoded_with_limits(
                            metafeature_calculation_time_limit)

            # In case there is a problem calculating the encoded meta-features
            if meta_features is None:
                if meta_features_encoded is not None:
                    meta_features = meta_features_encoded
            else:
                if meta_features_encoded is not None:
                    meta_features.metafeature_values.update(
                        meta_features_encoded.metafeature_values)

            if meta_features is not None:
                meta_base.add_dataset(self.dataset_name, meta_features)
                # Do mean imputation of the meta-features - should be done
                # specific for each prediction model!
                all_metafeatures = meta_base.get_metafeatures(
                    features=list(meta_features.keys()))
                all_metafeatures.fillna(all_metafeatures.mean(),
                                        inplace=True)

                with warnings.catch_warnings():
                    warnings.showwarning = self._send_warnings_to_log
                    metalearning_configurations = \
                        self.collect_metalearning_suggestions(meta_base)
                if metalearning_configurations is None:
                    metalearning_configurations = []
                self.reset_data_manager()

                self.logger.info('%s', meta_features)

                # Convert meta-features into a dictionary because the
                # scenario expects a dictionary
                meta_features_dict = {
                    dataset: series.values
                    for dataset, series in all_metafeatures.iterrows()
                }
                meta_features_list = np.array([
                    meta_features[meta_feature_name].value
                    for meta_feature_name in all_metafeatures.columns
                ]).reshape((1, -1))
                self.logger.info(list(meta_features_dict.keys()))
        else:
            meta_features = None
            # Report the directory that was actually checked; the local
            # name used when deriving the default path is unbound if the
            # directory was supplied by the caller.
            self.logger.warning('Could not find meta-data directory %s',
                                self.metadata_directory)
    else:
        meta_features = None

    if meta_features is None:
        if self.acquisition_function == 'EIPS':
            self.logger.critical('Reverting to acquisition function EI!')
            self.acquisition_function = 'EI'
        meta_features_list = []
        meta_features_dict = {}
        metalearning_configurations = []

    if self.resampling_strategy in ['partial-cv',
                                    'partial-cv-iterative-fit']:
        num_folds = self.resampling_strategy_args['folds']
        instances = [[json.dumps({'task_id': self.dataset_name,
                                  'fold': fold_number})]
                     for fold_number in range(num_folds)]
    else:
        instances = [[json.dumps({'task_id': self.dataset_name})]]

    startup_time = self.watcher.wall_elapsed(self.dataset_name)
    total_walltime_limit = self.total_walltime_limit - startup_time - 5
    scenario_dict = {
        'cs': self.config_space,
        'cutoff-time': self.func_eval_time_limit,
        'memory-limit': self.memory_limit,
        'wallclock-limit': total_walltime_limit,
        'output-dir': self.backend.get_smac_output_directory(self.seed),
        'shared-model': self.shared_mode,
        'run-obj': 'quality',
        'deterministic': 'true',
        'instances': instances,
    }

    if self.configuration_mode == 'RANDOM':
        scenario_dict['minR'] = len(instances)
        scenario_dict['initial_incumbent'] = 'RANDOM'

    self.scenario = Scenario(scenario_dict)

    # TODO rebuild target algorithm to be it's own target algorithm
    # evaluator, which takes into account that a run can be killed prior
    # to the model being fully fitted; thus putting intermediate results
    # into a queue and querying them once the time is over
    exclude = dict()
    include = dict()
    if self.include_preprocessors is not None and \
            self.exclude_preprocessors is not None:
        raise ValueError('Cannot specify include_preprocessors and '
                         'exclude_preprocessors.')
    elif self.include_preprocessors is not None:
        include['preprocessor'] = self.include_preprocessors
    elif self.exclude_preprocessors is not None:
        exclude['preprocessor'] = self.exclude_preprocessors

    # The estimator lists must be checked against each other (the check
    # must not look at the preprocessor exclusion list).
    if self.include_estimators is not None and \
            self.exclude_estimators is not None:
        raise ValueError('Cannot specify include_estimators and '
                         'exclude_estimators.')
    elif self.include_estimators is not None:
        if self.task in CLASSIFICATION_TASKS:
            include['classifier'] = self.include_estimators
        elif self.task in REGRESSION_TASKS:
            include['regressor'] = self.include_estimators
        else:
            raise ValueError(self.task)
    elif self.exclude_estimators is not None:
        if self.task in CLASSIFICATION_TASKS:
            exclude['classifier'] = self.exclude_estimators
        elif self.task in REGRESSION_TASKS:
            exclude['regressor'] = self.exclude_estimators
        else:
            raise ValueError(self.task)

    ta = ExecuteTaFuncWithQueue(
        backend=self.backend,
        autosklearn_seed=seed,
        resampling_strategy=self.resampling_strategy,
        initial_num_run=num_run,
        logger=self.logger,
        include=include,
        exclude=exclude,
        metric=self.metric,
        memory_limit=self.memory_limit,
        disable_file_output=self.disable_file_output,
        **self.resampling_strategy_args)

    types, bounds = get_types(self.config_space,
                              self.scenario.feature_array)

    # TODO extract generation of SMAC object into it's own function for
    # testing
    if self.acquisition_function == 'EI':
        model = RandomForestWithInstances(types=types,
                                          bounds=bounds,
                                          seed=1,
                                          num_trees=10)
        rh2EPM = RunHistory2EPM4Cost(
            num_params=num_params,
            scenario=self.scenario,
            success_states=[StatusType.SUCCESS,
                            StatusType.MEMOUT,
                            StatusType.TIMEOUT],
            impute_censored_data=False,
            impute_state=None)
        _smac_arguments = dict(scenario=self.scenario,
                               model=model,
                               rng=seed,
                               runhistory2epm=rh2EPM,
                               tae_runner=ta,
                               runhistory=runhistory)
    elif self.acquisition_function == 'EIPS':
        rh2EPM = RunHistory2EPM4EIPS(
            num_params=num_params,
            scenario=self.scenario,
            success_states=[StatusType.SUCCESS,
                            StatusType.MEMOUT,
                            StatusType.TIMEOUT],
            impute_censored_data=False,
            impute_state=None)
        model = UncorrelatedMultiObjectiveRandomForestWithInstances(
            ['cost', 'runtime'],
            types=types,
            bounds=bounds,
            num_trees=10,
            instance_features=meta_features_list,
            seed=1)
        acquisition_function = EIPS(model)
        _smac_arguments = dict(scenario=self.scenario,
                               model=model,
                               rng=seed,
                               tae_runner=ta,
                               runhistory2epm=rh2EPM,
                               runhistory=runhistory,
                               acquisition_function=acquisition_function)
    else:
        raise ValueError('Unknown acquisition function value %s!' %
                         self.acquisition_function)

    if self.configuration_mode == 'SMAC':
        smac = SMAC(**_smac_arguments)
    elif self.configuration_mode in ['ROAR', 'RANDOM']:
        # ROAR is built without a model and run-history transformer
        for not_in_roar in ['runhistory2epm', 'model']:
            if not_in_roar in _smac_arguments:
                del _smac_arguments[not_in_roar]
        smac = ROAR(**_smac_arguments)
    else:
        raise ValueError(self.configuration_mode)

    smac.solver.stats.start_timing()
    # == first, evaluate all metelearning and default configurations
    smac.solver.incumbent = smac.solver.initial_design.run()

    for challenger in metalearning_configurations:
        smac.solver.incumbent, _ = smac.solver.intensifier.intensify(
            challengers=[challenger],
            incumbent=smac.solver.incumbent,
            run_history=smac.solver.runhistory,
            aggregate_func=smac.solver.aggregate_func,
            time_bound=self.total_walltime_limit)

        if smac.solver.scenario.shared_model:
            pSMAC.write(run_history=smac.solver.runhistory,
                        output_directory=smac.solver.scenario.output_dir,
                        num_run=self.seed)

        if smac.solver.stats.is_budget_exhausted():
            break

    # == after metalearning run SMAC loop
    while True:
        if smac.solver.scenario.shared_model:
            pSMAC.read(run_history=smac.solver.runhistory,
                       output_dirs=glob.glob(
                           self.backend.get_smac_output_glob()),
                       configuration_space=self.config_space,
                       logger=self.logger)

        choose_next_start_time = time.time()
        try:
            challengers = self.choose_next(smac)
        except Exception as e:
            # Deliberate fallback: keep optimising with a random
            # configuration when the model-based choice fails.
            self.logger.error(e)
            self.logger.error("Error in getting next configurations "
                              "with SMAC. Using random configuration!")
            next_config = self.config_space.sample_configuration()
            challengers = [next_config]
        time_for_choose_next = time.time() - choose_next_start_time
        self.logger.info('Used %g seconds to find next '
                         'configurations', time_for_choose_next)

        # Intensify for at least one second.
        time_for_choose_next = max(time_for_choose_next, 1.0)
        smac.solver.incumbent, _ = smac.solver.intensifier.intensify(
            challengers=challengers,
            incumbent=smac.solver.incumbent,
            run_history=smac.solver.runhistory,
            aggregate_func=smac.solver.aggregate_func,
            time_bound=time_for_choose_next)

        if smac.solver.scenario.shared_model:
            pSMAC.write(run_history=smac.solver.runhistory,
                        output_directory=smac.solver.scenario.output_dir,
                        num_run=self.seed)

        if smac.solver.stats.is_budget_exhausted():
            break

    self.runhistory = smac.solver.runhistory
    self.trajectory = smac.solver.intensifier.traj_logger.trajectory
    smac.runhistory = self.runhistory
    self.fANOVA_input = smac.get_X_y()

    return self.runhistory, self.trajectory, self.fANOVA_input
def test_write(self):
    """Write a run history with pSMAC and compare the resulting JSON file.

    Six runs over four configurations are added; the file
    ``runhistory.json`` in ``self.tmp_dir`` must exist afterwards and
    decode to the same structure as the expected JSON below.
    """
    # The nulls make sure that we correctly emit the python None value
    expected_json = (
        '{"data": [[[1, "branin", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[1, "branini", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[2, "branini", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[2, null, 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[3, "branin-hoo", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[4, null, 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]]],'
        '"configs": {'
        '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, '
        '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, '
        '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, '
        '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'
    )

    run_history = RunHistory(aggregate_func=average_cost)
    configuration_space = test_helpers.get_branin_config_space()
    configuration_space.seed(1)

    def add_run(cfg, **extra):
        # every run uses cost 1, time 1, status SUCCESS and seed 1
        run_history.add(cfg, 1, 1, StatusType.SUCCESS, seed=1, **extra)

    # Config on two instances
    first = configuration_space.sample_configuration()
    add_run(first, instance_id='branin')
    add_run(first, instance_id='branini')

    # Another config on a known instance, then the same config on no instance
    second = configuration_space.sample_configuration()
    add_run(second, instance_id='branini')
    add_run(second)

    # New config on new instance
    third = configuration_space.sample_configuration()
    add_run(third, instance_id='branin-hoo')

    # New config on no instance
    fourth = configuration_space.sample_configuration()
    add_run(fourth)

    pSMAC.write(run_history, self.tmp_dir, 20)

    output_filename = os.path.join(self.tmp_dir, 'runhistory.json')
    self.assertTrue(os.path.exists(output_filename))

    expected = json.loads(expected_json, object_hook=StatusType.enum_hook)
    with open(output_filename) as fh:
        written = json.load(fh, object_hook=StatusType.enum_hook)

    print(written)
    print(expected)
    self.assertEqual(written, expected)
def run(self):
    """Run the Bayesian optimization loop until the budget is exhausted.

    Each round prints the incumbent's current cost, selects challengers
    and then intensifies. Without a server (``self.server is None``) the
    intensifier runs locally. With a server, the incumbent, run history
    and challengers are pushed to it, the result is pulled back and
    merged into ``self.runhistory``, and the pulled incumbent replaces
    the current one if its lowest recorded cost is strictly smaller.

    Returns
    -------
    incumbent
        The best found configuration (``self.incumbent``).
    """
    self.start()

    # Counts the SMBO rounds for the progress output.
    counter = 0

    # Main BO loop
    while True:
        # Print the best result of every SMBO round (including the
        # first round, "SMBO 0").
        print('SMBO ' + str(counter) + ': ' +
              str(self.runhistory.get_cost(self.incumbent)))
        counter += 1

        if self.scenario.shared_model:
            pSMAC.read(run_history=self.runhistory,
                       output_dirs=self.scenario.input_psmac_dirs,
                       configuration_space=self.config_space,
                       logger=self.logger)

        start_time = time.time()
        X, Y = self.rh2EPM.transform(self.runhistory)

        self.logger.debug("Search for next configuration")
        # get all found configurations sorted according to acq
        challengers = self.choose_next(X, Y)

        time_spent = time.time() - start_time
        time_left = self._get_timebound_for_intensification(time_spent)

        self.logger.debug("Intensify")

        if self.server is None:
            self.incumbent, _ = self.intensifier.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(self.intensifier._min_time, time_left))
        else:
            # Hand the work to the workers: they read the losses, add
            # them to their history and run the new challengers.
            print(time_left)
            self.server.push(incumbent=self.incumbent,
                             runhistory=self.runhistory,
                             challengers=challengers.challengers,
                             time_left=time_left)
            # Read the workers' run history and merge it into ours.
            incumbent, new_runhistory = self.server.pull()
            self.runhistory.update(new_runhistory)

            # After the merge, check whether there is a new incumbent,
            # because the worker does not hold the complete run history.
            runhistory_old = self.runhistory.get_history_for_config(
                self.incumbent)
            runhistory_new = self.runhistory.get_history_for_config(
                incumbent)

            # Lowest recorded cost per configuration. A configuration
            # without any recorded run counts as infinitely bad instead
            # of crashing ``min`` on an empty sequence.
            lowest_cost_old = min((entry[0] for entry in runhistory_old),
                                  default=float('inf'))
            lowest_cost_new = min((entry[0] for entry in runhistory_new),
                                  default=float('inf'))

            if lowest_cost_new < lowest_cost_old:
                # Replace with the new incumbent.
                self.incumbent = incumbent
                # NOTE: ``self._compare_configs(incumbent=..., challenger=...,
                # run_history=..., aggregate_func=..., log_traj=...)`` could
                # be considered for this comparison instead.

        if self.scenario.shared_model:
            pSMAC.write(
                run_history=self.runhistory,
                output_directory=self.scenario.output_dir_for_this_run)

        # Log through the instance logger, like every other message here.
        self.logger.debug(
            "Remaining budget: %f (wallclock), %f (ta costs), "
            "%f (target runs)",
            self.stats.get_remaing_time_budget(),
            self.stats.get_remaining_ta_budget(),
            self.stats.get_remaining_ta_runs())

        if self.stats.is_budget_exhausted():
            break

        self.stats.print_stats(debug_out=True)

    return self.incumbent