def run_roar(python_path, w_dir, n_iter=5, input_file='../rawAllx1000.json',
             seeds=None, task_ids=None, max_tries=10):
    """Tune the cut-off parameter with SMAC's ROAR facade.

    Parameters
    ----------
    python_path : str
        Python interpreter path forwarded to ``find_cut_off.main``.
    w_dir : str
        Working directory forwarded to ``find_cut_off.main``.
    n_iter : int
        Number of iterations per evaluation.
    input_file : str
        Input data file forwarded to ``find_cut_off.main``.
    seeds : list or None
        Seeds to evaluate with; defaults to ``[1]``.
    task_ids : list or None
        Task ids forwarded to ``find_cut_off.main``.
    max_tries : int
        Maximum number of ROAR function evaluations.

    Returns
    -------
    tuple
        ``(incumbent_configuration, cost_of_incumbent, roar_object)``.
    """
    from smac.configspace import ConfigurationSpace
    from ConfigSpace.hyperparameters import UniformIntegerHyperparameter
    from smac.scenario.scenario import Scenario
    from smac.facade.roar_facade import ROAR

    # BUG FIX: ``seeds=[1]`` was a mutable default argument, shared across
    # calls and mutable by callees; use a ``None`` sentinel instead.
    if seeds is None:
        seeds = [1]

    def test_func(cutoff):
        # ROAR passes a Configuration object; extract the integer cut-off.
        cutoff = cutoff.get('x1')
        print(cutoff)
        result = find_cut_off.main(python_path=python_path, w_dir=w_dir,
                                   iter=n_iter, input_file=input_file,
                                   cutoffs=[cutoff], seeds=seeds,
                                   task_ids=task_ids)
        # Keep only scores strictly inside (0, 1); other values are treated
        # as degenerate/failed runs.
        cleaned = [x[1] for x in result if 0.0 < x[1] < 1.0]
        mean = np.mean(cleaned) if cleaned else 0.0
        mean = mean if mean != 1.0 else 0.0
        # SMAC minimizes, so return 1 - mean score.
        return 1.0 - mean

    cs = ConfigurationSpace()
    cutoff_parameter = UniformIntegerHyperparameter('x1', 1, 99,
                                                    default_value=50)
    cs.add_hyperparameter(cutoff_parameter)

    scenario = Scenario({
        "run_obj": "quality",         # we optimize quality (alternatively runtime)
        "runcount-limit": max_tries,  # maximum function evaluations
        "cs": cs,                     # configuration space
        "deterministic": "true",
        "abort_on_first_run_crash": "false",
    })

    roar = ROAR(scenario=scenario, tae_runner=test_func, rng=1234)
    x = roar.optimize()
    # Re-evaluate the incumbent once to report its cost.
    cost = test_func(x)
    return x, cost, roar
def _main_cli(self):
    """Main function of SMAC for CLI interface

    Returns
    -------
    instance
        optimizer
    """
    self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

    # Parse the command line.
    cmd_reader = CMDReader()
    args, _ = cmd_reader.read_cmd()

    # Configure the root logger according to the requested verbosity.
    root_logger = logging.getLogger()
    root_logger.setLevel(args.verbose_level)
    logger_handler = logging.StreamHandler(stream=sys.stdout)
    if root_logger.level >= logging.INFO:
        # Compact format at normal verbosity.
        formatter = logging.Formatter("%(levelname)s:\t%(message)s")
    else:
        # Timestamped format at debug verbosity.
        formatter = logging.Formatter(
            "%(asctime)s:%(levelname)s:%(name)s:%(message)s",
            "%Y-%m-%d %H:%M:%S")
    logger_handler.setFormatter(formatter)
    root_logger.addHandler(logger_handler)
    # remove default handler
    root_logger.removeHandler(root_logger.handlers[0])

    # Create defaults
    rh = None
    initial_configs = None
    stats = None
    incumbent = None

    # Create scenario-object
    scen = Scenario(args.scenario_file, [])

    # Build the optimizer matching the requested mode.
    # NOTE(review): an unrecognized mode silently returns ``None`` —
    # callers must handle that case.
    if args.mode == "SMAC":
        optimizer = SMAC(scenario=scen,
                         rng=np.random.RandomState(args.seed),
                         runhistory=rh,
                         initial_configurations=initial_configs,
                         stats=stats,
                         restore_incumbent=incumbent,
                         run_id=args.seed)
    elif args.mode == "ROAR":
        optimizer = ROAR(scenario=scen,
                         rng=np.random.RandomState(args.seed),
                         runhistory=rh,
                         initial_configurations=initial_configs,
                         run_id=args.seed)
    elif args.mode == "EPILS":
        optimizer = EPILS(scenario=scen,
                          rng=np.random.RandomState(args.seed),
                          runhistory=rh,
                          initial_configurations=initial_configs,
                          run_id=args.seed)
    else:
        optimizer = None
    return optimizer
def create_smac_rs(benchmark, output_dir: Path, seed: int):
    """Create a ROAR facade (random-search style) for *benchmark*.

    Parameters
    ----------
    benchmark
        Object exposing ``get_configuration_space`` and
        ``objective_function`` (HPOBench-style benchmark — assumption,
        confirm against callers).
    output_dir : Path
        Directory SMAC writes its run output to.
    seed : int
        Seed for the configuration space and SMAC's RNG.

    Returns
    -------
    ROAR
        A configured, not-yet-started optimizer.
    """
    # Set up SMAC-HB
    cs = benchmark.get_configuration_space(seed=seed)

    scenario_dict = {
        "run_obj": "quality",  # we optimize quality (alternative to runtime)
        "wallclock-limit": 60,
        "cs": cs,
        "deterministic": "true",
        "runcount-limit": 200,
        "limit_resources": True,  # Uses pynisher to limit memory and runtime
        "cutoff": 1800,  # runtime limit for target algorithm
        "memory_limit": 10000,  # adapt this to reasonable value for your hardware
        "output_dir": output_dir,
        "abort_on_first_run_crash": True,
    }
    scenario = Scenario(scenario_dict)

    def optimization_function_wrapper(cfg, seed, **kwargs):
        """ Helper-function: simple wrapper to use the benchmark with smac """
        result_dict = benchmark.objective_function(cfg, rng=seed)
        # NOTE(review): this sampled configuration is discarded; its only
        # effect is advancing the configuration space's RNG. Looks like
        # leftover debugging code — confirm before removing.
        cs.sample_configuration()
        return result_dict['function_value']

    smac = ROAR(
        scenario=scenario,
        rng=np.random.RandomState(seed),
        tae_runner=optimization_function_wrapper,
    )
    return smac
def test_inject_stats_and_runhistory_object_to_TAE(self):
    # Constructing a ROAR facade must inject its Stats and RunHistory
    # objects into a tae_runner that was created without them.
    ta = ExecuteTAFuncArray(lambda x: x**2)
    # Freshly built runner carries neither stats nor a runhistory.
    self.assertIsNone(ta.stats)
    self.assertIsNone(ta.runhistory)
    # The facade instance itself is not needed — construction has the
    # injection side effect.
    ROAR(tae_runner=ta, scenario=self.scenario)
    self.assertIsInstance(ta.stats, Stats)
    self.assertIsInstance(ta.runhistory, RunHistory)
def get_roar_object_callback(
    scenario_dict,
    seed,
    ta,
    ta_kwargs,
    metalearning_configurations,
    n_jobs,
    dask_client,
):
    """Random online adaptive racing.

    Builds a ROAR facade for the given scenario; refuses to run when more
    than one worker is requested.
    """
    uses_multiple_workers = n_jobs > 1 or (
        dask_client and len(dask_client.nthreads()) > 1
    )
    if uses_multiple_workers:
        raise ValueError(
            "Please make sure to guard the code invoking Auto-sklearn by "
            "`if __name__ == '__main__'` and remove this exception.")
    return ROAR(
        scenario=Scenario(scenario_dict),
        rng=seed,
        tae_runner=ta,
        tae_runner_kwargs=ta_kwargs,
        run_id=seed,
        dask_client=dask_client,
        n_jobs=n_jobs,
    )
def opt_rosenbrock():
    """Minimize the 2-D Rosenbrock function with plain ROAR.

    Returns the incumbent configuration and the scenario's output
    directory.
    """
    cs = ConfigurationSpace()
    for hp_name, hp_default in (("x1", -3), ("x2", -4)):
        cs.add_hyperparameter(
            UniformFloatHyperparameter(hp_name, -5, 5,
                                       default_value=hp_default))

    scenario_settings = {
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": 50,  # maximum function evaluations
        "cs": cs,  # configuration space
        "deterministic": "true",
        "intensification_percentage": 0.000000001,
    }
    scenario = Scenario(scenario_settings)

    optimizer = ROAR(scenario=scenario,
                     rng=np.random.RandomState(42),
                     tae_runner=rosenbrock_2d)
    incumbent = optimizer.optimize()
    return incumbent, optimizer.scenario.output_dir
def get_roar_object_callback(scenario_dict, seed, ta, **kwargs):
    """Random online adaptive racing.

    http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf
    """
    roar_scenario = Scenario(scenario_dict)
    roar = ROAR(
        scenario=roar_scenario,
        rng=seed,
        tae_runner=ta,
    )
    return roar
def main_cli(self):
    """Main function of SMAC for the CLI interface.

    Parses the command line, builds a scenario (optionally warm-started
    from previous run histories and incumbents), runs the requested
    optimizer, and always dumps the runhistory to disk at the end.

    Raises
    ------
    ValueError
        If the requested mode is neither ``"SMAC"`` nor ``"ROAR"``.
    """
    cmd_reader = CMDReader()
    args_, misc_args = cmd_reader.read_cmd()

    logging.basicConfig(level=args_.verbose_level)
    root_logger = logging.getLogger()
    root_logger.setLevel(args_.verbose_level)

    scen = Scenario(args_.scenario_file, misc_args)

    # Optionally warm-start the run history from earlier runs.
    rh = None
    if args_.warmstart_runhistory:
        aggregate_func = average_cost
        rh = RunHistory(aggregate_func=aggregate_func)
        scen, rh = merge_foreign_data_from_file(
            scenario=scen,
            runhistory=rh,
            in_scenario_fn_list=args_.warmstart_scenario,
            in_runhistory_fn_list=args_.warmstart_runhistory,
            cs=scen.cs,
            aggregate_func=aggregate_func)

    # Optionally seed the optimizer with previous incumbents.
    initial_configs = None
    if args_.warmstart_incumbent:
        initial_configs = [scen.cs.get_default_configuration()]
        for traj_fn in args_.warmstart_incumbent:
            trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn,
                                                           cs=scen.cs)
            initial_configs.append(trajectory[-1]["incumbent"])

    if args_.modus == "SMAC":
        optimizer = SMAC(scenario=scen,
                         rng=np.random.RandomState(args_.seed),
                         runhistory=rh,
                         initial_configurations=initial_configs)
    elif args_.modus == "ROAR":
        optimizer = ROAR(scenario=scen,
                         rng=np.random.RandomState(args_.seed),
                         runhistory=rh,
                         initial_configurations=initial_configs)
    else:
        # BUG FIX: an unknown mode previously left ``optimizer`` unbound
        # and crashed below with a confusing NameError.
        raise ValueError("Unknown optimizer mode: %s" % args_.modus)

    try:
        optimizer.optimize()
    finally:
        # ensure that the runhistory is always dumped in the end
        if scen.output_dir is not None:
            optimizer.solver.runhistory.save_json(
                fn=os.path.join(scen.output_dir, "runhistory.json"))
def main_cli(self):
    """Main function of SMAC for the CLI interface.

    Parses the command line, builds a scenario (optionally warm-started
    from previous run histories and incumbents), and runs the requested
    optimizer, logging TAE abort / first-run-crash failures.

    Raises
    ------
    ValueError
        If the requested mode is neither ``"SMAC"`` nor ``"ROAR"``.
    """
    self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

    cmd_reader = CMDReader()
    args_, misc_args = cmd_reader.read_cmd()

    logging.basicConfig(level=args_.verbose_level)
    root_logger = logging.getLogger()
    root_logger.setLevel(args_.verbose_level)

    scen = Scenario(args_.scenario_file, misc_args, run_id=args_.seed)

    # Optionally warm-start the run history from earlier runs.
    rh = None
    if args_.warmstart_runhistory:
        aggregate_func = average_cost
        rh = RunHistory(aggregate_func=aggregate_func)
        scen, rh = merge_foreign_data_from_file(
            scenario=scen,
            runhistory=rh,
            in_scenario_fn_list=args_.warmstart_scenario,
            in_runhistory_fn_list=args_.warmstart_runhistory,
            cs=scen.cs,
            aggregate_func=aggregate_func)

    # Optionally seed the optimizer with previous incumbents.
    initial_configs = None
    if args_.warmstart_incumbent:
        initial_configs = [scen.cs.get_default_configuration()]
        for traj_fn in args_.warmstart_incumbent:
            trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn,
                                                           cs=scen.cs)
            initial_configs.append(trajectory[-1]["incumbent"])

    if args_.modus == "SMAC":
        optimizer = SMAC(scenario=scen,
                         rng=np.random.RandomState(args_.seed),
                         runhistory=rh,
                         initial_configurations=initial_configs)
    elif args_.modus == "ROAR":
        optimizer = ROAR(scenario=scen,
                         rng=np.random.RandomState(args_.seed),
                         runhistory=rh,
                         initial_configurations=initial_configs)
    else:
        # BUG FIX: an unknown mode previously left ``optimizer`` unbound
        # and crashed below with a confusing NameError.
        raise ValueError("Unknown optimizer mode: %s" % args_.modus)

    try:
        optimizer.optimize()
    except (TAEAbortException, FirstRunCrashedException) as err:
        self.logger.error(err)
def get_roar_object_callback(
    scenario_dict,
    seed,
    ta,
    backend,
    metalearning_configurations,
    runhistory,
):
    """Random online adaptive racing."""
    # Share output directories between parallel SMAC runs.
    scenario_dict['input_psmac_dirs'] = backend.get_smac_output_glob()
    roar_scenario = Scenario(scenario_dict)
    return ROAR(
        scenario=roar_scenario,
        rng=seed,
        tae_runner=ta,
        runhistory=runhistory,
        run_id=seed,
    )
def get_random_search_object_callback(
    scenario_dict,
    seed,
    ta,
    backend,
    metalearning_configurations,
    runhistory,
):
    """Random search."""
    # Share output directories between parallel SMAC runs.
    scenario_dict['input_psmac_dirs'] = backend.get_smac_output_glob()
    # Start from a random incumbent and set minR to the instance count so
    # ROAR behaves like plain random search.
    scenario_dict['minR'] = len(scenario_dict['instances'])
    scenario_dict['initial_incumbent'] = 'RANDOM'
    rs_scenario = Scenario(scenario_dict)
    return ROAR(
        scenario=rs_scenario,
        rng=seed,
        tae_runner=ta,
        runhistory=runhistory,
        run_id=seed,
    )
def get_roar_object_callback(
    scenario_dict,
    seed,
    ta,
    backend,
    metalearning_configurations,
    runhistory,
    run_id,
):
    """Random online adaptive racing.

    http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf
    """
    # Share output directories between parallel SMAC runs.
    scenario_dict['input_psmac_dirs'] = backend.get_smac_output_glob()
    roar = ROAR(
        scenario=Scenario(scenario_dict),
        rng=seed,
        tae_runner=ta,
        runhistory=runhistory,
        run_id=run_id,
    )
    return roar
def get_random_search_object_callback(scenario_dict,
                                      seed,
                                      ta,
                                      ta_kwargs,
                                      metalearning_configurations,
                                      n_jobs,
                                      dask_client):
    """Random search.

    ROAR with a random initial incumbent; refuses to run when more than
    one worker is requested.
    """
    wants_parallelism = n_jobs > 1 or (
        dask_client and len(dask_client.nthreads()) > 1
    )
    if wants_parallelism:
        raise ValueError(
            "Please make sure to guard the code invoking Auto-sklearn by "
            "`if __name__ == '__main__'` and remove this exception.")
    scenario_dict['minR'] = len(scenario_dict['instances'])
    scenario_dict['initial_incumbent'] = 'RANDOM'
    return ROAR(
        scenario=Scenario(scenario_dict),
        rng=seed,
        tae_runner=ta,
        tae_runner_kwargs=ta_kwargs,
        run_id=seed,
        dask_client=dask_client,
        n_jobs=n_jobs,
    )
def get_random_search_for_sh_callback(
    scenario_dict,
    seed,
    ta,
    ta_kwargs,
    backend,
    metalearning_configurations,
):
    """Random search driven by a successive-halving / Hyperband intensifier."""
    from smac.intensification.successive_halving import SuccessiveHalving
    from smac.intensification.hyperband import Hyperband
    from smac.scenario.scenario import Scenario

    # Share output directories between parallel SMAC runs and start from a
    # random incumbent.
    scenario_dict['input_psmac_dirs'] = backend.get_smac_output_glob()
    scenario_dict['minR'] = len(scenario_dict['instances'])
    scenario_dict['initial_incumbent'] = 'RANDOM'
    sh_scenario = Scenario(scenario_dict)

    # ``budget_type``, ``bandit_strategy``, ``initial_budget`` and ``eta``
    # are resolved from the enclosing scope, exactly as before.
    ta_kwargs['budget_type'] = budget_type

    bandit_classes = {'sh': SuccessiveHalving, 'hb': Hyperband}
    if bandit_strategy not in bandit_classes:
        raise ValueError(bandit_strategy)
    bandit = bandit_classes[bandit_strategy]

    return ROAR(
        scenario=sh_scenario,
        rng=seed,
        tae_runner=ta,
        tae_runner_kwargs=ta_kwargs,
        run_id=seed,
        intensifier=bandit,
        intensifier_kwargs={'initial_budget': initial_budget,
                            'max_budget': 100,
                            'eta': eta,
                            'min_chall': 1},
    )
def main_cli(self):
    """Main function of SMAC for CLI interface.

    Configures logging, builds a scenario (optionally warm-started from
    previous run histories and incumbents), runs the requested optimizer,
    and logs TAE abort / first-run-crash failures.

    Raises
    ------
    ValueError
        If the requested mode is not one of ``"SMAC"``, ``"ROAR"``,
        ``"EPILS"``.
    """
    self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

    cmd_reader = CMDReader()
    args_, misc_args = cmd_reader.read_cmd()

    # Configure the root logger according to the requested verbosity.
    root_logger = logging.getLogger()
    root_logger.setLevel(args_.verbose_level)
    logger_handler = logging.StreamHandler(stream=sys.stdout)
    if root_logger.level >= logging.INFO:
        # Compact format at normal verbosity.
        formatter = logging.Formatter("%(levelname)s:\t%(message)s")
    else:
        # Timestamped format at debug verbosity.
        formatter = logging.Formatter(
            "%(asctime)s:%(levelname)s:%(name)s:%(message)s",
            "%Y-%m-%d %H:%M:%S")
    logger_handler.setFormatter(formatter)
    root_logger.addHandler(logger_handler)
    # remove default handler
    root_logger.removeHandler(root_logger.handlers[0])

    scen = Scenario(args_.scenario_file, misc_args, run_id=args_.seed)

    # Optionally warm-start the run history from earlier runs.
    rh = None
    if args_.warmstart_runhistory:
        aggregate_func = average_cost
        rh = RunHistory(aggregate_func=aggregate_func)
        scen, rh = merge_foreign_data_from_file(
            scenario=scen,
            runhistory=rh,
            in_scenario_fn_list=args_.warmstart_scenario,
            in_runhistory_fn_list=args_.warmstart_runhistory,
            cs=scen.cs,
            aggregate_func=aggregate_func)

    # Optionally seed the optimizer with previous incumbents.
    initial_configs = None
    if args_.warmstart_incumbent:
        initial_configs = [scen.cs.get_default_configuration()]
        for traj_fn in args_.warmstart_incumbent:
            trajectory = TrajLogger.read_traj_aclib_format(
                fn=traj_fn, cs=scen.cs)
            initial_configs.append(trajectory[-1]["incumbent"])

    if args_.mode == "SMAC":
        optimizer = SMAC(
            scenario=scen,
            rng=np.random.RandomState(args_.seed),
            runhistory=rh,
            initial_configurations=initial_configs)
    elif args_.mode == "ROAR":
        optimizer = ROAR(
            scenario=scen,
            rng=np.random.RandomState(args_.seed),
            runhistory=rh,
            initial_configurations=initial_configs)
    elif args_.mode == "EPILS":
        optimizer = EPILS(
            scenario=scen,
            rng=np.random.RandomState(args_.seed),
            runhistory=rh,
            initial_configurations=initial_configs)
    else:
        # BUG FIX: an unknown mode previously left ``optimizer`` unbound
        # and crashed below with a confusing NameError.
        raise ValueError("Unknown optimizer mode: %s" % args_.mode)

    try:
        optimizer.optimize()
    except (TAEAbortException, FirstRunCrashedException) as err:
        self.logger.error(err)
def main_cli(self, commandline_arguments: typing.List[str] = None):
    """Main function of SMAC for CLI interface

    Parses the (optionally injected) command line, configures logging,
    builds a scenario — optionally restoring state and warm-starting
    from previous runs — then constructs and runs the optimizer
    selected by ``--mode``.
    """
    self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

    cmd_reader = CMDReader()
    kwargs = {}
    if commandline_arguments:
        kwargs['commandline_arguments'] = commandline_arguments
    main_args_, smac_args_, scen_args_ = cmd_reader.read_cmd(**kwargs)

    # Configure the root logger according to the requested verbosity.
    root_logger = logging.getLogger()
    root_logger.setLevel(main_args_.verbose_level)
    logger_handler = logging.StreamHandler(stream=sys.stdout)
    if root_logger.level >= logging.INFO:
        # Compact format at normal verbosity.
        formatter = logging.Formatter("%(levelname)s:\t%(message)s")
    else:
        # Timestamped format at debug verbosity.
        formatter = logging.Formatter(
            "%(asctime)s:%(levelname)s:%(name)s:%(message)s",
            "%Y-%m-%d %H:%M:%S")
    logger_handler.setFormatter(formatter)
    root_logger.addHandler(logger_handler)
    # remove default handler
    if len(root_logger.handlers) > 1:
        root_logger.removeHandler(root_logger.handlers[0])

    # Create defaults
    rh = None
    initial_configs = None
    stats = None
    incumbent = None

    # Create scenario-object
    scenario = {}
    scenario.update(vars(smac_args_))
    scenario.update(vars(scen_args_))
    scen = Scenario(scenario=scenario)

    # Restore state
    if main_args_.restore_state:
        root_logger.debug("Restoring state from %s...",
                          main_args_.restore_state)
        rh, stats, traj_list_aclib, traj_list_old = self.restore_state(
            scen, main_args_)

        scen.output_dir_for_this_run = create_output_directory(
            scen, main_args_.seed, root_logger,
        )
        scen.write()
        incumbent = self.restore_state_after_output_dir(
            scen, stats, traj_list_aclib, traj_list_old)

    # Optionally warm-start the run history from earlier runs.
    if main_args_.warmstart_runhistory:
        aggregate_func = average_cost
        rh = RunHistory(aggregate_func=aggregate_func)
        scen, rh = merge_foreign_data_from_file(
            scenario=scen,
            runhistory=rh,
            in_scenario_fn_list=main_args_.warmstart_scenario,
            in_runhistory_fn_list=main_args_.warmstart_runhistory,
            cs=scen.cs,
            aggregate_func=aggregate_func)

    # Optionally seed the optimizer with previous incumbents.
    if main_args_.warmstart_incumbent:
        initial_configs = [scen.cs.get_default_configuration()]
        for traj_fn in main_args_.warmstart_incumbent:
            trajectory = TrajLogger.read_traj_aclib_format(
                fn=traj_fn, cs=scen.cs)
            initial_configs.append(trajectory[-1]["incumbent"])

    # Dispatch on the requested optimization mode.
    # NOTE(review): there is no ``else`` branch — an unknown mode leaves
    # ``optimizer`` unbound and the ``optimize()`` call below raises a
    # NameError.
    if main_args_.mode == "SMAC":
        optimizer = SMAC(scenario=scen,
                         rng=np.random.RandomState(main_args_.seed),
                         runhistory=rh,
                         initial_configurations=initial_configs,
                         stats=stats,
                         restore_incumbent=incumbent,
                         run_id=main_args_.seed)
    elif main_args_.mode == "BORF":
        optimizer = BORF(scenario=scen,
                         rng=np.random.RandomState(main_args_.seed),
                         runhistory=rh,
                         initial_configurations=initial_configs,
                         stats=stats,
                         restore_incumbent=incumbent,
                         run_id=main_args_.seed)
    elif main_args_.mode == "BOGP":
        optimizer = BOGP(scenario=scen,
                         rng=np.random.RandomState(main_args_.seed),
                         runhistory=rh,
                         initial_configurations=initial_configs,
                         stats=stats,
                         restore_incumbent=incumbent,
                         run_id=main_args_.seed)
    elif main_args_.mode == "ROAR":
        optimizer = ROAR(scenario=scen,
                         rng=np.random.RandomState(main_args_.seed),
                         runhistory=rh,
                         initial_configurations=initial_configs,
                         run_id=main_args_.seed)
    elif main_args_.mode == "EPILS":
        optimizer = EPILS(scenario=scen,
                          rng=np.random.RandomState(main_args_.seed),
                          runhistory=rh,
                          initial_configurations=initial_configs,
                          run_id=main_args_.seed)
    elif main_args_.mode == "Hydra":
        optimizer = Hydra(
            scenario=scen,
            rng=np.random.RandomState(main_args_.seed),
            runhistory=rh,
            initial_configurations=initial_configs,
            stats=stats,
            restore_incumbent=incumbent,
            run_id=main_args_.seed,
            random_configuration_chooser=main_args_.random_configuration_chooser,
            n_iterations=main_args_.hydra_iterations,
            val_set=main_args_.hydra_validation,
            incs_per_round=main_args_.hydra_incumbents_per_round,
            n_optimizers=main_args_.hydra_n_optimizers)
    elif main_args_.mode == "PSMAC":
        optimizer = PSMAC(
            scenario=scen,
            rng=np.random.RandomState(main_args_.seed),
            run_id=main_args_.seed,
            shared_model=smac_args_.shared_model,
            validate=main_args_.psmac_validate,
            n_optimizers=main_args_.hydra_n_optimizers,
            n_incs=main_args_.hydra_incumbents_per_round,
        )
    try:
        optimizer.optimize()
    except (TAEAbortException, FirstRunCrashedException) as err:
        self.logger.error(err)
def run_smbo(self):
    """Run the SMBO loop for auto-sklearn.

    Collects meta-learning suggestions, builds a SMAC (or ROAR) optimizer
    around the target-algorithm evaluator, evaluates the meta-learning
    configurations first, then runs the main optimization loop until the
    budget is exhausted.

    Returns
    -------
    tuple
        ``(runhistory, trajectory)`` of the finished run.
    """
    self.watcher.start_task('SMBO')

    # == first things first: load the datamanager
    self.reset_data_manager()

    # == Initialize non-SMBO stuff
    # first create a scenario
    seed = self.seed
    self.config_space.seed(seed)
    num_params = len(self.config_space.get_hyperparameters())
    # allocate a run history
    num_run = self.start_num_run
    instance_id = self.dataset_name + SENTINEL

    # Initialize some SMAC dependencies
    runhistory = RunHistory(aggregate_func=average_cost)
    # meta_runhistory = RunHistory(aggregate_func=average_cost)
    # meta_runs_dataset_indices = {}

    # == METALEARNING suggestions
    # we start by evaluating the defaults on the full dataset again
    # and add the suggestions from metalearning behind it
    if self.num_metalearning_cfgs > 0:
        if self.metadata_directory is None:
            metalearning_directory = os.path.dirname(
                autosklearn.metalearning.__file__)
            # There is no multilabel data in OpenML
            if self.task == MULTILABEL_CLASSIFICATION:
                meta_task = BINARY_CLASSIFICATION
            else:
                meta_task = self.task
            metadata_directory = os.path.join(
                metalearning_directory, 'files',
                '%s_%s_%s' % (METRIC_TO_STRING[self.metric],
                              TASK_TYPES_TO_STRING[meta_task],
                              'sparse' if self.datamanager.info['is_sparse']
                              else 'dense'))
            self.metadata_directory = metadata_directory

        self.logger.info('Metadata directory: %s', self.metadata_directory)
        meta_base = MetaBase(self.config_space, self.metadata_directory)

        # Spend at most a quarter of the total budget on meta-features.
        metafeature_calculation_time_limit = int(
            self.total_walltime_limit / 4)
        metafeature_calculation_start_time = time.time()
        meta_features = self._calculate_metafeatures_with_limits(
            metafeature_calculation_time_limit)
        metafeature_calculation_end_time = time.time()
        metafeature_calculation_time_limit = \
            metafeature_calculation_time_limit - (
                metafeature_calculation_end_time -
                metafeature_calculation_start_time)

        if metafeature_calculation_time_limit < 1:
            self.logger.warning('Time limit for metafeature calculation less '
                                'than 1 seconds (%f). Skipping calculation '
                                'of metafeatures for encoded dataset.',
                                metafeature_calculation_time_limit)
            meta_features_encoded = None
        else:
            with warnings.catch_warnings():
                warnings.showwarning = self._send_warnings_to_log
                self.datamanager.perform1HotEncoding()
            meta_features_encoded = \
                self._calculate_metafeatures_encoded_with_limits(
                    metafeature_calculation_time_limit)

        # In case there is a problem calculating the encoded meta-features
        if meta_features is None:
            if meta_features_encoded is not None:
                meta_features = meta_features_encoded
        else:
            if meta_features_encoded is not None:
                meta_features.metafeature_values.update(
                    meta_features_encoded.metafeature_values)

        if meta_features is not None:
            meta_base.add_dataset(instance_id, meta_features)
            # Do mean imputation of the meta-features - should be done
            # specific for each prediction model!
            all_metafeatures = meta_base.get_metafeatures(
                features=list(meta_features.keys()))
            all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)

            with warnings.catch_warnings():
                warnings.showwarning = self._send_warnings_to_log
                metalearning_configurations = \
                    self.collect_metalearning_suggestions(meta_base)
            if metalearning_configurations is None:
                metalearning_configurations = []
            self.reset_data_manager()

            self.logger.info('%s', meta_features)

            # Convert meta-features into a dictionary because the scenario
            # expects a dictionary
            meta_features_dict = {}
            for dataset, series in all_metafeatures.iterrows():
                meta_features_dict[dataset] = series.values
            meta_features_list = []
            for meta_feature_name in all_metafeatures.columns:
                meta_features_list.append(
                    meta_features[meta_feature_name].value)
            meta_features_list = np.array(meta_features_list).reshape(
                (1, -1))
            self.logger.info(list(meta_features_dict.keys()))

            # NOTE(review): a large block of commented-out code that
            # warm-started a meta runhistory from previous runs
            # (meta_runs / meta_durations handling) used to live here;
            # it was disabled upstream and has been condensed to this
            # note in a documentation-only pass.
    else:
        meta_features = None

    if meta_features is None:
        # EIPS needs meta-feature instance information; without it, fall
        # back to plain EI.
        if self.acquisition_function == 'EIPS':
            self.logger.critical('Reverting to acquisition function EI!')
            self.acquisition_function = 'EI'
        meta_features_list = []
        meta_features_dict = {}
        metalearning_configurations = []

    # Partial-CV strategies evaluate each fold as a separate instance.
    if self.resampling_strategy in [
        'partial-cv', 'partial-cv-iterative-fit'
    ]:
        num_folds = self.resampling_strategy_args['folds']
        instances = [[fold_number] for fold_number in range(num_folds)]
    else:
        instances = None

    startup_time = self.watcher.wall_elapsed(self.dataset_name)
    # Reserve 5 seconds of slack for shutdown/bookkeeping.
    total_walltime_limit = self.total_walltime_limit - startup_time - 5
    scenario_dict = {
        'cs': self.config_space,
        'cutoff-time': self.func_eval_time_limit,
        'memory-limit': self.memory_limit,
        'wallclock-limit': total_walltime_limit,
        # 'instances': [[name] for name in meta_features_dict],
        'output-dir': self.backend.temporary_directory,
        'shared-model': self.shared_mode,
        'run-obj': 'quality',
        'deterministic': 'true',
        'instances': instances
    }

    if self.configuration_mode == 'RANDOM':
        scenario_dict['minR'] = len(
            instances) if instances is not None else 1
        scenario_dict['initial_incumbent'] = 'RANDOM'

    self.scenario = Scenario(scenario_dict)

    # TODO rebuild target algorithm to be it's own target algorithm
    # evaluator, which takes into account that a run can be killed prior
    # to the model being fully fitted; thus putting intermediate results
    # into a queue and querying them once the time is over
    exclude = dict()
    include = dict()
    if self.include_preprocessors is not None and \
            self.exclude_preprocessors is not None:
        raise ValueError('Cannot specify include_preprocessors and '
                         'exclude_preprocessors.')
    elif self.include_preprocessors is not None:
        include['preprocessor'] = self.include_preprocessors
    elif self.exclude_preprocessors is not None:
        exclude['preprocessor'] = self.exclude_preprocessors

    # NOTE(review): the next condition checks ``exclude_preprocessors``
    # but the error message talks about estimators — this looks like a
    # copy-paste bug (probably should be ``self.exclude_estimators``);
    # left unchanged in this documentation-only pass.
    if self.include_estimators is not None and \
            self.exclude_preprocessors is not None:
        raise ValueError('Cannot specify include_estimators and '
                         'exclude_estimators.')
    elif self.include_estimators is not None:
        if self.task in CLASSIFICATION_TASKS:
            include['classifier'] = self.include_estimators
        elif self.task in REGRESSION_TASKS:
            include['regressor'] = self.include_estimators
        else:
            raise ValueError(self.task)
    elif self.exclude_estimators is not None:
        if self.task in CLASSIFICATION_TASKS:
            exclude['classifier'] = self.exclude_estimators
        elif self.task in REGRESSION_TASKS:
            exclude['regressor'] = self.exclude_estimators
        else:
            raise ValueError(self.task)

    # Target-algorithm evaluator executing configurations with resource
    # limits, producing results through a queue.
    ta = ExecuteTaFuncWithQueue(backend=self.backend,
                                autosklearn_seed=seed,
                                resampling_strategy=self.resampling_strategy,
                                initial_num_run=num_run,
                                logger=self.logger,
                                include=include,
                                exclude=exclude,
                                memory_limit=self.memory_limit,
                                disable_file_output=self.disable_file_output,
                                **self.resampling_strategy_args)

    types = get_types(self.config_space, self.scenario.feature_array)

    # TODO extract generation of SMAC object into it's own function for
    # testing
    if self.acquisition_function == 'EI':
        model = RandomForestWithInstances(types,
                                          # instance_features=meta_features_list,
                                          seed=1, num_trees=10)
        rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                     scenario=self.scenario,
                                     success_states=[StatusType.SUCCESS,
                                                     StatusType.MEMOUT,
                                                     StatusType.TIMEOUT],
                                     impute_censored_data=False,
                                     impute_state=None)
        _smac_arguments = dict(scenario=self.scenario,
                               model=model,
                               rng=seed,
                               runhistory2epm=rh2EPM,
                               tae_runner=ta,
                               runhistory=runhistory)
    elif self.acquisition_function == 'EIPS':
        rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                     scenario=self.scenario,
                                     success_states=[StatusType.SUCCESS,
                                                     StatusType.MEMOUT,
                                                     StatusType.TIMEOUT],
                                     impute_censored_data=False,
                                     impute_state=None)
        model = UncorrelatedMultiObjectiveRandomForestWithInstances(
            ['cost', 'runtime'], types, num_trees=10,
            instance_features=meta_features_list, seed=1)
        acquisition_function = EIPS(model)
        _smac_arguments = dict(scenario=self.scenario,
                               model=model,
                               rng=seed,
                               tae_runner=ta,
                               runhistory2epm=rh2EPM,
                               runhistory=runhistory,
                               acquisition_function=acquisition_function)
    else:
        raise ValueError('Unknown acquisition function value %s!'
                         % self.acquisition_function)

    if self.configuration_mode == 'SMAC':
        smac = SMAC(**_smac_arguments)
    elif self.configuration_mode in ['ROAR', 'RANDOM']:
        # ROAR takes neither a model nor a runhistory transformer.
        for not_in_roar in ['runhistory2epm', 'model']:
            if not_in_roar in _smac_arguments:
                del _smac_arguments[not_in_roar]
        smac = ROAR(**_smac_arguments)
    else:
        raise ValueError(self.configuration_mode)

    # NOTE(review): a commented-out block building a runtime model for
    # EIPS warm-starting (runtime_rf / runtime_rh2EPM / Y_meta scaling)
    # used to live here; it was disabled upstream and has been condensed
    # to this note in a documentation-only pass.

    smac.solver.stats.start_timing()
    # == first, evaluate all metelearning and default configurations
    smac.solver.incumbent = smac.solver.initial_design.run()

    for challenger in metalearning_configurations:
        smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
            challengers=[challenger],
            incumbent=smac.solver.incumbent,
            run_history=smac.solver.runhistory,
            aggregate_func=smac.solver.aggregate_func,
            time_bound=self.total_walltime_limit)

        if smac.solver.scenario.shared_model:
            pSMAC.write(run_history=smac.solver.runhistory,
                        output_directory=smac.solver.scenario.output_dir,
                        num_run=self.seed)

        if smac.solver.stats.is_budget_exhausted():
            break

    # == after metalearning run SMAC loop
    while True:
        if smac.solver.scenario.shared_model:
            pSMAC.read(run_history=smac.solver.runhistory,
                       output_directory=self.scenario.output_dir,
                       configuration_space=self.config_space,
                       logger=self.logger)

        choose_next_start_time = time.time()
        try:
            challengers = self.choose_next(smac)
        except Exception as e:
            # Fall back to a random configuration if the model-based
            # proposal fails for any reason.
            self.logger.error(e)
            self.logger.error("Error in getting next configurations "
                              "with SMAC. Using random configuration!")
            next_config = self.config_space.sample_configuration()
            challengers = [next_config]
        time_for_choose_next = time.time() - choose_next_start_time
        self.logger.info('Used %g seconds to find next '
                         'configurations' % (time_for_choose_next))
        # Give the intensifier at least one second.
        time_for_choose_next = max(time_for_choose_next, 1.0)

        smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
            challengers=challengers,
            incumbent=smac.solver.incumbent,
            run_history=smac.solver.runhistory,
            aggregate_func=smac.solver.aggregate_func,
            time_bound=time_for_choose_next)

        if smac.solver.scenario.shared_model:
            pSMAC.write(run_history=smac.solver.runhistory,
                        output_directory=smac.solver.scenario.output_dir,
                        num_run=self.seed)

        if smac.solver.stats.is_budget_exhausted():
            break

    self.runhistory = smac.solver.runhistory
    self.trajectory = smac.solver.intensifier.traj_logger.trajectory

    return self.runhistory, self.trajectory
# Intensifier will allocate from 5 to a maximum of 25 epochs to each configuration # Successive Halving child-instances are created to prevent idle # workers. intensifier_kwargs = { 'initial_budget': 5, 'max_budget': 25, 'eta': 3, 'min_chall': 1, 'instance_order': 'shuffle_once' } # To optimize, we pass the function to the SMAC-object smac = ROAR(scenario=scenario, rng=np.random.RandomState(42), tae_runner=mlp_from_cfg, intensifier=SuccessiveHalving, intensifier_kwargs=intensifier_kwargs, initial_design=RandomConfigurations, n_jobs=4) # Example call of the function with default values # It returns: Status, Cost, Runtime, Additional Infos def_value = smac.get_tae_runner().run( config=cs.get_default_configuration(), instance='1', budget=25, seed=0)[1] print("Value for default configuration: %.4f" % def_value) # Start optimization try: incumbent = smac.optimize() finally:
print(f"\n[{name}] ") hpo = SMAC4HPO(scenario=scenario, rng=rng, tae_runner=tat) hpo_result, info = run_smac_based_optimizer(hpo, tae) write_output( f"[{name}] time={info['time']} train_loss={info['last_train_loss']} " f"test_loss={info['last_test_loss']}\n") records = util.add_record(records, task_id, name, hpo_result) ######################################################################################################## # ROAR x2 ######################################################################################################## name = "roar_x2" print(f"\n[{name}] ") hpo = ROAR(scenario=scenario, rng=rng, tae_runner=tat) hpo_result, info = run_smac_based_optimizer(hpo, tae, speed=2) write_output( f"[{name}] time={info['time']} train_loss={info['last_train_loss']} " f"test_loss={info['last_test_loss']}\n") records = util.add_record(records, task_id, name, hpo_result) ######################################################################################################## # Random ######################################################################################################## name = "random_x2" print(f"\n[{name}] ") speed = 2 best_loss = 1