def _optimize(self, optimizer, hist_list):
    """Run ``optimizer`` to completion and publish its run history.

    After ``optimizer.optimize()`` returns, ``pSMAC.read`` is called once on
    the solver's run history with the solver's ``input_psmac_dirs``,
    configuration space and logger, and the run history is then appended to
    ``hist_list``.

    Parameters
    ----------
    optimizer
        Object exposing ``optimize()`` and a ``solver`` attribute.
    hist_list
        Collection (anything with ``append``) receiving the run history.
    """
    optimizer.optimize()

    solver = optimizer.solver
    read_kwargs = {
        'run_history': solver.runhistory,
        'output_dirs': solver.scenario.input_psmac_dirs,
        'configuration_space': solver.config_space,
        'logger': solver.logger,
    }
    pSMAC.read(**read_kwargs)

    hist_list.append(solver.runhistory)
def run(self):
    """Runs the Bayesian optimization loop

    Each iteration optionally reads shared run histories (pSMAC), transforms
    the run history into training data, asks ``choose_next`` for challengers,
    intensifies them against the incumbent and optionally writes the run
    history back out. The loop ends as soon as ``self.stats`` reports an
    exhausted budget.

    Returns
    ----------
    incumbent
        ``self.incumbent`` as returned by the last intensification
    """
    self.start()

    # Main BO loop
    while True:
        if self.scenario.shared_model:
            pSMAC.read(run_history=self.runhistory,
                       output_dirs=self.scenario.input_psmac_dirs,
                       configuration_space=self.config_space,
                       logger=self.logger)

        start_time = time.time()
        X, Y = self.rh2EPM.transform(self.runhistory)

        self.logger.debug("Search for next configuration")
        # get all found configurations sorted according to acq
        challengers = self.choose_next(X, Y)

        # The time spent choosing challengers determines how long the
        # intensification is allowed to take.
        time_spent = time.time() - start_time
        time_left = self._get_timebound_for_intensification(time_spent)

        self.logger.debug("Intensify")
        self.incumbent, _ = self.intensifier.intensify(
            challengers=challengers,
            incumbent=self.incumbent,
            run_history=self.runhistory,
            aggregate_func=self.aggregate_func,
            time_bound=max(self.intensifier._min_time, time_left))

        if self.scenario.shared_model:
            pSMAC.write(
                run_history=self.runhistory,
                output_directory=self.scenario.output_dir_for_this_run,
                logger=self.logger)

        # Use the instance logger (not the root ``logging`` module) so this
        # message follows the same handlers/levels as the rest of the loop.
        self.logger.debug(
            "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)",
            self.stats.get_remaing_time_budget(),
            self.stats.get_remaining_ta_budget(),
            self.stats.get_remaining_ta_runs())

        if self.stats.is_budget_exhausted():
            break

        self.stats.print_stats(debug_out=True)

    return self.incumbent
def _claim_trial(runcount_left):
    """Atomically take one trial from the shared budget.

    Returns True when a trial was claimed (the counter was decremented) and
    False when the budget is already used up.
    """
    # ``multiprocessing.Value`` exposes ``get_lock``; plain counters do not.
    get_lock = getattr(runcount_left, 'get_lock', None)
    lock = get_lock() if get_lock is not None else None
    if lock is not None:
        lock.acquire()
    try:
        if runcount_left.value <= 0:
            return False
        runcount_left.value -= 1
        return True
    finally:
        if lock is not None:
            lock.release()


def _iterate(optimizer, runcount_left, return_hist):
    """Worker loop: run optimizer iterations while shared trials remain.

    Parameters
    ----------
    optimizer
        Object exposing ``iterate()`` and a ``solver`` attribute.
    runcount_left
        Shared counter with a ``value`` attribute holding the number of
        trials still available to all workers.
    return_hist
        Collection (anything with ``append``) that receives this worker's
        run history once the budget is used up.

    Notes
    -----
    Checking and decrementing ``runcount_left.value`` in two separate steps
    is not atomic across processes, so several workers could pass the check
    for the last remaining trial. The claim is therefore made under the
    counter's own lock when it provides one.
    """
    while _claim_trial(runcount_left):
        optimizer.iterate()
        solver = optimizer.solver
        pSMAC.read(
            run_history=solver.runhistory,
            output_dirs=solver.scenario.input_psmac_dirs,
            configuration_space=solver.config_space,
            logger=solver.logger,
        )
    return_hist.append(optimizer.solver.runhistory)
def iterate(self):
    """Run one parallel optimization round and update the incumbent.

    When ``len(self.configs)`` has reached ``self.config_num_threshold`` a
    warning is logged and no work is done. Otherwise ``self.n_jobs`` worker
    processes share a budget of ``self.trials_this_run`` trials; every run
    history they report is scanned, unseen configurations are recorded in
    ``self.configs`` / ``self.perfs`` and the incumbent is replaced whenever
    a higher reward (``1 - cost``) is seen.

    Returns
    -------
    tuple
        ``(incumbent_perf, iteration_cost, incumbent_config)`` where
        ``iteration_cost`` is the elapsed wall-clock time in seconds, or
        ``None`` when the round was skipped.
    """
    _start_time = time.time()

    if len(self.configs) >= self.config_num_threshold:
        self.logger.warning('Already explored 70 percentage of the '
                            'hp space: %d!', self.config_num_threshold)
        return self.incumbent_perf, None, self.incumbent_config

    trial_left = multiprocessing.Value('i', self.trials_this_run)

    # Bring every optimizer's run history up to date before forking.
    for i in range(self.n_jobs):
        solver = self.optimizer_list[i].solver
        pSMAC.read(
            run_history=solver.runhistory,
            output_dirs=solver.scenario.output_dir + '/run_1',
            configuration_space=solver.config_space,
            logger=solver.logger,
        )

    # The manager owns a server process; the context manager shuts it down
    # again instead of leaking one server per call.
    with multiprocessing.Manager() as manager:
        return_hist = manager.list()
        processes = []
        for i in range(self.n_jobs):
            p = multiprocessing.Process(
                target=_iterate,
                args=(self.optimizer_list[i], trial_left, return_hist))
            processes.append(p)
            p.start()
        for p in processes:
            p.join()
        # Copy the results out while the manager is still alive.
        histories = list(return_hist)

    for runhistory in histories:
        for key, value in runhistory.data.items():
            _reward = 1. - value[0]
            _config = runhistory.ids_config[key[0]]
            if _config not in self.configs:
                self.perfs.append(_reward)
                self.configs.append(_config)
            if _reward > self.incumbent_perf:
                self.incumbent_perf = _reward
                self.incumbent_config = _config
    self.trial_cnt += self.trials_per_iter

    iteration_cost = time.time() - _start_time
    return self.incumbent_perf, iteration_cost, self.incumbent_config
def run_smbo(self):
    """Configure a SMAC optimizer for this task, run it and collect results.

    The method builds the instance list, the include/exclude component
    filters, the target-algorithm keyword arguments and the scenario
    dictionary, lets ``self.smac_scenario_args`` override the overridable
    scenario entries, creates the SMAC object (through
    ``self.get_smac_object_callback`` when one is set, ``get_smac_object``
    otherwise) and calls ``optimize()`` on it.

    Returns
    -------
    tuple
        ``(runhistory, trajectory, budget_type)``, which are also stored on
        ``self.runhistory``, ``self.trajectory`` and ``self._budget_type``.

    Raises
    ------
    ValueError
        If both an include and an exclude list are given for preprocessors
        or for estimators, or if ``self.task`` is neither a classification
        nor a regression task while an estimator filter is requested.
    """
    self.watcher.start_task('SMBO')

    # The data manager must be in place before anything else is set up.
    self.reset_data_manager()

    seed = self.seed
    self.config_space.seed(seed)
    num_run = self.start_num_run
    metalearning_configurations = self.get_metalearning_suggestions()

    # One instance per fold for the partial-CV strategies, one otherwise.
    if self.resampling_strategy in ['partial-cv', 'partial-cv-iterative-fit']:
        instances = []
        for fold_number in range(self.resampling_strategy_args['folds']):
            payload = {'task_id': self.dataset_name, 'fold': fold_number}
            instances.append([json.dumps(payload)])
    else:
        instances = [[json.dumps({'task_id': self.dataset_name})]]

    # TODO: rebuild the target algorithm into its own evaluator that copes
    # with runs being killed before the model is fully fitted, by putting
    # intermediate results into a queue and querying them when time is up.
    include = {}
    exclude = {}

    wants_pre_include = self.include_preprocessors is not None
    wants_pre_exclude = self.exclude_preprocessors is not None
    if wants_pre_include and wants_pre_exclude:
        raise ValueError('Cannot specify include_preprocessors and '
                         'exclude_preprocessors.')
    if wants_pre_include:
        include['feature_preprocessor'] = self.include_preprocessors
    elif wants_pre_exclude:
        exclude['feature_preprocessor'] = self.exclude_preprocessors

    wants_est_include = self.include_estimators is not None
    wants_est_exclude = self.exclude_estimators is not None
    if wants_est_include and wants_est_exclude:
        raise ValueError('Cannot specify include_estimators and '
                         'exclude_estimators.')
    if wants_est_include or wants_est_exclude:
        # The filter key depends on the kind of task being solved.
        if self.task in CLASSIFICATION_TASKS:
            estimator_key = 'classifier'
        elif self.task in REGRESSION_TASKS:
            estimator_key = 'regressor'
        else:
            raise ValueError(self.task)
        if wants_est_include:
            include[estimator_key] = self.include_estimators
        else:
            exclude[estimator_key] = self.exclude_estimators

    ta = ExecuteTaFuncWithQueue
    ta_kwargs = dict(
        backend=self.backend,
        autosklearn_seed=seed,
        resampling_strategy=self.resampling_strategy,
        initial_num_run=num_run,
        logger=self.logger,
        include=include,
        exclude=exclude,
        metric=self.metric,
        memory_limit=self.memory_limit,
        disable_file_output=self.disable_file_output,
        **self.resampling_strategy_args
    )

    # Whatever was already spent on start-up (plus 5 seconds) is taken off
    # the wall-clock limit handed to SMAC.
    startup_time = self.watcher.wall_elapsed(self.dataset_name)
    total_walltime_limit = self.total_walltime_limit - startup_time - 5

    scenario_dict = {
        'abort_on_first_run_crash': False,
        'cs': self.config_space,
        'cutoff_time': self.func_eval_time_limit,
        'deterministic': 'true',
        'instances': instances,
        'memory_limit': self.memory_limit,
        'output-dir': self.backend.get_smac_output_directory(),
        'run_obj': 'quality',
        'shared-model': self.shared_mode,
        'wallclock_limit': total_walltime_limit,
        'cost_for_crash': WORST_POSSIBLE_RESULT,
    }

    user_args = self.smac_scenario_args
    if user_args is not None:
        protected = (
            'abort_on_first_run_crash',
            'cs',
            'deterministic',
            'instances',
            'output-dir',
            'run_obj',
            'shared-model',
            'cost_for_crash',
        )
        overridable = ('cutoff_time', 'memory_limit', 'wallclock_limit')

        # Protected entries are dropped from the user-supplied arguments.
        for arg in protected:
            if arg in user_args:
                self.logger.warning('Cannot override scenario argument %s, '
                                    'will ignore this.', arg)
                del user_args[arg]
        # Overridable entries are announced and then applied below.
        for arg in overridable:
            if arg in user_args:
                self.logger.warning(
                    'Overriding scenario argument %s: %s with value %s',
                    arg,
                    scenario_dict[arg],
                    user_args[arg]
                )
        scenario_dict.update(user_args)

    smac_args = {
        'scenario_dict': scenario_dict,
        'seed': seed,
        'ta': ta,
        'ta_kwargs': ta_kwargs,
        'backend': self.backend,
        'metalearning_configurations': metalearning_configurations,
    }
    build_smac = self.get_smac_object_callback
    if build_smac is None:
        build_smac = get_smac_object
    smac = build_smac(**smac_args)

    smac.optimize()

    # Patch SMAC to read in data from parallel runs after the last
    # function evaluation
    solver = smac.solver
    if self.shared_mode:
        pSMAC.read(
            run_history=solver.runhistory,
            output_dirs=solver.scenario.input_psmac_dirs,
            configuration_space=solver.config_space,
            logger=solver.logger,
        )

    self.runhistory = solver.runhistory
    self.trajectory = solver.intensifier.traj_logger.trajectory
    self._budget_type = solver.intensifier.tae_runner.budget_type

    return self.runhistory, self.trajectory, self._budget_type
def run(self):
    """Run the Bayesian optimization loop until the budget is exhausted.

    After the initial design has been evaluated, each iteration optionally
    reads the shared run history (pSMAC), asks ``self.select_configuration``
    for challengers and intensifies them against the incumbent. With
    ``self.double_intensification`` set, two challenger lists are returned
    and intensified one after the other; otherwise a single list is used.

    Returns
    ----------
    incumbent
        ``self.incumbent`` as left by the last intensification
        (the original docstring describes it as ``np.array(1, H)``; the
        concrete type is not visible here).
    """
    self.stats.start_timing()
    try:
        self.incumbent = self.initial_design.run()
    except FirstRunCrashedException as err:
        # A crashed first run is only fatal when the scenario asks for it;
        # otherwise the loop below is entered anyway.
        # NOTE(review): in that case ``self.incumbent`` keeps whatever
        # value it had before this call - confirm that is intended.
        if self.scenario.abort_on_first_run_crash:
            raise

    # Main BO loop
    iteration = 1
    while True:
        if self.scenario.shared_model:
            pSMAC.read(run_history=self.runhistory,
                       output_directory=self.scenario.output_dir,
                       configuration_space=self.config_space,
                       logger=self.logger)

        start_time = time.time()
        X, Y = self.rh2EPM.transform(self.runhistory)

        self.logger.debug("Search for next configuration")
        if self.double_intensification:
            # get all found configurations sorted according to acq
            challengers_smac, challengers_random = \
                self.select_configuration.run(X, Y,
                                              incumbent=self.incumbent,
                                              num_configurations_by_random_search_sorted=100,
                                              num_configurations_by_local_search=10,
                                              double_intensification=self.double_intensification)

            time_spend = time.time() - start_time
            logging.debug(
                "Time spend to choose next configurations: %.2f sec" % (time_spend))

            self.logger.debug("Intensify")

            # First pass: the random challengers get half of the time that
            # was spent on choosing configurations (at least 0.01).
            start_time_random = time.time()
            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers_random,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(0.01, time_spend / 2.),
                min_number_of_runs=1)
            time_spend_random = time.time() - start_time_random

            # Second pass: the other challenger list gets as much time as
            # the first pass actually took.
            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers_smac,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(0.01, time_spend_random),
                min_number_of_runs=1)
        else:
            # get all found configurations sorted according to acq
            challengers = \
                self.select_configuration.run(X, Y,
                                              incumbent=self.incumbent,
                                              num_configurations_by_random_search_sorted=100,
                                              num_configurations_by_local_search=10,
                                              double_intensification=self.double_intensification)

            time_spend = time.time() - start_time
            logging.debug(
                "Time spend to choose next configurations: %.2f sec" % (time_spend))

            self.logger.debug("Intensify")

            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(0.01, time_spend),
                min_number_of_runs=2)

        # NOTE(review): this writes to stdout on every iteration; the rest
        # of the loop reports through loggers.
        print("Incumbent: {}, Performance: {}".format(self.incumbent, inc_perf))

        if self.scenario.shared_model:
            pSMAC.write(run_history=self.runhistory,
                        output_directory=self.scenario.output_dir,
                        num_run=self.num_run)

        iteration += 1

        logging.debug("Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)" % (
            self.stats.get_remaing_time_budget(),
            self.stats.get_remaining_ta_budget(),
            self.stats.get_remaining_ta_runs()))

        if self.stats.is_budget_exhausted():
            break

        self.stats.print_stats(debug_out=True)

    return self.incumbent
def optimize(self) -> typing.List[Configuration]: """ Optimizes the algorithm provided in scenario (given in constructor) Returns ------- portfolio : typing.List[Configuration] Portfolio of found configurations """ # Setup output directory self.portfolio = [] portfolio_cost = np.inf if self.output_dir is None: self.top_dir = "hydra-output_%s" % ( datetime.datetime.fromtimestamp( time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f')) self.scenario.output_dir = os.path.join( self.top_dir, "psmac3-output_%s" % (datetime.datetime.fromtimestamp( time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f'))) self.output_dir = create_output_directory(self.scenario, run_id=self.run_id, logger=self.logger) scen = copy.deepcopy(self.scenario) scen.output_dir_for_this_run = None scen.output_dir = None # parent process SMAC only used for validation purposes self.solver = SMAC4AC(scenario=scen, tae_runner=self._tae, rng=self.rng, run_id=self.run_id, **self.kwargs) for i in range(self.n_iterations): self.logger.info("=" * 120) self.logger.info("Hydra Iteration: %d", (i + 1)) if i == 0: tae = self._tae tae_kwargs = self._tae_kwargs else: tae = ExecuteTARunHydra if self._tae_kwargs: tae_kwargs = self._tae_kwargs else: tae_kwargs = {} tae_kwargs['cost_oracle'] = self.cost_per_inst self.optimizer = PSMAC( scenario=self.scenario, run_id=self.run_id, rng=self.rng, tae=tae, tae_kwargs=tae_kwargs, shared_model=False, validate=True if self.val_set else False, n_optimizers=self.n_optimizers, val_set=self.val_set, n_incs=self. 
n_optimizers, # return all configurations (unvalidated) **self.kwargs) self.optimizer.output_dir = self.output_dir incs = self.optimizer.optimize() cost_per_conf_v, val_ids, cost_per_conf_e, est_ids = self.optimizer.get_best_incumbents_ids( incs) if self.val_set: to_keep_ids = val_ids[:self.incs_per_round] else: to_keep_ids = est_ids[:self.incs_per_round] config_cost_per_inst = {} incs = incs[to_keep_ids] self.logger.info('Kept incumbents') for inc in incs: self.logger.info(inc) config_cost_per_inst[inc] = cost_per_conf_v[ inc] if self.val_set else cost_per_conf_e[inc] cur_portfolio_cost = self._update_portfolio( incs, config_cost_per_inst) if portfolio_cost <= cur_portfolio_cost: self.logger.info( "No further progress (%f) --- terminate hydra", portfolio_cost) break else: portfolio_cost = cur_portfolio_cost self.logger.info("Current pertfolio cost: %f", portfolio_cost) self.scenario.output_dir = os.path.join( self.top_dir, "psmac3-output_%s" % (datetime.datetime.fromtimestamp( time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f'))) self.output_dir = create_output_directory(self.scenario, run_id=self.run_id, logger=self.logger) read(self.rh, os.path.join(self.top_dir, 'psmac3*', 'run_' + str(MAXINT)), self.scenario.cs, self.logger) self.rh.save_json(fn=os.path.join( self.top_dir, 'all_validated_runs_runhistory.json'), save_external=True) with open(os.path.join(self.top_dir, 'portfolio.pkl'), 'wb') as fh: pickle.dump(self.portfolio, fh) self.logger.info("~" * 120) self.logger.info('Resulting Portfolio:') for configuration in self.portfolio: self.logger.info(str(configuration)) self.logger.info("~" * 120) return self.portfolio
def run_smbo(self):
    """Configure a SMAC optimizer for this task, run it and collect results.

    Builds the instance list, the include/exclude component filters, the
    target-algorithm runner and the scenario dictionary, lets
    ``self.smac_scenario_args`` override the overridable scenario entries
    and runs SMAC. When ``self.read_history`` is set, the run history handed
    to SMAC is pre-built by ``create_Runhistory`` from a fixed
    configuration, and the resulting run history and combined trajectory
    are pickled to disk.

    Returns
    -------
    tuple
        ``(runhistory, trajectory)``, also stored on ``self.runhistory`` and
        ``self.trajectory``.

    Raises
    ------
    ValueError
        If both an include and an exclude list are given for preprocessors
        or for estimators, or if ``self.task`` is neither a classification
        nor a regression task while an estimator filter is requested.
    """
    self.watcher.start_task('SMBO')

    # == first things first: load the datamanager
    self.reset_data_manager()

    # == Initialize non-SMBO stuff
    # first create a scenario
    seed = self.seed
    self.config_space.seed(seed)
    # allocate a run history
    num_run = self.start_num_run

    # Initialize some SMAC dependencies
    metalearning_configurations = self.get_metalearning_suggestions()

    if self.resampling_strategy in ['partial-cv', 'partial-cv-iterative-fit']:
        num_folds = self.resampling_strategy_args['folds']
        instances = [[json.dumps({'task_id': self.dataset_name,
                                  'fold': fold_number})]
                     for fold_number in range(num_folds)]
    else:
        instances = [[json.dumps({'task_id': self.dataset_name})]]

    # TODO rebuild target algorithm to be its own target algorithm
    #  evaluator, which takes into account that a run can be killed prior
    #  to the model being fully fitted; thus putting intermediate results
    #  into a queue and querying them once the time is over
    exclude = dict()
    include = dict()
    if self.include_preprocessors is not None and \
            self.exclude_preprocessors is not None:
        raise ValueError('Cannot specify include_preprocessors and '
                         'exclude_preprocessors.')
    elif self.include_preprocessors is not None:
        include['preprocessor'] = self.include_preprocessors
    elif self.exclude_preprocessors is not None:
        exclude['preprocessor'] = self.exclude_preprocessors

    if self.include_estimators is not None and \
            self.exclude_estimators is not None:
        raise ValueError('Cannot specify include_estimators and '
                         'exclude_estimators.')
    elif self.include_estimators is not None:
        if self.task in CLASSIFICATION_TASKS:
            include['classifier'] = self.include_estimators
        elif self.task in REGRESSION_TASKS:
            include['regressor'] = self.include_estimators
        else:
            raise ValueError(self.task)
    elif self.exclude_estimators is not None:
        if self.task in CLASSIFICATION_TASKS:
            exclude['classifier'] = self.exclude_estimators
        elif self.task in REGRESSION_TASKS:
            exclude['regressor'] = self.exclude_estimators
        else:
            raise ValueError(self.task)

    ta = ExecuteTaFuncWithQueue(
        backend=self.backend,
        autosklearn_seed=seed,
        resampling_strategy=self.resampling_strategy,
        initial_num_run=num_run,
        logger=self.logger,
        include=include,
        exclude=exclude,
        metric=self.metric,
        memory_limit=self.memory_limit,
        disable_file_output=self.disable_file_output,
        **self.resampling_strategy_args)

    # Whatever was already spent on start-up (plus 5 seconds) is taken off
    # the wall-clock limit handed to SMAC.
    startup_time = self.watcher.wall_elapsed(self.dataset_name)
    total_walltime_limit = self.total_walltime_limit - startup_time - 5
    scenario_dict = {
        'abort_on_first_run_crash': False,
        'cs': self.config_space,
        'cutoff_time': self.func_eval_time_limit,
        'deterministic': 'true',
        'instances': instances,
        'memory_limit': self.memory_limit,
        'output-dir': self.backend.get_smac_output_directory(),
        'run_obj': 'quality',
        'shared-model': self.shared_mode,
        'wallclock_limit': total_walltime_limit,
        'cost_for_crash': WORST_POSSIBLE_RESULT,
    }
    if self.smac_scenario_args is not None:
        # These entries are fixed; user-supplied values are dropped.
        for arg in [
            'abort_on_first_run_crash',
            'cs',
            'deterministic',
            'instances',
            'output-dir',
            'run_obj',
            'shared-model',
            'cost_for_crash',
        ]:
            if arg in self.smac_scenario_args:
                self.logger.warning(
                    'Cannot override scenario argument %s, '
                    'will ignore this.', arg)
                del self.smac_scenario_args[arg]
        # These entries may be overridden; announce it before applying.
        for arg in [
            'cutoff_time',
            'memory_limit',
            'wallclock_limit',
        ]:
            if arg in self.smac_scenario_args:
                self.logger.warning(
                    'Overriding scenario argument %s: %s with value %s',
                    arg, scenario_dict[arg], self.smac_scenario_args[arg])
        scenario_dict.update(self.smac_scenario_args)

    if self.read_history:
        import create_Runhistory
        import smac

        # Fixed configuration used to seed the pre-built run history.
        values = {
            'balancing:strategy': 'none',
            'categorical_encoding:__choice__': 'no_encoding',
            'classifier:__choice__': 'random_forest',
            'imputation:strategy': 'mean',
            'preprocessor:__choice__': 'pca',
            'preprocessor:pca:whiten': 'False',
            'rescaling:__choice__': 'none',
            'classifier:random_forest:bootstrap': 'True',
            'classifier:random_forest:criterion': 'entropy',
            'classifier:random_forest:max_depth': 10,
            'classifier:random_forest:max_features': 0.45000000000000001,
            'classifier:random_forest:max_leaf_nodes': 'None',
            'classifier:random_forest:min_impurity_decrease': 0.0,
            'classifier:random_forest:min_samples_leaf': 6,
            'classifier:random_forest:min_samples_split': 7,
            'classifier:random_forest:min_weight_fraction_leaf': 0.0,
            'classifier:random_forest:n_estimators': 512,
            'classifier:random_forest:random_state': 3,
        }
        config = create_Runhistory.defult_config_builder(
            configspace=self.config_space, values=values)
        runhistory, traj_logger = create_Runhistory.runhistory_builder(
            ta=ta,
            scenario_dic=scenario_dict,
            rng=seed,
            backend=self.backend,
            config_milad=config)
    else:
        runhistory = RunHistory(aggregate_func=average_cost)

    smac_args = {
        'scenario_dict': scenario_dict,
        'seed': seed,
        'ta': ta,
        'backend': self.backend,
        'metalearning_configurations': metalearning_configurations,
        'runhistory': runhistory,
    }
    if self.get_smac_object_callback is not None:
        smac = self.get_smac_object_callback(**smac_args)
    else:
        smac = get_smac_object(**smac_args)

    smac.optimize()

    # Patch SMAC to read in data from parallel runs after the last
    # function evaluation
    if self.shared_mode:
        pSMAC.read(
            run_history=smac.solver.runhistory,
            output_dirs=smac.solver.scenario.input_psmac_dirs,
            configuration_space=smac.solver.config_space,
            logger=smac.solver.logger,
        )

    if self.read_history:
        import pickle
        import create_Runhistory

        # NOTE(review): the dump locations are hard-coded absolute paths.
        # Files are opened through ``with`` so the handles are flushed and
        # closed even when pickling fails.
        with open("/home/dfki/Desktop/temp/pickel/new_runhistory.p",
                  "wb") as fh:
            pickle.dump(runhistory, fh)
        last_trajectories = create_Runhistory.trajectory_builder(
            traj_logger=traj_logger, config_milad=config)
        present_trajectories = smac.solver.intensifier.traj_logger.trajectory
        self.trajectory = present_trajectories + last_trajectories
        with open("/home/dfki/Desktop/temp/pickel/new_trajectory.p",
                  "wb") as fh:
            pickle.dump(self.trajectory, fh)
    else:
        self.trajectory = smac.solver.intensifier.traj_logger.trajectory

    self.runhistory = smac.solver.runhistory

    return self.runhistory, self.trajectory
def run(self):
    """Runs the optimization loop (model-guided stochastic local search)

    After the initial design, every iteration retrains the model, picks a
    start point (the incumbent in the first iteration; afterwards either a
    freshly sampled configuration, with probability ``self.restart_prob``,
    or a perturbed incumbent), runs ``self.local_search`` from it and races
    the local incumbent against the global one.

    All random draws are taken from ``self.rng`` so that a seeded run stays
    reproducible. (Calling ``self.rng.seed()`` would reseed the generator
    from OS entropy and hand ``None`` on as the seed.)

    Returns
    ----------
    incumbent
        ``self.incumbent`` as left by the last race
    """
    max_seed = 2 ** 31 - 1

    self.stats.start_timing()
    try:
        self.incumbent = self.initial_design.run()
    except FirstRunCrashedException:
        # A crashed first run is only fatal when the scenario asks for it.
        if self.scenario.abort_on_first_run_crash:
            raise

    # Main loop
    iteration = 1
    while True:
        if self.scenario.shared_model:
            pSMAC.read(run_history=self.runhistory,
                       output_dirs=self.scenario.input_psmac_dirs,
                       configuration_space=self.config_space,
                       logger=self.logger)

        # model training
        self.logger.info("Model Training")
        X, Y = self.rh2EPM.transform(self.runhistory)
        self.model.train(X, Y)
        self.acquisition_func.update(
            model=self.model,
            eta=self.runhistory.get_cost(self.incumbent))

        if iteration == 1:
            start_point = self.incumbent
        elif self.rng.rand() < self.restart_prob:
            # Restart
            self.logger.info("Restart Search")
            start_point = self.scenario.cs.sample_configuration()
        else:
            # pertubate inc
            self.logger.info("Pertubate Incumbent")
            start_point = self.incumbent
            for _ in range(self.pertubation_steps):
                neighbours = list(
                    get_one_exchange_neighbourhood(
                        start_point, seed=self.rng.randint(max_seed)))
                if not neighbours:
                    # Nothing to move to; keep the current start point.
                    break
                start_point = neighbours[self.rng.randint(len(neighbours))]

        # SLS
        self.logger.info("SLS")
        local_inc = self.local_search(start_point=start_point)

        # decide global inc
        self.logger.info("Race local incumbent against global incumbent")
        # don't be too aggressive here
        self.intensifier.minR = self.slow_race_minR
        self.intensifier.Adaptive_Capping_Slackfactor = \
            self.slow_race_adaptive_capping_factor
        # log traj
        self.incumbent, _ = self.intensifier.intensify(
            challengers=[local_inc],
            incumbent=self.incumbent,
            run_history=self.runhistory,
            aggregate_func=self.aggregate_func,
            time_bound=0.01,
            log_traj=True)
        if self.incumbent == local_inc:
            self.logger.info("Changed global incumbent!")

        if self.scenario.shared_model:
            pSMAC.write(run_history=self.runhistory,
                        output_directory=self.stats.output_dir,
                        num_run=self.num_run)

        iteration += 1

        self.logger.debug(
            "Remaining budget: %f (wallclock), "
            "%f (ta costs), %f (target runs)",
            self.stats.get_remaing_time_budget(),
            self.stats.get_remaining_ta_budget(),
            self.stats.get_remaining_ta_runs())

        if self.stats.is_budget_exhausted():
            break

        self.stats.print_stats(debug_out=True)

    return self.incumbent
def run(self) -> Configuration:
    """Runs the Bayesian optimization loop

    Each iteration asks the intensifier for the next run, submits it to the
    runner when the intent is ``RUN``, waits when it is ``WAIT``, and then
    incorporates whatever runs have finished. Once the budget is exhausted
    or ``self._stop`` is set, all pending runs are drained and the loop
    ends.

    Returns
    ----------
    incumbent
        ``self.incumbent`` after the last finished run was incorporated
    """
    self.start()

    num_obj = len(self.scenario.multi_objectives)  # type: ignore[attr-defined] # noqa F821

    # Main BO loop
    while True:
        if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
            pSMAC.read(
                run_history=self.runhistory,
                output_dirs=self.scenario.input_psmac_dirs,  # type: ignore[attr-defined] # noqa F821
                configuration_space=self.config_space,
                logger=self.logger,
            )

        start_time = time.time()

        # sample next configuration for intensification
        # Initial design runs are also included in the BO loop now.
        intent, run_info = self.intensifier.get_next_run(
            challengers=self.initial_design_configs,
            incumbent=self.incumbent,
            chooser=self.epm_chooser,
            run_history=self.runhistory,
            repeat_configs=self.intensifier.repeat_configs,
            num_workers=self.tae_runner.num_workers(),
        )

        # remove config from initial design challengers to not repeat it again
        self.initial_design_configs = [
            c for c in self.initial_design_configs if c != run_info.config
        ]

        # update timebound only if a 'new' configuration is sampled as the challenger
        if self.intensifier.num_run == 0:
            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(
                time_spent, update=False)
            self.logger.debug("New intensification time bound: %f",
                              time_left)
        else:
            # NOTE(review): ``time_left`` and ``time_spent`` are carried
            # over from an earlier iteration; this branch assumes the
            # first iteration always takes the branch above - confirm.
            old_time_left = time_left
            time_spent = time_spent + (time.time() - start_time)
            time_left = self._get_timebound_for_intensification(
                time_spent, update=True)
            self.logger.debug(
                "Updated intensification time bound from %f to %f",
                old_time_left,
                time_left,
            )

        # Skip starting new runs if the budget is now exhausted
        if self.stats.is_budget_exhausted():
            intent = RunInfoIntent.SKIP

        # Skip the run if there was a request to do so.
        # For example, during intensifier intensification, we
        # don't want to rerun a config that was previously ran
        if intent == RunInfoIntent.RUN:
            # Track the fact that a run was launched in the run
            # history. It's status is tagged as RUNNING, and once
            # completed and processed, it will be updated accordingly
            self.runhistory.add(
                config=run_info.config,
                cost=float(MAXINT) if num_obj == 1 else np.full(
                    num_obj, float(MAXINT)),
                time=0.0,
                status=StatusType.RUNNING,
                instance_id=run_info.instance,
                seed=run_info.seed,
                budget=run_info.budget,
            )

            run_info.config.config_id = self.runhistory.config_ids[
                run_info.config]

            self.tae_runner.submit_run(run_info=run_info)

            # There are 2 criteria that the stats object uses to know
            # if the budged was exhausted.
            # The budget time, which can only be known when the run finishes,
            # And the number of ta executions. Because we submit the job at this point,
            # we count this submission as a run. This prevent for using more
            # runner runs than what the scenario allows
            self.stats.submitted_ta_runs += 1

        elif intent == RunInfoIntent.SKIP:
            # No launch is required
            # This marks a transition request from the intensifier
            # To a new iteration
            pass
        elif intent == RunInfoIntent.WAIT:
            # In any other case, we wait for resources
            # This likely indicates that no further decision
            # can be taken by the intensifier until more data is
            # available
            self.tae_runner.wait()
        else:
            raise NotImplementedError(
                "No other RunInfoIntent has been coded!")

        # Check if there is any result, or else continue
        for run_info, result in self.tae_runner.get_finished_runs():
            # Add the results of the run to the run history
            # Additionally check for new incumbent
            self._incorporate_run_results(run_info, result, time_left)

        if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
            assert self.scenario.output_dir_for_this_run is not None  # please mypy
            pSMAC.write(
                run_history=self.runhistory,
                output_directory=self.scenario.output_dir_for_this_run,  # type: ignore[attr-defined] # noqa F821
                logger=self.logger,
            )

        self.logger.debug(
            "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
            % (
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs(),
            ))

        if self.stats.is_budget_exhausted() or self._stop:
            if self.stats.is_budget_exhausted():
                self.logger.debug("Exhausted configuration budget")
            else:
                self.logger.debug(
                    "Shutting down because a configuration or callback returned status STOP"
                )

            # The budget can be exhausted for 2 reasons: number of ta runs or
            # time. If the number of ta runs is reached, but there is still budget,
            # wait for the runs to finish
            while self.tae_runner.pending_runs():

                self.tae_runner.wait()

                for run_info, result in self.tae_runner.get_finished_runs(
                ):
                    # Add the results of the run to the run history
                    # Additionally check for new incumbent
                    self._incorporate_run_results(run_info, result,
                                                  time_left)

            # Break from the intensification loop,
            # as there are no more resources
            break

        # print stats at the end of each intensification iteration
        if self.intensifier.iteration_done:
            self.stats.print_stats(debug_out=True)

    return self.incumbent
def test_write(self):
    """Write a run history with pSMAC and compare the JSON file to a fixture.

    Six runs over four sampled configurations are recorded, plus one run
    flagged as external. After writing, the history is read back (its size
    must not change) and the written ``runhistory.json`` must equal the
    fixture, which contains no entry for the external configuration.
    """
    # The nulls make sure that we correctly emit the python None value
    expected_json = (
        '{"data": [[[1, "branin", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[1, "branini", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[2, "branini", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[2, null, 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[3, "branin-hoo", 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]], '
        '[[4, null, 1], [1, 1, {"__enum__": '
        '"StatusType.SUCCESS"}, null]]],'
        '"config_origins": {},'
        '"configs": {'
        '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, '
        '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, '
        '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, '
        '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'
    )

    run_history = RunHistory(aggregate_func=average_cost)
    configuration_space = test_helpers.get_branin_config_space()
    configuration_space.seed(1)

    def record(cfg, **extra):
        # Every run in this test is a successful one with cost 1, time 1
        # and seed 1; only instance / origin differ.
        run_history.add(cfg, 1, 1, StatusType.SUCCESS, seed=1, **extra)

    # Config on two instances
    first = configuration_space.sample_configuration()
    record(first, instance_id='branin')
    record(first, instance_id='branini')

    # Another config on a known instance
    second = configuration_space.sample_configuration()
    record(second, instance_id='branini')
    # Known Config on no instance
    record(second)

    # New config on new instance
    third = configuration_space.sample_configuration()
    record(third, instance_id='branin-hoo')

    # New config on no instance
    fourth = configuration_space.sample_configuration()
    record(fourth)

    # External configuration which will not be written to json file!
    external = configuration_space.sample_configuration()
    record(external, origin=DataOrigin.EXTERNAL_SAME_INSTANCES)

    logger = logging.getLogger("Test")
    pSMAC.write(run_history, self.tmp_dir, logger=logger)

    size_before_read = len(run_history.data)
    pSMAC.read(run_history=run_history,
               output_dirs=[self.tmp_dir],
               configuration_space=configuration_space,
               logger=logger)
    self.assertEqual(
        size_before_read, len(run_history.data),
        "Runhistory should be the same and not changed after reading")

    output_filename = os.path.join(self.tmp_dir, 'runhistory.json')
    self.assertTrue(os.path.exists(output_filename))

    expected = json.loads(expected_json, object_hook=StatusType.enum_hook)
    with open(output_filename) as fh:
        written = json.load(fh, object_hook=StatusType.enum_hook)
    self.assertEqual(written, expected)
def run(self) -> Configuration:
    """Run the Bayesian optimization loop until the budget is exhausted.

    Each iteration optionally reads run histories shared by other runs,
    asks the intensifier for the next challenger, evaluates it, optionally
    writes the run history for other runs, and then checks the budget.

    Returns
    -------
    incumbent : Configuration
        The best found configuration.

    Raises
    ------
    FirstRunCrashedException
        Re-raised only when ``scenario.abort_on_first_run_crash`` is set.
    """
    self.start()

    # The time bound is refreshed only when a new challenger is sampled.
    # Seed it with the intensifier's minimum so that an iteration which
    # yields a challenger without ``new_challenger`` being set cannot hit
    # an unbound ``time_left``.
    time_left = self.intensifier._min_time

    # Main BO loop
    while True:
        if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
            pSMAC.read(run_history=self.runhistory,
                       output_dirs=self.scenario.input_psmac_dirs,  # type: ignore[attr-defined] # noqa F821
                       configuration_space=self.config_space,
                       logger=self.logger)

        start_time = time.time()

        # Sample next configuration for intensification.
        # Initial design runs are also included in the BO loop now.
        challenger, new_challenger = self.intensifier.get_next_challenger(
            challengers=self.initial_design_configs,
            chooser=self.epm_chooser,
            run_history=self.runhistory,
            repeat_configs=self.intensifier.repeat_configs
        )

        # Remove config from initial design challengers to not repeat it again
        self.initial_design_configs = [c for c in self.initial_design_configs if c != challenger]

        # Update timebound only if a 'new' configuration is sampled as the challenger
        if new_challenger:
            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

        if challenger:
            # Evaluate selected challenger
            self.logger.debug("Intensify - evaluate challenger")

            try:
                self.incumbent, inc_perf = self.intensifier.eval_challenger(
                    challenger=challenger,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    time_bound=max(self.intensifier._min_time, time_left))
            except FirstRunCrashedException:
                # Deliberately tolerated unless the scenario asks to abort.
                if self.scenario.abort_on_first_run_crash:  # type: ignore[attr-defined] # noqa F821
                    raise

            if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                assert self.scenario.output_dir_for_this_run is not None  # please mypy
                pSMAC.write(run_history=self.runhistory,
                            output_directory=self.scenario.output_dir_for_this_run,  # type: ignore[attr-defined] # noqa F821
                            logger=self.logger)

        # Lazy %-args: the message is only rendered if DEBUG is enabled.
        self.logger.debug(
            "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)",
            self.stats.get_remaing_time_budget(),
            self.stats.get_remaining_ta_budget(),
            self.stats.get_remaining_ta_runs())

        if self.stats.is_budget_exhausted():
            break

        self.stats.print_stats(debug_out=True)

    return self.incumbent
def run_smbo(self):
    """Set up and run the SMAC/ROAR optimization for the current dataset.

    Steps, in order:

    1. Reload the data manager and seed the configuration space.
    2. If ``num_metalearning_cfgs > 0`` and a metadata directory exists,
       compute meta-features and collect metalearning suggestions.
    3. Build the SMAC ``Scenario``, the target algorithm evaluator and the
       model / run-history transformer for the chosen acquisition function.
    4. Run the initial design, then intensify each metalearning suggestion.
    5. Loop choose-next / intensify until the budget is exhausted.

    Returns
    -------
    tuple
        ``(self.runhistory, self.trajectory, self.fANOVA_input)``.

    Raises
    ------
    ValueError
        If both include and exclude lists are given for preprocessors or
        for estimators, if ``self.task`` is neither a classification nor a
        regression task while estimators are restricted, or if the
        acquisition function / configuration mode is unknown.
    """
    self.watcher.start_task('SMBO')

    # == first things first: load the datamanager
    self.reset_data_manager()

    # == Initialize non-SMBO stuff
    # first create a scenario
    seed = self.seed
    self.config_space.seed(seed)
    num_params = len(self.config_space.get_hyperparameters())
    # allocate a run history
    num_run = self.start_num_run

    # Initialize some SMAC dependencies
    runhistory = RunHistory(aggregate_func=average_cost)

    # == METALEARNING suggestions
    # we start by evaluating the defaults on the full dataset again
    # and add the suggestions from metalearning behind it
    if self.num_metalearning_cfgs > 0:
        if self.metadata_directory is None:
            metalearning_directory = os.path.dirname(
                autosklearn.metalearning.__file__)
            # There is no multilabel data in OpenML
            if self.task == MULTILABEL_CLASSIFICATION:
                meta_task = BINARY_CLASSIFICATION
            else:
                meta_task = self.task
            metadata_directory = os.path.join(
                metalearning_directory, 'files',
                '%s_%s_%s' % (self.metric, TASK_TYPES_TO_STRING[meta_task],
                              'sparse' if self.datamanager.info['is_sparse']
                              else 'dense'))
            self.metadata_directory = metadata_directory

        if os.path.exists(self.metadata_directory):
            self.logger.info('Metadata directory: %s',
                             self.metadata_directory)
            meta_base = MetaBase(self.config_space, self.metadata_directory)

            # Best effort: the dataset may simply not be in the meta base.
            # Narrowed from a bare ``except`` so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            try:
                meta_base.remove_dataset(self.dataset_name)
            except Exception:
                pass

            metafeature_calculation_time_limit = int(
                self.total_walltime_limit / 4)
            metafeature_calculation_start_time = time.time()
            meta_features = self._calculate_metafeatures_with_limits(
                metafeature_calculation_time_limit)
            metafeature_calculation_end_time = time.time()
            # Whatever is left of the limit goes to the encoded meta-features.
            metafeature_calculation_time_limit = \
                metafeature_calculation_time_limit - (
                    metafeature_calculation_end_time -
                    metafeature_calculation_start_time)

            if metafeature_calculation_time_limit < 1:
                self.logger.warning(
                    'Time limit for metafeature calculation less '
                    'than 1 seconds (%f). Skipping calculation '
                    'of metafeatures for encoded dataset.',
                    metafeature_calculation_time_limit)
                meta_features_encoded = None
            else:
                with warnings.catch_warnings():
                    warnings.showwarning = self._send_warnings_to_log
                    self.datamanager.perform1HotEncoding()
                    meta_features_encoded = \
                        self._calculate_metafeatures_encoded_with_limits(
                            metafeature_calculation_time_limit)

            # In case there is a problem calculating the encoded meta-features
            if meta_features is None:
                if meta_features_encoded is not None:
                    meta_features = meta_features_encoded
            else:
                if meta_features_encoded is not None:
                    meta_features.metafeature_values.update(
                        meta_features_encoded.metafeature_values)

            if meta_features is not None:
                meta_base.add_dataset(self.dataset_name, meta_features)
                # Do mean imputation of the meta-features - should be done
                # specific for each prediction model!
                all_metafeatures = meta_base.get_metafeatures(
                    features=list(meta_features.keys()))
                all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)

                with warnings.catch_warnings():
                    warnings.showwarning = self._send_warnings_to_log
                    metalearning_configurations = \
                        self.collect_metalearning_suggestions(meta_base)
                if metalearning_configurations is None:
                    metalearning_configurations = []
                self.reset_data_manager()

                self.logger.info('%s', meta_features)

                # Convert meta-features into a dictionary because the scenario
                # expects a dictionary
                meta_features_dict = {
                    dataset: series.values
                    for dataset, series in all_metafeatures.iterrows()
                }
                meta_features_list = [
                    meta_features[meta_feature_name].value
                    for meta_feature_name in all_metafeatures.columns
                ]
                meta_features_list = np.array(meta_features_list).reshape(
                    (1, -1))
                self.logger.info(list(meta_features_dict.keys()))
        else:
            meta_features = None
            # Use the attribute: the local ``metadata_directory`` is only
            # bound when the directory had to be derived above.
            self.logger.warning('Could not find meta-data directory %s',
                                self.metadata_directory)
    else:
        meta_features = None

    if meta_features is None:
        # EIPS needs instance features, which are the meta-features.
        if self.acquisition_function == 'EIPS':
            self.logger.critical('Reverting to acquisition function EI!')
            self.acquisition_function = 'EI'
        meta_features_list = []
        meta_features_dict = {}
        metalearning_configurations = []

    if self.resampling_strategy in ['partial-cv',
                                    'partial-cv-iterative-fit']:
        # One SMAC instance per fold.
        num_folds = self.resampling_strategy_args['folds']
        instances = [[json.dumps({'task_id': self.dataset_name,
                                  'fold': fold_number})]
                     for fold_number in range(num_folds)]
    else:
        instances = [[json.dumps({'task_id': self.dataset_name})]]

    startup_time = self.watcher.wall_elapsed(self.dataset_name)
    total_walltime_limit = self.total_walltime_limit - startup_time - 5
    scenario_dict = {
        'cs': self.config_space,
        'cutoff-time': self.func_eval_time_limit,
        'memory-limit': self.memory_limit,
        'wallclock-limit': total_walltime_limit,
        'output-dir': self.backend.get_smac_output_directory(self.seed),
        'shared-model': self.shared_mode,
        'run-obj': 'quality',
        'deterministic': 'true',
        'instances': instances,
    }

    if self.configuration_mode == 'RANDOM':
        scenario_dict['minR'] = len(
            instances) if instances is not None else 1
        scenario_dict['initial_incumbent'] = 'RANDOM'

    self.scenario = Scenario(scenario_dict)

    # TODO rebuild target algorithm to be it's own target algorithm
    # evaluator, which takes into account that a run can be killed prior
    # to the model being fully fitted; thus putting intermediate results
    # into a queue and querying them once the time is over
    exclude = dict()
    include = dict()
    if self.include_preprocessors is not None and \
            self.exclude_preprocessors is not None:
        raise ValueError('Cannot specify include_preprocessors and '
                         'exclude_preprocessors.')
    elif self.include_preprocessors is not None:
        include['preprocessor'] = self.include_preprocessors
    elif self.exclude_preprocessors is not None:
        exclude['preprocessor'] = self.exclude_preprocessors

    # Fixed: this check used to test ``exclude_preprocessors``, so the
    # include/exclude estimator conflict was never detected.
    if self.include_estimators is not None and \
            self.exclude_estimators is not None:
        raise ValueError('Cannot specify include_estimators and '
                         'exclude_estimators.')
    elif self.include_estimators is not None:
        if self.task in CLASSIFICATION_TASKS:
            include['classifier'] = self.include_estimators
        elif self.task in REGRESSION_TASKS:
            include['regressor'] = self.include_estimators
        else:
            raise ValueError(self.task)
    elif self.exclude_estimators is not None:
        if self.task in CLASSIFICATION_TASKS:
            exclude['classifier'] = self.exclude_estimators
        elif self.task in REGRESSION_TASKS:
            exclude['regressor'] = self.exclude_estimators
        else:
            raise ValueError(self.task)

    ta = ExecuteTaFuncWithQueue(
        backend=self.backend,
        autosklearn_seed=seed,
        resampling_strategy=self.resampling_strategy,
        initial_num_run=num_run,
        logger=self.logger,
        include=include,
        exclude=exclude,
        metric=self.metric,
        memory_limit=self.memory_limit,
        disable_file_output=self.disable_file_output,
        **self.resampling_strategy_args)

    types, bounds = get_types(self.config_space,
                              self.scenario.feature_array)

    # TODO extract generation of SMAC object into it's own function for
    # testing
    if self.acquisition_function == 'EI':
        # Instance features (meta_features_list) are intentionally not
        # passed to the EI model.
        model = RandomForestWithInstances(types=types,
                                          bounds=bounds,
                                          seed=1,
                                          num_trees=10)
        rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                     scenario=self.scenario,
                                     success_states=[
                                         StatusType.SUCCESS,
                                         StatusType.MEMOUT,
                                         StatusType.TIMEOUT
                                     ],
                                     impute_censored_data=False,
                                     impute_state=None)
        _smac_arguments = dict(scenario=self.scenario,
                               model=model,
                               rng=seed,
                               runhistory2epm=rh2EPM,
                               tae_runner=ta,
                               runhistory=runhistory)
    elif self.acquisition_function == 'EIPS':
        rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                     scenario=self.scenario,
                                     success_states=[
                                         StatusType.SUCCESS,
                                         StatusType.MEMOUT,
                                         StatusType.TIMEOUT
                                     ],
                                     impute_censored_data=False,
                                     impute_state=None)
        model = UncorrelatedMultiObjectiveRandomForestWithInstances(
            ['cost', 'runtime'],
            types=types,
            bounds=bounds,
            num_trees=10,
            instance_features=meta_features_list,
            seed=1)
        acquisition_function = EIPS(model)
        _smac_arguments = dict(scenario=self.scenario,
                               model=model,
                               rng=seed,
                               tae_runner=ta,
                               runhistory2epm=rh2EPM,
                               runhistory=runhistory,
                               acquisition_function=acquisition_function)
    else:
        raise ValueError('Unknown acquisition function value %s!' %
                         self.acquisition_function)

    if self.configuration_mode == 'SMAC':
        smac = SMAC(**_smac_arguments)
    elif self.configuration_mode in ['ROAR', 'RANDOM']:
        # ROAR is constructed without a model or run-history transformer.
        for not_in_roar in ['runhistory2epm', 'model']:
            if not_in_roar in _smac_arguments:
                del _smac_arguments[not_in_roar]
        smac = ROAR(**_smac_arguments)
    else:
        raise ValueError(self.configuration_mode)

    smac.solver.stats.start_timing()
    # == first, evaluate all metelearning and default configurations
    smac.solver.incumbent = smac.solver.initial_design.run()

    for challenger in metalearning_configurations:
        smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
            challengers=[challenger],
            incumbent=smac.solver.incumbent,
            run_history=smac.solver.runhistory,
            aggregate_func=smac.solver.aggregate_func,
            time_bound=self.total_walltime_limit)

        if smac.solver.scenario.shared_model:
            pSMAC.write(run_history=smac.solver.runhistory,
                        output_directory=smac.solver.scenario.output_dir,
                        num_run=self.seed)

        if smac.solver.stats.is_budget_exhausted():
            break

    # == after metalearning run SMAC loop
    while True:
        if smac.solver.scenario.shared_model:
            pSMAC.read(run_history=smac.solver.runhistory,
                       output_dirs=glob.glob(
                           self.backend.get_smac_output_glob()),
                       configuration_space=self.config_space,
                       logger=self.logger)

        choose_next_start_time = time.time()
        try:
            challengers = self.choose_next(smac)
        except Exception as e:
            # Deliberate best effort: fall back to a random configuration
            # so that the optimization keeps going.
            self.logger.error(e)
            self.logger.error("Error in getting next configurations "
                              "with SMAC. Using random configuration!")
            next_config = self.config_space.sample_configuration()
            challengers = [next_config]
        time_for_choose_next = time.time() - choose_next_start_time
        self.logger.info('Used %g seconds to find next '
                         'configurations', time_for_choose_next)

        # Intensify for as long as choosing took, but at least one second.
        time_for_choose_next = max(time_for_choose_next, 1.0)
        smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
            challengers=challengers,
            incumbent=smac.solver.incumbent,
            run_history=smac.solver.runhistory,
            aggregate_func=smac.solver.aggregate_func,
            time_bound=time_for_choose_next)

        if smac.solver.scenario.shared_model:
            pSMAC.write(run_history=smac.solver.runhistory,
                        output_directory=smac.solver.scenario.output_dir,
                        num_run=self.seed)

        if smac.solver.stats.is_budget_exhausted():
            break

    self.runhistory = smac.solver.runhistory
    self.trajectory = smac.solver.intensifier.traj_logger.trajectory
    smac.runhistory = self.runhistory
    self.fANOVA_input = smac.get_X_y()

    return self.runhistory, self.trajectory, self.fANOVA_input
def optimize(self):
    """Optimize the algorithm provided in the scenario with parallel workers.

    Starts ``self.n_optimizers`` worker processes, collects the incumbent
    each one reports through a queue, loads all run histories from
    ``scenario.input_psmac_dirs`` into ``self.rh`` and returns incumbents.

    Returns
    -------
    incumbent(s) : ndarray[Configuration]
        All collected incumbents when ``n_optimizers == n_incs``; otherwise
        the subset selected via ``get_best_incumbents_ids``. Slots of
        workers that reported nothing stay unset. The worker process ids
        are received from the queue but are not returned.
    """
    import queue  # stdlib; only needed for ``queue.Empty`` while draining

    # Setup output directory
    if self.output_dir is None:
        self.scenario.output_dir = "psmac3-output_%s" % (
            datetime.datetime.fromtimestamp(
                time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f'))
        self.output_dir = create_output_directory(self.scenario,
                                                  run_id=self.run_id,
                                                  logger=self.logger)
    # NOTE(review): applied regardless of whether the output directory was
    # just created - confirm this matches the intended nesting.
    if self.shared_model:
        self.scenario.shared_model = self.shared_model
    if self.scenario.input_psmac_dirs is None:
        self.scenario.input_psmac_dirs = os.path.sep.join(
            (self.scenario.output_dir, 'run_*'))

    # NOTE(review): an unused deep copy of the scenario (with its output
    # directories cleared) was built here before; the workers have always
    # been handed ``self.scenario``. Confirm that is the intended object.

    self.logger.info("+" * 120)
    self.logger.info("PSMAC run")

    # ~~~~~~~~~~~~~~~~~~~~~ Multiprocessing part start ~~~~~~~~~~~~~~~~~~~~~ #
    q = multiprocessing.Queue()
    procs = []
    for p in range(self.n_optimizers):
        proc = multiprocessing.Process(
            target=optimize,
            args=(
                q,  # Output queue
                self.scenario,  # Scenario object
                self._tae,  # type of tae to run target with
                p,  # process_id (used in output folder name)
                self.output_dir,  # directory to create outputs in
            ),
            kwargs=self.kwargs)
        proc.start()
        procs.append(proc)

    incs = np.empty((self.n_optimizers, ), dtype=Configuration)

    # Drain the queue BEFORE joining: a process that has put items on a
    # queue does not terminate until they are flushed to the pipe, so
    # joining first can deadlock. Polling with a timeout also avoids
    # relying on ``Queue.empty()`` and copes with a worker that died
    # without reporting a result.
    idx = 0
    while idx < self.n_optimizers:
        try:
            conf, _pid = q.get(timeout=1.0)
        except queue.Empty:
            if any(proc.is_alive() for proc in procs):
                continue
            # Every worker has exited; pick up anything still buffered.
            try:
                conf, _pid = q.get_nowait()
            except queue.Empty:
                break
        incs[idx] = conf
        idx += 1

    for proc in procs:
        proc.join()

    self.logger.info('Loading all runhistories')
    read(self.rh, self.scenario.input_psmac_dirs, self.scenario.cs,
         self.logger)
    q.close()
    # ~~~~~~~~~~~~~~~~~~~~~~ Multiprocessing part end ~~~~~~~~~~~~~~~~~~~~~~ #

    if self.n_optimizers == self.n_incs:
        # No validation necessary, just return all incumbents
        return incs

    # Determine the best incumbents
    _, val_ids, _, est_ids = self.get_best_incumbents_ids(incs)
    if val_ids:
        return incs[val_ids]
    return incs[est_ids]
def run(self):
    """Run the Bayesian optimization loop, locally or through a server.

    Without a server (``self.server is None``) the challengers are
    intensified in-process. With a server, the challengers are pushed to
    it, the returned run history is merged into ``self.runhistory`` and
    the incumbent is replaced when the returned one has a lower cost.

    Returns
    ----------
    incumbent: np.array(1, H)
        The best found configuration
    """
    self.start()

    # Round counter, only used for the progress print-out
    iteration = 0

    # Main BO loop
    while True:
        # Print the best result of every SMBO round (including round 0)
        best_cost = self.runhistory.get_cost(self.incumbent)
        print('SMBO %s: %s' % (iteration, best_cost))
        iteration += 1

        if self.scenario.shared_model:
            pSMAC.read(run_history=self.runhistory,
                       output_dirs=self.scenario.input_psmac_dirs,
                       configuration_space=self.config_space,
                       logger=self.logger)

        search_started = time.time()
        X, Y = self.rh2EPM.transform(self.runhistory)

        self.logger.debug("Search for next configuration")
        # get all found configurations sorted according to acq
        challengers = self.choose_next(X, Y)

        search_duration = time.time() - search_started
        time_left = self._get_timebound_for_intensification(search_duration)

        self.logger.debug("Intensify")

        if self.server is not None:
            # Hand the challengers to the workers: they report losses,
            # which are added to the history before new challengers run.
            print(time_left)
            self.server.push(incumbent=self.incumbent,
                             runhistory=self.runhistory,
                             challengers=challengers.challengers,
                             time_left=time_left)

            # Read the run history from the worker and merge it into
            # self.runhistory
            incumbent, new_runhistory = self.server.pull()
            self.runhistory.update(new_runhistory)

            # After the merge, check whether there is a new incumbent,
            # because the worker does not hold the complete run history.
            current_runs = self.runhistory.get_history_for_config(
                self.incumbent)
            candidate_runs = self.runhistory.get_history_for_config(
                incumbent)

            # Find the lowest cost of each
            current_best = min(entry[0] for entry in current_runs)
            candidate_best = min(entry[0] for entry in candidate_runs)

            if candidate_best < current_best:
                # Switch to the new incumbent
                self.incumbent = incumbent

            # This function could be considered here instead:
            #     new_incumbent = self._compare_configs(
            #         incumbent=incumbent, challenger=challenger,
            #         run_history=run_history,
            #         aggregate_func=aggregate_func,
            #         log_traj=log_traj)
        else:
            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(self.intensifier._min_time, time_left))

        if self.scenario.shared_model:
            pSMAC.write(
                run_history=self.runhistory,
                output_directory=self.scenario.output_dir_for_this_run)

        remaining = (self.stats.get_remaing_time_budget(),
                     self.stats.get_remaining_ta_budget(),
                     self.stats.get_remaining_ta_runs())
        logging.debug(
            "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
            % remaining)

        if self.stats.is_budget_exhausted():
            break

        self.stats.print_stats(debug_out=True)

    return self.incumbent