def optimize(self) -> TuningResult:
    """
    Method performs a hyperparameter optimization run according to the selected HPO-method.
    :return: result: TuningResult
        TuningResult-object that contains the results of this optimization run.
    """

    # Select the specified HPO-tuning method
    if self.hpo_method == 'CMA-ES':
        this_optimizer = CmaEsSampler(seed=self.random_seed)
    elif self.hpo_method == 'TPE':
        this_optimizer = TPESampler(seed=self.random_seed)
    elif self.hpo_method == 'RandomSearch':
        this_optimizer = RandomSampler(seed=self.random_seed)
    else:
        raise Exception('Unknown HPO-method!')

    # Create a study object and specify the optimization direction
    study_name = 'hpo_study'
    study_storage = 'sqlite:///hpo.db'

    # Optimize on the predefined n_func_evals and measure the wall clock times
    # start_time = time.time()
    self.times = []  # Initialize a list for saving the wall clock times

    # Delete old study objects ('fresh start') >> otherwise the old results would be included
    try:
        optuna.delete_study(study_name=study_name, storage=study_storage)
    except Exception:
        print('No old optuna study objects found!')

    # Use a warmstart configuration?
    if self.do_warmstart == 'Yes':
        try:
            # Create a new study
            warmstart_study = optuna.create_study(direction='minimize', storage=study_storage,
                                                  study_name=study_name, load_if_exists=False)

            # Retrieve the warmstart hyperparameters for the ML-algorithm
            warmstart_params = self.get_warmstart_configuration()

            # Initialize a dictionary for the warmstart HP-configuration
            warmstart_dict = {}

            # Iterate over all hyperparameters of this ML-algorithm's tuned HP-space and append the default
            # values to the dictionary
            for i in range(len(self.hp_space)):
                this_param = self.hp_space[i].name
                this_warmstart_value = warmstart_params[this_param]

                # For some HPs (e.g. max_depth of RF) the default value is None, although their typical dtype
                # is different (e.g. int)
                if this_warmstart_value is None and type(self.hp_space[i]) == skopt.space.space.Integer:
                    # Try to impute these values by the mean value
                    this_warmstart_value = int(0.5 * (self.hp_space[i].low + self.hp_space[i].high))

                # Add the warmstart HP-value to the dictionary
                warmstart_dict[this_param] = this_warmstart_value

            # Enqueue a trial with the warmstart HP-values and optimize to ensure that the warmstart
            # configuration is evaluated first (e.g. for parallel processes)
            warmstart_study.enqueue_trial(params=warmstart_dict)
            warmstart_study.optimize(func=self.objective, n_trials=1)

            # Set flag to indicate that a warmstart took place
            did_warmstart = True

        except Exception:
            print('Warmstarting optuna failed!')

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

    # No warmstart requested
    else:
        # Set flag to indicate that NO warmstart took place
        did_warmstart = False

    # Create a new study or reload the warmstart study (if available and requested)
    study = optuna.create_study(sampler=this_optimizer, direction='minimize', study_name=study_name,
                                storage=study_storage, load_if_exists=True)

    # If a warmstart took place, reduce the number of remaining function evaluations to ensure comparability
    # (equal budgets)
    if did_warmstart:
        n_func_evals = self.n_func_evals - 1
    else:
        n_func_evals = self.n_func_evals

    # Start the optimization
    try:
        study.optimize(func=self.objective, n_trials=n_func_evals, n_jobs=self.n_workers)
        run_successful = True

    # Algorithm crashed
    except Exception:
        # Add a warning here
        run_successful = False

    # If the optimization run was successful, determine the optimization results
    if run_successful:

        # Transformation of the results into a TuningResult-object
        all_trials = study.get_trials()
        best_configuration = study.best_params
        best_val_loss = study.best_value

        start_times = []  # Start time of each trial
        finish_times = []  # Finish time of each trial
        unsorted_losses = []  # Loss of each iteration
        unsorted_configurations = ()  # HP-configuration of each iteration

        # Number the evaluations / iterations of this run
        evaluation_ids = list(range(1, len(all_trials) + 1))

        for i in range(len(all_trials)):
            start_times.append(all_trials[i].datetime_start)
            finish_times.append(all_trials[i].datetime_complete)
            # evaluation_ids.append(all_trials[i].number)
            unsorted_losses.append(all_trials[i].value)
            unsorted_configurations = unsorted_configurations + (all_trials[i].params,)

        abs_start_time = min(start_times)  # Start time of the first trial

        unsorted_timestamps = []
        for i in range(len(start_times)):
            # Time difference to the start of the first trial
            this_time = finish_times[i] - abs_start_time
            # Conversion into a float value
            this_timestamp = this_time.total_seconds()
            unsorted_timestamps.append(this_timestamp)

        wall_clock_time = max(unsorted_timestamps)

        ids = list(range(1, len(all_trials) + 1))
        temp_dict = {'ids': ids,
                     'timestamps [finished]': unsorted_timestamps,
                     'losses': unsorted_losses,
                     'configurations': unsorted_configurations}

        unsorted_df = pd.DataFrame.from_dict(data=temp_dict)
        unsorted_df.set_index('ids', inplace=True)

        # Sort the DataFrame according to the timestamps (necessary for multiprocessing)
        sorted_df = unsorted_df.sort_values(by=['timestamps [finished]'], ascending=True, inplace=False)

        timestamps = list(sorted_df['timestamps [finished]'])
        losses = list(sorted_df['losses'])
        configurations = tuple(sorted_df['configurations'])

        # Optuna uses full budgets for its HPO methods
        budget = [100.0] * len(losses)

        # Compute the loss on the test set for the best found configuration
        test_loss = self.train_evaluate_ml_model(params=best_configuration, cv_mode=False, test_mode=True)

    # Run not successful (algorithm crashed)
    else:
        evaluation_ids, timestamps, losses, configurations, best_val_loss, best_configuration, wall_clock_time, \
            test_loss, budget = self.impute_results_for_crash()

    # Pass the results to a TuningResult-object
    result = TuningResult(evaluation_ids=evaluation_ids, timestamps=timestamps, losses=losses,
                          configurations=configurations, best_val_loss=best_val_loss,
                          best_configuration=best_configuration, wall_clock_time=wall_clock_time,
                          test_loss=test_loss, successful=run_successful, did_warmstart=did_warmstart,
                          budget=budget)

    return result
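
# The warmstart mechanism above relies on optuna's enqueue_trial(), which fixes the parameters of the next
# trial before the sampler takes over. A minimal, self-contained sketch of this pattern (the toy objective
# and the parameter name 'x' are illustrative only, not part of this benchmark):
#
# import optuna
#
# def toy_objective(trial):
#     x = trial.suggest_float('x', -10.0, 10.0)
#     return x ** 2
#
# study = optuna.create_study(direction='minimize')
# study.enqueue_trial(params={'x': 2.0})      # warmstart configuration, evaluated first
# study.optimize(toy_objective, n_trials=10)  # 1 warmstart trial + 9 sampler-driven trials
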
def optimize(self) -> TuningResult:
    """
    Method performs a hyperparameter optimization run according to the selected HPO-method.
    :return: result: TuningResult
        TuningResult-object that contains the results of this optimization run.
    """

    # Select the specified HPO-tuning method
    if self.hpo_method == 'TPE':
        this_optimizer = tpe.suggest  # Seeding is handled via the rstate argument of fmin below
    else:
        raise Exception('Unknown HPO-method!')

    # Transform the skopt hp_space into a hyperopt space
    hyperopt_space = {}
    for i in range(len(self.hp_space)):
        if type(self.hp_space[i]) == skopt.space.space.Integer:
            hyperopt_space[self.hp_space[i].name] = hp.choice(self.hp_space[i].name,
                                                              range(self.hp_space[i].low,
                                                                    self.hp_space[i].high + 1))
        elif type(self.hp_space[i]) == skopt.space.space.Categorical:
            hyperopt_space[self.hp_space[i].name] = hp.choice(self.hp_space[i].name,
                                                              list(self.hp_space[i].categories))
        elif type(self.hp_space[i]) == skopt.space.space.Real:
            hyperopt_space[self.hp_space[i].name] = hp.uniform(self.hp_space[i].name,
                                                               low=self.hp_space[i].low,
                                                               high=self.hp_space[i].high)
        else:
            raise Exception('The skopt HP-space could not be converted correctly!')

    # Set the random seed of the random number generator
    rand_num_generator = np.random.RandomState(seed=self.random_seed)

    # Optimize on the predefined n_func_evals and measure the wall clock times
    start_time = time.time()
    self.times = []  # Initialize a list for saving the wall clock times

    if self.n_workers == 1:
        # Initialize a trials instance
        trials = Trials()

        # Start the optimization
        try:
            res = fmin(fn=self.objective, space=hyperopt_space, trials=trials, algo=this_optimizer,
                       max_evals=self.n_func_evals, rstate=rand_num_generator)
            run_successful = True

        # Algorithm crashed
        except Exception:
            # Add a warning here
            run_successful = False

    else:
        raise Exception('Parallelization not implemented for the hyperopt framework.')

    # >>>>>>>>>>>
    # ATTEMPT TO PARALLELIZE HYPEROPT VIA MONGODB >>> problems to access the trials results
    # # No parallelization
    # if self.n_workers == 1:
    #
    #     # Initialize a trials instance
    #     trials = Trials()
    #
    #     # Start the optimization
    #     try:
    #         res = fmin(fn=self.objective, space=hyperopt_space, trials=trials, algo=this_optimizer,
    #                    max_evals=self.n_func_evals, rstate=rand_num_generator)
    #         run_successful = True
    #
    #     # Algorithm crashed
    #     except:
    #         # Add a warning here
    #         run_successful = False
    #
    # # Parallelization -> use multiprocessing
    # else:
    #     try:
    #         processes = []
    #         for i in range(self.n_workers + 1):
    #
    #             if i == 0:
    #                 # >>> USE UNIQUE EXPERIMENT ID FOR EACH TRIAL
    #                 trials = MongoTrials('mongo://localhost:27017/mongo_hpo/jobs', exp_key='exp104')
    #                 p = Process(target=multiproc_target_funcs.hyperopt_target1,
    #                             args=(self.objective, hyperopt_space, trials, this_optimizer,
    #                                   self.n_func_evals, rand_num_generator))
    #             else:
    #                 p = Process(target=multiproc_target_funcs.hyperopt_target2)
    #
    #             p.start()
    #             processes.append(p)
    #
    #         for p in processes:
    #             p.join()
    #
    #         run_successful = True
    #
    #     except:
    #         run_successful = False
    # >>>>>>>>>>>

    # If the optimization run was successful, determine the optimization results
    if run_successful:

        # Determine the timestamps for each evaluation of the blackbox-function
        for i in range(len(trials.trials)):
            # Subtract the start time to receive the wall clock time of each function evaluation
            this_trial = trials.trials[i]
            this_eval_time = this_trial['result']['eval_time']
            self.times.append(this_eval_time - start_time)

        wall_clock_time = max(self.times)

        # Timestamps
        timestamps = self.times

        # Number the evaluations / iterations of this run
        evaluation_ids = list(range(1, len(trials.tids) + 1))

        # Loss of each iteration
        losses = []
        for this_result in trials.results:
            losses.append(this_result['loss'])

        # Best loss
        best_val_loss = min(losses)

        # Determine the best HP-configuration of this run
        best_configuration = {}
        for i in range(len(self.hp_space)):
            if type(self.hp_space[i]) == skopt.space.space.Categorical:
                # Hyperopt only returns indices for categorical hyperparameters
                categories = self.hp_space[i].categories
                # cat_idx = res[self.hp_space[i].name]
                cat_idx = trials.best_trial['misc']['vals'][self.hp_space[i].name][0]
                best_configuration[self.hp_space[i].name] = categories[cat_idx]

            elif type(self.hp_space[i]) == skopt.space.space.Integer:
                # Integer HPs were encoded as hp.choice over range(low, high + 1) above, so hyperopt also
                # returns an index here; map it back to the actual integer value
                int_idx = trials.best_trial['misc']['vals'][self.hp_space[i].name][0]
                best_configuration[self.hp_space[i].name] = self.hp_space[i].low + int_idx

            else:
                # best_configuration[self.hp_space[i].name] = res[self.hp_space[i].name]
                best_configuration[self.hp_space[i].name] = \
                    trials.best_trial['misc']['vals'][self.hp_space[i].name][0]

        # HP-configuration of each iteration
        configurations = ()
        for trial in trials.trials:
            this_config = {}
            for i in range(len(self.hp_space)):
                if type(self.hp_space[i]) == skopt.space.space.Categorical:
                    # Hyperopt only returns indices for categorical hyperparameters
                    categories = self.hp_space[i].categories
                    cat_idx = trial['misc']['vals'][self.hp_space[i].name][0]
                    this_config[self.hp_space[i].name] = categories[cat_idx]

                elif type(self.hp_space[i]) == skopt.space.space.Integer:
                    # Map the hp.choice index back to the actual integer value (see above)
                    int_idx = trial['misc']['vals'][self.hp_space[i].name][0]
                    this_config[self.hp_space[i].name] = self.hp_space[i].low + int_idx

                else:
                    this_config[self.hp_space[i].name] = trial['misc']['vals'][self.hp_space[i].name][0]

            configurations = configurations + (this_config,)

        # Hyperopt uses full budgets for its HPO methods
        budget = [100.0] * len(losses)

        # Compute the loss on the test set for the best found configuration
        test_loss = self.train_evaluate_ml_model(params=best_configuration, cv_mode=False, test_mode=True)

    # Run not successful (algorithm crashed)
    else:
        evaluation_ids, timestamps, losses, configurations, best_val_loss, best_configuration, wall_clock_time, \
            test_loss, budget = self.impute_results_for_crash()

    # Pass the results to a TuningResult-object
    result = TuningResult(evaluation_ids=evaluation_ids, timestamps=timestamps, losses=losses,
                          configurations=configurations, best_val_loss=best_val_loss,
                          best_configuration=best_configuration, wall_clock_time=wall_clock_time,
                          test_loss=test_loss, successful=run_successful, budget=budget)

    return result
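
# Note on the index handling above: for every hp.choice() dimension, hyperopt stores the *index* of the
# chosen option in trials.best_trial['misc']['vals'], not the option itself. A minimal sketch of the
# decoding logic (toy space, illustrative parameter names only):
#
# from hyperopt import fmin, tpe, hp, Trials
#
# space = {'criterion': hp.choice('criterion', ['gini', 'entropy']),
#          'max_depth': hp.choice('max_depth', range(1, 11))}
#
# trials = Trials()
# fmin(fn=lambda p: 0.0 if p['criterion'] == 'gini' else 1.0, space=space,
#      algo=tpe.suggest, max_evals=5, trials=trials)
#
# cat_idx = trials.best_trial['misc']['vals']['criterion'][0]  # e.g. 0 -> 'gini'
# int_idx = trials.best_trial['misc']['vals']['max_depth'][0]  # index into range(1, 11) -> value = 1 + int_idx
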
def optimize(self) -> TuningResult:
    """
    Method performs a hyperparameter optimization run according to the selected HPO-method.
    :return: result: TuningResult
        TuningResult-object that contains the results of this optimization run.
    """

    # Convert the skopt hyperparameter space into a continuous space for RoBO
    hp_space_lower = np.zeros(shape=(len(self.hp_space),))
    hp_space_upper = np.zeros(shape=(len(self.hp_space),))

    for i in range(len(self.hp_space)):
        if type(self.hp_space[i]) == skopt.space.space.Integer:
            hp_space_lower[i] = self.hp_space[i].low
            hp_space_upper[i] = self.hp_space[i].high

        elif type(self.hp_space[i]) == skopt.space.space.Categorical:
            n_choices = len(list(self.hp_space[i].categories))
            hp_space_lower[i] = 0
            hp_space_upper[i] = n_choices - 1

        elif type(self.hp_space[i]) == skopt.space.space.Real:
            hp_space_lower[i] = self.hp_space[i].low
            hp_space_upper[i] = self.hp_space[i].high

        else:
            raise Exception('The skopt HP-space could not be converted correctly!')

    # Set the random seed of the random number generator
    rand_num_generator = np.random.RandomState(seed=self.random_seed)

    # Optimize on the predefined n_func_evals and measure the wall clock times
    start_time = time.time()
    self.times = []  # Initialize a list for saving the wall clock times

    # Use a warmstart configuration (only possible for BOHAMIANN, not FABOLAS)
    if self.do_warmstart == 'Yes':

        # Initialize numpy arrays for saving the warmstart configuration and the warmstart loss
        warmstart_config = np.zeros(shape=(1, len(self.hp_space)))
        warmstart_loss = np.zeros(shape=(1, 1))

        # Retrieve the default hyperparameters and the default loss for the ML-algorithm
        default_params = self.get_warmstart_configuration()

        try:
            # Dictionary for saving the warmstart HP-configuration (only contains the HPs, which are part of
            # the 'tuned' HP-space)
            warmstart_dict = {}

            # Iterate over all HPs of this ML-algorithm's tuned HP-space and append the default values to
            # the numpy array
            for i in range(len(self.hp_space)):
                this_param = self.hp_space[i].name

                # Categorical HPs need to be encoded as integer values for RoBO
                if type(self.hp_space[i]) == skopt.space.space.Categorical:
                    choices = self.hp_space[i].categories
                    this_warmstart_value_cat = default_params[this_param]
                    dict_value = this_warmstart_value_cat

                    # Find the index of the default / warmstart HP in the list of possible choices
                    for j in range(len(choices)):
                        if this_warmstart_value_cat == choices[j]:
                            this_warmstart_value = j

                # For all non-categorical HPs
                else:
                    this_warmstart_value = default_params[this_param]
                    dict_value = this_warmstart_value

                    # For some HPs (e.g. max_depth of RF) the default value is None, although their typical
                    # dtype is different (e.g. int)
                    if this_warmstart_value is None:
                        # Try to impute these values by the mean value
                        this_warmstart_value = int(0.5 * (self.hp_space[i].low + self.hp_space[i].high))
                        dict_value = this_warmstart_value

                # Pass the warmstart value to the according numpy array
                warmstart_config[0, i] = this_warmstart_value
                warmstart_dict[this_param] = dict_value

            # Pass the default loss to the according numpy array
            warmstart_loss[0, 0] = self.get_warmstart_loss(warmstart_dict=warmstart_dict)

            # Pass the warmstart configuration as a kwargs dict
            kwargs = {'X_init': warmstart_config, 'Y_init': warmstart_loss}

            # Set flag to indicate that a warmstart took place
            did_warmstart = True

        except Exception:
            print('Warmstarting RoBO failed!')
            kwargs = {}

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

    # No warmstart requested
    else:
        kwargs = {}

        # Set flag to indicate that NO warmstart took place
        did_warmstart = False

    # Select the specified HPO-tuning method
    try:
        if self.hpo_method == 'Fabolas':
            # Budget correct? // Set further parameters?
            s_max = len(self.x_train)  # Maximum number of data points for the training data set
            s_min = int(0.05 * s_max)  # Minimum number of data points for the training data set
            n_init = int(self.n_func_evals / 3)  # Requirement of the Fabolas implementation

            result_dict = fabolas(objective_function=self.objective_fabolas, s_min=s_min, s_max=s_max,
                                  lower=hp_space_lower, upper=hp_space_upper,
                                  num_iterations=self.n_func_evals, rng=rand_num_generator, n_init=n_init)
            run_successful = True

        elif self.hpo_method == 'Bohamiann':
            if did_warmstart:
                # A single initial design point (warmstart hyperparameter configuration)
                kwargs['n_init'] = 1

            # Budget correct? // Set further parameters?
            result_dict = bayesian_optimization(objective_function=self.objective_bohamiann,
                                                lower=hp_space_lower, upper=hp_space_upper,
                                                model_type='bohamiann', num_iterations=self.n_func_evals,
                                                rng=rand_num_generator, **kwargs)
            run_successful = True

        else:
            raise Exception('Unknown HPO-method!')

    # Algorithm crashed
    except Exception:
        # Add a warning here
        run_successful = False

    # If the optimization run was successful, determine the optimization results
    if run_successful:

        for i in range(len(self.times)):
            # Subtract the start time to receive the wall clock time of each function evaluation
            self.times[i] = self.times[i] - start_time

        wall_clock_time = max(self.times)

        # Insert a timestamp of 0.0 for the warmstart hyperparameter configuration
        if did_warmstart:
            self.times.insert(0, 0.0)

        # Timestamps
        timestamps = self.times

        # Losses (not incumbent losses)
        losses = result_dict['y']

        evaluation_ids = list(range(1, len(losses) + 1))
        best_loss = min(losses)

        configurations = ()
        for config in result_dict['X']:

            # Cut off the unused Fabolas budget value at the end
            config = config[:len(self.hp_space)]
            config_dict = {}

            for i in range(len(config)):
                if type(self.hp_space[i]) == skopt.space.space.Integer:
                    config_dict[self.hp_space[i].name] = int(round(config[i]))

                elif type(self.hp_space[i]) == skopt.space.space.Categorical:
                    config_dict[self.hp_space[i].name] = list(self.hp_space[i].categories)[int(round(config[i]))]

                elif type(self.hp_space[i]) == skopt.space.space.Real:
                    config_dict[self.hp_space[i].name] = config[i]

                else:
                    raise Exception('The continuous HP-space could not be converted correctly!')

            configurations = configurations + (config_dict,)

        # Find the best hyperparameter configuration (incumbent)
        best_configuration = {}
        x_opt = result_dict['x_opt']

        for i in range(len(x_opt)):
            if type(self.hp_space[i]) == skopt.space.space.Integer:
                best_configuration[self.hp_space[i].name] = int(round(x_opt[i]))

            elif type(self.hp_space[i]) == skopt.space.space.Categorical:
                best_configuration[self.hp_space[i].name] = list(self.hp_space[i].categories)[int(round(x_opt[i]))]

            elif type(self.hp_space[i]) == skopt.space.space.Real:
                best_configuration[self.hp_space[i].name] = x_opt[i]

            else:
                raise Exception('The continuous HP-space could not be converted correctly!')

    # Run not successful (algorithm crashed)
    else:
        evaluation_ids, timestamps, losses, configurations, best_loss, best_configuration, wall_clock_time = \
            self.impute_results_for_crash()

    # Pass the results to a TuningResult-object
    result = TuningResult(evaluation_ids=evaluation_ids, timestamps=timestamps, losses=losses,
                          configurations=configurations, best_loss=best_loss,
                          best_configuration=best_configuration, wall_clock_time=wall_clock_time,
                          successful=run_successful, did_warmstart=did_warmstart)

    return result
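
# RoBO optimizes over a purely continuous box, so the decoding above has to map each continuous coordinate
# back to its skopt dimension (rounding integers, using the rounded value as a category index). A minimal,
# self-contained sketch of that mapping, independent of RoBO itself (toy space, illustrative names only):
#
# import skopt
#
# hp_space = [skopt.space.space.Integer(1, 10, name='max_depth'),
#             skopt.space.space.Categorical(['gini', 'entropy'], name='criterion'),
#             skopt.space.space.Real(0.1, 1.0, name='max_features')]
#
# x = [3.7, 0.2, 0.55]  # continuous vector as returned in result_dict['X'] / result_dict['x_opt']
# config = {}
# for i, dim in enumerate(hp_space):
#     if type(dim) == skopt.space.space.Integer:
#         config[dim.name] = int(round(x[i]))                        # -> 4
#     elif type(dim) == skopt.space.space.Categorical:
#         config[dim.name] = list(dim.categories)[int(round(x[i]))]  # -> 'gini'
#     else:
#         config[dim.name] = x[i]                                    # -> 0.55
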
def optimize(self) -> TuningResult:
    """
    Method performs a hyperparameter optimization run according to the selected HPO-method.
    :return: result: TuningResult
        TuningResult-object that contains the results of this optimization run.
    """

    # Select the specified HPO-tuning method
    if self.hpo_method == 'SMAC':
        # SMAC expects a budget of at least 10 iterations / calls
        this_optimizer = forest_minimize
        this_acq_func = 'EI'
        kwargs = {'base_estimator': 'RF'}  # Use a Random Forest Regressor as a surrogate model

    elif self.hpo_method == 'GPBO':
        this_optimizer = gp_minimize
        this_acq_func = 'EI'
        kwargs = {}

    else:
        raise Exception('Unknown HPO-method!')

    # Use a warmstart configuration?
    if self.do_warmstart == 'Yes':
        try:
            # Initialize a list for saving the warmstart configuration
            warmstart_config = []

            # Retrieve the warmstart hyperparameters for the ML-algorithm
            warmstart_params = self.get_warmstart_configuration()

            # Iterate over all hyperparameters of this ML-algorithm's tuned HP-space and append the default
            # values to the list
            for i in range(len(self.hp_space)):
                this_param = self.hp_space[i].name
                this_warmstart_value = warmstart_params[this_param]

                # For some HPs (e.g. max_depth of RF) the default value is None, although their typical dtype
                # is different (e.g. int)
                if this_warmstart_value is None and type(self.hp_space[i]) == skopt.space.space.Integer:
                    # Try to impute these values by the mean value
                    warmstart_config.append(int(0.5 * (self.hp_space[i].low + self.hp_space[i].high)))
                else:
                    # Otherwise append the warmstart value (default case)
                    warmstart_config.append(this_warmstart_value)

            # Pass the warmstart configuration as a kwargs dict
            kwargs['x0'] = warmstart_config

            # Set flag to indicate that a warmstart took place
            did_warmstart = True

        except Exception:
            print('Warmstarting skopt failed!')

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

    # No warmstart requested
    else:
        # Set flag to indicate that NO warmstart took place
        did_warmstart = False

    # Optimize on the predefined n_func_evals and measure the wall clock times
    start_time = time.time()
    self.times = []  # Initialize a list for saving the wall clock times

    # Start the optimization
    try:
        trial_result = this_optimizer(self.objective, self.hp_space, n_calls=self.n_func_evals,
                                      random_state=self.random_seed, acq_func=this_acq_func,
                                      n_jobs=self.n_workers, verbose=True, n_initial_points=20, **kwargs)
        run_successful = True

    # Algorithm crashed
    except Exception:
        run_successful = False

    # If the optimization run was successful, determine the optimization results
    if run_successful:

        for i in range(len(self.times)):
            # Subtract the start time to receive the wall clock time of each function evaluation
            self.times[i] = self.times[i] - start_time

        wall_clock_time = max(self.times)

        # Timestamps
        timestamps = self.times

        best_val_loss = trial_result.fun

        # Losses (not incumbent losses)
        losses = list(trial_result.func_vals)

        # Determine the best HP-configuration of this run
        best_configuration = {}
        for i in range(len(self.hp_space)):
            best_configuration[self.hp_space[i].name] = trial_result.x[i]

        # Number the evaluations / iterations of this run
        evaluation_ids = list(range(1, len(trial_result.func_vals) + 1))

        # Determine the HP-configuration of each evaluation / iteration
        configurations = ()
        for i in range(len(trial_result.x_iters)):
            this_config = {}
            for j in range(len(self.hp_space)):
                this_config[self.hp_space[j].name] = trial_result.x_iters[i][j]
            configurations = configurations + (this_config,)

        # Skopt uses full budgets for its HPO methods
        budget = [100.0] * len(losses)

        # Compute the loss on the test set for the best found configuration
        test_loss = self.train_evaluate_ml_model(params=best_configuration, cv_mode=False, test_mode=True)

    # Run not successful (algorithm crashed)
    else:
        evaluation_ids, timestamps, losses, configurations, best_val_loss, best_configuration, wall_clock_time, \
            test_loss, budget = self.impute_results_for_crash()

    # Pass the results to a TuningResult-object
    result = TuningResult(evaluation_ids=evaluation_ids, timestamps=timestamps, losses=losses,
                          configurations=configurations, best_val_loss=best_val_loss,
                          best_configuration=best_configuration, wall_clock_time=wall_clock_time,
                          test_loss=test_loss, successful=run_successful, did_warmstart=did_warmstart,
                          budget=budget)

    return result
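
# skopt accepts the warmstart configuration via the x0 argument of its minimizers; the point(s) in x0 are
# evaluated before the surrogate-driven search starts. A minimal sketch of this mechanism (toy objective
# and space, assuming scikit-optimize is installed; names are illustrative):
#
# from skopt import gp_minimize
# from skopt.space import Real
#
# res = gp_minimize(func=lambda x: (x[0] - 2.0) ** 2,
#                   dimensions=[Real(-5.0, 5.0, name='x')],
#                   n_calls=15,
#                   n_initial_points=5,
#                   x0=[[2.5]],  # warmstart point, evaluated first
#                   acq_func='EI',
#                   random_state=0)
#
# print(res.x, res.fun)  # best point and best validation loss, cf. trial_result.x / trial_result.fun above
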
def optimize(self) -> TuningResult:
    """
    Method performs a hyperparameter optimization run according to the selected HPO-method.
    :return: result: TuningResult
        TuningResult-object that contains the results of this optimization run.
    """

    # Start a nameserver
    NS = hpns.NameServer(run_id='hpbandster', host='127.0.0.1', port=None)
    NS.start()

    # Logging of the optimization results
    result_logger = hpres.json_result_logger(directory='./hpo/hpbandster_logs', overwrite=True)

    # Optimize on the predefined n_func_evals and measure the wall clock times
    start_time = time.time()
    self.times = []  # Initialize a list for saving the wall clock times

    # Use a warmstart configuration?
    if self.do_warmstart == 'Yes':
        try:
            # Initialize a dictionary for saving the warmstart configuration
            warmstart_dict = {}

            # Retrieve the warmstart hyperparameters for the ML-algorithm
            warmstart_params = self.get_warmstart_configuration()

            # Iterate over all hyperparameters of this ML-algorithm's tuned HP-space and add them to the
            # dictionary
            for i in range(len(self.hp_space)):
                this_param = self.hp_space[i].name
                this_warmstart_value = warmstart_params[this_param]

                # For some HPs (e.g. max_depth of RF) the default value is None, although their typical dtype
                # is different (e.g. int)
                if this_warmstart_value is None and type(self.hp_space[i]) == skopt.space.space.Integer:
                    # Try to impute these values by the mean value
                    this_warmstart_value = int(0.5 * (self.hp_space[i].low + self.hp_space[i].high))

                # Add the warmstart value to the dictionary
                warmstart_dict[this_param] = this_warmstart_value

            # Start a HPBandsterWorker to evaluate the warmstart configuration
            ws_worker = HPBandsterWorker(ml_algorithm=self.ml_algorithm, optimizer_object=self,
                                         nameserver='127.0.0.1', run_id='hpbandster')
            ws_worker.run(background=True)

            # Initialize the optimizer / HPO-method
            if self.hpo_method == 'BOHB':
                ws_optimizer = BOHB(configspace=HPBandsterWorker.get_warmstart_config(self.hp_space,
                                                                                      warmstart_dict),
                                    run_id='hpbandster', nameserver='127.0.0.1', min_budget=10, max_budget=10,
                                    eta=3.0, result_logger=result_logger)

            elif self.hpo_method == 'Hyperband':
                ws_optimizer = HyperBand(configspace=HPBandsterWorker.get_warmstart_config(self.hp_space,
                                                                                           warmstart_dict),
                                         run_id='hpbandster', nameserver='127.0.0.1', min_budget=1,
                                         max_budget=10, eta=3.0, result_logger=result_logger)

            else:
                raise Exception('Unknown HPO-method!')

            # Run the optimization / evaluation of the warmstart configuration
            # (only a single iteration / evaluation)
            _ = ws_optimizer.run(n_iterations=1)
            ws_optimizer.shutdown(shutdown_workers=True)

            # Load the results and pass them to the kwargs dictionary
            ws_results = hpres.logged_results_to_HBS_result(directory='./hpo/hpbandster_logs')
            kwargs = {'previous_result': ws_results}

            # Set flag to indicate that a warmstart took place
            did_warmstart = True

        except Exception:
            print('Warmstarting hpbandster failed!')
            kwargs = {}

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

    # No warmstart requested
    else:
        kwargs = {}

        # Set flag to indicate that NO warmstart took place
        did_warmstart = False

    # No parallelization
    if self.n_workers == 1:
        # Start a worker
        worker = HPBandsterWorker(ml_algorithm=self.ml_algorithm, optimizer_object=self,
                                  nameserver='127.0.0.1', run_id='hpbandster')
        worker.run(background=True)

    # Process based parallelization -> start the workers
    elif self.n_workers > 1:
        processes = []
        for i in range(self.n_workers):
            p = Process(target=multiproc_target_funcs.initialize_worker,
                        args=(self.ml_algorithm, self, '127.0.0.1', 'hpbandster'))
            p.start()
            processes.append(p)

    # Run an optimizer
    # Select the specified HPO-tuning method
    if self.hpo_method == 'BOHB':
        eta = 3.0
        optimizer = BOHB(configspace=HPBandsterWorker.get_configspace(self.hp_space), run_id='hpbandster',
                         nameserver='127.0.0.1', min_budget=1, max_budget=10, eta=eta,
                         result_logger=result_logger, **kwargs)
        # Values for the budget stages: https://arxiv.org/abs/1905.04970

    elif self.hpo_method == 'Hyperband':
        eta = 3.0
        optimizer = HyperBand(configspace=HPBandsterWorker.get_configspace(self.hp_space), run_id='hpbandster',
                              nameserver='127.0.0.1', min_budget=1, max_budget=10, eta=eta,
                              result_logger=result_logger, **kwargs)
        # Values for the budget stages: https://arxiv.org/abs/1905.04970

    else:
        raise Exception('Unknown HPO-method!')

    # Start the optimization
    try:
        n_func_evals = self.n_func_evals
        n_iterations = int(n_func_evals / eta)
        if n_iterations < 1:
            n_iterations = 1

        res = optimizer.run(n_iterations=n_iterations, min_n_workers=self.n_workers)
        # Relation of budget stages, halving iterations and the number of evaluations:
        # https://arxiv.org/abs/1905.04970 (number of function evaluations = eta * n_iterations)
        run_successful = True

        # Check whether one of the evaluations failed (hpbandster continues the optimization procedure even
        # if the objective function could not be evaluated)
        for config_key in res.data.keys():
            this_result = res.data[config_key].results
            for this_eval in this_result.keys():
                this_success_flag = this_result[this_eval]['info']

                # The run wasn't successful, if one of the evaluations failed
                if not this_success_flag:
                    run_successful = False
                    break

    # Algorithm crashed
    except Exception:
        # Add a warning here
        run_successful = False

    # Shutdown the optimizer and the nameserver
    optimizer.shutdown(shutdown_workers=True)
    NS.shutdown()

    if self.n_workers > 1:
        # Join the processes (only necessary for parallelization)
        for p in processes:
            p.join()

    # If the optimization run was successful, determine the optimization results
    if run_successful:

        # Extract the results and create a TuningResult instance to save them
        id2config = res.get_id2config_mapping()
        incumbent = res.get_incumbent_id()

        # Best hyperparameter configuration
        best_configuration = id2config[incumbent]['config']

        runs_df = pd.DataFrame(columns=['config_id#0', 'config_id#1', 'config_id#2', 'iteration', 'budget',
                                        'loss', 'timestamps [finished]', 'budget [%]'])
        all_runs = res.get_all_runs()

        # Iterate over all runs
        for i in range(len(all_runs)):
            this_run = all_runs[i]
            temp_dict = {'run_id': [i],
                         'config_id#0': [this_run.config_id[0]],
                         'config_id#1': [this_run.config_id[1]],
                         'config_id#2': [this_run.config_id[2]],
                         'iteration': this_run.config_id[0],
                         'budget': this_run.budget,
                         'loss': this_run.loss,
                         'timestamps [finished]': this_run.time_stamps['finished'],
                         'budget [%]': round(this_run.budget * 10, 2)}

            this_df = pd.DataFrame.from_dict(data=temp_dict)
            this_df.set_index('run_id', inplace=True)
            runs_df = pd.concat(objs=[runs_df, this_df], axis=0)

        # Sort according to the timestamps
        runs_df.sort_values(by=['timestamps [finished]'], ascending=True, inplace=True)

        losses = list(runs_df['loss'])
        best_val_loss = min(losses)
        evaluation_ids = list(range(1, len(losses) + 1))
        timestamps = list(runs_df['timestamps [finished]'])  # Use hpbandster's own time measurement
        wall_clock_time = max(timestamps)
        budget = list(runs_df['budget [%]'])

        configurations = ()
        for i in range(len(losses)):
            this_config = (list(runs_df['config_id#0'])[i], list(runs_df['config_id#1'])[i],
                           list(runs_df['config_id#2'])[i])
            configurations = configurations + (id2config[this_config]['config'],)

        # Compute the loss on the test set for the best found configuration
        test_loss = self.train_evaluate_ml_model(params=best_configuration, cv_mode=False, test_mode=True)

    # Run not successful (algorithm crashed)
    else:
        evaluation_ids, timestamps, losses, configurations, best_val_loss, best_configuration, wall_clock_time, \
            test_loss, budget = self.impute_results_for_crash()

    # Pass the results to a TuningResult-object
    result = TuningResult(evaluation_ids=evaluation_ids, timestamps=timestamps, losses=losses,
                          configurations=configurations, best_val_loss=best_val_loss,
                          best_configuration=best_configuration, wall_clock_time=wall_clock_time,
                          test_loss=test_loss, successful=run_successful, did_warmstart=did_warmstart,
                          budget=budget)

    return result
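
# The n_iterations passed to optimizer.run() above follows the approximation
# n_func_evals ~ eta * n_iterations used throughout this benchmark (cf. https://arxiv.org/abs/1905.04970).
# A minimal sketch of the compute() interface that HPBandsterWorker implements for the optimizers above
# (toy loss and parameter name are illustrative only):
#
# from hpbandster.core.worker import Worker
#
# class ToyWorker(Worker):
#     def compute(self, config, budget, **kwargs):
#         # hpbandster passes the sampled configuration and the budget of the current stage;
#         # 'loss' is minimized, 'info' is free-form (used above as a success flag)
#         loss = (config['x'] - 2.0) ** 2 / budget
#         return {'loss': loss, 'info': True}
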