def optimize(self) -> TuningResult:
    """
    Method performs a hyperparameter optimization run according to the selected HPO-method.
    :return: result: TuningResult
        TuningResult-object that contains the results of this optimization run.
    """

    # Select the specified HPO-tuning method
    if self.hpo_method == 'CMA-ES':
        this_optimizer = CmaEsSampler(seed=self.random_seed)
    elif self.hpo_method == 'TPE':
        this_optimizer = TPESampler(seed=self.random_seed)
    elif self.hpo_method == 'RandomSearch':
        this_optimizer = RandomSampler(seed=self.random_seed)
    else:
        raise Exception('Unknown HPO-method!')

    # Create a study object and specify the optimization direction
    study_name = 'hpo_study'
    study_storage = 'sqlite:///hpo.db'

    # Optimize on the predefined n_func_evals and measure the wall clock times
    # start_time = time.time()
    self.times = []  # Initialize a list for saving the wall clock times

    # Delete old study objects ('fresh start') >> otherwise the old results would be included
    try:
        optuna.delete_study(study_name=study_name, storage=study_storage)
    except Exception:
        print('No old optuna study objects found!')

    # Use a warmstart configuration?
    if self.do_warmstart == 'Yes':
        try:
            # Create a new study
            warmstart_study = optuna.create_study(direction='minimize', storage=study_storage,
                                                  study_name=study_name, load_if_exists=False)

            # Retrieve the warmstart hyperparameters for the ML-algorithm
            warmstart_params = self.get_warmstart_configuration()

            # Initialize a dictionary for the warmstart HP-configuration
            warmstart_dict = {}

            # Iterate over all hyperparameters of this ML-algorithm's tuned HP-space and append the default
            # values to the dictionary
            for i in range(len(self.hp_space)):
                this_param = self.hp_space[i].name
                this_warmstart_value = warmstart_params[this_param]

                # For some HPs (e.g. max_depth of RF) the default value is None, although their typical dtype
                # is different (e.g. int)
                if this_warmstart_value is None and type(self.hp_space[i]) == skopt.space.space.Integer:
                    # Try to impute these values by the mean value
                    this_warmstart_value = int(0.5 * (self.hp_space[i].low + self.hp_space[i].high))

                # Add the warmstart HP-value to the dictionary
                warmstart_dict[this_param] = this_warmstart_value

            # Enqueue a trial with the warmstart HP-values and optimize to ensure that the warmstart
            # configuration is evaluated first (e.g. for parallel processes)
            warmstart_study.enqueue_trial(params=warmstart_dict)
            warmstart_study.optimize(func=self.objective, n_trials=1)

            # Set flag to indicate that a warmstart took place
            did_warmstart = True

        except Exception:
            print('Warmstarting optuna failed!')

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

    # No warmstart requested
    else:
        # Set flag to indicate that NO warmstart took place
        did_warmstart = False

    # Create a new study or reload the warmstart study (if available and requested)
    study = optuna.create_study(sampler=this_optimizer, direction='minimize', study_name=study_name,
                                storage=study_storage, load_if_exists=True)

    # If a warmstart took place, reduce the number of remaining function evaluations to ensure comparability
    # (equal budgets)
    if did_warmstart:
        n_func_evals = self.n_func_evals - 1
    else:
        n_func_evals = self.n_func_evals

    # Start the optimization
    try:
        study.optimize(func=self.objective, n_trials=n_func_evals, n_jobs=self.n_workers)
        run_successful = True

    # Algorithm crashed
    except Exception:
        # Add a warning here
        run_successful = False

    # If the optimization run was successful, determine the optimization results
    if run_successful:

        # Transformation of the results into a TuningResult-object
        all_trials = study.get_trials()
        best_configuration = study.best_params
        best_val_loss = study.best_value

        start_times = []  # Start time of each trial
        finish_times = []  # Finish time of each trial
        unsorted_losses = []  # Loss of each iteration
        unsorted_configurations = ()  # HP-configuration of each iteration

        # Number the evaluations / iterations of this run
        evaluation_ids = list(range(1, len(all_trials) + 1))

        for i in range(len(all_trials)):
            start_times.append(all_trials[i].datetime_start)
            finish_times.append(all_trials[i].datetime_complete)
            # evaluation_ids.append(all_trials[i].number)
            unsorted_losses.append(all_trials[i].value)
            unsorted_configurations = unsorted_configurations + (all_trials[i].params,)

        abs_start_time = min(start_times)  # Start time of the first trial

        unsorted_timestamps = []
        for i in range(len(start_times)):
            # Time difference to the start of the first trial
            this_time = finish_times[i] - abs_start_time
            # Conversion into a float value
            this_timestamp = this_time.total_seconds()
            unsorted_timestamps.append(this_timestamp)

        wall_clock_time = max(unsorted_timestamps)

        ids = list(range(1, len(all_trials) + 1))
        temp_dict = {'ids': ids,
                     'timestamps [finished]': unsorted_timestamps,
                     'losses': unsorted_losses,
                     'configurations': unsorted_configurations}

        unsorted_df = pd.DataFrame.from_dict(data=temp_dict)
        unsorted_df.set_index('ids', inplace=True)

        # Sort the DataFrame according to the timestamps (necessary for multiprocessing)
        sorted_df = unsorted_df.sort_values(by=['timestamps [finished]'], ascending=True, inplace=False)

        timestamps = list(sorted_df['timestamps [finished]'])
        losses = list(sorted_df['losses'])
        configurations = tuple(sorted_df['configurations'])

        # Optuna uses full budgets for its HPO methods
        budget = [100.0] * len(losses)

        # Compute the loss on the test set for the best found configuration
        test_loss = self.train_evaluate_ml_model(params=best_configuration, cv_mode=False, test_mode=True)

    # Run not successful (algorithm crashed)
    else:
        evaluation_ids, timestamps, losses, configurations, best_val_loss, best_configuration, wall_clock_time, \
            test_loss, budget = self.impute_results_for_crash()

    # Pass the results to a TuningResult-object
    result = TuningResult(evaluation_ids=evaluation_ids, timestamps=timestamps, losses=losses,
                          configurations=configurations, best_val_loss=best_val_loss,
                          best_configuration=best_configuration, wall_clock_time=wall_clock_time,
                          test_loss=test_loss, successful=run_successful, did_warmstart=did_warmstart,
                          budget=budget)

    return result
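
# The warmstart mechanism above relies on optuna's enqueue_trial(), which fixes the parameters of the next
# trial before the sampler takes over. A minimal, self-contained sketch of this pattern (the toy objective
# and the parameter name 'x' are illustrative only, not part of this benchmark):
#
# import optuna
#
# def toy_objective(trial):
#     x = trial.suggest_float('x', -10.0, 10.0)
#     return x ** 2
#
# study = optuna.create_study(direction='minimize')
# study.enqueue_trial(params={'x': 2.0})      # warmstart configuration, evaluated first
# study.optimize(toy_objective, n_trials=10)  # 1 warmstart trial + 9 sampler-driven trials
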
def optimize(self) -> TuningResult:
    """
    Method performs a hyperparameter optimization run according to the selected HPO-method.
    :return: result: TuningResult
        TuningResult-object that contains the results of this optimization run.
    """

    # Select the specified HPO-tuning method
    if self.hpo_method == 'TPE':
        this_optimizer = tpe.suggest  # Seeding is handled via the rstate argument of fmin below
    else:
        raise Exception('Unknown HPO-method!')

    # Transform the skopt hp_space into a hyperopt space
    hyperopt_space = {}
    for i in range(len(self.hp_space)):
        if type(self.hp_space[i]) == skopt.space.space.Integer:
            hyperopt_space[self.hp_space[i].name] = hp.choice(self.hp_space[i].name,
                                                              range(self.hp_space[i].low,
                                                                    self.hp_space[i].high + 1))
        elif type(self.hp_space[i]) == skopt.space.space.Categorical:
            hyperopt_space[self.hp_space[i].name] = hp.choice(self.hp_space[i].name,
                                                              list(self.hp_space[i].categories))
        elif type(self.hp_space[i]) == skopt.space.space.Real:
            hyperopt_space[self.hp_space[i].name] = hp.uniform(self.hp_space[i].name,
                                                               low=self.hp_space[i].low,
                                                               high=self.hp_space[i].high)
        else:
            raise Exception('The skopt HP-space could not be converted correctly!')

    # Set the random seed of the random number generator
    rand_num_generator = np.random.RandomState(seed=self.random_seed)

    # Optimize on the predefined n_func_evals and measure the wall clock times
    start_time = time.time()
    self.times = []  # Initialize a list for saving the wall clock times

    if self.n_workers == 1:
        # Initialize a trials instance
        trials = Trials()

        # Start the optimization
        try:
            res = fmin(fn=self.objective, space=hyperopt_space, trials=trials, algo=this_optimizer,
                       max_evals=self.n_func_evals, rstate=rand_num_generator)
            run_successful = True

        # Algorithm crashed
        except Exception:
            # Add a warning here
            run_successful = False

    else:
        raise Exception('Parallelization not implemented for the hyperopt framework.')

    # >>>>>>>>>>>
    # ATTEMPT TO PARALLELIZE HYPEROPT VIA MONGODB >>> problems to access the trials results
    # # No parallelization
    # if self.n_workers == 1:
    #
    #     # Initialize a trials instance
    #     trials = Trials()
    #
    #     # Start the optimization
    #     try:
    #         res = fmin(fn=self.objective, space=hyperopt_space, trials=trials, algo=this_optimizer,
    #                    max_evals=self.n_func_evals, rstate=rand_num_generator)
    #         run_successful = True
    #
    #     # Algorithm crashed
    #     except:
    #         # Add a warning here
    #         run_successful = False
    #
    # # Parallelization -> use multiprocessing
    # else:
    #     try:
    #         processes = []
    #         for i in range(self.n_workers + 1):
    #
    #             if i == 0:
    #                 # >>> USE UNIQUE EXPERIMENT ID FOR EACH TRIAL
    #                 trials = MongoTrials('mongo://localhost:27017/mongo_hpo/jobs', exp_key='exp104')
    #                 p = Process(target=multiproc_target_funcs.hyperopt_target1,
    #                             args=(self.objective, hyperopt_space, trials, this_optimizer,
    #                                   self.n_func_evals, rand_num_generator))
    #             else:
    #                 p = Process(target=multiproc_target_funcs.hyperopt_target2)
    #
    #             p.start()
    #             processes.append(p)
    #
    #         for p in processes:
    #             p.join()
    #
    #         run_successful = True
    #
    #     except:
    #         run_successful = False
    # >>>>>>>>>>>

    # If the optimization run was successful, determine the optimization results
    if run_successful:

        # Determine the timestamps for each evaluation of the blackbox-function
        for i in range(len(trials.trials)):
            # Subtract the start time to receive the wall clock time of each function evaluation
            this_trial = trials.trials[i]
            this_eval_time = this_trial['result']['eval_time']
            self.times.append(this_eval_time - start_time)

        wall_clock_time = max(self.times)

        # Timestamps
        timestamps = self.times

        # Number the evaluations / iterations of this run
        evaluation_ids = list(range(1, len(trials.tids) + 1))

        # Loss of each iteration
        losses = []
        for this_result in trials.results:
            losses.append(this_result['loss'])

        # Best loss
        best_val_loss = min(losses)

        # Determine the best HP-configuration of this run
        best_configuration = {}
        for i in range(len(self.hp_space)):
            if type(self.hp_space[i]) == skopt.space.space.Categorical:
                # Hyperopt only returns indices for categorical hyperparameters
                categories = self.hp_space[i].categories
                # cat_idx = res[self.hp_space[i].name]
                cat_idx = trials.best_trial['misc']['vals'][self.hp_space[i].name][0]
                best_configuration[self.hp_space[i].name] = categories[cat_idx]

            elif type(self.hp_space[i]) == skopt.space.space.Integer:
                # Integer HPs were encoded as hp.choice over range(low, high + 1) above, so hyperopt also
                # returns an index here; map it back to the actual integer value
                int_idx = trials.best_trial['misc']['vals'][self.hp_space[i].name][0]
                best_configuration[self.hp_space[i].name] = self.hp_space[i].low + int_idx

            else:
                # best_configuration[self.hp_space[i].name] = res[self.hp_space[i].name]
                best_configuration[self.hp_space[i].name] = \
                    trials.best_trial['misc']['vals'][self.hp_space[i].name][0]

        # HP-configuration of each iteration
        configurations = ()
        for trial in trials.trials:
            this_config = {}
            for i in range(len(self.hp_space)):
                if type(self.hp_space[i]) == skopt.space.space.Categorical:
                    # Hyperopt only returns indices for categorical hyperparameters
                    categories = self.hp_space[i].categories
                    cat_idx = trial['misc']['vals'][self.hp_space[i].name][0]
                    this_config[self.hp_space[i].name] = categories[cat_idx]

                elif type(self.hp_space[i]) == skopt.space.space.Integer:
                    # Map the hp.choice index back to the actual integer value (see above)
                    int_idx = trial['misc']['vals'][self.hp_space[i].name][0]
                    this_config[self.hp_space[i].name] = self.hp_space[i].low + int_idx

                else:
                    this_config[self.hp_space[i].name] = trial['misc']['vals'][self.hp_space[i].name][0]

            configurations = configurations + (this_config,)

        # Hyperopt uses full budgets for its HPO methods
        budget = [100.0] * len(losses)

        # Compute the loss on the test set for the best found configuration
        test_loss = self.train_evaluate_ml_model(params=best_configuration, cv_mode=False, test_mode=True)

    # Run not successful (algorithm crashed)
    else:
        evaluation_ids, timestamps, losses, configurations, best_val_loss, best_configuration, wall_clock_time, \
            test_loss, budget = self.impute_results_for_crash()

    # Pass the results to a TuningResult-object
    result = TuningResult(evaluation_ids=evaluation_ids, timestamps=timestamps, losses=losses,
                          configurations=configurations, best_val_loss=best_val_loss,
                          best_configuration=best_configuration, wall_clock_time=wall_clock_time,
                          test_loss=test_loss, successful=run_successful, budget=budget)

    return result
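
# Note on the index handling above: for every hp.choice() dimension, hyperopt stores the *index* of the
# chosen option in trials.best_trial['misc']['vals'], not the option itself. A minimal sketch of the
# decoding logic (toy space, illustrative parameter names only):
#
# from hyperopt import fmin, tpe, hp, Trials
#
# space = {'criterion': hp.choice('criterion', ['gini', 'entropy']),
#          'max_depth': hp.choice('max_depth', range(1, 11))}
#
# trials = Trials()
# fmin(fn=lambda p: 0.0 if p['criterion'] == 'gini' else 1.0, space=space,
#      algo=tpe.suggest, max_evals=5, trials=trials)
#
# cat_idx = trials.best_trial['misc']['vals']['criterion'][0]  # e.g. 0 -> 'gini'
# int_idx = trials.best_trial['misc']['vals']['max_depth'][0]  # index into range(1, 11) -> value = 1 + int_idx
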
def optimize(self) -> TuningResult:
    """
    Method performs a hyperparameter optimization run according to the selected HPO-method.
    :return: result: TuningResult
        TuningResult-object that contains the results of this optimization run.
    """

    # Convert the skopt hyperparameter space into a continuous space for RoBO
    hp_space_lower = np.zeros(shape=(len(self.hp_space),))
    hp_space_upper = np.zeros(shape=(len(self.hp_space),))

    for i in range(len(self.hp_space)):
        if type(self.hp_space[i]) == skopt.space.space.Integer:
            hp_space_lower[i] = self.hp_space[i].low
            hp_space_upper[i] = self.hp_space[i].high

        elif type(self.hp_space[i]) == skopt.space.space.Categorical:
            n_choices = len(list(self.hp_space[i].categories))
            hp_space_lower[i] = 0
            hp_space_upper[i] = n_choices - 1

        elif type(self.hp_space[i]) == skopt.space.space.Real:
            hp_space_lower[i] = self.hp_space[i].low
            hp_space_upper[i] = self.hp_space[i].high

        else:
            raise Exception('The skopt HP-space could not be converted correctly!')

    # Set the random seed of the random number generator
    rand_num_generator = np.random.RandomState(seed=self.random_seed)

    # Optimize on the predefined n_func_evals and measure the wall clock times
    start_time = time.time()
    self.times = []  # Initialize a list for saving the wall clock times

    # Use a warmstart configuration (only possible for BOHAMIANN, not FABOLAS)
    if self.do_warmstart == 'Yes':

        # Initialize numpy arrays for saving the warmstart configuration and the warmstart loss
        warmstart_config = np.zeros(shape=(1, len(self.hp_space)))
        warmstart_loss = np.zeros(shape=(1, 1))

        # Retrieve the default hyperparameters and the default loss for the ML-algorithm
        default_params = self.get_warmstart_configuration()

        try:
            # Dictionary for saving the warmstart HP-configuration (only contains the HPs, which are part of
            # the 'tuned' HP-space)
            warmstart_dict = {}

            # Iterate over all HPs of this ML-algorithm's tuned HP-space and append the default values to
            # the numpy array
            for i in range(len(self.hp_space)):
                this_param = self.hp_space[i].name

                # Categorical HPs need to be encoded as integer values for RoBO
                if type(self.hp_space[i]) == skopt.space.space.Categorical:
                    choices = self.hp_space[i].categories
                    this_warmstart_value_cat = default_params[this_param]
                    dict_value = this_warmstart_value_cat

                    # Find the index of the default / warmstart HP in the list of possible choices
                    for j in range(len(choices)):
                        if this_warmstart_value_cat == choices[j]:
                            this_warmstart_value = j

                # For all non-categorical HPs
                else:
                    this_warmstart_value = default_params[this_param]
                    dict_value = this_warmstart_value

                    # For some HPs (e.g. max_depth of RF) the default value is None, although their typical
                    # dtype is different (e.g. int)
                    if this_warmstart_value is None:
                        # Try to impute these values by the mean value
                        this_warmstart_value = int(0.5 * (self.hp_space[i].low + self.hp_space[i].high))
                        dict_value = this_warmstart_value

                # Pass the warmstart value to the according numpy array
                warmstart_config[0, i] = this_warmstart_value
                warmstart_dict[this_param] = dict_value

            # Pass the default loss to the according numpy array
            warmstart_loss[0, 0] = self.get_warmstart_loss(warmstart_dict=warmstart_dict)

            # Pass the warmstart configuration as a kwargs dict
            kwargs = {'X_init': warmstart_config, 'Y_init': warmstart_loss}

            # Set flag to indicate that a warmstart took place
            did_warmstart = True

        except Exception:
            print('Warmstarting RoBO failed!')
            kwargs = {}

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

    # No warmstart requested
    else:
        kwargs = {}

        # Set flag to indicate that NO warmstart took place
        did_warmstart = False

    # Select the specified HPO-tuning method
    try:
        if self.hpo_method == 'Fabolas':
            # Budget correct? // Set further parameters?
            s_max = len(self.x_train)  # Maximum number of data points for the training data set
            s_min = int(0.05 * s_max)  # Minimum number of data points for the training data set
            n_init = int(self.n_func_evals / 3)  # Requirement of the Fabolas implementation

            result_dict = fabolas(objective_function=self.objective_fabolas, s_min=s_min, s_max=s_max,
                                  lower=hp_space_lower, upper=hp_space_upper,
                                  num_iterations=self.n_func_evals, rng=rand_num_generator, n_init=n_init)
            run_successful = True

        elif self.hpo_method == 'Bohamiann':
            if did_warmstart:
                # A single initial design point (warmstart hyperparameter configuration)
                kwargs['n_init'] = 1

            # Budget correct? // Set further parameters?
            result_dict = bayesian_optimization(objective_function=self.objective_bohamiann,
                                                lower=hp_space_lower, upper=hp_space_upper,
                                                model_type='bohamiann', num_iterations=self.n_func_evals,
                                                rng=rand_num_generator, **kwargs)
            run_successful = True

        else:
            raise Exception('Unknown HPO-method!')

    # Algorithm crashed
    except Exception:
        # Add a warning here
        run_successful = False

    # If the optimization run was successful, determine the optimization results
    if run_successful:

        for i in range(len(self.times)):
            # Subtract the start time to receive the wall clock time of each function evaluation
            self.times[i] = self.times[i] - start_time

        wall_clock_time = max(self.times)

        # Insert a timestamp of 0.0 for the warmstart hyperparameter configuration
        if did_warmstart:
            self.times.insert(0, 0.0)

        # Timestamps
        timestamps = self.times

        # Losses (not incumbent losses)
        losses = result_dict['y']

        evaluation_ids = list(range(1, len(losses) + 1))
        best_loss = min(losses)

        configurations = ()
        for config in result_dict['X']:

            # Cut off the unused Fabolas budget value at the end
            config = config[:len(self.hp_space)]
            config_dict = {}

            for i in range(len(config)):
                if type(self.hp_space[i]) == skopt.space.space.Integer:
                    config_dict[self.hp_space[i].name] = int(round(config[i]))

                elif type(self.hp_space[i]) == skopt.space.space.Categorical:
                    config_dict[self.hp_space[i].name] = list(self.hp_space[i].categories)[int(round(config[i]))]

                elif type(self.hp_space[i]) == skopt.space.space.Real:
                    config_dict[self.hp_space[i].name] = config[i]

                else:
                    raise Exception('The continuous HP-space could not be converted correctly!')

            configurations = configurations + (config_dict,)

        # Find the best hyperparameter configuration (incumbent)
        best_configuration = {}
        x_opt = result_dict['x_opt']

        for i in range(len(x_opt)):
            if type(self.hp_space[i]) == skopt.space.space.Integer:
                best_configuration[self.hp_space[i].name] = int(round(x_opt[i]))

            elif type(self.hp_space[i]) == skopt.space.space.Categorical:
                best_configuration[self.hp_space[i].name] = list(self.hp_space[i].categories)[int(round(x_opt[i]))]

            elif type(self.hp_space[i]) == skopt.space.space.Real:
                best_configuration[self.hp_space[i].name] = x_opt[i]

            else:
                raise Exception('The continuous HP-space could not be converted correctly!')

    # Run not successful (algorithm crashed)
    else:
        evaluation_ids, timestamps, losses, configurations, best_loss, best_configuration, wall_clock_time = \
            self.impute_results_for_crash()

    # Pass the results to a TuningResult-object
    result = TuningResult(evaluation_ids=evaluation_ids, timestamps=timestamps, losses=losses,
                          configurations=configurations, best_loss=best_loss,
                          best_configuration=best_configuration, wall_clock_time=wall_clock_time,
                          successful=run_successful, did_warmstart=did_warmstart)

    return result
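
# RoBO optimizes over a purely continuous box, so the decoding above has to map each continuous coordinate
# back to its skopt dimension (rounding integers, using the rounded value as a category index). A minimal,
# self-contained sketch of that mapping, independent of RoBO itself (toy space, illustrative names only):
#
# import skopt
#
# hp_space = [skopt.space.space.Integer(1, 10, name='max_depth'),
#             skopt.space.space.Categorical(['gini', 'entropy'], name='criterion'),
#             skopt.space.space.Real(0.1, 1.0, name='max_features')]
#
# x = [3.7, 0.2, 0.55]  # continuous vector as returned in result_dict['X'] / result_dict['x_opt']
# config = {}
# for i, dim in enumerate(hp_space):
#     if type(dim) == skopt.space.space.Integer:
#         config[dim.name] = int(round(x[i]))                        # -> 4
#     elif type(dim) == skopt.space.space.Categorical:
#         config[dim.name] = list(dim.categories)[int(round(x[i]))]  # -> 'gini'
#     else:
#         config[dim.name] = x[i]                                    # -> 0.55
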
def optimize(self) -> TuningResult:
    """
    Method performs a hyperparameter optimization run according to the selected HPO-method.
    :return: result: TuningResult
        TuningResult-object that contains the results of this optimization run.
    """

    # Select the specified HPO-tuning method
    if self.hpo_method == 'SMAC':
        # SMAC expects a budget of at least 10 iterations / calls
        this_optimizer = forest_minimize
        this_acq_func = 'EI'
        kwargs = {'base_estimator': 'RF'}  # Use a Random Forest Regressor as a surrogate model

    elif self.hpo_method == 'GPBO':
        this_optimizer = gp_minimize
        this_acq_func = 'EI'
        kwargs = {}

    else:
        raise Exception('Unknown HPO-method!')

    # Use a warmstart configuration?
    if self.do_warmstart == 'Yes':
        try:
            # Initialize a list for saving the warmstart configuration
            warmstart_config = []

            # Retrieve the warmstart hyperparameters for the ML-algorithm
            warmstart_params = self.get_warmstart_configuration()

            # Iterate over all hyperparameters of this ML-algorithm's tuned HP-space and append the default
            # values to the list
            for i in range(len(self.hp_space)):
                this_param = self.hp_space[i].name
                this_warmstart_value = warmstart_params[this_param]

                # For some HPs (e.g. max_depth of RF) the default value is None, although their typical dtype
                # is different (e.g. int)
                if this_warmstart_value is None and type(self.hp_space[i]) == skopt.space.space.Integer:
                    # Try to impute these values by the mean value
                    warmstart_config.append(int(0.5 * (self.hp_space[i].low + self.hp_space[i].high)))
                else:
                    # Otherwise append the warmstart value (default case)
                    warmstart_config.append(this_warmstart_value)

            # Pass the warmstart configuration as a kwargs dict
            kwargs['x0'] = warmstart_config

            # Set flag to indicate that a warmstart took place
            did_warmstart = True

        except Exception:
            print('Warmstarting skopt failed!')

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

    # No warmstart requested
    else:
        # Set flag to indicate that NO warmstart took place
        did_warmstart = False

    # Optimize on the predefined n_func_evals and measure the wall clock times
    start_time = time.time()
    self.times = []  # Initialize a list for saving the wall clock times

    # Start the optimization
    try:
        trial_result = this_optimizer(self.objective, self.hp_space, n_calls=self.n_func_evals,
                                      random_state=self.random_seed, acq_func=this_acq_func,
                                      n_jobs=self.n_workers, verbose=True, n_initial_points=20, **kwargs)
        run_successful = True

    # Algorithm crashed
    except Exception:
        run_successful = False

    # If the optimization run was successful, determine the optimization results
    if run_successful:

        for i in range(len(self.times)):
            # Subtract the start time to receive the wall clock time of each function evaluation
            self.times[i] = self.times[i] - start_time

        wall_clock_time = max(self.times)

        # Timestamps
        timestamps = self.times

        best_val_loss = trial_result.fun

        # Losses (not incumbent losses)
        losses = list(trial_result.func_vals)

        # Determine the best HP-configuration of this run
        best_configuration = {}
        for i in range(len(self.hp_space)):
            best_configuration[self.hp_space[i].name] = trial_result.x[i]

        # Number the evaluations / iterations of this run
        evaluation_ids = list(range(1, len(trial_result.func_vals) + 1))

        # Determine the HP-configuration of each evaluation / iteration
        configurations = ()
        for i in range(len(trial_result.x_iters)):
            this_config = {}
            for j in range(len(self.hp_space)):
                this_config[self.hp_space[j].name] = trial_result.x_iters[i][j]
            configurations = configurations + (this_config,)

        # Skopt uses full budgets for its HPO methods
        budget = [100.0] * len(losses)

        # Compute the loss on the test set for the best found configuration
        test_loss = self.train_evaluate_ml_model(params=best_configuration, cv_mode=False, test_mode=True)

    # Run not successful (algorithm crashed)
    else:
        evaluation_ids, timestamps, losses, configurations, best_val_loss, best_configuration, wall_clock_time, \
            test_loss, budget = self.impute_results_for_crash()

    # Pass the results to a TuningResult-object
    result = TuningResult(evaluation_ids=evaluation_ids, timestamps=timestamps, losses=losses,
                          configurations=configurations, best_val_loss=best_val_loss,
                          best_configuration=best_configuration, wall_clock_time=wall_clock_time,
                          test_loss=test_loss, successful=run_successful, did_warmstart=did_warmstart,
                          budget=budget)

    return result
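
# skopt accepts the warmstart configuration via the x0 argument of its minimizers; the point(s) in x0 are
# evaluated before the surrogate-driven search starts. A minimal sketch of this mechanism (toy objective
# and space, assuming scikit-optimize is installed; names are illustrative):
#
# from skopt import gp_minimize
# from skopt.space import Real
#
# res = gp_minimize(func=lambda x: (x[0] - 2.0) ** 2,
#                   dimensions=[Real(-5.0, 5.0, name='x')],
#                   n_calls=15,
#                   n_initial_points=5,
#                   x0=[[2.5]],  # warmstart point, evaluated first
#                   acq_func='EI',
#                   random_state=0)
#
# print(res.x, res.fun)  # best point and best validation loss, cf. trial_result.x / trial_result.fun above
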
def optimize(self) -> TuningResult:
    """
    Method performs a hyperparameter optimization run according to the selected HPO-method.
    :return: result: TuningResult
        TuningResult-object that contains the results of this optimization run.
    """

    # Start a nameserver
    NS = hpns.NameServer(run_id='hpbandster', host='127.0.0.1', port=None)
    NS.start()

    # Logging of the optimization results
    result_logger = hpres.json_result_logger(directory='./hpo/hpbandster_logs', overwrite=True)

    # Optimize on the predefined n_func_evals and measure the wall clock times
    start_time = time.time()
    self.times = []  # Initialize a list for saving the wall clock times

    # Use a warmstart configuration?
    if self.do_warmstart == 'Yes':
        try:
            # Initialize a dictionary for saving the warmstart configuration
            warmstart_dict = {}

            # Retrieve the warmstart hyperparameters for the ML-algorithm
            warmstart_params = self.get_warmstart_configuration()

            # Iterate over all hyperparameters of this ML-algorithm's tuned HP-space and add them to the
            # dictionary
            for i in range(len(self.hp_space)):
                this_param = self.hp_space[i].name
                this_warmstart_value = warmstart_params[this_param]

                # For some HPs (e.g. max_depth of RF) the default value is None, although their typical dtype
                # is different (e.g. int)
                if this_warmstart_value is None and type(self.hp_space[i]) == skopt.space.space.Integer:
                    # Try to impute these values by the mean value
                    this_warmstart_value = int(0.5 * (self.hp_space[i].low + self.hp_space[i].high))

                # Add the warmstart value to the dictionary
                warmstart_dict[this_param] = this_warmstart_value

            # Start a HPBandsterWorker to evaluate the warmstart configuration
            ws_worker = HPBandsterWorker(ml_algorithm=self.ml_algorithm, optimizer_object=self,
                                         nameserver='127.0.0.1', run_id='hpbandster')
            ws_worker.run(background=True)

            # Initialize the optimizer / HPO-method
            if self.hpo_method == 'BOHB':
                ws_optimizer = BOHB(configspace=HPBandsterWorker.get_warmstart_config(self.hp_space,
                                                                                      warmstart_dict),
                                    run_id='hpbandster', nameserver='127.0.0.1', min_budget=10, max_budget=10,
                                    eta=3.0, result_logger=result_logger)

            elif self.hpo_method == 'Hyperband':
                ws_optimizer = HyperBand(configspace=HPBandsterWorker.get_warmstart_config(self.hp_space,
                                                                                           warmstart_dict),
                                         run_id='hpbandster', nameserver='127.0.0.1', min_budget=1,
                                         max_budget=10, eta=3.0, result_logger=result_logger)

            else:
                raise Exception('Unknown HPO-method!')

            # Run the optimization / evaluation of the warmstart configuration
            # (only a single iteration / evaluation)
            _ = ws_optimizer.run(n_iterations=1)
            ws_optimizer.shutdown(shutdown_workers=True)

            # Load the results and pass them to the kwargs dictionary
            ws_results = hpres.logged_results_to_HBS_result(directory='./hpo/hpbandster_logs')
            kwargs = {'previous_result': ws_results}

            # Set flag to indicate that a warmstart took place
            did_warmstart = True

        except Exception:
            print('Warmstarting hpbandster failed!')
            kwargs = {}

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

    # No warmstart requested
    else:
        kwargs = {}

        # Set flag to indicate that NO warmstart took place
        did_warmstart = False

    # No parallelization
    if self.n_workers == 1:
        # Start a worker
        worker = HPBandsterWorker(ml_algorithm=self.ml_algorithm, optimizer_object=self,
                                  nameserver='127.0.0.1', run_id='hpbandster')
        worker.run(background=True)

    # Process based parallelization -> start the workers
    elif self.n_workers > 1:
        processes = []
        for i in range(self.n_workers):
            p = Process(target=multiproc_target_funcs.initialize_worker,
                        args=(self.ml_algorithm, self, '127.0.0.1', 'hpbandster'))
            p.start()
            processes.append(p)

    # Run an optimizer
    # Select the specified HPO-tuning method
    if self.hpo_method == 'BOHB':
        eta = 3.0
        optimizer = BOHB(configspace=HPBandsterWorker.get_configspace(self.hp_space), run_id='hpbandster',
                         nameserver='127.0.0.1', min_budget=1, max_budget=10, eta=eta,
                         result_logger=result_logger, **kwargs)
        # Values for the budget stages: https://arxiv.org/abs/1905.04970

    elif self.hpo_method == 'Hyperband':
        eta = 3.0
        optimizer = HyperBand(configspace=HPBandsterWorker.get_configspace(self.hp_space), run_id='hpbandster',
                              nameserver='127.0.0.1', min_budget=1, max_budget=10, eta=eta,
                              result_logger=result_logger, **kwargs)
        # Values for the budget stages: https://arxiv.org/abs/1905.04970

    else:
        raise Exception('Unknown HPO-method!')

    # Start the optimization
    try:
        n_func_evals = self.n_func_evals
        n_iterations = int(n_func_evals / eta)
        if n_iterations < 1:
            n_iterations = 1

        res = optimizer.run(n_iterations=n_iterations, min_n_workers=self.n_workers)
        # Relation of budget stages, halving iterations and the number of evaluations:
        # https://arxiv.org/abs/1905.04970 (number of function evaluations = eta * n_iterations)
        run_successful = True

        # Check whether one of the evaluations failed (hpbandster continues the optimization procedure even
        # if the objective function could not be evaluated)
        for config_key in res.data.keys():
            this_result = res.data[config_key].results
            for this_eval in this_result.keys():
                this_success_flag = this_result[this_eval]['info']

                # The run wasn't successful, if one of the evaluations failed
                if not this_success_flag:
                    run_successful = False
                    break

    # Algorithm crashed
    except Exception:
        # Add a warning here
        run_successful = False

    # Shutdown the optimizer and the nameserver
    optimizer.shutdown(shutdown_workers=True)
    NS.shutdown()

    if self.n_workers > 1:
        # Join the processes (only necessary for parallelization)
        for p in processes:
            p.join()

    # If the optimization run was successful, determine the optimization results
    if run_successful:

        # Extract the results and create a TuningResult instance to save them
        id2config = res.get_id2config_mapping()
        incumbent = res.get_incumbent_id()

        # Best hyperparameter configuration
        best_configuration = id2config[incumbent]['config']

        runs_df = pd.DataFrame(columns=['config_id#0', 'config_id#1', 'config_id#2', 'iteration', 'budget',
                                        'loss', 'timestamps [finished]', 'budget [%]'])
        all_runs = res.get_all_runs()

        # Iterate over all runs
        for i in range(len(all_runs)):
            this_run = all_runs[i]
            temp_dict = {'run_id': [i],
                         'config_id#0': [this_run.config_id[0]],
                         'config_id#1': [this_run.config_id[1]],
                         'config_id#2': [this_run.config_id[2]],
                         'iteration': this_run.config_id[0],
                         'budget': this_run.budget,
                         'loss': this_run.loss,
                         'timestamps [finished]': this_run.time_stamps['finished'],
                         'budget [%]': round(this_run.budget * 10, 2)}

            this_df = pd.DataFrame.from_dict(data=temp_dict)
            this_df.set_index('run_id', inplace=True)
            runs_df = pd.concat(objs=[runs_df, this_df], axis=0)

        # Sort according to the timestamps
        runs_df.sort_values(by=['timestamps [finished]'], ascending=True, inplace=True)

        losses = list(runs_df['loss'])
        best_val_loss = min(losses)
        evaluation_ids = list(range(1, len(losses) + 1))
        timestamps = list(runs_df['timestamps [finished]'])  # Use hpbandster's own time measurement
        wall_clock_time = max(timestamps)
        budget = list(runs_df['budget [%]'])

        configurations = ()
        for i in range(len(losses)):
            this_config = (list(runs_df['config_id#0'])[i], list(runs_df['config_id#1'])[i],
                           list(runs_df['config_id#2'])[i])
            configurations = configurations + (id2config[this_config]['config'],)

        # Compute the loss on the test set for the best found configuration
        test_loss = self.train_evaluate_ml_model(params=best_configuration, cv_mode=False, test_mode=True)

    # Run not successful (algorithm crashed)
    else:
        evaluation_ids, timestamps, losses, configurations, best_val_loss, best_configuration, wall_clock_time, \
            test_loss, budget = self.impute_results_for_crash()

    # Pass the results to a TuningResult-object
    result = TuningResult(evaluation_ids=evaluation_ids, timestamps=timestamps, losses=losses,
                          configurations=configurations, best_val_loss=best_val_loss,
                          best_configuration=best_configuration, wall_clock_time=wall_clock_time,
                          test_loss=test_loss, successful=run_successful, did_warmstart=did_warmstart,
                          budget=budget)

    return result
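
# The n_iterations passed to optimizer.run() above follows the approximation
# n_func_evals ~ eta * n_iterations used throughout this benchmark (cf. https://arxiv.org/abs/1905.04970).
# A minimal sketch of the compute() interface that HPBandsterWorker implements for the optimizers above
# (toy loss and parameter name are illustrative only):
#
# from hpbandster.core.worker import Worker
#
# class ToyWorker(Worker):
#     def compute(self, config, budget, **kwargs):
#         # hpbandster passes the sampled configuration and the budget of the current stage;
#         # 'loss' is minimized, 'info' is free-form (used above as a success flag)
#         loss = (config['x'] - 2.0) ** 2 / budget
#         return {'loss': loss, 'info': True}
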