示例#1
0
    def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(
            self):
        """ Tests the bayesian optimizer on a simple quadratic function with no prior data.

        :return:
        """
        input_space = SimpleHypergrid(name="input",
                                      dimensions=[
                                          ContinuousDimension(name='x_1',
                                                              min=-100,
                                                              max=100),
                                          ContinuousDimension(name='x_2',
                                                              min=-100,
                                                              max=100)
                                      ])

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=BayesianOptimizerConfig.DEFAULT,
            logger=self.logger)

        num_guided_samples = 1000
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            suggested_params_dict = suggested_params.to_dict()

            target_value = quadratic(**suggested_params_dict)
            self.logger.info(
                f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}"
            )

            input_values_df = pd.DataFrame({
                param_name: [param_value]
                for param_name, param_value in suggested_params_dict.items()
            })
            target_values_df = pd.DataFrame({'y': [target_value]})

            bayesian_optimizer.register(input_values_df, target_values_df)
            if i > 20 and i % 20 == 0:
                self.logger.info(
                    f"[{i}/{num_guided_samples}] Optimum: {bayesian_optimizer.optimum()}"
                )

        self.logger.info(f"Optimum: {bayesian_optimizer.optimum()}")
    def test_bayesian_optimizer_on_simple_2d_quadratic_function_pre_heated(
            self):
        """ Tests the bayesian optimizer on a simple quadratic function first feeding the optimizer a lot of data.

        """

        objective_function_config = objective_function_config_store.get_config_by_name(
            '2d_quadratic_concave_up')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)
        random_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=10000)

        y_df = objective_function.evaluate_dataframe(random_params_df)

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default,
            logger=self.logger)
        bayesian_optimizer.register(random_params_df, y_df)

        num_guided_samples = 20
        for i in range(num_guided_samples):
            # Suggest the parameters
            suggested_params = bayesian_optimizer.suggest()
            target_value = objective_function.evaluate_point(suggested_params)

            self.logger.info(
                f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}"
            )

            # Register the observation with the optimizer
            bayesian_optimizer.register(suggested_params.to_dataframe(),
                                        target_value.to_dataframe())

        self.validate_optima(bayesian_optimizer)
        best_config_point, best_objective = bayesian_optimizer.optimum()
        self.logger.info(
            f"Optimum: {best_objective} Best Configuration: {best_config_point}"
        )
        trace_output_path = os.path.join(self.temp_dir, "PreHeatedTrace.json")
        self.logger.info(f"Writing trace to {trace_output_path}")
        global_values.tracer.dump_trace_to_file(
            output_file_path=trace_output_path)
        global_values.tracer.clear_events()
    def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(
            self):
        """ Tests the bayesian optimizer on a simple quadratic function with no prior data.

        """
        objective_function_config = objective_function_config_store.get_config_by_name(
            '2d_quadratic_concave_up')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default,
            logger=self.logger)

        num_guided_samples = 1000
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            target_value = objective_function.evaluate_point(suggested_params)
            self.logger.info(
                f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}"
            )

            bayesian_optimizer.register(suggested_params.to_dataframe(),
                                        target_value.to_dataframe())
            if i > 20 and i % 20 == 0:
                best_config_point, best_objective = bayesian_optimizer.optimum(
                )
                self.logger.info(
                    f"[{i}/{num_guided_samples}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
                )

        self.validate_optima(bayesian_optimizer)
        best_config, optimum = bayesian_optimizer.optimum()
        assert objective_function.parameter_space.contains_point(best_config)
        assert objective_function.output_space.contains_point(optimum)
        _, all_targets = bayesian_optimizer.get_all_observations()
        assert optimum.y == all_targets.min()[0]
        self.logger.info(
            f"Optimum: {optimum} best configuration: {best_config}")
    def test_hierarchical_quadratic_cold_start(self):

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        num_restarts = 1000
        for restart_num in range(num_restarts):
            bayesian_optimizer = BayesianOptimizer(
                optimization_problem=optimization_problem,
                optimizer_config=bayesian_optimizer_config_store.default,
                logger=self.logger)

            num_guided_samples = 200
            for i in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = objective_function.evaluate_point(suggested_params)
                self.logger.info(
                    f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

                input_values_df = suggested_params.to_dataframe()
                target_values_df = y.to_dataframe()
                bayesian_optimizer.register(input_values_df, target_values_df)
            self.validate_optima(bayesian_optimizer)
            best_config_point, best_objective = bayesian_optimizer.optimum()
            self.logger.info(
                f"[{restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
            )
示例#5
0
    def test_hierarchical_quadratic_cold_start(self):

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=MultilevelQuadratic.CONFIG_SPACE,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        num_restarts = 1000
        for restart_num in range(num_restarts):
            bayesian_optimizer = BayesianOptimizer(
                optimization_problem=optimization_problem,
                optimizer_config=BayesianOptimizerConfig.DEFAULT,
                logger=self.logger)

            num_guided_samples = 200
            for i in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = MultilevelQuadratic.evaluate(suggested_params)
                self.logger.info(
                    f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

                input_values_df = pd.DataFrame({
                    param_name: [param_value]
                    for param_name, param_value in suggested_params
                })
                target_values_df = pd.DataFrame({'y': [y]})
                bayesian_optimizer.register(input_values_df, target_values_df)

            self.logger.info(
                f"[{restart_num}/{num_restarts}] Optimum: {bayesian_optimizer.optimum()}"
            )
    def test_hierarchical_quadratic_cold_start_random_configs(self):

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        random_state = random.Random()
        num_restarts = 200
        for restart_num in range(num_restarts):

            # Let's set up random seeds so that we can easily repeat failed experiments
            #
            random_state.seed(restart_num)
            bayesian_optimizer_config_store.parameter_space.random_state = random_state
            objective_function.parameter_space.random_state = random_state

            optimizer_config = bayesian_optimizer_config_store.parameter_space.random(
            )

            # The goal here is to make sure the optimizer works with a lot of different configurations.
            # So let's make sure each run is not too long.
            #
            optimizer_config.min_samples_required_for_guided_design_of_experiments = 50
            if optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__:
                random_forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
                random_forest_config.n_estimators = min(
                    random_forest_config.n_estimators, 5)
                decision_tree_config = random_forest_config.decision_tree_regression_model_config
                decision_tree_config.min_samples_to_fit = 10
                decision_tree_config.n_new_samples_before_refit = 10

            if optimizer_config.experiment_designer_config.numeric_optimizer_implementation == GlowWormSwarmOptimizer.__name__:
                optimizer_config.experiment_designer_config.glow_worm_swarm_optimizer_config.num_iterations = 5

            self.logger.info(
                f"[Restart: {restart_num}/{num_restarts}] Creating a BayesianOptimimizer with the following config: "
            )
            self.logger.info(
                f"Optimizer config: {optimizer_config.to_json(indent=2)}")
            bayesian_optimizer = BayesianOptimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config,
                logger=self.logger)

            num_guided_samples = optimizer_config.min_samples_required_for_guided_design_of_experiments + 50
            for i in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = objective_function.evaluate_point(suggested_params)
                self.logger.info(
                    f"[Restart: {restart_num}/{num_restarts}][Sample: {i}/{num_guided_samples}] {suggested_params}, y: {y}"
                )

                input_values_df = suggested_params.to_dataframe()
                target_values_df = y.to_dataframe()
                bayesian_optimizer.register(input_values_df, target_values_df)

            best_config_point, best_objective = bayesian_optimizer.optimum()
            self.logger.info(
                f"[Restart: {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
            )
示例#7
0
    def test_bayesian_optimizer_on_simple_2d_quadratic_function_pre_heated(
            self):
        """ Tests the bayesian optimizer on a simple quadratic function first feeding the optimizer a lot of data.

        :return:
        """
        input_space = SimpleHypergrid(name="input",
                                      dimensions=[
                                          ContinuousDimension(name='x_1',
                                                              min=-100,
                                                              max=100),
                                          ContinuousDimension(name='x_2',
                                                              min=-100,
                                                              max=100)
                                      ])

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        x_1, x_2 = np.meshgrid(input_space['x_1'].linspace(num=501),
                               input_space['x_2'].linspace(num=501))

        y = quadratic(x_1=x_1, x_2=x_2)

        input_values_dataframe = pd.DataFrame({
            'x_1': x_1.reshape(-1),
            'x_2': x_2.reshape(-1)
        })
        output_values_dataframe = pd.DataFrame({'y': y.reshape(-1)})

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=BayesianOptimizerConfig.DEFAULT,
            logger=self.logger)
        bayesian_optimizer.register(input_values_dataframe,
                                    output_values_dataframe)

        num_guided_samples = 20
        for i in range(num_guided_samples):
            # Suggest the parameters
            suggested_params = bayesian_optimizer.suggest()
            suggested_params_dict = suggested_params.to_dict()

            # Reformat them to feed the parameters to the target
            target_value = quadratic(**suggested_params_dict)
            self.logger.info(
                f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}"
            )

            # Reformat the observation to feed it back to the optimizer
            input_values_df = pd.DataFrame({
                param_name: [param_value]
                for param_name, param_value in suggested_params_dict.items()
            })
            target_values_df = pd.DataFrame({'y': [target_value]})

            # Register the observation with the optimizer
            bayesian_optimizer.register(input_values_df, target_values_df)

        self.logger.info(f"Optimum: {bayesian_optimizer.optimum()}")
        trace_output_path = os.path.join(self.temp_dir, "PreHeatedTrace.json")
        self.logger.info(f"Writing trace to {trace_output_path}")
        global_values.tracer.dump_trace_to_file(
            output_file_path=trace_output_path)
        global_values.tracer.clear_events()
示例#8
0
    def test_hierarchical_quadratic_cold_start_random_configs(self):

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=MultilevelQuadratic.CONFIG_SPACE,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        random_state = random.Random()
        num_restarts = 100
        has_failed = False
        for restart_num in range(num_restarts):
            try:
                # Let's set up random seeds so that we can easily repeat failed experiments
                #
                random_state.seed(restart_num)
                BayesianOptimizerConfig.CONFIG_SPACE.random_state = random_state
                MultilevelQuadratic.CONFIG_SPACE.random_state = random_state

                optimizer_config = BayesianOptimizerConfig.CONFIG_SPACE.random(
                )
                self.logger.info(
                    f"[Restart: {restart_num}/{num_restarts}] Creating a BayesianOptimimizer with the following config: "
                )
                self.logger.info(
                    f"Optimizer config: {optimizer_config.to_json(indent=2)}")
                bayesian_optimizer = BayesianOptimizer(
                    optimization_problem=optimization_problem,
                    optimizer_config=optimizer_config,
                    logger=self.logger)

                num_guided_samples = optimizer_config.min_samples_required_for_guided_design_of_experiments + 50
                for i in range(num_guided_samples):
                    suggested_params = bayesian_optimizer.suggest()
                    y = MultilevelQuadratic.evaluate(suggested_params)
                    self.logger.info(
                        f"[Restart: {restart_num}/{num_restarts}][Sample: {i}/{num_guided_samples}] {suggested_params}, y: {y}"
                    )

                    input_values_df = pd.DataFrame({
                        param_name: [param_value]
                        for param_name, param_value in suggested_params
                    })
                    target_values_df = pd.DataFrame({'y': [y]})
                    bayesian_optimizer.register(input_values_df,
                                                target_values_df)

                self.logger.info(
                    f"[Restart: {restart_num}/{num_restarts}] Optimum: {bayesian_optimizer.optimum()}"
                )
            except Exception as e:
                has_failed = True
                error_file_path = os.path.join(os.getcwd(), "temp",
                                               "test_errors.txt")
                with open(error_file_path, 'a') as out_file:
                    out_file.write(
                        "##################################################################################\n"
                    )
                    out_file.write(f"{restart_num} failed.\n")
                    out_file.write(f"Exception: {e}")
        self.assertFalse(has_failed)
示例#9
0
    def test_bayesian_optimizer_on_simple_2d_quadratic_function_pre_heated(
            self):
        """ Tests the bayesian optimizer on a simple quadratic function first feeding the optimizer a lot of data.

        :return:
        """
        input_space = SimpleHypergrid(name="input",
                                      dimensions=[
                                          ContinuousDimension(name='x_1',
                                                              min=-100,
                                                              max=100),
                                          ContinuousDimension(name='x_2',
                                                              min=-100,
                                                              max=100)
                                      ])

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        x_1, x_2 = np.meshgrid(input_space['x_1'].linspace(num=101),
                               input_space['x_2'].linspace(num=101))

        y = quadratic(x_1=x_1, x_2=x_2)

        input_values_dataframe = pd.DataFrame({
            'x_1': x_1.reshape(-1),
            'x_2': x_2.reshape(-1)
        })
        output_values_dataframe = pd.DataFrame({'y': y.reshape(-1)})

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=BayesianOptimizerConfig.DEFAULT,
            logger=self.logger)
        bayesian_optimizer.register(input_values_dataframe,
                                    output_values_dataframe)

        num_guided_samples = 2
        for _ in range(num_guided_samples):
            # Suggest the parameters
            suggested_params = bayesian_optimizer.suggest()
            suggested_params_dict = suggested_params.to_dict()

            # Reformat them to feed the parameters to the target
            target_value = quadratic(**suggested_params_dict)
            print(suggested_params, target_value)

            # Reformat the observation to feed it back to the optimizer
            input_values_df = pd.DataFrame({
                param_name: [param_value]
                for param_name, param_value in suggested_params_dict.items()
            })
            target_values_df = pd.DataFrame({'y': [target_value]})

            # Register the observation with the optimizer
            bayesian_optimizer.register(input_values_df, target_values_df)

        print(bayesian_optimizer.optimum())
示例#10
0
    def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(
            self):
        """ Tests the bayesian optimizer on a simple quadratic function with no prior data.

        :return:
        """
        input_space = SimpleHypergrid(name="input",
                                      dimensions=[
                                          ContinuousDimension(name='x_1',
                                                              min=-10,
                                                              max=10),
                                          ContinuousDimension(name='x_2',
                                                              min=-10,
                                                              max=10)
                                      ])

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        optimizer_config = BayesianOptimizerConfig.DEFAULT
        optimizer_config.min_samples_required_for_guided_design_of_experiments = 50
        optimizer_config.homogeneous_random_forest_regression_model_config.n_estimators = 10
        optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.splitter = "best"
        optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.n_new_samples_before_refit = 2

        print(optimizer_config.to_json(indent=2))

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config,
            logger=self.logger)

        num_iterations = 62
        for i in range(num_iterations):
            suggested_params = bayesian_optimizer.suggest()
            suggested_params_dict = suggested_params.to_dict()
            target_value = quadratic(**suggested_params_dict)
            print(
                f"[{i+1}/{num_iterations}] Suggested params: {suggested_params_dict}, target_value: {target_value}"
            )

            input_values_df = pd.DataFrame({
                param_name: [param_value]
                for param_name, param_value in suggested_params_dict.items()
            })
            target_values_df = pd.DataFrame({'y': [target_value]})

            bayesian_optimizer.register(input_values_df, target_values_df)
            if i > optimizer_config.min_samples_required_for_guided_design_of_experiments and i % 10 == 1:
                print(
                    f"[{i}/{num_iterations}] Optimum: {bayesian_optimizer.optimum()}"
                )
                convergence_state = bayesian_optimizer.get_optimizer_convergence_state(
                )
                random_forest_fit_state = convergence_state.surrogate_model_fit_state
                random_forest_gof_metrics = random_forest_fit_state.current_train_gof_metrics
                print(
                    f"Relative squared error: {random_forest_gof_metrics.relative_squared_error}, Relative absolute error: {random_forest_gof_metrics.relative_absolute_error}"
                )

        convergence_state = bayesian_optimizer.get_optimizer_convergence_state(
        )
        random_forest_fit_state = convergence_state.surrogate_model_fit_state
        random_forest_gof_metrics = random_forest_fit_state.current_train_gof_metrics
        self.assertTrue(
            random_forest_gof_metrics.last_refit_iteration_number > 0.7 *
            num_iterations)
        models_gof_metrics = [random_forest_gof_metrics]
        for decision_tree_fit_state in random_forest_fit_state.decision_trees_fit_states:
            models_gof_metrics.append(
                decision_tree_fit_state.current_train_gof_metrics)

        for model_gof_metrics in models_gof_metrics:
            self.assertTrue(
                0 <= model_gof_metrics.relative_absolute_error <= 1
            )  # This could fail if the models are really wrong. Not expected in this unit test though.
            self.assertTrue(0 <= model_gof_metrics.relative_squared_error <= 1)

            # There is an invariant linking mean absolute error (MAE), root mean squared error (RMSE) and number of observations (n) let's assert it.
            n = model_gof_metrics.last_refit_iteration_number
            self.assertTrue(
                model_gof_metrics.mean_absolute_error <=
                model_gof_metrics.root_mean_squared_error <= math.sqrt(n) *
                model_gof_metrics.mean_absolute_error)

            # We know that the sample confidence interval is wider (or equal to) prediction interval. So hit rates should be ordered accordingly.
            self.assertTrue(model_gof_metrics.sample_90_ci_hit_rate >=
                            model_gof_metrics.prediction_90_ci_hit_rate)

        goodness_of_fit_df = random_forest_fit_state.get_goodness_of_fit_dataframe(
            data_set_type=DataSetType.TRAIN)
        print(goodness_of_fit_df.head())

        goodness_of_fit_df = random_forest_fit_state.get_goodness_of_fit_dataframe(
            data_set_type=DataSetType.TRAIN, deep=True)
        print(goodness_of_fit_df.head())