class TestSmartCacheWithRemoteOptimizer:
    """ Tests SmartCache that's being tuned by the remote optimizer.

    This test will:
    1. Instantiate a SmartCache.
    2. Create an MlosExperiment that connects to a remote or in-process optimizer.
    3. Optimize the SmartCache with the help of the remote or in-process optimizer.
    """

    # The gRPC service is started on the first port in
    # [_FIRST_PORT, _FIRST_PORT + _MAX_NUM_TRIES) on which start-up succeeds.
    _FIRST_PORT = 50051
    _MAX_NUM_TRIES = 100

    def setup_method(self, method):
        """ Builds the fixture used by the test.

        Initializes the MLOS global context and its clock, starts the gRPC optimizer service,
        opens a channel to it, starts an MlosAgent on a separate thread, configures the workload
        generator and creates the experiment and the optimization problem.

        :param method: the test method about to be run (supplied by pytest, unused here).
        """
        mlos_globals.init_mlos_global_context()
        mlos_globals.mlos_global_context.start_clock()
        self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
        self.logger.setLevel(logging.DEBUG)

        self._start_optimizer_server()

        self.optimizer_service_channel = grpc.insecure_channel(f'localhost:{self.port}')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(
            grpc_channel=self.optimizer_service_channel,
            logger=self.logger)

        global_context = mlos_globals.mlos_global_context
        self.mlos_agent = MlosAgent(
            logger=self.logger,
            communication_channel=global_context.communication_channel,
            shared_config=global_context.shared_config,
            bayesian_optimizer_grpc_channel=self.optimizer_service_channel)

        self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
        self.mlos_agent_thread.start()

        global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

        # Let's add the allowed component types
        self.mlos_agent.add_allowed_component_type(SmartCache)
        self.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)
        self.mlos_agent.set_configuration(
            component_type=SmartCacheWorkloadGenerator,
            new_config_values=Point(
                workload_type='cyclical_key_from_range',
                cyclical_key_from_range_config=Point(min=0, range_width=2048)))

        # Let's create the workload
        self.smart_cache_workload = SmartCacheWorkloadGenerator(logger=self.logger)

        # The optimizer is created by the test itself.
        self.optimizer = None
        self.working_set_size_estimator = WorkingSetSizeEstimator()
        self.hit_rate_monitor = HitRateMonitor()

        self.smart_cache_experiment = MlosExperiment(
            smart_component_types=[SmartCache],
            telemetry_aggregators=[self.working_set_size_estimator, self.hit_rate_monitor])

        # Single objective: maximize the cache hit rate, which lies in [0, 1].
        hit_rate_dimension = ContinuousDimension(name="hit_rate", min=0, max=1)
        self.optimization_problem = OptimizationProblem(
            parameter_space=SmartCache.parameter_search_space,
            objective_space=SimpleHypergrid(name="objectives", dimensions=[hit_rate_dimension]),
            objectives=[Objective(name="hit_rate", minimize=False)])

    def _start_optimizer_server(self):
        """ Starts the gRPC service. Tries a bunch of consecutive ports before giving up.

        On success sets self.server and self.port. If start-up fails on every port,
        the exception raised for the last port is propagated.
        """
        last_port = self._FIRST_PORT + self._MAX_NUM_TRIES - 1
        for port in range(self._FIRST_PORT, last_port + 1):
            try:
                server = OptimizerServicesServer(port=port, num_threads=10)
                server.start()
            except Exception:
                # Only ordinary errors are retried; KeyboardInterrupt/SystemExit must propagate.
                self.logger.info(f"Failed to create OptimizerServicesServer on port {port}")
                if port == last_port:
                    raise
            else:
                self.server = server
                self.port = port
                return

    def teardown_method(self, method):
        """ Stops the clock, the agent and the gRPC server, then closes the channel.

        The server and the channel are released even if an earlier tear-down step raises.

        :param method: the test method that has just run (supplied by pytest, unused here).
        """
        try:
            mlos_globals.mlos_global_context.stop_clock()
            self.mlos_agent.stop_all()
        finally:
            try:
                # Wait (up to a second each) for the server to actually shut down.
                self.server.stop(grace=None).wait(timeout=1)
                self.server.wait_for_termination(timeout=1)
            finally:
                self.optimizer_service_channel.close()

    def test_smart_cache_with_remote_optimizer_on_a_timer(self):
        """ Periodically invokes the optimizer to improve cache performance.

        """
        optimizer_config = bayesian_optimizer_config_store.default
        random_forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
        random_forest_config.decision_tree_regression_model_config.n_new_samples_before_refit = 5
        self.optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=self.optimization_problem,
            optimizer_config=optimizer_config)
        self.mlos_agent.start_experiment(self.smart_cache_experiment)

        num_iterations = 101
        for _ in range(num_iterations):
            # Run one burst of the workload to completion.
            # NOTE(review): 0.1 is presumably the burst duration - confirm against SmartCacheWorkloadGenerator.run.
            smart_cache_workload_thread = Thread(target=self.smart_cache_workload.run, args=(0.1, ))
            smart_cache_workload_thread.start()
            smart_cache_workload_thread.join()

            # Tell the optimizer how the current configuration performed ...
            current_cache_config = self.mlos_agent.get_configuration(component_type=SmartCache)
            features_df = current_cache_config.to_dataframe()
            hit_rate = self.hit_rate_monitor.get_hit_rate()
            num_requests = self.hit_rate_monitor.num_requests
            working_set_size_estimate = self.working_set_size_estimator.estimate_working_set_size()
            objectives_df = pd.DataFrame({'hit_rate': [hit_rate]})
            self.optimizer.register(features_df, objectives_df)

            # ... and apply whatever it suggests next.
            new_config_values = self.optimizer.suggest()
            self.mlos_agent.set_configuration(component_type=SmartCache, new_config_values=new_config_values)
            self.hit_rate_monitor.reset()
            self.logger.info(f"Previous config: {current_cache_config.to_json()}")
            self.logger.info(
                f"Estimated working set size: {working_set_size_estimate.chapman_estimator}. Hit rate: {hit_rate:.2f}. Num requests: {num_requests} "
            )

        self.mlos_agent.stop_experiment(self.smart_cache_experiment)

        # Let's look at the goodness of fit.
        #
        multi_objective_gof_metrics = self.optimizer.compute_surrogate_model_goodness_of_fit()
        for _, random_forest_gof_metrics in multi_objective_gof_metrics:
            self._assert_gof_metrics_are_sane(random_forest_gof_metrics, num_iterations)

    def _assert_gof_metrics_are_sane(self, gof_metrics, num_iterations):
        """ Sanity-checks the goodness of fit metrics of one objective's surrogate model.

        :param gof_metrics: goodness of fit metrics of the surrogate model.
        :param num_iterations: number of observations registered with the optimizer.
        """
        # The model might not have used all of the samples, but should have used a majority of them (I expect about 90%), but 50% is a good sanity check
        # and should make this test not very flaky.
        assert gof_metrics.last_refit_iteration_number > 0.5 * num_iterations

        # Those relative errors should generally be between 0 and 1 unless the model's predictions are worse than predicting average...
        # This unit test occasionally doesn't have enough data to get us down to 1 so we'll pass the test if it's less than 2.
        # Note, the point of this test is to check sanity. We'll use a separate suite to evaluate models' performance from an ML standpoint.
        self.logger.info(f"Relative absolute error: {gof_metrics.relative_absolute_error}")
        self.logger.info(f"Relative squared error: {gof_metrics.relative_squared_error}")
        assert gof_metrics.relative_absolute_error is None or (0 <= gof_metrics.relative_absolute_error <= 2)
        assert gof_metrics.relative_squared_error is None or (0 <= gof_metrics.relative_squared_error <= 2)

        # There is an invariant linking mean absolute error (MAE), root mean squared error (RMSE) and number of observations (n) let's assert it.
        n = gof_metrics.last_refit_iteration_number
        self.logger.info(f"Last refit iteration number: {n}")
        self.logger.info(f"Mean absolute error: {gof_metrics.mean_absolute_error}")
        self.logger.info(f"Root mean squared error: {gof_metrics.root_mean_squared_error}")
        assert gof_metrics.mean_absolute_error <= gof_metrics.root_mean_squared_error <= math.sqrt(n) * gof_metrics.mean_absolute_error

        # We know that the sample confidence interval is wider (or equal to) prediction interval. So hit rates should be ordered accordingly.
        assert gof_metrics.sample_90_ci_hit_rate >= gof_metrics.prediction_90_ci_hit_rate
示例#2
0
class TestBayesianOptimizerGrpcClient(unittest.TestCase):
    """ Tests the E2E Grpc Client-Service workflow.

    NOTE(review): a pytest-style class with the same name is defined further down in this file and
    rebinds the name when the module is imported - confirm which of the two is meant to run.
    """

    # Largest absolute difference tolerated between registered and observed values.
    _TOLERANCE = 0.00000001

    @classmethod
    def setUpClass(cls):
        """ Turns warnings into errors and declares the global singletons once for the class. """
        warnings.simplefilter("error")
        global_values.declare_singletons()

    def setUp(self):
        """ Starts the gRPC service on port 50051 and builds the client-side objects and the optimization problem. """
        self.logger = create_logger(self.__class__.__name__)
        # Start up the gRPC service.
        #
        self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
        self.server.start()

        self.optimizer_service_channel = grpc.insecure_channel('localhost:50051')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
        self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

        objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
        self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

        self.optimization_problem = OptimizationProblem(
            parameter_space=self.objective_function.parameter_space,
            objective_space=self.objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)]
        )

    def tearDown(self):
        """ We need to tear down the gRPC server here.

        The client channel is closed as well, even if stopping the server raises.

        :return:
        """
        try:
            self.server.stop(grace=None)
        finally:
            self.optimizer_service_channel.close()

    def test_echo(self):
        """ The service's Echo call must answer an Empty message with an Empty message. """
        optimizer_service_stub = OptimizerServiceStub(channel=self.optimizer_service_channel)
        response = optimizer_service_stub.Echo(Empty())
        self.assertIsInstance(response, Empty)

    def test_optimizer_with_default_config(self):
        """ Creates a remote optimizer with the default config, optimizes a quadratic and checks the results.

        Checks that exactly one new optimizer shows up in the monitor, that the observations come back
        from the service unchanged and that the goodness of fit metrics satisfy basic invariants.
        """
        pre_existing_optimizers = {optimizer.id: optimizer for optimizer in self.optimizer_monitor.get_existing_optimizers()}
        print(bayesian_optimizer_config_store.default)
        bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=self.optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default
        )
        post_existing_optimizers = {optimizer.id: optimizer for optimizer in self.optimizer_monitor.get_existing_optimizers()}

        new_optimizers = {
            optimizer_id: optimizer
            for optimizer_id, optimizer in post_existing_optimizers.items()
            if optimizer_id not in pre_existing_optimizers
        }

        self.assertEqual(len(new_optimizers), 1)

        new_optimizer_id, new_optimizer = next(iter(new_optimizers.items()))

        self.assertEqual(new_optimizer_id, bayesian_optimizer.id)
        self.assertEqual(new_optimizer.optimizer_config, bayesian_optimizer.optimizer_config)

        num_iterations = 100
        registered_features_df, registered_objectives_df = self.optimize_quadratic(optimizer=bayesian_optimizer, num_iterations=num_iterations)
        self._assert_observations_match(bayesian_optimizer, registered_features_df, registered_objectives_df)

        # Let's look at the goodness of fit.
        #
        random_forest_gof_metrics = bayesian_optimizer.compute_surrogate_model_goodness_of_fit()

        # The model might not have used all of the samples, but should have used a majority of them (I expect about 90%), but 70% is a good sanity check
        # and should make this test not very flaky.
        self.assertGreater(random_forest_gof_metrics.last_refit_iteration_number, 0.7 * num_iterations)

        # The invariants below should be true for all surrogate models: the random forest, and all constituent decision trees. So let's iterate over them all.
        models_gof_metrics = [random_forest_gof_metrics]

        for model_gof_metrics in models_gof_metrics:
            self.assertTrue(0 <= model_gof_metrics.relative_absolute_error <= 1) # This could fail if the models are really wrong. Not expected in this unit test though.
            self.assertTrue(0 <= model_gof_metrics.relative_squared_error <= 1)

            # There is an invariant linking mean absolute error (MAE), root mean squared error (RMSE) and number of observations (n) let's assert it.
            n = model_gof_metrics.last_refit_iteration_number
            self.assertTrue(model_gof_metrics.mean_absolute_error <= model_gof_metrics.root_mean_squared_error <= math.sqrt(n) * model_gof_metrics.mean_absolute_error)

            # We know that the sample confidence interval is wider (or equal to) prediction interval. So hit rates should be ordered accordingly.
            self.assertGreaterEqual(model_gof_metrics.sample_90_ci_hit_rate, model_gof_metrics.prediction_90_ci_hit_rate)
            self.assertTrue(0 <= model_gof_metrics.coefficient_of_determination <= 1)

    def test_optimizer_with_random_config(self):
        """ Runs a short optimization with each of several randomly drawn optimizer configurations. """
        num_random_restarts = 10
        for i in range(num_random_restarts):
            optimizer_config = bayesian_optimizer_config_store.parameter_space.random()

            # Cap the sizes drawn at random so that every restart stays small.
            optimizer_config.min_samples_required_for_guided_design_of_experiments = min(optimizer_config.min_samples_required_for_guided_design_of_experiments, 100)
            if optimizer_config.surrogate_model_implementation == "HomogeneousRandomForestRegressionModel":
                rf_config = optimizer_config.homogeneous_random_forest_regression_model_config
                rf_config.n_estimators = min(rf_config.n_estimators, 20)

            print(f"[{i+1}/{num_random_restarts}] Creating a bayesian optimizer with config: {optimizer_config}")

            bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
                optimization_problem=self.optimization_problem,
                optimizer_config=optimizer_config
            )
            registered_features_df, registered_objectives_df = self.optimize_quadratic(optimizer=bayesian_optimizer, num_iterations=12)
            self._assert_observations_match(bayesian_optimizer, registered_features_df, registered_objectives_df)

    @unittest.skip(reason="Not implemented yet.")
    def test_optimizer_with_named_config(self):
        ...

    @staticmethod
    def _round_trip_through_json(data_frame):
        """ Serializes a data frame to json and reads it back.

        :param data_frame: the frame to serialize.
        :return: the frame reconstructed from its json representation.
        """
        import io  # local import: only needed to hand the json text to pandas as a file-like object

        # Apparently the to_json/from_json loses precision so we explicitly lose it here so that we can do the comparison.
        #
        as_json = data_frame.to_json(orient='index', double_precision=15)

        # Apparently the jitter is too good and we actually have to use the json strings or they will be optimized away.
        #
        assert len(as_json) > 0

        # Newer pandas versions deprecate passing the json text itself, so wrap it in a buffer.
        return pd.read_json(io.StringIO(as_json), orient='index')

    def _assert_observations_match(self, optimizer, registered_features_df, registered_objectives_df):
        """ Asserts that the optimizer reports back the same observations that were registered with it.

        :param optimizer: the remote optimizer to query for its observations.
        :param registered_features_df: all parameter values that were registered.
        :param registered_objectives_df: all objective values that were registered.
        """
        registered_features_df = self._round_trip_through_json(registered_features_df)
        registered_objectives_df = self._round_trip_through_json(registered_objectives_df)

        observed_features_df, observed_objectives_df = optimizer.get_all_observations()

        self.assertTrue((np.abs(registered_features_df - observed_features_df) < self._TOLERANCE).all().all())
        self.assertTrue((np.abs(registered_objectives_df - observed_objectives_df) < self._TOLERANCE).all().all())

    def optimize_quadratic(self, optimizer, num_iterations):
        """ Runs the suggest/evaluate/register loop against self.objective_function.

        After every iteration asserts that the optimum reported by the optimizer did not increase.

        :param optimizer: the optimizer to drive.
        :param num_iterations: number of suggest/register rounds to run.
        :return: (features_df, objectives_df) holding everything that was registered, one row per
            iteration; (None, None) if num_iterations is 0.
        """
        all_features_dfs = []
        all_objectives_dfs = []
        old_optimum = np.inf
        for i in range(num_iterations):
            suggested_params = optimizer.suggest()
            suggested_params_df = suggested_params.to_dataframe()
            y_df = self.objective_function.evaluate_point(suggested_params).to_dataframe()
            optimizer.register(suggested_params_df, y_df)
            all_features_dfs.append(suggested_params_df)
            all_objectives_dfs.append(y_df)

            best_params, optimum = optimizer.optimum()
            current_optimum = optimum.y
            # ensure current optimum doesn't go up
            assert current_optimum <= old_optimum
            old_optimum = current_optimum
            print(f"[{i+1}/{num_iterations}]Best Params: {best_params}, Best Value: {current_optimum}")

        if not all_features_dfs:
            return None, None

        # DataFrame.append is deprecated/removed in newer pandas; concatenate once at the end instead.
        registered_features_df = pd.concat(all_features_dfs, ignore_index=True)
        registered_objectives_df = pd.concat(all_objectives_dfs, ignore_index=True)
        return registered_features_df, registered_objectives_df
class TestBayesianOptimizerGrpcClient:
    """ Tests the E2E Grpc Client-Service workflow.

    """

    # The gRPC service is started on the first port in
    # [_FIRST_PORT, _FIRST_PORT + _MAX_NUM_TRIES) on which start-up succeeds.
    _FIRST_PORT = 50051
    _MAX_NUM_TRIES = 100

    # Largest absolute difference tolerated between registered and observed values.
    _TOLERANCE = 0.00000001

    @classmethod
    def setup_class(cls):
        """ Turns warnings into errors and declares the global singletons once for the class. """
        warnings.simplefilter("error")
        global_values.declare_singletons()

    def setup_method(self, method):
        """ Starts the gRPC service and builds the client-side objects and the optimization problem.

        :param method: the test method about to be run (supplied by pytest, unused here).
        """
        self.logger = create_logger(self.__class__.__name__)

        self._start_optimizer_server()

        self.optimizer_service_channel = grpc.insecure_channel(f'localhost:{self.port}')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(
            grpc_channel=self.optimizer_service_channel,
            logger=self.logger)
        self.optimizer_monitor = OptimizerMonitor(
            grpc_channel=self.optimizer_service_channel,
            logger=self.logger)

        objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
        self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

        self.optimization_problem = OptimizationProblem(
            parameter_space=self.objective_function.parameter_space,
            objective_space=self.objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])

    def _start_optimizer_server(self):
        """ Starts the gRPC service. Tries a bunch of consecutive ports before giving up.

        On success sets self.server and self.port. If start-up fails on every port,
        the exception raised for the last port is propagated.
        """
        last_port = self._FIRST_PORT + self._MAX_NUM_TRIES - 1
        for port in range(self._FIRST_PORT, last_port + 1):
            try:
                server = OptimizerServicesServer(port=port, num_threads=10)
                server.start()
            except Exception:
                # Only ordinary errors are retried; KeyboardInterrupt/SystemExit must propagate.
                self.logger.info(f"Failed to create OptimizerServicesServer on port {port}")
                if port == last_port:
                    raise
            else:
                self.server = server
                self.port = port
                return

    def teardown_method(self, method):
        """ We need to tear down the gRPC server here.

        The client channel is closed even if stopping the server raises.

        :return:
        """
        try:
            # Wait (up to a second each) for the server to actually shut down.
            self.server.stop(grace=None).wait(timeout=1)
            self.server.wait_for_termination(timeout=1)
        finally:
            self.optimizer_service_channel.close()

    def test_echo(self):
        """ The service's Echo call must answer an Empty message with an Empty message. """
        optimizer_service_stub = OptimizerServiceStub(channel=self.optimizer_service_channel)
        response = optimizer_service_stub.Echo(Empty())
        assert isinstance(response, Empty)

    def test_optimizer_with_default_config(self):
        """ Creates a remote optimizer with the default config, optimizes the objective function and checks the results.

        Checks that exactly one new optimizer shows up in the monitor, that the observations come back
        from the service unchanged, that predictions are indexed consistently with the observations and
        that the goodness of fit metrics satisfy basic invariants.
        """
        pre_existing_optimizers = {
            optimizer.id: optimizer
            for optimizer in self.optimizer_monitor.get_existing_optimizers()
        }
        print(bayesian_optimizer_config_store.default)
        bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=self.optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default)
        post_existing_optimizers = {
            optimizer.id: optimizer
            for optimizer in self.optimizer_monitor.get_existing_optimizers()
        }

        new_optimizers = {
            optimizer_id: optimizer
            for optimizer_id, optimizer in post_existing_optimizers.items()
            if optimizer_id not in pre_existing_optimizers
        }

        assert len(new_optimizers) == 1

        new_optimizer_id, new_optimizer = next(iter(new_optimizers.items()))

        assert new_optimizer_id == bayesian_optimizer.id
        assert new_optimizer.optimizer_config == bayesian_optimizer.optimizer_config

        num_iterations = 100
        registered_features_df, registered_objectives_df = self.optimize_objective_function(
            optimizer=bayesian_optimizer,
            objective_function=self.objective_function,
            num_iterations=num_iterations)

        registered_features_df = self._round_trip_through_json(registered_features_df)
        registered_objectives_df = self._round_trip_through_json(registered_objectives_df)

        # Only the first two of the three returned values are compared.
        observed_features_df, observed_objectives_df, _ = bayesian_optimizer.get_all_observations()

        assert (np.abs(registered_features_df - observed_features_df) < self._TOLERANCE).all().all()
        assert (np.abs(registered_objectives_df - observed_objectives_df) < self._TOLERANCE).all().all()

        # Assert that the observations and predictions are returned in the right order from the remote optimizer
        #
        parameters_df, objectives_df, _ = bayesian_optimizer.get_all_observations()
        predictions_df = bayesian_optimizer.predict(parameter_values_pandas_frame=parameters_df).get_dataframe()
        assert parameters_df.index.intersection(predictions_df.index).equals(predictions_df.index)

        # Let's look at the goodness of fit.
        #
        multi_objective_gof_metrics = bayesian_optimizer.compute_surrogate_model_goodness_of_fit()
        for _, random_forest_gof_metrics in multi_objective_gof_metrics:

            # The model might not have used all of the samples, but should have used a majority of them (I expect about 90%), but 50% is a good sanity check
            # and should make this test not very flaky.
            assert random_forest_gof_metrics.last_refit_iteration_number > 0.5 * num_iterations

            # The invariants below should be true for all surrogate models: the random forest, and all constituent decision trees. So let's iterate over them all.
            models_gof_metrics = [random_forest_gof_metrics]

            for model_gof_metrics in models_gof_metrics:
                assert 0 <= model_gof_metrics.relative_absolute_error <= 1  # This could fail if the models are really wrong. Not expected in this unit test though.
                assert 0 <= model_gof_metrics.relative_squared_error <= 1

                # There is an invariant linking mean absolute error (MAE), root mean squared error (RMSE) and number of observations (n) let's assert it.
                n = model_gof_metrics.last_refit_iteration_number
                assert model_gof_metrics.mean_absolute_error <= model_gof_metrics.root_mean_squared_error <= math.sqrt(n) * model_gof_metrics.mean_absolute_error

                # We know that the sample confidence interval is wider (or equal to) prediction interval. So hit rates should be ordered accordingly.
                assert model_gof_metrics.sample_90_ci_hit_rate >= model_gof_metrics.prediction_90_ci_hit_rate
                assert 0 <= model_gof_metrics.coefficient_of_determination <= 1

    @pytest.mark.parametrize("i", range(10))
    def test_optimizer_with_random_config(self, i):
        """ Runs a short optimization of the quadratic with a randomly drawn optimizer configuration.

        :param i: index of the random restart (only used in the printed message).
        """
        optimizer_config = self._create_random_optimizer_config()
        print(
            f"[{i+1}] Creating a bayesian optimizer with config: {optimizer_config}"
        )

        bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=self.optimization_problem,
            optimizer_config=optimizer_config)
        registered_features_df, registered_objectives_df = self.optimize_objective_function(
            optimizer=bayesian_optimizer,
            objective_function=self.objective_function,
            num_iterations=12)

        registered_features_df = self._round_trip_through_json(registered_features_df)
        registered_objectives_df = self._round_trip_through_json(registered_objectives_df)

        observed_features_df, observed_objectives_df, _ = bayesian_optimizer.get_all_observations()

        assert (np.abs(registered_features_df - observed_features_df) < self._TOLERANCE).all().all()
        assert (np.abs(registered_objectives_df - observed_objectives_df) < self._TOLERANCE).all().all()

    @pytest.mark.parametrize("i", range(10))
    def test_optimizer_with_random_config_random_objective(self, i):
        """ Runs a short optimization of a randomly drawn objective function with a randomly drawn optimizer configuration.

        Only the numeric parameters present in both the registered and the observed frames are compared.

        :param i: index of the random restart (only used in the printed message).
        """
        # The objective function is drawn before the optimizer config, keeping the order of random draws stable.
        objective_function_config = objective_function_config_store.parameter_space.random()
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)
        optimization_problem = objective_function.default_optimization_problem

        optimizer_config = self._create_random_optimizer_config()

        print(
            f"[{i+1}] Creating a bayesian optimizer with config: {optimizer_config} \n\n\nObjective function config: {objective_function_config}"
        )

        bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config)
        registered_params_df, registered_objectives_df = self.optimize_objective_function(
            optimizer=bayesian_optimizer,
            objective_function=objective_function,
            num_iterations=20)

        registered_params_df = self._round_trip_through_json(registered_params_df)
        registered_objectives_df = self._round_trip_through_json(registered_objectives_df)

        observed_params_df, observed_objectives_df, _ = bayesian_optimizer.get_all_observations()

        def is_numeric(dimension):
            if isinstance(dimension, (ContinuousDimension, DiscreteDimension)):
                return True
            return isinstance(dimension, CategoricalDimension) and dimension.is_numeric

        numeric_params_names = [
            dimension.name
            for dimension in optimization_problem.parameter_space.dimensions
            if is_numeric(dimension)
            and dimension.name in registered_params_df.columns
            and dimension.name in observed_params_df.columns
        ]
        numeric_params_df = registered_params_df[numeric_params_names]
        observed_numeric_params_df = observed_params_df[numeric_params_names]

        # Missing values are replaced with 0 on both sides before taking the difference.
        assert (np.abs(numeric_params_df.fillna(0) - observed_numeric_params_df.fillna(0)) < self._TOLERANCE).all().all()
        assert (np.abs(registered_objectives_df - observed_objectives_df) < self._TOLERANCE).all().all()

    @staticmethod
    def _create_random_optimizer_config():
        """ Draws a random optimizer config and caps the values that would make a test run too large.

        :return: the adjusted optimizer config.
        """
        optimizer_config = bayesian_optimizer_config_store.parameter_space.random()

        # Keep the number of samples required before guided design of experiments within [20, 100].
        optimizer_config.min_samples_required_for_guided_design_of_experiments = max(
            min(optimizer_config.min_samples_required_for_guided_design_of_experiments, 100),
            20)

        if optimizer_config.surrogate_model_implementation == "HomogeneousRandomForestRegressionModel":
            rf_config = optimizer_config.homogeneous_random_forest_regression_model_config
            rf_config.n_estimators = min(rf_config.n_estimators, 20)

        if optimizer_config.surrogate_model_implementation == MultiObjectiveRegressionEnhancedRandomForest.__name__:
            optimizer_config.min_samples_required_for_guided_design_of_experiments = 25
            rerf_model_config = optimizer_config.regression_enhanced_random_forest_regression_model_config
            rerf_model_config.max_basis_function_degree = min(rerf_model_config.max_basis_function_degree, 2)
            # increased polynomial degree requires more data to estimate model parameters (poly term coefficients)
            optimizer_config.min_samples_required_for_guided_design_of_experiments += 25 * (rerf_model_config.max_basis_function_degree - 1)
            rf_model_config = rerf_model_config.sklearn_random_forest_regression_model_config
            rf_model_config.perform_initial_random_forest_hyper_parameter_search = False
            rf_model_config.max_depth = min(rf_model_config.max_depth, 10)
            rf_model_config.n_jobs = min(rf_model_config.n_jobs, 4)

        return optimizer_config

    @staticmethod
    def _round_trip_through_json(data_frame):
        """ Serializes a data frame to json and reads it back.

        :param data_frame: the frame to serialize.
        :return: the frame reconstructed from its json representation.
        """
        import io  # local import: only needed to hand the json text to pandas as a file-like object

        # Apparently the to_json/from_json loses precision so we explicitly lose it here so that we can do the comparison.
        #
        as_json = data_frame.to_json(orient='index', double_precision=15)

        # Apparently the jitter is too good and we actually have to use the json strings or they will be optimized away.
        #
        assert len(as_json) > 0

        # Newer pandas versions deprecate passing the json text itself, so wrap it in a buffer.
        return pd.read_json(io.StringIO(as_json), orient='index')

    @staticmethod
    def optimize_objective_function(optimizer, objective_function,
                                    num_iterations):
        """ Runs the suggest/evaluate/register loop of the optimizer against the objective function.

        After every iteration asserts that the optimum reported for the first objective did not get worse.

        :param optimizer: the optimizer to drive.
        :param objective_function: the function used to evaluate each suggested point.
        :param num_iterations: number of suggest/register rounds to run.
        :return: (features_df, objectives_df) holding everything that was registered, one row per
            iteration; (None, None) if num_iterations is 0.
        """
        all_features_dfs = []
        all_objectives_dfs = []

        # Let's make sure that the optimum for the first objective doesn't get worse throughout the optimization loop.
        #
        first_objective = optimizer.optimization_problem.objectives[0]
        old_optimum = np.inf if first_objective.minimize else -np.inf

        for i in range(num_iterations):
            suggested_params = optimizer.suggest()
            suggested_params_df = suggested_params.to_dataframe()
            objective_values_df = objective_function.evaluate_point(suggested_params).to_dataframe()
            optimizer.register(suggested_params_df, objective_values_df)
            all_features_dfs.append(suggested_params_df)
            all_objectives_dfs.append(objective_values_df)

            best_params, optimum = optimizer.optimum()
            current_optimum = optimum[first_objective.name]
            # ensure current optimum doesn't get worse (lower is better when minimizing, higher otherwise)
            #
            if first_objective.minimize:
                assert current_optimum <= old_optimum
            else:
                assert current_optimum >= old_optimum

            old_optimum = current_optimum
            print(
                f"[{i+1}/{num_iterations}]Best Params: {best_params}, Best Value: {current_optimum}"
            )

        if not all_features_dfs:
            return None, None

        # DataFrame.append is deprecated/removed in newer pandas; concatenate once at the end instead.
        registered_features_df = pd.concat(all_features_dfs, ignore_index=True)
        registered_objectives_df = pd.concat(all_objectives_dfs, ignore_index=True)
        return registered_features_df, registered_objectives_df