Example #1
    def test_construct_feature_dataframe_no_context(self):
        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])
        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])
        n_samples = 100
        parameter_df = optimization_problem.parameter_space.random_dataframe(
            n_samples)
        feature_df = optimization_problem.construct_feature_dataframe(
            parameters_df=parameter_df)
        assert feature_df.shape == (
            n_samples,
            len(optimization_problem.parameter_space.dimension_names) + 1)
        expected_columns = sorted([
            f"three_level_quadratic_config.{n}"
            for n in optimization_problem.parameter_space.dimension_names
        ])
        assert (
            feature_df.columns[:-1].sort_values() == expected_columns).all()
        assert feature_df.columns[-1] == "contains_context"
        assert not feature_df.contains_context.any()
Example #2
    def test_hierarchical_quadratic_cold_start(self):

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        num_restarts = 2
        for restart_num in range(num_restarts):

            optimizer_config = bayesian_optimizer_config_store.default
            optimizer_config.min_samples_required_for_guided_design_of_experiments = 20
            optimizer_config.homogeneous_random_forest_regression_model_config.n_estimators = 10
            optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.splitter = "best"
            optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.min_samples_to_fit = 10
            optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.n_new_samples_before_refit = 2

            local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)

            remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)

            for bayesian_optimizer in [local_optimizer, remote_optimizer]:
                num_guided_samples = 50
                for i in range(num_guided_samples):
                    suggested_params = bayesian_optimizer.suggest()
                    y = objective_function.evaluate_point(suggested_params)
                    print(
                        f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}"
                    )

                    input_values_df = pd.DataFrame({
                        param_name: [param_value]
                        for param_name, param_value in suggested_params
                    })
                    target_values_df = y.to_dataframe()
                    bayesian_optimizer.register(
                        feature_values_pandas_frame=input_values_df,
                        target_values_pandas_frame=target_values_df)
                best_config_point, best_objective = bayesian_optimizer.optimum(
                    optimum_definition=OptimumDefinition.BEST_OBSERVATION)
                print(
                    f"[Restart: {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
                )
                self.validate_optima(optimizer=bayesian_optimizer)
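
A side note on the registration pattern in this example: iterating over a suggested Point yields (name, value) pairs, and the dict comprehension turns them into a one-row DataFrame. A minimal pandas-only sketch of the same construction (the parameter names here are made up):

import pandas as pd

# Hypothetical (name, value) pairs, standing in for iterating over a suggested Point.
suggested_params = [("x1", 0.5), ("x2", -3.0)]
input_values_df = pd.DataFrame({name: [value] for name, value in suggested_params})
print(input_values_df)  # one row, one column per parameter
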
Example #3
    def __init__(self,
                 optimizer_evaluator_config: Point,
                 optimizer: OptimizerBase = None,
                 optimizer_config: Point = None,
                 objective_function: ObjectiveFunctionBase = None,
                 objective_function_config: Point = None):
        assert optimizer_evaluator_config in optimizer_evaluator_config_store.parameter_space
        assert (optimizer is None) != (optimizer_config is None),\
            "A valid optimizer XOR a valid optimizer_config must be supplied."
        assert (objective_function is None) != (objective_function_config is None),\
            "A valid objective_function XOR a valid objective_function_config must be specified"

        self.optimizer_evaluator_config = optimizer_evaluator_config
        self.objective_function_config = None
        self.objective_function = None
        self.optimizer_config = None
        self.optimizer = None

        # Let's get the objective function assigned to self's fields.
        #
        if (objective_function_config is not None) and (objective_function is None):
            assert objective_function_config in objective_function_config_store.parameter_space
            self.objective_function_config = objective_function_config
            self.objective_function = ObjectiveFunctionFactory.create_objective_function(
                objective_function_config)

        elif (objective_function is not None) and (objective_function_config is None):
            self.objective_function_config = objective_function.objective_function_config
            self.objective_function = objective_function

        else:
            # The assert above should have caught it but just in case someone removes or changes it.
            #
            assert False, "A valid objective_function XOR a valid objective_function_config must be specified"

        # Let's get the optimizer and its config assigned to self's fields.
        #
        if (optimizer_config is not None) and (optimizer is None):
            assert optimizer_config in bayesian_optimizer_config_store.parameter_space
            optimization_problem = self.objective_function.default_optimization_problem
            self.optimizer_config = optimizer_config
            self.optimizer = BayesianOptimizerFactory().create_local_optimizer(
                optimizer_config=optimizer_config,
                optimization_problem=optimization_problem)

        elif (optimizer is not None) and (optimizer_config is None):
            # TODO: assert that the optimization problem in the optimizer matches the objective function.
            # But this requires Hypergrid.__eq__.
            #
            self.optimizer_config = optimizer.optimizer_config
            self.optimizer = optimizer

        else:
            # Again, the assert at the beginning of the constructor should have caught this. But more asserts => fewer bugs.
            #
            assert False, "A valid optimizer XOR a valid optimizer_config must be supplied."
Example #4
    def test_lasso_hierarchical_categorical_predictions(self):
        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        rerf = RegressionEnhancedRandomForestRegressionModel(
            model_config=self.model_config,
            input_space=objective_function.parameter_space,
            output_space=objective_function.output_space)

        # fit model with same degree as true y
        num_train_x = 100
        x_train_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_train_x)
        y_train_df = objective_function.evaluate_dataframe(x_train_df)
        rerf.fit(x_train_df, y_train_df)
        num_detected_features = len(rerf.detected_feature_indices_)

        self.assertTrue(
            rerf.root_model_gradient_coef_.shape ==
            rerf.polynomial_features_powers_.shape,
            'Gradient coefficient shape is incorrect')
        self.assertTrue(
            rerf.fit_X_.shape == (num_train_x,
                                  rerf.polynomial_features_powers_.shape[0]),
            'Design matrix shape is incorrect')
        self.assertTrue(
            rerf.partial_hat_matrix_.shape == (num_detected_features,
                                               num_detected_features),
            'Hat matrix shape is incorrect')
        self.assertTrue(rerf.polynomial_features_powers_.shape == (28, 8),
                        'PolynomialFeatures.powers_ shape is incorrect')

        # test predictions
        predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
        num_test_x = 10

        # Generate a single X feature per iteration so that predictions are exercised one point at a time.
        y_test_list = []
        predicted_y_list = []
        for _ in range(num_test_x):
            x_test_df = objective_function.parameter_space.random_dataframe(
                num_samples=1)
            y_test_df = objective_function.evaluate_dataframe(x_test_df)
            y_test_list.append(y_test_df['y'].values[0])

            predictions = rerf.predict(x_test_df)
            pred_df = predictions.get_dataframe()
            predicted_y_list.append(pred_df[predicted_value_col].values[0])

        predicted_y = np.array(predicted_y_list)
        y_test = np.array(y_test_list)
        residual_sum_of_squares = ((y_test - predicted_y)**2).sum()
        total_sum_of_squares = ((y_test - y_test.mean())**2).sum()
        unexplained_variance = residual_sum_of_squares / total_sum_of_squares
        self.assertTrue(unexplained_variance < 10**-4,
                        '1 - R^2 larger than expected')
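
The closing assertions compute 1 - R^2 by hand. The same arithmetic, factored into a standalone helper for reference (a sketch, not library code):

import numpy as np

def unexplained_variance(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    # 1 - R^2: residual sum of squares over total sum of squares.
    residual_sum_of_squares = ((y_true - y_pred) ** 2).sum()
    total_sum_of_squares = ((y_true - y_true.mean()) ** 2).sum()
    return residual_sum_of_squares / total_sum_of_squares

assert unexplained_variance(np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 3.0])) == 0.0
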
Example #5
    def test_lasso_hierarchical_categorical_predictions(self):
        random.seed(11001)
        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        rerf = RegressionEnhancedRandomForestRegressionModel(
            model_config=self.model_config,
            input_space=objective_function.parameter_space,
            output_space=objective_function.output_space)

        # Fit the model with the same degree as the true y.
        # The input space consists of three 2-d domains, each 200 x 200 units, so random samples
        # below a certain size leave too few points per domain to train reliable models.
        # TODO: Good place to use a non-random training set design
        num_train_x = 600
        x_train_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_train_x)
        y_train_df = objective_function.evaluate_dataframe(x_train_df)
        rerf.fit(x_train_df, y_train_df)
        num_detected_features = len(rerf.detected_feature_indices_)

        self.assertTrue(
            rerf.root_model_gradient_coef_.shape ==
            rerf.polynomial_features_powers_.shape,
            'Gradient coefficient shape is incorrect')
        self.assertTrue(
            rerf.fit_X_.shape == (num_train_x,
                                  rerf.polynomial_features_powers_.shape[0]),
            'Design matrix shape is incorrect')
        self.assertTrue(
            rerf.partial_hat_matrix_.shape == (num_detected_features,
                                               num_detected_features),
            'Hat matrix shape is incorrect')
        self.assertTrue(rerf.polynomial_features_powers_.shape == (34, 9),
                        'PolynomialFeatures.powers_ shape is incorrect')

        # test predictions
        predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
        num_test_x = 50
        x_test_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_test_x)
        predictions = rerf.predict(x_test_df)
        pred_df = predictions.get_dataframe()
        predicted_y = pred_df[predicted_value_col].to_numpy()
        y_test = objective_function.evaluate_dataframe(
            x_test_df).to_numpy().reshape(-1)
        residual_sum_of_squares = ((y_test - predicted_y)**2).sum()
        total_sum_of_squares = ((y_test - y_test.mean())**2).sum()
        unexplained_variance = residual_sum_of_squares / total_sum_of_squares
        test_threshold = 10**-3
        self.assertTrue(
            unexplained_variance < test_threshold,
            f'1 - R^2 = {unexplained_variance} larger than expected ({test_threshold})'
        )
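
The TODO in the comment above asks for a non-random training set design. One plausible sketch is a regular grid, in the spirit of the linspace/meshgrid construction that the pareto-volume test below uses; the domain bounds and dimension count here are stand-ins, not the actual parameter space:

import numpy as np
import pandas as pd

# Stand-in for dimension.linspace(...) calls on a real parameter space.
linspaces = [np.linspace(-100.0, 100.0, 25) for _ in range(2)]
meshgrids = np.meshgrid(*linspaces)
grid_train_df = pd.DataFrame({
    f"x{i}": meshgrid.reshape(-1)
    for i, meshgrid in enumerate(meshgrids)
})
print(grid_train_df.shape)  # (625, 2): a 25 x 25 grid over a 200 x 200 domain
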
Example #6
    def setup_class(cls):
        """ Set's up all the objects needed to test the RandomSearchOptimizer

        To test the RandomSearchOptimizer we need to first construct:
        * an optimization problem
        * a utility function

        To construct a utility function we need the same set up as in the TestConfidenceBoundUtilityFunction
        test.



        :return:
        """
        global_values.declare_singletons()
        global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)

        objective_function_config = objective_function_config_store.get_config_by_name(
            '2d_quadratic_concave_up')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        cls.input_space = objective_function.parameter_space
        cls.output_space = objective_function.output_space

        cls.input_values_dataframe = objective_function.parameter_space.random_dataframe(
            num_samples=2500)
        cls.output_values_dataframe = objective_function.evaluate_dataframe(
            cls.input_values_dataframe)

        cls.model_config = homogeneous_random_forest_config_store.default

        print(cls.model_config)

        cls.model = MultiObjectiveHomogeneousRandomForest(
            model_config=cls.model_config,
            input_space=cls.input_space,
            output_space=cls.output_space)
        cls.model.fit(cls.input_values_dataframe,
                      cls.output_values_dataframe,
                      iteration_number=len(cls.input_values_dataframe.index))

        cls.utility_function_config = Point(
            utility_function_name="upper_confidence_bound_on_improvement",
            alpha=0.05)

        cls.optimization_problem = OptimizationProblem(
            parameter_space=cls.input_space,
            objective_space=cls.output_space,
            objectives=[Objective(name='y', minimize=True)])

        cls.utility_function = ConfidenceBoundUtilityFunction(
            function_config=cls.utility_function_config,
            surrogate_model=cls.model,
            minimize=cls.optimization_problem.objectives[0].minimize)
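
For intuition on the upper_confidence_bound_on_improvement utility configured above with alpha=0.05: a generic upper confidence bound (not necessarily the library's exact formula) takes the (1 - alpha) quantile of the surrogate's Gaussian prediction:

from scipy.stats import norm

mu, sigma, alpha = 1.0, 0.25, 0.05  # hypothetical surrogate prediction at one point
upper_confidence_bound = mu + norm.ppf(1 - alpha) * sigma
print(upper_confidence_bound)  # ~1.41 for these values
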
Example #7
    def test_default_config(self, objective_function_config_name):
        objective_function_config = objective_function_config_store.get_config_by_name(objective_function_config_name)
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

        lasso_model_config = lasso_cross_validated_config_store.default
        multi_objective_rf = MultiObjectiveLassoCrossValidated(
            model_config=lasso_model_config,
            input_space=objective_function.parameter_space,
            output_space=objective_function.output_space,
            logger=self.logger
        )

        if objective_function_config_name == '2d_hypersphere_minimize_some':
            num_training_samples = 25
            num_testing_samples = 10
        elif objective_function_config_name == '10d_hypersphere_minimize_some':
            num_training_samples = 50
            num_testing_samples = 10
        elif objective_function_config_name == '5_mutually_exclusive_polynomials':
            num_training_samples = 100
            num_testing_samples = 50
        else:
            assert False, f"Unknown objective_function_config_name: {objective_function_config_name}"
        train_params_df = objective_function.parameter_space.random_dataframe(num_samples=num_training_samples)
        train_objectives_df = objective_function.evaluate_dataframe(train_params_df)

        test_params_df = objective_function.parameter_space.random_dataframe(num_samples=num_testing_samples)
        test_objectives_df = objective_function.evaluate_dataframe(test_params_df)

        multi_objective_rf.fit(features_df=train_params_df, targets_df=train_objectives_df, iteration_number=num_training_samples)
        multi_objective_predictions = multi_objective_rf.predict(features_df=train_params_df, include_only_valid_rows=True)

        # TRAINING DATA
        #
        print("------------------------------------------------------------------------------------")
        print("--------------------------------------- TRAIN --------------------------------------")
        print("------------------------------------------------------------------------------------")
        training_gof = multi_objective_rf.compute_goodness_of_fit(features_df=train_params_df, targets_df=train_objectives_df, data_set_type=DataSetType.TRAIN)
        for objective_name in objective_function.output_space.dimension_names:
            print("------------------------------------------------------------------------------------")
            print(objective_name)
            print(training_gof[objective_name].to_json(indent=2))

        # TESTING DATA
        print("------------------------------------------------------------------------------------")
        print("--------------------------------------- TEST ---------------------------------------")
        print("------------------------------------------------------------------------------------")
        testing_gof = multi_objective_rf.compute_goodness_of_fit(features_df=test_params_df, targets_df=test_objectives_df, data_set_type=DataSetType.TEST_KNOWN_RANDOM)
        for objective_name in objective_function.output_space.dimension_names:
            print("------------------------------------------------------------------------------------")
            print(objective_name)
            print(testing_gof[objective_name].to_json(indent=2))
Example #8
    def test_lasso_hierarchical_categorical_predictions(self):
        random.seed(11001)
        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        polynomial_features_adapter = ContinuousToPolynomialBasisHypergridAdapter(
            adaptee=objective_function.parameter_space,
            degree=2,
            include_bias=True,
            interaction_only=False)

        lasso_cross_validated_model = LassoCrossValidatedRegressionModel(
            model_config=self.model_config,
            input_space=polynomial_features_adapter,
            output_space=objective_function.output_space)
        # Since the model input_space stacks the polynomial basis functions onto the original input space, we can skip validating input features.
        lasso_cross_validated_model.skip_input_filtering_on_predict = True

        # Fit the model with the same degree as the true y.
        # The input space consists of three 2-d domains, each 200 x 200 units, so random samples
        # below a certain size leave too few points per domain to train reliable models.
        # TODO: Good place to use a non-random training set design
        num_train_x = 300
        x_train_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_train_x)
        y_train_df = objective_function.evaluate_dataframe(x_train_df)
        lasso_cross_validated_model.fit(x_train_df,
                                        y_train_df,
                                        iteration_number=0)

        # test predictions
        num_test_x = 50
        x_test_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_test_x)
        y_test = objective_function.evaluate_dataframe(
            x_test_df).to_numpy().reshape(-1)
        predictions = lasso_cross_validated_model.predict(x_test_df)

        pred_df = predictions.get_dataframe()
        predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
        predicted_y = pred_df[predicted_value_col].to_numpy()

        residual_sum_of_squares = ((y_test - predicted_y)**2).sum()
        total_sum_of_squares = ((y_test - y_test.mean())**2).sum()
        unexplained_variance = residual_sum_of_squares / total_sum_of_squares
        test_threshold = 10**-6
        print(f'Asserting {unexplained_variance} < {test_threshold}')
        assert unexplained_variance < test_threshold, f'1 - R^2 = {unexplained_variance} larger than expected ({test_threshold})'
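
For reference, here is what a degree-2 polynomial basis expansion produces in plain scikit-learn; the ContinuousToPolynomialBasisHypergridAdapter above presumably wraps the same expansion, and the powers_ matrix is what the polynomial_features_powers_ shape assertions in the neighboring examples refer to:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2.0, 3.0]])
poly = PolynomialFeatures(degree=2, include_bias=True, interaction_only=False)
print(poly.fit_transform(X))  # [[1. 2. 3. 4. 6. 9.]] -> 1, x0, x1, x0^2, x0*x1, x1^2
print(poly.powers_)           # exponent matrix: one row per basis function
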
Example #9
    def test_bayesian_optimizer_on_simple_2d_quadratic_function_pre_heated(self):
        """ Tests the bayesian optimizer on a simple quadratic function first feeding the optimizer a lot of data.

        """

        objective_function_config = objective_function_config_store.get_config_by_name(
            '2d_quadratic_concave_up')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)
        random_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=10000)

        y_df = objective_function.evaluate_dataframe(random_params_df)

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default,
            logger=self.logger)
        bayesian_optimizer.register(random_params_df, y_df)

        num_guided_samples = 20
        for i in range(num_guided_samples):
            # Suggest the parameters
            suggested_params = bayesian_optimizer.suggest()
            target_value = objective_function.evaluate_point(suggested_params)

            self.logger.info(
                f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}"
            )

            # Register the observation with the optimizer
            bayesian_optimizer.register(suggested_params.to_dataframe(),
                                        target_value.to_dataframe())

        self.validate_optima(bayesian_optimizer)
        best_config_point, best_objective = bayesian_optimizer.optimum()
        self.logger.info(
            f"Optimum: {best_objective} Best Configuration: {best_config_point}"
        )
        trace_output_path = os.path.join(self.temp_dir, "PreHeatedTrace.json")
        self.logger.info(f"Writing trace to {trace_output_path}")
        global_values.tracer.dump_trace_to_file(
            output_file_path=trace_output_path)
        global_values.tracer.clear_events()
Example #10
    def test_glow_worm_on_three_level_quadratic(self):
        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)
        # Let's warm up the model a bit
        #
        num_warmup_samples = 1000
        random_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_warmup_samples)
        y = objective_function.evaluate_dataframe(random_params_df)

        model = HomogeneousRandomForestRegressionModel(
            model_config=self.model_config,
            input_space=objective_function.parameter_space,
            output_space=output_space)
        model.fit(feature_values_pandas_frame=random_params_df,
                  target_values_pandas_frame=y,
                  iteration_number=num_warmup_samples)

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        utility_function = ConfidenceBoundUtilityFunction(
            function_config=self.utility_function_config,
            surrogate_model=model,
            minimize=optimization_problem.objectives[0].minimize)

        glow_worm_swarm_optimizer = GlowWormSwarmOptimizer(
            optimization_problem=optimization_problem,
            utility_function=utility_function,
            optimizer_config=glow_worm_swarm_optimizer_config_store.default)

        num_iterations = 5
        for i in range(num_iterations):
            suggested_params = glow_worm_swarm_optimizer.suggest()
            print(f"[{i+1}/{num_iterations}] {suggested_params.to_json()}")
            self.assertTrue(
                suggested_params in objective_function.parameter_space)
Example #11
    def test_named_configs(self, config_name):
        objective_function_config = objective_function_config_store.get_config_by_name(
            config_name)
        print(objective_function_config.to_json(indent=2))
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        for _ in range(100):
            random_point = objective_function.parameter_space.random()
            value = objective_function.evaluate_point(random_point)
            assert value in objective_function.output_space

        for i in range(1, 100):
            random_dataframe = objective_function.parameter_space.random_dataframe(
                num_samples=i)
            values_df = objective_function.evaluate_dataframe(random_dataframe)
            assert values_df.index.equals(random_dataframe.index)
Example #12
    def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(self):
        """ Tests the bayesian optimizer on a simple quadratic function with no prior data.

        """
        objective_function_config = objective_function_config_store.get_config_by_name(
            '2d_quadratic_concave_up')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default,
            logger=self.logger)

        num_guided_samples = 1000
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            target_value = objective_function.evaluate_point(suggested_params)
            self.logger.info(
                f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}"
            )

            bayesian_optimizer.register(suggested_params.to_dataframe(),
                                        target_value.to_dataframe())
            if i > 20 and i % 20 == 0:
                best_config_point, best_objective = bayesian_optimizer.optimum()
                self.logger.info(
                    f"[{i}/{num_guided_samples}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
                )

        self.validate_optima(bayesian_optimizer)
        best_config, optimum = bayesian_optimizer.optimum()
        assert objective_function.parameter_space.contains_point(best_config)
        assert objective_function.output_space.contains_point(optimum)
        _, all_targets = bayesian_optimizer.get_all_observations()
        assert optimum.y == all_targets.min().iloc[0]
        self.logger.info(
            f"Optimum: {optimum} best configuration: {best_config}")
Example #13
    def setUp(self):
        self.logger = create_logger(self.__class__.__name__)
        # Start up the gRPC service.
        #
        self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
        self.server.start()

        self.optimizer_service_channel = grpc.insecure_channel('localhost:50051')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
        self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

        objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
        self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

        self.optimization_problem = OptimizationProblem(
            parameter_space=self.objective_function.parameter_space,
            objective_space=self.objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)]
        )
Example #14
    def test_hierarchical_quadratic_cold_start(self):

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        num_restarts = 1000
        for restart_num in range(num_restarts):
            bayesian_optimizer = BayesianOptimizer(
                optimization_problem=optimization_problem,
                optimizer_config=bayesian_optimizer_config_store.default,
                logger=self.logger)

            num_guided_samples = 200
            for i in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = objective_function.evaluate_point(suggested_params)
                self.logger.info(
                    f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

                input_values_df = suggested_params.to_dataframe()
                target_values_df = y.to_dataframe()
                bayesian_optimizer.register(input_values_df, target_values_df)
            self.validate_optima(bayesian_optimizer)
            best_config_point, best_objective = bayesian_optimizer.optimum()
            self.logger.info(
                f"[{restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
            )
Example #15
    def __init__(
            self,
            model_config: Point,
            input_space: Hypergrid = None,
            output_space: Hypergrid = None,
            logger=None
    ):
        assert model_config in multi_objective_pass_through_model_config_store.parameter_space
        self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=model_config.objective_function_config)


        MultiObjectiveRegressionModel.__init__(
            self,
            model_type=type(self),
            model_config=model_config,
            input_space=self.objective_function.default_optimization_problem.feature_space,
            output_space=self.objective_function.output_space
        )

        if logger is None:
            logger = create_logger(self.__class__.__name__)
        self.logger = logger
Example #16
    def test_bayesian_optimizer_with_random_near_incumbent(self):
        objective_function_config = objective_function_config_store.get_config_by_name(
            'multi_objective_waves_3_params_2_objectives_half_pi_phase_difference'
        )
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        optimization_problem = objective_function.default_optimization_problem

        optimizer_config = bayesian_optimizer_config_store.get_config_by_name(
            'default_with_random_near_incumbent_config')
        assert optimizer_config.experiment_designer_config.numeric_optimizer_implementation == "RandomNearIncumbentOptimizer"
        optimizer_config.experiment_designer_config.fraction_random_suggestions = 0

        # Let's give it a little more resolution.
        #
        optimizer_config.experiment_designer_config.multi_objective_probability_of_improvement_config.num_monte_carlo_samples = 200

        bayesian_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config)

        random_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=1000)
        objectives_df = objective_function.evaluate_dataframe(random_params_df)
        bayesian_optimizer.register(
            parameter_values_pandas_frame=random_params_df,
            target_values_pandas_frame=objectives_df)

        num_suggestions = 10
        for suggestion_number in range(num_suggestions):
            parameters = bayesian_optimizer.suggest()
            objectives = objective_function.evaluate_point(parameters)
            self.logger.info(
                f"[{suggestion_number}/{num_suggestions}] parameters: {parameters}, objectives: {objectives}"
            )
            bayesian_optimizer.register(
                parameter_values_pandas_frame=parameters.to_dataframe(),
                target_values_pandas_frame=objectives.to_dataframe())
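
The num_monte_carlo_samples = 200 knob above controls how many draws the multi-objective probability-of-improvement estimate uses. A single-objective analogue of Monte Carlo probability of improvement, as a sketch (not the library's implementation):

import numpy as np

rng = np.random.default_rng(0)
mu, sigma = 1.0, 0.25           # surrogate's predictive mean and stddev at a candidate
best_observed = 0.9             # incumbent objective value (minimization)
num_monte_carlo_samples = 200
draws = rng.normal(mu, sigma, num_monte_carlo_samples)
probability_of_improvement = (draws < best_observed).mean()
print(probability_of_improvement)
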
Example #17
    def setup_method(self, method):
        self.logger = create_logger(self.__class__.__name__)

        # Start up the gRPC service. Try a bunch of times before giving up.
        #
        max_num_tries = 100
        num_tries = 0
        for port in range(50051, 50051 + max_num_tries):
            num_tries += 1
            try:
                self.server = OptimizerServicesServer(port=port,
                                                      num_threads=10)
                self.server.start()
                self.port = port
                break
            except Exception:
                self.logger.info(
                    f"Failed to create OptimizerServicesServer on port {port}"
                )
                if num_tries == max_num_tries:
                    raise

        self.optimizer_service_channel = grpc.insecure_channel(
            f'localhost:{self.port}')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(
            grpc_channel=self.optimizer_service_channel, logger=self.logger)
        self.optimizer_monitor = OptimizerMonitor(
            grpc_channel=self.optimizer_service_channel, logger=self.logger)

        objective_function_config = objective_function_config_store.get_config_by_name(
            '2d_quadratic_concave_up')
        self.objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)

        self.optimization_problem = OptimizationProblem(
            parameter_space=self.objective_function.parameter_space,
            objective_space=self.objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])
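
The port-probing loop above is a common pattern. The same idea as a standalone stdlib-only helper (a sketch, not what the test uses):

import socket

def find_free_port(start: int = 50051, max_num_tries: int = 100) -> int:
    # Probe consecutive ports until bind() succeeds.
    for port in range(start, start + max_num_tries):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            try:
                sock.bind(("localhost", port))
                return port
            except OSError:
                continue
    raise RuntimeError(f"no free port in [{start}, {start + max_num_tries})")

Note the inherent race: a probed port can be taken between the probe and actual use, which is why the test retries server creation itself rather than probing first.
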
Example #18
    def test_named_configs(self):

        named_configs = objective_function_config_store.list_named_configs()

        objective_function_configs_to_test = [
            named_config.config_point for named_config in named_configs
        ]

        for objective_function_config in objective_function_configs_to_test:
            print(objective_function_config.to_json(indent=2))
            objective_function = ObjectiveFunctionFactory.create_objective_function(
                objective_function_config=objective_function_config)
            default_polynomials_domain = objective_function.parameter_space
            for _ in range(100):
                random_point = default_polynomials_domain.random()
                value = objective_function.evaluate_point(random_point)
                self.assertTrue(value in objective_function.output_space)

            for i in range(1, 100):
                random_dataframe = default_polynomials_domain.random_dataframe(
                    num_samples=i)
                values_df = objective_function.evaluate_dataframe(
                    random_dataframe)
                self.assertTrue(values_df.index.equals(random_dataframe.index))
Example #19
    def test_rerf_hierarchical_categorical_predictions(self):
        random.seed(11001)
        objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

        rerf = RegressionEnhancedRandomForestRegressionModel(
            model_config=self.model_config,
            input_space=objective_function.parameter_space,
            output_space=objective_function.output_space
        )

        # Fit the model with the same degree as the true y.
        # The input space consists of three 2-d domains, each 200 x 200 units, so random samples
        # below a certain size leave too few points per domain to train reliable models.
        # TODO: Good place to use a non-random training set design
        num_train_x = 300
        x_train_df = objective_function.parameter_space.random_dataframe(num_samples=num_train_x)
        y_train_df = objective_function.evaluate_dataframe(x_train_df)
        rerf.fit(x_train_df, y_train_df)

        # test predictions
        predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
        num_test_x = 50
        x_test_df = objective_function.parameter_space.random_dataframe(num_samples=num_test_x)
        y_test = objective_function.evaluate_dataframe(x_test_df).to_numpy().reshape(-1)

        predictions = rerf.predict(x_test_df)
        pred_df = predictions.get_dataframe()
        predicted_y = pred_df[predicted_value_col].to_numpy()

        residual_sum_of_squares = ((y_test - predicted_y) ** 2).sum()
        total_sum_of_squares = ((y_test - y_test.mean()) ** 2).sum()
        unexplained_variance = residual_sum_of_squares / total_sum_of_squares
        test_threshold = 10**-6
        print(unexplained_variance, test_threshold)
        assert unexplained_variance < test_threshold, f'1 - R^2 = {unexplained_variance} larger than expected ({test_threshold})'
Example #20
    def test_optimizer_with_random_config_random_objective(self, i):
        objective_function_config = objective_function_config_store.parameter_space.random()
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)
        optimization_problem = objective_function.default_optimization_problem

        optimizer_config = bayesian_optimizer_config_store.parameter_space.random()

        optimizer_config.min_samples_required_for_guided_design_of_experiments = max(
            min(optimizer_config.min_samples_required_for_guided_design_of_experiments, 100),
            20)
        if optimizer_config.surrogate_model_implementation == "HomogeneousRandomForestRegressionModel":
            rf_config = optimizer_config.homogeneous_random_forest_regression_model_config
            rf_config.n_estimators = min(rf_config.n_estimators, 20)

        if optimizer_config.surrogate_model_implementation == MultiObjectiveRegressionEnhancedRandomForest.__name__:
            optimizer_config.min_samples_required_for_guided_design_of_experiments = 25
            rerf_model_config = optimizer_config.regression_enhanced_random_forest_regression_model_config
            rerf_model_config.max_basis_function_degree = min(
                rerf_model_config.max_basis_function_degree, 2)
            # increased polynomial degree requires more data to estimate model parameters (poly term coefficients)
            optimizer_config.min_samples_required_for_guided_design_of_experiments += 25 * (
                rerf_model_config.max_basis_function_degree - 1)
            rf_model_config = rerf_model_config.sklearn_random_forest_regression_model_config
            rf_model_config.perform_initial_random_forest_hyper_parameter_search = False
            rf_model_config.max_depth = min(rf_model_config.max_depth, 10)
            rf_model_config.n_jobs = min(rf_model_config.n_jobs, 4)

        print(
            f"[{i+1}] Creating a bayesian optimizer with config: {optimizer_config} \n\n\nObjective function config: {objective_function_config}"
        )

        bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config)
        registered_params_df, registered_objectives_df = self.optimize_objective_function(
            optimizer=bayesian_optimizer,
            objective_function=objective_function,
            num_iterations=20)

        # The to_json/from_json round trip loses precision, so we deliberately round-trip here to
        # lose the same precision before doing the comparison below.
        #
        registered_features_json = registered_params_df.to_json(
            orient='index', double_precision=15)
        registered_objectives_json = registered_objectives_df.to_json(
            orient='index', double_precision=15)

        # Sanity-check (and thereby actually use) the JSON strings so that the round trip above
        # cannot be optimized away.
        #
        assert len(registered_features_json) > 0
        assert len(registered_objectives_json) > 0

        registered_params_df = pd.read_json(registered_features_json,
                                            orient='index')
        registered_objectives_df = pd.read_json(registered_objectives_json,
                                                orient='index')

        observed_params_df, observed_objectives_df, _ = bayesian_optimizer.get_all_observations()

        numeric_params_names = [
            dimension.name
            for dimension in optimization_problem.parameter_space.dimensions
            if (isinstance(dimension, (ContinuousDimension, DiscreteDimension))
                or (isinstance(dimension, CategoricalDimension)
                    and dimension.is_numeric)) and (
                        dimension.name in registered_params_df.columns) and (
                            dimension.name in observed_params_df.columns)
        ]
        numeric_params_df = registered_params_df[numeric_params_names]
        observed_numeric_params_df = observed_params_df[numeric_params_names]

        assert (np.abs(numeric_params_df.fillna(0) - observed_numeric_params_df.fillna(0)) < 1e-8).all().all()
        assert (np.abs(registered_objectives_df - observed_objectives_df) < 1e-8).all().all()
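
The precision caveat in the comments above is easy to reproduce with pandas alone; a minimal sketch of why the comparison needs a tolerance:

from io import StringIO

import pandas as pd

df = pd.DataFrame({"x": [0.12345678901234567]})
json_str = df.to_json(orient="index", double_precision=15)
round_tripped = pd.read_json(StringIO(json_str), orient="index")
# Serializing at 15 significant digits drops the last bits of the double,
# so compare with a tolerance rather than exact equality.
assert (df - round_tripped).abs().lt(1e-8).all().all()
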
Example #21
    def test_hyperspheres(self, minimize, num_output_dimensions, num_points):
        """Uses a hypersphere to validate that ParetoFrontier can correctly identify pareto-optimal points."""


        hypersphere_radius = 10

        objective_function_config = Point(
            implementation=Hypersphere.__name__,
            hypersphere_config=Point(
                num_objectives=num_output_dimensions,
                minimize=minimize,
                radius=hypersphere_radius
            )
        )

        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
        optimization_problem = objective_function.default_optimization_problem
        random_params_df = optimization_problem.parameter_space.random_dataframe(num_points)

        # Let's randomly subsample 10% of points in random_params_df and make those points pareto optimal.
        #
        optimal_points_index = random_params_df.sample(
            frac=0.1,
            replace=False,
            axis='index'
        ).index

        random_params_df.loc[optimal_points_index, ['radius']] = hypersphere_radius
        objectives_df = objective_function.evaluate_dataframe(dataframe=random_params_df)



        # Conveniently, we can double check all of our math by invoking Pythagoras. Basically:
        #
        #   assert y0**2 + y1**2 + ... == radius**2
        #
        assert (np.abs(np.power(objectives_df, 2).sum(axis=1) - np.power(random_params_df["radius"], 2)) < 0.000001).all()


        # Just a few more sanity checks before we do the pareto computation.
        #
        if minimize == "all":
            assert (objectives_df <= 0).all().all()
        elif minimize == "none":
            assert (objectives_df >= 0).all().all()
        else:
            for column, minimize_column in zip(objectives_df, objective_function.minimize_mask):
                if minimize_column:
                    assert (objectives_df[column] <= 0).all()
                else:
                    assert (objectives_df[column] >= 0).all()


        pareto_frontier = ParetoFrontier(
            optimization_problem=optimization_problem,
            objectives_df=objectives_df,
            parameters_df=random_params_df
        )
        pareto_df = pareto_frontier.pareto_df

        # We know that all of the pareto efficient points must be on the frontier.
        #
        assert optimal_points_index.difference(pareto_df.index.intersection(optimal_points_index)).empty
        assert len(pareto_df.index) >= len(optimal_points_index)

        # If we flip all minimized objectives, we can assert on even more things.
        #
        for column, minimize_column in zip(objectives_df, objective_function.minimize_mask):
            if minimize_column:
                objectives_df[column] = -objectives_df[column]
                pareto_df[column] = - pareto_df[column]

        non_pareto_index = objectives_df.index.difference(pareto_df.index)
        for i, row in pareto_df.iterrows():
            # Now let's make sure that no point in pareto is dominated by any non-pareto point.
            #
            assert (objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

            # Let's also make sure that no point on the pareto is dominated by any other point there.
            #
            other_rows = pareto_df.index.difference([i])
            assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
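
The two dominance assertions at the end encode the standard Pareto dominance relation. In the pure-maximization convention that the sign flip above establishes, dominance can be written as (a generic sketch, not library code):

import numpy as np

def dominates(a: np.ndarray, b: np.ndarray) -> bool:
    # a dominates b iff a is at least as good everywhere and strictly better somewhere.
    return bool(np.all(a >= b) and np.any(a > b))

assert dominates(np.array([2.0, 3.0]), np.array([1.0, 3.0]))
assert not dominates(np.array([2.0, 1.0]), np.array([1.0, 3.0]))  # trade-off: neither dominates
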
Example #22
    def test_pareto_frontier_volume_on_hyperspheres(self, minimize, num_dimensions):
        """Uses a known formula for the volume of the hyperspheres to validate the accuracy of the pareto frontier estimate.

        :return:
        """
        hypersphere_radius = 10
        inscribed_hypersphere_radius = 7  # For computing lower bound on volume

        # In order to validate the estimates, we must know the allowable upper and lower bounds.
        # We know that the estimate should not be higher than the volume of the n-ball (ball in n-dimensions).
        # We can also come up with a lower bound, by computing a volume of a slightly smaller ball inscribed
        # into the hypersphere. Note that the volume of an n-ball can be computed recursively, so we keep track
        # of n-ball volumes in lower dimensions.

        upper_bounds_on_sphere_volume_by_num_dimensions = {}
        lower_bounds_on_sphere_volume_by_num_dimensions = {}

        # Compute the base cases for the recursion.
        #
        upper_bounds_on_sphere_volume_by_num_dimensions[2] = np.pi * (hypersphere_radius ** 2)
        upper_bounds_on_sphere_volume_by_num_dimensions[3] = (4 / 3) * np.pi * (hypersphere_radius ** 3)

        lower_bounds_on_sphere_volume_by_num_dimensions[2] = np.pi * (inscribed_hypersphere_radius ** 2)
        lower_bounds_on_sphere_volume_by_num_dimensions[3] = (4 / 3) * np.pi * (inscribed_hypersphere_radius ** 3)

        # Compute the recursive values.
        #
        for n in range(4, num_dimensions + 1):
            upper_bounds_on_sphere_volume_by_num_dimensions[n] = upper_bounds_on_sphere_volume_by_num_dimensions[n-2] * 2 * np.pi * (hypersphere_radius ** 2) / n
            lower_bounds_on_sphere_volume_by_num_dimensions[n] = lower_bounds_on_sphere_volume_by_num_dimensions[n-2] * 2 * np.pi * (inscribed_hypersphere_radius ** 2) / n

        objective_function_config = Point(
            implementation=Hypersphere.__name__,
            hypersphere_config=Point(
                num_objectives=num_dimensions,
                minimize=minimize,
                radius=hypersphere_radius
            )
        )
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)
        parameter_space = objective_function.parameter_space

        num_points = max(4, num_dimensions)
        linspaces = []

        for dimension in parameter_space.dimensions:
            if dimension.name == 'radius':
                linspaces.append(np.array([hypersphere_radius]))
            else:
                linspaces.append(dimension.linspace(num_points))
        meshgrids = np.meshgrid(*linspaces)
        reshaped_meshgrids = [meshgrid.reshape(-1) for meshgrid in meshgrids]

        params_df = pd.DataFrame({
            dim_name: reshaped_meshgrids[i]
            for i, dim_name
            in enumerate(parameter_space.dimension_names)
        })

        objectives_df = objective_function.evaluate_dataframe(params_df)

        pareto_frontier = ParetoFrontier(
            optimization_problem=objective_function.default_optimization_problem,
            objectives_df=objectives_df,
            parameters_df=params_df
        )
        print("Num points in pareto frontier: ", len(objectives_df.index))
        assert len(pareto_frontier.pareto_df.index) == len(objectives_df.index)
        pareto_volume_estimator = pareto_frontier.approximate_pareto_volume(num_samples=1000000)
        ci_lower_bound, ci_upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.05)

        lower_bound_on_pareto_volume = lower_bounds_on_sphere_volume_by_num_dimensions[num_dimensions] / (2**num_dimensions)
        upper_bound_on_pareto_volume = upper_bounds_on_sphere_volume_by_num_dimensions[num_dimensions] / (2**num_dimensions)
        print("True bounds:", lower_bound_on_pareto_volume, upper_bound_on_pareto_volume)
        print("CI bounds: ", ci_lower_bound, ci_upper_bound)
        assert lower_bound_on_pareto_volume <= ci_lower_bound <= ci_upper_bound <= upper_bound_on_pareto_volume
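
The recursion used above, V_n = V_{n-2} * 2 * pi * r^2 / n, agrees with the closed form for the volume of an n-ball, V_n(r) = pi^(n/2) * r^n / Gamma(n/2 + 1); a quick standard-library check:

import math

def n_ball_volume(n: int, r: float) -> float:
    # Closed form: pi^(n/2) * r^n / Gamma(n/2 + 1).
    return math.pi ** (n / 2) * r ** n / math.gamma(n / 2 + 1)

# E.g. in 4 dimensions with r = 10, the recursion from the 2-d base case matches:
v2 = math.pi * 10 ** 2
v4_recursive = v2 * 2 * math.pi * 10 ** 2 / 4
assert abs(v4_recursive - n_ball_volume(4, 10)) < 1e-6
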
Example #23
    def test_hierarchical_quadratic_cold_start_random_configs(self):

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        random_state = random.Random()
        num_restarts = 200
        for restart_num in range(num_restarts):

            # Let's set up random seeds so that we can easily repeat failed experiments
            #
            random_state.seed(restart_num)
            bayesian_optimizer_config_store.parameter_space.random_state = random_state
            objective_function.parameter_space.random_state = random_state

            optimizer_config = bayesian_optimizer_config_store.parameter_space.random()

            # The goal here is to make sure the optimizer works with a lot of different configurations.
            # So let's make sure each run is not too long.
            #
            optimizer_config.min_samples_required_for_guided_design_of_experiments = 50
            if optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__:
                random_forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
                random_forest_config.n_estimators = min(
                    random_forest_config.n_estimators, 5)
                decision_tree_config = random_forest_config.decision_tree_regression_model_config
                decision_tree_config.min_samples_to_fit = 10
                decision_tree_config.n_new_samples_before_refit = 10

            if optimizer_config.experiment_designer_config.numeric_optimizer_implementation == GlowWormSwarmOptimizer.__name__:
                optimizer_config.experiment_designer_config.glow_worm_swarm_optimizer_config.num_iterations = 5

            self.logger.info(
                f"[Restart: {restart_num}/{num_restarts}] Creating a BayesianOptimizer with the following config: "
            )
            self.logger.info(
                f"Optimizer config: {optimizer_config.to_json(indent=2)}")
            bayesian_optimizer = BayesianOptimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config,
                logger=self.logger)

            num_guided_samples = optimizer_config.min_samples_required_for_guided_design_of_experiments + 50
            for i in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = objective_function.evaluate_point(suggested_params)
                self.logger.info(
                    f"[Restart: {restart_num}/{num_restarts}][Sample: {i}/{num_guided_samples}] {suggested_params}, y: {y}"
                )

                input_values_df = suggested_params.to_dataframe()
                target_values_df = y.to_dataframe()
                bayesian_optimizer.register(input_values_df, target_values_df)

            best_config_point, best_objective = bayesian_optimizer.optimum()
            self.logger.info(
                f"[Restart: {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
            )
Example #24
    def test_optimizers_against_untrained_models(self, objective_function_config_name, utility_function_type_name, utility_function_optimizer_type_name):
        """Tests that the utility function optimizers throw appropriate exceptions when the utility function cannot be evaluated.

        """
        self.logger.info(f"Creating test artifacts for objective function: {objective_function_config_name}, utility_function: {utility_function_type_name}, optimizer: {utility_function_optimizer_type_name}.")
        model_config = homogeneous_random_forest_config_store.default
        objective_function_config = objective_function_config_store.get_config_by_name(objective_function_config_name)
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
        optimization_problem = objective_function.default_optimization_problem

        model = MultiObjectiveHomogeneousRandomForest(
            model_config=model_config,
            input_space=optimization_problem.feature_space,
            output_space=optimization_problem.objective_space,
            logger=self.logger
        )
        pareto_frontier = ParetoFrontier(optimization_problem=optimization_problem)

        if utility_function_type_name == ConfidenceBoundUtilityFunction.__name__:
            utility_function_config = Point(utility_function_name="upper_confidence_bound_on_improvement", alpha=0.05)
            utility_function = ConfidenceBoundUtilityFunction(
                function_config=utility_function_config,
                surrogate_model=model,
                minimize=optimization_problem.objectives[0].minimize,
                logger=self.logger
            )
        elif utility_function_type_name == MultiObjectiveProbabilityOfImprovementUtilityFunction.__name__:
            utility_function_config = multi_objective_probability_of_improvement_utility_function_config_store.default
            utility_function = MultiObjectiveProbabilityOfImprovementUtilityFunction(
                function_config=utility_function_config,
                pareto_frontier=pareto_frontier,
                surrogate_model=model,
                logger=self.logger
            )
        else:
            assert False, f"Unknown utility_function_type_name: {utility_function_type_name}"

        if utility_function_optimizer_type_name == RandomSearchOptimizer.__name__:
            utility_function_optimizer_config = random_search_optimizer_config_store.default
        elif utility_function_optimizer_type_name == GlowWormSwarmOptimizer.__name__:
            utility_function_optimizer_config = glow_worm_swarm_optimizer_config_store.default
        elif utility_function_optimizer_type_name == RandomNearIncumbentOptimizer.__name__:
            utility_function_optimizer_config = random_near_incumbent_optimizer_config_store.default
        else:
            assert False, f"Unknown utility_function_optimizer_type_name: {utility_function_optimizer_type_name}"

        utility_function_optimizer = UtilityFunctionOptimizerFactory.create_utility_function_optimizer(
            utility_function=utility_function,
            optimizer_type_name=utility_function_optimizer_type_name,
            optimizer_config=utility_function_optimizer_config,
            optimization_problem=optimization_problem,
            pareto_frontier=pareto_frontier,
            logger=self.logger
        )

        assert not model.trained

        self.logger.info("Asserting the optimizer is throwing appropriate exceptions.")
        num_failed_suggestions = 3
        for i in range(num_failed_suggestions):
            with pytest.raises(expected_exception=UnableToProduceGuidedSuggestionException):
                utility_function_optimizer.suggest()
            self.logger.info(f"[{i+1}/{num_failed_suggestions}] raised the expected exception.")

        # Now let's train the model a bit and make sure that we can produce the suggestions afterwards
        #
        random_params_df = optimization_problem.parameter_space.random_dataframe(1000)
        objectives_df = objective_function.evaluate_dataframe(random_params_df)
        features_df = optimization_problem.construct_feature_dataframe(parameters_df=random_params_df)

        self.logger.info("Training the model")
        model.fit(features_df=features_df, targets_df=objectives_df, iteration_number=1000)
        assert model.trained
        self.logger.info("Model trained.")

        self.logger.info("Updating pareto.")
        pareto_frontier.update_pareto(objectives_df=objectives_df, parameters_df=random_params_df)
        self.logger.info("Pareto updated.")

        self.logger.info("Asserting suggestions work.")
        num_successful_suggestions = 3
        for i in range(num_successful_suggestions):
            suggestion = utility_function_optimizer.suggest()
            assert suggestion in optimization_problem.parameter_space
            self.logger.info(f"[{i+1}/{num_successful_suggestions}] successfully produced suggestion: {suggestion}")

        self.logger.info(f"Done testing. Objective function: {objective_function_config_name}, utility_function: {utility_function_type_name}, optimizer: {utility_function_optimizer_type_name}.")
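
The core assertion pattern in this test is pytest.raises: the with-block passes only if the expected exception is raised inside it. A self-contained sketch follows, with a hypothetical NotTrainedError and ToyModel standing in for UnableToProduceGuidedSuggestionException and the real surrogate model.

import pytest

class NotTrainedError(Exception):
    """Hypothetical stand-in for UnableToProduceGuidedSuggestionException."""

class ToyModel:
    def __init__(self):
        self.trained = False

    def predict(self, x: float) -> float:
        if not self.trained:
            raise NotTrainedError("fit the model before predicting")
        return x

def test_untrained_model_raises():
    model = ToyModel()
    # The assertion passes precisely because predict() fails loudly
    # instead of returning garbage.
    with pytest.raises(NotTrainedError):
        model.predict(1.0)
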
Example #25
    def test_default_config(self, objective_function_config_name):
        objective_function_config = objective_function_config_store.get_config_by_name(
            objective_function_config_name)
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)

        rf_config = homogeneous_random_forest_config_store.default
        multi_objective_rf = MultiObjectiveHomogeneousRandomForest(
            model_config=rf_config,
            input_space=objective_function.parameter_space,
            output_space=objective_function.output_space,
            logger=self.logger)

        num_training_samples = 1000
        num_testing_samples = 100
        train_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_training_samples)
        train_objectives_df = objective_function.evaluate_dataframe(
            train_params_df)

        test_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_testing_samples)
        test_objectives_df = objective_function.evaluate_dataframe(
            test_params_df)

        multi_objective_rf.fit(features_df=train_params_df,
                               targets_df=train_objectives_df,
                               iteration_number=num_training_samples)
        multi_objective_predictions = multi_objective_rf.predict(
            features_df=train_params_df, include_only_valid_rows=True)

        # TRAINING DATA
        #
        separator = "------------------------------------------------------------------------------------"
        print(separator)
        print("--------------------------------------- TRAIN --------------------------------------")
        print(separator)
        training_gof = multi_objective_rf.compute_goodness_of_fit(
            features_df=train_params_df,
            targets_df=train_objectives_df,
            data_set_type=DataSetType.TRAIN)
        for objective_name in objective_function.output_space.dimension_names:
            print(separator)
            print(objective_name)
            print(training_gof[objective_name].to_json(indent=2))

        # TESTING DATA
        #
        print(separator)
        print("--------------------------------------- TEST ---------------------------------------")
        print(separator)
        testing_gof = multi_objective_rf.compute_goodness_of_fit(
            features_df=test_params_df,
            targets_df=test_objectives_df,
            data_set_type=DataSetType.TEST_KNOWN_RANDOM)
        for objective_name in objective_function.output_space.dimension_names:
            print(separator)
            print(objective_name)
            print(testing_gof[objective_name].to_json(indent=2))
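
As a rough analogue of the train/test goodness-of-fit comparison above, the same idea can be sketched with scikit-learn's RandomForestRegressor and r2_score. Both are assumptions here: the project's compute_goodness_of_fit reports richer statistics than a single R² value.

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

rng = np.random.default_rng(17)
X_train = rng.uniform(-1.0, 1.0, size=(1000, 3))
y_train = (X_train ** 2).sum(axis=1)
X_test = rng.uniform(-1.0, 1.0, size=(100, 3))
y_test = (X_test ** 2).sum(axis=1)

model = RandomForestRegressor(n_estimators=10).fit(X_train, y_train)
# Train fit is typically near-perfect; the test score is the honest one.
print("train R^2:", r2_score(y_train, model.predict(X_train)))
print("test  R^2:", r2_score(y_test, model.predict(X_test)))
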
Example #26
    def test_hierarchical_quadratic_cold_start_random_configs(
            self, restart_num, use_remote_optimizer):

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        random_state = random.Random()
        # Let's set up random seeds so that we can easily repeat failed experiments
        #
        random_state.seed(restart_num)
        bayesian_optimizer_config_store.parameter_space.random_state = random_state
        objective_function.parameter_space.random_state = random_state

        optimizer_config = bayesian_optimizer_config_store.parameter_space.random()

        # We can make this test more useful as a Unit Test by restricting its duration.
        #
        optimizer_config.min_samples_required_for_guided_design_of_experiments = 20
        if optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__:
            random_forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
            random_forest_config.n_estimators = min(
                random_forest_config.n_estimators, 5)
            decision_tree_config = random_forest_config.decision_tree_regression_model_config
            decision_tree_config.min_samples_to_fit = 10
            decision_tree_config.n_new_samples_before_refit = 10

        if optimizer_config.experiment_designer_config.numeric_optimizer_implementation == GlowWormSwarmOptimizer.__name__:
            optimizer_config.experiment_designer_config.glow_worm_swarm_optimizer_config.num_iterations = 5

        if optimizer_config.experiment_designer_config.numeric_optimizer_implementation == RandomSearchOptimizer.__name__:
            random_search_config = optimizer_config.experiment_designer_config.random_search_optimizer_config
            random_search_config.num_samples_per_iteration = min(
                random_search_config.num_samples_per_iteration, 1000)

        print(f"[Restart: {restart_num}] Creating a BayesianOptimizer with the following config:")
        print(optimizer_config.to_json(indent=2))

        if not use_remote_optimizer:
            bayesian_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)
        else:
            bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)

        num_guided_samples = optimizer_config.min_samples_required_for_guided_design_of_experiments + 5
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            y = objective_function.evaluate_point(suggested_params)
            print(
                f"[Restart: {restart_num}][Sample: {i}/{num_guided_samples}] {suggested_params}, y: {y}"
            )

            input_values_df = pd.DataFrame({
                param_name: [param_value]
                for param_name, param_value in suggested_params
            })
            target_values_df = y.to_dataframe()
            bayesian_optimizer.register(
                parameter_values_pandas_frame=input_values_df,
                target_values_pandas_frame=target_values_df)

        best_config_point, best_objective = bayesian_optimizer.optimum(
            optimum_definition=OptimumDefinition.BEST_OBSERVATION)
        print(f"[Restart: {restart_num}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
        self.validate_optima(optimizer=bayesian_optimizer)

        if not use_remote_optimizer:
            # Test if pickling works
            #
            pickled_optimizer = pickle.dumps(bayesian_optimizer)
            unpickled_optimizer = pickle.loads(pickled_optimizer)
            assert unpickled_optimizer.suggest() in bayesian_optimizer.optimization_problem.parameter_space
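
The pickling check at the end of this test follows a general pattern: serialize a stateful object, restore it, and verify the restored copy still behaves. A minimal sketch with a hypothetical ToyOptimizer (not the real BayesianOptimizer):

import pickle

class ToyOptimizer:
    def __init__(self):
        self.observations = []

    def register(self, y: float) -> None:
        self.observations.append(y)

    def suggest(self) -> int:
        # Depends on internal state, so the assertion below exercises
        # whether that state survived the pickle round-trip.
        return len(self.observations)

optimizer = ToyOptimizer()
optimizer.register(3.14)
restored = pickle.loads(pickle.dumps(optimizer))
assert restored.suggest() == optimizer.suggest()
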
Example #27
    def test_multi_objective_optimization(self,
                                          objective_function_implementation,
                                          minimize, num_output_dimensions,
                                          num_points):
        if objective_function_implementation == Hypersphere:
            hypersphere_radius = 10
            objective_function_config = Point(
                implementation=Hypersphere.__name__,
                hypersphere_config=Point(num_objectives=num_output_dimensions,
                                         minimize=minimize,
                                         radius=hypersphere_radius))
        else:
            objective_function_config = Point(
                implementation=MultiObjectiveNestedPolynomialObjective.__name__,
                multi_objective_nested_polynomial_config=Point(
                    num_objectives=num_output_dimensions,
                    objective_function_implementation=NestedPolynomialObjective.__name__,
                    nested_polynomial_objective_config=Point(
                        num_nested_polynomials=2,
                        nested_function_implementation=PolynomialObjective.__name__,
                        polynomial_objective_config=Point(
                            seed=17,
                            input_domain_dimension=2,
                            input_domain_min=-2**10,
                            input_domain_width=2**11,
                            max_degree=2,
                            include_mixed_coefficients=True,
                            percent_coefficients_zeroed=0.0,
                            coefficient_domain_min=-10.0,
                            coefficient_domain_width=9.0,
                            include_noise=False,
                            noise_coefficient_of_variation=0.0))))
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)
        optimization_problem = objective_function.default_optimization_problem

        if objective_function_implementation == MultiObjectiveNestedPolynomialObjective:
            # We need to modify the default optimization problem to respect the "minimize" argument.
            #
            objectives = []
            for i, default_objective in enumerate(optimization_problem.objectives):
                # Use a separate variable here: reassigning `minimize` itself would
                # clobber the test's "all"/"some" argument after the first iteration.
                if minimize == "all":
                    minimize_objective = True
                elif minimize == "some":
                    minimize_objective = ((i % 2) == 0)
                else:
                    minimize_objective = False
                new_objective = Objective(name=default_objective.name,
                                          minimize=minimize_objective)
                objectives.append(new_objective)
            optimization_problem.objectives = objectives

        optimizer_config = bayesian_optimizer_config_store.get_config_by_name(
            "default_multi_objective_optimizer_config")
        self.logger.info(optimizer_config)

        optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config)

        assert optimizer.optimizer_config.surrogate_model_implementation == MultiObjectiveHomogeneousRandomForest.__name__

        # We can now go through the optimization loop, at each point validating that:
        #   1) The suggested point is valid.
        #   2) The volume of the pareto frontier is monotonically increasing.

        lower_bounds_on_pareto_volume = []
        upper_bounds_on_pareto_volume = []

        for i in range(num_points):
            suggestion = optimizer.suggest()
            assert suggestion in optimization_problem.parameter_space
            objectives = objective_function.evaluate_point(suggestion)
            optimizer.register(
                parameter_values_pandas_frame=suggestion.to_dataframe(),
                target_values_pandas_frame=objectives.to_dataframe())

            if i > 10:
                pareto_volume_estimator = optimizer.pareto_frontier.approximate_pareto_volume(
                    num_samples=1000000)
                lower_bound, upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(
                    alpha=0.95)
                lower_bounds_on_pareto_volume.append(lower_bound)
                upper_bounds_on_pareto_volume.append(upper_bound)

        pareto_volumes_over_time_df = pd.DataFrame({
            'lower_bounds': lower_bounds_on_pareto_volume,
            'upper_bounds': upper_bounds_on_pareto_volume
        })

        # If we had precise volume measurements, we would want to assert that the volume of the pareto frontier is monotonically
        # increasing. Since we only have estimates, we cannot insist on strict monotonicity. But we can assert approximate
        # monotonicity: any dip between consecutive volume estimates must stay above a small negative threshold. Checking over
        # larger windows as well guards against a slow downward drift.
        #
        threshold = -0.1
        for periods in [1, 10, 20]:
            for bound_name in ['lower_bounds', 'upper_bounds']:
                min_pct_increase = pareto_volumes_over_time_df[bound_name].pct_change(periods=periods).fillna(0).min()
                if not (min_pct_increase > threshold):
                    print(pareto_volumes_over_time_df)
                    assert min_pct_increase > threshold
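
The windowed pct_change check deserves a standalone illustration: with periods > 1, each value is compared against the one `periods` steps earlier, so a small local dip passes while a sustained downward drift trips the threshold. A minimal sketch on made-up volume estimates:

import pandas as pd

volumes = pd.Series([1.00, 1.02, 1.01, 1.05, 1.04, 1.10])  # noisy but rising
threshold = -0.1
for periods in [1, 2]:
    # Worst relative change over a window of `periods` steps.
    worst_dip = volumes.pct_change(periods=periods).fillna(0).min()
    assert worst_dip > threshold, f"volume drifted down over a {periods}-step window"
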