Exemplo n.º 1
0
    def test_construct_feature_dataframe_no_context(self):
        """Verify construct_feature_dataframe() output when no context is supplied.

        The feature dataframe must have one namespaced column per parameter
        dimension plus a trailing 'contains_context' column that is False for
        every row.

        Fix: removed an `output_space` local that was built but never used
        (the problem uses objective_function.output_space directly).
        """
        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])

        n_samples = 100
        parameter_df = optimization_problem.parameter_space.random_dataframe(
            n_samples)
        feature_df = optimization_problem.construct_feature_dataframe(
            parameters_df=parameter_df)

        # One column per parameter dimension plus the 'contains_context' flag.
        assert feature_df.shape == (
            n_samples,
            len(optimization_problem.parameter_space.dimension_names) + 1)

        # Feature columns are prefixed with the parameter space name.
        expected_columns = sorted([
            f"three_level_quadratic_config.{n}"
            for n in optimization_problem.parameter_space.dimension_names
        ])
        assert (
            feature_df.columns[:-1].sort_values() == expected_columns).all()

        # With no context, the flag column is last and all-False.
        assert feature_df.columns[-1] == "contains_context"
        assert not feature_df.contains_context.any()
Exemplo n.º 2
0
    def test_hierarchical_quadratic_cold_start(self):
        """Cold-start a local and a remote Bayesian optimizer on the three-level quadratic.

        For each of two restarts, configures an optimizer pair, drives each
        through 50 suggest/evaluate/register cycles, then checks the reported
        best-observation optimum via validate_optima().
        """

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        # Unconstrained scalar objective 'y'.
        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        num_restarts = 2
        for restart_num in range(num_restarts):

            # NOTE(review): this mutates the object returned by
            # bayesian_optimizer_config_store.default — assumes the store hands
            # out a fresh copy each call; confirm, otherwise the mutation leaks
            # into other tests using the same default.
            optimizer_config = bayesian_optimizer_config_store.default
            optimizer_config.min_samples_required_for_guided_design_of_experiments = 20
            optimizer_config.homogeneous_random_forest_regression_model_config.n_estimators = 10
            optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.splitter = "best"
            optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.min_samples_to_fit = 10
            optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.n_new_samples_before_refit = 2

            # The same problem/config driven through both transport flavors.
            local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)

            remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)

            for bayesian_optimizer in [local_optimizer, remote_optimizer]:
                num_guided_samples = 50
                for i in range(num_guided_samples):
                    suggested_params = bayesian_optimizer.suggest()
                    y = objective_function.evaluate_point(suggested_params)
                    print(
                        f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}"
                    )

                    # One-row dataframe built from the suggested point's
                    # (name, value) pairs.
                    input_values_df = pd.DataFrame({
                        param_name: [param_value]
                        for param_name, param_value in suggested_params
                    })
                    target_values_df = y.to_dataframe()
                    bayesian_optimizer.register(
                        feature_values_pandas_frame=input_values_df,
                        target_values_pandas_frame=target_values_df)
                best_config_point, best_objective = bayesian_optimizer.optimum(
                    optimum_definition=OptimumDefinition.BEST_OBSERVATION)
                print(
                    f"[Restart:  {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
                )
                self.validate_optima(optimizer=bayesian_optimizer)
    def test_lasso_hierarchical_categorical_predictions(self):
        """Fit a RegressionEnhancedRandomForest on the three-level quadratic; check fitted shapes and out-of-sample error.

        Trains on 100 random samples, asserts the shapes of the fitted
        artifacts, then predicts 10 test points one at a time and requires
        1 - R^2 below 1e-4.
        """
        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        rerf = RegressionEnhancedRandomForestRegressionModel(
            model_config=self.model_config,
            input_space=objective_function.parameter_space,
            output_space=objective_function.output_space)

        # fit model with same degree as true y
        num_train_x = 100
        x_train_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_train_x)
        y_train_df = objective_function.evaluate_dataframe(x_train_df)
        rerf.fit(x_train_df, y_train_df)
        num_detected_features = len(rerf.detected_feature_indices_)

        # Shape checks on the fitted model's internals.
        self.assertTrue(
            rerf.root_model_gradient_coef_.shape ==
            rerf.polynomial_features_powers_.shape,
            'Gradient coefficient shape is incorrect')
        self.assertTrue(
            rerf.fit_X_.shape == (num_train_x,
                                  rerf.polynomial_features_powers_.shape[0]),
            'Design matrix shape is incorrect')
        self.assertTrue(
            rerf.partial_hat_matrix_.shape == (num_detected_features,
                                               num_detected_features),
            'Hat matrix shape is incorrect')
        self.assertTrue(rerf.polynomial_features_powers_.shape == (28, 8),
                        'PolynomalFeature.power_ shape is incorrect')

        # test predictions
        predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
        num_test_x = 10

        # Predict on one random point at a time (single-row dataframes) —
        # presumably to exercise the single-point prediction path; the
        # original comment here was truncated mid-sentence.
        y_test_list = []
        predicted_y_list = []
        for _ in range(num_test_x):
            x_test_df = objective_function.parameter_space.random_dataframe(
                num_samples=1)
            y_test_df = objective_function.evaluate_dataframe(x_test_df)
            y_test_list.append(y_test_df['y'].values[0])

            predictions = rerf.predict(x_test_df)
            pred_df = predictions.get_dataframe()
            predicted_y_list.append(pred_df[predicted_value_col].values[0])

        # 1 - R^2 over the collected test points.
        predicted_y = np.array(predicted_y_list)
        y_test = np.array(y_test_list)
        residual_sum_of_squares = ((y_test - predicted_y)**2).sum()
        total_sum_of_squares = ((y_test - y_test.mean())**2).sum()
        unexplained_variance = residual_sum_of_squares / total_sum_of_squares
        self.assertTrue(unexplained_variance < 10**-4,
                        '1 - R^2 larger than expected')
Exemplo n.º 4
0
    def test_lasso_hierarchical_categorical_predictions(self):
        """Train a RegressionEnhancedRandomForest on the three-level quadratic; verify fitted shapes and out-of-sample 1 - R^2."""
        random.seed(11001)
        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        rerf = RegressionEnhancedRandomForestRegressionModel(
            model_config=self.model_config,
            input_space=objective_function.parameter_space,
            output_space=objective_function.output_space)

        # Fit the model with the same degree as the true y.
        # The input space consists of 3 2-d domains, 200 x 200 units each, so
        # random samples smaller than a certain size produce too few points to
        # train reliable models.
        # TODO: Good place to use a non-random training set design
        num_train_x = 600
        train_features_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_train_x)
        train_targets_df = objective_function.evaluate_dataframe(train_features_df)
        rerf.fit(train_features_df, train_targets_df)
        num_detected_features = len(rerf.detected_feature_indices_)

        # Shape checks on everything fit() produced.
        powers_shape = rerf.polynomial_features_powers_.shape
        self.assertTrue(
            rerf.root_model_gradient_coef_.shape == powers_shape,
            'Gradient coefficient shape is incorrect')
        self.assertTrue(
            rerf.fit_X_.shape == (num_train_x, powers_shape[0]),
            'Design matrix shape is incorrect')
        self.assertTrue(
            rerf.partial_hat_matrix_.shape == (num_detected_features,
                                               num_detected_features),
            'Hat matrix shape is incorrect')
        self.assertTrue(powers_shape == (34, 9),
                        'PolynomalFeature.power_ shape is incorrect')

        # Predict a fresh random sample and compare against the true values.
        predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
        num_test_x = 50
        test_features_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_test_x)
        pred_df = rerf.predict(test_features_df).get_dataframe()
        predicted_y = pred_df[predicted_value_col].to_numpy()
        y_test = objective_function.evaluate_dataframe(
            test_features_df).to_numpy().reshape(-1)

        # 1 - R^2 on the held-out sample must stay below the threshold.
        residual_sum_of_squares = ((y_test - predicted_y)**2).sum()
        total_sum_of_squares = ((y_test - y_test.mean())**2).sum()
        unexplained_variance = residual_sum_of_squares / total_sum_of_squares
        test_threshold = 10**-3
        self.assertTrue(
            unexplained_variance < test_threshold,
            f'1 - R^2 = {unexplained_variance} larger than expected ({test_threshold})'
        )
Exemplo n.º 5
0
    def setup_class(cls):
        """Sets up all the objects needed to test the RandomSearchOptimizer.

        To test the RandomSearchOptimizer we need to first construct:
        * an optimization problem
        * a utility function

        To construct a utility function we need the same set up as in the
        TestConfidenceBoundUtilityFunction test.

        :return:
        """
        # Tracing singletons must exist before any optimizer machinery runs.
        global_values.declare_singletons()
        global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)

        objective_function_config = objective_function_config_store.get_config_by_name(
            '2d_quadratic_concave_up')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        cls.input_space = objective_function.parameter_space
        cls.output_space = objective_function.output_space

        # Pre-generate a training set for the surrogate model.
        cls.input_values_dataframe = objective_function.parameter_space.random_dataframe(
            num_samples=2500)
        cls.output_values_dataframe = objective_function.evaluate_dataframe(
            cls.input_values_dataframe)

        cls.model_config = homogeneous_random_forest_config_store.default

        print(cls.model_config)

        # Surrogate model fitted on the full pre-generated training set.
        cls.model = MultiObjectiveHomogeneousRandomForest(
            model_config=cls.model_config,
            input_space=cls.input_space,
            output_space=cls.output_space)
        cls.model.fit(cls.input_values_dataframe,
                      cls.output_values_dataframe,
                      iteration_number=len(cls.input_values_dataframe.index))

        cls.utility_function_config = Point(
            utility_function_name="upper_confidence_bound_on_improvement",
            alpha=0.05)

        cls.optimization_problem = OptimizationProblem(
            parameter_space=cls.input_space,
            objective_space=cls.output_space,
            objectives=[Objective(name='y', minimize=True)])

        # Utility function wired to the fitted surrogate; direction taken
        # from the single objective.
        cls.utility_function = ConfidenceBoundUtilityFunction(
            function_config=cls.utility_function_config,
            surrogate_model=cls.model,
            minimize=cls.optimization_problem.objectives[0].minimize)
    def test_default_config(self, objective_function_config_name):
        """Fit MultiObjectiveLassoCrossValidated with its default config and print goodness of fit.

        Parameterized by objective function name; sample sizes are chosen per
        objective function so the models have enough data to fit.

        Fixes: the result of predict() was assigned to a never-used local
        (dropped; the call itself is kept as a smoke test), the fallback
        `assert False` now carries a message, and the repeated separator
        literal is bound once.
        """
        objective_function_config = objective_function_config_store.get_config_by_name(objective_function_config_name)
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

        lasso_model_config = lasso_cross_validated_config_store.default
        multi_objective_rf = MultiObjectiveLassoCrossValidated(
            model_config=lasso_model_config,
            input_space=objective_function.parameter_space,
            output_space=objective_function.output_space,
            logger=self.logger
        )

        # Sample sizes tuned per objective function; fail loudly on an
        # unrecognized parameterization.
        if objective_function_config_name == '2d_hypersphere_minimize_some':
            num_training_samples = 25
            num_testing_samples = 10
        elif objective_function_config_name == '10d_hypersphere_minimize_some':
            num_training_samples = 50
            num_testing_samples = 10
        elif objective_function_config_name == '5_mutually_exclusive_polynomials':
            num_training_samples = 100
            num_testing_samples = 50
        else:
            assert False, f"Unhandled objective_function_config_name: {objective_function_config_name}"

        train_params_df = objective_function.parameter_space.random_dataframe(num_samples=num_training_samples)
        train_objectives_df = objective_function.evaluate_dataframe(train_params_df)

        test_params_df = objective_function.parameter_space.random_dataframe(num_samples=num_testing_samples)
        test_objectives_df = objective_function.evaluate_dataframe(test_params_df)

        multi_objective_rf.fit(features_df=train_params_df, targets_df=train_objectives_df, iteration_number=num_training_samples)
        # Smoke-test the prediction path; the original assigned the result to
        # a variable that was never read.
        multi_objective_rf.predict(features_df=train_params_df, include_only_valid_rows=True)

        ruler = "------------------------------------------------------------------------------------"

        # TRAINING DATA
        #
        print(ruler)
        print("--------------------------------------- TRAIN --------------------------------------")
        print(ruler)
        training_gof = multi_objective_rf.compute_goodness_of_fit(features_df=train_params_df, targets_df=train_objectives_df, data_set_type=DataSetType.TRAIN)
        for objective_name in objective_function.output_space.dimension_names:
            print(ruler)
            print(objective_name)
            print(training_gof[objective_name].to_json(indent=2))

        # TESTING DATA
        print(ruler)
        print("--------------------------------------- TEST ---------------------------------------")
        print(ruler)
        testing_gof = multi_objective_rf.compute_goodness_of_fit(features_df=test_params_df, targets_df=test_objectives_df, data_set_type=DataSetType.TEST_KNOWN_RANDOM)
        for objective_name in objective_function.output_space.dimension_names:
            print(ruler)
            print(objective_name)
            print(testing_gof[objective_name].to_json(indent=2))
    def test_bayesian_optimizer_on_simple_2d_quadratic_function_pre_heated(
            self):
        """ Tests the bayesian optimizer on a simple quadratic function first feeding the optimizer a lot of data.

        Registers 10000 random observations up front, then runs 20 guided
        suggest/evaluate/register cycles, validates the optima, and dumps the
        trace to a JSON file under self.temp_dir.
        """

        objective_function_config = objective_function_config_store.get_config_by_name(
            '2d_quadratic_concave_up')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)
        # Pre-heat: a large batch of random observations before any guidance.
        random_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=10000)

        y_df = objective_function.evaluate_dataframe(random_params_df)

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default,
            logger=self.logger)
        bayesian_optimizer.register(random_params_df, y_df)

        num_guided_samples = 20
        for i in range(num_guided_samples):
            # Suggest the parameters
            suggested_params = bayesian_optimizer.suggest()
            target_value = objective_function.evaluate_point(suggested_params)

            self.logger.info(
                f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}"
            )

            # Register the observation with the optimizer
            bayesian_optimizer.register(suggested_params.to_dataframe(),
                                        target_value.to_dataframe())

        self.validate_optima(bayesian_optimizer)
        best_config_point, best_objective = bayesian_optimizer.optimum()
        self.logger.info(
            f"Optimum: {best_objective} Best Configuration: {best_config_point}"
        )
        # Persist the trace for post-mortem inspection, then reset it.
        trace_output_path = os.path.join(self.temp_dir, "PreHeatedTrace.json")
        self.logger.info(f"Writing trace to {trace_output_path}")
        global_values.tracer.dump_trace_to_file(
            output_file_path=trace_output_path)
        global_values.tracer.clear_events()
    def test_lasso_hierarchical_categorical_predictions(self):
        """Fit LassoCrossValidated over a degree-2 polynomial basis of the three-level quadratic and verify near-perfect out-of-sample fit."""
        random.seed(11001)
        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        # Stack a degree-2 polynomial basis (with bias term) on top of the
        # original parameter space.
        polynomial_features_adapter = ContinuousToPolynomialBasisHypergridAdapter(
            adaptee=objective_function.parameter_space,
            degree=2,
            include_bias=True,
            interaction_only=False)

        lasso_cross_validated_model = LassoCrossValidatedRegressionModel(
            model_config=self.model_config,
            input_space=polynomial_features_adapter,
            output_space=objective_function.output_space)
        # The model's input_space already stacks the polynomial basis onto the
        # original input space, so input-feature validation can be skipped.
        lasso_cross_validated_model.skip_input_filtering_on_predict = True

        # Fit the model with the same degree as the true y.
        # The input space consists of 3 2-d domains, 200 x 200 units each, so
        # random samples smaller than a certain size produce too few points to
        # train reliable models.
        # TODO: Good place to use a non-random training set design
        num_train_x = 300
        train_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_train_x)
        train_targets_df = objective_function.evaluate_dataframe(train_params_df)
        lasso_cross_validated_model.fit(train_params_df,
                                        train_targets_df,
                                        iteration_number=0)

        # Predict a fresh random sample and compare against the true values.
        num_test_x = 50
        test_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_test_x)
        y_test = objective_function.evaluate_dataframe(
            test_params_df).to_numpy().reshape(-1)
        pred_df = lasso_cross_validated_model.predict(test_params_df).get_dataframe()

        predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
        predicted_y = pred_df[predicted_value_col].to_numpy()

        # 1 - R^2 on the held-out sample must stay below the threshold.
        residual_sum_of_squares = ((y_test - predicted_y)**2).sum()
        total_sum_of_squares = ((y_test - y_test.mean())**2).sum()
        unexplained_variance = residual_sum_of_squares / total_sum_of_squares
        test_threshold = 10**-6
        print(f'Asserting {unexplained_variance} < {test_threshold}')
        assert unexplained_variance < test_threshold, f'1 - R^2 = {unexplained_variance} larger than expected ({test_threshold})'
    def test_glow_worm_on_three_level_quadratic(self):
        """Warm up a random forest on the three-level quadratic, then check that every GlowWormSwarmOptimizer suggestion lies in the parameter space."""
        # Unconstrained scalar objective 'y'.
        output_space = SimpleHypergrid(
            name="output",
            dimensions=[
                ContinuousDimension(name='y', min=-math.inf, max=math.inf)
            ])

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        # Let's warm up the model a bit
        #
        num_warmup_samples = 1000
        warmup_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_warmup_samples)
        warmup_objectives_df = objective_function.evaluate_dataframe(warmup_params_df)

        model = HomogeneousRandomForestRegressionModel(
            model_config=self.model_config,
            input_space=objective_function.parameter_space,
            output_space=output_space)
        model.fit(feature_values_pandas_frame=warmup_params_df,
                  target_values_pandas_frame=warmup_objectives_df,
                  iteration_number=num_warmup_samples)

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        utility_function = ConfidenceBoundUtilityFunction(
            function_config=self.utility_function_config,
            surrogate_model=model,
            minimize=optimization_problem.objectives[0].minimize)

        glow_worm_swarm_optimizer = GlowWormSwarmOptimizer(
            optimization_problem=optimization_problem,
            utility_function=utility_function,
            optimizer_config=glow_worm_swarm_optimizer_config_store.default)

        # Every suggestion must fall inside the parameter space.
        num_iterations = 5
        for i in range(num_iterations):
            suggested_params = glow_worm_swarm_optimizer.suggest()
            print(f"[{i+1}/{num_iterations}] {suggested_params.to_json()}")
            self.assertTrue(
                suggested_params in objective_function.parameter_space)
Exemplo n.º 10
0
    def test_named_configs(self, config_name):
        """For a named objective function config: point evaluations land in the output space, and dataframe evaluations preserve the input index."""
        objective_function_config = objective_function_config_store.get_config_by_name(
            config_name)
        print(objective_function_config.to_json(indent=2))
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        # Single-point evaluations must produce values inside the output space.
        for _ in range(100):
            point = objective_function.parameter_space.random()
            evaluated = objective_function.evaluate_point(point)
            assert evaluated in objective_function.output_space

        # Dataframe evaluations of every size 1..99 must keep the input index.
        for num_samples in range(1, 100):
            params_df = objective_function.parameter_space.random_dataframe(
                num_samples=num_samples)
            objectives_df = objective_function.evaluate_dataframe(params_df)
            assert objectives_df.index.equals(params_df.index)
Exemplo n.º 11
0
    def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(
            self):
        """ Tests the bayesian optimizer on a simple quadratic function with no prior data.

        Runs 1000 guided suggest/evaluate/register cycles, logging the interim
        optimum every 20 iterations, then checks that the final optimum lies in
        both spaces and matches the best registered observation.
        """
        objective_function_config = objective_function_config_store.get_config_by_name(
            '2d_quadratic_concave_up')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default,
            logger=self.logger)

        num_guided_samples = 1000
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            target_value = objective_function.evaluate_point(suggested_params)
            self.logger.info(
                f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}"
            )

            bayesian_optimizer.register(suggested_params.to_dataframe(),
                                        target_value.to_dataframe())
            # Periodic progress logging of the interim optimum.
            if i > 20 and i % 20 == 0:
                best_config_point, best_objective = bayesian_optimizer.optimum(
                )
                self.logger.info(
                    f"[{i}/{num_guided_samples}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
                )

        self.validate_optima(bayesian_optimizer)
        best_config, optimum = bayesian_optimizer.optimum()
        assert objective_function.parameter_space.contains_point(best_config)
        assert objective_function.output_space.contains_point(optimum)
        _, all_targets = bayesian_optimizer.get_all_observations()
        # NOTE(review): `min()[0]` does a positional lookup on the Series
        # returned by min(); newer pandas deprecates integer-position access
        # via [] on a labeled Series — verify on the pinned pandas version.
        assert optimum.y == all_targets.min()[0]
        self.logger.info(
            f"Optimum: {optimum} best configuration: {best_config}")
Exemplo n.º 12
0
    def setUp(self):
        """Start the optimizer gRPC microservice and connect client stubs.

        Fix: instead of hard-coding port 50051 (which fails whenever the port
        is already bound, e.g. by a leftover server or a parallel run), scan a
        range of ports — consistent with the port-scanning setup_method used
        elsewhere in this suite. The chosen port is recorded on self.port.
        """
        self.logger = create_logger(self.__class__.__name__)

        # Start up the gRPC service. Try a bunch of ports before giving up.
        #
        max_num_tries = 100
        num_tries = 0
        for port in range(50051, 50051 + max_num_tries):
            num_tries += 1
            try:
                self.server = OptimizerMicroserviceServer(port=port, num_threads=10)
                self.server.start()
                self.port = port
                break
            except Exception:
                self.logger.info(f"Failed to create OptimizerMicroserviceServer on port {port}")
                if num_tries == max_num_tries:
                    raise

        self.optimizer_service_channel = grpc.insecure_channel(f'localhost:{self.port}')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
        self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

        objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
        self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

        self.optimization_problem = OptimizationProblem(
            parameter_space=self.objective_function.parameter_space,
            objective_space=self.objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)]
        )
Exemplo n.º 13
0
    def test_hierarchical_quadratic_cold_start(self):
        """Cold-start BayesianOptimizer on the three-level quadratic across many restarts.

        NOTE(review): 1000 restarts x 200 guided samples each makes this an
        extremely long-running test — presumably intended for a long-haul
        suite; confirm it is not in the default test selection.
        """

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        # Unconstrained scalar objective 'y'.
        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        num_restarts = 1000
        for restart_num in range(num_restarts):
            # Fresh optimizer per restart — each one starts with no data.
            bayesian_optimizer = BayesianOptimizer(
                optimization_problem=optimization_problem,
                optimizer_config=bayesian_optimizer_config_store.default,
                logger=self.logger)

            num_guided_samples = 200
            for i in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = objective_function.evaluate_point(suggested_params)
                self.logger.info(
                    f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

                input_values_df = suggested_params.to_dataframe()
                target_values_df = y.to_dataframe()
                bayesian_optimizer.register(input_values_df, target_values_df)
            self.validate_optima(bayesian_optimizer)
            best_config_point, best_objective = bayesian_optimizer.optimum()
            self.logger.info(
                f"[{restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
            )
Exemplo n.º 14
0
    def test_bayesian_optimizer_with_random_near_incumbent(self):
        """Exercise the RandomNearIncumbentOptimizer-backed experiment designer on a two-objective waves problem."""
        objective_function_config = objective_function_config_store.get_config_by_name(
            'multi_objective_waves_3_params_2_objectives_half_pi_phase_difference'
        )
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)
        optimization_problem = objective_function.default_optimization_problem

        optimizer_config = bayesian_optimizer_config_store.get_config_by_name(
            'default_with_random_near_incumbent_config')
        assert optimizer_config.experiment_designer_config.numeric_optimizer_implementation == "RandomNearIncumbentOptimizer"
        optimizer_config.experiment_designer_config.fraction_random_suggestions = 0

        # Let's give it a little more resolution.
        #
        optimizer_config.experiment_designer_config.multi_objective_probability_of_improvement_config.num_monte_carlo_samples = 200

        bayesian_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config)

        # Pre-heat the optimizer with a batch of random observations.
        random_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=1000)
        objectives_df = objective_function.evaluate_dataframe(random_params_df)
        bayesian_optimizer.register(
            parameter_values_pandas_frame=random_params_df,
            target_values_pandas_frame=objectives_df)

        # Run a handful of guided suggest/evaluate/register cycles.
        num_suggestions = 10
        for suggestion_number in range(num_suggestions):
            parameters = bayesian_optimizer.suggest()
            objectives = objective_function.evaluate_point(parameters)
            self.logger.info(
                f"[{suggestion_number}/{num_suggestions}] parameters: {parameters}, objectives: {objectives}"
            )
            bayesian_optimizer.register(
                parameter_values_pandas_frame=parameters.to_dataframe(),
                target_values_pandas_frame=objectives.to_dataframe())
Exemplo n.º 15
0
    def setup_method(self, method):
        """Spins up the gRPC optimizer service and client-side helpers used by each test.

        Scans ports starting at 50051 until the service starts successfully
        (trying up to max_num_tries ports), then creates an optimizer factory,
        an optimizer monitor, and a default single-objective optimization
        problem over the '2d_quadratic_concave_up' objective function.
        """
        self.logger = create_logger(self.__class__.__name__)

        # Start up the gRPC service. Try a bunch of times before giving up.
        #
        max_num_tries = 100
        num_tries = 0
        for port in range(50051, 50051 + max_num_tries):
            num_tries += 1
            try:
                self.server = OptimizerServicesServer(port=port,
                                                      num_threads=10)
                self.server.start()
                self.port = port
                break
            except Exception:
                # Was a bare `except:` — that would also swallow KeyboardInterrupt
                # and SystemExit. Most likely the port is already in use; move on
                # to the next one, but re-raise once we run out of candidates.
                self.logger.info(
                    f"Failed to create OptimizerServicesServer on port {port}"
                )
                if num_tries == max_num_tries:
                    raise

        self.optimizer_service_channel = grpc.insecure_channel(
            f'localhost:{self.port}')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(
            grpc_channel=self.optimizer_service_channel, logger=self.logger)
        self.optimizer_monitor = OptimizerMonitor(
            grpc_channel=self.optimizer_service_channel, logger=self.logger)

        objective_function_config = objective_function_config_store.get_config_by_name(
            '2d_quadratic_concave_up')
        self.objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)

        self.optimization_problem = OptimizationProblem(
            parameter_space=self.objective_function.parameter_space,
            objective_space=self.objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])
    def test_rerf_hierarchical_categorical_predictions(self):
        """Fits RERF on a hierarchical quadratic and checks it explains nearly all test variance."""
        random.seed(11001)
        config = objective_function_config_store.get_config_by_name('three_level_quadratic')
        objective = ObjectiveFunctionFactory.create_objective_function(objective_function_config=config)

        rerf = RegressionEnhancedRandomForestRegressionModel(
            model_config=self.model_config,
            input_space=objective.parameter_space,
            output_space=objective.output_space
        )

        # fit model with same degree as true y
        # The input space consists of 3 2-d domains 200 x 200 units.  Hence random samples smaller than a certain size will produce too few points to
        # train reliable models.
        # TODO: Good place to use a non-random training set design
        num_train_x = 300
        train_features_df = objective.parameter_space.random_dataframe(num_samples=num_train_x)
        train_targets_df = objective.evaluate_dataframe(train_features_df)
        rerf.fit(train_features_df, train_targets_df)

        # Score held-out predictions against the true objective values.
        predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
        num_test_x = 50
        test_features_df = objective.parameter_space.random_dataframe(num_samples=num_test_x)
        actual_y = objective.evaluate_dataframe(test_features_df).to_numpy().reshape(-1)

        pred_df = rerf.predict(test_features_df).get_dataframe()
        predicted_y = pred_df[predicted_value_col].to_numpy()

        # 1 - R^2: fraction of variance the model fails to explain.
        residual_sum_of_squares = ((actual_y - predicted_y) ** 2).sum()
        total_sum_of_squares = ((actual_y - actual_y.mean()) ** 2).sum()
        unexplained_variance = residual_sum_of_squares / total_sum_of_squares
        test_threshold = 10**-6
        print(unexplained_variance, test_threshold)
        assert unexplained_variance < test_threshold, f'1 - R^2 = {unexplained_variance} larger than expected ({test_threshold})'
Exemplo n.º 17
0
    ).join(
        on_external_dimension=CategoricalDimension(name="uncertainty_type", values=["coefficient_of_variation"]),
        subgrid=SimpleHypergrid(
            name="coefficient_of_variation_config",
            dimensions=[ContinuousDimension(name="value", min=0, max=1)]
        )
    ).join(
        on_external_dimension=CategoricalDimension(name="use_objective_function", values=[True]),
        subgrid=objective_function_config_store.parameter_space
    ),
    default=Point(
        uncertainty_type="constant",
        use_objective_function=True,
        predicted_value_degrees_of_freedom=10,
        constant_uncertainty_config=Point(value=1),
        objective_function_config=objective_function_config_store.get_config_by_name("three_level_quadratic")
    ),
    description=""
)

# Register a named pass-through model config backed by the
# 'three_level_quadratic' objective function, with constant uncertainty.
#
_three_level_quadratic_config_point = Point(
    uncertainty_type="constant",
    use_objective_function=True,
    predicted_value_degrees_of_freedom=10,
    constant_uncertainty_config=Point(value=1),
    objective_function_config=objective_function_config_store.get_config_by_name("three_level_quadratic")
)
multi_objective_pass_through_model_config_store.add_config_by_name(
    config_name="three_level_quadratic",
    config_point=_three_level_quadratic_config_point
)
Exemplo n.º 18
0
    def test_hierarchical_quadratic_cold_start_random_configs(self):
        """Cold-starts a BayesianOptimizer on the three-level quadratic under many random optimizer configs.

        Every restart seeds all sources of randomness with the restart number,
        so a failing restart can be replayed in isolation. The heaviest config
        knobs are trimmed down so each individual run stays short.
        """
        objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

        output_space = SimpleHypergrid(
            name="output",
            dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
        )

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)]
        )

        random_state = random.Random()
        num_restarts = 200
        for restart_num in range(num_restarts):
            # Let's set up random seeds so that we can easily repeat failed experiments
            #
            random_state.seed(restart_num)
            bayesian_optimizer_config_store.parameter_space.random_state = random_state
            objective_function.parameter_space.random_state = random_state

            optimizer_config = bayesian_optimizer_config_store.parameter_space.random()

            # The goal here is to make sure the optimizer works with a lot of different configurations.
            # So let's make sure each run is not too long.
            #
            optimizer_config.min_samples_required_for_guided_design_of_experiments = 50
            if optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__:
                forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
                forest_config.n_estimators = min(forest_config.n_estimators, 5)
                tree_config = forest_config.decision_tree_regression_model_config
                tree_config.min_samples_to_fit = 10
                tree_config.n_new_samples_before_refit = 10

            if optimizer_config.experiment_designer_config.numeric_optimizer_implementation == GlowWormSwarmOptimizer.__name__:
                optimizer_config.experiment_designer_config.glow_worm_swarm_optimizer_config.num_iterations = 5

            self.logger.info(
                f"[Restart: {restart_num}/{num_restarts}] Creating a BayesianOptimimizer with the following config: "
            )
            self.logger.info(f"Optimizer config: {optimizer_config.to_json(indent=2)}")
            bayesian_optimizer = BayesianOptimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config,
                logger=self.logger
            )

            # Run past the guided-design threshold so the surrogate model actually gets used.
            num_guided_samples = optimizer_config.min_samples_required_for_guided_design_of_experiments + 50
            for sample_num in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = objective_function.evaluate_point(suggested_params)
                self.logger.info(
                    f"[Restart: {restart_num}/{num_restarts}][Sample: {sample_num}/{num_guided_samples}] {suggested_params}, y: {y}"
                )
                bayesian_optimizer.register(suggested_params.to_dataframe(), y.to_dataframe())

            best_config_point, best_objective = bayesian_optimizer.optimum()
            self.logger.info(
                f"[Restart: {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
            )
Exemplo n.º 19
0
    def test_optimizers_against_untrained_models(self, objective_function_config_name, utility_function_type_name, utility_function_optimizer_type_name):
        """Tests that the utility function optimizers throw appropriate exceptions when the utility function cannot be evaluated.

        Once the surrogate model is trained and the pareto frontier updated, the same
        optimizer must start producing valid suggestions.

        :return:
        """
        # BUGFIX: the 'utility_function:' field previously interpolated
        # utility_function_optimizer_type_name, duplicating the 'optimizer:' field.
        self.logger.info(f"Creating test artifacts for objective function: {objective_function_config_name}, utility_function: {utility_function_type_name}, optimizer: {utility_function_optimizer_type_name}.")
        model_config = homogeneous_random_forest_config_store.default
        objective_function_config = objective_function_config_store.get_config_by_name(objective_function_config_name)
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
        optimization_problem = objective_function.default_optimization_problem

        model = MultiObjectiveHomogeneousRandomForest(
            model_config=model_config,
            input_space=optimization_problem.feature_space,
            output_space=optimization_problem.objective_space,
            logger=self.logger
        )
        pareto_frontier = ParetoFrontier(optimization_problem=optimization_problem)

        # Instantiate the utility function matching the parametrized type name.
        #
        if utility_function_type_name == ConfidenceBoundUtilityFunction.__name__:
            utility_function_config = Point(utility_function_name="upper_confidence_bound_on_improvement", alpha=0.05)
            utility_function = ConfidenceBoundUtilityFunction(
                function_config=utility_function_config,
                surrogate_model=model,
                minimize=optimization_problem.objectives[0].minimize,
                logger=self.logger
            )
        elif utility_function_type_name == MultiObjectiveProbabilityOfImprovementUtilityFunction.__name__:
            utility_function_config = multi_objective_probability_of_improvement_utility_function_config_store.default
            utility_function = MultiObjectiveProbabilityOfImprovementUtilityFunction(
                function_config=utility_function_config,
                pareto_frontier=pareto_frontier,
                surrogate_model=model,
                logger=self.logger
            )
        else:
            # BUGFIX: this branch previously asserted with no message, unlike its sibling below.
            assert False, f"Unknown utility_function_type_name: {utility_function_type_name}"

        # Select the optimizer config matching the parametrized optimizer type name.
        #
        if utility_function_optimizer_type_name == RandomSearchOptimizer.__name__:
            utility_function_optimizer_config = random_search_optimizer_config_store.default
        elif utility_function_optimizer_type_name == GlowWormSwarmOptimizer.__name__:
            utility_function_optimizer_config = glow_worm_swarm_optimizer_config_store.default
        elif utility_function_optimizer_type_name == RandomNearIncumbentOptimizer.__name__:
            utility_function_optimizer_config = random_near_incumbent_optimizer_config_store.default
        else:
            assert False, f"Unknown utility_function_optimizer_type_name: {utility_function_optimizer_type_name}"

        utility_function_optimizer = UtilityFunctionOptimizerFactory.create_utility_function_optimizer(
            utility_function=utility_function,
            optimizer_type_name=utility_function_optimizer_type_name,
            optimizer_config=utility_function_optimizer_config,
            optimization_problem=optimization_problem,
            pareto_frontier=pareto_frontier,
            logger=self.logger
        )

        assert not model.trained

        # With an untrained model every suggestion must fail with the dedicated exception.
        self.logger.info("Asserting the optimizer is throwing appropriate exceptions.")
        num_failed_suggestions = 3
        for i in range(num_failed_suggestions):
            with pytest.raises(expected_exception=UnableToProduceGuidedSuggestionException):
                utility_function_optimizer.suggest()
            self.logger.info(f"[{i+1}/{num_failed_suggestions}] worked.")


        # Now let's train the model a bit and make sure that we can produce the suggestions afterwards
        #
        random_params_df = optimization_problem.parameter_space.random_dataframe(1000)
        objectives_df = objective_function.evaluate_dataframe(random_params_df)
        features_df = optimization_problem.construct_feature_dataframe(parameters_df=random_params_df)

        self.logger.info("Training the model")
        model.fit(features_df=features_df, targets_df=objectives_df, iteration_number=1000)
        assert model.trained
        self.logger.info("Model trained.")

        self.logger.info("Updating pareto.")
        pareto_frontier.update_pareto(objectives_df=objectives_df, parameters_df=random_params_df)
        self.logger.info("Pareto updated.")

        self.logger.info("Asserting suggestions work.")
        num_successful_suggestions = 3
        for i in range(num_successful_suggestions):
            suggestion = utility_function_optimizer.suggest()
            assert suggestion in optimization_problem.parameter_space
            self.logger.info(f"[{i+1}/{num_successful_suggestions}] successfully produced suggestion: {suggestion}")

        # BUGFIX: same wrong-variable interpolation as the opening log line.
        self.logger.info(f"Done testing. Objective function: {objective_function_config_name}, utility_function: {utility_function_type_name}, optimizer: {utility_function_optimizer_type_name}.")
Exemplo n.º 20
0
    def test_hierarchical_quadratic_cold_start_random_configs(
            self, restart_num, use_remote_optimizer):
        """Cold-starts a (local or remote) BayesianOptimizer on the three-level quadratic with a random config.

        All randomness is seeded with restart_num so failed parametrized runs
        can be repeated exactly. The random optimizer config is capped so the
        run stays short enough for a unit test. Local optimizers additionally
        get a pickle round-trip check at the end.
        """
        objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

        output_space = SimpleHypergrid(
            name="output",
            dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
        )

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)]
        )

        # Let's set up random seeds so that we can easily repeat failed experiments
        #
        random_state = random.Random()
        random_state.seed(restart_num)
        bayesian_optimizer_config_store.parameter_space.random_state = random_state
        objective_function.parameter_space.random_state = random_state

        optimizer_config = bayesian_optimizer_config_store.parameter_space.random()

        # We can make this test more useful as a Unit Test by restricting its duration.
        #
        optimizer_config.min_samples_required_for_guided_design_of_experiments = 20
        if optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__:
            forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
            forest_config.n_estimators = min(forest_config.n_estimators, 5)
            tree_config = forest_config.decision_tree_regression_model_config
            tree_config.min_samples_to_fit = 10
            tree_config.n_new_samples_before_refit = 10

        designer_config = optimizer_config.experiment_designer_config
        if designer_config.numeric_optimizer_implementation == GlowWormSwarmOptimizer.__name__:
            designer_config.glow_worm_swarm_optimizer_config.num_iterations = 5

        if designer_config.numeric_optimizer_implementation == RandomSearchOptimizer.__name__:
            designer_config.random_search_optimizer_config.num_samples_per_iteration = min(
                designer_config.random_search_optimizer_config.num_samples_per_iteration, 1000)

        print(f"[Restart: {restart_num}] Creating a BayesianOptimimizer with the following config: ")
        print(optimizer_config.to_json(indent=2))

        if use_remote_optimizer:
            bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)
        else:
            bayesian_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)

        num_guided_samples = optimizer_config.min_samples_required_for_guided_design_of_experiments + 5
        for sample_num in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            y = objective_function.evaluate_point(suggested_params)
            print(f"[Restart: {restart_num}][Sample: {sample_num}/{num_guided_samples}] {suggested_params}, y: {y}")

            # Build a one-row dataframe from the suggested parameters.
            param_values_by_name = {param_name: [param_value] for param_name, param_value in suggested_params}
            input_values_df = pd.DataFrame(param_values_by_name)
            target_values_df = y.to_dataframe()
            bayesian_optimizer.register(
                parameter_values_pandas_frame=input_values_df,
                target_values_pandas_frame=target_values_df)

        best_config_point, best_objective = bayesian_optimizer.optimum(
            optimum_definition=OptimumDefinition.BEST_OBSERVATION)
        print(f"[Restart:  {restart_num}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
        self.validate_optima(optimizer=bayesian_optimizer)

        if not use_remote_optimizer:
            # Test if pickling works
            #
            unpickled_optimizer = pickle.loads(pickle.dumps(bayesian_optimizer))
            assert unpickled_optimizer.suggest() in bayesian_optimizer.optimization_problem.parameter_space
0
    def test_default_config(self, objective_function_config_name):
        """Fits a default-config MultiObjectiveHomogeneousRandomForest and prints goodness-of-fit.

        Trains on 1000 random samples of the parametrized objective function,
        then prints per-objective goodness-of-fit reports for both the training
        set and a fresh 100-sample test set.
        """
        objective_function_config = objective_function_config_store.get_config_by_name(
            objective_function_config_name)
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config)

        # Model under test: default random forest config over the objective's spaces.
        rf_config = homogeneous_random_forest_config_store.default
        multi_objective_rf = MultiObjectiveHomogeneousRandomForest(
            model_config=rf_config,
            input_space=objective_function.parameter_space,
            output_space=objective_function.output_space,
            logger=self.logger)

        num_training_samples = 1000
        num_testing_samples = 100
        train_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_training_samples)
        train_objectives_df = objective_function.evaluate_dataframe(
            train_params_df)

        test_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_testing_samples)
        test_objectives_df = objective_function.evaluate_dataframe(
            test_params_df)

        multi_objective_rf.fit(features_df=train_params_df,
                               targets_df=train_objectives_df,
                               iteration_number=num_training_samples)
        # NOTE(review): the result below is unused — presumably this call only
        # exercises the predict path after fitting; confirm it has no side
        # effects that the goodness-of-fit computation depends on.
        multi_objective_predictions = multi_objective_rf.predict(
            features_df=train_params_df, include_only_valid_rows=True)

        # TRAINING DATA
        #
        print(
            "------------------------------------------------------------------------------------"
        )
        print(
            "--------------------------------------- TRAIN --------------------------------------"
        )
        print(
            "------------------------------------------------------------------------------------"
        )
        training_gof = multi_objective_rf.compute_goodness_of_fit(
            features_df=train_params_df,
            targets_df=train_objectives_df,
            data_set_type=DataSetType.TRAIN)
        for objective_name in objective_function.output_space.dimension_names:
            print(
                "------------------------------------------------------------------------------------"
            )
            print(objective_name)
            print(training_gof[objective_name].to_json(indent=2))

        # TESTING DATA
        print(
            "------------------------------------------------------------------------------------"
        )
        print(
            "--------------------------------------- TEST ---------------------------------------"
        )
        print(
            "------------------------------------------------------------------------------------"
        )
        testing_gof = multi_objective_rf.compute_goodness_of_fit(
            features_df=test_params_df,
            targets_df=test_objectives_df,
            data_set_type=DataSetType.TEST_KNOWN_RANDOM)
        for objective_name in objective_function.output_space.dimension_names:
            print(
                "------------------------------------------------------------------------------------"
            )
            print(objective_name)
            print(testing_gof[objective_name].to_json(indent=2))