Example #1
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 optimizer_config: Point,
                 logger=None):
        if logger is None:
            logger = create_logger("BayesianOptimizer")
        self.logger = logger
        # Let's initialize the optimizer.
        #
        assert len(optimization_problem.objectives) == 1, \
            "For now this is a single-objective optimizer."
        OptimizerBase.__init__(self, optimization_problem)

        assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."
        self.optimizer_config = optimizer_config

        # Now let's put together the surrogate model.
        #
        assert self.optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__, "TODO: implement more"
        self.surrogate_model = HomogeneousRandomForestRegressionModel(
            model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
            input_space=self.optimization_problem.parameter_space,  # TODO: change to feature space
            output_space=self.optimization_problem.objective_space,
            logger=self.logger)

        # Now let's put together the experiment designer that will suggest parameters for each experiment.
        #
        assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
        self.experiment_designer = ExperimentDesigner(
            designer_config=self.optimizer_config.experiment_designer_config,
            optimization_problem=self.optimization_problem,
            surrogate_model=self.surrogate_model,
            logger=self.logger)

        self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
            surrogate_model_fit_state=self.surrogate_model.fit_state)

        # Also let's make sure we have the dataframes we need for the surrogate model.
        # TODO: this will need a better home - either a DataSet class or the surrogate model itself.
        self._feature_values_df = pd.DataFrame(columns=[
            dimension.name
            for dimension in self.optimization_problem.parameter_space.dimensions
        ])
        self._target_values_df = pd.DataFrame(columns=[
            dimension.name
            for dimension in self.optimization_problem.objective_space.dimensions
        ])
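A minimal construction sketch for this optimizer. Everything below except BayesianOptimizer, OptimizationProblem, and bayesian_optimizer_config_store is an assumption: the Spaces API names (SimpleHypergrid, ContinuousDimension, Objective) and the config store's .default attribute are inferred, not shown in the snippet above.

# A sketch, not the library's documented usage: the dimension and objective
# names ("x", "y") are hypothetical, and .default is assumed to return a
# Point inside bayesian_optimizer_config_store.parameter_space.
parameter_space = SimpleHypergrid(
    name="params",
    dimensions=[ContinuousDimension(name="x", min=0.0, max=1.0)])
objective_space = SimpleHypergrid(
    name="objectives",
    dimensions=[ContinuousDimension(name="y", min=-10.0, max=10.0)])
optimization_problem = OptimizationProblem(
    parameter_space=parameter_space,
    objective_space=objective_space,
    objectives=[Objective(name="y", minimize=True)])

optimizer = BayesianOptimizer(
    optimization_problem=optimization_problem,
    optimizer_config=bayesian_optimizer_config_store.default)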
Example #2
    def __init__(
            self,
            grpc_channel,
            optimization_problem,
            optimizer_config,
            id,  # pylint: disable=redefined-builtin
            logger=None):
        if logger is None:
            logger = create_logger("BayesianOptimizerClient")
        self.logger = logger

        OptimizerBase.__init__(self, optimization_problem)
        assert optimizer_config is not None

        self._grpc_channel = grpc_channel
        self._optimizer_stub = OptimizerService_pb2_grpc.OptimizerServiceStub(self._grpc_channel)
        self.optimizer_config = optimizer_config
        self.id = id
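This client only wires up the gRPC plumbing to a remote optimizer service. A construction sketch, assuming a service is already listening locally; the address, port, and id value are placeholders, and grpc.insecure_channel is the only call taken from the grpc package itself.

import grpc

# Placeholder endpoint; in practice the id would identify an optimizer
# that already exists on the service side.
channel = grpc.insecure_channel("localhost:50051")
optimizer_client = BayesianOptimizerClient(
    grpc_channel=channel,
    optimization_problem=optimization_problem,
    optimizer_config=optimizer_config,
    id="00000000-0000-0000-0000-000000000000")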
Example #3
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 optimizer_config: Point,
                 logger=None):
        if logger is None:
            logger = create_logger("BayesianOptimizer")
        self.logger = logger

        # Let's initialize the optimizer.
        #
        OptimizerBase.__init__(self, optimization_problem)

        assert not optimization_problem.objective_space.is_hierarchical(), "Not supported."
        assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."

        self.surrogate_model_output_space = optimization_problem.objective_space
        self.optimizer_config = optimizer_config
        self.pareto_frontier: ParetoFrontier = ParetoFrontier(
            optimization_problem=self.optimization_problem, objectives_df=None)

        # Now let's put together the surrogate model.
        #
        assert self.optimizer_config.surrogate_model_implementation in (
            HomogeneousRandomForestRegressionModel.__name__,
            MultiObjectiveHomogeneousRandomForest.__name__)

        # Note that even if the user requested a HomogeneousRandomForestRegressionModel, we still create a MultiObjectiveRegressionModel
        # with just a single RandomForest inside it. This means we have to maintain only a single interface.
        #
        self.surrogate_model: MultiObjectiveRegressionModel = MultiObjectiveHomogeneousRandomForest(
            model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
            input_space=self.optimization_problem.feature_space,
            output_space=self.surrogate_model_output_space,
            logger=self.logger)

        # Now let's put together the experiment designer that will suggest parameters for each experiment.
        #
        assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
        self.experiment_designer = ExperimentDesigner(
            designer_config=self.optimizer_config.experiment_designer_config,
            optimization_problem=self.optimization_problem,
            pareto_frontier=self.pareto_frontier,
            surrogate_model=self.surrogate_model,
            logger=self.logger)

        self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
            surrogate_model_fit_state=self.surrogate_model.fit_state)

        # Also let's make sure we have the dataframes we need for the surrogate model.
        #
        self._parameter_names = [
            dimension.name
            for dimension in self.optimization_problem.parameter_space.dimensions
        ]
        self._parameter_names_set = set(self._parameter_names)

        self._context_names = ([
            dimension.name
            for dimension in self.optimization_problem.context_space.dimensions
        ] if self.optimization_problem.context_space else [])
        self._context_names_set = set(self._context_names)

        self._target_names = [
            dimension.name
            for dimension in self.optimization_problem.objective_space.dimensions
        ]
        self._target_names_set = set(self._target_names)

        self._parameter_values_df = pd.DataFrame(columns=self._parameter_names)
        self._context_values_df = pd.DataFrame(columns=self._context_names)
        self._target_values_df = pd.DataFrame(columns=self._target_names)
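The name lists and sets above presumably exist so a combined observations frame can be split into parameter, context, and target frames by column membership. A self-contained sketch of that split; the combined frame and its column names are hypothetical, and context columns would be handled the same way.

import pandas as pd

# Hypothetical stand-ins for _parameter_names_set / _target_names_set.
parameter_names_set = {"x"}
target_names_set = {"y"}

observations_df = pd.DataFrame({"x": [0.1, 0.2], "y": [1.0, 0.5]})

# Select each group of columns by set membership.
parameters_df = observations_df[[c for c in observations_df.columns if c in parameter_names_set]]
targets_df = observations_df[[c for c in observations_df.columns if c in target_names_set]]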
Example #4
    def validate_optima(self, optimizer: OptimizerBase):
        should_raise_for_predicted_value = False
        should_raise_for_confidence_bounds = False
        if not optimizer.trained:
            should_raise_for_predicted_value = True
            should_raise_for_confidence_bounds = True
        else:
            parameters_df, _, _ = optimizer.get_all_observations()
            predictions = optimizer.predict(
                parameter_values_pandas_frame=parameters_df)
            predictions_df = predictions.get_dataframe()

            if len(predictions_df.index) == 0:
                should_raise_for_predicted_value = True
                should_raise_for_confidence_bounds = True

            # Drop nulls and zeroes.
            #
            dof_column = Prediction.LegalColumnNames.PREDICTED_VALUE_DEGREES_OF_FREEDOM.value
            predictions_df = predictions_df[
                predictions_df[dof_column].notna() & (predictions_df[dof_column] != 0)]

            if len(predictions_df.index) == 0:
                should_raise_for_confidence_bounds = True

        if should_raise_for_predicted_value:

            assert should_raise_for_confidence_bounds

            # Computing prediction based optima should fail if the surrogate model is not fitted.
            #
            with pytest.raises(ValueError):
                optimizer.optimum(
                    OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG)

        else:
            predicted_best_config, predicted_optimum = optimizer.optimum(
                OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG)

        if should_raise_for_confidence_bounds:

            with pytest.raises(ValueError):
                optimizer.optimum(
                    OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG)

            with pytest.raises(ValueError):
                optimizer.optimum(
                    OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG)
        else:
            ucb_90_ci_config, ucb_90_ci_optimum = optimizer.optimum(
                OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.1)
            ucb_95_ci_config, ucb_95_ci_optimum = optimizer.optimum(
                OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.05)
            ucb_99_ci_config, ucb_99_ci_optimum = optimizer.optimum(
                OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.01)

            lcb_90_ci_config, lcb_90_ci_optimum = optimizer.optimum(
                OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.1)
            lcb_95_ci_config, lcb_95_ci_optimum = optimizer.optimum(
                OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.05)
            lcb_99_ci_config, lcb_99_ci_optimum = optimizer.optimum(
                OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.01)

            # At the very least we can assert the ordering. Note that the configs corresponding to each of the below confidence bounds can be different,
            # as confidence intervals change width non-linearly both with degrees of freedom and with prediction variance.
            #
            if not (lcb_99_ci_optimum.lower_confidence_bound <=
                    lcb_95_ci_optimum.lower_confidence_bound <=
                    lcb_90_ci_optimum.lower_confidence_bound <=
                    predicted_optimum.predicted_value):
                # If the prediction for predicted_value has too few degrees of freedom, it's impossible to construct a confidence interval for it.
                # If it was possible, then the inequality above would always hold. If it's not possible, then the inequality above can fail.
                #
                optimum_predicted_value_prediction = optimizer.predict(
                    parameter_values_pandas_frame=predicted_best_config.to_dataframe())
                optimum_predicted_value_prediction_df = optimum_predicted_value_prediction.get_dataframe()
                degrees_of_freedom = optimum_predicted_value_prediction_df[
                    Prediction.LegalColumnNames.PREDICTED_VALUE_DEGREES_OF_FREEDOM.value][0]
                if degrees_of_freedom == 0:
                    assert lcb_99_ci_optimum.lower_confidence_bound <= lcb_95_ci_optimum.lower_confidence_bound <= lcb_90_ci_optimum.lower_confidence_bound
                else:
                    print(lcb_99_ci_optimum.lower_confidence_bound,
                          lcb_95_ci_optimum.lower_confidence_bound,
                          lcb_90_ci_optimum.lower_confidence_bound,
                          predicted_optimum.predicted_value)
                    assert False

            if not (predicted_optimum.predicted_value <=
                    ucb_90_ci_optimum.upper_confidence_bound <=
                    ucb_95_ci_optimum.upper_confidence_bound <=
                    ucb_99_ci_optimum.upper_confidence_bound):
                optimum_predicted_value_prediction = optimizer.predict(
                    parameter_values_pandas_frame=predicted_best_config.to_dataframe())
                optimum_predicted_value_prediction_df = optimum_predicted_value_prediction.get_dataframe()
                degrees_of_freedom = optimum_predicted_value_prediction_df[
                    Prediction.LegalColumnNames.PREDICTED_VALUE_DEGREES_OF_FREEDOM.value][0]
                if degrees_of_freedom == 0:
                    assert ucb_90_ci_optimum.upper_confidence_bound <= ucb_95_ci_optimum.upper_confidence_bound <= ucb_99_ci_optimum.upper_confidence_bound
                else:
                    print(predicted_optimum.predicted_value,
                          ucb_90_ci_optimum.upper_confidence_bound,
                          ucb_95_ci_optimum.upper_confidence_bound,
                          ucb_99_ci_optimum.upper_confidence_bound)
                    assert False
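The asserted ordering is just the monotonicity of a confidence interval in alpha: for a t-distributed prediction, the quantile t.ppf(1 - alpha/2, df) grows as alpha shrinks, so the 99% bounds enclose the 95% bounds, which enclose the 90% bounds, with the predicted value in the middle. A standalone sketch of that property; the mean, standard error, and degrees of freedom are arbitrary illustrative numbers, not values from the optimizer.

from scipy.stats import t

mean, standard_error, dof = 1.0, 0.25, 10  # arbitrary illustrative prediction

bounds = {}
for alpha in (0.1, 0.05, 0.01):
    t_value = t.ppf(1 - alpha / 2, dof)  # grows as alpha shrinks
    bounds[alpha] = (mean - t_value * standard_error,
                     mean + t_value * standard_error)

# The same ordering the test asserts. With dof == 0 no interval exists,
# which is exactly the degenerate case the test handles separately.
assert bounds[0.01][0] <= bounds[0.05][0] <= bounds[0.1][0] <= mean
assert mean <= bounds[0.1][1] <= bounds[0.05][1] <= bounds[0.01][1]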
Example #5
    def validate_optima(self, optimizer: OptimizerBase):
        should_raise_for_predicted_value = False
        should_raise_for_confidence_bounds = False
        if not optimizer.get_surrogate_model_fit_state().fitted:
            should_raise_for_predicted_value = True
            should_raise_for_confidence_bounds = True
        else:
            features_df, _ = optimizer.get_all_observations()
            predictions = optimizer.predict(feature_values_pandas_frame=features_df)
            predictions_df = predictions.get_dataframe()

            if len(predictions_df.index) == 0:
                should_raise_for_predicted_value = True
                should_raise_for_confidence_bounds = True

            # Drop nulls and zeroes. The comparison must be parenthesized:
            # in Python, & binds tighter than !=, so without the parentheses
            # this would parse as (mask & series) != 0.
            #
            dof_column = Prediction.LegalColumnNames.PREDICTED_VALUE_DEGREES_OF_FREEDOM.value
            predictions_df = predictions_df[
                predictions_df[dof_column].notna() & (predictions_df[dof_column] != 0)
            ]

            if len(predictions_df.index) == 0:
                should_raise_for_confidence_bounds = True

        if should_raise_for_predicted_value:

            self.assertTrue(should_raise_for_confidence_bounds)

            # Computing prediction based optima should fail if the surrogate model is not fitted.
            #
            with self.assertRaises(ValueError):
                optimizer.optimum(OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG)

        else:
            predicted_best_config, predicted_optimum = optimizer.optimum(OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG)

        if should_raise_for_confidence_bounds:

            with self.assertRaises(ValueError):
                optimizer.optimum(OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG)

            with self.assertRaises(ValueError):
                optimizer.optimum(OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG)
        else:
            ucb_90_ci_config, ucb_90_ci_optimum = optimizer.optimum(OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.1)
            ucb_95_ci_config, ucb_95_ci_optimum = optimizer.optimum(OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.05)
            ucb_99_ci_config, ucb_99_ci_optimum = optimizer.optimum(OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.01)

            lcb_90_ci_config, lcb_90_ci_optimum = optimizer.optimum(OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.1)
            lcb_95_ci_config, lcb_95_ci_optimum = optimizer.optimum(OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.05)
            lcb_99_ci_config, lcb_99_ci_optimum = optimizer.optimum(OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.01)

            # At the very least we can assert the ordering. Note that the configs corresponding to each of the below confidence bounds can be different,
            # as confidence intervals change width non-linearly both with degrees of freedom and with prediction variance.
            #
            assert lcb_99_ci_optimum.lower_confidence_bound <= lcb_95_ci_optimum.lower_confidence_bound <= lcb_90_ci_optimum.lower_confidence_bound <= predicted_optimum.predicted_value
            assert predicted_optimum.predicted_value <= ucb_90_ci_optimum.upper_confidence_bound <= ucb_95_ci_optimum.upper_confidence_bound <= ucb_99_ci_optimum.upper_confidence_bound
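Note the parentheses added in the degrees-of-freedom filter above: in Python, & binds tighter than !=, so the unparenthesized form parses as (mask & series) != 0 rather than mask & (series != 0). A minimal demonstration of the intended mask:

import pandas as pd

s = pd.Series([1.0, 0.0, None])

# Intended filter: not-null AND non-zero.
mask = s.notna() & (s != 0)
print(mask.tolist())  # [True, False, False]

# Without the parentheses, s.notna() & s != 0 evaluates (s.notna() & s)
# first and does not compute the intended mask.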
Example #6
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 optimizer_config: Point,
                 logger=None):
        if logger is None:
            logger = create_logger("BayesianOptimizer")
        self.logger = logger

        # Let's initialize the optimizer.
        #
        assert len(optimization_problem.objectives) == 1, \
            "For now this is a single-objective optimizer."
        OptimizerBase.__init__(self, optimization_problem)

        # Since the optimization_problem.objective_space can now be multi-dimensional (as a milestone towards multi-objective
        # optimization), we have to prepare a smaller objective space for the surrogate model.
        # TODO: create multiple models each predicting a different objective. Also consider multi-objective models.
        #
        assert not optimization_problem.objective_space.is_hierarchical(), "Not supported."
        only_objective = optimization_problem.objectives[0]
        self.surrogate_model_output_space = SimpleHypergrid(
            name="surrogate_model_output_space",
            dimensions=[
                optimization_problem.objective_space[only_objective.name]
            ])

        assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."
        self.optimizer_config = optimizer_config

        # Now let's put together the surrogate model.
        #
        assert self.optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__, "TODO: implement more"
        self.surrogate_model = HomogeneousRandomForestRegressionModel(
            model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
            input_space=self.optimization_problem.feature_space,
            output_space=self.surrogate_model_output_space,
            logger=self.logger)

        # Now let's put together the experiment designer that will suggest parameters for each experiment.
        #
        assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
        self.experiment_designer = ExperimentDesigner(
            designer_config=self.optimizer_config.experiment_designer_config,
            optimization_problem=self.optimization_problem,
            surrogate_model=self.surrogate_model,
            logger=self.logger)

        self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
            surrogate_model_fit_state=self.surrogate_model.fit_state)

        # Also let's make sure we have the dataframes we need for the surrogate model.
        #
        self._parameter_names = [
            dimension.name
            for dimension in self.optimization_problem.parameter_space.dimensions
        ]
        self._parameter_names_set = set(self._parameter_names)

        self._context_names = ([
            dimension.name
            for dimension in self.optimization_problem.context_space.dimensions
        ] if self.optimization_problem.context_space else [])
        self._context_names_set = set(self._context_names)

        self._target_names = [
            dimension.name
            for dimension in self.optimization_problem.objective_space.dimensions
        ]
        self._target_names_set = set(self._target_names)

        self._parameter_values_df = pd.DataFrame(columns=self._parameter_names)
        self._context_values_df = pd.DataFrame(columns=self._context_names)
        self._target_values_df = pd.DataFrame(columns=self._target_names)
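For a concrete picture of the objective-space narrowing in this example: given a multi-dimensional objective space, the surrogate's output space keeps only the first objective's dimension. A sketch; the objective names and the ContinuousDimension constructor are assumptions, while indexing a hypergrid by dimension name mirrors the snippet itself.

# Hypothetical two-objective space; only "score" feeds the surrogate.
objective_space = SimpleHypergrid(
    name="objectives",
    dimensions=[
        ContinuousDimension(name="score", min=0.0, max=1.0),
        ContinuousDimension(name="runtime", min=0.0, max=3600.0),
    ])

# Keep only the first objective's dimension, as the constructor above
# does with optimization_problem.objectives[0].
surrogate_model_output_space = SimpleHypergrid(
    name="surrogate_model_output_space",
    dimensions=[objective_space["score"]])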