def __init__(self, optimization_problem: OptimizationProblem, optimizer_config: Point, logger=None):
    """Build a single-objective Bayesian optimizer from a problem description and a validated config point."""
    self.logger = logger if logger is not None else create_logger("BayesianOptimizer")

    # This optimizer currently handles exactly one objective.
    assert len(optimization_problem.objectives) == 1, "For now this is a single-objective optimizer."
    OptimizerBase.__init__(self, optimization_problem)

    assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."
    self.optimizer_config = optimizer_config

    # Surrogate model: only the homogeneous random forest is implemented so far.
    assert self.optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__, "TODO: implement more"
    self.surrogate_model = HomogeneousRandomForestRegressionModel(
        model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
        input_space=self.optimization_problem.parameter_space,  # TODO: change to feature space
        output_space=self.optimization_problem.objective_space,
        logger=self.logger
    )

    # Experiment designer: proposes the parameters for each new experiment.
    assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
    self.experiment_designer = ExperimentDesigner(
        designer_config=self.optimizer_config.experiment_designer_config,
        optimization_problem=self.optimization_problem,
        surrogate_model=self.surrogate_model,
        logger=self.logger
    )

    self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
        surrogate_model_fit_state=self.surrogate_model.fit_state
    )

    # Empty observation frames for the surrogate model.
    # TODO: this will need a better home - either a DataSet class or the surrogate model itself.
    feature_column_names = [dimension.name for dimension in self.optimization_problem.parameter_space.dimensions]
    target_column_names = [dimension.name for dimension in self.optimization_problem.objective_space.dimensions]
    self._feature_values_df = pd.DataFrame(columns=feature_column_names)
    self._target_values_df = pd.DataFrame(columns=target_column_names)
def __init__(
        self,
        grpc_channel,
        optimization_problem,
        optimizer_config,
        id,  # pylint: disable=redefined-builtin
        logger=None
):
    """Create a client-side proxy that talks to a remote optimizer over the given gRPC channel."""
    self.logger = logger if logger is not None else create_logger("BayesianOptimizerClient")

    OptimizerBase.__init__(self, optimization_problem)
    assert optimizer_config is not None

    # Keep the channel around and build a service stub on top of it.
    self._grpc_channel = grpc_channel
    self._optimizer_stub = OptimizerService_pb2_grpc.OptimizerServiceStub(self._grpc_channel)
    self.optimizer_config = optimizer_config
    self.id = id
def __init__(self, optimization_problem: OptimizationProblem, optimizer_config: Point, logger=None):
    """Assemble a multi-objective Bayesian optimizer: pareto frontier, surrogate model, experiment designer, and observation frames."""
    self.logger = logger if logger is not None else create_logger("BayesianOptimizer")

    OptimizerBase.__init__(self, optimization_problem)
    assert not optimization_problem.objective_space.is_hierarchical(), "Not supported."
    assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."

    # The surrogate predicts every objective dimension directly.
    self.surrogate_model_output_space = optimization_problem.objective_space
    self.optimizer_config = optimizer_config
    self.pareto_frontier: ParetoFrontier = ParetoFrontier(
        optimization_problem=self.optimization_problem,
        objectives_df=None
    )

    # Surrogate model. Even when the user requested a HomogeneousRandomForestRegressionModel, we still build a
    # MultiObjectiveHomogeneousRandomForest (with a single RandomForest inside), so only one interface must be maintained.
    assert self.optimizer_config.surrogate_model_implementation in (
        HomogeneousRandomForestRegressionModel.__name__,
        MultiObjectiveHomogeneousRandomForest.__name__
    )
    self.surrogate_model: MultiObjectiveRegressionModel = MultiObjectiveHomogeneousRandomForest(
        model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
        input_space=self.optimization_problem.feature_space,
        output_space=self.surrogate_model_output_space,
        logger=self.logger
    )

    # Experiment designer: proposes the parameters for each new experiment.
    assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
    self.experiment_designer = ExperimentDesigner(
        designer_config=self.optimizer_config.experiment_designer_config,
        optimization_problem=self.optimization_problem,
        pareto_frontier=self.pareto_frontier,
        surrogate_model=self.surrogate_model,
        logger=self.logger
    )

    self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
        surrogate_model_fit_state=self.surrogate_model.fit_state
    )

    # Column-name bookkeeping plus empty dataframes to accumulate observations for the surrogate model.
    self._parameter_names = [dimension.name for dimension in self.optimization_problem.parameter_space.dimensions]
    self._parameter_names_set = set(self._parameter_names)

    if self.optimization_problem.context_space:
        self._context_names = [dimension.name for dimension in self.optimization_problem.context_space.dimensions]
    else:
        self._context_names = []
    self._context_names_set = set(self._context_names)

    self._target_names = [dimension.name for dimension in self.optimization_problem.objective_space.dimensions]
    self._target_names_set = set(self._target_names)

    self._parameter_values_df = pd.DataFrame(columns=self._parameter_names)
    self._context_values_df = pd.DataFrame(columns=self._context_names)
    self._target_values_df = pd.DataFrame(columns=self._target_names)
def validate_optima(self, optimizer: OptimizerBase):
    """Check that the optimizer's optimum() definitions are mutually consistent.

    If the model is untrained or yields no usable predictions, the
    prediction-based optimum definitions must raise ValueError; otherwise the
    lower/upper confidence-bound optima must bracket the predicted-value
    optimum in the expected order (except when the best config's prediction
    has zero degrees of freedom, in which case no confidence interval for the
    predicted value exists).
    """
    should_raise_for_predicted_value = False
    should_raise_for_confidence_bounds = False
    if not optimizer.trained:
        # An untrained model cannot produce any predictions.
        should_raise_for_predicted_value = True
        should_raise_for_confidence_bounds = True
    else:
        parameters_df, _, _ = optimizer.get_all_observations()
        predictions = optimizer.predict(
            parameter_values_pandas_frame=parameters_df)
        predictions_df = predictions.get_dataframe()
        if len(predictions_df.index) == 0:
            should_raise_for_predicted_value = True
            should_raise_for_confidence_bounds = True

        # Drop nulls and zeroes: confidence bounds require predictions with
        # non-null, non-zero degrees of freedom.
        #
        predictions_df = predictions_df[predictions_df[
            Prediction.LegalColumnNames.PREDICTED_VALUE_DEGREES_OF_FREEDOM.
            value].notna() & (predictions_df[
                Prediction.LegalColumnNames.
                PREDICTED_VALUE_DEGREES_OF_FREEDOM.value] != 0)]
        if len(predictions_df.index) == 0:
            should_raise_for_confidence_bounds = True

    if should_raise_for_predicted_value:
        # If the predicted value can't be computed, the bounds can't be either.
        assert should_raise_for_confidence_bounds

        # Computing prediction based optima should fail if the surrogate model is not fitted.
        #
        with pytest.raises(ValueError):
            optimizer.optimum(
                OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG)
    else:
        predicted_best_config, predicted_optimum = optimizer.optimum(
            OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG)

        if should_raise_for_confidence_bounds:
            with pytest.raises(ValueError):
                optimizer.optimum(OptimumDefinition.
                                  UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG)
            with pytest.raises(ValueError):
                optimizer.optimum(OptimumDefinition.
                                  LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG)
        else:
            # Compute the optimum under each bound definition at 90/95/99% confidence.
            ucb_90_ci_config, ucb_90_ci_optimum = optimizer.optimum(
                OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.1)
            ucb_95_ci_config, ucb_95_ci_optimum = optimizer.optimum(
                OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.05)
            ucb_99_ci_config, ucb_99_ci_optimum = optimizer.optimum(
                OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.01)
            lcb_90_ci_config, lcb_90_ci_optimum = optimizer.optimum(
                OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.1)
            lcb_95_ci_config, lcb_95_ci_optimum = optimizer.optimum(
                OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.05)
            lcb_99_ci_config, lcb_99_ci_optimum = optimizer.optimum(
                OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
                alpha=0.01)

            # At the very least we can assert the ordering. Note that the configs corresponding to each of the below confidence bounds can be different, as confidence intervals
            # change width non-linearily both with degrees of freedom, and with prediction variance.
            #
            if not (lcb_99_ci_optimum.lower_confidence_bound <=
                    lcb_95_ci_optimum.lower_confidence_bound <=
                    lcb_90_ci_optimum.lower_confidence_bound <=
                    predicted_optimum.predicted_value):
                # If the prediction for predicted_value has too few degrees of freedom, it's impossible to construct a confidence interval for it.
                # If it was possible, then the inequality above would always hold. If it's not possible, then the inequality above can fail.
                #
                optimum_predicted_value_prediction = optimizer.predict(
                    parameter_values_pandas_frame=predicted_best_config.
                    to_dataframe())
                optimum_predicted_value_prediction_df = optimum_predicted_value_prediction.get_dataframe(
                )
                degrees_of_freedom = optimum_predicted_value_prediction_df[
                    Prediction.LegalColumnNames.
                    PREDICTED_VALUE_DEGREES_OF_FREEDOM.value][0]
                if degrees_of_freedom == 0:
                    # Zero degrees of freedom: only the bounds among themselves must be ordered.
                    assert lcb_99_ci_optimum.lower_confidence_bound <= lcb_95_ci_optimum.lower_confidence_bound <= lcb_90_ci_optimum.lower_confidence_bound
                else:
                    # Ordering violated despite sufficient degrees of freedom: dump the values and fail.
                    print(lcb_99_ci_optimum.lower_confidence_bound,
                          lcb_95_ci_optimum.lower_confidence_bound,
                          lcb_90_ci_optimum.lower_confidence_bound,
                          predicted_optimum.predicted_value)
                    assert False

            if not (predicted_optimum.predicted_value <=
                    ucb_90_ci_optimum.upper_confidence_bound <=
                    ucb_95_ci_optimum.upper_confidence_bound <=
                    ucb_99_ci_optimum.upper_confidence_bound):
                # Same degrees-of-freedom escape hatch as above, for the upper bounds.
                optimum_predicted_value_prediction = optimizer.predict(
                    parameter_values_pandas_frame=predicted_best_config.
                    to_dataframe())
                optimum_predicted_value_prediction_df = optimum_predicted_value_prediction.get_dataframe(
                )
                degrees_of_freedom = optimum_predicted_value_prediction_df[
                    Prediction.LegalColumnNames.
                    PREDICTED_VALUE_DEGREES_OF_FREEDOM.value][0]
                if degrees_of_freedom == 0:
                    assert ucb_90_ci_optimum.upper_confidence_bound <= ucb_95_ci_optimum.upper_confidence_bound <= ucb_99_ci_optimum.upper_confidence_bound
                else:
                    print(predicted_optimum.predicted_value,
                          ucb_90_ci_optimum.upper_confidence_bound,
                          ucb_95_ci_optimum.upper_confidence_bound,
                          ucb_99_ci_optimum.upper_confidence_bound)
                    assert False
def validate_optima(self, optimizer: OptimizerBase):
    """Check that the optimizer's optimum() definitions are mutually consistent.

    If the surrogate model is not fitted (or yields no usable predictions), the
    prediction-based optimum definitions must raise ValueError. Otherwise the
    lower/upper confidence-bound optima must bracket the predicted-value
    optimum in the expected order.
    """
    should_raise_for_predicted_value = False
    should_raise_for_confidence_bounds = False
    if not optimizer.get_surrogate_model_fit_state().fitted:
        # An unfitted model cannot produce predictions at all.
        should_raise_for_predicted_value = True
        should_raise_for_confidence_bounds = True
    else:
        features_df, _ = optimizer.get_all_observations()
        predictions = optimizer.predict(feature_values_pandas_frame=features_df)
        predictions_df = predictions.get_dataframe()
        if len(predictions_df.index) == 0:
            should_raise_for_predicted_value = True
            should_raise_for_confidence_bounds = True

        # Drop nulls and zeroes: confidence bounds require predictions with
        # non-null, non-zero degrees of freedom.
        #
        # BUGFIX: the comparison must be parenthesized. '&' binds tighter than
        # '!=' in Python, so the previous unparenthesized form evaluated
        # (mask & series) != 0 instead of mask & (series != 0).
        dof_column = Prediction.LegalColumnNames.PREDICTED_VALUE_DEGREES_OF_FREEDOM.value
        predictions_df = predictions_df[
            predictions_df[dof_column].notna() & (predictions_df[dof_column] != 0)
        ]
        if len(predictions_df.index) == 0:
            should_raise_for_confidence_bounds = True

    if should_raise_for_predicted_value:
        # If the predicted value can't be computed, the bounds can't be either.
        self.assertTrue(should_raise_for_confidence_bounds)

        # Computing prediction based optima should fail if the surrogate model is not fitted.
        #
        with self.assertRaises(ValueError):
            optimizer.optimum(OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG)
    else:
        predicted_best_config, predicted_optimum = optimizer.optimum(OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG)

        if should_raise_for_confidence_bounds:
            with self.assertRaises(ValueError):
                optimizer.optimum(OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG)
            with self.assertRaises(ValueError):
                optimizer.optimum(OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG)
        else:
            # Compute the optimum under each bound definition at 90/95/99% confidence.
            ucb_90_ci_config, ucb_90_ci_optimum = optimizer.optimum(OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.1)
            ucb_95_ci_config, ucb_95_ci_optimum = optimizer.optimum(OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.05)
            ucb_99_ci_config, ucb_99_ci_optimum = optimizer.optimum(OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.01)
            lcb_90_ci_config, lcb_90_ci_optimum = optimizer.optimum(OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.1)
            lcb_95_ci_config, lcb_95_ci_optimum = optimizer.optimum(OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.05)
            lcb_99_ci_config, lcb_99_ci_optimum = optimizer.optimum(OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG, alpha=0.01)

            # At the very least we can assert the ordering. Note that the configs corresponding to each of the below confidence bounds can be different, as confidence intervals
            # change width non-linearily both with degrees of freedom, and with prediction variance.
            #
            assert lcb_99_ci_optimum.lower_confidence_bound <= lcb_95_ci_optimum.lower_confidence_bound <= lcb_90_ci_optimum.lower_confidence_bound <= predicted_optimum.predicted_value
            assert predicted_optimum.predicted_value <= ucb_90_ci_optimum.upper_confidence_bound <= ucb_95_ci_optimum.upper_confidence_bound <= ucb_99_ci_optimum.upper_confidence_bound
def __init__(self, optimization_problem: OptimizationProblem, optimizer_config: Point, logger=None):
    """Set up a single-objective Bayesian optimizer whose surrogate model predicts only the lone objective."""
    self.logger = logger if logger is not None else create_logger("BayesianOptimizer")

    # This optimizer handles exactly one objective for now.
    assert len(optimization_problem.objectives) == 1, "For now this is a single-objective optimizer."
    OptimizerBase.__init__(self, optimization_problem)

    # The optimization_problem.objective_space can now be multi-dimensional (a milestone towards multi-objective
    # optimization), so carve out a smaller output space containing just the single objective for the surrogate model.
    # TODO: create multiple models each predicting a different objective. Also consider multi-objective models.
    #
    assert not optimization_problem.objective_space.is_hierarchical(), "Not supported."
    only_objective = optimization_problem.objectives[0]
    self.surrogate_model_output_space = SimpleHypergrid(
        name="surrogate_model_output_space",
        dimensions=[optimization_problem.objective_space[only_objective.name]]
    )

    assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."
    self.optimizer_config = optimizer_config

    # Surrogate model: only the homogeneous random forest is implemented so far.
    assert self.optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__, "TODO: implement more"
    self.surrogate_model = HomogeneousRandomForestRegressionModel(
        model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
        input_space=self.optimization_problem.feature_space,
        output_space=self.surrogate_model_output_space,
        logger=self.logger
    )

    # Experiment designer: proposes the parameters for each new experiment.
    assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
    self.experiment_designer = ExperimentDesigner(
        designer_config=self.optimizer_config.experiment_designer_config,
        optimization_problem=self.optimization_problem,
        surrogate_model=self.surrogate_model,
        logger=self.logger
    )

    self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
        surrogate_model_fit_state=self.surrogate_model.fit_state
    )

    # Column-name bookkeeping plus empty dataframes to accumulate observations for the surrogate model.
    self._parameter_names = [dimension.name for dimension in self.optimization_problem.parameter_space.dimensions]
    self._parameter_names_set = set(self._parameter_names)

    if self.optimization_problem.context_space:
        self._context_names = [dimension.name for dimension in self.optimization_problem.context_space.dimensions]
    else:
        self._context_names = []
    self._context_names_set = set(self._context_names)

    self._target_names = [dimension.name for dimension in self.optimization_problem.objective_space.dimensions]
    self._target_names_set = set(self._target_names)

    self._parameter_values_df = pd.DataFrame(columns=self._parameter_names)
    self._context_values_df = pd.DataFrame(columns=self._context_names)
    self._target_values_df = pd.DataFrame(columns=self._target_names)