def test_basic_functionality_on_2d_objective_space(self):
    """Sanity-check the pareto frontier built from random 2D objective points.

    Mainly used to help us develop the API. Both objectives (``y1`` and
    ``y2``) are declared with ``minimize=False``. A frontier is built from
    random objective values and, for every row on it, two invariants are
    asserted:

    * every non-pareto point is strictly smaller than the row in at least
      one objective, i.e. no non-pareto point dominates a pareto point;
    * no other pareto row is strictly greater in all objectives.
    """
    parameter_space = SimpleHypergrid(
        name='params',
        dimensions=[
            ContinuousDimension(name='x1', min=0, max=10)
        ]
    )

    objective_space = SimpleHypergrid(
        name='objectives',
        dimensions=[
            ContinuousDimension(name='y1', min=0, max=10),
            ContinuousDimension(name='y2', min=0, max=10)
        ]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name='y1', minimize=False),
            Objective(name='y2', minimize=False)
        ]
    )

    num_rows = 100000
    random_objectives_df = objective_space.random_dataframe(num_rows)

    # They don't match but they don't need to for this test.
    #
    random_params_df = parameter_space.random_dataframe(num_rows)

    pareto_frontier = ParetoFrontier(
        optimization_problem=optimization_problem,
        objectives_df=random_objectives_df,
        parameters_df=random_params_df
    )
    pareto_df = pareto_frontier.pareto_df

    non_pareto_index = random_objectives_df.index.difference(pareto_df.index)

    # Both of these are loop-invariant, so compute them once instead of
    # re-slicing the (large) objectives frame for every pareto row.
    #
    non_pareto_objectives_df = random_objectives_df.loc[non_pareto_index]
    num_non_pareto_rows = len(non_pareto_index)

    for i, row in pareto_df.iterrows():
        # Now let's make sure that no point in pareto is dominated by any non-pareto point.
        #
        assert (non_pareto_objectives_df < row).any(axis=1).sum() == num_non_pareto_rows

        # Let's also make sure that no point on the pareto is dominated by any other point there.
        #
        other_rows = pareto_df.index.difference([i])
        assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
def test_construct_feature_dataframe_context(self):
    """Check ``construct_feature_dataframe`` on a problem that has a context space.

    Verifies that:

    * omitting ``context_df`` raises ``ValueError`` matching
      ``"Context required"``;
    * with both frames supplied, the result is a ``pd.DataFrame`` with one
      row per sample and exactly the columns ``my_input_name.x``,
      ``contains_context`` and ``my_context_name.y``;
    * the ``contains_context`` column is truthy for every row.
    """
    input_space = SimpleHypergrid(
        name="my_input_name",
        dimensions=[ContinuousDimension(name="x", min=0, max=1)]
    )
    output_space = SimpleHypergrid(
        name="objective",
        dimensions=[ContinuousDimension(name="function_value", min=-10, max=10)]
    )
    context_space = SimpleHypergrid(
        name="my_context_name",
        dimensions=[ContinuousDimension(name="y", min=-1, max=1)]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        # we want to minimize the function
        objectives=[Objective(name="function_value", minimize=True)],
        context_space=context_space
    )

    n_samples = 100
    parameter_df = input_space.random_dataframe(n_samples)
    context_df = context_space.random_dataframe(n_samples)

    # The problem declares a context space, so parameters alone are rejected.
    with pytest.raises(ValueError, match="Context required"):
        optimization_problem.construct_feature_dataframe(parameters_df=parameter_df)

    feature_df = optimization_problem.construct_feature_dataframe(
        parameters_df=parameter_df,
        context_df=context_df
    )

    assert isinstance(feature_df, pd.DataFrame)
    assert feature_df.shape == (n_samples, 3)
    # Compare as plain lists: a mismatch then fails with a readable diff
    # instead of an element-wise comparison error on differing lengths.
    assert list(feature_df.columns) == [
        'my_input_name.x',
        'contains_context',
        'my_context_name.y'
    ]
    assert feature_df.contains_context.all()
class TestDecisionTreeRegressionModel:
    """Smoke tests for ``DecisionTreeRegressionModel`` on a simple linear mapping.

    Each test fits a model to ``y = gradient * x + y_intercept`` sampled at
    101 evenly spaced points in ``[0, 100]`` and prints the goodness-of-fit
    metrics computed on that same training data. The tests make no
    assertions about the metrics; they only check that fitting and scoring
    complete without raising.
    """

    @classmethod
    def setup_class(cls) -> None:
        """Call ``global_values.declare_singletons()`` and install a tracer named after this class."""
        global_values.declare_singletons()
        global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)

    @classmethod
    def teardown_class(cls) -> None:
        """Dump the tracer's contents to ``<cwd>/temp/TestDecisionTreeRegressionModel.json``."""
        temp_dir = os.path.join(os.getcwd(), "temp")
        # exist_ok=True avoids the check-then-create race when several test
        # classes try to create the shared temp directory at the same time.
        os.makedirs(temp_dir, exist_ok=True)
        trace_output_path = os.path.join(temp_dir, "TestDecisionTreeRegressionModel.json")
        print(f"Dumping trace to {trace_output_path}")
        global_values.tracer.dump_trace_to_file(output_file_path=trace_output_path)

    def setup_method(self, method) -> None:
        """Build the linear fixture data and the input/output spaces used by every test."""
        # Let's create a simple linear mapping
        self.gradient = 10
        self.y_intercept = 10
        self.input_values = np.linspace(start=0, stop=100, num=101, endpoint=True)
        self.output_values = self.input_values * self.gradient + self.y_intercept
        self.input_space = SimpleHypergrid(
            name="input",
            dimensions=[ContinuousDimension(name="x", min=0, max=100)]
        )
        self.output_space = SimpleHypergrid(
            name="output",
            dimensions=[ContinuousDimension(name="y", min=-math.inf, max=math.inf)]
        )

        self.input_pandas_dataframe = pd.DataFrame({"x": self.input_values})
        self.output_pandas_dataframe = pd.DataFrame({"y": self.output_values})

    def test_default_decision_tree_model(self) -> None:
        """Fit a model with the default config and print its training-set goodness of fit."""
        model_config = decision_tree_config_store.default
        model = DecisionTreeRegressionModel(
            model_config=model_config,
            input_space=self.input_space,
            output_space=self.output_space
        )
        model.fit(
            self.input_pandas_dataframe,
            self.output_pandas_dataframe,
            iteration_number=len(self.input_pandas_dataframe.index)
        )
        gof_metrics = model.compute_goodness_of_fit(
            features_df=self.input_pandas_dataframe,
            target_df=self.output_pandas_dataframe,
            data_set_type=DataSetType.TRAIN
        )
        print(gof_metrics)

    def test_random_decision_tree_models(self) -> None:
        """Fit models with 50 randomly drawn configs and print each one's goodness of fit."""
        # The iteration number reported to fit() is the number of rows the
        # model is actually fitted on, same as in the default-config test.
        num_observations = len(self.input_pandas_dataframe.index)

        num_iterations = 50
        for i in range(num_iterations):
            if i % 10 == 0:
                # Timezone-aware replacement for the deprecated utcnow().
                print(f"{datetime.datetime.now(datetime.timezone.utc)} {i}/{num_iterations}")
            model_config = decision_tree_config_store.parameter_space.random()
            print(str(model_config))
            model = DecisionTreeRegressionModel(
                model_config=model_config,
                input_space=self.input_space,
                output_space=self.output_space
            )
            model.fit(
                self.input_pandas_dataframe,
                self.output_pandas_dataframe,
                iteration_number=num_observations
            )
            gof_metrics = model.compute_goodness_of_fit(
                features_df=self.input_pandas_dataframe,
                target_df=self.output_pandas_dataframe,
                data_set_type=DataSetType.TRAIN
            )
            print(gof_metrics)
def test_optimization_with_context(self):
    """Exercise register / suggest / predict / optimum on a problem with a context space.

    The objective is a Gaussian blob in ``x`` whose position depends on the
    context variable ``y``. The test checks that:

    * the local optimizer rejects calls that omit the context or pass a
      context frame whose length differs from the parameter frame;
    * ``optimum`` raises for the definitions / argument combinations
      exercised below;
    * the remote optimizer raises ``NotImplementedError`` when given a
      context and ``grpc.RpcError`` when the required context is missing;
    * after 100 suggest/register rounds, the speculative optimum within
      context ``y=1`` has ``x > .6`` and within ``y=-1`` has ``x < .4``.
    """
    # Gaussian blob in x with position dependent on context variable y.
    def gaussian_blob(parameters, context):
        # DataFrame inputs keep their own index; anything else gets a single row.
        row_index = parameters.index if isinstance(parameters, pd.DataFrame) else [0]
        values = -np.exp(-50 * (parameters.x - 0.5 * context.y - 0.5)**2)
        return pd.DataFrame({'function_value': values}, index=row_index)

    input_space = SimpleHypergrid(
        name="input",
        dimensions=[ContinuousDimension(name="x", min=0, max=1)]
    )
    output_space = SimpleHypergrid(
        name="objective",
        dimensions=[ContinuousDimension(name="function_value", min=-10, max=10)]
    )
    context_space = SimpleHypergrid(
        name="context",
        dimensions=[ContinuousDimension(name="y", min=-1, max=1)]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        # we want to minimize the function
        objectives=[Objective(name="function_value", minimize=True)],
        context_space=context_space
    )

    # create some data points to eval
    n_samples = 5000
    parameter_df = input_space.random_dataframe(n_samples)
    context_df = context_space.random_dataframe(n_samples)
    target_df = gaussian_blob(parameter_df, context_df)

    # One row short of parameter_df, used to provoke the shape checks.
    misaligned_context_df = context_df.iloc[:-1]

    context_required = "Context required"
    incompatible_shape = "Incompatible shape of parameters and context"
    remote_context_unsupported = "Context not currently supported"

    local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
        optimization_problem=optimization_problem,
    )

    with pytest.raises(ValueError, match=context_required):
        local_optimizer.register(
            parameter_values_pandas_frame=parameter_df,
            target_values_pandas_frame=target_df
        )

    with pytest.raises(ValueError, match=incompatible_shape):
        local_optimizer.register(
            parameter_values_pandas_frame=parameter_df,
            target_values_pandas_frame=target_df,
            context_values_pandas_frame=misaligned_context_df
        )

    local_optimizer.register(
        parameter_values_pandas_frame=parameter_df,
        target_values_pandas_frame=target_df,
        context_values_pandas_frame=context_df
    )

    with pytest.raises(ValueError, match=context_required):
        local_optimizer.suggest()

    with pytest.raises(ValueError, match=context_required):
        local_optimizer.predict(parameter_values_pandas_frame=parameter_df)

    suggestion = local_optimizer.suggest(context=context_space.random())
    assert isinstance(suggestion, Point)
    assert suggestion in input_space

    with pytest.raises(ValueError, match=incompatible_shape):
        # unaligned parameters and context
        local_optimizer.predict(
            parameter_values_pandas_frame=parameter_df,
            context_values_pandas_frame=misaligned_context_df
        )

    predictions = local_optimizer.predict(
        parameter_values_pandas_frame=parameter_df,
        context_values_pandas_frame=context_df
    )
    predictions_df = predictions.get_dataframe()
    assert len(predictions_df) == len(parameter_df)

    remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
        optimization_problem=optimization_problem,
    )

    with pytest.raises(ValueError, match="not supported if context is provided"):
        local_optimizer.optimum(
            optimum_definition=OptimumDefinition.BEST_OBSERVATION,
            context=Point(y=0).to_dataframe()
        )

    with pytest.raises(ValueError, match="not supported if context is provided"):
        local_optimizer.optimum(optimum_definition=OptimumDefinition.BEST_OBSERVATION)

    with pytest.raises(ValueError, match="requires context to be not None"):
        local_optimizer.optimum(
            optimum_definition=OptimumDefinition.BEST_SPECULATIVE_WITHIN_CONTEXT
        )

    # can't register, predict, suggest with context on remote optimizer
    with pytest.raises(NotImplementedError, match=remote_context_unsupported):
        remote_optimizer.register(
            parameter_values_pandas_frame=parameter_df,
            target_values_pandas_frame=target_df,
            context_values_pandas_frame=context_df
        )

    with pytest.raises(NotImplementedError, match=remote_context_unsupported):
        remote_optimizer.predict(
            parameter_values_pandas_frame=parameter_df,
            context_values_pandas_frame=context_df
        )

    with pytest.raises(NotImplementedError, match=remote_context_unsupported):
        remote_optimizer.suggest(context=context_df)

    # context is missing but required by problem, should give error
    with pytest.raises(grpc.RpcError):
        remote_optimizer.register(
            parameter_values_pandas_frame=parameter_df,
            target_values_pandas_frame=target_df
        )

    # run some iterations on local optimizer to see we do something sensible
    for _ in range(100):
        # pick context at random
        random_context = context_space.random()
        suggested_config = local_optimizer.suggest(context=random_context)
        observed_target_df = gaussian_blob(suggested_config, random_context)
        local_optimizer.register(
            parameter_values_pandas_frame=suggested_config.to_dataframe(),
            target_values_pandas_frame=observed_target_df,
            context_values_pandas_frame=random_context.to_dataframe()
        )

    optimum_for_y_minus_one = local_optimizer.optimum(
        optimum_definition=OptimumDefinition.BEST_SPECULATIVE_WITHIN_CONTEXT,
        context=Point(y=-1).to_dataframe()
    )
    optimum_for_y_plus_one = local_optimizer.optimum(
        optimum_definition=OptimumDefinition.BEST_SPECULATIVE_WITHIN_CONTEXT,
        context=Point(y=1).to_dataframe()
    )
    assert optimum_for_y_plus_one.x > .6
    assert optimum_for_y_minus_one.x < .4