def test_optimization_problem_none_context(self):
    """Round-trip an optimization problem without a context space through the encoder/decoder.

    The decoded problem must report ``context_space is None``, and its
    parameter, objective and feature spaces must accept points sampled from
    the original spaces (and the other way around).
    """

    def check_mutual_containment(decoded_space, original_space):
        # Sampling in both directions is the practical stand-in for comparing the two spaces.
        for _ in range(1000):
            assert decoded_space.random() in original_space
            assert original_space.random() in decoded_space

    parameter_space = SimpleHypergrid(
        name="test",
        dimensions=[
            ContinuousDimension(name="x", min=0, max=1),
            OrdinalDimension(name="y", ordered_values=[1, 2, 3, 5, 10]),
            CategoricalDimension(name="y2", values=[True, False]),
        ],
    )
    objective_space = SimpleHypergrid(
        name="z",
        dimensions=[
            # The objective name deliberately contains a newline and a space.
            ContinuousDimension(name="z\n special", min=-50, max=-49),
            ContinuousDimension(name="z1", min=-1, max=1),
        ],
    )
    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name="z\n special", minimize=True),
            Objective(name="z1", minimize=False),
        ],
    )

    encoded_problem = OptimizerServiceEncoder.encode_optimization_problem(optimization_problem)
    decoded_problem = OptimizerServiceDecoder.decode_optimization_problem(encoded_problem)

    print(f"Context space is: {decoded_problem.context_space}")
    assert decoded_problem.context_space is None

    # The remaining spaces must have survived the round trip.
    # Parameter Space
    check_mutual_containment(decoded_problem.parameter_space, parameter_space)

    # Output Space
    check_mutual_containment(decoded_problem.objective_space, objective_space)

    # Feature Space
    check_mutual_containment(decoded_problem.feature_space, optimization_problem.feature_space)
def test_randomly_generating_team_member(self):
    """Draw a random point from a one-dimensional categorical grid and check it belongs to the grid."""
    self.logger.info("Starting first check in test.")

    member_dimension = CategoricalDimension(
        name="member",
        values=["Ed", "Greg", "Sergiy", "Yaser", "Adam", "Zack"],
    )
    mlos_team = SimpleHypergrid(name="mlos_team", dimensions=[member_dimension])

    assert mlos_team.random() in mlos_team
class TestHierarchicalHypergrid3(unittest.TestCase):
    """ Tests the join on external dimension in hypergrids.

    In particular:
    * Hypergrid.join(subgrid, on_external_dimension=SomeDimension(...)) should:
        * Check if the dimension.name contains a subgrid name:
            * if yes - drop the prefix and call dimension_subgrid.join(subgrid, on_external_dimension)
            * otherwise we are joining here so:
                * if not dimension.intersects(self[dimension.name]): return self
                * self.joined_subgrids_by_pivot_dimension[dimension.name] = JoinedHypergrid(dimension, subgrid)
    * Randomly generating points from the supergrid should generate points from the newly joined subgrid
    * Point containment should work
    * Hypergrid containment should work (eventually)

    """

    def setUp(self):
        """Build a cache parameter space out of flat grids joined on pivot dimensions.

        The last join targets a dimension of an already joined subgrid through its
        prefixed name ('associative_cache_config.bucket_implementation').
        """
        self.cache_param_space = SimpleHypergrid(
            name='cache_param_space',
            dimensions=[
                CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
            ]
        )

        self.lru_cache_param_space = SimpleHypergrid(
            name='lru_cache_config',
            dimensions=[
                DiscreteDimension(name='size', min=1, max=2**20),
                OrdinalDimension(name='color', ordered_values=['green', 'orange', 'red'])
            ]
        )

        self.associative_cache_implementation_root_param_space = SimpleHypergrid(
            name='associative_cache_config',
            dimensions=[
                CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function', 'lowest_bits']),
                CategoricalDimension(name='bucket_implementation', values=['single_value', 'binary_search_tree', 'linked_list'])
            ]
        )

        self.mod_prime_hash_function_param_space = SimpleHypergrid(
            name='mod_prime_hash_function',
            dimensions=[
                OrdinalDimension(name='prime', ordered_values=[1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59])
            ]
        )

        self.lowest_bits_param_space = SimpleHypergrid(
            name='lowest_bits',
            dimensions=[
                DiscreteDimension(name='num_bits', min=1, max=64)
            ]
        )

        self.binary_search_tree_param_space = SimpleHypergrid(
            name='binary_search_tree',
            dimensions=[
                DiscreteDimension(name='max_depth', min=1, max=2**10)
            ]
        )

        self.linked_list_param_space = SimpleHypergrid(
            name='linked_list',
            dimensions=[
                DiscreteDimension(name='max_length', min=1, max=2**10)
            ]
        )

        self.associative_cache_implementation_param_space = self.associative_cache_implementation_root_param_space.join(
            subgrid=self.mod_prime_hash_function_param_space,
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function'])
        ).join(
            subgrid=self.lowest_bits_param_space,
            # The pivot values must be a list like in every other join of this fixture.
            # A bare string is presumably consumed as an iterable of characters, in which
            # case it would not intersect 'hash_function_name' and the subgrid would be
            # silently left out - TODO confirm against CategoricalDimension.
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['lowest_bits'])
        ).join(
            subgrid=self.binary_search_tree_param_space,
            on_external_dimension=CategoricalDimension(name='bucket_implementation', values=['binary_search_tree'])
        )

        self.cache_param_space = self.cache_param_space.join(
            subgrid=self.lru_cache_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['lru_cache'])
        ).join(
            subgrid=self.associative_cache_implementation_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['associative_cache'])
        ).join(
            subgrid=self.linked_list_param_space,
            on_external_dimension=CategoricalDimension(name='associative_cache_config.bucket_implementation', values=['linked_list'])
        )

    def test_external_dimension_join(self):
        """Every random point drawn from the joined space must be contained in that space."""
        for _ in range(10):
            print("################################################")
            random_config = self.cache_param_space.random()
            for param_name, value in random_config:
                print(param_name, value)
            # Assert containment instead of merely printing it, so a regression fails the test.
            self.assertIn(random_config, self.cache_param_space)
        print("################################################")
def test_optimization_problem(self):
    """Round-trip an optimization problem with a context space through the monitoring encoder/decoder.

    All four spaces (parameter, objective, context, feature) must accept
    points sampled from their originals and vice versa, and the objectives
    must keep their order, names and minimize flags.
    """

    def check_mutual_containment(decoded_space, original_space):
        # A = B iff A >= B && B <= A, approximated here by sampling in both directions.
        for _ in range(1000):
            assert decoded_space.random() in original_space
            assert original_space.random() in decoded_space

    parameter_space = SimpleHypergrid(
        name="test",
        dimensions=[
            ContinuousDimension(name="x", min=0, max=1),
            CategoricalDimension(name="y", values=[1, 2, 3]),
        ],
    )
    objective_space = SimpleHypergrid(
        name="z",
        dimensions=[
            ContinuousDimension(name="z", min=0, max=1),
            ContinuousDimension(name="z1", min=-1, max=1),
        ],
    )
    context_space = SimpleHypergrid(
        name="context_space",
        dimensions=[
            ContinuousDimension(name="x_c", min=0, max=1),
            CategoricalDimension(name="y_c", values=[1, 2, 3, 4, 6]),
        ],
    )
    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name="z", minimize=True),
            Objective(name="z1", minimize=False),
        ],
        context_space=context_space,
    )

    encoded_problem = OptimizerMonitoringServiceEncoder.encode_optimization_problem(optimization_problem)
    decoded_problem = OptimizerMonitoringServiceDecoder.decode_optimization_problem(encoded_problem)

    # Parameter Space
    check_mutual_containment(decoded_problem.parameter_space, parameter_space)

    # Output Space
    check_mutual_containment(decoded_problem.objective_space, objective_space)

    # Context Space
    check_mutual_containment(decoded_problem.context_space, context_space)

    # Feature Space
    check_mutual_containment(decoded_problem.feature_space, optimization_problem.feature_space)

    print(decoded_problem.objectives)
    assert len(decoded_problem.objectives) == 2

    first_objective = decoded_problem.objectives[0]
    second_objective = decoded_problem.objectives[1]
    assert first_objective.name == "z"
    assert second_objective.name == "z1"
    assert first_objective.minimize
    assert not second_objective.minimize
class TestHierarchicalSpaces(unittest.TestCase):
    """Tests point containment and random sampling on a three-level hierarchical hypergrid.

    The fixture is a communication channel config that optionally (when
    use_emergency_buffer is True) carries an emergency buffer config, which in
    turn optionally (when use_colors is True) carries a color config.
    """

    def setUp(self):
        """Build the nested grids by joining each child on its parent's pivot dimension."""
        self.emergency_buffer_settings = SimpleHypergrid(
            name='emergency_buffer_config',
            dimensions=[
                DiscreteDimension(name='log2_emergency_buffer_size', min=0, max=16),
                CategoricalDimension(name='use_colors', values=[True, False])
            ]
        )

        self.emergency_buffer_color = SimpleHypergrid(
            name='emergency_buffer_color',
            dimensions=[
                CategoricalDimension(name='color', values=['Maroon', 'Crimson', 'Tanager'])
            ]
        )

        self.emergency_buffer_settings_with_color = self.emergency_buffer_settings.join(
            subgrid=self.emergency_buffer_color,
            on_external_dimension=CategoricalDimension(name='use_colors', values=[True])
        )

        self.hierarchical_settings = SimpleHypergrid(
            name='communication_channel_config',
            dimensions=[
                DiscreteDimension(name='num_readers', min=1, max=64),
                DiscreteDimension(name='log2_buffer_size', min=10, max=24),
                CategoricalDimension(name='use_emergency_buffer', values=[True, False])
            ]
        ).join(
            subgrid=self.emergency_buffer_settings_with_color,
            on_external_dimension=CategoricalDimension(name='use_emergency_buffer', values=[True])
        )

    def test_composite_spaces(self):
        """Hand-built points are accepted or rejected by the grids at every nesting level."""
        valid_config_no_emergency_buffer = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=False
        )
        self.assertIn(valid_config_no_emergency_buffer, self.hierarchical_settings)

        valid_emergency_buffer_config = Point(
            log2_emergency_buffer_size=2,
            use_colors=False
        )
        self.assertIn(valid_emergency_buffer_config, self.emergency_buffer_settings)

        valid_config_with_emergency_buffer = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            emergency_buffer_config=valid_emergency_buffer_config
        )
        self.assertIn(valid_config_with_emergency_buffer, self.hierarchical_settings)

        # A color point that additionally carries the parent's pivot coordinate is still accepted.
        valid_emergency_buffer_color_config = Point(color='Crimson')
        valid_emergency_buffer_color_config_with_pivot_dimension = valid_emergency_buffer_color_config.copy()
        valid_emergency_buffer_color_config_with_pivot_dimension['use_colors'] = True
        self.assertIn(valid_emergency_buffer_color_config_with_pivot_dimension, self.emergency_buffer_color)

        valid_colorful_emergency_buffer_config = Point(
            log2_emergency_buffer_size=2,
            use_colors=True,
            emergency_buffer_color=valid_emergency_buffer_color_config
        )
        valid_colorful_emergency_buffer_config_with_pivot_dimension = valid_colorful_emergency_buffer_config.copy()
        valid_colorful_emergency_buffer_config_with_pivot_dimension['use_emergency_buffer'] = True
        self.assertIn(
            valid_colorful_emergency_buffer_config_with_pivot_dimension,
            self.emergency_buffer_settings_with_color
        )

        valid_config_with_emergency_buffer_colors = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            emergency_buffer_config=valid_colorful_emergency_buffer_config
        )

        # The emergency buffer is switched off, so the stray buffer coordinate is redundant.
        valid_config_with_emergency_buffer_and_redundant_coordinates = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=False,
            log2_emergency_buffer_size=2
        )
        self.assertIn(valid_config_with_emergency_buffer_and_redundant_coordinates, self.hierarchical_settings)

        # The emergency buffer is switched on but its config is missing altogether.
        another_invalid_config_with_emergency_buffer = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True
        )

        # The emergency buffer is switched on but its config is not nested under its subgrid name.
        yet_another_invalid_config_with_emergency_buffer = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            log2_emergency_buffer_size=40
        )

        self.assertIn(valid_config_no_emergency_buffer, self.hierarchical_settings)
        self.assertIn(valid_config_with_emergency_buffer, self.hierarchical_settings)
        self.assertIn(valid_config_with_emergency_buffer_colors, self.hierarchical_settings)
        self.assertIn(valid_config_with_emergency_buffer_and_redundant_coordinates, self.hierarchical_settings)
        self.assertNotIn(another_invalid_config_with_emergency_buffer, self.hierarchical_settings)
        self.assertNotIn(yet_another_invalid_config_with_emergency_buffer, self.hierarchical_settings)

    def test_generating_random_configs(self):
        """Seeded sampling yields valid points and reaches every level of the hierarchy."""
        used_emergency_buffer = False
        used_color = False
        used_crimson = False

        # Let's seed it to make sure we get consistent test results
        self.hierarchical_settings.random_state = random.Random(1)

        for _ in range(100):
            random_config = self.hierarchical_settings.random()
            self.assertIn(random_config, self.hierarchical_settings)

            if not random_config['use_emergency_buffer']:
                continue
            used_emergency_buffer = True

            emergency_buffer_config = random_config['emergency_buffer_config']
            if not emergency_buffer_config['use_colors']:
                continue
            used_color = True

            used_crimson = used_crimson or emergency_buffer_config['emergency_buffer_color']['color'] == 'Crimson'

        self.assertTrue(used_emergency_buffer, "no sampled config enabled the emergency buffer")
        self.assertTrue(used_color, "no sampled config enabled emergency buffer colors")
        self.assertTrue(used_crimson, "no sampled config picked the 'Crimson' color")

    def test_reseeding_random_state(self):
        """Re-seeding with the same seed replays the same points; a new seed changes them."""
        previous_iteration_first_pass_points = None

        for seed in range(10):
            # let's seed the grid for the first time
            self.hierarchical_settings.random_state = random.Random(seed)
            first_pass_points = [self.hierarchical_settings.random() for _ in range(100)]

            # let's do it again
            self.hierarchical_settings.random_state = random.Random(seed)
            second_pass_points = [self.hierarchical_settings.random() for _ in range(100)]

            for first_pass_point, second_pass_point in zip(first_pass_points, second_pass_points):
                self.assertEqual(first_pass_point, second_pass_point)

            if previous_iteration_first_pass_points is not None:
                # Let's make sure we keep changing the points
                self.assertTrue(
                    any(
                        previous != current
                        for previous, current in zip(previous_iteration_first_pass_points, first_pass_points)
                    ),
                    f"seed {seed} reproduced the points of seed {seed - 1}"
                )
            previous_iteration_first_pass_points = first_pass_points
class TestHierarchicalHypergrid2(unittest.TestCase):
    """ Tests the improved implementation of the Hypergrids.

    In particular:
    * SimpleHypergrid.join() should attach to the root hypergrid if possible
    * SimpleHypergrids that are hierarchical implement a hierarchical namespace, where a coordinate
        within each subgrid is prefixed with the name of that subgrid:

    """

    def setUp(self):
        """Build a cache parameter space with two levels of joined subgrids."""
        self.lru_cache_param_space = SimpleHypergrid(
            name='lru_cache_config',
            dimensions=[
                DiscreteDimension(name='size', min=1, max=2**20),
                OrdinalDimension(name='color', ordered_values=['green', 'orange', 'red'])
            ]
        )

        self.mod_prime_hash_function_param_space = SimpleHypergrid(
            name='mod_prime_hash_function',
            dimensions=[
                OrdinalDimension(name='prime', ordered_values=[1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59])
            ]
        )

        self.lowest_bits_param_space = SimpleHypergrid(
            name='lowest_bits',
            dimensions=[
                DiscreteDimension(name='num_bits', min=1, max=64)
            ]
        )

        self.binary_search_tree_param_space = SimpleHypergrid(
            name='binary_search_tree',
            dimensions=[
                DiscreteDimension(name='max_depth', min=1, max=2**10)
            ]
        )

        self.linked_list_param_space = SimpleHypergrid(
            name='linked_list',
            dimensions=[
                DiscreteDimension(name='max_length', min=1, max=2**10)
            ]
        )

        self.associative_cache_implementation_param_space = SimpleHypergrid(
            name='associative_cache_config',
            dimensions=[
                CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function', 'lowest_bits']),
                CategoricalDimension(name='bucket_implementation', values=['single_value', 'binary_search_tree', 'linked_list'])
            ]
        ).join(
            subgrid=self.mod_prime_hash_function_param_space,
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function'])
        ).join(
            subgrid=self.lowest_bits_param_space,
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['lowest_bits'])
        ).join(
            subgrid=self.binary_search_tree_param_space,
            on_external_dimension=CategoricalDimension(name='bucket_implementation', values=['binary_search_tree'])
        ).join(
            subgrid=self.linked_list_param_space,
            on_external_dimension=CategoricalDimension(name='bucket_implementation', values=['linked_list'])
        )

        self.cache_param_space = SimpleHypergrid(
            name='cache_param_space',
            dimensions=[
                CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
            ]
        ).join(
            subgrid=self.lru_cache_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['lru_cache'])
        ).join(
            subgrid=self.associative_cache_implementation_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['associative_cache'])
        )

    def test_efficient_join(self):
        """ Tests if the join efficiently flattens the tree of hypergrids.

        :return:
        """
        self.assertEqual(self.cache_param_space.name, 'cache_param_space')

        subgrids_joined_on_cache_implementation_name_dimension = {
            joined_subgrid.subgrid
            for joined_subgrid
            in self.cache_param_space.joined_subgrids_by_pivot_dimension['cache_implementation_name']
        }
        self.assertIn(self.lru_cache_param_space, subgrids_joined_on_cache_implementation_name_dimension)
        self.assertIn(self.associative_cache_implementation_param_space, subgrids_joined_on_cache_implementation_name_dimension)

        subgrids_joined_on_hash_function_name_dimension = {
            joined_subgrid.subgrid
            for joined_subgrid
            in self.associative_cache_implementation_param_space.joined_subgrids_by_pivot_dimension['hash_function_name']
        }
        self.assertIn(self.mod_prime_hash_function_param_space, subgrids_joined_on_hash_function_name_dimension)
        self.assertIn(self.lowest_bits_param_space, subgrids_joined_on_hash_function_name_dimension)

        subgrids_joined_on_bucket_implementation_dimension = {
            joined_subgrid.subgrid
            for joined_subgrid
            in self.associative_cache_implementation_param_space.joined_subgrids_by_pivot_dimension['bucket_implementation']
        }
        self.assertIn(self.binary_search_tree_param_space, subgrids_joined_on_bucket_implementation_dimension)
        self.assertIn(self.linked_list_param_space, subgrids_joined_on_bucket_implementation_dimension)

    def test_name_flattening(self):
        """Flattened random points belong to a flat grid built from the flattened dimensions."""
        num_tests = 1000

        for _ in range(num_tests):
            random_config = self.cache_param_space.random()

            flat_dimensions = []
            for dimension_name, _ in random_config:
                original_dimension = self.cache_param_space[dimension_name]
                flat_dimension = original_dimension.copy()
                flat_dimension.name = Dimension.flatten_dimension_name(dimension_name)
                flat_dimensions.append(flat_dimension)

            # Let's create a flat hypergrid that contains that random_config
            flat_cache_param_space = SimpleHypergrid(
                name=f"Flat{self.cache_param_space.name}",
                dimensions=flat_dimensions
            )

            flat_random_config = random_config.flat_copy()
            self.assertIn(flat_random_config, flat_cache_param_space)

            # let's try another random config
            another_random_config = self.cache_param_space.random()
            flattened_config = another_random_config.flat_copy()

            # This point may or may not belong to the flat grid; the only requirement is
            # that the containment check itself does not blow up. Catch Exception rather
            # than everything so KeyboardInterrupt/SystemExit still propagate, and report
            # the actual error instead of a bare "False is not true".
            try:
                _ = flattened_config in flat_cache_param_space
            except Exception as error:  # pylint: disable=broad-except
                self.fail(f"Containment check raised {error!r} for {flattened_config}")

    def test_that_getitem_returns_subgrid(self):
        """ Tests if we can use the __getitem__ operator to retrieve a subgrid.

        :return:
        """
        lru_cache_param_space = self.cache_param_space['lru_cache_config']
        for _ in range(1000):
            self.assertIn(lru_cache_param_space.random(), self.lru_cache_param_space)
            self.assertIn(self.lru_cache_param_space.random(), lru_cache_param_space)

    def test_that_getitem_returns_dimensions(self):
        """ Tests if we can use the __getitem__ operator to retrieve a dimension.

        :return:
        """
        cache_implementation_dimension = self.cache_param_space["cache_implementation_name"]
        self.assertEqual(
            cache_implementation_dimension,
            CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
        )

        num_bits_dimension = self.cache_param_space["associative_cache_config"]["lowest_bits"]["num_bits"]
        self.assertEqual(num_bits_dimension, self.lowest_bits_param_space["num_bits"])

    def test_getitem_throws(self):
        """Looking up a name that is neither a dimension nor a subgrid raises KeyError."""
        with self.assertRaises(KeyError):
            _ = self.cache_param_space["non_existent_dimension"]

    def test_that_collision_throws(self):
        """ Test that if we try to join on a subgrid that has the same name as an existing dimension, we throw.

        This is because the __getitem__ can return either a dimension or a subgrid, so their names cannot collide.

        :return:
        """
        with self.assertRaises(ValueError):
            SimpleHypergrid(
                name="collisions",
                dimensions=[
                    CategoricalDimension(name="associative_cache_config", values=[True, False]),
                    CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
                ]
            ).join(
                subgrid=self.associative_cache_implementation_param_space,
                on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['associative_cache'])
            )

    def test_pickling(self):
        """Random points survive a pickle round trip unchanged."""
        for _ in range(100):
            random_point = self.cache_param_space.random()
            # Only data produced by this test is unpickled here.
            unpickled = pickle.loads(pickle.dumps(random_point))
            self.assertEqual(unpickled, random_point)
def test_optimization_with_context(self):
    """Exercise register/suggest/predict/optimum on optimizers for a problem with a context space.

    The local optimizer is expected to reject calls that omit the context or
    pass a misaligned one, the remote optimizer is expected to reject any
    call that passes a context, and after 100 guided iterations the
    speculative optimum must lie on the expected side of x for y=-1 and y=1.
    """

    def gaussian_blob(parameters, context):
        # Gaussian blob in x with position dependent on context variable y.
        row_index = parameters.index if isinstance(parameters, pd.DataFrame) else [0]
        distance_from_peak = parameters.x - 0.5 * context.y - 0.5
        return pd.DataFrame({'function_value': -np.exp(-50 * distance_from_peak**2)}, index=row_index)

    input_space = SimpleHypergrid(
        name="input",
        dimensions=[ContinuousDimension(name="x", min=0, max=1)],
    )
    output_space = SimpleHypergrid(
        name="objective",
        dimensions=[ContinuousDimension(name="function_value", min=-10, max=10)],
    )
    context_space = SimpleHypergrid(
        name="context",
        dimensions=[ContinuousDimension(name="y", min=-1, max=1)],
    )

    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        # we want to minimize the function
        objectives=[Objective(name="function_value", minimize=True)],
        context_space=context_space,
    )

    # create some data points to eval
    n_samples = 5000
    parameter_df = input_space.random_dataframe(n_samples)
    context_df = context_space.random_dataframe(n_samples)
    target_df = gaussian_blob(parameter_df, context_df)

    local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
        optimization_problem=optimization_problem
    )

    # Keyword bundles shared by the register/predict calls below.
    observation_frames = {
        "parameter_values_pandas_frame": parameter_df,
        "target_values_pandas_frame": target_df,
    }
    parameter_frame = {"parameter_values_pandas_frame": parameter_df}

    with pytest.raises(ValueError, match="Context required"):
        local_optimizer.register(**observation_frames)

    with pytest.raises(ValueError, match="Incompatible shape of parameters and context"):
        local_optimizer.register(**observation_frames, context_values_pandas_frame=context_df.iloc[:-1])

    local_optimizer.register(**observation_frames, context_values_pandas_frame=context_df)

    with pytest.raises(ValueError, match="Context required"):
        local_optimizer.suggest()

    with pytest.raises(ValueError, match="Context required"):
        local_optimizer.predict(**parameter_frame)

    suggestion = local_optimizer.suggest(context=context_space.random())
    assert isinstance(suggestion, Point)
    assert suggestion in input_space

    with pytest.raises(ValueError, match="Incompatible shape of parameters and context"):
        # unaligned parameters and context
        local_optimizer.predict(**parameter_frame, context_values_pandas_frame=context_df.iloc[:-1])

    predictions = local_optimizer.predict(**parameter_frame, context_values_pandas_frame=context_df)
    predictions_df = predictions.get_dataframe()
    assert len(predictions_df) == len(parameter_df)

    remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
        optimization_problem=optimization_problem
    )

    with pytest.raises(ValueError, match="not supported if context is provided"):
        local_optimizer.optimum(
            optimum_definition=OptimumDefinition.BEST_OBSERVATION,
            context=Point(y=0).to_dataframe(),
        )

    with pytest.raises(ValueError, match="not supported if context is provided"):
        local_optimizer.optimum(optimum_definition=OptimumDefinition.BEST_OBSERVATION)

    with pytest.raises(ValueError, match="requires context to be not None"):
        local_optimizer.optimum(optimum_definition=OptimumDefinition.BEST_SPECULATIVE_WITHIN_CONTEXT)

    # can't register, predict, suggest with context on remote optimizer
    with pytest.raises(NotImplementedError, match="Context not currently supported"):
        remote_optimizer.register(**observation_frames, context_values_pandas_frame=context_df)

    with pytest.raises(NotImplementedError, match="Context not currently supported"):
        remote_optimizer.predict(**parameter_frame, context_values_pandas_frame=context_df)

    with pytest.raises(NotImplementedError, match="Context not currently supported"):
        remote_optimizer.suggest(context=context_df)

    # context is missing but required by problem, should give error
    with pytest.raises(grpc.RpcError):
        remote_optimizer.register(**observation_frames)

    # run some iterations on local optimizer to see we do something sensible
    for _ in range(100):
        # pick context at random
        sampled_context = context_space.random()
        suggested_config = local_optimizer.suggest(context=sampled_context)
        observed_target = gaussian_blob(suggested_config, sampled_context)
        local_optimizer.register(
            parameter_values_pandas_frame=suggested_config.to_dataframe(),
            target_values_pandas_frame=observed_target,
            context_values_pandas_frame=sampled_context.to_dataframe(),
        )

    optimum_at_y_minus_one = local_optimizer.optimum(
        optimum_definition=OptimumDefinition.BEST_SPECULATIVE_WITHIN_CONTEXT,
        context=Point(y=-1).to_dataframe(),
    )
    optimum_at_y_plus_one = local_optimizer.optimum(
        optimum_definition=OptimumDefinition.BEST_SPECULATIVE_WITHIN_CONTEXT,
        context=Point(y=1).to_dataframe(),
    )
    assert optimum_at_y_plus_one.x > .6
    assert optimum_at_y_minus_one.x < .4
def test_registering_multiple_objectives(self):
    """Register observations with several output columns while optimizing only one of them.

    Covers: predicting after 100 registered suggestions, dropping output
    columns that are not in the objective space, storing a null for a
    missing output column, and rejecting observations without any valid
    output column.
    """
    input_space = SimpleHypergrid(
        name='input',
        dimensions=[
            ContinuousDimension(name="x_1", min=0, max=10),
            ContinuousDimension(name="x_2", min=0, max=10)
        ]
    )

    output_space = SimpleHypergrid(
        name='output',
        dimensions=[
            ContinuousDimension(name="y_1", min=0, max=10),
            ContinuousDimension(name="y_2", min=0, max=10)
        ]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y_1', minimize=True)]
    )

    optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
        optimization_problem=optimization_problem
    )

    # Locals are named *_params rather than `input` so the builtin is not shadowed.
    for _ in range(100):
        suggested_params = optimizer.suggest()
        observed_output = Point(y_1=suggested_params.x_1, y_2=suggested_params.x_2)
        optimizer.register(suggested_params.to_dataframe(), observed_output.to_dataframe())

    num_predictions = 100
    prediction = optimizer.predict(
        parameter_values_pandas_frame=optimization_problem.parameter_space.random_dataframe(num_predictions)
    )
    prediction_df = prediction.get_dataframe()
    assert len(prediction_df.index) == num_predictions

    # Let's test invalid observations.
    #
    probe_params = input_space.random()
    input_df = probe_params.to_dataframe()

    # We should only remember the valid dimensions.
    #
    output_with_extra_dimension = Point(y_1=probe_params.x_1, y_2=probe_params.x_2, invalid_dimension=42)
    output_with_extra_dimension_df = output_with_extra_dimension.to_dataframe()
    optimizer.register(input_df, output_with_extra_dimension_df)

    # Let's make sure that the invalid_dimension was not remembered.
    #
    _, all_outputs_df, _ = optimizer.get_all_observations()
    assert all(column in {'y_1', 'y_2'} for column in all_outputs_df.columns)

    # We should accept inputs with missing output dimensions, as long as at least one is specified.
    #
    output_with_missing_dimension = Point(y_1=probe_params.x_1)
    output_with_missing_dimension_df = output_with_missing_dimension.to_dataframe()
    optimizer.register(input_df, output_with_missing_dimension_df)
    _, all_outputs_df, _ = optimizer.get_all_observations()

    # Let's make sure the missing dimension ends up being a null.
    #
    last_observation = all_outputs_df.iloc[[-1]]
    assert last_observation['y_2'].isnull().values.all()

    # Inserting an observation with no valid dimensions should fail.
    #
    empty_output = Point()
    empty_output_df = empty_output.to_dataframe()
    with pytest.raises(ValueError):
        optimizer.register(input_df, empty_output_df)

    only_invalid_outputs = Point(invalid_col1=0, invalid_col2=2)
    only_invalid_outputs_df = only_invalid_outputs.to_dataframe()

    with pytest.raises(ValueError):
        optimizer.register(input_df, only_invalid_outputs_df)
class TestHierarchicalHypergrid2(unittest.TestCase):
    """ Tests the improved implementation of the Hypergrids.

    In particular:
    * SimpleHypergrid.join() should attach to the root hypergrid if possible
    * SimpleHypergrids that are hierarchical implement a hierarchical namespace, where a coordinate
        within each subgrid is prefixed with the name of that subgrid:

    """

    def setUp(self):
        """Build a cache parameter space with two levels of joined subgrids."""
        self.lru_cache_param_space = SimpleHypergrid(
            name='lru_cache_config',
            dimensions=[
                DiscreteDimension(name='size', min=1, max=2**20),
                OrdinalDimension(name='color', ordered_values=['green', 'orange', 'red'])
            ]
        )

        self.mod_prime_hash_function_param_space = SimpleHypergrid(
            name='mod_prime_hash_function',
            dimensions=[
                OrdinalDimension(name='prime', ordered_values=[1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59])
            ]
        )

        self.lowest_bits_param_space = SimpleHypergrid(
            name='lowest_bits',
            dimensions=[
                DiscreteDimension(name='num_bits', min=1, max=64)
            ]
        )

        self.binary_search_tree_param_space = SimpleHypergrid(
            name='binary_search_tree',
            dimensions=[
                DiscreteDimension(name='max_depth', min=1, max=2**10)
            ]
        )

        self.linked_list_param_space = SimpleHypergrid(
            name='linked_list',
            dimensions=[
                DiscreteDimension(name='max_length', min=1, max=2**10)
            ]
        )

        self.associative_cache_implementation_param_space = SimpleHypergrid(
            name='associative_cache_config',
            dimensions=[
                CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function', 'lowest_bits']),
                CategoricalDimension(name='bucket_implementation', values=['single_value', 'binary_search_tree', 'linked_list'])
            ]
        ).join(
            subgrid=self.mod_prime_hash_function_param_space,
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function'])
        ).join(
            subgrid=self.lowest_bits_param_space,
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['lowest_bits'])
        ).join(
            subgrid=self.binary_search_tree_param_space,
            on_external_dimension=CategoricalDimension(name='bucket_implementation', values=['binary_search_tree'])
        ).join(
            subgrid=self.linked_list_param_space,
            on_external_dimension=CategoricalDimension(name='bucket_implementation', values=['linked_list'])
        )

        self.cache_param_space = SimpleHypergrid(
            name='cache_param_space',
            dimensions=[
                CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
            ]
        ).join(
            subgrid=self.lru_cache_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['lru_cache'])
        ).join(
            subgrid=self.associative_cache_implementation_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['associative_cache'])
        )

    def test_efficient_join(self):
        """ Tests if the join efficiently flattens the tree of hypergrids.

        :return:
        """
        # NOTE(review): this test reads `guest_subgrids_by_pivot_dimension`, while other tests
        # in this file refer to `joined_subgrids_by_pivot_dimension` - confirm which attribute
        # name the library version under test exposes.
        self.assertEqual(self.cache_param_space.name, 'cache_param_space')

        subgrids_joined_on_cache_implementation_name_dimension = {
            guest_subgrid.subgrid
            for guest_subgrid
            in self.cache_param_space.guest_subgrids_by_pivot_dimension['cache_implementation_name']
        }
        self.assertIn(self.lru_cache_param_space, subgrids_joined_on_cache_implementation_name_dimension)
        self.assertIn(self.associative_cache_implementation_param_space, subgrids_joined_on_cache_implementation_name_dimension)

        subgrids_joined_on_hash_function_name_dimension = {
            guest_subgrid.subgrid
            for guest_subgrid
            in self.associative_cache_implementation_param_space.guest_subgrids_by_pivot_dimension['hash_function_name']
        }
        self.assertIn(self.mod_prime_hash_function_param_space, subgrids_joined_on_hash_function_name_dimension)
        self.assertIn(self.lowest_bits_param_space, subgrids_joined_on_hash_function_name_dimension)

        subgrids_joined_on_bucket_implementation_dimension = {
            guest_subgrid.subgrid
            for guest_subgrid
            in self.associative_cache_implementation_param_space.guest_subgrids_by_pivot_dimension['bucket_implementation']
        }
        self.assertIn(self.binary_search_tree_param_space, subgrids_joined_on_bucket_implementation_dimension)
        self.assertIn(self.linked_list_param_space, subgrids_joined_on_bucket_implementation_dimension)

    def test_name_flattening(self):
        """Flattened random points belong to a flat grid built from the flattened dimensions."""
        num_tests = 1000

        for _ in range(num_tests):
            random_config = self.cache_param_space.random()

            flat_dimensions = []
            for dimension_name, _ in random_config:
                original_dimension = self.cache_param_space[dimension_name]
                flat_dimension = original_dimension.copy()
                flat_dimension.name = Dimension.flatten_dimension_name(dimension_name)
                flat_dimensions.append(flat_dimension)

            # Let's create a flat hypergrid that contains that random_config
            flat_cache_param_space = SimpleHypergrid(
                name=f"Flat{self.cache_param_space.name}",
                dimensions=flat_dimensions
            )

            flat_random_config = random_config.flat_copy()
            self.assertIn(flat_random_config, flat_cache_param_space)

            # let's try another random config
            another_random_config = self.cache_param_space.random()
            flattened_config = another_random_config.flat_copy()

            # This point may or may not belong to the flat grid; the only requirement is
            # that the containment check itself does not blow up. Catch Exception rather
            # than everything so KeyboardInterrupt/SystemExit still propagate, and report
            # the actual error instead of a bare "False is not true".
            try:
                _ = flattened_config in flat_cache_param_space
            except Exception as error:  # pylint: disable=broad-except
                self.fail(f"Containment check raised {error!r} for {flattened_config}")