def setUp(self) -> None:
    self.x = np.array([[1, 2], [4, 5], [6, 7], [8, 9], [10, 11]])
    self.noise_prior = Gaussian(mu=np.log(0.01), sigma=1)

    cov_1 = Covariance(RBF(1))
    p1 = LogGaussian(20, 1)
    p2 = LogGaussian(0, 1.1)
    cov_1.raw_kernel.variance.set_prior(p1, warning=False)
    cov_1.raw_kernel.lengthscale.set_prior(p2, warning=False)

    cov_2 = Covariance(RBF(1))
    p3 = LogGaussian(11, 1)
    p4 = LogGaussian(1, 1.21)
    cov_2.raw_kernel.variance.set_prior(p3, warning=False)
    cov_2.raw_kernel.lengthscale.set_prior(p4, warning=False)

    cov_3 = Covariance(RationalQuadratic(1))
    p5 = LogGaussian(4, 1)
    p6 = LogGaussian(1.2, 1.21)
    p7 = LogGaussian(13, 1.21)
    cov_3.raw_kernel.variance.set_prior(p5, warning=False)
    cov_3.raw_kernel.lengthscale.set_prior(p6, warning=False)
    cov_3.raw_kernel.power.set_prior(p7, warning=False)

    models = [GPModel(cov_1), GPModel(cov_2), GPModel(cov_3)]
    self.active_models = ActiveSet(max_n_models=3)
    self.active_models.models = models
    self.ind_init = [0, 2]
def query(self, fitness_scores, x_train, y_train, eval_budget, gp_fn, gp_args, **ms_args):
    """Select the next model to evaluate.

    Expands the currently selected models into candidate covariances, updates
    the active set and the kernel-kernel distance builder, fits the meta-GP
    over model indices, and returns the candidate that maximizes the
    acquisition function.
    """
    model_proposer = BomsGrammar()
    model_proposer.build(x_train.shape[1])
    candidate_covariances = model_proposer.expand(
        seed_models=self.active_models.get_selected_models())
    likelihood = self.active_models.get_selected_models()[0].likelihood
    candidate_models = [
        GPModel(cov, likelihood) for cov in candidate_covariances
    ]

    # Update active models.
    new_candidate_indices = self.active_models.update(candidate_models)

    # Pool of models.
    all_candidate_indices = self.active_models.get_candidate_indices()
    selected_indices = self.active_models.selected_indices

    # Update model distances using the kernel builder.
    self.kernel_builder.update(self.active_models, new_candidate_indices,
                               all_candidate_indices, selected_indices,
                               x_train)

    # Make sure all necessary indices are not NaN.
    assert_valid_kernel_kernel(self.kernel_builder, len(self.active_models),
                               selected_indices, all_candidate_indices)

    meta_x_train = np.array(selected_indices)[:, None]
    meta_y_train = np.array(fitness_scores)[:, None]

    # Train the GP.
    self.kernel_kernel_gp_model.update(meta_x_train, meta_y_train, None, None)

    # Housekeeping for kernel kernel. Must update the number of active models.
    self.kernel_kernel_gp_model.model.kern.n_models = len(self.active_models)

    # Compute acquisition function values.
    x_test = np.array(all_candidate_indices)[:, None]
    acq_scores = self.acquisition_fn(
        x_test, self.kernel_kernel_gp_model).flatten().tolist()
    indices_acquisition = np.argsort(np.array(acq_scores).flatten())

    # Argmax acquisition function.
    next_model_index = all_candidate_indices[indices_acquisition[-1]]
    next_node = self.active_models.models[next_model_index]
    next_model = GPModel(next_node.covariance, next_node.likelihood)

    # Save next model index.
    self.active_models.selected_indices += [next_model_index]

    # Set remove priority.
    self.active_models.remove_priority = [
        all_candidate_indices[i] for i in indices_acquisition
    ]

    return next_model
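# Usage sketch for query() above (hypothetical driver code; `selector`, the data
# arrays, and the budget value are assumptions, and fitness_scores is assumed to
# hold one score per currently selected model, in the order of
# active_models.selected_indices):
#
#   fitness_scores = [m.score for m in selector.active_models.get_selected_models()]
#   next_model = selector.query(fitness_scores, x_train, y_train,
#                               eval_budget=50, gp_fn=gp_fn, gp_args=gp_args)
#   next_model.score_model(x_train, y_train, fitness_fn)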
def hellinger_metric(u: np.ndarray, v: np.ndarray,
                     get_x_train: Callable[[], np.ndarray]) -> float:
    """Hellinger distance between two encoded GP models."""
    gp_model_1_enc, gp_model_2_enc = u[0], v[0]
    kern_1, kern_2 = decode_kernel(gp_model_1_enc[0]), decode_kernel(
        gp_model_2_enc[0])

    has_priors = gp_model_1_enc[1] is not None and gp_model_2_enc[1] is not None
    if has_priors:
        priors_1 = [[decode_prior(enc) for enc in encs]
                    for encs in gp_model_1_enc[1]]
        priors_2 = [[decode_prior(enc) for enc in encs]
                    for encs in gp_model_2_enc[1]]
        prior_dict_1 = dict(zip(kern_1.parameter_names(), priors_1[0]))
        prior_dict_2 = dict(zip(kern_2.parameter_names(), priors_2[0]))
        kern_1 = set_priors(kern_1, prior_dict_1)
        kern_2 = set_priors(kern_2, prior_dict_2)

    x_train = get_x_train()
    noise_prior = Gaussian(np.log(0.01), 1)
    active_models = [GPModel(kern_1), GPModel(kern_2)]

    num_samples = 20
    max_num_hyperparameters = 40
    max_num_kernels = 1000
    initial_model_indices = [0, 1]

    builder = HellingerDistanceBuilder(noise_prior, num_samples,
                                       max_num_hyperparameters,
                                       max_num_kernels, active_models,
                                       initial_model_indices, data_X=x_train)
    builder.compute_distance(active_models, [0], [1])

    return builder._average_distance[0, 0]
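# Example call (a sketch, not a test from this suite): encode two GPModels the
# same way the shd/euclidean metric tests below do, then compare them. The
# Hellinger distance is computed from hyperparameter samples, so the kernels
# are assumed to carry priors (as in the setUp fixtures above).
#
#   data = encode_gp_models([GPModel(cov_1), GPModel(cov_2)])  # covariances with priors set
#   dist = hellinger_metric(data[0], data[1], get_x_train=lambda: x_train)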
def test_expand(self):
    grammar = CKSGrammar(base_kernel_names=['SE', 'RQ'])
    grammar.build(n_dims=2)
    scored_kernel = GPModel(self.se0)
    scored_kernel.score = 1
    result = grammar.expand([scored_kernel])
    self.assertIsInstance(result, list)
def test_save(self):
    kernel = Covariance(RBF(1) * RBF(1) + RationalQuadratic(1))
    gp_model = GPModel(kernel)
    file_name = "test_save"
    out_fname = gp_model.save(file_name)
    self.addCleanup(os.remove, out_fname)
def test_index_same_candidate_expression(self):
    candidates = [GPModel(Covariance(RBF(1)))]
    self.active_set.update(candidates)

    new_model = GPModel(Covariance(RBF(1)))
    actual = self.active_set.index(new_model)
    self.assertIsInstance(actual, int)
    expected_ind = 0
    self.assertEqual(expected_ind, actual)
def test_get_new_candidate_with_default(self):
    candidates = [GPModel(Covariance(RBF(1)))]
    self.active_set.update(candidates)

    new_model = GPModel(Covariance(RationalQuadratic(1)))
    default = 2
    actual = self.active_set.get(new_model, default)
    self.assertIsInstance(actual, int)
    expected_ind = default
    self.assertEqual(expected_ind, actual)
def test_get_index_to_insert_full_no_priority(self):
    # Add five models.
    self.active_set.add_model(GPModel(Covariance(RBF(1))))
    self.active_set.add_model(GPModel(Covariance(RationalQuadratic(1))))
    self.active_set.add_model(GPModel(Covariance(StandardPeriodic(1))))
    self.active_set.add_model(GPModel(Covariance(LinScaleShift(1))))
    self.active_set.add_model(GPModel(Covariance(RBF(1) + RBF(1))))

    self.assertRaises(ValueError, self.active_set.get_index_to_insert)
def test_shd_metric(self):
    gp_models = [
        GPModel(Covariance(RBF(1) + RationalQuadratic(1))),
        GPModel(Covariance(RBF(1)))
    ]
    data = encode_gp_models(gp_models)
    u, v = data[0], data[1]
    result = shd_metric(u, v)
    self.assertEqual(result, 1)
def test_from_dict(self):
    test_cases = (GPModel, Serializable)
    for cls in test_cases:
        with self.subTest(name=cls.__name__):
            kernel = Covariance(RBF(1) * RBF(1) + RationalQuadratic(1))
            gp_model = GPModel(kernel)

            actual = cls.from_dict(gp_model.to_dict())

            self.assertIsInstance(actual, GPModel)
            self.assertEqual(gp_model.likelihood, actual.likelihood)
            self.assertEqual(gp_model.covariance.infix, actual.covariance.infix)
def test_update_with_duplicates(self):
    candidates = [GPModel(Covariance(RBF(1))), GPModel(Covariance(RBF(1)))]
    expected_candidates_ind = [0]
    new_candidates_ind = self.active_set.update(candidates)
    self.assertEqual(expected_candidates_ind, new_candidates_ind)
    expected_models = [candidates[0], None, None, None, None]
    self.assertListEqual(expected_models, self.active_set.models)
    expected_next_ind = 1
    self.assertEqual(expected_next_ind, self.active_set.get_index_to_insert())
def test_load(self):
    kernel = Covariance(RBF(1) * RBF(1) + RationalQuadratic(1))
    gp_model = GPModel(kernel)
    file_name = "test_save"
    out_file_name = gp_model.save(file_name)
    self.addCleanup(os.remove, out_file_name)

    new_gp_model = GPModel.load(out_file_name)
    self.assertIsInstance(new_gp_model, GPModel)
    self.assertEqual(gp_model.covariance.infix, new_gp_model.covariance.infix)
def _covariance_to_gp_model(self, cov: Covariance) -> GPModel:
    """Convert a covariance to a GP model."""
    gp_model = GPModel(cov)

    # Set model dict.
    gp_model.model_input_dict = self.model_dict
    gp_model.likelihood = self.model_dict["likelihood"].copy()

    # Convert to additive form if necessary.
    if self.additive_form:
        gp_model.covariance = gp_model.covariance.to_additive_form()

    return gp_model
def test_encode_gp_models(self):
    gp_models = [GPModel(Covariance(RBF(1))),
                 GPModel(Covariance(RationalQuadratic(1)))]
    result = encode_gp_models(gp_models)
    self.assertIsInstance(result, np.ndarray)
    self.assertEqual(result.shape, (len(gp_models), 1))
    self.assertListEqual(result[0][0],
                         [encode_kernel(gp_models[0].covariance.raw_kernel), [None]])
    self.assertListEqual(result[1][0],
                         [encode_kernel(gp_models[1].covariance.raw_kernel), [None]])

    gp_models = [GPModel(Covariance(RBF(1) * RBF(1))),
                 GPModel(Covariance(RationalQuadratic(1)))]
    result = encode_gp_models(gp_models)
    self.assertIsInstance(result, np.ndarray)
    self.assertEqual(result.shape, (len(gp_models), 1))
    self.assertListEqual(result[0][0],
                         [encode_kernel(gp_models[0].covariance.raw_kernel), [None]])
    self.assertListEqual(result[1][0],
                         [encode_kernel(gp_models[1].covariance.raw_kernel), [None]])
def test_euclidean_metric(self):
    x_train = np.array([[1, 2], [3, 4]])
    gp_models = [
        GPModel(Covariance(RBF(1) + RationalQuadratic(1))),
        GPModel(Covariance(RBF(1)))
    ]
    data = encode_gp_models(gp_models)
    u, v = data[0], data[1]
    result = euclidean_metric(u, v, get_x_train=lambda: x_train)
    self.assertIsInstance(result, float)
    self.assertAlmostEqual(
        result,
        np.linalg.norm(
            gp_models[0].covariance.raw_kernel.K(x_train, x_train) -
            gp_models[1].covariance.raw_kernel.K(x_train, x_train)))
def test_update_empty(self):
    candidates = [
        GPModel(Covariance(RBF(1))),
        GPModel(Covariance(RationalQuadratic(1)))
    ]
    expected_candidates_ind = [0, 1]
    new_candidates_ind = self.active_set.update(candidates)
    self.assertEqual(expected_candidates_ind, new_candidates_ind)
    expected_models = [candidates[0], candidates[1], None, None, None]
    self.assertListEqual(expected_models, self.active_set.models)
    expected_next_ind = 2
    self.assertEqual(expected_next_ind, self.active_set.get_index_to_insert())
def test_get_same_candidate_with_default(self):
    candidates = [GPModel(Covariance(RBF(1)))]
    self.active_set.update(candidates)

    actual = self.active_set.get(candidates[0], -1)
    self.assertIsInstance(actual, int)
    expected_ind = 0
    self.assertEqual(expected_ind, actual)
def setUp(self):
    self.gp_models = [
        GPModel(Covariance(RationalQuadratic(1))),
        GPModel(Covariance(RBF(1) + RBF(1))),
        GPModel(Covariance(RBF(1)))
    ]
    grammar = MagicMock()
    kernel_selector = MagicMock()
    objective = MagicMock()

    self.x_train = np.array([[1, 2, 3], [4, 5, 6]])
    self.y_train = np.array([[5], [10]])
    self.x_test = np.array([[10, 20, 30], [40, 50, 60]])
    self.y_test = np.array([[2], [1]])

    self.model_selector = BomsModelSelector(grammar, kernel_selector, objective)
def setUp(self):
    self.gp_models = [
        GPModel(Covariance(RationalQuadratic(1))),
        GPModel(Covariance(RBF(1) + RBF(1))),
        GPModel(Covariance(RBF(1)))
    ]
    grammar = GeometricRandomGrammar()
    grammar.build(n_dims=1)
    fitness_fn = 'nbic'

    self.x_train = np.array([[1, 2, 3], [4, 5, 6]])
    self.y_train = np.array([[5], [10]])
    self.x_test = np.array([[10, 20, 30], [40, 50, 60]])
    self.y_test = np.array([[2], [1]])

    self.model_selector = ModelSelector(grammar, fitness_fn)
def test_expand_best(self):
    base_kernel_names = ['SE', 'RQ']
    n_dim = 1
    np.random.seed(5)

    grammar = BomsGrammar(base_kernel_names=base_kernel_names)
    grammar.build(n_dim)
    grammar._number_of_top_k_best = 1

    num_random_walks = 5
    kernels = grammar.expand_random(num_random_walks)
    fitness_score = list(np.random.permutation(len(kernels)).tolist())
    index = int(np.argmax(fitness_score))
    kernel_to_expand = kernels[index]

    models = [GPModel(kernel) for kernel in kernels]
    for model, model_score in zip(models, fitness_score):
        model.score = model_score

    new_kernels = grammar.expand_best(models, fitness_score)
    expanded_kernels = grammar.expand_single_kernel(kernel_to_expand)
    for i in range(len(expanded_kernels)):
        self.assertEqual(new_kernels[i].infix, expanded_kernels[i].infix)
def _build_from_input_dict(cls, input_dict: dict):
    n_evals = input_dict.pop('n_evals')
    total_eval_time = input_dict.pop('total_eval_time')
    total_expansion_time = input_dict.pop('total_expansion_time')
    total_model_search_time = input_dict.pop('total_model_search_time')
    name = input_dict.pop('name')
    built = input_dict.pop('built')
    selected_models = input_dict.pop('selected_models')
    x_train_mean = input_dict.pop('_x_train_mean')
    x_train_std = input_dict.pop('_x_train_std')
    y_train_mean = input_dict.pop('_y_train_mean')
    y_train_std = input_dict.pop('_y_train_std')

    model_selector = super()._build_from_input_dict(input_dict)

    model_selector.n_evals = n_evals
    model_selector.total_eval_time = total_eval_time
    model_selector.total_expansion_time = total_expansion_time
    model_selector.total_model_search_time = total_model_search_time
    model_selector.name = name
    model_selector.built = built
    model_selector.selected_models = [
        GPModel.from_dict(m) for m in selected_models
    ]
    model_selector._x_train_mean = None if x_train_mean is None else np.array(x_train_mean)
    model_selector._x_train_std = None if x_train_std is None else np.array(x_train_std)
    model_selector._y_train_mean = None if y_train_mean is None else np.array(y_train_mean)
    model_selector._y_train_std = None if y_train_std is None else np.array(y_train_std)

    return model_selector
def test_get_index_to_insert_one_item(self):
    # Add one model.
    self.active_set.add_model(GPModel(Covariance(RBF(1))))

    expected_index = 1
    actual_index = self.active_set.get_index_to_insert()
    self.assertEqual(expected_index, actual_index)
def _evaluate_model(self, model: GPModel, verbose: int = 0) -> GPModel:
    """Evaluate a single model on the training data.

    :param model: the GP model to score.
    :param verbose: if nonzero, advance the progress bar after evaluation.
    :return: the evaluated (scored) model.
    """
    t0 = time()
    model.score_model(self._x_train, self._y_train, self.fitness_fn,
                      optimizer=self.optimizer,
                      n_restarts=self.n_restarts_optimizer)
    self.total_eval_time += time() - t0
    self.n_evals += 1
    self.visited.add(model.covariance.symbolic_expr_expanded)

    if verbose:
        self.pbar.update()

    return model
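# Typical call pattern (a sketch; the surrounding search loop and the
# `expanded_covariances` list are assumptions, not code from this module):
#
#   new_models = [self._covariance_to_gp_model(cov) for cov in expanded_covariances]
#   evaluated = [self._evaluate_model(m, verbose=verbose) for m in new_models]
#   best = max(evaluated, key=lambda m: m.score)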
def test_get_index_to_insert_full_with_priority(self):
    # Add five models.
    self.active_set.add_model(GPModel(Covariance(RBF(1))))
    self.active_set.add_model(GPModel(Covariance(RationalQuadratic(1))))
    self.active_set.add_model(GPModel(Covariance(StandardPeriodic(1))))
    self.active_set.add_model(GPModel(Covariance(LinScaleShift(1))))
    self.active_set.add_model(GPModel(Covariance(RBF(1) + RBF(1))))

    remove_priority = [2]
    self.active_set.remove_priority = remove_priority
    actual = self.active_set.get_index_to_insert()
    expected = 2
    self.assertEqual(expected, actual)
    self.assertEqual(self.active_set.remove_priority, [])

    remove_priority = [0, 2, 3]
    self.active_set.remove_priority = remove_priority
    actual = self.active_set.get_index_to_insert()
    expected = 0
    self.assertEqual(expected, actual)
    self.assertEqual(self.active_set.remove_priority, [2, 3])
def setUp(self) -> None:
    self.x = np.array([[1, 2], [4, 5], [6, 7], [8, 9], [10, 11]])
    self.noise_prior = Gaussian(mu=np.log(0.01), sigma=1)

    self.cov_i = RBF(1)
    p1 = LogGaussian(20, 1)
    p2 = LogGaussian(0, 1.1)
    self.cov_i.variance.set_prior(p1, warning=False)
    self.cov_i.lengthscale.set_prior(p2, warning=False)

    models = [GPModel(self.cov_i)]
    self.active_models = ActiveSet(max_n_models=3)
    self.active_models.models = models
    self.ind_init = [0]
def test_add_model_empty(self):
    candidate = GPModel(Covariance(RBF(1)))
    expected_ind, expected_status = 0, True
    actual_ind, actual_status = self.active_set.add_model(candidate)
    self.assertEqual(expected_ind, actual_ind)
    self.assertEqual(expected_status, actual_status)
    expected_models = [candidate, None, None, None, None]
    self.assertListEqual(expected_models, self.active_set.models)
    expected_next_ind = 1
    self.assertEqual(expected_next_ind, self.active_set.get_index_to_insert())
def test_to_dict(self):
    test_cases = (
        (None, 'None score'),
        (10., 'Positive score')
    )
    for score, description in test_cases:
        with self.subTest(description=description):
            kernel = Covariance(RBF(1) * RBF(1) + RationalQuadratic(1))
            gp_model = GPModel(kernel)
            gp_model.score = score

            actual = gp_model.to_dict()

            self.assertIsInstance(actual, dict)
            self.assertIn('likelihood', actual)
            self.assertIn('covariance', actual)
            self.assertIn('score', actual)
            self.assertEqual(None, actual['likelihood'])
            self.assertEqual(gp_model.covariance.to_dict(), actual['covariance'])
            self.assertEqual(gp_model.score, actual['score'])
def test_update_exceed_max_no_remove(self):
    candidates = [
        GPModel(Covariance(RBF(1))),
        GPModel(Covariance(RationalQuadratic(1))),
        GPModel(Covariance(LinScaleShift(1))),
        GPModel(Covariance(StandardPeriodic(1))),
        GPModel(Covariance(RBF(1) + RBF(1))),
        GPModel(Covariance(RBF(1) * RBF(1)))
    ]
    self.assertRaises(ValueError, self.active_set.update, candidates)
def test_get_candidates(self):
    base_kernel_names = ['SE', 'RQ']
    n_dim = 2

    grammar = BomsGrammar(base_kernel_names=base_kernel_names)
    grammar.build(n_dim)
    grammar._number_of_top_k_best = 1
    grammar.num_random_walks = 5

    kernels = grammar.expand_random(grammar._number_of_random_walks)
    fitness_score = np.random.permutation(len(kernels))
    models = [GPModel(kernel) for kernel in kernels]

    candidates = grammar.get_candidates(models)
    for candidate in candidates:
        self.assertIsInstance(candidate, Covariance)
def test_add_model_same(self):
    # TODO: should this actually be same kernel?
    models = [GPModel(Covariance(RBF(1)))] * 2
    self.active_set.add_model(models[0])

    expected_ind = -1
    expected_status = False
    actual_ind, actual_status = self.active_set.add_model(models[1])
    self.assertEqual(expected_ind, actual_ind)
    self.assertEqual(expected_status, actual_status)
    expected_models = [models[0], None, None, None, None]
    self.assertListEqual(expected_models, self.active_set.models)
    expected_next_ind = 1
    self.assertEqual(expected_next_ind, self.active_set.get_index_to_insert())