def setUp(self):
    """Build a two-row dataset and the fixture tree a -> (b, c), c -> (d, e)."""
    features = pd.DataFrame({"a": [1, 2]}).values
    targets = np.array([1, 1])
    self.data = Data(features, targets)

    def fresh_leaf():
        # Each node wraps its own Split over the shared fixture data.
        return LeafNode(Split(self.data))

    self.d = fresh_leaf()
    self.e = fresh_leaf()
    self.c = DecisionNode(Split(self.data), self.d, self.e)
    self.b = fresh_leaf()
    self.a = DecisionNode(Split(self.data), self.b, self.c)
    self.tree = Tree([self.a, self.b, self.c, self.d, self.e])
def test_single_condition_data(self):
    """A Split plus one ``le``/``gt`` condition exposes only the matching rows."""
    data = Data(pd.DataFrame({"a": [1, 2]}).values, np.array([1, 2]))
    at_most_one = SplitCondition(0, 1, le)
    above_one = SplitCondition(0, 1, gt)
    lower_split = Split(data) + at_most_one
    upper_split = Split(data) + above_one
    lower_values = list(lower_split.data.X[:, 0])
    self.assertListEqual([1], lower_values)
    upper_values = list(upper_split.data.X[:, 0])
    self.assertListEqual([2], upper_values)
def test_head_prune(self):
    """Pruning the root of a three-node tree swaps it for the new leaf."""
    b, c = LeafNode(Split(self.data)), LeafNode(Split(self.data))
    a = DecisionNode(Split(self.data), b, c)
    tree = Tree([a, b, c])
    updated_a = LeafNode(Split(self.data))
    prune_mutation = PruneMutation(a, updated_a)
    mutate(tree, prune_mutation)
    self.assertIn(updated_a, tree.leaf_nodes)
    # Assert on the local root that was actually pruned. This tree is built
    # only from the local a/b/c, so checking the fixture's ``self.a`` (as the
    # test previously did) could never fail.
    self.assertNotIn(a, tree.nodes)
def test_grow(self):
    """Growing leaf ``d`` into a decision node registers the new nodes and drops ``d``."""
    new_left = LeafNode(Split(self.data))
    new_right = LeafNode(Split(self.data))
    grown = DecisionNode(Split(self.data), new_left, new_right)
    mutate(self.tree, TreeMutation("grow", self.d, grown))
    # The replacement is a decision node whose children are both leaves,
    # so it is expected in both decision-node collections.
    self.assertIn(grown, self.tree.decision_nodes)
    self.assertIn(grown, self.tree.prunable_decision_nodes)
    self.assertIn(new_left, self.tree.leaf_nodes)
    self.assertNotIn(self.d, self.tree.nodes)
def setUp(self):
    """Build a single-row dataset and the fixture tree a -> (b, c), c -> (d, e)."""
    covariates = format_covariate_matrix(pd.DataFrame({"a": [1]}))
    target = np.array([1]).astype(float)
    self.data = Data(covariates, target)
    # d and e are created with an explicit ``None`` second argument; b is not.
    self.d, self.e = (LeafNode(Split(self.data), None) for _ in range(2))
    self.c = DecisionNode(Split(self.data), self.d, self.e)
    self.b = LeafNode(Split(self.data))
    self.a = DecisionNode(Split(self.data), self.b, self.c)
    all_nodes = [self.a, self.b, self.c, self.d, self.e]
    self.tree = Tree(all_nodes)
def test_pruning_non_leaf_parent(self):
    """PruneMutation raises TypeError for a decision node with a non-leaf child."""
    leaf_one, leaf_two, leaf_three = (
        LeafNode(Split(self.data)) for _ in range(3)
    )
    inner = DecisionNode(Split(self.data), leaf_one, leaf_two)
    # ``outer`` has ``inner`` (a decision node) as one of its children.
    outer = DecisionNode(Split(self.data), leaf_three, inner)
    with self.assertRaises(TypeError):
        PruneMutation(outer, leaf_one)
def test_growing_decision_node(self):
    """GrowMutation raises TypeError when the node to grow is a decision node."""
    leaf_one, leaf_two, leaf_three = (
        LeafNode(Split(self.data)) for _ in range(3)
    )
    inner = DecisionNode(Split(self.data), leaf_one, leaf_two)
    # The enclosing decision node is still constructed, as in the original
    # fixture, even though the assertion below does not reference it.
    outer = DecisionNode(Split(self.data), leaf_three, inner)
    with self.assertRaises(TypeError):
        GrowMutation(inner, leaf_one)
def setUp(self):
    """Build un-normalized two-row data and the fixture tree a -> (b, c), c -> (d, e)."""
    frame = pd.DataFrame({"a": [1, 2]})
    targets = np.array([1, 2])
    self.data = make_bartpy_data(frame, targets, normalize=False)

    def fresh_leaf():
        # Each node wraps its own Split over the shared fixture data.
        return LeafNode(Split(self.data))

    self.d = fresh_leaf()
    self.e = fresh_leaf()
    self.c = DecisionNode(Split(self.data), self.d, self.e)
    self.b = fresh_leaf()
    self.a = DecisionNode(Split(self.data), self.b, self.c)
    self.tree = Tree([self.a, self.b, self.c, self.d, self.e])
def setUp(self):
    """Build 1000 rows of random-normal data and the fixture tree a -> (b, c), c -> (d, e)."""
    # Draw the covariate before the target so the random stream is consumed
    # in the same order as before.
    frame = pd.DataFrame({"a": np.random.normal(size=1000)})
    targets = np.array(np.random.normal(size=1000))
    self.data = make_bartpy_data(frame, targets)

    self.d, self.e = (LeafNode(Split(self.data)) for _ in range(2))
    self.c = DecisionNode(Split(self.data), self.d, self.e)
    self.b = LeafNode(Split(self.data))
    self.a = DecisionNode(Split(self.data), self.b, self.c)
    all_nodes = [self.a, self.b, self.c, self.d, self.e]
    self.tree = Tree(all_nodes)
def initialize_trees(self) -> List[Tree]:
    """Create ``self.n_trees`` single-leaf trees over a rescaled copy of the data.

    A ``copy`` of ``self.data`` has its target replaced (via ``update_y``) by
    the current target divided by ``self.n_trees``; every tree's only leaf is
    then built on that one copied object.

    Returns:
        A list of ``self.n_trees`` trees, each containing a single ``LeafNode``.
    """
    scaled_data = copy(self.data)
    scaled_target = scaled_data.y / self.n_trees
    scaled_data.update_y(scaled_target)

    forest = []
    for _ in range(self.n_trees):
        root = LeafNode(Split(scaled_data))
        forest.append(Tree([root]))
    return forest
def test_same_prediction(self):
    """A bartpy tree mapped from a fitted sklearn tree gives the same rounded predictions."""
    from sklearn.ensemble import GradientBoostingRegressor

    # NOTE(review): newer scikit-learn releases name this loss
    # 'squared_error'; confirm 'ls' is accepted by the pinned version.
    sklearn_model = GradientBoostingRegressor(
        n_estimators=1,
        max_depth=2,
        min_samples_split=2,
        learning_rate=0.8,
        loss='ls',
    )
    sklearn_model.fit(self.data.X.values, self.data.y.values)
    fitted_tree = sklearn_model.estimators_[0][0].tree_

    mapped_tree = Tree([LeafNode(Split(self.data))])
    map_sklearn_tree_into_bartpy(mapped_tree, fitted_tree)

    # Reference predictions from the sklearn tree, flattened and rounded
    # to two decimals.
    raw_expected = fitted_tree.predict(self.data.X.values.astype(np.float32))
    expected = []
    for value in raw_expected.reshape(-1):
        expected.append(round(value, 2))

    # Mark the cache stale before predicting with the mapped tree.
    mapped_tree.cache_up_to_date = False
    actual = []
    for value in mapped_tree.predict(self.data.X.values):
        actual.append(round(value, 2))

    self.assertListEqual(expected, actual)
def initialize_trees(self) -> List[Tree]:
    """Create ``self.n_trees`` single-leaf trees over a rescaled copy of the data.

    A deep copy of ``self.data`` has its ``_y`` replaced by the target divided
    by ``self.n_trees``, and every tree's only leaf is built on that copy, so
    ``self.data`` itself is not rescaled. All trees share the one copied
    object.

    Returns:
        A list of ``self.n_trees`` trees, each containing a single ``LeafNode``.
    """
    tree_data = deepcopy(self.data)
    tree_data._y = tree_data.y / self.n_trees
    # Build the leaves on the rescaled copy. Previously ``self.data`` was
    # passed here, which left ``tree_data`` (and the deep copy) unused.
    return [
        Tree([LeafNode(Split(tree_data))])
        for _ in range(self.n_trees)
    ]
def initialize_trees(self) -> List[Tree]:
    """Create ``self.n_trees`` single-leaf trees, each over its own data copy.

    Every tree gets a separate deep copy of ``self.data`` and is then given
    the target ``self.data.y.values / self.n_trees`` through ``update_y``.

    Returns:
        A list of ``self.n_trees`` trees, each containing a single ``LeafNode``.
    """
    trees = [
        Tree([LeafNode(Split(deepcopy(self.data)))])
        for _ in range(self.n_trees)
    ]
    for tree in trees:
        # Call update_y once with the rescaled target. The previous nested
        # ``tree.update_y(tree.update_y(...))`` passed the inner call's
        # return value back in as the new target.
        # NOTE(review): assumes update_y is a mutator whose return value is
        # not a target array — confirm against Tree.update_y.
        tree.update_y(self.data.y.values / self.n_trees)
    return trees
def test_internal_prune(self):
    """Pruning decision node ``c`` swaps in a leaf and drops ``c``, ``d`` and ``e``."""
    replacement = LeafNode(Split(self.data))
    mutate(self.tree, TreeMutation("prune", self.c, replacement))
    self.assertIn(replacement, self.tree.leaf_nodes)
    # The pruned node and both of its children must no longer be in the tree.
    self.assertNotIn(self.c, self.tree.nodes)
    self.assertNotIn(self.d, self.tree.nodes)
    self.assertNotIn(self.e, self.tree.nodes)
def test_null_split_returns_all_values(self):
    """A Split with no conditions exposes the same column values as its input data."""
    raw_features = pd.DataFrame({"a": [1, 2]}).values
    data = make_bartpy_data(raw_features, np.array([1, 2]))
    unconditioned = Split(data).data
    expected_column = list(data.X.get_column(0))
    actual_column = list(unconditioned.X.get_column(0))
    self.assertListEqual(expected_column, actual_column)
def test_combined_condition_data(self):
    """Two stacked conditions on column 0 leave only the values 2 and 3."""
    raw_features = pd.DataFrame({"a": [1, 2, 3, 4]}).values
    data = make_bartpy_data(raw_features, np.array([1, 2, 1, 1]))

    # All four conditions are constructed, as before; only the ``le`` at 3
    # and the ``gt`` at 1 are applied below.
    le_three = SplitCondition(0, 3, le)
    gt_three = SplitCondition(0, 3, gt)
    le_one = SplitCondition(0, 1, le)
    gt_one = SplitCondition(0, 1, gt)

    base_split = Split(data)
    narrowed_split = base_split + le_three + gt_one
    remaining = list(narrowed_split.data.X.get_column(0))
    self.assertListEqual([2, 3], remaining)
def test_most_recent_split(self):
    """``most_recent_split_condition`` returns the last condition added to a Split."""
    raw_features = pd.DataFrame({"a": [1, 2, 3, 4]}).values
    data = make_bartpy_data(raw_features, np.array([1, 2, 1, 1]))

    # All four conditions are constructed, as before; only the ``le`` at 3
    # and the ``gt`` at 1 are applied below.
    le_three = SplitCondition(0, 3, le)
    gt_three = SplitCondition(0, 3, gt)
    le_one = SplitCondition(0, 1, le)
    gt_one = SplitCondition(0, 1, gt)

    base_split = Split(data)
    twice_conditioned = base_split + le_three + gt_one
    once_conditioned = base_split + le_three
    self.assertEqual(once_conditioned.most_recent_split_condition(), le_three)
    self.assertEqual(twice_conditioned.most_recent_split_condition(), gt_one)
def setUp(self):
    """Build a tree by splitting a root leaf, then growing its right child.

    ``a`` comes from ``split_node`` on a fresh leaf (conditions on column 0,
    value 1); its children are ``b`` and ``x``. ``x`` is then replaced in the
    tree by ``c`` (conditions on column 1, value 2), whose children are ``d``
    and ``e``.
    """
    features = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}).values
    self.data = Data(features, np.array([1, 2, 3]))

    root_leaf = LeafNode(Split(self.data))
    root_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
    self.a = split_node(root_leaf, root_conditions)
    self.b = self.a.left_child
    self.x = self.a.right_child
    self.tree = Tree([self.a, self.b, self.x])

    # The second split starts from the root's ``_right_child`` attribute.
    right_of_root = self.a._right_child
    child_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
    self.c = split_node(right_of_root, child_conditions)
    mutate(self.tree, TreeMutation("grow", self.x, self.c))
    self.d = self.c.left_child
    self.e = self.c.right_child
def setUp(self):
    """Build a tree by splitting a root leaf, then growing its right child.

    ``a`` comes from ``split_node`` on a fresh leaf (conditions on column 0,
    value 1); its children are ``b`` and ``x``. ``x`` is then replaced in the
    tree by ``c`` (conditions on column 1, value 2), whose children are ``d``
    and ``e``.
    """
    frame = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    X = format_covariate_matrix(frame)
    targets = np.array([1, 2, 3]).astype(float)
    self.data = Data(X, targets)

    root_leaf = LeafNode(Split(self.data))
    root_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
    self.a = split_node(root_leaf, root_conditions)
    self.b = self.a.left_child
    self.x = self.a.right_child
    self.tree = Tree([self.a, self.b, self.x])

    # The second split starts from the root's ``_right_child`` attribute.
    right_of_root = self.a._right_child
    child_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
    self.c = split_node(right_of_root, child_conditions)
    mutate(self.tree, TreeMutation("grow", self.x, self.c))
    self.d = self.c.left_child
    self.e = self.c.right_child
def test_pruning_leaf(self):
    """PruneMutation raises TypeError when the node to prune is a leaf."""
    # Build both leaves before entering assertRaises so that only the
    # PruneMutation constructor can satisfy the expected TypeError; a
    # TypeError from LeafNode/Split would otherwise make the test pass.
    existing_leaf = LeafNode(Split(self.data))
    replacement_leaf = LeafNode(Split(self.data))
    with self.assertRaises(TypeError):
        PruneMutation(existing_leaf, replacement_leaf)
def test_invalid_prune(self):
    """PruneMutation raises TypeError when asked to prune ``self.a``."""
    # Construct the replacement outside the assertRaises block so that a
    # TypeError raised while building the fixture cannot satisfy the
    # assertion; only the PruneMutation call is under test.
    updated_a = LeafNode(Split(self.data))
    with self.assertRaises(TypeError):
        PruneMutation(self.a, updated_a)
def setUp(self):
    """Build five rows of single-column data, a Split over it and one leaf node."""
    frame = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
    self.X = format_covariate_matrix(frame)
    # NOTE(review): the matrix is passed through format_covariate_matrix a
    # second time here, exactly as before; this looks redundant but is kept.
    formatted = format_covariate_matrix(self.X)
    targets = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    self.data = Data(formatted, targets)
    self.split = Split(self.data)
    self.node = LeafNode(self.split)
def test_null_split_returns_all_values(self):
    """A Split with no conditions exposes the same first-column values as its input data."""
    features = pd.DataFrame({"a": [1, 2]}).values
    data = Data(features, np.array([1, 2]))
    unconditioned = Split(data).data
    expected_column = list(data.X[:, 0])
    actual_column = list(unconditioned.X[:, 0])
    self.assertListEqual(expected_column, actual_column)