class TestTreeStructureDataUpdate(TestCase):
    """Check that new target values given to a tree reach every node in it."""

    def setUp(self):
        """Build a root split on column 0, then grow its right child on column 1."""
        covariates = format_covariate_matrix(
            pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
        )
        target = np.array([1, 2, 3]).astype(float)
        self.data = Data(covariates, target)

        # Root node: column 0 compared with 1 (`le` for the left child,
        # `gt` for the right child).
        root_leaf = LeafNode(Split(self.data))
        root_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
        self.a = split_node(root_leaf, root_conditions)
        self.b = self.a.left_child
        self.x = self.a.right_child
        self.tree = Tree([self.a, self.b, self.x])

        # Split the root's right child on column 1 at value 2 and register
        # the result in the tree through a "grow" mutation replacing `x`.
        right_leaf = self.a._right_child
        grow_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
        self.c = split_node(right_leaf, grow_conditions)
        mutate(self.tree, TreeMutation("grow", self.x, self.c))
        self.d = self.c.left_child
        self.e = self.c.right_child

    def test_update_pushed_through_split(self):
        """After `update_y`, each node exposes only the targets of its own rows."""
        self.tree.update_y(np.array([5, 6, 7]))

        # The root holds every observation.
        self.assertListEqual([5, 6, 7], list(self.a.data.y))

        # Below the root, `compressed()` is used to read the values each node
        # keeps; the left children are the ones built with the `le` condition.
        expected_by_node = (
            ([5], self.b),
            ([6, 7], self.c),
            ([6], self.d),
            ([7], self.e),
        )
        for expected, node in expected_by_node:
            self.assertListEqual(expected, list(node.data.y.compressed()))
def test_same_prediction(self):
    """A bartpy tree mapped from a fitted sklearn tree gives the same predictions.

    Fits a single-estimator gradient boosting model on the fixture data,
    copies its first regression tree into a fresh one-leaf bartpy tree with
    `map_sklearn_tree_into_bartpy`, and compares both sets of predictions
    after rounding to two decimals.
    """
    from sklearn.ensemble import GradientBoostingRegressor

    # The loss is left at its default (least squares). Passing the old
    # 'ls' alias explicitly fails on scikit-learn >= 1.2, where the alias
    # was removed in favour of 'squared_error'; relying on the default
    # works on both old and new releases.
    params = {
        'n_estimators': 1,
        'max_depth': 2,
        'min_samples_split': 2,
        'learning_rate': 0.8,
    }
    sklearn_model = GradientBoostingRegressor(**params)
    sklearn_model.fit(self.data.X.values, self.data.y.values)

    sklearn_tree = sklearn_model.estimators_[0][0].tree_
    bartpy_tree = Tree([LeafNode(Split(self.data))])
    map_sklearn_tree_into_bartpy(bartpy_tree, sklearn_tree)

    # The low-level sklearn tree is given float32 input here.
    sklearn_predictions = sklearn_tree.predict(
        self.data.X.values.astype(np.float32))
    sklearn_predictions = [
        round(x, 2) for x in sklearn_predictions.reshape(-1)
    ]

    # Mark the cache stale before predicting from the remapped tree.
    bartpy_tree.cache_up_to_date = False
    bartpy_tree_predictions = bartpy_tree.predict(self.data.X.values)
    bartpy_tree_predictions = [
        round(x, 2) for x in bartpy_tree_predictions
    ]

    self.assertListEqual(sklearn_predictions, bartpy_tree_predictions)
def initialize_trees(self) -> List[Tree]:
    """Create `self.n_trees` single-leaf trees over an evenly divided target.

    The data is deep-copied and its target divided by the number of trees
    before the trees are built, leaving `self.data` untouched.

    Returns:
        A list of `self.n_trees` trees, each consisting of one leaf node.
    """
    tree_data = deepcopy(self.data)
    tree_data._y = tree_data.y / self.n_trees
    # Build the leaves from the scaled copy. Previously they were built from
    # `self.data`, which left the scaled copy above unused.
    # NOTE(review): every tree wraps the same `tree_data` object - confirm
    # that sharing it between trees is intended.
    trees = [
        Tree([LeafNode(Split(tree_data))]) for _ in range(self.n_trees)
    ]
    return trees
def initialize_trees(self) -> List[Tree]:
    """Return `self.n_trees` one-leaf trees built on a copy of the data.

    The copy's target is replaced, via `update_y`, with the current target
    divided by the number of trees. All trees are built from that one copy.
    """
    scaled_data = copy(self.data)
    scaled_data.update_y(scaled_data.y / self.n_trees)

    forest = []
    for _ in range(self.n_trees):
        root = LeafNode(Split(scaled_data))
        forest.append(Tree([root]))
    return forest
def setUp(self):
    """Create a five-node fixture tree over a two-row, one-column dataset.

    Layout: decision node `a` has children `b` (leaf) and `c` (decision
    node), and `c` has the leaf children `d` and `e`.
    """
    covariates = pd.DataFrame({"a": [1, 2]}).values
    self.data = Data(covariates, np.array([1, 1]))

    # Nodes are created bottom-up so each decision node can receive its
    # already-built children.
    self.d = LeafNode(Split(self.data))
    self.e = LeafNode(Split(self.data))
    self.c = DecisionNode(Split(self.data), self.d, self.e)
    self.b = LeafNode(Split(self.data))
    self.a = DecisionNode(Split(self.data), self.b, self.c)

    all_nodes = [self.a, self.b, self.c, self.d, self.e]
    self.tree = Tree(all_nodes)
def initialize_trees(self) -> List[Tree]:
    """Create `self.n_trees` single-leaf trees, each over its own data copy.

    Every tree gets an independent deep copy of `self.data`, and its target
    is then set to the original target divided by the number of trees.

    Returns:
        A list of `self.n_trees` trees, each consisting of one leaf node.
    """
    trees = [
        Tree([LeafNode(Split(deepcopy(self.data)))])
        for _ in range(self.n_trees)
    ]
    for tree in trees:
        # One call only: the previous nested form passed the return value
        # of the inner `update_y` call into a second `update_y`, replacing
        # the target that had just been set.
        tree.update_y(self.data.y.values / self.n_trees)
    return trees
def test_head_prune(self):
    """Pruning the root of a three-node tree replaces it with the new leaf."""
    b, c = LeafNode(Split(self.data)), LeafNode(Split(self.data))
    a = DecisionNode(Split(self.data), b, c)
    tree = Tree([a, b, c])

    updated_a = LeafNode(Split(self.data))
    prune_mutation = PruneMutation(a, updated_a)
    mutate(tree, prune_mutation)

    self.assertIn(updated_a, tree.leaf_nodes)
    # Check the local root `a`. The previous assertion used `self.a`, a
    # different object that was never put into this local tree, so it
    # passed regardless of what the mutation did.
    self.assertNotIn(a, tree.nodes)
def setUp(self):
    """Create a five-node fixture tree over a single-row dataset.

    Layout: decision node `a` has children `b` (leaf) and `c` (decision
    node), and `c` has the leaf children `d` and `e`.
    """
    covariates = format_covariate_matrix(pd.DataFrame({"a": [1]}))
    target = np.array([1]).astype(float)
    self.data = Data(covariates, target)

    # `d` and `e` are built with an explicit None second argument; `b`
    # relies on LeafNode's default instead.
    self.d = LeafNode(Split(self.data), None)
    self.e = LeafNode(Split(self.data), None)
    self.c = DecisionNode(Split(self.data), self.d, self.e)
    self.b = LeafNode(Split(self.data))
    self.a = DecisionNode(Split(self.data), self.b, self.c)

    all_nodes = [self.a, self.b, self.c, self.d, self.e]
    self.tree = Tree(all_nodes)
def setUp(self):
    """Create a five-node fixture tree over 1000 rows of random normal data.

    Layout: decision node `a` has children `b` (leaf) and `c` (decision
    node), and `c` has the leaf children `d` and `e`.
    """
    # Covariate first, target second: the draw order is kept so the random
    # stream is consumed exactly as before.
    covariates = pd.DataFrame({"a": np.random.normal(size=1000)})
    target = np.array(np.random.normal(size=1000))
    self.data = make_bartpy_data(covariates, target)

    self.d = LeafNode(Split(self.data))
    self.e = LeafNode(Split(self.data))
    self.c = DecisionNode(Split(self.data), self.d, self.e)
    self.b = LeafNode(Split(self.data))
    self.a = DecisionNode(Split(self.data), self.b, self.c)

    all_nodes = [self.a, self.b, self.c, self.d, self.e]
    self.tree = Tree(all_nodes)
def setUp(self):
    """Create a five-node fixture tree over two rows of un-normalized data.

    Layout: decision node `a` has children `b` (leaf) and `c` (decision
    node), and `c` has the leaf children `d` and `e`.
    """
    covariates = pd.DataFrame({"a": [1, 2]})
    target = np.array([1, 2])
    # normalize=False is passed through to make_bartpy_data unchanged.
    self.data = make_bartpy_data(covariates, target, normalize=False)

    self.d = LeafNode(Split(self.data))
    self.e = LeafNode(Split(self.data))
    self.c = DecisionNode(Split(self.data), self.d, self.e)
    self.b = LeafNode(Split(self.data))
    self.a = DecisionNode(Split(self.data), self.b, self.c)

    all_nodes = [self.a, self.b, self.c, self.d, self.e]
    self.tree = Tree(all_nodes)
def setUp(self):
    """Build a root split on column 0, then grow its right child on column 1.

    Resulting attributes: `a` is the root, `b` and `x` its original
    children, `c` the split node that replaces `x` through a "grow"
    mutation, and `d` / `e` the children of `c`.
    """
    covariates = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}).values
    self.data = Data(covariates, np.array([1, 2, 3]))

    # Root node: column 0 compared with 1 (`le` / `gt` condition pair).
    root_leaf = LeafNode(Split(self.data))
    root_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
    self.a = split_node(root_leaf, root_conditions)
    self.b = self.a.left_child
    self.x = self.a.right_child
    self.tree = Tree([self.a, self.b, self.x])

    # Split the root's right child on column 1 at value 2 and register the
    # result in the tree.
    right_leaf = self.a._right_child
    grow_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
    self.c = split_node(right_leaf, grow_conditions)
    mutate(self.tree, TreeMutation("grow", self.x, self.c))
    self.d = self.c.left_child
    self.e = self.c.right_child
def setUp(self):
    """Build a root split on column 0, then grow its right child on column 1.

    Resulting attributes: `a` is the root, `b` and `x` its original
    children, `c` the split node that replaces `x` through a "grow"
    mutation, and `d` / `e` the children of `c`.
    """
    covariates = format_covariate_matrix(
        pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    )
    target = np.array([1, 2, 3]).astype(float)
    self.data = Data(covariates, target)

    # Root node: column 0 compared with 1 (`le` / `gt` condition pair).
    root_leaf = LeafNode(Split(self.data))
    root_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
    self.a = split_node(root_leaf, root_conditions)
    self.b = self.a.left_child
    self.x = self.a.right_child
    self.tree = Tree([self.a, self.b, self.x])

    # Split the root's right child on column 1 at value 2 and register the
    # result in the tree.
    right_leaf = self.a._right_child
    grow_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
    self.c = split_node(right_leaf, grow_conditions)
    mutate(self.tree, TreeMutation("grow", self.x, self.c))
    self.d = self.c.left_child
    self.e = self.c.right_child