Exemplo n.º 1
0
class TestTreeStructureDataUpdate(TestCase):
    """Check that new target values given to a tree reach each of its nodes.

    The fixture holds three observations. Root ``a`` is split into ``b`` and
    ``x``; ``x`` is then grown into the decision node ``c``, whose children
    are ``d`` and ``e``.
    """

    def setUp(self):
        """Build the data set and the two-level tree used by the test."""
        covariates = format_covariate_matrix(
            pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
        )
        targets = np.array([1, 2, 3]).astype(float)
        self.data = Data(covariates, targets)

        # First level: split a fresh leaf with a <= / > pair of conditions.
        root_leaf = LeafNode(Split(self.data))
        root_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
        self.a = split_node(root_leaf, root_conditions)
        self.b, self.x = self.a.left_child, self.a.right_child
        self.tree = Tree([self.a, self.b, self.x])

        # Second level: split the root's right child and register the
        # change on the tree as a "grow" mutation replacing ``x`` by ``c``.
        node_to_grow = self.a._right_child
        grown_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
        self.c = split_node(node_to_grow, grown_conditions)
        mutate(self.tree, TreeMutation("grow", self.x, self.c))

        self.d, self.e = self.c.left_child, self.c.right_child

    def test_update_pushed_through_split(self):
        """After ``update_y`` every node exposes its own share of the new y."""
        self.tree.update_y(np.array([5, 6, 7]))

        # The root sees the full vector of new targets.
        self.assertListEqual([5, 6, 7], list(self.a.data.y))

        # Each remaining node only sees the observations that satisfy its
        # conditions (the left children keep the <= side).
        expected_by_node = (
            (self.b, [5]),
            (self.c, [6, 7]),
            (self.d, [6]),
            (self.e, [7]),
        )
        for node, expected in expected_by_node:
            self.assertListEqual(expected, list(node.data.y.compressed()))
Exemplo n.º 2
0
    def test_same_prediction(self):
        """A bartpy tree mapped from a fitted sklearn tree predicts the same.

        Fits a single-estimator gradient boosting model, maps its tree into
        a fresh one-leaf bartpy tree, and compares both sets of predictions
        after rounding to two decimals.
        """
        from sklearn.ensemble import GradientBoostingRegressor

        def rounded(values):
            # Rounding absorbs small numeric differences between the two
            # implementations before the element-wise list comparison.
            return [round(value, 2) for value in values]

        # NOTE(review): newer scikit-learn releases spell the 'ls' loss as
        # 'squared_error' -- confirm against the pinned version.
        params = {
            'n_estimators': 1,
            'max_depth': 2,
            'min_samples_split': 2,
            'learning_rate': 0.8,
            'loss': 'ls'
        }
        sklearn_model = GradientBoostingRegressor(**params)
        sklearn_model.fit(self.data.X.values, self.data.y.values)

        sklearn_tree = sklearn_model.estimators_[0][0].tree_
        bartpy_tree = Tree([LeafNode(Split(self.data))])
        map_sklearn_tree_into_bartpy(bartpy_tree, sklearn_tree)

        # The raw sklearn tree is fed float32 input here.
        raw_sklearn = sklearn_tree.predict(
            self.data.X.values.astype(np.float32))
        expected = rounded(raw_sklearn.reshape(-1))

        # Invalidate the cache flag so the prediction reflects the mapped
        # structure.
        bartpy_tree.cache_up_to_date = False
        actual = rounded(bartpy_tree.predict(self.data.X.values))

        self.assertListEqual(expected, actual)
Exemplo n.º 3
0
 def initialize_trees(self) -> List[Tree]:
     """Create ``self.n_trees`` single-leaf trees over rescaled data.

     A deep copy of ``self.data`` has its targets divided by the number of
     trees, and every tree is built on that copy, so ``self.data`` itself is
     left untouched.

     Returns:
         A list of ``self.n_trees`` trees, each holding one leaf node.
     """
     tree_data = deepcopy(self.data)
     tree_data._y = tree_data.y / self.n_trees
     # Build the leaves from the rescaled copy. Previously they were built
     # from ``self.data``, which silently discarded the scaling above.
     trees = [
         Tree([LeafNode(Split(tree_data))]) for _ in range(self.n_trees)
     ]
     return trees
Exemplo n.º 4
0
 def initialize_trees(self) -> List[Tree]:
     """Create ``self.n_trees`` single-leaf trees over rescaled data.

     The targets of a copy of ``self.data`` are replaced by the current
     targets divided by the number of trees; every tree's leaf is built on
     that same copy.

     Returns:
         A list of ``self.n_trees`` trees, each holding one leaf node.
     """
     scaled_data = copy(self.data)
     scaled_data.update_y(scaled_data.y / self.n_trees)

     forest = []
     for _ in range(self.n_trees):
         root = LeafNode(Split(scaled_data))
         forest.append(Tree([root]))
     return forest
Exemplo n.º 5
0
 def setUp(self):
     """Build a five-node fixture tree over a two-row data set.

     ``a`` is a decision node constructed with ``b`` and ``c``; ``c`` is a
     decision node constructed with the leaves ``d`` and ``e``.
     """
     covariates = pd.DataFrame({"a": [1, 2]}).values
     self.data = Data(covariates, np.array([1, 1]))

     def new_split():
         # Every node gets its own Split over the shared data.
         return Split(self.data)

     # Construct bottom-up so children exist before their parents.
     self.d, self.e = LeafNode(new_split()), LeafNode(new_split())
     self.c = DecisionNode(new_split(), self.d, self.e)
     self.b = LeafNode(new_split())
     self.a = DecisionNode(new_split(), self.b, self.c)

     all_nodes = [self.a, self.b, self.c, self.d, self.e]
     self.tree = Tree(all_nodes)
Exemplo n.º 6
0
 def initialize_trees(self) -> List[Tree]:
     """Create ``self.n_trees`` single-leaf trees with rescaled targets.

     Each tree is built on its own deep copy of ``self.data`` and then has
     its targets set to the original targets divided by the number of trees.

     Returns:
         A list of ``self.n_trees`` trees, each holding one leaf node.
     """
     trees = [
         Tree([LeafNode(Split(deepcopy(self.data)))])
         for _ in range(self.n_trees)
     ]
     # The divisor does not depend on the tree, so compute the value once.
     initial_y = self.data.y.values / self.n_trees
     for tree in trees:
         # Call update_y exactly once. The previous nested form fed the
         # inner call's return value into a second update_y, overwriting the
         # scaled targets that had just been set.
         tree.update_y(initial_y)
     return trees
Exemplo n.º 7
0
 def test_head_prune(self):
     """Pruning the root of a three-node tree swaps it for the new leaf."""
     b, c = LeafNode(Split(self.data)), LeafNode(Split(self.data))
     a = DecisionNode(Split(self.data), b, c)
     tree = Tree([a, b, c])
     updated_a = LeafNode(Split(self.data))
     prune_mutation = PruneMutation(a, updated_a)
     mutate(tree, prune_mutation)
     self.assertIn(updated_a, tree.leaf_nodes)
     # Check the node that was actually pruned from this tree. The former
     # assertion looked at ``self.a``, which was never part of ``tree`` and
     # therefore could not fail.
     self.assertNotIn(a, tree.nodes)
Exemplo n.º 8
0
 def setUp(self):
     """Build a five-node fixture tree over a single-observation data set.

     ``a`` is a decision node constructed with ``b`` and ``c``; ``c`` is a
     decision node constructed with the leaves ``d`` and ``e``.
     """
     covariates = format_covariate_matrix(pd.DataFrame({"a": [1]}))
     targets = np.array([1]).astype(float)
     self.data = Data(covariates, targets)

     # Lower level first: the two leaves that hang below ``c``.
     self.d = LeafNode(Split(self.data), None)
     self.e = LeafNode(Split(self.data), None)
     self.c = DecisionNode(Split(self.data), self.d, self.e)

     # Upper level: ``b`` and ``c`` are handed to the root ``a``.
     self.b = LeafNode(Split(self.data))
     self.a = DecisionNode(Split(self.data), self.b, self.c)

     nodes = [self.a, self.b, self.c, self.d, self.e]
     self.tree = Tree(nodes)
Exemplo n.º 9
0
 def setUp(self):
     """Build a five-node fixture tree over 1000 random observations.

     ``a`` is a decision node constructed with ``b`` and ``c``; ``c`` is a
     decision node constructed with the leaves ``d`` and ``e``.
     """
     # Draw the covariate before the target so the random stream is
     # consumed in a fixed order.
     features = pd.DataFrame({"a": np.random.normal(size=1000)})
     target = np.array(np.random.normal(size=1000))
     self.data = make_bartpy_data(features, target)

     def leaf():
         # Each leaf wraps its own Split over the shared data.
         return LeafNode(Split(self.data))

     self.d, self.e = leaf(), leaf()
     self.c = DecisionNode(Split(self.data), self.d, self.e)
     self.b = leaf()
     self.a = DecisionNode(Split(self.data), self.b, self.c)
     self.tree = Tree([self.a, self.b, self.c, self.d, self.e])
Exemplo n.º 10
0
 def setUp(self):
     """Build a five-node fixture tree over a two-row data set.

     The data is created with ``normalize=False``. ``a`` is a decision node
     constructed with ``b`` and ``c``; ``c`` is a decision node constructed
     with the leaves ``d`` and ``e``.
     """
     features = pd.DataFrame({"a": [1, 2]})
     target = np.array([1, 2])
     self.data = make_bartpy_data(features, target, normalize=False)

     # Subtree under ``c``.
     left_leaf = LeafNode(Split(self.data))
     right_leaf = LeafNode(Split(self.data))
     inner = DecisionNode(Split(self.data), left_leaf, right_leaf)
     self.d, self.e, self.c = left_leaf, right_leaf, inner

     # Root ``a`` with its leaf ``b``.
     self.b = LeafNode(Split(self.data))
     self.a = DecisionNode(Split(self.data), self.b, self.c)

     self.tree = Tree([self.a, self.b, self.c, self.d, self.e])
Exemplo n.º 11
0
    def setUp(self):
        """Build a two-level tree over a three-row, two-column data set.

        Root ``a`` is split into ``b`` and ``x``; ``x`` is then grown into
        the decision node ``c``, whose children are ``d`` and ``e``.
        """
        frame = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
        self.data = Data(frame.values, np.array([1, 2, 3]))

        # First level: split a fresh leaf with a <= / > pair of conditions.
        starting_leaf = LeafNode(Split(self.data))
        first_conditions = (
            SplitCondition(0, 1, le),
            SplitCondition(0, 1, gt),
        )
        self.a = split_node(starting_leaf, first_conditions)
        self.b = self.a.left_child
        self.x = self.a.right_child
        self.tree = Tree([self.a, self.b, self.x])

        # Second level: split the root's right child, then record the
        # replacement of ``x`` by ``c`` on the tree as a "grow" mutation.
        grow_target = self.a._right_child
        second_conditions = (
            SplitCondition(1, 2, le),
            SplitCondition(1, 2, gt),
        )
        self.c = split_node(grow_target, second_conditions)
        grow = TreeMutation("grow", self.x, self.c)
        mutate(self.tree, grow)

        self.d = self.c.left_child
        self.e = self.c.right_child
Exemplo n.º 12
0
    def setUp(self):
        """Build a two-level tree over three formatted observations.

        Root ``a`` is split into ``b`` and ``x``; ``x`` is then grown into
        the decision node ``c``, whose children are ``d`` and ``e``.
        """
        raw_frame = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
        covariates = format_covariate_matrix(raw_frame)
        self.data = Data(covariates, np.array([1, 2, 3]).astype(float))

        def condition_pair(first, second):
            # Build the (<=, >) pair of conditions for one split.
            return (SplitCondition(first, second, le),
                    SplitCondition(first, second, gt))

        # First level of the tree.
        base_leaf = LeafNode(Split(self.data))
        self.a = split_node(base_leaf, condition_pair(0, 1))
        self.b, self.x = self.a.left_child, self.a.right_child
        self.tree = Tree([self.a, self.b, self.x])

        # Second level: grow the root's right child into ``c`` and register
        # the change on the tree.
        self.c = split_node(self.a._right_child, condition_pair(1, 2))
        mutate(self.tree, TreeMutation("grow", self.x, self.c))

        self.d, self.e = self.c.left_child, self.c.right_child