Exemplo n.º 1
0
 def setUp(self):
     """Create a one-row data set and a five-node tree fixture (a, b, c, d, e)."""
     data = Data(pd.DataFrame({"a": [1]}).values, np.array([1]))
     self.data = data

     # Build bottom-up: d and e are the children handed to c,
     # b and c are the children handed to the root a.
     d = LeafNode(Split(data), None)
     e = LeafNode(Split(data), None)
     c = DecisionNode(Split(data), d, e)
     b = LeafNode(Split(data))
     a = DecisionNode(Split(data), b, c)

     self.d, self.e, self.c, self.b, self.a = d, e, c, b, a
     self.tree = Tree([a, b, c, d, e])
Exemplo n.º 2
0
    def test_growing_decision_node(self):
        # GrowMutation is handed a DecisionNode as the node to grow; the test
        # expects that to raise TypeError.
        leaf_a = LeafNode(Split(self.data))
        leaf_b = LeafNode(Split(self.data))
        leaf_c = LeafNode(Split(self.data))
        inner = DecisionNode(Split(self.data), leaf_a, leaf_b)
        # The parent of `inner` is still constructed, although the assertion
        # below never reads it.
        _parent = DecisionNode(Split(self.data), leaf_c, inner)

        with self.assertRaises(TypeError):
            GrowMutation(inner, leaf_a)
Exemplo n.º 3
0
 def test_head_prune(self):
     # Prune the root of a freshly built three-node tree and check that the
     # replacement leaf is present and the old root is gone.
     b, c = LeafNode(Split(self.data)), LeafNode(Split(self.data))
     a = DecisionNode(Split(self.data), b, c)
     tree = Tree([a, b, c])
     updated_a = LeafNode(Split(self.data))
     prune_mutation = PruneMutation(a, updated_a)
     mutate(tree, prune_mutation)
     self.assertIn(updated_a, tree.leaf_nodes)
     # Assert on the local root `a`: the fixture node `self.a` was never put
     # into this local tree, so checking it could not detect a failed prune.
     self.assertNotIn(a, tree.nodes)
Exemplo n.º 4
0
 def setUp(self):
     """Create a one-row float data set and a five-node tree fixture (a, b, c, d, e)."""
     covariates = format_covariate_matrix(pd.DataFrame({"a": [1]}))
     target = np.array([1]).astype(float)
     data = Data(covariates, target)
     self.data = data

     # Build bottom-up: d and e are the children handed to c,
     # b and c are the children handed to the root a.
     d = LeafNode(Split(data), None)
     e = LeafNode(Split(data), None)
     c = DecisionNode(Split(data), d, e)
     b = LeafNode(Split(data))
     a = DecisionNode(Split(data), b, c)

     self.d, self.e, self.c, self.b, self.a = d, e, c, b, a
     self.tree = Tree([a, b, c, d, e])
Exemplo n.º 5
0
 def test_grow(self):
     # Replace leaf `self.d` with a decision node that owns two new leaves,
     # then check the tree's node collections reflect the swap.
     first_leaf = LeafNode(Split(self.data))
     second_leaf = LeafNode(Split(self.data))
     grown = DecisionNode(Split(self.data), first_leaf, second_leaf)

     mutate(self.tree, TreeMutation("grow", self.d, grown))

     self.assertIn(grown, self.tree.decision_nodes)
     self.assertIn(grown, self.tree.prunable_decision_nodes)
     self.assertIn(first_leaf, self.tree.leaf_nodes)
     self.assertNotIn(self.d, self.tree.nodes)
Exemplo n.º 6
0
    def test_pruning_non_leaf_parent(self):
        # `outer` has a DecisionNode (`inner`) among its children; the test
        # expects PruneMutation to reject it with TypeError.
        leaf_a = LeafNode(Split(self.data))
        leaf_b = LeafNode(Split(self.data))
        leaf_c = LeafNode(Split(self.data))
        inner = DecisionNode(Split(self.data), leaf_a, leaf_b)
        outer = DecisionNode(Split(self.data), leaf_c, inner)

        with self.assertRaises(TypeError):
            PruneMutation(outer, leaf_a)
Exemplo n.º 7
0
def sample_split_node(node: LeafNode) -> DecisionNode:
    """
    Turn a leaf node into a decision node with two leaf children.

    If the leaf reports itself splittable, split conditions are drawn with
    ``sample_split_condition`` and applied through ``split_node``. Otherwise
    the returned decision node reuses the leaf's own split for itself and for
    both children, which are placed one level deeper than the leaf.
    """
    if not node.is_splittable():
        child_depth = node.depth + 1
        first_child = LeafNode(node.split, depth=child_depth)
        second_child = LeafNode(node.split, depth=child_depth)
        return DecisionNode(node.split, first_child, second_child,
                            depth=node.depth)

    return split_node(node, sample_split_condition(node))
Exemplo n.º 8
0
    def test_same_prediction(self):
        # Fit a single-estimator sklearn gradient-boosting model, map its tree
        # into a one-leaf bartpy tree, and require both to give the same
        # predictions once rounded to two decimals.
        from sklearn.ensemble import GradientBoostingRegressor

        def to_two_decimals(values):
            return [round(value, 2) for value in values]

        # NOTE(review): 'ls' was renamed to 'squared_error' in scikit-learn 1.0
        # and later removed -- confirm against the pinned scikit-learn version.
        sklearn_model = GradientBoostingRegressor(
            n_estimators=1,
            max_depth=2,
            min_samples_split=2,
            learning_rate=0.8,
            loss='ls',
        )
        sklearn_model.fit(self.data.X.values, self.data.y.values)

        sklearn_tree = sklearn_model.estimators_[0][0].tree_
        bartpy_tree = Tree([LeafNode(Split(self.data))])
        map_sklearn_tree_into_bartpy(bartpy_tree, sklearn_tree)

        raw_sklearn = sklearn_tree.predict(
            self.data.X.values.astype(np.float32))
        sklearn_predictions = to_two_decimals(raw_sklearn.reshape(-1))

        # Mark the cache stale before asking the bartpy tree to predict.
        bartpy_tree.cache_up_to_date = False
        raw_bartpy = bartpy_tree.predict(self.data.X.values)
        bartpy_tree_predictions = to_two_decimals(raw_bartpy)

        self.assertListEqual(sklearn_predictions, bartpy_tree_predictions)
Exemplo n.º 9
0
 def test_internal_prune(self):
     # Prune the internal node `self.c`: its replacement leaf must be in the
     # tree, and `self.c`, `self.d` and `self.e` must all be gone.
     replacement = LeafNode(Split(self.data))
     mutate(self.tree, TreeMutation("prune", self.c, replacement))

     self.assertIn(replacement, self.tree.leaf_nodes)
     for removed in (self.c, self.d, self.e):
         self.assertNotIn(removed, self.tree.nodes)
Exemplo n.º 10
0
    def setUp(self):
        """Build a tree by splitting a root leaf, then growing its right child."""
        frame = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
        self.data = Data(frame.values, np.array([1, 2, 3]))

        # First split: on variable index 0 at value 1.
        root_leaf = LeafNode(Split(self.data))
        root_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
        self.a = split_node(root_leaf, root_conditions)
        self.b = self.a.left_child
        self.x = self.a.right_child
        self.tree = Tree([self.a, self.b, self.x])

        # Second split: the root's right child, on variable index 1 at value 2.
        right_leaf = self.a._right_child
        inner_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
        self.c = split_node(right_leaf, inner_conditions)
        grow = TreeMutation("grow", self.x, self.c)
        mutate(self.tree, grow)

        self.d = self.c.left_child
        self.e = self.c.right_child
Exemplo n.º 11
0
    def setUp(self):
        """Build a tree by splitting a root leaf, then growing its right child."""
        frame = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
        covariates = format_covariate_matrix(frame)
        self.data = Data(covariates, np.array([1, 2, 3]).astype(float))

        # First split: on variable index 0 at value 1.
        root_leaf = LeafNode(Split(self.data))
        root_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
        self.a = split_node(root_leaf, root_conditions)
        self.b = self.a.left_child
        self.x = self.a.right_child
        self.tree = Tree([self.a, self.b, self.x])

        # Second split: the root's right child, on variable index 1 at value 2.
        right_leaf = self.a._right_child
        inner_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
        self.c = split_node(right_leaf, inner_conditions)
        grow = TreeMutation("grow", self.x, self.c)
        mutate(self.tree, grow)

        self.d = self.c.left_child
        self.e = self.c.right_child
Exemplo n.º 12
0
 def test_pruning_leaf(self):
     # Passing a LeafNode as the node to prune is expected to raise TypeError.
     # The leaves are built outside the assertRaises block so that only the
     # PruneMutation call itself can satisfy the assertion.
     existing_leaf = LeafNode(Split(self.data))
     replacement_leaf = LeafNode(Split(self.data))
     with self.assertRaises(TypeError):
         PruneMutation(existing_leaf, replacement_leaf)
Exemplo n.º 13
0
def uniformly_sample_prune_mutation(tree: Tree) -> TreeMutation:
    """
    Build a prune mutation for a node chosen by ``random_prunable_decision_node``.

    The chosen node is paired with a new leaf that carries the same split and
    depth as the node it replaces.
    """
    target = random_prunable_decision_node(tree)
    replacement = LeafNode(target.split, depth=target.depth)
    return PruneMutation(target, replacement)
Exemplo n.º 14
0
def prune_mutations(tree: Tree) -> List[TreeMutation]:
    """
    Return one prune mutation per entry in ``tree.prunable_decision_nodes``.

    Each mutation replaces its node with a new leaf that keeps the node's
    split and depth.
    """
    mutations = []
    for prunable in tree.prunable_decision_nodes:
        replacement = LeafNode(prunable.split, depth=prunable.depth)
        mutations.append(PruneMutation(prunable, replacement))
    return mutations
Exemplo n.º 15
0
 def test_invalid_prune(self):
     # Constructing a PruneMutation for `self.a` is expected to raise
     # TypeError. The replacement leaf is built outside the assertRaises
     # block so an error from LeafNode/Split cannot make the test pass.
     updated_a = LeafNode(Split(self.data))
     with self.assertRaises(TypeError):
         PruneMutation(self.a, updated_a)
Exemplo n.º 16
0
 def step(self, model: Model, node: LeafNode) -> float:
     """
     Draw a value for ``node`` via ``self.sample`` and store it on the node.

     Returns the drawn value after passing it to ``node.set_value``.
     """
     drawn = self.sample(model, node)
     node.set_value(drawn)
     return drawn
Exemplo n.º 17
0
 def setUp(self):
     """Create a five-row single-column data set, a Split over it and a leaf node."""
     frame = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
     self.X = format_covariate_matrix(frame)
     # NOTE(review): self.X is already formatted and is formatted again here;
     # presumably format_covariate_matrix is idempotent -- confirm.
     covariates = format_covariate_matrix(self.X)
     target = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
     self.data = Data(covariates, target)
     self.split = Split(self.data)
     self.node = LeafNode(self.split)
Exemplo n.º 18
0
 def step(self, model: Model, node: LeafNode):
     """Draw a value for ``node`` via ``self.sample`` and store it with ``node.set_value``."""
     drawn = self.sample(model, node)
     node.set_value(drawn)