def run_tree(node: Snode):
    """Walk the tree and check that impure nodes carry a fitted classifier.

    Every visited node whose belief is below 1 must have a classifier with
    a non-null ``coef_``. Leaves stop the recursion; for any other node the
    up branch is visited before the down branch. The assertions use
    ``self`` taken from the enclosing scope.

    Parameters
    ----------
    node : Snode
        node to check
    """
    # Only pure leaves (belief == 1) are exempt from the classifier checks
    is_impure = node._belief < 1
    if is_impure:
        classifier = node._clf
        self.assertIsNotNone(classifier)
        self.assertIsNotNone(node._clf.coef_)
    if not node.is_leaf():
        # Getters are called lazily so each child is fetched right before
        # its own subtree is walked
        for fetch_child in (node.get_up, node.get_down):
            run_tree(fetch_child())
def test_copy_node(self):
    """Snode.copy must reproduce data, title, classifier type and settings."""
    samples = [1, 2, 3, 4]
    labels = [1]
    original = Snode(Stree(), samples, labels, [], 0.0, "test")
    clone = Snode.copy(original)
    # Data and title are carried over
    self.assertListEqual(clone._X, samples)
    self.assertListEqual(clone._y, labels)
    self.assertEqual("test", clone._title)
    self.assertIsInstance(clone._clf, Stree)
    # Remaining attributes must match the source node one by one
    for attribute in ("_partition_column", "_sample_weight", "_scaler"):
        self.assertEqual(
            getattr(original, attribute), getattr(clone, attribute)
        )
def test_make_predictor_on_not_leaf(self):
    """make_predictor on a node with an up child must not set a class."""
    parent = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
    child = Snode(None, [1], [1], [], 0.0, "another_test")
    parent.set_up(child)
    parent.make_predictor()
    # No class and zero belief are expected on the non-leaf node
    self.assertIsNone(parent._class)
    self.assertEqual(0, parent._belief)
    # Partition column is -1 for both the node and its up child
    self.assertEqual(-1, parent._partition_column)
    up_child = parent.get_up()
    self.assertEqual(-1, up_child._partition_column)
def check_leave(node: Snode):
    """Recursively validate belief, class and partition column of leaves.

    Non-leaf nodes are only traversed (down branch first, then up branch).
    For a leaf, the expected belief is recomputed from the label counts of
    ``node._y``: ``max / (max + min)`` when more than one label is present,
    otherwise 1. The expected class is the label holding the largest count.
    The assertions use ``self`` taken from the enclosing scope.

    Parameters
    ----------
    node : Snode
        node to check
    """
    if not node.is_leaf():
        # Fetch each child just before descending into it
        for fetch_child in (node.get_down, node.get_up):
            check_leave(fetch_child())
        return
    labels, counts = np.unique(node._y, return_counts=True)
    largest = max(counts)
    smallest = min(counts)
    # Belief of the leaf
    expected_belief = largest / (largest + smallest) if len(labels) > 1 else 1
    self.assertEqual(expected_belief, node._belief)
    # Class of the leaf: label(s) whose count equals the largest count
    expected_class = labels[counts == largest]
    self.assertEqual(expected_class, node._class)
    # A leaf must report -1 as its partition column
    self.assertEqual(node._partition_column, -1)
def _check_tree(self, node: Snode):
    """Check recursively that the nodes that are not leaves have the
    correct number of labels and its sons have the right number of elements
    in their dataset

    For every non-leaf node three things are verified:

    * the sizes of the down and up children add up to the node's size,
    * for each label of the node, its count equals the sum of the counts
      of that label in both children,
    * the counts of the classifier predictions on the node's data match
      the sizes of the children.

    Parameters
    ----------
    node : Snode
        node to check
    """
    if node.is_leaf():
        return
    y_prediction = node._clf.predict(node._X)
    y_down = node.get_down()._y
    y_up = node.get_up()._y
    # Is a correct partition in terms of cardinality?
    # i.e. The partition algorithm didn't forget any sample
    self.assertEqual(node._y.shape[0], y_down.shape[0] + y_up.shape[0])
    unique_y, count_y = np.unique(node._y, return_counts=True)
    labels_d, count_d = np.unique(y_down, return_counts=True)
    labels_u, count_u = np.unique(y_up, return_counts=True)
    dict_d = dict(zip(labels_d, count_d))
    dict_u = dict(zip(labels_u, count_u))
    # Pair each label with its own count by position: np.unique aligns
    # both arrays, so using the label value as an index is only right
    # when the labels happen to be exactly 0..k-1
    for label, count in zip(unique_y, count_y):
        # A label missing from one of the children contributes 0
        number_up = dict_u.get(label, 0)
        number_down = dict_d.get(label, 0)
        self.assertEqual(count, number_down + number_up)
    # Is the partition made the same as the prediction?
    # as the node is not a leaf...
    # The second (largest) predicted label is compared against the up
    # child and the first against the down child; this expects exactly
    # two distinct predicted labels
    _, count_yp = np.unique(y_prediction, return_counts=True)
    self.assertEqual(count_yp[1], y_up.shape[0])
    self.assertEqual(count_yp[0], y_down.shape[0])
    self._check_tree(node.get_down())
    self._check_tree(node.get_up())
def test_set_features(self):
    """get_features must return the constructor value, then the set one."""
    node = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [0, 1], 0.0, "test")
    features_before = node.get_features()
    self.assertListEqual([0, 1], features_before)
    node.set_features([1, 2])
    features_after = node.get_features()
    self.assertListEqual([1, 2], features_after)
def test_set_impurity(self):
    """get_impurity must return the constructor value, then the set one."""
    node = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
    impurity_before = node.get_impurity()
    self.assertEqual(0.0, impurity_before)
    node.set_impurity(54.7)
    impurity_after = node.get_impurity()
    self.assertEqual(54.7, impurity_after)
def test_set_classifier(self):
    """A node built without classifier must return the one set later."""
    node = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
    classifier = Stree()
    # Nothing was passed as classifier to the constructor
    self.assertIsNone(node.get_classifier())
    node.set_classifier(classifier)
    stored = node.get_classifier()
    self.assertEqual(classifier, stored)
def test_set_title(self):
    """get_title must return the constructor value, then the set one."""
    node = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
    title_before = node.get_title()
    self.assertEqual("test", title_before)
    node.set_title("another")
    title_after = node.get_title()
    self.assertEqual("another", title_after)
def test_make_predictor_on_leaf(self):
    """make_predictor on a childless node must set class and belief.

    Three of the four labels are 1, so class 1 with belief 0.75 is expected.
    """
    labels = [1, 0, 1, 1]
    leaf = Snode(None, [1, 2, 3, 4], labels, [], 0.0, "test")
    leaf.make_predictor()
    predicted_class = leaf._class
    self.assertEqual(1, predicted_class)
    belief = leaf._belief
    self.assertEqual(0.75, belief)
    partition_column = leaf._partition_column
    self.assertEqual(-1, partition_column)
def test_make_predictor_on_leaf_bogus_data(self):
    """make_predictor on a node without labels must leave the class unset."""
    no_labels = []
    leaf = Snode(None, [1, 2, 3, 4], no_labels, [], 0.0, "test")
    leaf.make_predictor()
    predicted_class = leaf._class
    self.assertIsNone(predicted_class)
    partition_column = leaf._partition_column
    self.assertEqual(-1, partition_column)
def test_nodes_leaves_artificial(self):
    """nodes_leaves must count 6 nodes and 2 leaves on a hand-built tree.

    Links built below (u = up, d = down)::

        n1 -u-> n2 -u-> n3 -u-> n5
                n2 -d-> n4 -d-> n6
    """
    # Six nodes that only differ in their title: "test1" ... "test6"
    n1, n2, n3, n4, n5, n6 = (
        Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, f"test{index}")
        for index in range(1, 7)
    )
    n1.set_up(n2)
    n2.set_up(n3)
    n2.set_down(n4)
    n3.set_up(n5)
    n4.set_down(n6)
    model = Stree(random_state=self._random_state)
    # Plug the artificial tree directly as the fitted tree of the model
    model.tree_ = n1
    node_count, leaf_count = model.nodes_leaves()
    self.assertEqual(6, node_count)
    self.assertEqual(2, leaf_count)