def run_tree(node: Snode):
    """Walk the subtree rooted at ``node`` checking classifiers.

    Every visited node whose ``_belief`` is below 1 must have a
    classifier (``_clf``) that is not None and whose ``coef_`` is not
    None. Nodes with a belief of 1 (pure leaves) are not checked.
    The assertions are made through ``self`` taken from the enclosing
    scope.

    Parameters
    ----------
    node : Snode
        root of the subtree to check
    """
    has_mixed_labels = node._belief < 1
    if has_mixed_labels:
        # a pure leaf is the only kind of node allowed to skip this check
        classifier = node._clf
        self.assertIsNotNone(classifier)
        self.assertIsNotNone(classifier.coef_)
    if not node.is_leaf():
        # visit the "up" branch completely before fetching the "down" one
        for fetch_child in (node.get_up, node.get_down):
            run_tree(fetch_child())
def _check_tree(self, node: Snode):
    """Check recursively that the nodes that are not leaves have the
    correct number of labels and its sons have the right number of elements
    in their dataset

    For every non-leaf node three things are asserted: the two children
    hold, together, as many samples as the node; for each label present
    in the node, its count equals the sum of its counts in both children;
    and the counts of the values predicted by the node classifier match
    the sizes of the children.

    Parameters
    ----------
    node : Snode
        node to check
    """
    if node.is_leaf():
        return
    y_prediction = node._clf.predict(node._X)
    y_down = node.get_down()._y
    y_up = node.get_up()._y
    # Is a correct partition in terms of cardinality?
    # i.e. The partition algorithm didn't forget any sample
    self.assertEqual(node._y.shape[0], y_down.shape[0] + y_up.shape[0])
    unique_y, count_y = np.unique(node._y, return_counts=True)
    labels_d, count_d = np.unique(y_down, return_counts=True)
    labels_u, count_u = np.unique(y_up, return_counts=True)
    dict_d = dict(zip(labels_d, count_d))
    dict_u = dict(zip(labels_u, count_u))
    # Pair every label with its own count: np.unique returns the counts in
    # the same order as the labels, so they must be matched by position and
    # not indexed by the label value (labels need not be 0..k-1 in a node).
    for label, number_total in zip(unique_y, count_y):
        # a label missing from one of the children contributes 0 samples
        number_up = dict_u.get(label, 0)
        number_down = dict_d.get(label, 0)
        self.assertEqual(number_total, number_down + number_up)
    # Is the partition made the same as the prediction?
    # as the node is not a leaf...
    # NOTE(review): assumes the prediction takes exactly two distinct
    # values and that the larger one maps to the "up" child - confirm
    _, count_yp = np.unique(y_prediction, return_counts=True)
    self.assertEqual(count_yp[1], y_up.shape[0])
    self.assertEqual(count_yp[0], y_down.shape[0])
    self._check_tree(node.get_down())
    self._check_tree(node.get_up())
def check_leave(node: Snode):
    """Verify the stored predictor data of every leaf under ``node``.

    Inner nodes only forward the check to their "down" and then "up"
    children. For a leaf, the belief and the class are recomputed from
    the label counts of ``node._y`` and compared with ``node._belief``
    and ``node._class``; ``node._partition_column`` must be -1.
    The assertions are made through ``self`` taken from the enclosing
    scope.

    Parameters
    ----------
    node : Snode
        root of the subtree to check
    """
    if node.is_leaf():
        labels, counts = np.unique(node._y, return_counts=True)
        largest = max(counts)
        smallest = min(counts)
        # belief: share of the most frequent label against the least
        # frequent one, or 1 when the leaf holds a single label
        expected_belief = (
            largest / (largest + smallest) if len(labels) > 1 else 1
        )
        self.assertEqual(expected_belief, node._belief)
        # class: the label(s) reaching the largest count
        expected_class = labels[counts == largest]
        self.assertEqual(expected_class, node._class)
        # partition column: leaves must hold -1
        self.assertEqual(node._partition_column, -1)
        return
    check_leave(node.get_down())
    check_leave(node.get_up())