def fit_tree(self, X, y, depth=0):
    """Recursively grow a decision tree from the given observations.

    A node is created for the current observations; while ``depth`` is below
    ``self.max_depth`` and ``self.get_split`` reports a usable feature, the
    observations are partitioned on that feature and each part is fitted
    as a child subtree.

    Args:
        X (ndarray): training set without class labels (indexed as
            ``X[:, feature]``, so two-dimensional).
        y (ndarray): class labels for the training set.
        depth (int): depth of the node being built (0 for the root).

    Returns:
        Node: root node of the (sub)tree learned from ``X`` and ``y``.
    """
    # Count how many observations in this node carry each class label.
    label_counts = [np.sum(y == label) for label in range(self.n_classes)]

    node = Node(
        n=y.size,
        class_distribution=label_counts,
        gini_index=_gini(y, self.n_classes),
    )

    # Depth limit reached: this node stays a leaf.
    if depth >= self.max_depth:
        return node

    split_gini, split_feature = self.get_split(X, y)

    # A split index of None means no split was found: this node stays a leaf.
    if split_feature is None:
        return node

    # Rows whose split feature equals 1 go left; every other row goes right.
    goes_left = X[:, split_feature] == 1
    goes_right = ~goes_left

    # Record the split on this node (the gini value is replaced by the
    # one returned for the chosen split).
    node.gini_index = split_gini
    node.feature_index = split_feature

    child_depth = depth + 1
    node.left = self.fit_tree(X[goes_left], y[goes_left], depth=child_depth)
    node.right = self.fit_tree(X[goes_right], y[goes_right], depth=child_depth)
    return node
def fit_tree(self, X, y, weights, depth=0):
    """Recursively grow a decision tree using per-instance weights (AdaBoost).

    A node is created for the current observations, with its class
    distribution given by the summed weight of each class. While ``depth``
    is below ``self.max_depth`` and ``self.get_split`` reports a usable
    feature, the observations (and their weights) are partitioned on that
    feature and each part is fitted as a child subtree.

    Args:
        X (ndarray): training set without class labels (indexed as
            ``X[:, feature]``, so two-dimensional).
        y (ndarray): class labels for the training set.
        weights (ndarray): weight of each training instance, aligned with
            ``y``.
        depth (int): depth of the node being built (0 for the root).

    Returns:
        Node: root node of the (sub)tree learned from the weighted data.
    """
    # Total weight carried by each class among this node's observations.
    weight_per_class = [
        np.sum(weights * (y == label)) for label in range(self.n_classes)
    ]

    node = Node(
        n=y.size,
        class_distribution=weight_per_class,
        gini_index=_gini(y, self.n_classes, weights=weights),
    )

    # Depth limit reached: this node stays a leaf.
    if depth >= self.max_depth:
        return node

    split_gini, split_feature = self.get_split(X, y, weights=weights)

    # A split index of None means no split was found: this node stays a leaf.
    if split_feature is None:
        return node

    # Rows whose split feature equals 1 go left; every other row goes right.
    # The weights are partitioned with the same masks as the data and labels.
    goes_left = X[:, split_feature] == 1
    goes_right = ~goes_left

    # Record the split on this node (the gini value is replaced by the
    # one returned for the chosen split).
    node.gini_index = split_gini
    node.feature_index = split_feature

    child_depth = depth + 1
    node.left = self.fit_tree(
        X[goes_left], y[goes_left], weights=weights[goes_left], depth=child_depth
    )
    node.right = self.fit_tree(
        X[goes_right], y[goes_right], weights=weights[goes_right], depth=child_depth
    )
    return node