def build_tree(self, X, Y, D):
    '''
        Build a decision stump by overriding the build_tree function in the DT
        class. Instead of building tree nodes recursively as in DT, here we build
        at most one level of children nodes.
        Input:
            X: the feature matrix, a numpy matrix of shape p by n.
               Each element can be int/float/string.
               Here n is the number of data instances in the node,
               p is the number of attributes.
            Y: the class labels, a numpy array of length n.
               Each element can be int/float/string.
            D: the weights of the instances, a numpy float vector of length n.
        Return:
            t: the root node of the decision stump.
    '''
    #########################################
    ## INSERT YOUR CODE HERE
    t = Node(X, Y)
    t.p = self.most_common(t.Y, D)
    # if Condition 1 or 2 holds, stop splitting
    if DT.stop1(t.Y) or DT.stop2(t.X):
        t.isleaf = True
        return t
    # find the best attribute to split
    t.i, t.th = self.best_attribute(t.X, t.Y, D)
    # partition the instance indices on the threshold
    ind1, ind2 = [], []
    for j, x in enumerate(X[t.i, :]):
        if x < t.th:
            ind1.append(j)
        else:
            ind2.append(j)
    # configure the left child (instances with X[t.i] < t.th)
    X1, Y1 = X[:, ind1], Y[ind1]
    t.C1 = Node(X1, Y1, isleaf=True)
    # renormalize the child's weights (optional: rescaling by a positive
    # constant does not change the weighted majority label)
    D1 = D[ind1] / float(sum(D[ind1]))
    t.C1.p = self.most_common(Y1, D1)
    # configure the right child (instances with X[t.i] >= t.th)
    X2, Y2 = X[:, ind2], Y[ind2]
    t.C2 = Node(X2, Y2, isleaf=True)
    D2 = D[ind2] / float(sum(D[ind2]))
    t.C2.p = self.most_common(Y2, D2)
    #########################################
    return t
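# Every variant in this section relies on a weighted majority vote,
# most_common(Y, D), whose implementation lives elsewhere in the assignment.
# A minimal sketch of what such a helper might look like (illustrative only,
# not the assignment's actual implementation; the name is hypothetical):
def most_common_sketch(Y, D):
    # accumulate the total weight of each label, then return the heaviest label
    totals = {}
    for y, d in zip(Y, D):
        totals[y] = totals.get(y, 0.0) + float(d)
    return max(totals, key=totals.get)
# e.g. most_common_sketch(np.array(['a', 'a', 'b']), np.array([0.1, 0.2, 0.6]))
# returns 'b', since 'b' carries more total weight than 'a'.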
def build_tree(self, X, Y, D):
    '''
        Build a decision stump by overriding the build_tree function in the DT
        class. Instead of building tree nodes recursively as in DT, here we build
        at most one level of children nodes.
        Input:
            X: the feature matrix, a numpy matrix of shape p by n.
               Each element can be int/float/string.
               Here n is the number of data instances in the node,
               p is the number of attributes.
            Y: the class labels, a numpy array of length n.
               Each element can be int/float/string.
            D: the weights of the instances, a numpy float vector of length n.
        Return:
            t: the root node of the decision stump.
    '''
    #########################################
    ## INSERT YOUR CODE HERE
    t = Node(X, Y)
    t.p = DS.most_common(Y, D)
    # if Condition 1 or 2 holds, stop splitting
    if DT.stop1(Y) or DT.stop2(X):
        t.isleaf = True
        return t
    # find the best attribute to split
    t.i, t.th = self.best_attribute(X, Y, D)
    # configure each child node
    t.C1, t.C2 = self.split(t.X, t.Y, t.i, t.th)
    # route each instance weight to the child its instance falls into
    D1, D2 = [], []
    for j in range(len(D)):
        if X[t.i, j] < t.th:
            D1.append(D[j])
        else:
            D2.append(D[j])
    D1, D2 = np.array(D1), np.array(D2)
    t.C1.p = DS.most_common(t.C1.Y, D1)
    t.C2.p = DS.most_common(t.C2.Y, D2)
    t.C1.isleaf = True
    t.C2.isleaf = True
    #########################################
    return t
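# Variant 2 delegates the actual partition to the split helper inherited from
# DT. A plausible sketch of what that helper does, assuming the convention used
# throughout this section (instances with X[i] < th go to the left child); the
# real DT.split may differ in details:
def split_sketch(X, Y, i, th):
    mask = X[i, :] < th                   # boolean mask over the n instances
    C1 = Node(X[:, mask], Y[mask])        # left child: value below threshold
    C2 = Node(X[:, ~mask], Y[~mask])      # right child: value at or above it
    return C1, C2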
def build_tree(self, X, Y, D):
    '''
        Build a decision stump by overriding the build_tree function in the DT
        class. Instead of building tree nodes recursively as in DT, here we build
        at most one level of children nodes.
        Input:
            X: the feature matrix, a numpy matrix of shape p by n.
               Each element can be int/float/string.
               Here n is the number of data instances in the node,
               p is the number of attributes.
            Y: the class labels, a numpy array of length n.
               Each element can be int/float/string.
            D: the weights of the instances, a numpy float vector of length n.
        Return:
            t: the root node of the decision stump.
    '''
    #########################################
    ## INSERT YOUR CODE HERE
    t = Node(X, Y)
    t.p = DS.most_common(t.Y, D)
    # if Condition 1 or 2 holds, stop splitting
    if not (DT.stop1(t.Y) or DT.stop2(t.X)):
        # find the best attribute to split
        t.i, t.th = DS().best_attribute(t.X, t.Y, D)
        # configure each child node
        t.C1, t.C2 = DT.split(t.X, t.Y, t.i, t.th)
        d1 = D[np.where(X[t.i] < t.th)]
        d2 = D[np.where(X[t.i] >= t.th)]
        t.C1.p = DS.most_common(t.C1.Y, d1)
        t.C2.p = DS.most_common(t.C2.Y, d2)
        t.C1.isleaf = True
        t.C2.isleaf = True
    else:
        t.isleaf = True
    #########################################
    return t
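# Variant 3 selects the child weights with D[np.where(...)]; for a 1-d
# condition this is equivalent to plain boolean-mask indexing, as the toy
# check below illustrates (the arrays here are made-up example data):
import numpy as np

_X = np.array([[2.0, 5.0, 1.0, 7.0]])   # one attribute, four instances
_D = np.array([0.1, 0.2, 0.3, 0.4])
_th = 4.0
assert np.array_equal(_D[np.where(_X[0] < _th)], _D[_X[0] < _th])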
def build_tree(self, X, Y, D):
    '''
        Build a decision stump by overriding the build_tree function in the DT
        class. Instead of building tree nodes recursively as in DT, here we build
        at most one level of children nodes.
        Input:
            X: the feature matrix, a numpy matrix of shape p by n.
               Each element can be int/float/string.
               Here n is the number of data instances in the node,
               p is the number of attributes.
            Y: the class labels, a numpy array of length n.
               Each element can be int/float/string.
            D: the weights of the instances, a numpy float vector of length n.
        Return:
            t: the root node of the decision stump.
    '''
    #########################################
    ## INSERT YOUR CODE HERE
    t = Node(X, Y, isleaf=False)
    t.p = DS.most_common(Y, D)
    # if Condition 1 or 2 holds, stop splitting
    if DT.stop1(Y) or DT.stop2(X):
        t.isleaf = True
        return t
    # find the best attribute to split
    t.i, t.th = self.best_attribute(X, Y, D)
    # configure each child node: split the instances, labels, and weights
    # on the chosen threshold
    mask1 = X[t.i, :] < t.th
    mask2 = X[t.i, :] >= t.th
    t.C1 = Node(X[:, mask1], Y[mask1], isleaf=True,
                p=DS.most_common(Y[mask1], D[mask1]))
    t.C2 = Node(X[:, mask2], Y[mask2], isleaf=True,
                p=DS.most_common(Y[mask2], D[mask2]))
    #########################################
    return t
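# A stump built by any of the variants above might be exercised like this
# (toy data; assumes the decision-stump class is named DS and that Node and
# DT are in scope, as the method bodies above suggest):
import numpy as np

X_toy = np.array([[1., 2., 3., 4.],
                  [5., 6., 7., 8.]])        # p = 2 attributes, n = 4 instances
Y_toy = np.array(['a', 'a', 'b', 'b'])
D_toy = np.array([0.25, 0.25, 0.25, 0.25])  # uniform AdaBoost-style weights

stump = DS()
t = stump.build_tree(X_toy, Y_toy, D_toy)
print(t.i, t.th)        # chosen attribute index and threshold
print(t.C1.p, t.C2.p)   # weighted majority label of each child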