def build_tree(self, X, Y, D):
    '''
    Build a decision stump by overriding DT.build_tree: instead of growing
    tree nodes recursively, grow at most one level of children under the root.

    Input:
        X: feature matrix, p by n (p attributes, n instances); elements may
           be int/float/string.
        Y: class labels, length-n array.
        D: instance weights, length-n float vector.
    Return:
        t: the root node of the decision stump.
    '''
    t = Node(X, Y)
    # Weighted majority label at the root (used if we stop here).
    t.p = self.most_common(t.Y, D)
    # Condition 1 (pure labels) or 2 (constant attributes): stay a leaf.
    if DT.stop1(t.Y) or DT.stop2(t.X):
        t.isleaf = True
        return t
    # Best (attribute, threshold) pair under the instance weights.
    t.i, t.th = self.best_attribute(t.X, t.Y, D)
    # Partition instance indices by the threshold on attribute t.i.
    feature = X[t.i, :]
    left = [j for j, v in enumerate(feature) if v < t.th]
    right = [j for j, v in enumerate(feature) if not v < t.th]

    def _make_leaf(idx):
        # Leaf child over the selected columns; its weights are renormalized
        # to sum to 1 before taking the weighted majority label.
        child = Node(X[:, idx], Y[idx], isleaf=True)
        w = D[idx]
        w = w / float(w.sum())
        child.p = self.most_common(Y[idx], w)
        return child

    t.C1 = _make_leaf(left)
    t.C2 = _make_leaf(right)
    return t
def build_tree(self, X, Y, D):
    '''
    Build a decision stump by overriding DT.build_tree: at most one level
    of children is grown under the root (no recursion).

    Input:
        X: feature matrix, p by n (p attributes, n instances); elements may
           be int/float/string.
        Y: class labels, length-n array.
        D: instance weights, length-n float vector.
    Return:
        t: the root node of the decision stump.
    '''
    t = Node(X=X, Y=Y)
    # Weighted majority label at the root.
    t.p = DS.most_common(t.Y, D)
    # Condition 1 (pure labels) or 2 (constant attributes): stay a leaf.
    if DS.stop1(t.Y) or DS.stop2(t.X):
        t.isleaf = True
        return t
    # Best (attribute, threshold) pair under the instance weights.
    t.i, t.th = DS.best_attribute(self, t.X, t.Y, D)
    # Boolean masks over instances: strictly-below goes left, rest go right.
    below = X[t.i] < t.th
    above = X[t.i] >= t.th
    t.C1 = Node(X=X[:, below], Y=Y[below])
    t.C2 = Node(X=X[:, above], Y=Y[above])
    # Children are leaves; each predicts its weighted majority label.
    t.C1.p = DS.most_common(t.C1.Y, D[below])
    t.C2.p = DS.most_common(t.C2.Y, D[above])
    t.C1.isleaf = True
    t.C2.isleaf = True
    return t
def build_tree(self, X, Y, D):
    '''
    Build a decision stump by overriding DT.build_tree: only one level of
    children is ever created below the root.

    Input:
        X: feature matrix, p by n (p attributes, n instances); elements may
           be int/float/string.
        Y: class labels, length-n array.
        D: instance weights, length-n float vector.
    Return:
        t: the root node of the decision stump.
    '''
    t = Node(X, Y, isleaf=False)
    # Weighted majority label at the root.
    t.p = DS.most_common(Y, D)
    # Condition 1 (pure labels) or 2 (constant attributes): stay a leaf.
    if DT.stop1(Y) or DT.stop2(X):
        t.isleaf = True
        return t
    # Best (attribute, threshold) pair under the instance weights.
    t.i, t.th = self.best_attribute(X, Y, D)
    # Compute each side's membership mask once instead of re-evaluating
    # the comparison for every slice below.
    go_left = X[t.i, :] < t.th
    go_right = X[t.i, :] >= t.th
    t.C1 = Node(X[:, go_left], Y[go_left], isleaf=True,
                p=DS.most_common(Y[go_left], D[go_left]))
    t.C2 = Node(X[:, go_right], Y[go_right], isleaf=True,
                p=DS.most_common(Y[go_right], D[go_right]))
    return t
def build_tree(self, X, Y, D):
    '''
    Build a decision stump by overriding DT.build_tree: the root gets at
    most one level of (leaf) children.

    Input:
        X: feature matrix, p by n (p attributes, n instances); elements may
           be int/float/string.
        Y: class labels, length-n array.
        D: instance weights, length-n float vector.
    Return:
        t: the root node of the decision stump.
    '''
    t = Node(X, Y)
    # Weighted majority label at the root.
    t.p = DS.most_common(Y, D)
    # Condition 1 (pure labels) or 2 (constant attributes): stay a leaf.
    if DT.stop1(Y) or DT.stop2(X):
        t.isleaf = True
        return t
    # Best (attribute, threshold) pair under the instance weights.
    t.i, t.th = self.best_attribute(X, Y, D)
    # split() partitions the data columns; route each instance weight to
    # the same side its feature value fell on (< th left, otherwise right).
    t.C1, t.C2 = self.split(t.X, t.Y, t.i, t.th)
    goes_left = X[t.i, :] < t.th
    t.C1.p = DS.most_common(t.C1.Y, D[goes_left])
    t.C2.p = DS.most_common(t.C2.Y, D[~goes_left])
    t.C1.isleaf = True
    t.C2.isleaf = True
    return t
def build_tree(self, X, Y, D):
    '''
    Build a decision stump by overriding DT.build_tree: instead of building
    tree nodes recursively, grow at most one level of children.

    Input:
        X: feature matrix, p by n (p attributes, n instances); elements may
           be int/float/string.
        Y: class labels, length-n array.
        D: instance weights, length-n float vector.
    Return:
        t: the root node of the decision stump.
    '''
    t = Node(X, Y)
    # Weighted majority label at the root (used if we stop here).
    t.p = DS.most_common(t.Y, D)
    # Condition 1 (pure labels) or 2 (constant attributes): stay a leaf.
    # Guard clause replaces the original `== False and == False` nesting.
    if DT.stop1(t.Y) or DT.stop2(t.X):
        t.isleaf = True
        return t
    # Find the best attribute to split. Use `self` (a DS instance) instead
    # of constructing a throwaway `DS()`, which is wasteful and breaks if
    # DS.__init__ ever requires arguments; sibling implementations also
    # call best_attribute through self.
    t.i, t.th = self.best_attribute(t.X, t.Y, D)
    # Configure each child node: split the data, then route the weights
    # with the same `< th` / `>= th` convention split uses.
    t.C1, t.C2 = DT.split(t.X, t.Y, t.i, t.th)
    d1 = D[X[t.i] < t.th]
    d2 = D[X[t.i] >= t.th]
    t.C1.p = DS.most_common(t.C1.Y, d1)
    t.C2.p = DS.most_common(t.C2.Y, d2)
    t.C1.isleaf = True
    t.C2.isleaf = True
    return t
def build_tree(self, X, Y, D):
    '''
    Build a decision stump by overriding DT.build_tree: instead of building
    tree nodes recursively, grow at most one level of children.

    Input:
        X: feature matrix, p by n (p attributes, n instances); elements may
           be int/float/string.
        Y: class labels, length-n array.
        D: instance weights, length-n float vector.
    Return:
        t: the root node of the decision stump.

    BUG FIX: the previous version partitioned the weights by sorting
    zip(X[t.i,:], D) and cutting at the first value strictly greater than
    the threshold. That (a) sent values equal to the threshold to C1's
    weights even though split assigns them to C2, (b) sent ALL weights to
    C2 when no value exceeded the threshold (k stayed 0), and (c) reordered
    the weights by feature value so they no longer lined up with the label
    order in C1.Y/C2.Y. Boolean masks preserve both membership and order.
    '''
    t = Node(X, Y)
    # Weighted majority label at the root (used if we stop here).
    t.p = DS.most_common(t.Y, D)
    # Condition 1 (pure labels) or 2 (constant attributes): stay a leaf.
    if DS.stop1(t.Y) or DS.stop2(t.X):
        t.isleaf = True
        return t
    # Find the best attribute to split.
    t.i, t.th = self.best_attribute(t.X, t.Y, D)
    t.C1, t.C2 = DS.split(t.X, t.Y, t.i, t.th)
    # Configure each child node: weights follow the same `< th` (left) /
    # complement (right) convention as split, keeping instance order intact
    # so each weight stays paired with its label inside most_common.
    below = X[t.i, :] < t.th
    t.C1.p = DS.most_common(t.C1.Y, D[below])
    t.C2.p = DS.most_common(t.C2.Y, D[~below])
    t.C1.isleaf = True
    t.C2.isleaf = True
    return t
def test_inference():
    ''' (3 points) inference'''
    # A bag of three identical leaves predicts that leaf's label.
    leaf = Node(None, None)
    leaf.isleaf = True
    leaf.p = 'good job'
    x = np.random.random(10)
    y = Bag.inference([leaf, leaf, leaf], x)
    assert y == 'good job'
    #-----------------
    # Mix one constant leaf with a real stump on attribute 1, threshold 1.5.
    leaf.p = 'c1'
    stump = Node(None, None)
    stump.isleaf = False
    stump.i = 1
    stump.th = 1.5
    left, right = Node(None, None), Node(None, None)
    for child, label in ((left, 'c1'), (right, 'c2')):
        child.isleaf = True
        child.p = label
    stump.C1 = left
    stump.C2 = right
    x = np.array([1., 2., 3., 1.])
    # x[1] == 2.0 >= 1.5, so each stump votes 'c2'; the majority decides.
    y = Bag.inference([leaf, stump, stump], x)
    assert y == 'c2'
    y = Bag.inference([leaf, leaf, stump], x)
    assert y == 'c1'
def test_predict():
    ''' (2 points) predict '''
    # One constant leaf ('c1') and one stump on attribute 1, threshold 1.5.
    leaf = Node(None, None)
    leaf.isleaf = True
    leaf.p = 'c1'
    stump = Node(None, None)
    stump.isleaf = False
    stump.i = 1
    stump.th = 1.5
    lo, hi = Node(None, None), Node(None, None)
    for child, label in ((lo, 'c1'), (hi, 'c2')):
        child.isleaf = True
        child.p = label
    stump.C1 = lo
    stump.C2 = hi
    X = np.array([[1., 1., 1., 1.],
                  [1., 2., 3., 1.]])
    # Two leaves outvote the single stump on every instance.
    Y = Bag.predict([leaf, leaf, stump], X)
    assert type(Y) == np.ndarray
    assert Y.shape == (4,)
    for got, want in zip(Y, ['c1', 'c1', 'c1', 'c1']):
        assert got == want
    # With two stumps, instances whose attribute-1 value >= 1.5 flip to 'c2'.
    Y = Bag.predict([leaf, stump, stump], X)
    for got, want in zip(Y, ['c1', 'c2', 'c2', 'c1']):
        assert got == want