def build_tree(self, X, Y, D):
    '''
        Build a decision stump, overriding the build_tree function of the DT class.
        Instead of building tree nodes recursively as in DT, at most one level of
        children nodes is built here.
        Input:
            X: the feature matrix, a numpy matrix of shape p by n.
               Each element can be int/float/string.
               Here n is the number data instances in the node, p is the number of attributes.
            Y: the class labels, a numpy array of length n. Each element can be int/float/string.
            D: the weights of instances, a numpy float vector of length n
        Return:
            t: the root node of the decision stump. When Condition 1 or 2 holds
               (DT.stop1 / DT.stop2) it is a leaf; otherwise t.i / t.th hold the
               chosen attribute and threshold, and t.C1 / t.C2 are leaf children
               for the instances below / not below the threshold.
    '''
    t = Node(X, Y)
    # The root prediction is needed whether or not the node gets split.
    t.p = self.most_common(t.Y, D)

    # if Condition 1 or 2 holds, stop splitting
    if DT.stop1(t.Y) or DT.stop2(t.X):
        t.isleaf = True
        return t

    # find the best attribute to split
    t.i, t.th = self.best_attribute(t.X, t.Y, D)

    # Partition the instance indices by comparing the chosen attribute to the
    # threshold. Anything that is not strictly below goes to the second child.
    below, not_below = [], []
    for j, x in enumerate(X[t.i, :]):
        if x < t.th:
            below.append(j)
        else:
            not_below.append(j)

    def make_leaf(idx):
        # Build one leaf child from the instances listed in idx.
        labels = Y[idx]
        leaf = Node(X[:, idx], labels, isleaf=True)
        # Indexing with a list yields a new array (D is documented as a numpy
        # vector), so rescaling below never touches the caller's D.
        weights = D[idx]
        total = float(sum(weights))
        # Rescale the child's weights to sum to 1. Skip this when the total is
        # 0 (empty child or all-zero weights) so no division by zero occurs.
        if total != 0.0:
            weights = weights / total
        leaf.p = self.most_common(labels, weights)
        return leaf

    # configure each child node
    t.C1 = make_leaf(below)
    t.C2 = make_leaf(not_below)
    return t
示例#2
0
    def build_tree(self, X, Y, D):
        '''
            Build a decision stump, overriding the build_tree function of the DT class.
            Unlike the recursive construction in DT, at most one level of children
            nodes is created.
            Input:
                X: the feature matrix, a numpy matrix of shape p by n.
                   Each element can be int/float/string.
                   Here n is the number data instances in the node, p is the number of attributes.
                Y: the class labels, a numpy array of length n. Each element can be int/float/string.
                D: the weights of instances, a numpy float vector of length n
            Return:
                t: the root node of the decision stump.
        '''
        root = Node(X, Y)
        root.p = DS.most_common(Y, D)

        # Condition 1 or 2 holds: the root stays a leaf.
        if DT.stop1(Y) or DT.stop2(X):
            root.isleaf = True
            return root

        # Choose the attribute / threshold, then let split() create the children.
        root.i, root.th = self.best_attribute(X, Y, D)
        root.C1, root.C2 = self.split(root.X, root.Y, root.i, root.th)

        # Route every instance weight to the side its attribute value falls on:
        # strictly below the threshold -> first child, everything else -> second.
        w_below, w_rest = [], []
        for j, w in enumerate(D):
            bucket = w_below if X[root.i, j] < root.th else w_rest
            bucket.append(w)
        w_below = np.array(w_below)
        w_rest = np.array(w_rest)

        # Each child gets its own prediction from its labels and weights,
        # and is marked as a leaf.
        for child, weights in ((root.C1, w_below), (root.C2, w_rest)):
            child.p = DS.most_common(child.Y, weights)
            child.isleaf = True

        return root
示例#3
0
    def build_tree(self, X, Y, D):
        '''
            Build a decision stump, overriding the build_tree function of the DT class.
            Instead of building tree nodes recursively as in DT, at most one level of
            children nodes is built here.
            Input:
                X: the feature matrix, a numpy matrix of shape p by n.
                   Each element can be int/float/string.
                   Here n is the number data instances in the node, p is the number of attributes.
                Y: the class labels, a numpy array of length n. Each element can be int/float/string.
                D: the weights of instances, a numpy float vector of length n
            Return:
                t: the root node of the decision stump. It is a leaf when
                   Condition 1 or 2 holds (DT.stop1 / DT.stop2); otherwise it has
                   two leaf children t.C1 and t.C2.
        '''
        t = Node(X, Y)
        t.p = DS.most_common(t.Y, D)

        # if Condition 1 or 2 holds, stop splitting
        if DT.stop1(t.Y) or DT.stop2(t.X):
            t.isleaf = True
            return t

        # find the best attribute to split
        # (called on self rather than on a freshly constructed DS(), so the
        # receiving instance -- including any subclass override -- is honoured)
        t.i, t.th = self.best_attribute(t.X, t.Y, D)

        # configure each child node
        t.C1, t.C2 = DT.split(t.X, t.Y, t.i, t.th)

        # Select each child's weights with the same comparisons as before
        # (< threshold for C1, >= threshold for C2), so they line up with the
        # labels DT.split assigned to that child.
        feature = X[t.i]
        t.C1.p = DS.most_common(t.C1.Y, D[feature < t.th])
        t.C2.p = DS.most_common(t.C2.Y, D[feature >= t.th])
        t.C1.isleaf = True
        t.C2.isleaf = True
        return t
示例#4
0
    def build_tree(self, X, Y, D):
        '''
            Build a decision stump, overriding the build_tree function of the DT class.
            Unlike the recursive construction in DT, at most one level of children
            nodes is created.
            Input:
                X: the feature matrix, a numpy matrix of shape p by n.
                   Each element can be int/float/string.
                   Here n is the number data instances in the node, p is the number of attributes.
                Y: the class labels, a numpy array of length n. Each element can be int/float/string.
                D: the weights of instances, a numpy float vector of length n
            Return:
                t: the root node of the decision stump.
        '''
        root = Node(X, Y, isleaf=False)
        root.p = DS.most_common(Y, D)

        # Condition 1 or 2 holds: the root stays a leaf.
        if DT.stop1(Y) or DT.stop2(X):
            root.isleaf = True
            return root

        # Choose the attribute and threshold to split on.
        root.i, root.th = self.best_attribute(X, Y, D)
        attr_values = X[root.i, :]

        # First child: instances whose attribute value is strictly below the threshold.
        below = attr_values < root.th
        labels_below = Y[below]
        root.C1 = Node(X[:, below],
                       labels_below,
                       isleaf=True,
                       p=DS.most_common(labels_below, D[below]))

        # Second child: instances whose attribute value is at or above the threshold.
        at_or_above = attr_values >= root.th
        labels_above = Y[at_or_above]
        root.C2 = Node(X[:, at_or_above],
                       labels_above,
                       isleaf=True,
                       p=DS.most_common(labels_above, D[at_or_above]))

        return root