Example #1
    def build_tree(self, X, Y, D):
        '''
            Build a decision stump by overriding the build_tree function in the DT class.
            Instead of building tree nodes recursively as in DT, here we build at most one level of child nodes.
            Input:
                X: the feature matrix, a numpy matrix of shape p by n.
                   Each element can be int/float/string.
                   Here n is the number of data instances in the node, p is the number of attributes.
                Y: the class labels, a numpy array of length n. Each element can be int/float/string.
                D: the weights of instances, a numpy float vector of length n
            Return:
                t: the root node of the decision stump.
        '''
        #########################################
        ## INSERT YOUR CODE HERE
        t = Node(X, Y)
        t.p = self.most_common(t.Y, D)
        # if Condition 1 or 2 holds, stop splitting
        if DT.stop1(t.Y) or DT.stop2(t.X):
            t.isleaf = True
            return t
        # find the best attribute to split
        t.i, t.th = self.best_attribute(t.X, t.Y, D)
        # configure each child node: partition instance indices by the threshold
        ind1 = [j for j, x in enumerate(X[t.i, :]) if x < t.th]
        ind2 = [j for j, x in enumerate(X[t.i, :]) if x >= t.th]
        # left child: instances with X[t.i] < t.th, weights renormalized
        X1, Y1, D1 = X[:, ind1], Y[ind1], D[ind1] / D[ind1].sum()
        t.C1 = Node(X1, Y1, isleaf=True)
        t.C1.p = self.most_common(Y1, D1)
        # right child: instances with X[t.i] >= t.th, weights renormalized
        X2, Y2, D2 = X[:, ind2], Y[ind2], D[ind2] / D[ind2].sum()
        t.C2 = Node(X2, Y2, isleaf=True)
        t.C2.p = self.most_common(Y2, D2)
        #########################################
        return t
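
A minimal usage sketch for the stump above; it assumes `DS` and `Node` are importable from the assignment's module (the import path here is hypothetical) and that the data follows the p-by-n layout described in the docstring:

import numpy as np
# hypothetical import path; the assignment defines DS (decision stump) and Node
from problem2 import DS

X = np.array([[1., 1., 2., 2.],     # attribute 0
              [1., 2., 3., 4.]])    # attribute 1
Y = np.array(['a', 'a', 'b', 'b'])  # one label per column of X
D = np.ones(4) / 4.                 # uniform instance weights

t = DS().build_tree(X, Y, D)
print(t.i, t.th)       # chosen attribute index and threshold
print(t.C1.p, t.C2.p)  # weighted-majority label of each child
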
Example #2
    def build_tree(self, X,Y,D):
        '''
            Build a decision stump by overriding the build_tree function in the DT class.
            Instead of building tree nodes recursively as in DT, here we build at most one level of child nodes.
            Input:
                X: the feature matrix, a numpy matrix of shape p by n. 
                   Each element can be int/float/string.
                   Here n is the number of data instances in the node, p is the number of attributes.
                Y: the class labels, a numpy array of length n. Each element can be int/float/string.
                D: the weights of instances, a numpy float vector of length n
            Return:
                t: the root node of the decision stump. 
        '''

        t = Node(X = X, Y = Y)
        t.p = DS.most_common(t.Y,D)
    
        # if Condition 1 or 2 holds, stop splitting
        if DS.stop1(t.Y) or DS.stop2(t.X):
            t.isleaf = True
            return t
        # find the best attribute to split
        
        t.i, t.th = self.best_attribute(t.X, t.Y, D)

        # partition instances (columns) by the threshold on attribute t.i
        mask = X[t.i] < t.th
        t.C1 = Node(X=X[:, mask], Y=Y[mask])
        t.C2 = Node(X=X[:, ~mask], Y=Y[~mask])

        # each child predicts the weighted-majority label of its instances
        t.C1.p = DS.most_common(t.C1.Y, D[mask])
        t.C2.p = DS.most_common(t.C2.Y, D[~mask])

        t.C1.isleaf = True
        t.C2.isleaf = True

        return t
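
Every example leans on `most_common(Y, D)`, whose body is not shown here. From the call sites it plausibly returns the label with the largest total instance weight; a sketch under that assumption (the real helper lives in the assignment's DS/DT classes):

import numpy as np

def most_common(Y, D):
    # Weighted majority vote: return the label whose instance
    # weights sum to the largest total.
    Y, D = np.asarray(Y), np.asarray(D)
    labels = np.unique(Y)
    totals = np.array([D[Y == label].sum() for label in labels])
    return labels[totals.argmax()]
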
Example #3
    def build_tree(self, X, Y, D):
        '''
            Build a decision stump by overriding the build_tree function in the DT class.
            Instead of building tree nodes recursively as in DT, here we build at most one level of child nodes.
            Input:
                X: the feature matrix, a numpy matrix of shape p by n. 
                   Each element can be int/float/string.
                   Here n is the number of data instances in the node, p is the number of attributes.
                Y: the class labels, a numpy array of length n. Each element can be int/float/string.
                D: the weights of instances, a numpy float vector of length n
            Return:
                t: the root node of the decision stump. 
        '''
        #########################################
        ## INSERT YOUR CODE HERE
        t = Node(X, Y, isleaf=False)
        t.p = DS.most_common(Y, D)
        # if Condition 1 or 2 holds, stop splitting
        if DT.stop1(Y) or DT.stop2(X):
            t.isleaf = True
            return t

        # find the best attribute to split
        t.i, t.th = self.best_attribute(X, Y, D)

        # configure each child node (mask selects instances with X[t.i] < t.th)
        mask = X[t.i, :] < t.th
        t.C1 = Node(X[:, mask], Y[mask], isleaf=True,
                    p=DS.most_common(Y[mask], D[mask]))
        t.C2 = Node(X[:, ~mask], Y[~mask], isleaf=True,
                    p=DS.most_common(Y[~mask], D[~mask]))
        #########################################
        return t
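
The next examples delegate the partitioning to a `split(X, Y, i, th)` helper. Judging from the manual slicing in the examples above, it splits columns on `X[i] < th` versus `X[i] >= th` and wraps each side in a Node; a sketch under that assumption:

import numpy as np

def split(X, Y, i, th):
    # Partition instances (columns) on attribute i at threshold th.
    # Node is the tree-node class used throughout these examples.
    mask = X[i] < th
    C1 = Node(X[:, mask], Y[mask])    # instances with X[i] <  th
    C2 = Node(X[:, ~mask], Y[~mask])  # instances with X[i] >= th
    return C1, C2
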
Example #4
    def build_tree(self, X, Y, D):
        '''
            Build a decision stump by overriding the build_tree function in the DT class.
            Instead of building tree nodes recursively as in DT, here we build at most one level of child nodes.
            Input:
                X: the feature matrix, a numpy matrix of shape p by n. 
                   Each element can be int/float/string.
                   Here n is the number of data instances in the node, p is the number of attributes.
                Y: the class labels, a numpy array of length n. Each element can be int/float/string.
                D: the weights of instances, a numpy float vector of length n
            Return:
                t: the root node of the decision stump. 
        '''
        #########################################
        ## INSERT YOUR CODE HERE
        t = Node(X, Y)
        t.p = DS.most_common(Y, D)
        # if Condition 1 or 2 holds, stop splitting
        if DT.stop1(Y) or DT.stop2(X):
            t.isleaf = True
            return t

        # find the best attribute to split
        t.i, t.th = self.best_attribute(X, Y, D)

        # configure each child node
        t.C1, t.C2 = self.split(t.X, t.Y, t.i, t.th)

        # split the weights with the same rule used for the instances
        mask = X[t.i, :] < t.th
        D1, D2 = D[mask], D[~mask]

        t.C1.p = DS.most_common(t.C1.Y, D1)
        t.C2.p = DS.most_common(t.C2.Y, D2)

        t.C1.isleaf = True
        t.C2.isleaf = True

        #########################################
        return t
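
"Condition 1 or 2" in every example refers to the stopping tests inherited from DT. They are not shown here, but the standard reading is: all labels in the node are identical, or all instances are identical on every attribute. Sketches of `stop1`/`stop2` under that assumption:

import numpy as np

def stop1(Y):
    # Condition 1: every instance in the node carries the same label.
    return len(np.unique(Y)) <= 1

def stop2(X):
    # Condition 2: all instances are identical on every attribute,
    # so no threshold can separate them.
    return all(len(np.unique(row)) <= 1 for row in X)
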
Example #5
    def build_tree(self, X, Y, D):
        '''
            Build a decision stump by overriding the build_tree function in the DT class.
            Instead of building tree nodes recursively as in DT, here we build at most one level of child nodes.
            Input:
                X: the feature matrix, a numpy matrix of shape p by n. 
                   Each element can be int/float/string.
                   Here n is the number of data instances in the node, p is the number of attributes.
                Y: the class labels, a numpy array of length n. Each element can be int/float/string.
                D: the weights of instances, a numpy float vector of length n
            Return:
                t: the root node of the decision stump. 
        '''
        #########################################
        ## INSERT YOUR CODE HERE
        t = Node(X, Y)
        t.p = DS.most_common(t.Y, D)

        # if Condition 1 or 2 holds, stop splitting
        if DT.stop1(t.Y) or DT.stop2(t.X):
            t.isleaf = True
            return t

        # find the best attribute to split
        t.i, t.th = self.best_attribute(t.X, t.Y, D)

        # configure each child node
        t.C1, t.C2 = DT.split(t.X, t.Y, t.i, t.th)
        d1 = D[np.where(X[t.i] < t.th)]
        d2 = D[np.where(X[t.i] >= t.th)]
        t.C1.p = DS.most_common(t.C1.Y, d1)
        t.C2.p = DS.most_common(t.C2.Y, d2)
        t.C1.isleaf = True
        t.C2.isleaf = True

        #########################################
        return t
Example #6
    def build_tree(self, X, Y, D):
        '''
            Build a decision stump by overriding the build_tree function in the DT class.
            Instead of building tree nodes recursively as in DT, here we build at most one level of child nodes.
            Input:
                X: the feature matrix, a numpy matrix of shape p by n. 
                   Each element can be int/float/string.
                   Here n is the number of data instances in the node, p is the number of attributes.
                Y: the class labels, a numpy array of length n. Each element can be int/float/string.
                D: the weights of instances, a numpy float vector of length n
            Return:
                t: the root node of the decision stump.
        '''
        #########################################
        ## INSERT YOUR CODE HERE
        t = Node(X, Y)
        t.p = DS.most_common(t.Y, D)
        # if Condition 1 or 2 holds, stop splitting
        if DS.stop1(t.Y) or DS.stop2(t.X):
            t.isleaf = True
            return t
        # find the best attribute to split
        t.i, t.th = self.best_attribute(t.X, t.Y, D)
        t.C1, t.C2 = DS.split(t.X, t.Y, t.i, t.th)
        # configure each child node: select the weights with the same mask
        # split uses, so each weight stays paired with its label (sorting
        # by feature value would break the Y/D alignment within a child)
        mask = X[t.i, :] < t.th
        t.C1.p = DS.most_common(t.C1.Y, D[mask])
        t.C2.p = DS.most_common(t.C2.Y, D[~mask])
        t.C1.isleaf = True
        t.C2.isleaf = True

        #########################################
        return t
Example #7
def test_inference():
    ''' (3 points) inference'''

    t = Node(None, None)
    t.isleaf = True
    t.p = 'good job'
    T = [t, t, t]

    x = np.random.random(10)

    y = Bag.inference(T, x)
    assert y == 'good job'

    #-----------------
    t.p = 'c1'
    t2 = Node(None, None)
    t2.isleaf = False
    t2.i = 1
    t2.th = 1.5
    c1 = Node(None, None)
    c2 = Node(None, None)
    c1.isleaf = True
    c2.isleaf = True

    c1.p = 'c1'
    c2.p = 'c2'
    t2.C1 = c1
    t2.C2 = c2

    x = np.array([1., 2., 3., 1.])
    y = Bag.inference([t, t2, t2], x)
    assert y == 'c2'

    y = Bag.inference([t, t, t2], x)
    assert y == 'c1'
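
The test pins down the contract of `Bag.inference(T, x)`: run each tree in the ensemble T on the single instance x and return the majority label. A self-contained sketch consistent with those assertions (node attribute names i, th, C1, C2, p are taken from the test):

from collections import Counter

def inference(T, x):
    # Majority vote over the ensemble T on a single instance x.
    votes = []
    for t in T:
        while not t.isleaf:  # walk each tree down to a leaf
            t = t.C1 if x[t.i] < t.th else t.C2
        votes.append(t.p)
    return Counter(votes).most_common(1)[0][0]
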
Example #8
def test_predict():
    ''' (2 points) predict '''
    t = Node(None, None)
    t.isleaf = True
    t.p = 'c1'
    t2 = Node(None, None)
    t2.isleaf = False
    t2.i = 1
    t2.th = 1.5
    c1 = Node(None, None)
    c2 = Node(None, None)
    c1.isleaf = True
    c2.isleaf = True
    c1.p = 'c1'
    c2.p = 'c2'
    t2.C1 = c1
    t2.C2 = c2

    X = np.array([[1., 1., 1., 1.], [1., 2., 3., 1.]])
    Y = Bag.predict([t, t, t2], X)

    assert type(Y) == np.ndarray
    assert Y.shape == (4, )
    assert Y[0] == 'c1'
    assert Y[1] == 'c1'
    assert Y[2] == 'c1'
    assert Y[3] == 'c1'

    Y = Bag.predict([t, t2, t2], X)
    assert Y[0] == 'c1'
    assert Y[1] == 'c2'
    assert Y[2] == 'c2'
    assert Y[3] == 'c1'
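
Similarly, these assertions require `Bag.predict(T, X)` to apply the ensemble to every instance (column) of X and return a numpy array of labels. A sketch built on the inference function sketched above:

import numpy as np

def predict(T, X):
    # One ensemble prediction per instance (column) of X.
    return np.asarray([inference(T, X[:, j]) for j in range(X.shape[1])])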