Exemplo n.º 1
0
    def best_threshold(X, Y, D):
        '''
            Pick the cutting point of a continuous attribute that gives the largest weighted information gain.
            Input:
                X: the attribute values; compared element-wise with each candidate via `X >= threshold`.
                Y: the labels, forwarded unchanged to DS.information_gain.
                D: the weights of instances, forwarded unchanged to DS.information_gain.
            Output:
                th: the selected threshold, or -inf when every candidate returned by DT.cutting_points equals -inf.
                g: the information gain obtained with th, or -1 when -inf is returned as the threshold.
        '''
        candidates = DT.cutting_points(X, Y)

        # Nothing to split on when all candidates are -inf.
        no_split = np.all(candidates == -np.inf)
        if no_split:
            return -float('inf'), -1

        # Score the boolean partition `X >= cut` for each candidate in turn.
        gains = [
            DS.information_gain(Y, X >= cut, D)
            for cut in np.nditer(candidates)
        ]

        # np.argmax reports the first index of the maximum, so ties go to the earliest candidate.
        th = candidates[np.argmax(gains)]
        g = max(gains)
        return th, g
Exemplo n.º 2
0
    def best_threshold(X, Y, D):
        '''
            Find the best threshold among the candidate cutting points of the continuous attribute X. The data instances are weighted.
            Input:
                X: the attribute values, a numpy array of int/float values.
                Y: the labels, a numpy array of int/float/string values.
                D: the weights of instances, a numpy float vector of length n
            Output:
                th: the best threshold; float('-inf') when DT.cutting_points does not return a numpy array or no candidate scores above -1.
                g: the weighted information gain obtained with th; -1 when no threshold was selected.
        '''
        th = float('-inf')
        g = -1

        cp = DT.cutting_points(X, Y)
        # Anything other than an array of candidates is treated as "no cutting point available".
        if not isinstance(cp, np.ndarray):
            return th, g

        for candidate in cp:
            # A plain comparison yields the boolean partition directly. Going through
            # np.ma.masked_where(...).mask can yield the scalar `nomask` instead of an
            # array when no element satisfies the condition.
            above = X > candidate
            # Evaluate the gain once per candidate and reuse the value.
            gain = DS.information_gain(Y, above, D)
            # Strict comparison keeps the earliest candidate among equal gains.
            if gain > g:
                g = gain
                th = candidate

        return th, g
Exemplo n.º 3
0
 def best_threshold(X, Y, D):
     '''
         Find the best threshold among the candidate cutting points of the continuous attribute X. The data instances are weighted.
         Input:
             X: the attribute values, a numpy array of int/float values.
             Y: the labels, a numpy array of int/float/string values.
             D: the weights of instances, a numpy float vector of length n
         Output:
             th: the best threshold; -inf when DT.cutting_points returns something that cannot be iterated, or when no candidate scores above -1.
             g: the weighted information gain obtained with th; -1 when no threshold was selected.
     '''
     cp = DT.cutting_points(X, Y)

     # Only the iterability probe is guarded, so a TypeError raised while
     # scoring a candidate is not mistaken for "no cutting points".
     try:
         candidates = iter(cp)
     except TypeError:
         return -float('Inf'), -1

     values = np.asarray(X)
     # Same "no threshold" result as the non-iterable case above.
     th = -float('Inf')
     g = -1
     for v in candidates:
         # Label each instance by the side of the candidate it falls on.
         side = np.where(values > v, "T", "F")
         ig = DS.information_gain(Y, side, D)
         # Strict comparison keeps the earliest candidate among equal gains.
         if ig > g:
             th = v
             g = ig
     return th, g
Exemplo n.º 4
0
    def best_threshold(X,Y,D):
        '''
            Find the best threshold among the candidate cutting points of the continuous attribute X. The data instances are weighted.
            Input:
                X: the attribute values; must support .copy(), iteration and item assignment (e.g. a numpy array of int/float values).
                Y: the labels, forwarded unchanged to DS.information_gain.
                D: the weights of instances, forwarded unchanged to DS.information_gain.
            Output:
                th: the best threshold; -inf when there is no candidate or when the best candidate is itself -inf.
                g: the weighted information gain obtained with th; -1 whenever th is -inf.
        '''
        cp = list(DT.cutting_points(X,Y))

        # max() raises ValueError on an empty sequence; report "no threshold" instead.
        if not cp:
            return float('-inf'),-1

        ig = []
        for p in cp:
            # Binarize a copy so the caller's X is left untouched:
            # 0 for values below the candidate, 1 otherwise.
            newX = X.copy()
            for i,x in enumerate(newX):
                newX[i] = 0 if x < p else 1
            ig.append(DS.information_gain(Y,newX,D))

        # list.index returns the first match, so ties go to the earliest candidate.
        g = max(ig)
        th = cp[ig.index(g)]

        # A -inf threshold means no real split was found; report the gain as -1.
        if th == float('-inf'):
            g = -1

        return th,g
Exemplo n.º 5
0
    def best_threshold(X, Y, D):
        '''
            Find the best threshold among the candidate cutting points of the continuous attribute X. The data instances are weighted.
            Input:
                X: the attribute values, a numpy array of int/float values.
                Y: the labels, a numpy array of int/float/string values.
                D: the weights of instances, a numpy float vector of length n
            Output:
                th: the best threshold; -inf when DT.cutting_points returns a single float instead of a collection, or when no candidate scores above -1.
                g: the weighted information gain obtained with th; -1 when no threshold was selected.
        '''
        cp = DT.cutting_points(X, Y)

        # isinstance also matches np.float64 (a subclass of float), which an
        # exact type comparison would let through to the loop below.
        if isinstance(cp, float):
            return -float('Inf'), -1

        values = np.asarray(X)
        # Same "no threshold" result as the scalar case above.
        th = -float('Inf')
        g = -1
        for c in cp:
            # Label each instance as larger ('L') than the candidate or not ('S').
            helper = np.where(values > c, 'L', 'S')
            gg = DS.information_gain(Y, helper, D)
            # Strict comparison keeps the earliest candidate among equal gains.
            if gg > g:
                th = c
                g = gg

        return th, g
Exemplo n.º 6
0
    def best_threshold(X,Y,D):
        '''
            Pick the cutting point of a continuous attribute that gives the largest weighted information gain.
            Input:
                X: the attribute values; compared element-wise with each candidate via `X >= threshold`.
                Y: the labels, forwarded unchanged to DS.information_gain.
                D: the weights of instances, forwarded unchanged to DS.information_gain.

            Output:
                th: the selected threshold, or -inf when every candidate returned by DT.cutting_points equals -inf.
                g: the information gain obtained with th, or -1 when -inf is returned as the threshold.
        '''
        candidates = DT.cutting_points(X,Y)

        # Nothing to split on when all candidates are -inf.
        if np.all(candidates == -np.inf):
            return -float('inf'),-1

        # Score the boolean partition `X >= cut` for each candidate in turn.
        gains = []
        for cut in candidates:
            gains.append(DS.information_gain(Y,X>=cut,D))

        # np.argmax reports the first index of the maximum, so ties go to the earliest candidate.
        best = np.argmax(gains)
        return candidates[best],max(gains)