Exemplo n.º 1
0
    def calculateProbability(self, idHyperbox, X_l, X_u, memVal):
        """
        Estimate how well the given hyperbox covers the samples assigned to it

        INPUT:
            + idHyperbox        Index of the hyperbox being considered
            + X_l, X_u          Lower and upper bounds of input data
            + memVal            Kept in the signature; the formula below does not use it

        OUTPUT:
            Share of the samples listed in self.cardin[idHyperbox] for which the first
            value returned by memberG equals 1. Returns 1 when no sample is listed.
        """
        inside_count = 0
        total_count = 0

        for sample_id in self.cardin[idHyperbox]:
            membership = memberG(X_l[sample_id], X_u[sample_id],
                                 self.V[idHyperbox], self.W[idHyperbox],
                                 self.gamma)
            total_count += 1
            if membership[0] == 1:
                # full membership: the sample is counted as located within the hyperbox
                inside_count += 1

        # a hyperbox without any assigned sample gets the neutral value 1
        if total_count == 0:
            return 1

        return inside_count / total_count
Exemplo n.º 2
0
def predict(V, W, classId, XlT, XuT, patClassIdTest, gama = 1, oper = 'min'):
    """
    GFMM classifier (test routine)

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

    A sample is counted as misclassified when its maximum membership is 0, when the
    hyperboxes sharing the maximum membership belong to more than one class, or when
    the winning class differs from a labelled sample's class.

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Input data (hyperbox) class labels (crisp)
      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp)
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result           A object with Bunch datatype containing all results as follows:
                          + summis           Number of misclassified objects
                          + misclass         Binary error map

    """
    # a single sample passed as a 1-D vector is promoted to a one-row matrix
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    #initialization
    yX = XlT.shape[0]
    misclass = np.zeros(yX)

    # classifications
    for i in range(yX):
        mem = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper)   # calculate memberships for all hyperboxes
        bmax = mem.max()                                        # get max membership value
        maxVind = np.nonzero(mem == bmax)[0]                    # get indexes of all hyperboxes with max membership

        if bmax == 0:
            print('zero maximum membership value')              # this is probably bad...
            misclass[i] = True
        elif len(np.unique(classId[maxVind])) > 1:
            # the input is on the boundary between classes: counted as an error
            misclass[i] = True
        else:
            # unlabelled test samples are never counted as errors
            is_correct = np.any(classId[maxVind] == patClassIdTest[i]) or patClassIdTest[i] == UNLABELED_CLASS
            misclass[i] = not is_correct

    # results
    summis = np.sum(misclass).astype(np.int64)

    result = Bunch(summis = summis, misclass = misclass)
    return result
Exemplo n.º 3
0
def predict(V, W, classId, XlT, XuT, patClassIdTest, gama = 1, oper = 'min'):
    """
    GFMM classifier (test routine) with random tie-breaking between classes

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Input data (hyperbox) class labels (crisp)
      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp)
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result           A object with Bunch datatype containing all results as follows:
                          + summis           Number of misclassified objects
                          + misclass         Binary error map
                          + predicted_class   Predicted class

    """
    # a single sample passed as a 1-D vector is promoted to a one-row matrix
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    n_samples = XlT.shape[0]
    misclass = np.zeros(n_samples)
    predicted_class = np.full(n_samples, None)

    for row in range(n_samples):
        # memberships of this sample in every hyperbox
        memberships = memberG(XlT[row, :], XuT[row, :], V, W, gama, oper)
        # hyperboxes sharing the maximum membership value
        top_boxes = np.nonzero(memberships == memberships.max())[0]
        candidate_classes = np.unique(classId[top_boxes])

        if len(candidate_classes) > 1:
            # several classes tie on the maximum membership: pick one at random
            predicted_class[row] = random.choice(candidate_classes)
        else:
            predicted_class[row] = classId[top_boxes[0]]

        # unlabelled test samples are never counted as errors
        true_label = patClassIdTest[row]
        is_hit = predicted_class[row] == true_label or true_label == UNLABELED_CLASS
        misclass[row] = not is_hit

    return Bunch(summis = np.sum(misclass).astype(np.int64), misclass = misclass, predicted_class=predicted_class)
Exemplo n.º 4
0
    def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5, newVerPredict = True):
        """
        pruning handling based on validation (validation routine) with hyperboxes stored in self. V, W, classId

          result = pruning_val(XlT,XuT,patClassIdTest)

            INPUT
              XlT               Test data lower bounds (rows = objects, columns = features)
              XuT               Test data upper bounds (rows = objects, columns = features)
              patClassIdTest    Test data class labels (crisp)
              accuracy_threshold  The minimum accuracy for each hyperbox
              newVerPredict     + True: using Manhattan distance in addition to fuzzy membership
                                + False: No using Manhattan distance
                                (not referenced by the statements of this body)

        NOTE(review): this body ends right after the per-class label lists are built;
        no hyperbox is removed and nothing is returned. It looks truncated - confirm
        against the complete implementation before relying on it.
        """

        #initialization
        yX = XlT.shape[0]
        mem = np.zeros((yX, self.V.shape[0]))
        # column 0: correct predictions of each hyperbox, column 1: wrong predictions
        no_predicted_samples_hyperboxes = np.zeros((len(self.classId), 2))
        # classifications
        for i in range(yX):
            mem[i, :] = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper) # calculate memberships for all hyperboxes
            bmax = mem[i,:].max()                                             # get max membership value
            maxVind = np.nonzero(mem[i,:] == bmax)[0]                         # get indexes of all hyperboxes with max membership

            if len(maxVind) == 1:
                # Only one hyperbox with the highest membership function

                if self.classId[maxVind[0]] == patClassIdTest[i]:
                    no_predicted_samples_hyperboxes[maxVind[0], 0] = no_predicted_samples_hyperboxes[maxVind[0], 0] + 1
                else:
                    no_predicted_samples_hyperboxes[maxVind[0], 1] = no_predicted_samples_hyperboxes[maxVind[0], 1] + 1
            else:
                # More than one hyperbox with highest membership => random choosing
                id_min = maxVind[np.random.randint(len(maxVind))]

                # in this branch a sample labelled 0 is never counted against the chosen hyperbox
                if self.classId[id_min] != patClassIdTest[i] and patClassIdTest[i] != 0:
                    no_predicted_samples_hyperboxes[id_min, 1] = no_predicted_samples_hyperboxes[id_min, 1] + 1
                else:
                    no_predicted_samples_hyperboxes[id_min, 0] = no_predicted_samples_hyperboxes[id_min, 0] + 1

        # pruning handling based on the validation results
        tmp_no_box = no_predicted_samples_hyperboxes.shape[0]
        # builtin bool is used because the np.bool alias is missing from some NumPy releases
        accuracy_larger_half = np.zeros(tmp_no_box, dtype=bool)
        accuracy_larger_half_keep_nojoin = np.zeros(tmp_no_box, dtype=bool)
        for i in range(tmp_no_box):
            if (no_predicted_samples_hyperboxes[i, 0] + no_predicted_samples_hyperboxes[i, 1] != 0) and no_predicted_samples_hyperboxes[i, 0] / (no_predicted_samples_hyperboxes[i, 0] + no_predicted_samples_hyperboxes[i, 1]) >= accuracy_threshold:
                accuracy_larger_half[i] = True
                accuracy_larger_half_keep_nojoin[i] = True
            # hyperboxes that never won a validation sample stay in the "keep" mask only
            if (no_predicted_samples_hyperboxes[i, 0] + no_predicted_samples_hyperboxes[i, 1] == 0):
                accuracy_larger_half_keep_nojoin[i] = True

        # keep one hyperbox for class prunned all
        current_classes = np.unique(self.classId)
        class_tmp = self.classId[accuracy_larger_half]
        class_tmp_keep = self.classId[accuracy_larger_half_keep_nojoin]
    def removeContainedHyperboxes(self):
        """
        Remove all hyperboxes contained in other hyperboxes

        Hyperbox i is dropped when memberG reports a membership of 1 for it in more
        than one hyperbox (itself included) and every hyperbox reporting 1 carries the
        same class label as i. self.V, self.W and self.classId are replaced by their
        filtered versions.
        """
        numBoxes = len(self.classId)
        # builtin bool is used because the np.bool alias is missing from some NumPy releases
        indtokeep = np.ones(numBoxes, dtype=bool)

        for i in range(numBoxes):
            memValue = memberG(self.V[i], self.W[i], self.V, self.W,
                               self.gamma, self.oper)
            containers = memValue == 1
            isInclude = (self.classId[containers] == self.classId[i]).all()

            # memValue always has one value being 1 because of self-containing
            # NOTE(review): two identical same-class hyperboxes would each see the other
            # as a container and both be dropped - confirm whether duplicates can occur
            if np.sum(containers) > 1 and isInclude:
                indtokeep[i] = False

        self.V = self.V[indtokeep, :]
        self.W = self.W[indtokeep, :]
        self.classId = self.classId[indtokeep]
Exemplo n.º 6
0
    def fit(self, X_l, X_u, patClassId, num_pat=None):
        """
        Training the classifier (online learning: samples are presented one at a time)

         X_l            Input data lower bounds (rows = objects, columns = features)
         X_u            Input data upper bounds (rows = objects, columns = features)
         patClassId     Input data class labels (crisp). patClassId[i] = 0 corresponds to an unlabeled item
         num_pat        Save the number of samples in hyperboxes [X_l, X_u]

        For every sample:
          + if the model is empty, the first sample becomes the first hyperbox;
          + otherwise the hyperboxes of the same class (or unlabeled ones) are ranked by
            decreasing membership. If the sample is not already fully covered, the first
            candidate whose expansion respects self.teta in every dimension and does not
            overlap the hyperboxes selected for the overlap test is expanded; when no
            candidate qualifies a new hyperbox is created;
          + if the sample is fully covered by a hyperbox of its own class, only the
            sample counter of that hyperbox is increased.

        Updates self.V, self.W, self.classId and self.counter.
        """
        #print('--Online Learning--')

        if self.isNorm == True:
            X_l, X_u = self.dataPreprocessing(X_l, X_u)

        #X_l = X_l.astype(np.float32)
        #X_u = X_u.astype(np.float32)

        time_start = time.perf_counter()

        yX, xX = X_l.shape
        teta = self.teta

        # marker / colour tables indexed by class label (drawing only)
        mark = np.array(['*', 'o', 'x', '+', '.', ',', 'v', '^', '<', '>', '1', '2', '3', '4', '8', 's', 'p', 'P', 'h', 'H', 'X', 'D', '|', '_'])
        mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])

        listLines = list()
        listInputSamplePoints = list()

        if self.isDraw:
            drawing_canvas = self.initializeCanvasGraph("GFMM - Online learning", xX)

            if self.V.size > 0:
                # draw existed hyperboxes
                color_ = np.array(['k'] * len(self.classId), dtype = object)
                for c in range(len(self.classId)):
                    if self.classId[c] < len(mark_col):
                        color_[c] = mark_col[self.classId[c]]

                hyperboxes = drawbox(self.V[:, 0:np.minimum(xX,3)], self.W[:, 0:np.minimum(xX,3)], drawing_canvas, color_)
                listLines.extend(hyperboxes)
                self.delay()

        self.misclass = 1
        threshold = 0 # No using lemma for branch and bound
        # for each input sample
        for i in range(yX):
            classOfX = patClassId[i]
            # draw input samples
            if self.isDraw:
                if i == 0 and len(listInputSamplePoints) > 0:
                    # reset input point drawing
                    for point in listInputSamplePoints:
                        point.remove()
                    listInputSamplePoints.clear()

                color_ = 'k'
                if classOfX < len(mark_col):
                    color_ = mark_col[classOfX]

                if (X_l[i, :] == X_u[i, :]).all():
                    # lower bound == upper bound: the sample is a point, not a box
                    marker_ = 'd'
                    if classOfX < len(mark):
                        marker_ = mark[classOfX]

                    if xX == 2:
                        inputPoint = drawing_canvas.plot(X_l[i, 0], X_l[i, 1], color = color_, marker=marker_)
                    else:
                        inputPoint = drawing_canvas.plot([X_l[i, 0]], [X_l[i, 1]], [X_l[i, 2]], color = color_, marker=marker_)

                    #listInputSamplePoints.append(inputPoint)
                else:
                    inputPoint = drawbox(np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]), np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]), drawing_canvas, color_)

                listInputSamplePoints.append(inputPoint[0])
                self.delay()

            if self.V.size == 0:   # no model provided - starting from scratch
                self.V = np.array([X_l[0]])
                self.W = np.array([X_u[0]])
                self.classId = np.array([patClassId[0]])
                if num_pat is None:
                    self.counter = np.array([1]) # save number of samples of each hyperbox
                else:
                    self.counter = np.array([num_pat[0]])

                if self.isDraw == True:
                    # draw hyperbox
                    box_color = 'k'
                    if patClassId[0] < len(mark_col):
                        box_color = mark_col[patClassId[0]]

                    hyperbox = drawbox(np.asmatrix(self.V[0, 0:np.minimum(xX,3)]), np.asmatrix(self.W[0, 0:np.minimum(xX,3)]), drawing_canvas, box_color)
                    listLines.append(hyperbox[0])
                    self.delay()

            else:
                # candidate hyperboxes: same class as the sample, or unlabeled
                id_lb_sameX = np.nonzero(((self.classId == classOfX) | (self.classId == UNLABELED_CLASS)))[0]
                V_sameX = self.V[id_lb_sameX]

                if len(V_sameX) > 0:
                    # if we have small number of hyperboxes with low dimension, this operation takes more time compared to computing membership value with all hyperboxes and ignore
                    # hyperboxes with different class (the membership computation on small dimensionality is so rapidly). However, if we have hyperboxes with high dimensionality, the membership computing on all hyperboxes take so long => reduced to only hyperboxes with the
                    # same class will significantly decrease the running time
                    W_sameX = self.W[id_lb_sameX]
                    lb_sameX = self.classId[id_lb_sameX]

                    b = memberG(X_l[i], X_u[i], V_sameX, W_sameX, self.gamma)
                    # candidates ordered by decreasing membership
                    index = np.argsort(b)[::-1]

                    if b[index[0]] != 1 or (classOfX != lb_sameX[index[0]] and classOfX != UNLABELED_CLASS):
                        adjust = False

                        # hyperboxes used for the overlap test: other classes and unlabeled ones
                        id_lb_diff = ((self.classId != classOfX) | (self.classId == UNLABELED_CLASS))
                        V_diff = self.V[id_lb_diff]
                        W_diff = self.W[id_lb_diff]

                        indcomp = np.nonzero((W_diff >= V_diff).all(axis = 1))[0] 	# examine only hyperboxes w/o missing dimensions, meaning that in each dimension upper bound is larger than lowerbound
                        no_check_overlap = False
                        if len(indcomp) == 0 or len(V_diff) == 0:
                            no_check_overlap = True
                        else:
                            V_diff = V_diff_save = V_diff[indcomp]
                            W_diff = W_diff_save = W_diff[indcomp]

                        for j in id_lb_sameX[index]:
                            minV_new = np.minimum(self.V[j], X_l[i])
                            maxW_new = np.maximum(self.W[j], X_u[i])

                            # test violation of max hyperbox size and class labels
                            if ((maxW_new - minV_new) <= teta).all() == True:
                                if no_check_overlap == False and classOfX == UNLABELED_CLASS and self.classId[j] == UNLABELED_CLASS:
                                    # remove hyperbox themself
                                    keep_id = (V_diff != self.V[j]).any(1)
                                    V_diff = V_diff[keep_id]
                                    W_diff = W_diff[keep_id]
                                # Test overlap
                                if no_check_overlap == True or directedIsOverlap(V_diff, W_diff, minV_new, maxW_new) == False:		# overlap test
                                    # adjust the j-th hyperbox
                                    self.V[j] = minV_new
                                    self.W[j] = maxW_new
                                    if num_pat is None:
                                        self.counter[j] = self.counter[j] + 1
                                    else:
                                        self.counter[j] = self.counter[j] + num_pat[i]

                                    # a labelled sample absorbed by an unlabeled hyperbox gives it its label
                                    if classOfX != UNLABELED_CLASS and self.classId[j] == UNLABELED_CLASS:
                                        self.classId[j] = classOfX

                                    if self.isDraw:
                                        # Handle drawing graph
                                        box_color = 'k'
                                        if self.classId[j] < len(mark_col):
                                            box_color = mark_col[self.classId[j]]

                                        # best effort: the previous drawing of this box may be missing
                                        try:
                                            listLines[j].remove()
                                        except Exception:
                                            pass

                                        hyperbox = drawbox(np.asmatrix(self.V[j, 0:np.minimum(xX, 3)]), np.asmatrix(self.W[j, 0:np.minimum(xX, 3)]), drawing_canvas, box_color)
                                        listLines[j] = hyperbox[0]
                                        self.delay()

                                    adjust = True
                                    break
                                else:
                                    # restore the comparison set reduced by the self-removal above
                                    if no_check_overlap == False and classOfX == UNLABELED_CLASS and self.classId[j] == UNLABELED_CLASS:
                                        V_diff = V_diff_save
                                        W_diff = W_diff_save

                        # if i-th sample did not fit into any existing box, create a new one
                        if not adjust:
                            self.V = np.concatenate((self.V, X_l[i].reshape(1, -1)), axis = 0)
                            self.W = np.concatenate((self.W, X_u[i].reshape(1, -1)), axis = 0)
                            self.classId = np.concatenate((self.classId, [classOfX]))
                            if num_pat is None:
                                self.counter = np.concatenate((self.counter, [1]))
                            else:
                                self.counter = np.concatenate((self.counter, [num_pat[i]]))

                            if self.isDraw:
                                # handle drawing graph
                                box_color = 'k'
                                if self.classId[-1] < len(mark_col):
                                    box_color = mark_col[self.classId[-1]]

                                hyperbox = drawbox(np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]), np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]), drawing_canvas, box_color)
                                listLines.append(hyperbox[0])
                                self.delay()
                    else:
                        # the sample is already fully covered: find the first fully covering
                        # hyperbox of its own class and only increase that hyperbox's counter
                        t = 0
                        while (t + 1 < len(index)) and (b[index[t]] == 1) and (self.classId[id_lb_sameX[index[t]]] != classOfX):
                            t = t + 1
                        if b[index[t]] == 1 and self.classId[id_lb_sameX[index[t]]] == classOfX:
                            if num_pat is None:
                                self.counter[id_lb_sameX[index[t]]] = self.counter[id_lb_sameX[index[t]]] + 1
                            else:
                                self.counter[id_lb_sameX[index[t]]] = self.counter[id_lb_sameX[index[t]]] + num_pat[i]
                else:
                    # no candidate hyperbox for this class yet: the sample becomes a new hyperbox
                    self.V = np.concatenate((self.V, X_l[i].reshape(1, -1)), axis = 0)
                    self.W = np.concatenate((self.W, X_u[i].reshape(1, -1)), axis = 0)
                    self.classId = np.concatenate((self.classId, [classOfX]))

                    if num_pat is None:
                        self.counter = np.concatenate((self.counter, [1]))
                    else:
                        self.counter = np.concatenate((self.counter, [num_pat[i]]))

                    if self.isDraw:
                        # handle drawing graph
                        box_color = 'k'
                        if self.classId[-1] < len(mark_col):
                            box_color = mark_col[self.classId[-1]]

                        hyperbox = drawbox(np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]), np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]), drawing_canvas, box_color)
                        listLines.append(hyperbox[0])
                        self.delay()
Exemplo n.º 7
0
 def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5, newVerPredict = True):
     """
     pruning handling based on validation (validation routine) with hyperboxes stored in self. V, W, classId

       result = pruning_val(XlT,XuT,patClassIdTest)

         INPUT
           XlT               Test data lower bounds (rows = objects, columns = features)
           XuT               Test data upper bounds (rows = objects, columns = features)
           patClassIdTest    Test data class labels (crisp)
           accuracy_threshold  The minimum accuracy for each hyperbox
           newVerPredict     + True: using probability formula for prediction in addition to fuzzy membership
                             + False: No using probability formula for prediction

     Two candidate models are built: one keeping only hyperboxes whose validation
     accuracy reaches accuracy_threshold, one that additionally keeps hyperboxes that
     never won a validation sample. Every class keeps at least one hyperbox in both.
     The candidate with fewer validation errors (the stricter one on ties) replaces
     self.V, self.W, self.classId and self.counter. Nothing is returned.
     """

     #initialization
     yX = XlT.shape[0]
     # column 0: correct predictions of each hyperbox, column 1: wrong predictions
     no_predicted_samples_hyperboxes = np.zeros((len(self.classId), 2))
     # classifications
     for i in range(yX):
         mem = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper) # calculate memberships for all hyperboxes
         bmax = mem.max()                                             # get max membership value
         maxVind = np.nonzero(mem == bmax)[0]                         # get indexes of all hyperboxes with max membership

         if len(maxVind) == 1:
             # Only one hyperbox with the highest membership function

             if self.classId[maxVind[0]] == patClassIdTest[i]:
                 no_predicted_samples_hyperboxes[maxVind[0], 0] += 1
             else:
                 no_predicted_samples_hyperboxes[maxVind[0], 1] += 1
         else:
             if newVerPredict == True:
                 cls_same_mem = np.unique(self.classId[maxVind])
                 if len(cls_same_mem) > 1:
                     is_find_prob_val = True
                     if bmax == 1:
                         # sample fully inside several boxes: prefer a box whose counter is 1
                         id_box_with_one_sample = np.nonzero(self.counter[maxVind] == 1)[0]
                         if len(id_box_with_one_sample) > 0:
                             is_find_prob_val = False
                             id_min = random.choice(maxVind[id_box_with_one_sample])

                     if is_find_prob_val == True:
                         sum_prod_denum = (mem[maxVind] * self.counter[maxVind]).sum()
                         if sum_prod_denum == 0:
                             # every weighted membership is 0, so the ratios below would be
                             # undefined and id_min would stay unset: fall back to a random winner
                             id_min = random.choice(maxVind)
                         else:
                             max_prob = -1
                             pre_id_cls = None
                             for c in cls_same_mem:
                                 id_cls = np.nonzero(self.classId[maxVind] == c)[0]
                                 sum_pro_num = (mem[maxVind[id_cls]] * self.counter[maxVind[id_cls]]).sum()
                                 tmp = sum_pro_num / sum_prod_denum

                                 # ties between classes go to the class holding more samples
                                 if tmp > max_prob or (tmp == max_prob and pre_id_cls is not None and self.counter[maxVind[id_cls]].sum() > self.counter[maxVind[pre_id_cls]].sum()):
                                     max_prob = tmp
                                     pre_id_cls = id_cls
                                     id_min = random.choice(maxVind[id_cls])
                 else:
                     id_min = random.choice(maxVind)
             else:
                 # More than one hyperbox with highest membership => random choosing
                 id_min = maxVind[np.random.randint(len(maxVind))]

             # in this branch unlabelled samples are never counted against the chosen hyperbox
             if self.classId[id_min] != patClassIdTest[i] and patClassIdTest[i] != UNLABELED_CLASS:
                 no_predicted_samples_hyperboxes[id_min, 1] += 1
             else:
                 no_predicted_samples_hyperboxes[id_min, 0] += 1

     # pruning handling based on the validation results
     no_correct = no_predicted_samples_hyperboxes[:, 0]
     no_total = no_predicted_samples_hyperboxes.sum(axis = 1)
     is_evaluated = no_total != 0
     accuracy = np.zeros(len(no_total))
     accuracy[is_evaluated] = no_correct[is_evaluated] / no_total[is_evaluated]
     # boolean masks (builtin bool: the np.bool alias is missing from some NumPy releases)
     accuracy_larger_half = is_evaluated & (accuracy >= accuracy_threshold)
     # hyperboxes that never won a validation sample are also kept in the second mask
     accuracy_larger_half_keep_nojoin = accuracy_larger_half | ~is_evaluated

     # keep one hyperbox for class prunned all
     current_classes = np.unique(self.classId)
     class_tmp = self.classId[accuracy_larger_half]
     class_tmp_keep = self.classId[accuracy_larger_half_keep_nojoin]
     for c in current_classes:
         if c not in class_tmp:
             # np.nonzero returns a tuple; [0] gives the index array of this class
             pos = np.nonzero(self.classId == c)[0]
             id_kept = np.random.randint(len(pos))
             # keep pos[id_kept]
             accuracy_larger_half[pos[id_kept]] = True
         if c not in class_tmp_keep:
             pos = np.nonzero(self.classId == c)[0]
             id_kept = np.random.randint(len(pos))
             accuracy_larger_half_keep_nojoin[pos[id_kept]] = True

     V_prun_remove = self.V[accuracy_larger_half]
     W_prun_remove = self.W[accuracy_larger_half]
     classId_prun_remove = self.classId[accuracy_larger_half]
     numSample_prun_remove = self.counter[accuracy_larger_half]

     W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
     V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]

     classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]
     # sample counts come from self.counter, not from the class labels
     numSample_prun_keep = self.counter[accuracy_larger_half_keep_nojoin]

     if newVerPredict == True:
         result_prun_remove = predict_with_probability(V_prun_remove, W_prun_remove, classId_prun_remove, numSample_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
         result_prun_keep_nojoin = predict_with_probability(V_prun_keep, W_prun_keep, classId_prun_keep, numSample_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)
     else:
         result_prun_remove = predict(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
         result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)

     # adopt the candidate with fewer validation errors; ties favour the stricter pruning
     if (result_prun_remove.summis <= result_prun_keep_nojoin.summis):
         self.V = V_prun_remove
         self.W = W_prun_remove
         self.classId = classId_prun_remove
         self.counter = numSample_prun_remove
     else:
         self.V = V_prun_keep
         self.W = W_prun_keep
         self.classId = classId_prun_keep
         self.counter = numSample_prun_keep
    def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5):
        """
        pruning handling based on validation (validation routine) with hyperboxes stored in self. V, W, classId

          result = pruning_val(XlT,XuT,patClassIdTest)

            INPUT
              XlT               Test data lower bounds (rows = objects, columns = features)
              XuT               Test data upper bounds (rows = objects, columns = features)
              patClassIdTest    Test data class labels (crisp)
              accuracy_threshold  The minimum accuracy for each hyperbox

        Two candidate models are built: one keeping only hyperboxes whose validation
        accuracy reaches accuracy_threshold (plus one random hyperbox for any class
        that would otherwise disappear), one that additionally keeps hyperboxes that
        never won a validation sample. The candidate with fewer validation errors
        (the stricter one on ties) replaces self.V, self.W and self.classId.
        Nothing is returned.
        """

        #initialization
        yX = XlT.shape[0]
        # column 0: correct predictions of each hyperbox, column 1: wrong predictions
        no_predicted_samples_hyperboxes = np.zeros((len(self.classId), 2))
        # classifications
        for i in range(yX):
            # only the current row of memberships is needed, so no yX-by-boxes matrix is stored
            mem = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper) # calculate memberships for all hyperboxes
            bmax = mem.max()                                             # get max membership value
            maxVind = np.nonzero(mem == bmax)[0]                         # get indexes of all hyperboxes with max membership

            if len(maxVind) == 1:
                # Only one hyperbox with the highest membership function

                if self.classId[maxVind[0]] == patClassIdTest[i]:
                    no_predicted_samples_hyperboxes[maxVind[0], 0] += 1
                else:
                    no_predicted_samples_hyperboxes[maxVind[0], 1] += 1
            else:
                # More than one hyperbox with highest membership => random choosing
                id_min = maxVind[np.random.randint(len(maxVind))]

                # in this branch a sample labelled 0 is never counted against the chosen hyperbox
                if self.classId[id_min] != patClassIdTest[i] and patClassIdTest[i] != 0:
                    no_predicted_samples_hyperboxes[id_min, 1] += 1
                else:
                    no_predicted_samples_hyperboxes[id_min, 0] += 1

        # pruning handling based on the validation results
        no_correct = no_predicted_samples_hyperboxes[:, 0]
        no_total = no_predicted_samples_hyperboxes.sum(axis = 1)
        is_evaluated = no_total != 0
        accuracy = np.zeros(len(no_total))
        accuracy[is_evaluated] = no_correct[is_evaluated] / no_total[is_evaluated]
        # boolean masks (builtin bool: the np.bool alias is missing from some NumPy releases)
        accuracy_larger_half = is_evaluated & (accuracy >= accuracy_threshold)
        # hyperboxes that never won a validation sample are also kept in the second mask
        accuracy_larger_half_keep_nojoin = accuracy_larger_half | ~is_evaluated

        # keep one hyperbox for class prunned all
        current_classes = np.unique(self.classId)
        class_tmp = self.classId[accuracy_larger_half]
        for c in current_classes:
            if c not in class_tmp:
                # np.nonzero returns a tuple; [0] gives the index array of this class
                pos = np.nonzero(self.classId == c)[0]
                id_kept = np.random.randint(len(pos))
                # keep pos[id_kept]
                accuracy_larger_half[pos[id_kept]] = True

        V_prun_remove = self.V[accuracy_larger_half]
        W_prun_remove = self.W[accuracy_larger_half]
        classId_prun_remove = self.classId[accuracy_larger_half]

        W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
        V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
        classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]

        result_prun_remove = predict(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
        result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)

        # Pruning: adopt the candidate with fewer validation errors; ties favour the stricter pruning
        if (result_prun_remove.summis <= result_prun_keep_nojoin.summis):
            self.V = V_prun_remove
            self.W = W_prun_remove
            self.classId = classId_prun_remove
        else:
            self.V = V_prun_keep
            self.W = W_prun_keep
            self.classId = classId_prun_keep
Exemplo n.º 9
0
def predictOnlineOfflineCombination(onlClassifier,
                                    offClassifier,
                                    XlT,
                                    XuT,
                                    patClassIdTest,
                                    gama=1,
                                    oper='min'):
    """
    GFMM online-offline classifier (test routine)

    Every test sample is scored against the hyperboxes of both classifiers.
    The classifier holding the larger maximum membership decides whether the
    sample is misclassified (the offline classifier decides on ties).

      result = predictOnlineOfflineCombination(onlClassifier, offClassifier, XlT,XuT,patClassIdTest,gama,oper)

    INPUT
      onlClassifier   online classifier with the following attributes:
                        + V: hyperbox lower bounds
                        + W: hyperbox upper bounds
                        + classId: hyperbox class labels (crisp)

      offClassifier   offline classifier with the same three attributes

      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp); a label equal to 0 is never counted as an error
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result           A Bunch object holding:
                          + summis           Number of misclassified objects
                          + misclass         Binary error map
                          + out              Soft class memberships (per class, the larger of the
                                             online and offline maxima; 0 when a classifier has no
                                             hyperbox of that class)
    """
    n_samples = XlT.shape[0]
    misclass = np.zeros(n_samples)
    classes = np.union1d(onlClassifier.classId, offClassifier.classId)
    mem_onl = np.zeros((n_samples, onlClassifier.V.shape[0]))
    mem_off = np.zeros((n_samples, offClassifier.V.shape[0]))
    out = np.zeros((n_samples, classes.size))

    for i in range(n_samples):
        lower = XlT[i, :]
        upper = XuT[i, :]

        # memberships, best value and best hyperboxes in the online classifier
        mem_onl[i, :] = memberG(lower, upper, onlClassifier.V,
                                onlClassifier.W, gama, oper)
        onl_row = mem_onl[i, :]
        bmax_onl = onl_row.max()
        winners_onl = np.nonzero(onl_row == bmax_onl)[0]

        # the same three quantities for the offline classifier
        mem_off[i, :] = memberG(lower, upper, offClassifier.V,
                                offClassifier.W, gama, oper)
        off_row = mem_off[i, :]
        bmax_off = off_row.max()
        winners_off = np.nonzero(off_row == bmax_off)[0]

        # per-class soft output: best membership found in either classifier
        for j, label in enumerate(classes):
            onl_vals = mem_onl[i, onlClassifier.classId == label]
            best_onl = onl_vals.max() if len(onl_vals) > 0 else 0

            off_vals = mem_off[i, offClassifier.classId == label]
            best_off = off_vals.max() if len(off_vals) > 0 else 0

            out[i, j] = best_onl if best_onl > best_off else best_off

        # labels proposed by whichever classifier has the stronger response
        if bmax_onl > bmax_off:
            winning_labels = onlClassifier.classId[winners_onl]
        else:
            winning_labels = offClassifier.classId[winners_off]

        truth = patClassIdTest[i]
        misclass[i] = ~(np.any(winning_labels == truth) | (truth == 0))

    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis, misclass=misclass, out=out)
Exemplo n.º 10
0
def predict(V, W, classId, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    GFMM classifier (test routine)

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Hyperbox class labels (crisp)
      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp); a label equal to 0 is never counted as an error
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result           A Bunch object holding:
                          + summis           Number of misclassified objects
                          + misclass         Binary error map
                          + sumamb           Number of objects with maximum membership in more than one class
                          + out              Soft class memberships
                          + mem              Hyperbox memberships

    A message is printed for every sample whose maximum membership is 0.
    """
    n_samples = XlT.shape[0]
    classes = np.unique(classId)

    misclass = np.zeros(n_samples)
    ambiguity = np.zeros((n_samples, 1))
    mem = np.zeros((n_samples, V.shape[0]))
    out = np.zeros((n_samples, classes.size))

    for i in range(n_samples):
        # memberships of sample i in every hyperbox
        mem[i, :] = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper)
        row = mem[i, :]

        # strongest response and all hyperboxes that reach it
        bmax = row.max()
        top_boxes = np.nonzero(row == bmax)[0]

        # best membership per class
        for j, label in enumerate(classes):
            out[i, j] = row[classId == label].max()

        # how many distinct classes share the maximum membership
        ambiguity[i, :] = np.sum(out[i, :] == bmax)

        if bmax == 0:
            print('zero maximum membership value')  # this is probably bad...

        truth = patClassIdTest[i]
        misclass[i] = ~(np.any(classId[top_boxes] == truth) | (truth == 0))

    sumamb = np.sum(ambiguity[:, 0] > 1)
    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis,
                 misclass=misclass,
                 sumamb=sumamb,
                 out=out,
                 mem=mem)
    def fit(self, X_l, X_u, patClassId):
        """
        Train the classifier with the online (incremental) learning scheme.

        The training set is presented sample by sample. Each sample either
        expands an existing hyperbox, or a new hyperbox is created for it.
        After an expansion, the expanded hyperbox is tested for overlap
        against the other hyperboxes and contracted where an overlap is
        reported. The whole pass is repeated with the maximum hyperbox size
        multiplied by 0.9 each time, until the training set is classified
        without error or the size drops below self.tMin.

         X_l            Input data lower bounds (rows = objects, columns = features)
         X_u            Input data upper bounds (rows = objects, columns = features)
         patClassId     Input data class labels (crisp). patClassId[i] = 0 corresponds to an unlabeled item

        Updates self.V, self.W, self.classId, self.misclass and
        self.elapsed_training_time, and returns self.
        """
        print('--Online Learning--')

        if self.isNorm == True:
            X_l, X_u = self.dataPreprocessing(X_l, X_u)
        #X_l = X_l.astype(np.float32)
        #X_u = X_u.astype(np.float32)
        time_start = time.perf_counter()

        yX, xX = X_l.shape
        # local copy of the maximum hyperbox size; it shrinks after every pass
        teta = self.teta

        # marker and colour lookup tables, indexed by class label when drawing
        mark = np.array([
            '*', 'o', 'x', '+', '.', ',', 'v', '^', '<', '>', '1', '2', '3',
            '4', '8', 's', 'p', 'P', 'h', 'H', 'X', 'D', '|', '_'
        ])
        mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])

        # drawn artists: listLines holds one entry per hyperbox,
        # listInputSamplePoints holds the samples drawn in the current pass
        listLines = list()
        listInputSamplePoints = list()

        if self.isDraw:
            drawing_canvas = self.initializeCanvasGraph(
                "GFMM - Online learning", xX)

            if self.V.size > 0:
                # draw existing hyperboxes; labels beyond the colour table stay black
                color_ = np.array(['k'] * len(self.classId), dtype=object)
                for c in range(len(self.classId)):
                    if self.classId[c] < len(mark_col):
                        color_[c] = mark_col[self.classId[c]]

                hyperboxes = drawbox(self.V[:, 0:np.minimum(xX, 3)],
                                     self.W[:, 0:np.minimum(xX, 3)],
                                     drawing_canvas, color_)
                listLines.extend(hyperboxes)
                self.delay()

        # non-zero start value so that the loop below runs at least once
        self.misclass = 1

        while self.misclass > 0 and teta >= self.tMin:
            # for each input sample
            for i in range(yX):
                classOfX = patClassId[i]
                # draw input samples
                if self.isDraw:
                    if i == 0 and len(listInputSamplePoints) > 0:
                        # reset input point drawing at the start of a new pass
                        for point in listInputSamplePoints:
                            point.remove()
                        listInputSamplePoints.clear()

                    color_ = 'k'
                    if classOfX < len(mark_col):
                        color_ = mark_col[classOfX]

                    if (X_l[i, :] == X_u[i, :]).all():
                        # lower and upper bounds coincide: draw the sample as a point
                        marker_ = 'd'
                        if classOfX < len(mark):
                            marker_ = mark[classOfX]

                        if xX == 2:
                            inputPoint = drawing_canvas.plot(X_l[i, 0],
                                                             X_l[i, 1],
                                                             color=color_,
                                                             marker=marker_)
                        else:
                            # NOTE(review): this branch reads X_l[i, 2], so it assumes
                            # at least three features whenever xX != 2 - confirm
                            inputPoint = drawing_canvas.plot([X_l[i, 0]],
                                                             [X_l[i, 1]],
                                                             [X_l[i, 2]],
                                                             color=color_,
                                                             marker=marker_)

                        #listInputSamplePoints.append(inputPoint)
                    else:
                        # the sample is itself a hyperbox: draw it as a box
                        inputPoint = drawbox(
                            np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]),
                            np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]),
                            drawing_canvas, color_)

                    listInputSamplePoints.append(inputPoint[0])
                    self.delay()

                if self.V.size == 0:  # no model provided - starting from scratch
                    # the first hyperbox is built from sample 0
                    self.V = np.array([X_l[0]])
                    self.W = np.array([X_u[0]])
                    self.classId = np.array([patClassId[0]])

                    if self.isDraw == True:
                        # draw hyperbox
                        box_color = 'k'
                        if patClassId[0] < len(mark_col):
                            box_color = mark_col[patClassId[0]]

                        hyperbox = drawbox(
                            np.asmatrix(self.V[0, 0:np.minimum(xX, 3)]),
                            np.asmatrix(self.W[0, 0:np.minimum(xX, 3)]),
                            drawing_canvas, box_color)
                        listLines.append(hyperbox[0])
                        self.delay()

                else:
                    # candidate hyperboxes: same class as the sample, or unlabeled
                    id_lb_sameX = np.logical_or(
                        self.classId == classOfX,
                        self.classId == UNLABELED_CLASS)

                    if id_lb_sameX.any() == True:
                        V_sameX = self.V[id_lb_sameX]
                        W_sameX = self.W[id_lb_sameX]
                        lb_sameX = self.classId[id_lb_sameX]
                        # positions of the candidates inside self.V / self.W
                        id_range = np.arange(len(self.classId))
                        id_processing = id_range[id_lb_sameX]

                        # memberships of the sample in the candidates; min/max
                        # reorder each bound pair element-wise before the call
                        b = memberG(X_l[i], X_u[i],
                                    np.minimum(V_sameX, W_sameX),
                                    np.maximum(V_sameX, W_sameX), self.gamma)
                        # candidates sorted by decreasing membership
                        index = np.argsort(b)[::-1]
                        bSort = b[index]

                        # nothing to do when the best candidate already has
                        # membership 1 and its label does not conflict
                        if bSort[0] != 1 or (classOfX != lb_sameX[index[0]]
                                             and classOfX != UNLABELED_CLASS):
                            adjust = False
                            for j in id_processing[index]:
                                # test violation of max hyperbox size and class labels
                                if (classOfX == self.classId[j]
                                        or self.classId[j] == UNLABELED_CLASS
                                        or classOfX == UNLABELED_CLASS) and (
                                            (np.maximum(self.W[j], X_u[i]) -
                                             np.minimum(self.V[j], X_l[i])) <=
                                            teta).all() == True:
                                    # adjust the j-th hyperbox
                                    self.V[j] = np.minimum(self.V[j], X_l[i])
                                    self.W[j] = np.maximum(self.W[j], X_u[i])
                                    indOfWinner = j
                                    adjust = True
                                    # an unlabeled hyperbox takes the label of a labeled sample
                                    if classOfX != UNLABELED_CLASS and self.classId[
                                            j] == UNLABELED_CLASS:
                                        self.classId[j] = classOfX

                                    if self.isDraw:
                                        # Handle drawing graph
                                        box_color = 'k'
                                        if self.classId[j] < len(mark_col):
                                            box_color = mark_col[
                                                self.classId[j]]

                                        # best-effort removal of the old drawing;
                                        # any failure here is ignored
                                        try:
                                            listLines[j].remove()
                                        except:
                                            pass

                                        hyperbox = drawbox(
                                            np.asmatrix(
                                                self.V[j,
                                                       0:np.minimum(xX, 3)]),
                                            np.asmatrix(
                                                self.W[j,
                                                       0:np.minimum(xX, 3)]),
                                            drawing_canvas, box_color)
                                        listLines[j] = hyperbox[0]
                                        self.delay()

                                    # only the first candidate that fits is expanded
                                    break

                            # if i-th sample did not fit into any existing box, create a new one
                            if not adjust:
                                self.V = np.concatenate(
                                    (self.V, X_l[i].reshape(1, -1)), axis=0)
                                self.W = np.concatenate(
                                    (self.W, X_u[i].reshape(1, -1)), axis=0)
                                self.classId = np.concatenate(
                                    (self.classId, [classOfX]))

                                if self.isDraw:
                                    # handle drawing graph
                                    box_color = 'k'
                                    if self.classId[-1] < len(mark_col):
                                        box_color = mark_col[self.classId[-1]]

                                    hyperbox = drawbox(
                                        np.asmatrix(X_l[i,
                                                        0:np.minimum(xX, 3)]),
                                        np.asmatrix(X_u[i,
                                                        0:np.minimum(xX, 3)]),
                                        drawing_canvas, box_color)
                                    listLines.append(hyperbox[0])
                                    self.delay()

                            elif self.V.shape[0] > 1:
                                # a hyperbox was expanded: check it against every
                                # hyperbox of another class, or against all others
                                # when the expanded hyperbox is still unlabeled
                                for ii in range(self.V.shape[0]):
                                    if ii != indOfWinner and (
                                            self.classId[ii] !=
                                            self.classId[indOfWinner]
                                            or self.classId[indOfWinner]
                                            == UNLABELED_CLASS):
                                        caseDim = hyperboxOverlapTest(
                                            self.V, self.W, indOfWinner,
                                            ii)  # overlap test

                                        # a non-empty result triggers the contraction step
                                        if caseDim.size > 0:
                                            self.V, self.W = hyperboxContraction(
                                                self.V, self.W, caseDim, ii,
                                                indOfWinner)
                                            if self.isDraw:
                                                # Handle graph drawing
                                                boxii_color = boxwin_color = 'k'
                                                if self.classId[ii] < len(
                                                        mark_col):
                                                    boxii_color = mark_col[
                                                        self.classId[ii]]

                                                if self.classId[
                                                        indOfWinner] < len(
                                                            mark_col):
                                                    boxwin_color = mark_col[
                                                        self.
                                                        classId[indOfWinner]]

                                                # best-effort removal of both old
                                                # drawings; failures are ignored
                                                try:
                                                    listLines[ii].remove()
                                                    listLines[
                                                        indOfWinner].remove()
                                                except:
                                                    pass

                                                hyperboxes = drawbox(
                                                    self.V[
                                                        [ii, indOfWinner],
                                                        0:np.minimum(xX, 3)],
                                                    self.W[
                                                        [ii, indOfWinner],
                                                        0:np.minimum(xX, 3)],
                                                    drawing_canvas, [
                                                        boxii_color,
                                                        boxwin_color
                                                    ])
                                                listLines[ii] = hyperboxes[0]
                                                listLines[
                                                    indOfWinner] = hyperboxes[
                                                        1]
                                                self.delay()

                    else:
                        # no hyperbox of this class (and no unlabeled one) exists
                        # yet: the sample becomes a new hyperbox
                        self.V = np.concatenate(
                            (self.V, X_l[i].reshape(1, -1)), axis=0)
                        self.W = np.concatenate(
                            (self.W, X_u[i].reshape(1, -1)), axis=0)
                        self.classId = np.concatenate(
                            (self.classId, [classOfX]))

                        if self.isDraw:
                            # handle drawing graph
                            box_color = 'k'
                            if self.classId[-1] < len(mark_col):
                                box_color = mark_col[self.classId[-1]]

                            hyperbox = drawbox(
                                np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]),
                                np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]),
                                drawing_canvas, box_color)
                            listLines.append(hyperbox[0])
                            self.delay()

            # shrink the maximum hyperbox size for the next pass and, while it is
            # still admissible, re-evaluate the model on the training data
            teta = teta * 0.9
            if teta >= self.tMin:
                result = predict(self.V, self.W, self.classId, X_l, X_u,
                                 patClassId, self.gamma, self.oper)
                self.misclass = result.summis

        # Draw last result (currently disabled)


#        if self.isDraw == True:
#            # Handle drawing graph
#            drawing_canvas.cla()
#            color_ = np.empty(len(self.classId), dtype = object)
#            for c in range(len(self.classId)):
#                color_[c] = mark_col[self.classId[c]]
#
#            drawbox(self.V[:, 0:np.minimum(xX, 3)], self.W[:, 0:np.minimum(xX, 3)], drawing_canvas, color_)
#            self.delay()
#
#        if self.isDraw:
#            plt.show()

        time_end = time.perf_counter()
        self.elapsed_training_time = time_end - time_start

        return self
Exemplo n.º 12
0
def predict_with_manhattan(V,
                           W,
                           classId,
                           XlT,
                           XuT,
                           patClassIdTest,
                           gama=1,
                           oper='min'):
    """
    GFMM classifier (test routine) that breaks ties with the Manhattan distance.

    When several hyperboxes of different classes share the maximum membership
    value for a sample, the hyperbox whose centre is closest (Manhattan
    distance) to the centre of the sample decides the predicted class.

      result = predict_with_manhattan(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Hyperbox class labels (crisp)
      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp)
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result           A Bunch object holding:
                          + summis                Number of misclassified objects
                          + misclass              Binary error map
                          + numSampleInBoundary   The number of samples in decision boundary
                          + predicted_class       Predicted class (None for unlabeled samples)
                          + mem_vals              Maximum membership value of each sample
                                                  (0 for unlabeled samples)
    """
    # promote a single 1-D sample to a one-row matrix
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    n_samples = XlT.shape[0]
    misclass = np.zeros(n_samples)
    mem_vals = np.zeros(n_samples)
    predicted_class = np.full(n_samples, None)
    boundary_count = 0

    for i in range(n_samples):
        if patClassIdTest[i] == UNLABELED_CLASS:
            # unlabeled samples are skipped and never counted as errors
            misclass[i] = False
            continue

        # memberships in all hyperboxes and the set reaching the maximum
        mem = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper)
        bmax = mem.max()
        top_boxes = np.nonzero(mem == bmax)[0]
        mem_vals[i] = bmax

        winner = top_boxes[0]
        if len(np.unique(classId[top_boxes])) > 1:
            # the tied hyperboxes disagree on the class: use the distance rule
            boundary_count = boundary_count + 1
            column_of_ones = np.ones((len(top_boxes), 1))

            # centre of the sample, repeated once per tied hyperbox
            if not (XlT[i] == XuT[i]).all():
                lower_rows = column_of_ones * XlT[i]
                upper_rows = column_of_ones * XuT[i]
                sample_centres = (lower_rows + upper_rows) / 2
            else:
                sample_centres = column_of_ones * XlT[i]

            # centres of the tied hyperboxes and their distance to the sample
            box_centres = (V[top_boxes] + W[top_boxes]) / 2
            distances = manhattan_distance(box_centres, sample_centres)
            winner = top_boxes[distances.argmin()]

        predicted_class[i] = classId[winner]
        misclass[i] = not (classId[winner] == patClassIdTest[i])

    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis,
                 misclass=misclass,
                 numSampleInBoundary=boundary_count,
                 predicted_class=predicted_class,
                 mem_vals=mem_vals)
Exemplo n.º 13
0
 def fit(self, X_l, X_u, patClassId):
     """
     Train the model by repeatedly merging pairs of same-class hyperboxes.

     Every input sample starts as its own hyperbox. Candidate pairs are taken
     from self.splitSimilarityMaxtrix applied to the pairwise membership
     matrix, keeping only pairs whose value reaches self.bthres. A pair is
     merged when both hyperboxes carry the same class, isOverlap reports no
     overlap for the merged hyperbox, and the merged hyperbox does not exceed
     self.teta in any dimension. After each merge the candidate list is
     rebuilt; training ends when a full scan produces no merge.

     X_l          Input data lower bounds (rows = objects, columns = features)
     X_u          Input data upper bounds (rows = objects, columns = features)
     patClassId   Input data class labels (crisp)

     Updates self.V, self.W, self.classId, self.cardin, self.clusters and
     self.elapsed_training_time, and returns self.
     """

     def similarity_matrix():
         # pairwise memberships between all current hyperboxes; the order of
         # the bounds handed to memberG depends on self.simil
         n_boxes = self.V.shape[0]
         sim = np.zeros(shape = (n_boxes, n_boxes))
         for j in range(n_boxes):
             if self.simil == 'short':
                 sim[j, :] = memberG(self.W[j], self.V[j], self.V, self.W, self.gamma, self.oper)
             elif self.simil == 'long':
                 sim[j, :] = memberG(self.V[j], self.W[j], self.W, self.V, self.gamma, self.oper)
             else:
                 sim[j, :] = memberG(self.V[j], self.W[j], self.V, self.W, self.gamma, self.oper)
         return sim

     def candidate_pairs(sim):
         # sorted similarity (membership) list with entries below the
         # threshold removed
         pairs = self.splitSimilarityMaxtrix(sim, self.sing)
         if len(pairs) > 0:
             pairs = pairs[pairs[:, 2] >= self.bthres, :]
         return pairs

     def draw_boxes(clear_first):
         # plot the current hyperboxes, coloured by class label
         Vt, Wt = self.pcatransform()
         colours = np.empty(len(self.classId), dtype = object)
         for c in range(len(self.classId)):
             colours[c] = mark_col[self.classId[c]]
         if clear_first:
             drawing_canvas.cla()
         drawbox(Vt, Wt, drawing_canvas, colours)
         self.delay()

     if self.isNorm == True:
         X_l, X_u = self.dataPreprocessing(X_l, X_u)

     time_start = time.perf_counter()

     # each sample is its own hyperbox to begin with
     self.V = X_l
     self.W = X_u
     self.classId = patClassId

     yX, xX = X_l.shape

     # start with one sample per hyperbox unless bookkeeping already exists
     if len(self.cardin) == 0 or len(self.clusters) == 0:
         self.cardin = np.ones(yX)
         self.clusters = np.empty(yX, dtype=object)
         for i in range(yX):
             self.clusters[i] = np.array([i], dtype = np.int32)

     if self.isDraw:
         mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])
         drawing_canvas = self.initializeCanvasGraph("GFMM - AGGLO-SM-Slow version", xX)
         self.delay()

         # plot initial hyperbox
         draw_boxes(False)

     maxb = candidate_pairs(similarity_matrix())

     # training: rescan the candidate list until a scan yields no merge
     isTraining = True
     while isTraining:
         isTraining = False

         for i in range(maxb.shape[0]):
             keep_id = int(maxb[i, 0])
             drop_id = int(maxb[i, 1])

             # only hyperboxes of the same class may be joined
             if not (self.classId[keep_id] == self.classId[drop_id]):
                 continue

             # tentative model: keep_id grows to cover drop_id, drop_id is removed
             merged_v = np.minimum(self.V[keep_id], self.V[drop_id]).reshape(1, -1)
             merged_w = np.maximum(self.W[keep_id], self.W[drop_id]).reshape(1, -1)
             newV = np.concatenate((self.V[:keep_id], merged_v, self.V[keep_id + 1:drop_id], self.V[drop_id + 1:]), axis=0)
             newW = np.concatenate((self.W[:keep_id], merged_w, self.W[keep_id + 1:drop_id], self.W[drop_id + 1:]), axis=0)
             newClassId = np.concatenate((self.classId[:drop_id], self.classId[drop_id + 1:]))

             # reject the merge on overlap or when the maximum size is violated
             if isOverlap(newV, newW, keep_id, newClassId) or not (((newW[keep_id] - newV[keep_id]) <= self.teta).all() == True):
                 continue

             isTraining = True
             self.V = newV
             self.W = newW
             self.classId = newClassId

             # fold the removed hyperbox's sample count and members into the kept one
             self.cardin[keep_id] = self.cardin[keep_id] + self.cardin[drop_id]
             self.cardin = np.append(self.cardin[0:drop_id], self.cardin[drop_id + 1:])

             self.clusters[keep_id] = np.append(self.clusters[keep_id], self.clusters[drop_id])
             self.clusters = np.append(self.clusters[0:drop_id], self.clusters[drop_id + 1:])

             # recalculate all pairwise memberships
             b = similarity_matrix()
             if self.V.shape[0] == 1:
                 maxb = np.array([])
             else:
                 maxb = candidate_pairs(b)

             if self.isDraw:
                 draw_boxes(True)

             # restart the scan on the rebuilt candidate list
             break

     time_end = time.perf_counter()
     self.elapsed_training_time = time_end - time_start

     return self
Exemplo n.º 14
0
def predict_with_probability_k_voting_new(V,
                                          W,
                                          classId,
                                          weights,
                                          XlT,
                                          XuT,
                                          patClassIdTest,
                                          K_threshold=5,
                                          gama=1,
                                          oper='min'):
    """
    GFMM classifier (test routine): weighted voting among the K hyperboxes
    with the highest weighted membership values.

    For every labeled test sample the membership in each hyperbox is
    multiplied by that hyperbox's weight, and the K_threshold hyperboxes with
    the largest products are selected. If all of them carry one class, that
    class is predicted. Otherwise the class whose selected hyperboxes have the
    largest sum of weighted memberships wins; equal sums are resolved in
    favour of the class holding the largest single hyperbox weight.

      result = predict_with_probability_k_voting_new(V,W,classId,weights,XlT,XuT,patClassIdTest,K_threshold,gama,oper)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Hyperbox class labels (crisp)
      weights           The weights of hyperboxes (multiplied element-wise with the memberships)
      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp)
      K_threshold       Number of top-ranked hyperboxes taking part in the vote (default: 5)
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result           A object with Bunch datatype containing all results as follows:
                          + summis            Number of misclassified objects
                          + misclass          Binary error map
                          + predicted_class   Predicted class (None for unlabeled test samples)

    """
    # a single sample given as a vector is promoted to a one-row matrix
    if XlT.ndim == 1:
        XlT = XlT.reshape(1, -1)
    if XuT.ndim == 1:
        XuT = XuT.reshape(1, -1)

    n_samples = XlT.shape[0]
    misclass = np.zeros(n_samples)
    predicted_class = np.full(n_samples, None)

    for i in range(n_samples):
        true_label = patClassIdTest[i]
        if true_label == UNLABELED_CLASS:
            # unlabeled samples are never counted as errors and receive no prediction
            misclass[i] = False
            continue

        # weighted memberships of sample i in all hyperboxes
        weighted_mem = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper) * weights
        # indices of the K hyperboxes with the largest weighted membership
        top_ids = np.argsort(weighted_mem)[::-1][:K_threshold]
        top_labels = classId[top_ids]
        candidates = np.unique(top_labels)

        if len(candidates) == 1:
            predicted_class[i] = candidates[0]
        else:
            # vote: largest summed weighted membership, ties go to the larger single weight
            best_score = -1
            best_weight = -1
            for c in candidates:
                members = top_ids[top_labels == c]
                score = np.sum(weighted_mem[members])
                top_weight = np.max(weights[members])

                if best_score < score or (best_score == score
                                          and best_weight < top_weight):
                    best_score = score
                    best_weight = top_weight
                    predicted_class[i] = c

        misclass[i] = not (predicted_class[i] == true_label)

    # results
    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis,
                 misclass=misclass,
                 predicted_class=predicted_class)
Exemplo n.º 15
0
def predict_with_probability_weighted(V,
                                      W,
                                      classId,
                                      numSamples,
                                      weights,
                                      XlT,
                                      XuT,
                                      patClassIdTest,
                                      gama=1,
                                      oper='min'):
    """
    GFMM classifier (test routine): weighted memberships, with a
    sample-count based probability formula used when hyperboxes of several
    classes share the maximum weighted membership.

    Tie handling for a test sample whose winning hyperboxes span more than
    one class:
      * if the maximum weighted membership equals 1 and at least one winning
        hyperbox holds exactly one sample, one such hyperbox is drawn with
        random.choice and its class is predicted;
      * otherwise each class gets the share
        sum(membership * numSamples) over its winning hyperboxes divided by
        the same sum over all winning hyperboxes; the largest share wins and
        equal shares are resolved in favour of the larger total sample count.

      result = predict_with_probability_weighted(V,W,classId,numSamples,weights,XlT,XuT,patClassIdTest,gama,oper)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Hyperbox class labels (crisp)
      numSamples        Number of samples of each corresponding hyperbox contained in V and W
      weights           The weights of hyperboxes (multiplied element-wise with the memberships)
      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp)
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result           A object with Bunch datatype containing all results as follows:
                          + summis                  Number of misclassified objects
                          + misclass                Binary error map
                          + numSampleInBoundary     Number of samples resolved with the probability formula
                          + predicted_class         Predicted class (None for unlabeled test samples)
                          + mem_vals                Maximum weighted membership of each labeled test sample

    """
    # a single sample given as a vector is promoted to a one-row matrix
    if XlT.ndim == 1:
        XlT = XlT.reshape(1, -1)
    if XuT.ndim == 1:
        XuT = XuT.reshape(1, -1)

    n_samples = XlT.shape[0]
    misclass = np.zeros(n_samples)
    predicted_class = np.full(n_samples, None)
    mem_vals = np.zeros(n_samples)
    numPointInBoundary = 0

    for i in range(n_samples):
        if patClassIdTest[i] == UNLABELED_CLASS:
            # unlabeled samples are never counted as errors and receive no prediction
            misclass[i] = False
            continue

        # weighted memberships of sample i in all hyperboxes
        weighted_mem = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper) * weights
        bmax = weighted_mem.max()
        # every hyperbox attaining the maximum weighted membership
        winners = np.nonzero(weighted_mem == bmax)[0]
        mem_vals[i] = bmax

        winner_labels = classId[winners]
        tied_classes = np.unique(winner_labels)

        if len(tied_classes) > 1:
            label = UNLABELED_CLASS
            resolved = False

            if bmax == 1:
                # full membership: prefer a winning hyperbox that holds a single sample
                singletons = np.nonzero(numSamples[winners] == 1)[0]
                if len(singletons) > 0:
                    resolved = True
                    label = classId[int(random.choice(winners[singletons]))]

            if not resolved:
                numPointInBoundary = numPointInBoundary + 1
                total_mass = (weighted_mem[winners] *
                              numSamples[winners]).sum()
                best_prob = -1
                best_members = None
                for c in tied_classes:
                    members = np.nonzero(winner_labels == c)[0]
                    class_ids = winners[members]
                    prob = (weighted_mem[class_ids] *
                            numSamples[class_ids]).sum() / total_mass

                    larger_support = (
                        prob == best_prob and best_members is not None
                        and numSamples[class_ids].sum() >
                        numSamples[winners[best_members]].sum())
                    if prob > best_prob or larger_support:
                        best_prob = prob
                        label = c
                        best_members = members
        else:
            label = classId[winners[0]]

        predicted_class[i] = label
        misclass[i] = not (label == patClassIdTest[i])

    # results
    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis,
                 misclass=misclass,
                 numSampleInBoundary=numPointInBoundary,
                 predicted_class=predicted_class,
                 mem_vals=mem_vals)
Exemplo n.º 16
0
    def fit(self, X_l, X_u, patClassId):
        """
        Train the model by agglomerating hyperboxes one pattern at a time.

        Every input pattern starts as its own hyperbox (self.V / self.W /
        self.classId). For each hyperbox k the similarity to all other
        hyperboxes that may be merged with it (same class, unlabeled, or any
        class when k itself is unlabeled) is computed according to
        self.simil. Candidates whose similarity reaches self.bthres are tried
        in descending order of similarity; the first merge whose resulting
        box does not exceed self.teta in any dimension and for which
        modifiedIsOverlap reports no overlap is accepted. Passes over the
        hyperboxes are repeated until a full pass accepts no merge.

        Xl          Input data lower bounds (rows = objects, columns = features)
        Xu          Input data upper bounds (rows = objects, columns = features)
        patClassId  Input data class labels (crisp)

        Returns self. Overwrites self.V, self.W, self.classId and sets
        self.elapsed_training_time (seconds, measured with time.perf_counter).
        """

        if self.isNorm == True:
            X_l, X_u = self.dataPreprocessing(X_l, X_u)

        time_start = time.perf_counter()

        self.V = X_l
        self.W = X_u
        self.classId = patClassId

        _, xX = X_l.shape

        if self.isDraw:
            mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])
            drawing_canvas = self.initializeCanvasGraph("GFMM - Faster AGGLO-2", xX)

            # plot initial hyperbox
            Vt, Wt = self.pcatransform()
            color_ = np.empty(len(self.classId), dtype = object)
            for c in range(len(self.classId)):
                # labels without a dedicated colour fall back to black, as in the redraw step below
                color_[c] = mark_col[self.classId[c]] if self.classId[c] < len(mark_col) else 'k'
            boxes = drawbox(Vt, Wt, drawing_canvas, color_)
            self.delay()
            hyperboxes = list(boxes)

        # training
        isTraining = True
        while isTraining:
            isTraining = False

            k = 0 # index of the hyperbox currently examined
            while k < len(self.classId):
                # merge candidates: same class as k, unlabeled boxes, or every box when k is unlabeled
                idx_same_classes = (self.classId == self.classId[k]) | (self.classId == UNLABELED_CLASS) | ((self.classId != self.classId[k]) & (self.classId[k] == UNLABELED_CLASS))
                idx_same_classes[k] = False # a box is never merged with itself
                idex = np.arange(len(self.classId))[idx_same_classes] # original positions of the candidates
                V_same_class = self.V[idx_same_classes]
                W_same_class = self.W[idx_same_classes]

                if self.simil == 'short':
                    b = memberG(np.maximum(self.W[k], self.V[k]), np.minimum(self.V[k], self.W[k]), np.minimum(V_same_class, W_same_class), np.maximum(W_same_class, V_same_class), self.gamma, self.oper)
                elif self.simil == 'long':
                    b = memberG(self.V[k], self.W[k], W_same_class, V_same_class, self.gamma, self.oper)
                else:
                    b = asym_similarity_one_many_with_missing_value(self.V[k], self.W[k], V_same_class, W_same_class, self.gamma, self.sing, self.oper)

                # candidates in descending order of similarity, restricted to those reaching the threshold
                indB = np.argsort(b)[::-1]
                idex = idex[indB]
                idexmax = idex[b[indB] >= self.bthres]

                for j in idexmax:
                    # the merged box is stored at the smaller index (row1); the larger one (row2) is deleted
                    row1 = int(min(k, j))
                    row2 = int(max(k, j))
                    newV = np.concatenate((self.V[:row1], np.minimum(self.V[row1], self.V[row2]).reshape(1, -1), self.V[row1 + 1:row2], self.V[row2 + 1:]), axis=0)
                    newW = np.concatenate((self.W[:row1], np.maximum(self.W[row1], self.W[row2]).reshape(1, -1), self.W[row1 + 1:row2], self.W[row2 + 1:]), axis=0)
                    newClassId = np.concatenate((self.classId[:row2], self.classId[row2 + 1:]))
                    if newClassId[row1] == UNLABELED_CLASS:
                        # an unlabeled box inherits the label of the box it absorbs
                        newClassId[row1] = self.classId[row2]

                    # accept the merge only if the maximum hyperbox size is respected and no overlap is created
                    if ((newW[row1] - newV[row1]) <= self.teta).all() and not modifiedIsOverlap(newV, newW, row1, newClassId):
                        self.V = newV
                        self.W = newW
                        self.classId = newClassId

                        isTraining = True

                        if k != row1:
                            # k was the deleted row, so the box that shifted into position k must be examined next
                            k = k - 1

                        if self.isDraw:
                            try:
                                hyperboxes[row2].remove()
                                hyperboxes[row1].remove()
                            except Exception:
                                # best effort: a stale drawing must not abort training
                                print("No remove old hyperbox")

                            Vt, Wt = self.pcatransform()

                            box_color = 'k'
                            if self.classId[row1] < len(mark_col):
                                box_color = mark_col[self.classId[row1]]

                            box = drawbox(np.asmatrix(Vt[row1]), np.asmatrix(Wt[row1]), drawing_canvas, box_color)
                            self.delay()
                            hyperboxes[row1] = box[0]
                            hyperboxes.remove(hyperboxes[row2])

                        break # if hyperbox adjusted there's no need to look at other hyperboxes

                k = k + 1

        time_end = time.perf_counter()
        self.elapsed_training_time = time_end - time_start

        return self
    def fit(self, X_l, X_u, patClassId):
        """
        Train the model by agglomeration driven by a pairwise similarity matrix.

        Every input pattern starts as its own hyperbox. In each pass the
        similarity between hyperboxes is computed only within each class
        (unlabeled hyperboxes take part in every class), candidate pairs are
        obtained from self.splitSimilarityMaxtrix, filtered by self.bthres and
        sorted by decreasing similarity. Candidates are then processed from
        the top: a pair is merged when the merged box does not exceed
        self.teta in any dimension and modifiedIsOverlap reports no overlap.
        Passes are repeated until one of them accepts no merge.

        X_l          Input data lower bounds (rows = objects, columns = features)
        X_u          Input data upper bounds (rows = objects, columns = features)
        patClassId  Input data class labels (crisp)

        Returns self. Overwrites self.V, self.W, self.classId and sets
        self.elapsed_training_time (seconds, measured with time.perf_counter).
        """

        if self.isNorm == True:
            X_l, X_u = self.dataPreprocessing(X_l, X_u)

        time_start = time.perf_counter()

        self.V = X_l
        self.W = X_u
        self.classId = patClassId

        yX, xX = X_l.shape

        if self.isDraw:
            mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])
            drawing_canvas = self.initializeCanvasGraph(
                "GFMM - AGGLO-SM-Fast version", xX)

            # plot initial hyperbox
            Vt, Wt = self.pcatransform()
            color_ = np.empty(len(self.classId), dtype=object)
            for c in range(len(self.classId)):
                color_[c] = mark_col[self.classId[c]]
            drawbox(Vt, Wt, drawing_canvas, color_)
            self.delay()

        # training
        isTraining = True
        while isTraining:
            isTraining = False

            # calculate class masks: column i marks the boxes of class labList[i] plus all unlabeled boxes
            yX, xX = self.V.shape
            labList = np.unique(
                self.classId[self.classId != UNLABELED_CLASS])[::-1]
            # builtin bool: the np.bool alias no longer exists in current NumPy releases
            clMask = np.zeros(shape=(yX, len(labList)), dtype=bool)
            for i in range(len(labList)):
                clMask[:, i] = (self.classId == labList[i]) | (
                    self.classId == UNLABELED_CLASS)

            # calculate pairwise memberships *ONLY* within each class (faster!)
            b = np.zeros(shape=(yX, yX))

            for i in range(len(labList)):
                Vi = self.V[clMask[:, i]]  # bounds of patterns with class label i
                Wi = self.W[clMask[:, i]]
                clSize = np.sum(clMask[:, i])  # number of patterns of class i
                clIdxs = np.nonzero(clMask[:, i])[0]  # their positions in the training set

                if self.simil == 'short':
                    # bounds are ordered element-wise once per class instead of once per row
                    lowVW = np.minimum(Vi, Wi)
                    highVW = np.maximum(Wi, Vi)
                    for j in range(clSize):
                        b[clIdxs[j], clIdxs] = memberG(
                            np.maximum(Wi[j], Vi[j]),
                            np.minimum(Vi[j], Wi[j]), lowVW, highVW,
                            self.gamma, self.oper)
                elif self.simil == 'long':
                    for j in range(clSize):
                        b[clIdxs[j], clIdxs] = memberG(Vi[j], Wi[j], Wi, Vi,
                                                       self.gamma, self.oper)
                else:
                    lowVW = np.minimum(Vi, Wi)
                    highVW = np.maximum(Wi, Vi)
                    for j in range(clSize):
                        b[clIdxs[j], clIdxs] = memberG(Vi[j], Wi[j], lowVW,
                                                       highVW, self.gamma,
                                                       self.oper)

            if yX == 1:
                maxb = np.array([])
            else:
                # presumably rows of (box index, box index, similarity) -- verify against splitSimilarityMaxtrix
                maxb = self.splitSimilarityMaxtrix(b, self.sing, False)
                if len(maxb) > 0:
                    maxb = maxb[(maxb[:, 2] >= self.bthres), :]

                    if len(maxb) > 0:
                        # sort maxb in descending order of the similarity column
                        idx_smaxb = np.argsort(maxb[:, 2])[::-1]
                        maxb = maxb[idx_smaxb, :3]

            while len(maxb) > 0:
                curmaxb = maxb[0, :]  # candidate pair currently handled

                # merged box replaces row1, row2 is scrapped, the rest is left intact
                # NOTE(review): the slicing below assumes row1 < row2 -- confirm splitSimilarityMaxtrix guarantees it
                row1 = int(curmaxb[0])
                row2 = int(curmaxb[1])
                newV = np.concatenate(
                    (self.V[0:row1, :],
                     np.minimum(self.V[row1, :],
                                self.V[row2, :]).reshape(1, -1),
                     self.V[row1 + 1:row2, :], self.V[row2 + 1:, :]),
                    axis=0)
                newW = np.concatenate(
                    (self.W[0:row1, :],
                     np.maximum(self.W[row1, :],
                                self.W[row2, :]).reshape(1, -1),
                     self.W[row1 + 1:row2, :], self.W[row2 + 1:, :]),
                    axis=0)
                newClassId = np.concatenate(
                    (self.classId[0:row2], self.classId[row2 + 1:]))
                if newClassId[row1] == UNLABELED_CLASS:
                    # an unlabeled box inherits the label of the box it absorbs
                    newClassId[row1] = self.classId[row2]

                # adjust the hyperbox if no overlap and maximum hyperbox size is not violated
                if ((newW[row1] - newV[row1]) <= self.teta).all() and (
                        not modifiedIsOverlap(newV, newW, row1, newClassId)):
                    isTraining = True
                    self.V = newV
                    self.W = newW
                    self.classId = newClassId

                    # drop the joined pair and every other candidate touching one of the joined boxes
                    # NOTE(review): maxb is sorted in descending order, so the last term keeps only
                    # candidates tied with the current similarity -- confirm this pruning is intended
                    mask = ((maxb[:, 0] != row1) & (maxb[:, 1] != row1) &
                            (maxb[:, 0] != row2) & (maxb[:, 1] != row2) &
                            (maxb[:, 2] >= curmaxb[2]))
                    maxb = maxb[mask, :]

                    # row2 was removed from V and W, so larger indices shift down by one
                    if len(maxb) > 0:
                        maxb[maxb[:, 0] > row2, 0] -= 1
                        maxb[maxb[:, 1] > row2, 1] -= 1

                    if self.isDraw:
                        Vt, Wt = self.pcatransform()
                        color_ = np.empty(len(self.classId), dtype=object)
                        for c in range(len(self.classId)):
                            color_[c] = mark_col[self.classId[c]]
                        drawing_canvas.cla()
                        drawbox(Vt, Wt, drawing_canvas, color_)
                        self.delay()
                else:
                    maxb = maxb[1:, :]  # scrap examined pair from the list

        time_end = time.perf_counter()
        self.elapsed_training_time = time_end - time_start

        return self
Exemplo n.º 18
0
 def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5, newVerPredict = True):
     """
     Prune the hyperboxes stored in self.V, self.W, self.classId using a validation set.

     Each validation sample is assigned to the hyperbox with the highest membership
     and counted as a correct or a wrong prediction of that hyperbox. Two pruned
     models are then built:
       * "remove": only hyperboxes whose accuracy reaches accuracy_threshold;
       * "keep":   the same hyperboxes plus those that never won a validation sample.
     If a class would lose all of its hyperboxes, one randomly chosen hyperbox of that
     class is kept. Both candidates are evaluated on the validation set and the one
     with fewer misclassified samples (the "remove" model on ties) replaces
     self.V, self.W and self.classId. Nothing is returned.

       pruning_val(XlT,XuT,patClassIdTest)

         INPUT
           XlT               Test data lower bounds (rows = objects, columns = features)
           XuT               Test data upper bounds (rows = objects, columns = features)
           patClassIdTest    Test data class labels (crisp)
           accuracy_threshold  The minimum accuracy for each hyperbox
           newVerPredict     + True: using Manhattan distance in addition to fuzzy membership
                             + False: No using Manhattan distance
     """

     #initialization
     yX = XlT.shape[0]
     mem = np.zeros((yX, self.V.shape[0]))
     # per hyperbox: column 0 = correct predictions, column 1 = wrong predictions
     no_predicted_samples_hyperboxes = np.zeros((len(self.classId), 2))
     # classifications
     for i in range(yX):
         mem[i, :] = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper) # calculate memberships for all hyperboxes
         bmax = mem[i, :].max()                                             # get max membership value
         maxVind = np.nonzero(mem[i, :] == bmax)[0]                         # get indexes of all hyperboxes with max membership

         if len(maxVind) == 1:
             # Only one hyperbox with the highest membership function
             # NOTE(review): unlike the tie branch below, an unlabeled sample is counted as wrong here -- confirm
             if self.classId[maxVind[0]] == patClassIdTest[i]:
                 no_predicted_samples_hyperboxes[maxVind[0], 0] += 1
             else:
                 no_predicted_samples_hyperboxes[maxVind[0], 1] += 1
         else:
             if (newVerPredict == True):
                 # More than one hyperbox with highest membership => using Manhattan distance
                 if (XlT[i] == XuT[i]).all() == False:
                     # interval sample: use its centre point
                     XlT_mat = np.ones((len(maxVind), 1)) * XlT[i]
                     XuT_mat = np.ones((len(maxVind), 1)) * XuT[i]
                     XgT_mat = (XlT_mat + XuT_mat) / 2
                 else:
                     XgT_mat = np.ones((len(maxVind), 1)) * XlT[i]

                 # Find all average points of all hyperboxes with the same membership value
                 avg_point_mat = (self.V[maxVind] + self.W[maxVind]) / 2
                 # compute the manhattan distance from XgT_mat to all average points of all hyperboxes with the same membership value
                 maht_dist = manhattan_distance(avg_point_mat, XgT_mat)
                 id_min = maxVind[maht_dist.argmin()]
             else:
                 # select randomly
                 id_min = maxVind[np.random.randint(len(maxVind))]

             if self.classId[id_min] != patClassIdTest[i] and patClassIdTest[i] != UNLABELED_CLASS:
                 no_predicted_samples_hyperboxes[id_min, 1] += 1
             else:
                 no_predicted_samples_hyperboxes[id_min, 0] += 1

     # pruning handling based on the validation results
     tmp_no_box = no_predicted_samples_hyperboxes.shape[0]
     no_samples = no_predicted_samples_hyperboxes[:, 0] + no_predicted_samples_hyperboxes[:, 1]
     is_used = no_samples != 0   # hyperboxes that won at least one validation sample
     # builtin bool: the np.bool alias no longer exists in current NumPy releases
     accuracy_larger_half = np.zeros(tmp_no_box, dtype=bool)
     accuracy_larger_half[is_used] = no_predicted_samples_hyperboxes[is_used, 0] / no_samples[is_used] >= accuracy_threshold
     # the "keep" variant additionally retains hyperboxes that were never used
     accuracy_larger_half_keep_nojoin = accuracy_larger_half | ~is_used

     # keep one hyperbox for class prunned all
     current_classes = np.unique(self.classId)
     class_tmp = self.classId[accuracy_larger_half]
     class_tmp_keep = self.classId[accuracy_larger_half_keep_nojoin]
     for c in current_classes:
         # np.nonzero returns a tuple; [0] gives the positions so that a single one can be drawn
         pos = np.nonzero(self.classId == c)[0]
         if c not in class_tmp:
             accuracy_larger_half[pos[np.random.randint(len(pos))]] = True
         if c not in class_tmp_keep:
             accuracy_larger_half_keep_nojoin[pos[np.random.randint(len(pos))]] = True

     V_prun_remove = self.V[accuracy_larger_half]
     W_prun_remove = self.W[accuracy_larger_half]
     classId_prun_remove = self.classId[accuracy_larger_half]

     W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
     V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
     classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]

     # evaluate both pruned models on the validation data
     if newVerPredict == True:
         result_prun_remove = predict_with_manhattan(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
         result_prun_keep_nojoin = predict_with_manhattan(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)
     else:
         result_prun_remove = predict(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
         result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)

     # adopt the model with fewer validation errors (the smaller model on ties)
     if (result_prun_remove.summis <= result_prun_keep_nojoin.summis):
         self.V = V_prun_remove
         self.W = W_prun_remove
         self.classId = classId_prun_remove
     else:
         self.V = V_prun_keep
         self.W = W_prun_keep
         self.classId = classId_prun_keep
Exemplo n.º 19
0
    def predict_based_mem(self, Xl_Test, Xu_Test, patClassIdTest):
        """
        This function is to evaluate the performance of the model

        The predictive class is given based on average membership value for each class of all base learners:
        for every base learner the maximum membership among its hyperboxes of a class is taken, these maxima
        are averaged over all learners, and the class with the highest average is predicted. If several
        classes share the highest average, one of them is chosen with random.choice.

            Parameters:
                + Xl_Test, Xu_Test: Lower and upper bounds of the testing set
                + patClassIdTest: classes of the testing set

            Returns:
                A tuple (accuracy, num_wrong_samples, predicted_classes):
                + Accuracy (fraction of testing samples predicted correctly)
                + Number of wrong predicted samples
                + Integer array with the predicted class of each testing sample
        """
        numClassifier = len(self.list_learners)

        yX = Xl_Test.shape[0]
        # get all class labels of all base classifiers
        classes = self.list_learners[0].classId
        for i in range(1, numClassifier):
            classes = np.union1d(classes, self.list_learners[i].classId)
        # with a single learner no union is taken, so duplicates must be removed explicitly
        classes = np.unique(classes)

        noClasses = len(classes)
        out = np.zeros((yX, noClasses), dtype=np.float64)

        # hyperboxes of each class for every learner; independent of the testing sample
        class_masks = [[learner.classId == label for label in classes]
                       for learner in self.list_learners]

        predicted_classes = []

        # classification of each testing pattern i
        for i in range(yX):
            for learner, masks in zip(self.list_learners, class_masks):
                # calculate memberships for all hyperboxes of the current classifier
                mem_tmp = memberG(Xl_Test[i, :], Xu_Test[i, :], learner.V,
                                  learner.W, self.gamma)

                for j, mask in enumerate(masks):
                    # get max membership of hyperboxes with class label j
                    same_j_labels = mem_tmp[mask]
                    if len(same_j_labels) > 0:
                        out[i, j] = out[i, j] + same_j_labels.max()

            # compute membership value of each class over all classifiers
            out[i, :] = out[i, :] / numClassifier
            # get max membership value for each class with regard to the i-th sample
            maxb = out[i].max()
            # get positions of indices of all classes with max membership
            maxMemInd = np.nonzero(out[i] == maxb)[0]
            if len(maxMemInd) == 1:
                predicted_classes.append(classes[maxMemInd[0]])
            else:
                # choose random class
                selected_cls_id = random.choice(maxMemInd)
                predicted_classes.append(classes[selected_cls_id])

        # builtin int: the np.int alias no longer exists in current NumPy releases
        predicted_classes = np.array(predicted_classes, dtype=int)
        num_correct_samples = np.sum(predicted_classes == patClassIdTest)
        num_wrong_samples = yX - num_correct_samples
        accuracy = num_correct_samples / yX

        return (accuracy, num_wrong_samples, predicted_classes)
Exemplo n.º 20
0
def predictDecisionLevelEnsemble(classifiers,
                                 XlT,
                                 XuT,
                                 patClassIdTest,
                                 gama=1,
                                 oper='min'):
    """
    Perform classification for a decision level ensemble learning

                result = predictDecisionLevelEnsemble(classifiers, XlT, XuT, patClassIdTest, gama, oper)

    For every test sample and every base classifier, the membership of the
    sample in each hyperbox is computed with memberG. Per class, the largest
    membership among that classifier's hyperboxes of the class is taken, and
    these maxima are averaged over all classifiers (a classifier without any
    hyperbox of a class contributes 0 for that class).

    INPUT
        classifiers         A non-empty sequence of base classifiers to combine; each element must expose V, W and classId
        XlT                 Test data lower bounds (rows = objects, columns = features)
        XuT                 Test data upper bounds (rows = objects, columns = features)
        patClassIdTest      Test data class labels (crisp)
        gama                Membership function slope (default: 1)
        oper                Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
        result              A object with Bunch datatype containing all results as follows:
                                + summis        Number of misclassified samples
                                + misclass      Binary error map for input samples
                                + out           Soft class memberships, rows are testing input patterns, columns are indices of classes
                                + classes       Store class labels corresponding column indices of out

    A sample counts as correctly classified when its true label is among the
    classes tied for the highest averaged membership. Samples whose true label
    is 0 are never counted as misclassified.
    """
    numClassifier = len(classifiers)
    yX = XlT.shape[0]

    # The builtin bool replaces the np.bool alias, which NumPy 1.24 removed.
    misclass = np.zeros(yX, dtype=bool)

    # Sorted, unique class labels over all base classifiers.
    classes = np.unique(classifiers[0].classId)
    for clf in classifiers[1:]:
        classes = np.union1d(classes, clf.classId)

    noClasses = len(classes)
    out = np.zeros((yX, noClasses), dtype=np.float64)

    # The hyperbox-selection mask of each (classifier, class) pair does not
    # depend on the test sample, so build all of them once up front.
    label_masks = [[clf.classId == label for label in classes]
                   for clf in classifiers]

    # classification of each testing pattern i
    for i in range(yX):
        for clf, clf_masks in zip(classifiers, label_masks):
            # memberships of sample i in all hyperboxes of this classifier
            mem_tmp = memberG(XlT[i, :], XuT[i, :], clf.V, clf.W, gama, oper)

            for j, mask in enumerate(clf_masks):
                # max membership among hyperboxes with class label classes[j]
                same_j_labels = mem_tmp[mask]
                if len(same_j_labels) > 0:
                    out[i, j] += same_j_labels.max()

        # average membership of each class over all classifiers
        out[i, :] = out[i, :] / numClassifier

        # every class tied for the highest averaged membership is a winner
        maxMemInd = out[i] == out[i].max()
        is_hit = (classes[maxMemInd] == patClassIdTest[i]).any()
        misclass[i] = np.logical_not(
            np.logical_or(is_hit, patClassIdTest[i] == 0))

    # count number of misclassified patterns
    summis = np.sum(misclass)

    return Bunch(summis=summis, misclass=misclass, out=out, classes=classes)