def predict(self, Xl_Test, Xu_Test, patClassIdTest):
    """
    Perform classification

        result = predict(Xl_Test, Xu_Test, patClassIdTest)

    INPUT:
        Xl_Test             Test data lower bounds (rows = objects, columns = features)
        Xu_Test             Test data upper bounds (rows = objects, columns = features)
        patClassIdTest      Test data class labels (crisp)

    OUTPUT:
        result        A object with Bunch datatype containing all results as follows:
                      + summis           Number of misclassified objects
                      + misclass         Binary error map
                      + sumamb           Number of objects with maximum membership in more than one class
                      + out              Soft class memberships
                      + mem              Hyperbox memberships
                      None is returned when no test sample is left to classify.

    If the training data were normalized (self.mins is not empty), the test
    data are rescaled with the same mins/maxs into [loLim, hiLim]. Samples
    falling outside that interval after rescaling are discarded together with
    their labels, so the reported results only cover the kept samples.
    """
    # Normalize testing dataset if training datasets were normalized
    if len(self.mins) > 0:
        noSamples = Xl_Test.shape[0]
        # Broadcasting applies the per-feature mins/maxs to every row.
        value_range = self.maxs - self.mins
        Xl_Test = self.loLim + (self.hiLim - self.loLim) * (Xl_Test - self.mins) / value_range
        Xu_Test = self.loLim + (self.hiLim - self.loLim) * (Xu_Test - self.mins) / value_range

        if Xl_Test.min() < self.loLim or Xu_Test.min() < self.loLim or Xl_Test.max() > self.hiLim or Xu_Test.max() > self.hiLim:
            print('Test sample falls outside', self.loLim, '-', self.hiLim, 'interval')
            print('Number of original samples = ', noSamples)

            # only keep samples within the interval loLim-hiLim
            keep = ((Xl_Test >= self.loLim).all(axis=1)
                    & (Xl_Test <= self.hiLim).all(axis=1)
                    & (Xu_Test >= self.loLim).all(axis=1)
                    & (Xu_Test <= self.hiLim).all(axis=1))

            Xl_Test = Xl_Test[keep, :]
            Xu_Test = Xu_Test[keep, :]
            # The labels must be filtered with the same mask; otherwise they
            # no longer line up with the remaining samples.
            if patClassIdTest is not None:
                patClassIdTest = np.asarray(patClassIdTest)[keep]

            print('Number of kept samples =', Xl_Test.shape[0])

    # do classification
    result = None

    if Xl_Test.shape[0] > 0:
        # min/max guard against hyperboxes whose V and W coordinates are swapped
        result = predict(np.minimum(self.V, self.W), np.maximum(self.V, self.W),
                         self.classId, Xl_Test, Xu_Test, patClassIdTest,
                         self.gamma, self.oper)

    return result
def predict(self, Xl_Test, Xu_Test, patClassIdTest, newVer = True):
    """
    Perform classification

        result = predict(Xl_Test, Xu_Test, patClassIdTest)

    INPUT:
        Xl_Test             Test data lower bounds (rows = objects, columns = features)
        Xu_Test             Test data upper bounds (rows = objects, columns = features)
        patClassIdTest      Test data class labels (crisp)
        newVer              + False: Don't use an additional criterion for predicting
                            + True : Using an additional criterion for predicting in the case of the same membership value

    OUTPUT:
        result        A object with Bunch datatype containing all results as follows:
                      + summis                  Number of misclassified objects
                      + misclass                Binary error map
                      + numSampleInBoundary     The number of samples in decision boundary
                      + predicted_class         Predicted class
                      None is returned when no test sample is left to classify.

    Side effect: self.predicted_class is set to the integer array of
    predicted classes whenever a classification is performed.

    If the training data were normalized (self.mins is not empty), the test
    data are rescaled with the same mins/maxs into [loLim, hiLim]. Samples
    falling outside that interval after rescaling are discarded together with
    their labels, so the reported results only cover the kept samples.
    """
    # Normalize testing dataset if training datasets were normalized
    if len(self.mins) > 0:
        noSamples = Xl_Test.shape[0]
        # Broadcasting applies the per-feature mins/maxs to every row.
        value_range = self.maxs - self.mins
        Xl_Test = self.loLim + (self.hiLim - self.loLim) * (Xl_Test - self.mins) / value_range
        Xu_Test = self.loLim + (self.hiLim - self.loLim) * (Xu_Test - self.mins) / value_range

        if Xl_Test.min() < self.loLim or Xu_Test.min() < self.loLim or Xl_Test.max() > self.hiLim or Xu_Test.max() > self.hiLim:
            print('Test sample falls outside', self.loLim, '-', self.hiLim, 'interval')
            print('Number of original samples = ', noSamples)

            # only keep samples within the interval loLim-hiLim
            keep = ((Xl_Test >= self.loLim).all(axis = 1)
                    & (Xl_Test <= self.hiLim).all(axis = 1)
                    & (Xu_Test >= self.loLim).all(axis = 1)
                    & (Xu_Test <= self.hiLim).all(axis = 1))

            Xl_Test = Xl_Test[keep, :]
            Xu_Test = Xu_Test[keep, :]
            # The labels must be filtered with the same mask; otherwise they
            # no longer line up with the remaining samples.
            if patClassIdTest is not None:
                patClassIdTest = np.asarray(patClassIdTest)[keep]

            print('Number of kept samples =', Xl_Test.shape[0])

    # do classification
    result = None

    if Xl_Test.shape[0] > 0:
        if newVer:
            result = predict_with_probability(self.V, self.W, self.classId, self.counter,
                                              Xl_Test, Xu_Test, patClassIdTest,
                                              self.gamma, self.oper)
        else:
            result = predict(self.V, self.W, self.classId,
                             Xl_Test, Xu_Test, patClassIdTest,
                             self.gamma, self.oper)

        # np.int was only an alias of the builtin int and has been removed
        # from NumPy (1.24+); the builtin yields the same dtype.
        self.predicted_class = np.array(result.predicted_class, dtype=int)

    return result
def training(self, partitionedXtr):
    """
    Training a base classifier using K-fold cross-validation. This method is
    used when the input data are preprocessed and partitioned into k parts

    For every fold k an AccelBatchGFMM model is fitted on fold k alone and
    validated on the union of all the other folds; the model with the lowest
    validation error rate is returned.

    INPUT
        partitionedXtr      An numpy array contains k sub-arrays, in which each subarray is Bunch datatype:
                            + lower:    lower bounds
                            + upper:    upper bounds
                            + label:    class labels
                            partitionedXtr should be normalized (if needed) beforehand using this function

    OUTPUT
        baseClassifier      base classifier was validated using K-fold cross-validation
                            (None when self.numFold is 0)

    RAISES
        ValueError          when there is only one fold, so that no data are
                            left to build a validation set
    """
    baseClassifier = None
    minEr = 2  # larger than any error rate, so the first fold is always accepted

    for k in range(self.numFold):
        classifier_tmp = AccelBatchGFMM(self.gamma, self.teta, self.bthres, self.simil,
                                        self.sing, False, self.oper, False)
        classifier_tmp.fit(partitionedXtr[k].lower, partitionedXtr[k].upper,
                           partitionedXtr[k].label)

        # Create the validation set being the remaining training data
        held_out = [partitionedXtr[l] for l in range(self.numFold) if l != k]
        if not held_out:
            raise ValueError('K-fold validation needs at least 2 folds, got %d' % self.numFold)

        lower_valid = np.concatenate([part.lower for part in held_out], axis=0)
        upper_valid = np.concatenate([part.upper for part in held_out], axis=0)
        label_valid = np.concatenate([part.label for part in held_out])

        # validate the trained model
        rest = predict(classifier_tmp.V, classifier_tmp.W, classifier_tmp.classId,
                       lower_valid, upper_valid, label_valid, self.gamma, self.oper)
        er = rest.summis / len(label_valid)

        if er < minEr:
            minEr = er
            baseClassifier = classifier_tmp

    return baseClassifier
def pruning(self, X_Val, classId_Val):
    """
    Pruning routine for the GFMM classifier.

    Every validation pattern is assigned to the hyperbox with the highest
    membership value. A hyperbox is retained only if it wins strictly more
    patterns of its own class than patterns of other classes; hyperboxes that
    win no validation pattern at all are therefore removed as well.

    INPUT
        X_Val           Validation data (used as both lower and upper bounds)
        classId_Val     Validation data class labels (crisp)

    OUTPUT
        self, with V, W, classId and cardin restricted to the retained hyperboxes
    """
    # test the model on validation data
    validation = predict(self.V, self.W, self.classId, X_Val, X_Val, classId_Val,
                         self.gamma, self.oper)

    # index of the winning hyperbox for each validation pattern
    winners = validation.mem.argmax(axis=1)

    n_boxes = self.V.shape[0]
    # column 0: correctly classified patterns, column 1: misclassified ones
    tally = np.zeros((n_boxes, 2))

    for box in range(n_boxes):
        labels_won = classId_Val[winners == box]
        n_won = len(labels_won)
        if n_won == 0:
            continue
        tally[box, 0] = np.sum(labels_won == self.classId[box])
        tally[box, 1] = n_won - tally[box, 0]

    # keep the hyperboxes with strictly more correct than incorrect patterns
    kept = np.nonzero(tally[:, 0] > tally[:, 1])[0]

    self.V = self.V[kept, :]
    self.W = self.W[kept, :]
    self.classId = self.classId[kept]
    self.cardin = self.cardin[kept]

    return self
def fit(self, X_l, X_u, patClassId):
    """
    Train the classifier with the online (incremental) learning procedure.

    The training samples are presented one by one. For each sample the
    existing hyperboxes of the same class (or unlabeled ones) are ranked by
    membership; the best-ranked hyperbox that can be expanded without
    exceeding the maximum size `teta` absorbs the sample, otherwise a new
    hyperbox is created. After an expansion, the winner is tested for overlap
    against hyperboxes of other classes and contracted where needed.

    The whole pass is repeated with `teta` reduced by 10% as long as training
    samples are still misclassified and `teta` stays at or above self.tMin.

     Xl             Input data lower bounds (rows = objects, columns = features)
     Xu             Input data upper bounds (rows = objects, columns = features)
     patClassId     Input data class labels (crisp). patClassId[i] = 0 corresponds to an unlabeled item
                    (the code compares against UNLABELED_CLASS, defined outside this block)

    Returns self; self.V, self.W, self.classId, self.misclass and
    self.elapsed_training_time are updated.
    """
    print('--Online Learning--')

    if self.isNorm == True:
        X_l, X_u = self.dataPreprocessing(X_l, X_u)

    #X_l = X_l.astype(np.float32)
    #X_u = X_u.astype(np.float32)
    time_start = time.perf_counter()

    yX, xX = X_l.shape  # number of samples, number of features
    teta = self.teta    # local copy: shrinks between passes, self.teta is left untouched

    # marker symbols and colours used for drawing, indexed by class label
    mark = np.array([
        '*', 'o', 'x', '+', '.', ',', 'v', '^', '<', '>', '1', '2', '3', '4',
        '8', 's', 'p', 'P', 'h', 'H', 'X', 'D', '|', '_'
    ])
    mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])

    listLines = list()               # drawn artist of each hyperbox, indexed like self.V
    listInputSamplePoints = list()   # drawn artists of the input samples of the current pass

    if self.isDraw:
        drawing_canvas = self.initializeCanvasGraph(
            "GFMM - Online learning", xX)

        if self.V.size > 0:
            # draw existed hyperboxes
            color_ = np.array(['k'] * len(self.classId), dtype=object)
            for c in range(len(self.classId)):
                if self.classId[c] < len(mark_col):
                    color_[c] = mark_col[self.classId[c]]

            hyperboxes = drawbox(self.V[:, 0:np.minimum(xX, 3)],
                                 self.W[:, 0:np.minimum(xX, 3)],
                                 drawing_canvas, color_)
            listLines.extend(hyperboxes)
            self.delay()

    self.misclass = 1  # forces at least one pass over the data

    while self.misclass > 0 and teta >= self.tMin:
        # for each input sample
        for i in range(yX):
            classOfX = patClassId[i]
            # draw input samples
            if self.isDraw:
                if i == 0 and len(listInputSamplePoints) > 0:
                    # reset input point drawing
                    for point in listInputSamplePoints:
                        point.remove()
                    listInputSamplePoints.clear()

                color_ = 'k'
                if classOfX < len(mark_col):
                    color_ = mark_col[classOfX]

                if (X_l[i, :] == X_u[i, :]).all():
                    # the sample is a point (lower bound equals upper bound)
                    marker_ = 'd'
                    if classOfX < len(mark):
                        marker_ = mark[classOfX]

                    if xX == 2:
                        inputPoint = drawing_canvas.plot(X_l[i, 0], X_l[i, 1],
                                                         color=color_, marker=marker_)
                    else:
                        inputPoint = drawing_canvas.plot([X_l[i, 0]], [X_l[i, 1]], [X_l[i, 2]],
                                                         color=color_, marker=marker_)

                    #listInputSamplePoints.append(inputPoint)
                else:
                    # the sample is itself a hyperbox
                    inputPoint = drawbox(
                        np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]),
                        np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]),
                        drawing_canvas, color_)

                listInputSamplePoints.append(inputPoint[0])
                self.delay()

            if self.V.size == 0:  # no model provided - starting from scratch
                # the first sample becomes the first hyperbox
                self.V = np.array([X_l[0]])
                self.W = np.array([X_u[0]])
                self.classId = np.array([patClassId[0]])

                if self.isDraw == True:
                    # draw hyperbox
                    box_color = 'k'
                    if patClassId[0] < len(mark_col):
                        box_color = mark_col[patClassId[0]]

                    hyperbox = drawbox(
                        np.asmatrix(self.V[0, 0:np.minimum(xX, 3)]),
                        np.asmatrix(self.W[0, 0:np.minimum(xX, 3)]),
                        drawing_canvas, box_color)
                    listLines.append(hyperbox[0])
                    self.delay()

            else:
                # candidate hyperboxes: same class as the sample, or unlabeled
                id_lb_sameX = np.logical_or(
                    self.classId == classOfX,
                    self.classId == UNLABELED_CLASS)

                if id_lb_sameX.any() == True:
                    V_sameX = self.V[id_lb_sameX]
                    W_sameX = self.W[id_lb_sameX]
                    lb_sameX = self.classId[id_lb_sameX]
                    id_range = np.arange(len(self.classId))
                    # positions of the candidates within self.V / self.W
                    id_processing = id_range[id_lb_sameX]

                    b = memberG(X_l[i], X_u[i],
                                np.minimum(V_sameX, W_sameX),
                                np.maximum(V_sameX, W_sameX), self.gamma)
                    index = np.argsort(b)[::-1]  # candidates by decreasing membership
                    bSort = b[index]

                    # Nothing is done when the best candidate has membership 1
                    # and carries the sample's class (or the sample is unlabeled).
                    if bSort[0] != 1 or (classOfX != lb_sameX[index[0]]
                                         and classOfX != UNLABELED_CLASS):
                        adjust = False
                        for j in id_processing[index]:
                            # test violation of max hyperbox size and class labels
                            if (classOfX == self.classId[j]
                                    or self.classId[j] == UNLABELED_CLASS
                                    or classOfX == UNLABELED_CLASS) and (
                                        (np.maximum(self.W[j], X_u[i]) -
                                         np.minimum(self.V[j], X_l[i])) <= teta).all() == True:
                                # adjust the j-th hyperbox
                                self.V[j] = np.minimum(self.V[j], X_l[i])
                                self.W[j] = np.maximum(self.W[j], X_u[i])
                                indOfWinner = j
                                adjust = True
                                # an unlabeled hyperbox inherits the label of a labeled sample
                                if classOfX != UNLABELED_CLASS and self.classId[
                                        j] == UNLABELED_CLASS:
                                    self.classId[j] = classOfX

                                if self.isDraw:
                                    # Handle drawing graph
                                    box_color = 'k'
                                    if self.classId[j] < len(mark_col):
                                        box_color = mark_col[
                                            self.classId[j]]

                                    # best effort: any failure to remove the old artist is ignored
                                    try:
                                        listLines[j].remove()
                                    except:
                                        pass

                                    hyperbox = drawbox(
                                        np.asmatrix(
                                            self.V[j, 0:np.minimum(xX, 3)]),
                                        np.asmatrix(
                                            self.W[j, 0:np.minimum(xX, 3)]),
                                        drawing_canvas, box_color)
                                    listLines[j] = hyperbox[0]
                                    self.delay()

                                # only the first expandable candidate is adjusted
                                break

                        # if i-th sample did not fit into any existing box, create a new one
                        if not adjust:
                            self.V = np.concatenate(
                                (self.V, X_l[i].reshape(1, -1)), axis=0)
                            self.W = np.concatenate(
                                (self.W, X_u[i].reshape(1, -1)), axis=0)
                            self.classId = np.concatenate(
                                (self.classId, [classOfX]))

                            if self.isDraw:
                                # handle drawing graph
                                box_color = 'k'
                                if self.classId[-1] < len(mark_col):
                                    box_color = mark_col[self.classId[-1]]

                                hyperbox = drawbox(
                                    np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]),
                                    np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]),
                                    drawing_canvas, box_color)
                                listLines.append(hyperbox[0])
                                self.delay()

                        elif self.V.shape[0] > 1:
                            # the winner was expanded: check it against every
                            # hyperbox of a different class (or all of them
                            # when the winner is unlabeled)
                            for ii in range(self.V.shape[0]):
                                if ii != indOfWinner and (
                                        self.classId[ii] != self.classId[indOfWinner]
                                        or self.classId[indOfWinner] == UNLABELED_CLASS):
                                    caseDim = hyperboxOverlapTest(
                                        self.V, self.W, indOfWinner,
                                        ii)  # overlap test

                                    if caseDim.size > 0:
                                        # non-empty result: contract the two hyperboxes
                                        self.V, self.W = hyperboxContraction(
                                            self.V, self.W, caseDim, ii,
                                            indOfWinner)

                                        if self.isDraw:
                                            # Handle graph drawing
                                            boxii_color = boxwin_color = 'k'
                                            if self.classId[ii] < len(
                                                    mark_col):
                                                boxii_color = mark_col[
                                                    self.classId[ii]]

                                            if self.classId[
                                                    indOfWinner] < len(
                                                        mark_col):
                                                boxwin_color = mark_col[
                                                    self.
                                                    classId[indOfWinner]]

                                            # best effort: any failure to remove the old artists is ignored
                                            try:
                                                listLines[ii].remove()
                                                listLines[
                                                    indOfWinner].remove()
                                            except:
                                                pass

                                            hyperboxes = drawbox(
                                                self.V[
                                                    [ii, indOfWinner],
                                                    0:np.minimum(xX, 3)],
                                                self.W[
                                                    [ii, indOfWinner],
                                                    0:np.minimum(xX, 3)],
                                                drawing_canvas, [
                                                    boxii_color,
                                                    boxwin_color
                                                ])
                                            listLines[ii] = hyperboxes[0]
                                            listLines[
                                                indOfWinner] = hyperboxes[
                                                    1]
                                            self.delay()

                else:
                    # no hyperbox of this class (and no unlabeled one) exists yet:
                    # the sample becomes a new hyperbox
                    self.V = np.concatenate(
                        (self.V, X_l[i].reshape(1, -1)), axis=0)
                    self.W = np.concatenate(
                        (self.W, X_u[i].reshape(1, -1)), axis=0)
                    self.classId = np.concatenate(
                        (self.classId, [classOfX]))

                    if self.isDraw:
                        # handle drawing graph
                        box_color = 'k'
                        if self.classId[-1] < len(mark_col):
                            box_color = mark_col[self.classId[-1]]

                        hyperbox = drawbox(
                            np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]),
                            np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]),
                            drawing_canvas, box_color)
                        listLines.append(hyperbox[0])
                        self.delay()

        # shrink the maximum hyperbox size for the next pass and re-evaluate
        # the training error only if another pass is still allowed
        teta = teta * 0.9
        if teta >= self.tMin:
            result = predict(self.V, self.W, self.classId, X_l, X_u,
                             patClassId, self.gamma, self.oper)
            self.misclass = result.summis

    # Draw last result
    # if self.isDraw == True:
    #     # Handle drawing graph
    #     drawing_canvas.cla()
    #     color_ = np.empty(len(self.classId), dtype = object)
    #     for c in range(len(self.classId)):
    #         color_[c] = mark_col[self.classId[c]]
    #
    #     drawbox(self.V[:, 0:np.minimum(xX, 3)], self.W[:, 0:np.minimum(xX, 3)], drawing_canvas, color_)
    #     self.delay()
    #
    # if self.isDraw:
    #     plt.show()

    time_end = time.perf_counter()
    self.elapsed_training_time = time_end - time_start

    return self
def training(self, X_tr, X_val, isDeleteContainedHyperbox=True):
    """
    Build the model from a training part and a validation part.

    Two chains of AccelBatchGFMM models are trained, one starting from X_tr
    and one from X_val, over a decreasing similarity threshold. At each step
    every model is fitted on the hyperboxes produced by the previous step of
    its own chain and is scored on the data of the other chain. The best
    model of each chain is kept, their hyperboxes are merged, optionally
    cleaned up, and finally aggregated once more with the minimum threshold.

    INPUT
        X_tr       An object contains training data with the Bunch datatype, its attributes:
                    + lower:    lower bounds
                    + upper:    upper bounds
                    + label:    class labels
        X_val      An object contains validation data with the Bunch datatype, its attributes:
                    + lower:    lower bounds
                    + upper:    upper bounds
                    + label:    class labels
                   X_tr, X_val should be normalized (if needed) beforehand using this function
        isDeleteContainedHyperbox   Identify if hyperboxes contained in other hyperboxes are discarded or not?

    OUTPUT
        self, with V, W, classId, cardin, clusters and numHyperboxes updated
    """
    V_train, W_train, classId_train = X_tr.lower, X_tr.upper, X_tr.label
    V_val, W_val, classId_val = X_val.lower, X_val.upper, X_val.label

    bthres = self.bthres
    self.numHyperboxes = 0
    N = int(self.numClassifier / 2) + 1
    delta_thres = (self.bthres - self.bthres_min) / N

    # The scores below are divided by a number of hyperboxes, so they are not
    # bounded by 1. Starting from infinity guarantees that a best model is
    # always selected; a finite start value could leave opt_Tr / opt_Val as
    # None and crash on the concatenation after the loop.
    minEr_Tr = np.inf
    minEr_Val = np.inf
    opt_Tr = None
    opt_Val = None

    for _ in range(N):
        classifier_Tr = AccelBatchGFMM(self.gamma, self.teta, bthres, self.simil,
                                       self.sing, False, self.oper, False)
        classifier_Tr.fit(V_train, W_train, classId_train)

        classifier_Val = AccelBatchGFMM(self.gamma, self.teta, bthres, self.simil,
                                        self.sing, False, self.oper, False)
        classifier_Val.fit(V_val, W_val, classId_val)

        # each model is evaluated on the data of the other chain
        rest_Tr = predict(classifier_Tr.V, classifier_Tr.W, classifier_Tr.classId,
                          V_val, W_val, classId_val, self.gamma, self.oper)
        rest_Val = predict(classifier_Val.V, classifier_Val.W, classifier_Val.classId,
                           V_train, W_train, classId_train, self.gamma, self.oper)

        # NOTE(review): the denominators are the hyperbox counts of the
        # *other* model rather than the number of evaluated samples - kept
        # as in the original, confirm that this is intended.
        err_Tr = rest_Tr.summis / len(classifier_Val.classId)
        err_Val = rest_Val.summis / len(classifier_Tr.classId)

        if err_Tr < minEr_Tr:
            minEr_Tr = err_Tr
            opt_Tr = classifier_Tr

        if err_Val < minEr_Val:
            minEr_Val = err_Val
            opt_Val = classifier_Val

        # the hyperboxes just produced are the input of the next step
        V_train, W_train, classId_train = classifier_Tr.V, classifier_Tr.W, classifier_Tr.classId
        V_val, W_val, classId_val = classifier_Val.V, classifier_Val.W, classifier_Val.classId

        bthres = bthres - delta_thres

    # merge the hyperboxes of the two selected models
    self.V = np.concatenate((opt_Tr.V, opt_Val.V), axis=0)
    self.W = np.concatenate((opt_Tr.W, opt_Val.W), axis=0)
    self.classId = np.concatenate((opt_Tr.classId, opt_Val.classId))

    if isDeleteContainedHyperbox:
        self.removeContainedHyperboxes()

    # Overlap resolution is applied regardless of the flag, which is
    # documented as governing only the removal of contained hyperboxes.
    self.overlapResolve()

    # training using AGGLO-2
    combClassifier = AccelBatchGFMM(self.gamma, self.teta, self.bthres_min, self.simil,
                                    self.sing, False, self.oper, False)
    combClassifier.fit(self.V, self.W, self.classId)

    self.V = combClassifier.V
    self.W = combClassifier.W
    self.classId = combClassifier.classId
    self.cardin = combClassifier.cardin
    self.clusters = combClassifier.clusters
    self.numHyperboxes = len(self.classId)

    return self
def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold=0.5, newVerPredict=True):
    """
    pruning handling based on validation (validation routine) with hyperboxes stored in self. V, W, classId

        result = pruning_val(XlT,XuT,patClassIdTest)

    Each validation sample is credited to the hyperbox with the highest
    membership (ties are broken at random). Two candidate models are then
    built and the one misclassifying fewer validation samples replaces
    self.V, self.W and self.classId (the first one wins a draw):
      * "remove": only hyperboxes whose accuracy reaches accuracy_threshold,
        plus one randomly chosen hyperbox for every class that would
        otherwise disappear;
      * "keep":   the accurate hyperboxes plus those that won no sample.

    INPUT
        XlT                 Test data lower bounds (rows = objects, columns = features)
        XuT                 Test data upper bounds (rows = objects, columns = features)
        patClassIdTest      Test data class labels (crisp)
        accuracy_threshold  The minimum accuracy for each hyperbox
        newVerPredict       + True: using Manhattan distance in addition to fuzzy membership
                            + False: No using Manhattan distance
    """
    # initialization
    yX = XlT.shape[0]
    # column 0: correct predictions, column 1: wrong predictions (per hyperbox)
    no_predicted_samples_hyperboxes = np.zeros((len(self.classId), 2))

    # classifications
    for i in range(yX):
        # memberships of sample i for all hyperboxes; only the current row is
        # needed, so no (samples x hyperboxes) matrix is allocated
        mem = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper)
        bmax = mem.max()                        # get max membership value
        maxVind = np.nonzero(mem == bmax)[0]    # get indexes of all hyperboxes with max membership

        if len(maxVind) == 1:
            # Only one hyperbox with the highest membership function
            if self.classId[maxVind[0]] == patClassIdTest[i]:
                no_predicted_samples_hyperboxes[maxVind[0], 0] += 1
            else:
                no_predicted_samples_hyperboxes[maxVind[0], 1] += 1
        else:
            # More than one hyperbox with highest membership => random choosing
            id_min = maxVind[np.random.randint(len(maxVind))]

            # a sample labelled 0 is never counted as an error here
            if self.classId[id_min] != patClassIdTest[i] and patClassIdTest[i] != 0:
                no_predicted_samples_hyperboxes[id_min, 1] += 1
            else:
                no_predicted_samples_hyperboxes[id_min, 0] += 1

    # pruning handling based on the validation results
    n_correct = no_predicted_samples_hyperboxes[:, 0]
    n_total = no_predicted_samples_hyperboxes.sum(axis=1)
    was_used = n_total != 0

    # np.bool was removed from NumPy (1.24+); the builtin bool is the same dtype
    accuracy_larger_half = np.zeros(len(n_total), dtype=bool)
    accuracy_larger_half[was_used] = (n_correct[was_used] / n_total[was_used]) >= accuracy_threshold
    # hyperboxes that did not take part in any prediction are kept in this variant
    accuracy_larger_half_keep_nojoin = accuracy_larger_half | ~was_used

    # keep one hyperbox for class prunned all
    current_classes = np.unique(self.classId)
    class_tmp = self.classId[accuracy_larger_half]
    for c in current_classes:
        if c not in class_tmp:
            pos = np.nonzero(self.classId == c)[0]
            id_kept = np.random.randint(len(pos))
            # keep pos[id_kept]
            accuracy_larger_half[pos[id_kept]] = True

    V_prun_remove = self.V[accuracy_larger_half]
    W_prun_remove = self.W[accuracy_larger_half]
    classId_prun_remove = self.classId[accuracy_larger_half]

    W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
    V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
    classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]

    if newVerPredict == True:
        result_prun_remove = predict_with_manhattan(
            V_prun_remove, W_prun_remove, classId_prun_remove,
            XlT, XuT, patClassIdTest, self.gamma, self.oper)
        result_prun_keep_nojoin = predict_with_manhattan(
            V_prun_keep, W_prun_keep, classId_prun_keep,
            XlT, XuT, patClassIdTest, self.gamma, self.oper)
    else:
        result_prun_remove = predict(
            V_prun_remove, W_prun_remove, classId_prun_remove,
            XlT, XuT, patClassIdTest, self.gamma, self.oper)
        result_prun_keep_nojoin = predict(
            V_prun_keep, W_prun_keep, classId_prun_keep,
            XlT, XuT, patClassIdTest, self.gamma, self.oper)

    if result_prun_remove.summis <= result_prun_keep_nojoin.summis:
        self.V = V_prun_remove
        self.W = W_prun_remove
        self.classId = classId_prun_remove
    else:
        self.V = V_prun_keep
        self.W = W_prun_keep
        self.classId = classId_prun_keep
def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5, newVerPredict = True):
    """
    pruning handling based on validation (validation routine) with hyperboxes stored in self. V, W, classId

        result = pruning_val(XlT,XuT,patClassIdTest)

    Each validation sample is credited to one winning hyperbox. Two candidate
    models are then built and the one misclassifying fewer validation samples
    replaces self.V, self.W, self.classId and self.counter (the first one
    wins a draw):
      * "remove": only hyperboxes whose accuracy reaches accuracy_threshold;
      * "keep":   the accurate hyperboxes plus those that won no sample.
    In both candidates one randomly chosen hyperbox is retained for every
    class that would otherwise disappear.

    INPUT
        XlT                 Test data lower bounds (rows = objects, columns = features)
        XuT                 Test data upper bounds (rows = objects, columns = features)
        patClassIdTest      Test data class labels (crisp)
        accuracy_threshold  The minimum accuracy for each hyperbox
        newVerPredict       + True: using probability formula for prediction in addition to fuzzy membership
                            + False: No using probability formula for prediction
    """
    # initialization
    yX = XlT.shape[0]
    # column 0: correct predictions, column 1: wrong predictions (per hyperbox)
    no_predicted_samples_hyperboxes = np.zeros((len(self.classId), 2))

    # classifications
    for i in range(yX):
        mem = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper)  # calculate memberships for all hyperboxes
        bmax = mem.max()                        # get max membership value
        maxVind = np.nonzero(mem == bmax)[0]    # get indexes of all hyperboxes with max membership

        if len(maxVind) == 1:
            # Only one hyperbox with the highest membership function
            if self.classId[maxVind[0]] == patClassIdTest[i]:
                no_predicted_samples_hyperboxes[maxVind[0], 0] += 1
            else:
                no_predicted_samples_hyperboxes[maxVind[0], 1] += 1
        else:
            if newVerPredict == True:
                cls_same_mem = np.unique(self.classId[maxVind])
                if len(cls_same_mem) > 1:
                    # the tied hyperboxes belong to several classes
                    is_find_prob_val = True
                    if bmax == 1:
                        # full membership: prefer a hyperbox holding exactly one sample
                        id_box_with_one_sample = np.nonzero(self.counter[maxVind] == 1)[0]
                        if len(id_box_with_one_sample) > 0:
                            is_find_prob_val = False
                            id_min = random.choice(maxVind[id_box_with_one_sample])

                    if is_find_prob_val:
                        # pick the class with the largest share of
                        # membership * sample count among the tied boxes;
                        # equal shares go to the class holding more samples
                        sum_prod_denum = (mem[maxVind] * self.counter[maxVind]).sum()
                        max_prob = -1
                        pre_id_cls = None
                        for c in cls_same_mem:
                            id_cls = np.nonzero(self.classId[maxVind] == c)[0]
                            sum_pro_num = (mem[maxVind[id_cls]] * self.counter[maxVind[id_cls]]).sum()
                            tmp = sum_pro_num / sum_prod_denum

                            if tmp > max_prob or (tmp == max_prob and pre_id_cls is not None and self.counter[maxVind[id_cls]].sum() > self.counter[maxVind[pre_id_cls]].sum()):
                                max_prob = tmp
                                pre_id_cls = id_cls
                                id_min = random.choice(maxVind[id_cls])
                else:
                    # all tied hyperboxes share one class: any of them will do
                    id_min = random.choice(maxVind)
            else:
                # More than one hyperbox with highest membership => random choosing
                id_min = maxVind[np.random.randint(len(maxVind))]

            # an unlabeled sample is never counted as an error here
            if self.classId[id_min] != patClassIdTest[i] and patClassIdTest[i] != UNLABELED_CLASS:
                no_predicted_samples_hyperboxes[id_min, 1] += 1
            else:
                no_predicted_samples_hyperboxes[id_min, 0] += 1

    # pruning handling based on the validation results
    n_correct = no_predicted_samples_hyperboxes[:, 0]
    n_total = no_predicted_samples_hyperboxes.sum(axis=1)
    was_used = n_total != 0

    # np.bool was removed from NumPy (1.24+); the builtin bool is the same dtype
    accuracy_larger_half = np.zeros(len(n_total), dtype=bool)
    accuracy_larger_half[was_used] = (n_correct[was_used] / n_total[was_used]) >= accuracy_threshold
    # hyperboxes that did not take part in any prediction are kept in this variant
    accuracy_larger_half_keep_nojoin = accuracy_larger_half | ~was_used

    # keep one hyperbox for class prunned all
    current_classes = np.unique(self.classId)
    class_tmp = self.classId[accuracy_larger_half]
    class_tmp_keep = self.classId[accuracy_larger_half_keep_nojoin]
    for c in current_classes:
        if c not in class_tmp:
            # [0] extracts the index array from the tuple returned by
            # np.nonzero; without it every hyperbox of the class was kept
            pos = np.nonzero(self.classId == c)[0]
            id_kept = np.random.randint(len(pos))
            # keep pos[id_kept]
            accuracy_larger_half[pos[id_kept]] = True

        if c not in class_tmp_keep:
            pos = np.nonzero(self.classId == c)[0]
            id_kept = np.random.randint(len(pos))
            accuracy_larger_half_keep_nojoin[pos[id_kept]] = True

    V_prun_remove = self.V[accuracy_larger_half]
    W_prun_remove = self.W[accuracy_larger_half]
    classId_prun_remove = self.classId[accuracy_larger_half]
    numSample_prun_remove = self.counter[accuracy_larger_half]

    W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
    V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
    classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]
    # sample counts must come from self.counter (not from the class labels)
    numSample_prun_keep = self.counter[accuracy_larger_half_keep_nojoin]

    if newVerPredict == True:
        result_prun_remove = predict_with_probability(
            V_prun_remove, W_prun_remove, classId_prun_remove, numSample_prun_remove,
            XlT, XuT, patClassIdTest, self.gamma, self.oper)
        result_prun_keep_nojoin = predict_with_probability(
            V_prun_keep, W_prun_keep, classId_prun_keep, numSample_prun_keep,
            XlT, XuT, patClassIdTest, self.gamma, self.oper)
    else:
        result_prun_remove = predict(
            V_prun_remove, W_prun_remove, classId_prun_remove,
            XlT, XuT, patClassIdTest, self.gamma, self.oper)
        result_prun_keep_nojoin = predict(
            V_prun_keep, W_prun_keep, classId_prun_keep,
            XlT, XuT, patClassIdTest, self.gamma, self.oper)

    if result_prun_remove.summis <= result_prun_keep_nojoin.summis:
        self.V = V_prun_remove
        self.W = W_prun_remove
        self.classId = classId_prun_remove
        self.counter = numSample_prun_remove
    else:
        self.V = V_prun_keep
        self.W = W_prun_keep
        self.classId = classId_prun_keep
        self.counter = numSample_prun_keep
id_kept = np.random.randint(len(pos)) accuracy_larger_half_keep_nojoin[pos[id_kept]] = True V_prun_remove = self.V[accuracy_larger_half] W_prun_remove = self.W[accuracy_larger_half] classId_prun_remove = self.classId[accuracy_larger_half] W_prun_keep = self.W[accuracy_larger_half_keep_nojoin] V_prun_keep = self.V[accuracy_larger_half_keep_nojoin] classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin] if newVerPredict == True: result_prun_remove = predict_with_manhattan(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper) result_prun_keep_nojoin = predict_with_manhattan(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper) else: result_prun_remove = predict(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper) result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper) if (result_prun_remove.summis <= result_prun_keep_nojoin.summis): self.V = V_prun_remove self.W = W_prun_remove self.classId = classId_prun_remove else: self.V = V_prun_keep self.W = W_prun_keep self.classId = classId_prun_keep if __name__ == '__main__': """ INPUT parameters from command line
def training(self, X_tr, X_val):
    """
    Train the base classifiers of the ensemble from a training part and a
    validation part.

    Two chains of AccelBatchGFMM models are built over a decreasing
    similarity threshold, one starting from X_tr and one from X_val. At each
    step both models are fitted on the hyperboxes produced by the previous
    step of their own chain and scored on the data of the other chain; the
    model with the smaller score is stored as the k-th base classifier.

    INPUT
        X_tr       An object contains training data with the Bunch datatype, its attributes:
                    + lower:    lower bounds
                    + upper:    upper bounds
                    + label:    class labels
        X_val      An object contains validation data with the Bunch datatype, its attributes:
                    + lower:    lower bounds
                    + upper:    upper bounds
                    + label:    class labels
                   X_tr, X_val should be normalized (if needed) beforehand using this function

    OUTPUT
        self.baseClassifiers    the container filled with the selected models;
                                self.numHyperboxes holds their total number of hyperboxes
    """
    tr_lower, tr_upper, tr_label = X_tr.lower, X_tr.upper, X_tr.label
    va_lower, va_upper, va_label = X_val.lower, X_val.upper, X_val.label

    step = (self.bthres - self.bthres_min) / self.numClassifier
    threshold = self.bthres
    self.numHyperboxes = 0

    for idx in range(self.numClassifier):
        model_tr = AccelBatchGFMM(self.gamma, self.teta, threshold, self.simil,
                                  self.sing, False, self.oper, False)
        model_tr.fit(tr_lower, tr_upper, tr_label)

        model_va = AccelBatchGFMM(self.gamma, self.teta, threshold, self.simil,
                                  self.sing, False, self.oper, False)
        model_va.fit(va_lower, va_upper, va_label)

        # each model is evaluated on the data of the other chain
        outcome_tr = predict(model_tr.V, model_tr.W, model_tr.classId,
                             va_lower, va_upper, va_label, self.gamma, self.oper)
        outcome_va = predict(model_va.V, model_va.W, model_va.classId,
                             tr_lower, tr_upper, tr_label, self.gamma, self.oper)

        # scores are normalised by the hyperbox count of the other model
        score_tr = outcome_tr.summis / len(model_va.classId)
        score_va = outcome_va.summis / len(model_tr.classId)

        # the validation-chain model wins a draw
        self.baseClassifiers[idx] = model_tr if score_tr < score_va else model_va
        self.numHyperboxes = self.numHyperboxes + len(self.baseClassifiers[idx].classId)

        # the hyperboxes just produced are the input of the next step
        tr_lower, tr_upper, tr_label = model_tr.V, model_tr.W, model_tr.classId
        va_lower, va_upper, va_label = model_va.V, model_va.W, model_va.classId

        threshold = threshold - step

    return self.baseClassifiers