def splitDatasetRndTo2Part(Xl, Xu, patClassId, training_rate=0.5, isNorm=False, norm_range=[0, 1]):
    """
    Randomly split a whole dataset into a training part and a validation part.

    All rows are shuffled with a single random permutation. The first
    int(numSamples * training_rate) shuffled rows become the training part,
    the remaining rows become the validation part.

        INPUT
            Xl              Input data lower bounds (rows = objects, columns = features)
            Xu              Input data upper bounds (rows = objects, columns = features)
            patClassId      Input data class labels (crisp)
            training_rate   Fraction of the samples placed in the training part
            isNorm          When equal to True, Xl and Xu are passed through normalize() first
            norm_range      Range handed to normalize(), for example: [0, 1]

        OUTPUT
            trainingSet     Bunch holding the training part (attributes: lower, upper, label)
            validSet        Bunch holding the validation part (attributes: lower, upper, label)
    """
    if isNorm == True:
        Xl = normalize(Xl, norm_range)
        Xu = normalize(Xu, norm_range)

    n_rows = Xl.shape[0]

    # One permutation drives both parts, so they never share a row
    shuffled = np.random.permutation(n_rows)
    cut = int(n_rows * training_rate)
    train_idx = shuffled[:cut]
    valid_idx = shuffled[cut:]

    trainingSet = Bunch(lower=Xl[train_idx], upper=Xu[train_idx], label=patClassId[train_idx])
    validSet = Bunch(lower=Xl[valid_idx], upper=Xu[valid_idx], label=patClassId[valid_idx])

    return (trainingSet, validSet)
def read_file_in_chunks(filePath, chunk_index, chunk_size):
    """
    Read one chunk of lines from a data file without grouping the data by label.

    Each non-blank line is parsed as a list of numbers separated by commas
    and/or whitespace; the character '?' is read as NaN. The last number on
    each line is used as the label, all preceding numbers as the data.
    Blank or whitespace-only lines inside the chunk are skipped (they used
    to produce an empty row that broke the 2-D slicing below).

        INPUT
            filePath        The path to the file containing data (including file name and its extension)
            chunk_index     The zero-based index of the chunk to read
            chunk_size      The number of file lines in each chunk (the last chunk may hold fewer lines)

        OUTPUT
            A Bunch with attributes data (all columns but the last) and label
            (the last column), or None when the chunk contains no data lines
    """
    start = chunk_index * chunk_size
    stop = start + chunk_size

    rows = []
    with open(filePath) as f:
        for line in itertools.islice(f, start, stop):
            text = line.strip()
            if not text:
                # nothing to parse on an empty line
                continue
            parsed = np.fromstring(text.replace(',', ' ').replace('?', 'nan'),
                                   dtype=np.float64, sep=' ')
            rows.append(parsed.tolist())

    if not rows:
        return None

    # `dtype` is a module-level name defined outside this block
    input_data = np.array(rows, dtype=dtype)
    return Bunch(data=input_data[:, 0:-1], label=input_data[:, -1])
def predict(V, W, classId, XlT, XuT, patClassIdTest, gama = 1, oper = 'min'):
    """
    GFMM classifier (test routine) that counts every boundary sample as an error

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

        INPUT
            V                 Tested model hyperbox lower bounds
            W                 Tested model hyperbox upper bounds
            classId           Input data (hyperbox) class labels (crisp)
            XlT               Test data lower bounds (rows = objects, columns = features)
            XuT               Test data upper bounds (rows = objects, columns = features)
            patClassIdTest    Test data class labels (crisp)
            gama              Membership function slope (default: 1)
            oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

        OUTPUT
            result            A Bunch with the following attributes:
                              + summis      Number of misclassified objects
                              + misclass    Error map (one entry per test sample)

    A sample is marked as misclassified when its maximum membership is zero,
    or when the hyperboxes sharing the maximum membership carry more than one
    class. Otherwise it is correct when one of those hyperboxes has the test
    label, or when the test label equals UNLABELED_CLASS.
    """
    # A single 1-D sample is promoted to a one-row matrix
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    n_test = XlT.shape[0]
    misclass = np.zeros(n_test)

    for i in range(n_test):
        # memberships of sample i with respect to all hyperboxes
        memberships = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper)
        top = memberships.max()
        winners = np.nonzero(memberships == top)[0]

        if top == 0:
            print('zero maximum membership value')
            misclass[i] = True
            continue

        if len(np.unique(classId[winners])) > 1:
            # winning hyperboxes disagree on the class
            misclass[i] = True
            continue

        is_hit = np.any(classId[winners] == patClassIdTest[i]) or patClassIdTest[i] == UNLABELED_CLASS
        misclass[i] = not is_hit

    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis = summis, misclass = misclass)
def fit(self, Xl_onl, Xu_onl, patClassId_onl, Xl_off, Xu_off, patClassId_off):
    """
    Train one agglomerative (offline) model and one online model.

    Input data need to be normalized before using this function (both
    learners are created with isNorm=False).

        Xl_onl              Input data lower bounds (rows = objects, columns = features) for online learning
        Xu_onl              Input data upper bounds (rows = objects, columns = features) for online learning
        patClassId_onl      Input data class labels (crisp) for online learning

        Xl_off              Input data lower bounds (rows = objects, columns = features) for agglomerative learning
        Xu_off              Input data upper bounds (rows = objects, columns = features) for agglomerative learning
        patClassId_off      Input data class labels (crisp) for agglomerative learning

    The hyperboxes (V, W, classId) of the two trained models are stored in
    self.offClassifier and self.onlClassifier, the elapsed time in
    self.elapsed_training_time. Returns self.

    Timing uses time.perf_counter(): time.clock() was removed in Python 3.8.
    The stored value is therefore wall-clock seconds.
    """
    time_start = time.perf_counter()

    # Perform agglomerative learning
    aggloClassifier = AccelBatchGFMM(self.gamma, self.teta_agglo, bthres=self.bthres, simil=self.simil,
                                     sing=self.sing, isDraw=self.isDraw, oper=self.oper, isNorm=False)
    aggloClassifier.fit(Xl_off, Xu_off, patClassId_off)
    self.offClassifier = Bunch(V=aggloClassifier.V, W=aggloClassifier.W, classId=aggloClassifier.classId)

    # Perform online learning
    # NOTE(review): self.teta_onl is passed as both the 2nd and 3rd positional
    # argument -- confirm against the OnlineGFMM signature that this is intended.
    onlClassifier = OnlineGFMM(self.gamma, self.teta_onl, self.teta_onl, isDraw=self.isDraw,
                               oper=self.oper, isNorm=False, norm_range=[self.loLim, self.hiLim])
    onlClassifier.fit(Xl_onl, Xu_onl, patClassId_onl)
    self.onlClassifier = Bunch(V=onlClassifier.V, W=onlClassifier.W, classId=onlClassifier.classId)

    time_end = time.perf_counter()
    self.elapsed_training_time = time_end - time_start

    return self
def predict(V, W, classId, XlT, XuT, patClassIdTest, gama = 1, oper = 'min'):
    """
    GFMM classifier (test routine) with random tie-breaking between classes

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

        INPUT
            V                 Tested model hyperbox lower bounds
            W                 Tested model hyperbox upper bounds
            classId           Input data (hyperbox) class labels (crisp)
            XlT               Test data lower bounds (rows = objects, columns = features)
            XuT               Test data upper bounds (rows = objects, columns = features)
            patClassIdTest    Test data class labels (crisp)
            gama              Membership function slope (default: 1)
            oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

        OUTPUT
            result            A Bunch with the following attributes:
                              + summis           Number of misclassified objects
                              + misclass         Error map (one entry per test sample)
                              + predicted_class  Predicted class of each test sample

    When the hyperboxes with maximum membership belong to several classes,
    one of those classes is drawn with random.choice. A sample whose test
    label equals UNLABELED_CLASS is never counted as an error.
    """
    # A single 1-D sample is promoted to a one-row matrix
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    n_test = XlT.shape[0]
    misclass = np.zeros(n_test)
    predicted_class = np.full(n_test, None)

    for i in range(n_test):
        # memberships of sample i with respect to all hyperboxes
        memberships = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper)
        top = memberships.max()
        winners = np.nonzero(memberships == top)[0]

        candidate_classes = np.unique(classId[winners])
        if len(candidate_classes) > 1:
            # sample lies on a decision boundary: pick one class at random
            predicted_class[i] = random.choice(candidate_classes)
        else:
            predicted_class[i] = classId[winners[0]]

        is_ok = predicted_class[i] == patClassIdTest[i] or patClassIdTest[i] == UNLABELED_CLASS
        misclass[i] = not is_ok

    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis = summis, misclass = misclass, predicted_class=predicted_class)
def predict_rfmm_distance(V, W, classId, XhT, patClassIdTest, gama=1):
    """
    Prediction using the distance in the paper "A refined Fuzzy min-max neural
    network with new learning procedure for pattern classification"

        INPUT
            V                 Tested model hyperbox lower bounds
            W                 Tested model hyperbox upper bounds
            classId           Hyperbox class labels (crisp)
            XhT               Test input data (rows = objects, columns = features)
            patClassIdTest    Test data class labels (crisp)
            gama              Value passed to simpsonMembership (default: 1)

        OUTPUT
            result            A Bunch with the following attributes:
                              + summis           Number of misclassified objects
                              + misclass         Error map (one entry per test sample)
                              + predicted_class  Predicted class of each test sample

    When the hyperboxes with maximum membership carry more than one class,
    the hyperbox with the smallest rfmm_distance value is selected;
    otherwise the first hyperbox with maximum membership is used.
    """
    if len(XhT.shape) == 1:
        XhT = XhT.reshape(1, -1)

    yX = XhT.shape[0]
    predicted_class = np.full(yX, None)
    misclass = np.zeros(yX)
    mem = np.zeros((yX, V.shape[0]))

    for i in range(yX):
        # memberships of sample i with respect to all hyperboxes
        mem[i, :] = simpsonMembership(XhT[i, :], V, W, gama)
        bmax = mem[i, :].max()
        # indexes of all hyperboxes sharing the maximum membership
        maxVind = np.nonzero(mem[i, :] == bmax)[0]

        if len(np.unique(classId[maxVind])) > 1:
            # tie between classes: replicate the sample once per tied hyperbox
            # and let rfmm_distance decide
            XgT_mat = np.ones((len(maxVind), 1)) * XhT[i]
            dist = rfmm_distance(XgT_mat, V[maxVind], W[maxVind])
            winner = maxVind[dist.argmin()]
        else:
            winner = maxVind[0]

        predicted_class[i] = classId[winner]
        misclass[i] = classId[winner] != patClassIdTest[i]

    summis = np.sum(misclass).astype(np.int64)

    result = Bunch(summis=summis, misclass=misclass, predicted_class=predicted_class)
    return result
def read_file_in_chunks_group_by_label(filePath, chunk_index, chunk_size):
    """
    Read one chunk of lines from a data file and group its rows by label.

    Each non-blank line is parsed as a list of numbers separated by commas
    and/or whitespace; the character '?' is read as NaN. The last number on
    each line is the label, the preceding numbers are the data of that row.
    Blank or whitespace-only lines inside the chunk are skipped (they used
    to raise IndexError when the label was read from an empty list).

        INPUT
            filePath        The path to the file containing data (including file name and its extension)
            chunk_index     The zero-based index of the chunk to read
            chunk_size      The number of file lines in each chunk (the last chunk may hold fewer lines)

        OUTPUT
            results         A dictionary mapping each label found in the chunk to a Bunch with
                            attributes data (array of the rows with that label) and label
                            (int64 array repeating the label once per row),
                            or None when the chunk contains no data lines
    """
    start = chunk_index * chunk_size
    stop = start + chunk_size

    rows_by_label = {}
    with open(filePath) as f:
        for line in itertools.islice(f, start, stop):
            text = line.strip()
            if not text:
                # nothing to parse on an empty line
                continue
            num_data = np.fromstring(text.replace(',', ' ').replace('?', 'nan'),
                                     dtype=np.float64, sep=' ').tolist()
            lb = num_data[-1]
            rows_by_label.setdefault(lb, []).append(num_data[0:-1])

    if not rows_by_label:
        return None

    # `dtype` is a module-level name defined outside this block
    return {
        lb: Bunch(data=np.asarray(rows, dtype=dtype),
                  label=np.asarray([lb] * len(rows), dtype=np.int64))
        for lb, rows in rows_by_label.items()
    }
def splitDatasetRndToKPart(Xl, Xu, patClassId, k=10, isNorm=False, norm_range=[0, 1]):
    """
    Randomly split a dataset into k parts.

    The rows are shuffled with one random permutation, then cut at k + 1
    evenly spaced (rounded) positions between 0 and the number of samples.

        INPUT
            Xl              Input data lower bounds (rows = objects, columns = features)
            Xu              Input data upper bounds (rows = objects, columns = features)
            patClassId      Input data class labels (crisp)
            k               Number of parts to produce
            isNorm          When equal to True, Xl and Xu are passed through normalize() first
            norm_range      Range handed to normalize(), for example: [0, 1]

        OUTPUT
            partitionedA    A numpy array of length k; each element is a Bunch with attributes:
                            + lower: lower bounds
                            + upper: upper bounds
                            + label: class labels
    """
    if isNorm == True:
        Xl = normalize(Xl, norm_range)
        Xu = normalize(Xu, norm_range)

    n_rows = Xl.shape[0]
    shuffled = np.random.permutation(n_rows)

    # k + 1 cut points delimit k consecutive slices of the shuffled indices
    cuts = np.round(np.linspace(0, n_rows, k + 1)).astype(np.int64)

    partitionedA = np.empty(k, dtype=Bunch)
    for part, (begin, end) in enumerate(zip(cuts[:-1], cuts[1:])):
        chosen = shuffled[begin:end]
        partitionedA[part] = Bunch(lower=Xl[chosen, :], upper=Xu[chosen, :], label=patClassId[chosen])

    return partitionedA
def predict(V, W, classId, XhT, patClassIdTest, gama = 1):
    """
    FMNN classifier (test routine)

      result = predict(V,W,classId,XhT,patClassIdTest,gama)

        INPUT
            V                 Tested model hyperbox lower bounds
            W                 Tested model hyperbox upper bounds
            classId           Input data (hyperbox) class labels (crisp)
            XhT               Test input data (rows = objects, columns = features)
            patClassIdTest    Test data class labels (crisp)
            gama              Membership function slope (default: 1)

        OUTPUT
            result            A Bunch with the following attributes:
                              + summis      Number of misclassified objects
                              + misclass    Error map (one entry per test sample)
                              + sumamb      Number of objects with maximum membership in more than one class
                              + out         Soft class memberships (rows = samples, columns = classes)
                              + mem         Hyperbox memberships (rows = samples, columns = hyperboxes)

    A sample with zero maximum membership is reported on stdout and counted
    as misclassified; otherwise it is correct when any hyperbox with maximum
    membership carries the test label.
    """
    if len(XhT.shape) == 1:
        XhT = XhT.reshape(1, -1)

    n_test = XhT.shape[0]
    classes = np.unique(classId)
    n_classes = classes.size

    misclass = np.zeros(n_test)
    ambiguity = np.zeros(n_test)
    mem = np.zeros((n_test, V.shape[0]))
    out = np.zeros((n_test, n_classes))

    for i in range(n_test):
        # memberships of sample i with respect to all hyperboxes
        mem[i, :] = simpsonMembership(XhT[i, :], V, W, gama)
        top = mem[i, :].max()
        winners = np.nonzero(mem[i, :] == top)[0]

        # best membership reached inside each class
        for j, cls in enumerate(classes):
            out[i, j] = mem[i, classId == cls].max()

        # how many classes reach the overall maximum
        ambiguity[i] = np.sum(out[i, :] == top)

        if top == 0:
            print('zero maximum membership value')
            misclass[i] = True
        else:
            misclass[i] = not np.any(classId[winners] == patClassIdTest[i])

    sumamb = np.sum(ambiguity > 1)
    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis = summis, misclass = misclass, sumamb = sumamb, out = out, mem = mem)
def torch_predict(V, W, classId, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    GFMM classifier (test routine). Implemented by Pytorch

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

        INPUT
            V                 Tested model hyperbox lower bounds
            W                 Tested model hyperbox upper bounds
            classId           Input data (hyperbox) class labels (crisp)
            XlT               Test data lower bounds (rows = objects, columns = features)
            XuT               Test data upper bounds (rows = objects, columns = features)
            patClassIdTest    Test data class labels (crisp)
            gama              Membership function slope (default: 1)
            oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

        OUTPUT
            result            A Bunch with the following attributes:
                              + summis      Number of misclassified objects
                              + misclass    Error map (one entry per test sample)
                              + sumamb      Number of objects with maximum membership in more than one class
                              + out         Soft class memberships
                              + mem         Hyperbox memberships

    The CUDA path is taken when is_Have_GPU is truthy and either the number of
    elements of W or the number of columns of XlT reaches
    GPU_Computing_Threshold. A test label equal to 0 is never counted as an
    error.
    """
    n_test = XlT.size(0)

    isUsingGPU = False
    if is_Have_GPU and (W.size(0) * W.size(1) >= GPU_Computing_Threshold or XlT.size(1) >= GPU_Computing_Threshold):
        isUsingGPU = True

    if isUsingGPU:
        # move the model and the test data to the GPU
        V = V.cuda()
        W = W.cuda()
        classId = classId.cuda()
        XlT = XlT.cuda()
        XuT = XuT.cuda()
        patClassIdTest = patClassIdTest.cuda()

    classes = torch.unique(classId)
    n_classes = classes.size(0)

    if isUsingGPU:
        misclass = torch.cuda.FloatTensor(n_test).fill_(0)
        ambiguity = torch.cuda.FloatTensor(n_test, 1).fill_(0)
        mem = torch.cuda.FloatTensor(n_test, V.size(0)).fill_(0)
        out = torch.cuda.FloatTensor(n_test, n_classes).fill_(0)
        sample_ids = torch.arange(n_test).cuda()
        membership_fn = gpu_memberG
    else:
        misclass = torch.zeros(n_test)
        ambiguity = torch.zeros((n_test, 1))
        mem = torch.zeros((n_test, V.size(0)))
        out = torch.zeros((n_test, n_classes))
        sample_ids = torch.arange(n_test)
        membership_fn = torch_memberG

    for i in sample_ids:
        # memberships of sample i with respect to all hyperboxes
        mem[i, :] = membership_fn(XlT[i, :], XuT[i, :], V, W, gama, oper)

        top = mem[i, :].max()
        # indexes of all hyperboxes sharing the maximum membership
        winners = torch.nonzero(mem[i, :] == top)

        # best membership reached inside each class
        for j in torch.arange(n_classes):
            out[i, j] = mem[i, classId == classes[j]].max()

        # how many classes reach the overall maximum
        ambiguity[i, :] = torch.sum(out[i, :] == top)

        if top == 0:
            print('zero maximum membership value')

        misclass[i] = ~(torch.any(classId[winners] == patClassIdTest[i]) | (patClassIdTest[i] == 0))

    sumamb = torch.sum(ambiguity[:, 0] > 1)
    summis = torch.sum(misclass)

    return Bunch(summis=summis, misclass=misclass, sumamb=sumamb, out=out, mem=mem)
def predictOnlineOfflineCombination(onlClassifier, offClassifier, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    GFMM online-offline classifier (test routine)

      result = predictOnlineOfflineCombination(onlClassifier, offClassifier, XlT,XuT,patClassIdTest,gama,oper)

        INPUT
            onlClassifier     online classifier with the following attributes:
                              + V: hyperbox lower bounds
                              + W: hyperbox upper bounds
                              + classId: hyperbox class labels (crisp)
            offClassifier     offline classifier with the same three attributes
            XlT               Test data lower bounds (rows = objects, columns = features)
            XuT               Test data upper bounds (rows = objects, columns = features)
            patClassIdTest    Test data class labels (crisp)
            gama              Membership function slope (default: 1)
            oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

        OUTPUT
            result            A Bunch with the following attributes:
                              + summis      Number of misclassified objects
                              + misclass    Error map (one entry per test sample)
                              + out         Soft class memberships (per class, the larger of the
                                            online and offline maxima)

    The online classifier decides a sample only when its maximum membership is
    strictly larger than the offline one; otherwise the offline classifier
    decides. A test label equal to 0 is never counted as an error.
    """
    def _class_peak(member_row, box_labels, cls):
        # highest membership among the hyperboxes of class `cls`, 0 when there is none
        of_class = member_row[box_labels == cls]
        return of_class.max() if len(of_class) > 0 else 0

    n_test = XlT.shape[0]
    misclass = np.zeros(n_test)

    classes = np.union1d(onlClassifier.classId, offClassifier.classId)
    n_classes = classes.size

    mem_onl = np.zeros((n_test, onlClassifier.V.shape[0]))
    mem_off = np.zeros((n_test, offClassifier.V.shape[0]))
    out = np.zeros((n_test, n_classes))

    for i in range(n_test):
        # memberships, maximum and winning hyperboxes in the online classifier
        mem_onl[i, :] = memberG(XlT[i, :], XuT[i, :], onlClassifier.V, onlClassifier.W, gama, oper)
        top_onl = mem_onl[i, :].max()
        winners_onl = np.nonzero(mem_onl[i, :] == top_onl)[0]

        # the same for the offline classifier
        mem_off[i, :] = memberG(XlT[i, :], XuT[i, :], offClassifier.V, offClassifier.W, gama, oper)
        top_off = mem_off[i, :].max()
        winners_off = np.nonzero(mem_off[i, :] == top_off)[0]

        for j in range(n_classes):
            peak_onl = _class_peak(mem_onl[i, :], onlClassifier.classId, classes[j])
            peak_off = _class_peak(mem_off[i, :], offClassifier.classId, classes[j])
            out[i, j] = peak_onl if peak_onl > peak_off else peak_off

        if top_onl > top_off:
            deciding_labels = onlClassifier.classId[winners_onl]
        else:
            deciding_labels = offClassifier.classId[winners_off]

        misclass[i] = ~(np.any(deciding_labels == patClassIdTest[i]) | (patClassIdTest[i] == 0))

    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis, misclass=misclass, out=out)
def predictDecisionLevelEnsemble(classifiers, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    Perform classification for a decision level ensemble learning

                result = predictDecisionLevelEnsemble(classifiers, XlT, XuT, patClassIdTest, gama, oper)

    INPUT
        classifiers         An array of classifiers to combine; each element exposes V, W and classId
        XlT                 Test data lower bounds (rows = objects, columns = features)
        XuT                 Test data upper bounds (rows = objects, columns = features)
        patClassIdTest      Test data class labels (crisp)
        gama                Membership function slope (default: 1)
        oper                Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
        result              A Bunch with the following attributes:
                            + summis        Number of misclassified samples
                            + misclass      Boolean error map for input samples
                            + out           Soft class memberships, rows are testing input patterns,
                                            columns are indices of classes
                            + classes       Class labels corresponding to the column indices of out

    For every sample the per-class maximum membership of each base classifier
    is summed and divided by the number of classifiers. A sample is correct
    when one of the classes with the highest averaged membership equals its
    label, or when its label is 0.
    """
    numClassifier = len(classifiers)

    yX = XlT.shape[0]
    # builtin bool: the np.bool alias was removed in NumPy 1.24
    misclass = np.zeros(yX, dtype=bool)

    # collect the class labels of all base classifiers
    classId = classifiers[0].classId
    for clf in classifiers[1:]:
        classId = np.union1d(classId, clf.classId)

    classes = np.unique(classId)
    noClasses = len(classes)
    out = np.zeros((yX, noClasses), dtype=np.float64)

    for i in range(yX):
        for clf in classifiers:
            # memberships of sample i for all hyperboxes of this classifier
            mem_tmp = memberG(XlT[i, :], XuT[i, :], clf.V, clf.W, gama, oper)

            for j in range(noClasses):
                # a classifier without hyperboxes of class j contributes 0
                same_j_labels = mem_tmp[clf.classId == classes[j]]
                if len(same_j_labels) > 0:
                    out[i, j] += same_j_labels.max()

        # average the accumulated per-class memberships over all classifiers
        out[i, :] = out[i, :] / numClassifier

        maxb = out[i].max()
        # boolean mask of all classes reaching the highest averaged membership
        maxMemInd = out[i] == maxb
        is_correct = np.logical_or((classes[maxMemInd] == patClassIdTest[i]).any(), patClassIdTest[i] == 0)
        misclass[i] = not is_correct

    summis = np.sum(misclass)

    result = Bunch(summis=summis, misclass=misclass, out=out, classes=classes)
    return result
def predict_with_manhattan(V, W, classId, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    GFMM classifier (test routine): Using Manhattan distance in the case of many
    hyperboxes with different classes having the same maximum membership value

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

        INPUT
            V                 Tested model hyperbox lower bounds
            W                 Tested model hyperbox upper bounds
            classId           Input data (hyperbox) class labels (crisp)
            XlT               Test data lower bounds (rows = objects, columns = features)
            XuT               Test data upper bounds (rows = objects, columns = features)
            patClassIdTest    Test data class labels (crisp)
            gama              Membership function slope (default: 1)
            oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

        OUTPUT
            result            A Bunch with the following attributes:
                              + summis               Number of misclassified objects
                              + misclass             Error map (one entry per test sample)
                              + numSampleInBoundary  The number of samples in decision boundary
                              + predicted_class      Predicted class
                              + mem_vals             Maximum membership value of each sample

    Samples whose label equals UNLABELED_CLASS are skipped: they are never
    counted as errors, and their predicted_class stays None and mem_vals 0.
    """
    # A single 1-D sample is promoted to a one-row matrix
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    n_test = XlT.shape[0]
    misclass = np.zeros(n_test)
    mem_vals = np.zeros(n_test)
    predicted_class = np.full(n_test, None)
    boundary_count = 0

    for i in range(n_test):
        if patClassIdTest[i] == UNLABELED_CLASS:
            misclass[i] = False
            continue

        # memberships of sample i with respect to all hyperboxes
        memberships = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper)
        top = memberships.max()
        winners = np.nonzero(memberships == top)[0]
        mem_vals[i] = top

        if len(np.unique(classId[winners])) > 1:
            boundary_count = boundary_count + 1

            # one copy of the sample (or of its centre) per tied hyperbox
            replicate = np.ones((len(winners), 1))
            if not (XlT[i] == XuT[i]).all():
                sample_mat = (replicate * XlT[i] + replicate * XuT[i]) / 2
            else:
                sample_mat = replicate * XlT[i]

            # centre points of the tied hyperboxes
            box_centres = (V[winners] + W[winners]) / 2

            distances = manhattan_distance(box_centres, sample_mat)
            chosen_box = winners[distances.argmin()]
        else:
            chosen_box = winners[0]

        predicted_class[i] = classId[chosen_box]
        misclass[i] = classId[chosen_box] != patClassIdTest[i]

    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis, misclass=misclass, numSampleInBoundary=boundary_count,
                 predicted_class=predicted_class, mem_vals=mem_vals)
def predict_with_probability_k_voting_new(V, W, classId, weights, XlT, XuT, patClassIdTest, K_threshold=5, gama=1, oper='min'):
    """
    GFMM classifier (test routine): Using K voting of values in weights for the K
    hyperboxes with the highest weighted membership values

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

        INPUT
            V                 Tested model hyperbox lower bounds
            W                 Tested model hyperbox upper bounds
            classId           Input data (hyperbox) class labels (crisp)
            weights           The weights of hyperboxes (multiplied element-wise with the memberships)
            XlT               Test data lower bounds (rows = objects, columns = features)
            XuT               Test data upper bounds (rows = objects, columns = features)
            patClassIdTest    Test data class labels (crisp)
            K_threshold       Number of top-ranked hyperboxes taking part in the vote (default: 5)
            gama              Membership function slope (default: 1)
            oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

        OUTPUT
            result            A Bunch with the following attributes:
                              + summis           Number of misclassified objects
                              + misclass         Error map (one entry per test sample)
                              + predicted_class  Predicted class

    When the selected hyperboxes span several classes, the class with the
    largest sum of weighted memberships wins; an exact tie goes to the class
    whose selected hyperboxes contain the larger maximum weight. Samples
    labelled UNLABELED_CLASS are skipped and never counted as errors.
    """
    # A single 1-D sample is promoted to a one-row matrix
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    n_test = XlT.shape[0]
    misclass = np.zeros(n_test)
    predicted_class = np.full(n_test, None)

    for i in range(n_test):
        if patClassIdTest[i] == UNLABELED_CLASS:
            misclass[i] = False
            continue

        # weighted memberships of sample i with respect to all hyperboxes
        weighted = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper) * weights

        # indexes of the K_threshold hyperboxes with the highest weighted membership
        ranking = np.argsort(weighted)[::-1]
        top_boxes = ranking[:K_threshold]
        top_classes = np.unique(classId[top_boxes])

        if len(top_classes) == 1:
            predicted_class[i] = top_classes[0]
        else:
            best_score = -1
            best_weight = -1
            for c in top_classes:
                in_class = classId[top_boxes] == c
                score = np.sum(weighted[top_boxes[in_class]])
                peak_weight = np.max(weights[top_boxes[in_class]])

                wins = best_score < score or (best_score == score and best_weight < peak_weight)
                if wins:
                    best_score = score
                    best_weight = peak_weight
                    predicted_class[i] = c

        misclass[i] = predicted_class[i] != patClassIdTest[i]

    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis, misclass=misclass, predicted_class=predicted_class)
def predict_with_probability_weighted(V, W, classId, numSamples, weights, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    GFMM classifier (test routine): Using a probability formula based on the number
    of samples in the case of many hyperboxes with different classes having the
    same maximum (weighted) membership value

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

        INPUT
            V                 Tested model hyperbox lower bounds
            W                 Tested model hyperbox upper bounds
            classId           Input data (hyperbox) class labels (crisp)
            numSamples        Number of samples of each corresponding hyperbox contained in V and W
            weights           The weights of hyperboxes (multiplied element-wise with the memberships)
            XlT               Test data lower bounds (rows = objects, columns = features)
            XuT               Test data upper bounds (rows = objects, columns = features)
            patClassIdTest    Test data class labels (crisp)
            gama              Membership function slope (default: 1)
            oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

        OUTPUT
            result            A Bunch with the following attributes:
                              + summis               Number of misclassified objects
                              + misclass             Error map (one entry per test sample)
                              + numSampleInBoundary  Number of samples resolved with the probability formula
                              + predicted_class      Predicted class
                              + mem_vals             Maximum weighted membership value of each sample

    Samples labelled UNLABELED_CLASS are skipped: they are never counted as
    errors, and their predicted_class stays None and mem_vals 0.
    """
    # A single 1-D sample is promoted to a one-row matrix
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    n_test = XlT.shape[0]
    misclass = np.zeros(n_test)
    predicted_class = np.full(n_test, None)
    mem_vals = np.zeros(n_test)
    boundary_count = 0

    for i in range(n_test):
        if patClassIdTest[i] == UNLABELED_CLASS:
            misclass[i] = False
            continue

        # weighted memberships of sample i with respect to all hyperboxes
        weighted = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper) * weights
        top = weighted.max()
        winners = np.nonzero(weighted == top)[0]
        mem_vals[i] = top

        tied_classes = np.unique(classId[winners])
        if len(tied_classes) <= 1:
            # no conflict between classes: take the first winning hyperbox
            predicted_class[i] = classId[winners[0]]
            misclass[i] = predicted_class[i] != patClassIdTest[i]
            continue

        chosen_class = UNLABELED_CLASS
        need_probability = True

        if top == 1:
            # prefer, at random, a winning hyperbox that holds exactly one sample
            single_sample_boxes = np.nonzero(numSamples[winners] == 1)[0]
            if len(single_sample_boxes) > 0:
                need_probability = False
                chosen_class = classId[int(random.choice(winners[single_sample_boxes]))]

        if need_probability:
            boundary_count = boundary_count + 1

            denominator = (weighted[winners] * numSamples[winners]).sum()
            best_prob = -1
            best_members = None
            for c in tied_classes:
                members = np.nonzero(classId[winners] == c)[0]
                numerator = (weighted[winners[members]] * numSamples[winners[members]]).sum()
                prob = numerator / denominator

                take = prob > best_prob
                if not take and prob == best_prob and best_members is not None:
                    # equal probability: the class backed by more samples wins
                    take = numSamples[winners[members]].sum() > numSamples[winners[best_members]].sum()

                if take:
                    best_prob = prob
                    chosen_class = c
                    best_members = members

        predicted_class[i] = chosen_class
        misclass[i] = chosen_class != patClassIdTest[i]

    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis, misclass=misclass, numSampleInBoundary=boundary_count,
                 predicted_class=predicted_class, mem_vals=mem_vals)
pre_id_cls = None for c in cls_same_mem: id_cls = np.nonzero(classId[maxVind] == c)[0] sum_pro_num = (mem[maxVind[id_cls]] * numSamples[maxVind[id_cls]]).sum() tmp = sum_pro_num / sum_prod_denum if tmp > max_prob or (tmp == max_prob and pre_id_cls is not None and numSamples[maxVind[id_cls]].sum() > numSamples[maxVind[pre_id_cls]].sum()): max_prob = tmp cls_val = c pre_id_cls = id_cls predicted_class[i] = cls_val if cls_val == patClassIdTest[i]: misclass[i] = False else: misclass[i] = True else: predicted_class[i] = classId[maxVind[0]] if classId[maxVind[0]] == patClassIdTest[i]: misclass[i] = False else: misclass[i] = True #misclass[i] = ~(np.any(classId[maxVind] == patClassIdTest[i]) | (patClassIdTest[i] == 0)) # results summis = np.sum(misclass).astype(np.int64) result = Bunch(summis = summis, misclass = misclass, numSampleInBoundary = numPointInBoundary, predicted_class=predicted_class) return result
def splitDatasetRndClassBasedTo2Part(Xl, Xu, patClassId, training_rate=0.5, isNorm=False, norm_range=[0, 1]):
    """
    Split a dataset into 2 parts randomly, class by class.

    The proportion training_rate is applied to each class separately: for a
    class with n samples, int(n * training_rate) randomly chosen samples go to
    the training part and the remaining ones go to the validation part.

        INPUT
            Xl              Input data lower bounds (rows = objects, columns = features)
            Xu              Input data upper bounds (rows = objects, columns = features)
            patClassId      Input data class labels (crisp)
            training_rate   The proportion of the samples of each class assigned to the training part
            isNorm          Do normalization of input samples or not?
            norm_range      New range of input data after normalization, for example: [0, 1]
                            (only passed on to normalize, never modified here)

        OUTPUT
            trainingSet     One object belonging to Bunch datatype contains training data with the following attributes:
                                + lower: lower bounds
                                + upper: upper bounds
                                + label: class labels
            validSet        One object belonging to Bunch datatype contains validation data with the following attributes:
                                + lower: lower bounds
                                + upper: upper bounds
                                + label: class labels

            Both outputs are None when patClassId contains no label at all.
    """
    if isNorm:
        Xl = normalize(Xl, norm_range)
        Xu = normalize(Xu, norm_range)

    # Row indices (into the full dataset) selected for each part, one chunk per class.
    train_chunks = []
    valid_chunks = []

    for class_label in np.unique(patClassId):
        # rows of the full dataset holding samples of this class
        class_rows = np.flatnonzero(patClassId == class_label)

        # exactly one permutation is drawn per class, in sorted class order
        shuffled_rows = class_rows[np.random.permutation(class_rows.size)]

        # cut-off position inside this class
        pivot = int(class_rows.size * training_rate)

        train_chunks.append(shuffled_rows[:pivot])
        valid_chunks.append(shuffled_rows[pivot:])

    if not train_chunks:
        # no class found: nothing to split
        return (None, None)

    # Index the inputs once instead of re-concatenating the growing result for every class.
    train_rows = np.concatenate(train_chunks)
    valid_rows = np.concatenate(valid_chunks)

    trainingSet = Bunch(lower=Xl[train_rows], upper=Xu[train_rows], label=patClassId[train_rows])
    validSet = Bunch(lower=Xl[valid_rows], upper=Xu[valid_rows], label=patClassId[valid_rows])

    return (trainingSet, validSet)
def splitDatasetRndClassBasedToKPart(Xl, Xu, patClassId, k=10, isNorm=False, norm_range=[0, 1]):
    """
    Split a dataset into k parts randomly, class by class.

    The samples of each class are shuffled and cut at k + 1 rounded, evenly
    spaced anchors, so every part receives a share of each class that is as
    even as the rounding allows.

        INPUT
            Xl              Input data lower bounds (rows = objects, columns = features)
            Xu              Input data upper bounds (rows = objects, columns = features)
            patClassId      Input data class labels (crisp)
            k               Number of parts needs to be split
            isNorm          Do normalization of input samples or not?
            norm_range      New range of input data after normalization, for example: [0, 1]
                            (only passed on to normalize, never modified here)

        OUTPUT
            partitionedA    A numpy object array with k entries, in which each entry is Bunch datatype:
                                + lower: lower bounds
                                + upper: upper bounds
                                + label: class labels
                            Entries are left as None when patClassId contains no label at all.
    """
    if isNorm:
        Xl = normalize(Xl, norm_range)
        Xu = normalize(Xu, norm_range)

    partitionedA = np.empty(k, dtype=Bunch)

    # part_chunks[i] collects, class after class, the dataset rows assigned to part i
    part_chunks = [[] for _ in range(k)]

    for class_label in np.unique(patClassId):
        # rows of the full dataset holding samples of this class
        class_rows = np.flatnonzero(patClassId == class_label)

        # exactly one permutation is drawn per class, in sorted class order
        shuffled_rows = class_rows[np.random.permutation(class_rows.size)]

        # bin the shuffled positions into k partitions
        anchors = np.round(np.linspace(0, class_rows.size, k + 1)).astype(np.int64)

        for i in range(k):
            part_chunks[i].append(shuffled_rows[anchors[i]:anchors[i + 1]])

    # Build every part once instead of re-concatenating it for each class.
    for i, chunks in enumerate(part_chunks):
        if chunks:
            rows = np.concatenate(chunks)
            partitionedA[i] = Bunch(lower=Xl[rows], upper=Xu[rows], label=patClassId[rows])

    return partitionedA
def predict(V, W, classId, XhT, patClassIdTest, gama=1, is_using_manhattan=True):
    """
    FMNN classifier (test routine)

      result = predict(V,W,classId,XhT,patClassIdTest,gama)

        INPUT
            V                   Tested model hyperbox lower bounds
            W                   Tested model hyperbox upper bounds
            classId             Input data (hyperbox) class labels (crisp)
            XhT                 Test input data (rows = objects, columns = features);
                                a single 1-D sample is reshaped to one row
            patClassIdTest      Test data class labels (crisp)
            gama                Membership function slope (default: 1)
            is_using_manhattan  How to break a tie when the hyperboxes with the maximum
                                membership belong to more than one class:
                                  + True:  take the class of the tied hyperbox whose centre
                                           (V + W) / 2 has the smallest manhattan_distance
                                           to the sample
                                  + False: pick one of the tied classes with rd.choice

        OUTPUT
            result        A object with Bunch datatype containing all results as follows:
                          + summis           Number of misclassified objects
                          + misclass         Error map (float array, 1 = misclassified, 0 = correct)
                          + predicted_class  Predicted class label of each object
    """
    if len(XhT.shape) == 1:
        XhT = XhT.reshape(1, -1)

    # initialization
    yX = XhT.shape[0]
    predicted_class = np.full(yX, None)
    misclass = np.zeros(yX)
    # Memberships are only needed for the current sample, so one reusable row
    # replaces a full (samples x hyperboxes) matrix that was never returned.
    mem = np.zeros(V.shape[0])

    # classifications
    for i in range(yX):
        # memberships of the current sample for all hyperboxes
        mem[:] = simpsonMembership(XhT[i, :], V, W, gama)
        bmax = mem.max()  # max membership value
        maxVind = np.nonzero(mem == bmax)[0]  # indexes of all hyperboxes with max membership

        winner_cls = np.unique(classId[maxVind])
        if len(winner_cls) > 1:
            if is_using_manhattan:
                # replicate the sample once per tied hyperbox
                XgT_mat = np.ones((len(maxVind), 1)) * XhT[i]
                # average points of all hyperboxes with the same membership value
                avg_point_mat = (V[maxVind] + W[maxVind]) / 2
                # manhattan distance from the sample to each of those average points
                maht_dist = manhattan_distance(avg_point_mat, XgT_mat)
                id_min_dist = maht_dist.argmin()

                predicted_class[i] = classId[maxVind[id_min_dist]]
            else:
                # select random class among the tied ones
                predicted_class[i] = rd.choice(winner_cls)
        else:
            predicted_class[i] = classId[maxVind[0]]

        # same rule for both branches: an error whenever prediction and label differ
        misclass[i] = not (predicted_class[i] == patClassIdTest[i])

    # results
    summis = np.sum(misclass).astype(np.int64)

    result = Bunch(summis=summis, misclass=misclass, predicted_class=predicted_class)
    return result
def torch_predict(V, W, classId, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    GFMM classifier (test routine). Implemented by Pytorch

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

        INPUT
            V                 Tested model hyperbox lower bounds
            W                 Tested model hyperbox upper bounds
            classId           Input data (hyperbox) class labels (crisp)
            XlT               Test data lower bounds (rows = objects, columns = features)
            XuT               Test data upper bounds (rows = objects, columns = features)
            patClassIdTest    Test data class labels (crisp)
            gama              Membership function slope (default: 1)
            oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

        OUTPUT
            result           A object with Bunch datatype containing all results as follows:
                          + summis           Number of misclassified objects (tensor, sum of misclass)
                          + misclass         Error map (float tensor, 1 = misclassified, 0 = correct)

        An object is counted as misclassified when its maximum membership is 0,
        when the hyperboxes with the maximum membership belong to more than one
        class, or when their class differs from the object's label. In the
        last case an object labelled UNLABELED_CLASS is not counted as an error.

        All tensors are moved to the GPU when is_Have_GPU is set and either the
        number of elements of W or the number of features reaches
        GPU_Computing_Threshold; misclass then lives on the GPU as well.
    """
    if len(XlT.size()) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.size()) == 1:
        XuT = XuT.reshape(1, -1)

    # initialization
    yX = XlT.size(0)

    isUsingGPU = bool(is_Have_GPU and (W.size(0) * W.size(1) >= GPU_Computing_Threshold or XlT.size(1) >= GPU_Computing_Threshold))

    if isUsingGPU:
        V = V.cuda()
        W = W.cuda()
        classId = classId.cuda()
        XlT = XlT.cuda()
        XuT = XuT.cuda()
        patClassIdTest = patClassIdTest.cuda()
        # float32 zeros on the current CUDA device (replaces the legacy
        # torch.cuda.FloatTensor(n).fill_(0) constructor)
        misclass = torch.zeros(yX, dtype=torch.float32, device='cuda')
        compute_membership = gpu_memberG
    else:
        misclass = torch.zeros(yX)
        compute_membership = torch_memberG

    # classifications
    # Plain integer indices: iterating over an index tensor only wraps every
    # position in a 0-d tensor without changing what is indexed.
    for i in range(yX):
        # memberships of the current object for all hyperboxes
        mem = compute_membership(XlT[i, :], XuT[i, :], V, W, gama, oper)
        bmax = mem.max()  # max membership value
        maxVind = mem == bmax  # mask of all hyperboxes with max membership

        if bmax == 0:
            print('zero maximum membership value')  # this is probably bad...
            misclass[i] = 1
            continue

        if len(torch.unique(classId[maxVind])) > 1:
            # winners from several classes: counted as an error
            misclass[i] = 1
            continue

        is_match = bool(torch.any(classId[maxVind] == patClassIdTest[i]) == 1)
        if is_match or (patClassIdTest[i] == UNLABELED_CLASS):
            misclass[i] = 0
        else:
            misclass[i] = 1

    # results
    summis = torch.sum(misclass)

    result = Bunch(summis=summis, misclass=misclass)
    return result