def testSingle(self, original_row):
    """Classify one example dict against the current mean/covariance classifier.

    Only the features shared between the example and ``self.mean_dict`` take
    part in the decision.  Returns whatever ``algorithm_methods.classify``
    returns for the example (``False`` on a failed classification), or
    ``False`` outright when there is no feature overlap, or a coin flip when
    the overlapping features look too uncertain.
    """
    row = original_row.copy()  # work on a copy so the caller's dict keeps its label
    label = row.pop('class_label', None)  #extract the label
    commonKeys = misc.findCommonKeys(
        row, self.mean_dict)  #do the classifications based on shared keys
    row_subset = misc.subsetDictionary(row, commonKeys)
    mean_subset = misc.subsetDictionary(self.mean_dict, commonKeys)
    row_vector = misc.dictToNumpyArray(row_subset)
    mean_vector = misc.dictToNumpyArray(mean_subset)
    covariance_vector = misc.dictToNumpyArray(
        misc.subsetDictionary(
            self.covariance_dict,
            commonKeys))  #this is for alternative classification style
    # No shared features at all: nothing to classify with.
    if len(covariance_vector) == 0:
        return False
    # If the shared features are far less confident (larger covariance) than
    # the classifier average — factor 2.2 is an empirical threshold, TODO
    # confirm — fall back to a random guess.
    if np.average(covariance_vector) > np.average(
            list(self.covariance_dict.values())) * 2.2:
        return np.random.choice([True, False])  # NOTE: nondeterministic branch
    if np.sum(mean_vector) != 0:
        #confidences= np.divide(np.reciprocal(covariance_vector), np.sum(np.reciprocal(covariance_vector)))
        # Rescale the shared means so their L1 mass matches the full
        # classifier's L1 mass (compensates for using only a feature subset).
        mean_vector = np.multiply(
            np.divide(mean_vector, np.sum(np.abs(mean_vector))),
            np.sum(np.abs(list(self.mean_dict.values()))))
        #mean_vector= [a*b for a,b in zip(mean_vector,confidences)]
    return self.algorithm_methods.classify(
        row_vector, mean_vector, covariance_vector,
        label)  #check if it can classify correctly
def train(self):
    """Single-pass trainer: predict each row, then fold it into per-feature,
    per-class running statistics.

    ``self.metadata`` maps feature -> {label: [mean, variance, count]}.
    Returns ``(feature_summary, error_rate)`` where ``feature_summary`` is the
    per-row feature count and ``error_rate`` is prediction errors over the
    whole training set.
    """
    error_count=0
    feature_summary=[]
    for row_dict in self.training_dataset:
        #print(row_dict['class_label'])
        # NOTE(review): pop mutates the dataset row in place (no copy as the
        # sibling testSingle/train methods do) — a second pass over
        # self.training_dataset would see no labels; confirm intended.
        label=row_dict.pop('class_label', None)
        feature_summary.append(len(row_dict))  # record row dimensionality
        commonKeys=misc.findCommonKeys(row_dict, self.metadata)
        row_subset=misc.subsetDictionary(row_dict, commonKeys)
        new_attributes=misc.subsetDictionary(row_dict, misc.findDifferentKeys(row_dict, self.metadata))
        #try to classify first (prequential evaluation: test before train)
        if self.predict(label, row_subset)==-1:
            error_count+=1
        #update metadata for never-before-seen features
        # NOTE(review): the -label key assumes labels are +1/-1 — confirm.
        for key, value in new_attributes.items():
            self.metadata[key]={label:[value, 1, 1], -label:[0, 1, 0]} #mean, variance, count
        # Incrementally update mean/variance/count for already-known features.
        for key, value in row_subset.items():
            data=self.metadata[key][label]
            self.metadata[key][label]=[self.updateMean(data[0],data[2],value), self.updateVar(data[1], data[2], value, data[0]), self.updateCount(data[2])]
    return feature_summary, error_count/len(self.training_dataset)
def testSingle(self, original_row):
    """Score one example against the current weight vector.

    Uses only the features the example shares with ``self.weight_dict``.

    Returns:
        (loss, product): ``product`` is the signed margin
        ``label * <w, x>`` (<= 0 means misclassified); ``loss`` is the
        hinge loss ``max(0, 1 - product)``.
    """
    row=original_row.copy()  # keep the caller's dict (and its label) intact
    label=row.pop('class_label', None) #extract the label
    commonKeys=misc.findCommonKeys(row, self.weight_dict) #do the classifications based on shared keys
    row_subset=misc.subsetDictionary(row, commonKeys)
    weight_subset=misc.subsetDictionary(self.weight_dict, commonKeys)
    row_vector=misc.dictToNumpyArray(row_subset)
    weight_vector=misc.dictToNumpyArray(weight_subset)
    # Fix: the dot product was previously computed twice; compute the signed
    # margin once and derive the hinge loss from it.
    product=label*(weight_vector.dot(row_vector))
    loss=np.maximum(0, 1-product)
    return loss, product #check if it can classify correctly
def testSingle(self, original_row):
    """Classify a single example with the current classifier state.

    Restricts the example to the features it shares with ``self.mean_dict``
    and delegates the decision to ``self.classify``.
    """
    example = original_row.copy()  # never mutate the caller's row
    label = example.pop('class_label', None)  # the label travels separately
    shared_keys = misc.findCommonKeys(example, self.mean_dict)
    example_subset = misc.subsetDictionary(example, shared_keys)
    mean_subset = misc.subsetDictionary(self.mean_dict, shared_keys)
    example_vec = misc.dictToNumpyArray(example_subset)
    mean_vec = misc.dictToNumpyArray(mean_subset)
    cov_vec = misc.dictToNumpyArray(
        misc.subsetDictionary(self.covariance_dict, shared_keys))
    # Decision is made on the shared-feature vectors only.
    return self.classify(example_vec, mean_vec, cov_vec, label)
def train(self): #use self.training_dataset
    """Passive-aggressive-style online trainer over ``self.training_dataset``.

    For each example: record the prequential (test-then-train) error, compute
    the step size ``tao`` from the hinge loss, update weights for shared
    features, create weights for new features, then merge/sparsify.

    Returns ``(classifier_summary, train_error_vector)`` — classifier sizes
    per update and the running error rate.  NOTE(review):
    ``train_error_vector`` is re-initialized every epoch, so only the last
    epoch's curve is returned — confirm intended.
    """
    if len(self.weight_dict)==0:
        # lazy init: classifier takes the dimensions of the first example
        self.setInitialClassifier(self.training_dataset[0].copy())
    for i in range(0, self.epoch):
        train_error_vector=[]
        train_error=0
        iterations=0
        for original_row in self.training_dataset:
            iterations+=1
            if(len(original_row))<=1: #empty row comes
                #train_error+=1
                train_error_vector.append(train_error/iterations)
                continue
            row=original_row.copy()
            #check and record training error, for streaming accuracy
            label=row['class_label'] #get the class label of example and pop it from the dictionary
            loss, product=self.testSingle(row)
            if product<=0:  # non-positive margin = misclassified
                train_error+=1
            train_error_vector.append(train_error/iterations)
            # NOTE(review): setParameter receives row with 'class_label'
            # still inside (popped only afterwards) — confirm intended.
            tao=self.setParameter(loss, row)
            row.pop('class_label', None)
            #these dicts will be merged, needs initialization to generalize merging
            common_weight_dict={}
            new_weight_dict={}
            #Shared attributes
            if bool(misc.findCommonKeys(row, self.weight_dict))==True:
                commonKeys=misc.findCommonKeys(row, self.weight_dict)
                row_subset=misc.subsetDictionary(row, commonKeys)
                weight_subset=misc.subsetDictionary(self.weight_dict, commonKeys)
                common_weight_dict=self.learnCommon(weight_subset, row_subset, label, tao)
            #New attributes
            if bool(misc.subsetDictionary(row, misc.findDifferentKeys(row, self.weight_dict)))==True: #it means there are new attributes
                new_attribute_dict=misc.subsetDictionary(row, misc.findDifferentKeys(row, self.weight_dict))
                new_weight_dict=self.learnNew(new_attribute_dict, label, tao)
            #Merge mean and covariance dictionaries
            #merge means
            # NOTE(review): weights for features absent from this row are
            # dropped here (no old-attribute carry-over) — confirm intended.
            common_weight_dict.update(new_weight_dict)
            self.weight_dict=common_weight_dict
            #sparsify the current classifier
            self.impute() #handle overflow and underflow
            self.weight_dict=self.sparsity_step() #only works if sparsity parameter is on
            #record classifier lengths
            self.classifier_summary.append(len(self.weight_dict))
    #to plot change in classifier dimension through training, and train error for stream accuracy
    return self.classifier_summary, train_error_vector
def test(self): #returns the misclassification rate over the held-out set
    """Evaluate the classifier on ``self.test_dataset``.

    Returns the fraction of examples for which
    ``algorithm_methods.classify`` returned ``False`` — i.e. the test
    ERROR rate, not the accuracy (the original comment was misleading).
    """
    counter = 0 #counts misclassified examples
    for original_row in self.test_dataset:
        row = original_row.copy()  # keep the dataset row (and its label) intact
        label = row.pop('class_label', None)  #extract the label
        commonKeys = misc.findCommonKeys(
            row, self.mean_dict)  #do the classifications based on shared keys
        row_subset = misc.subsetDictionary(row, commonKeys)
        mean_subset = misc.subsetDictionary(self.mean_dict, commonKeys)
        row_vector = misc.dictToNumpyArray(row_subset)
        mean_vector = misc.dictToNumpyArray(mean_subset)
        covariance_vector = misc.dictToNumpyArray(
            misc.subsetDictionary(
                self.covariance_dict,
                commonKeys))  #this is for alternative classification style
        if self.algorithm_methods.classify(
                row_vector, mean_vector, covariance_vector,
                label) == False:  #check if it can classify correctly
            counter += 1
    return counter / len(
        self.test_dataset
    )  #return number of false classifications over all examples
def train(self): #use self.training_dataset
    """Confidence-weighted online trainer (mean + covariance per feature).

    Test-then-train over ``self.epoch`` passes: count the prequential error,
    update mean/covariance for features shared with the current classifier,
    create entries for brand-new features, then merge and impute.

    Returns ``(classifier_summary, average_train_error)``.
    """
    #set initial arbitrary classifier with the dimensions of first example
    #print("Train called")
    self.setInitialClassifier(self.training_dataset[0].copy())
    self.train_error = 0
    #debug
    #print("After removing:")
    #print("Length of the current dataset:"+str(len(self.training_dataset)))
    #for i in range(0,5):
    #    print("len. element "+str(i)+": "+str(len(self.training_dataset[len(self.training_dataset)-1])))
    #debug
    for i in range(0, self.epoch):
        for original_row in self.training_dataset:
            row = original_row.copy(
            ) #copy the example to not make changes on original, otherwise no labels for following heldouts
            #check and record training error, for streaming accuracy
            if (len(row)) == 1: #empty row comes
                continue
            if self.testSingle(row) == False:
                self.train_error += 1
            label = row.pop(
                'class_label', None
            ) #get the class label of example and pop it from the dictionary
            #these dicts will be merged, needs initialization to generalize merging
            old_partial_mean_dict = {}
            old_partial_covariance_dict = {}
            common_mean_dict = {}
            common_covariance_dict = {}
            new_partial_mean_dict = {}
            new_partial_covariance_dict = {}
            #Shared attributes
            if bool(misc.findCommonKeys(row, self.mean_dict)) == True:
                commonKeys = misc.findCommonKeys(row, self.mean_dict)
                row_subset = misc.subsetDictionary(row, commonKeys)
                mean_subset = misc.subsetDictionary(
                    self.mean_dict, commonKeys)
                covariance_subset = misc.subsetDictionary(
                    self.covariance_dict, commonKeys)
                common_mean_dict, common_covariance_dict, indicator = self.algorithm_methods.learnCommon(
                    mean_subset, covariance_subset, row_subset, label)
                #if classified large margin, then dont learn new attributes, skip to next
                if indicator == 1:
                    continue
            #New attributes
            if bool(
                    misc.subsetDictionary(
                        row, misc.findDifferentKeys(row, self.mean_dict))
            ) == True: #it means there are new attributes
                new_attribute_dict = misc.subsetDictionary(
                    row, misc.findDifferentKeys(row, self.mean_dict))
                new_partial_mean_dict, new_partial_covariance_dict = self.algorithm_methods.learnNew(
                    new_attribute_dict, label)
            #Merge mean and covariance dictionaries
            #merge means
            # NOTE(review): unlike the sibling train() that carries old
            # attributes over, old_partial_* stays empty here, so features
            # absent from this row are dropped from the classifier — confirm
            # this forgetting behavior is intended.
            old_partial_mean_dict.update(common_mean_dict)
            old_partial_mean_dict.update(new_partial_mean_dict)
            self.mean_dict = old_partial_mean_dict
            #merge covariances
            old_partial_covariance_dict.update(common_covariance_dict)
            old_partial_covariance_dict.update(new_partial_covariance_dict)
            self.covariance_dict = old_partial_covariance_dict
            #record classifier lengths
            self.classifier_summary.append(len(self.mean_dict))
            #sparsify the current classifier
            #self.sparsity_step() #only works if sparsity parameter is on
            self.impute() #handle overflow and underflow
    #to plot change in classifier dimension through training, and train error for stream accuracy
    return self.classifier_summary, self.train_error / (
        len(self.training_dataset) * self.epoch)
def train(self): #uses self.training_dataset
    """Confidence-weighted online trainer that preserves old attributes.

    Per example (test-then-train): record the running error, carry over
    classifier entries for features NOT in the row, update shared features
    via ``learnCommon``, add new features via ``learnNew``, then merge the
    three partial dicts back into ``self.mean_dict``/``self.covariance_dict``.

    Returns ``(classifier_summary, train_error_vector)``.  NOTE(review):
    ``train_error_vector`` is reset each epoch, so only the final epoch's
    curve is returned — confirm intended.
    """
    if len(self.mean_dict) == 0:
        # lazy init: classifier takes the dimensions of the first example
        self.setInitialClassifier(self.training_dataset[0].copy())
    for i in range(0, self.epoch):
        train_error_vector = []
        train_error = 0
        iterations = 0
        for original_row in self.training_dataset:
            iterations += 1
            if (len(original_row)) <= 1: #empty row comes
                #train_error+=1
                train_error_vector.append(train_error / iterations)
                continue
            row = original_row.copy(
            ) #copy the example to not make changes on original
            if self.testSingle(row) == False:
                train_error += 1
            train_error_vector.append(train_error / iterations)
            #init dicts — these are merged below, so they all need to exist
            old_partial_mean_dict = {}
            old_partial_covariance_dict = {}
            common_mean_dict = {}
            common_covariance_dict = {}
            new_partial_mean_dict = {}
            new_partial_covariance_dict = {}
            label = row.pop(
                'class_label', None
            ) #get the class label of example and pop it from the dictionary
            #Old attributes: keep classifier entries for features absent from this row
            if bool(
                    misc.subsetDictionary(
                        self.mean_dict,
                        misc.findDifferentKeys(self.mean_dict, row))) == True:
                old_partial_mean_dict = misc.subsetDictionary(
                    self.mean_dict,
                    misc.findDifferentKeys(self.mean_dict, row))
                old_partial_covariance_dict = misc.subsetDictionary(
                    self.covariance_dict,
                    misc.findDifferentKeys(self.mean_dict, row))
            #Shared attributes
            if bool(misc.findCommonKeys(row, self.mean_dict)) == True:
                commonKeys = misc.findCommonKeys(row, self.mean_dict)
                row_subset = misc.subsetDictionary(row, commonKeys)
                mean_subset = misc.subsetDictionary(
                    self.mean_dict, commonKeys)
                covariance_subset = misc.subsetDictionary(
                    self.covariance_dict, commonKeys)
                common_mean_dict, common_covariance_dict, large_margin = self.algorithm_methods.learnCommon(
                    mean_subset, covariance_subset, row_subset, label,
                    self.covariance_dict)
                #if classified large margin, then dont learn new attributes, skip to next
                if large_margin == 1:
                    continue
            #New attributes
            if bool(
                    misc.subsetDictionary(
                        row, misc.findDifferentKeys(row,
                                                    self.mean_dict))) == True:
                new_attribute_dict = misc.subsetDictionary(
                    row, misc.findDifferentKeys(row, self.mean_dict))
                new_partial_mean_dict, new_partial_covariance_dict = self.algorithm_methods.learnNew(
                    new_attribute_dict, label)
            #Merge mean and covariance dictionaries
            old_partial_mean_dict.update(common_mean_dict)
            old_partial_mean_dict.update(new_partial_mean_dict)
            self.mean_dict = old_partial_mean_dict
            old_partial_covariance_dict.update(common_covariance_dict)
            old_partial_covariance_dict.update(new_partial_covariance_dict)
            self.covariance_dict = old_partial_covariance_dict
            #record classifier lengths
            self.classifier_summary.append(len(self.mean_dict))
            #sparsify the current classifier
            self.impute() #handle overflow and underflow
            if self.sparse == 1:
                self.mean_dict = self.sparsity_step(
                ) #only works if sparsity parameter is on
    #to plot change in classifier dimension through training, and train error for stream accuracy
    #return self.classifier_summary, self.train_error/(len(self.training_dataset)*self.epoch)
    return self.classifier_summary, train_error_vector