Пример #1
0
 def testSingle(self, original_row):
     """Classify one example against the current mean/covariance model.

     Works on a copy of the row, pops 'class_label', restricts model and
     row to their shared attributes, optionally rescales the partial mean
     vector, and delegates the decision to self.algorithm_methods.classify.
     Returns False outright when the row shares no attributes with the
     model, and a random verdict when the shared covariance looks too
     uncertain.
     """
     row = original_row.copy()
     label = row.pop('class_label', None)  # ground-truth label, if present
     # restrict both the row and the model to their shared attributes
     shared = misc.findCommonKeys(row, self.mean_dict)
     row_vector = misc.dictToNumpyArray(misc.subsetDictionary(row, shared))
     mean_vector = misc.dictToNumpyArray(
         misc.subsetDictionary(self.mean_dict, shared))
     covariance_vector = misc.dictToNumpyArray(
         misc.subsetDictionary(self.covariance_dict, shared))
     # no overlap with the model: cannot classify at all
     if len(covariance_vector) == 0:
         return False
     # shared attributes are far more uncertain than average: guess randomly
     uncertainty_cutoff = np.average(list(self.covariance_dict.values())) * 2.2
     if np.average(covariance_vector) > uncertainty_cutoff:
         return np.random.choice([True, False])
     if np.sum(mean_vector) != 0:
         # renormalize the partial mean vector to the model's total mean mass
         total_mass = np.sum(np.abs(list(self.mean_dict.values())))
         mean_vector = (mean_vector / np.sum(np.abs(mean_vector))) * total_mass
     # let the algorithm decide whether the example is classified correctly
     return self.algorithm_methods.classify(row_vector, mean_vector,
                                            covariance_vector, label)
Пример #2
0
 def train(self):
     """Single pass over self.training_dataset, maintaining per-attribute
     running statistics [mean, variance, count] per class in self.metadata.

     Returns:
         tuple: (feature_summary, error_rate) -- the number of attributes
         seen in each row, and error_count / len(self.training_dataset).
     """
     error_count=0
     feature_summary=[]  # attribute count per row, for diagnostics/plotting
     for row_dict in self.training_dataset:
         #print(row_dict['class_label'])
         # NOTE(review): pops the label from the dataset row itself (no
         # copy), so the stored dataset is mutated -- confirm callers do
         # not need 'class_label' afterwards.
         label=row_dict.pop('class_label', None)
         feature_summary.append(len(row_dict))
         commonKeys=misc.findCommonKeys(row_dict, self.metadata)  # attributes already tracked
         row_subset=misc.subsetDictionary(row_dict, commonKeys)
         new_attributes=misc.subsetDictionary(row_dict, misc.findDifferentKeys(row_dict, self.metadata))  # never-seen attributes
         #try to classify first
         if self.predict(label, row_subset)==-1:
             error_count+=1
         #update metadata
         # NOTE(review): both update loops below are indented under the
         # '==-1' branch, so metadata is updated ONLY on a misclassification
         # (error-driven update), even though the comment above sits one
         # level out -- confirm this is intentional.
             for key, value in new_attributes.items():
                 # assumes labels are numeric +/-1 so that -label indexes the
                 # opposite class -- TODO confirm
                 self.metadata[key]={label:[value, 1, 1], -label:[0, 1, 0]} #mean, variance, count

             for key, value in row_subset.items():
                 data=self.metadata[key][label]
                 self.metadata[key][label]=[self.updateMean(data[0],data[2],value),
                                            self.updateVar(data[1], data[2], value, data[0]),
                                            self.updateCount(data[2])]
     return feature_summary, error_count/len(self.training_dataset)
         
         
     
     
Пример #3
0
 def testSingle(self, original_row):
         """Score one example with the current linear model.

         Restricts the row and self.weight_dict to their shared attributes
         and returns (hinge_loss, margin), where margin = label * <w, x>.
         A non-positive margin indicates a misclassification.
         """
         row = original_row.copy()
         label = row.pop('class_label', None)  # extract the ground-truth label
         # classification is based only on the attributes both sides share
         shared_keys = misc.findCommonKeys(row, self.weight_dict)
         x = misc.dictToNumpyArray(misc.subsetDictionary(row, shared_keys))
         w = misc.dictToNumpyArray(
             misc.subsetDictionary(self.weight_dict, shared_keys))
         margin = label * w.dot(x)
         hinge_loss = np.maximum(0, 1 - margin)
         return hinge_loss, margin
Пример #4
0
 def testSingle(self, original_row):
     """Classify one example using the attributes it shares with the model.

     Pops 'class_label' from a copy of the row, builds aligned row/mean/
     covariance vectors over the shared attributes, and delegates the
     decision to self.classify.
     """
     row = original_row.copy()
     label = row.pop('class_label', None)  # peel off the ground-truth label
     # restrict everything to the attributes shared with the model
     shared = misc.findCommonKeys(row, self.mean_dict)
     row_vector = misc.dictToNumpyArray(misc.subsetDictionary(row, shared))
     mean_vector = misc.dictToNumpyArray(
         misc.subsetDictionary(self.mean_dict, shared))
     covariance_vector = misc.dictToNumpyArray(
         misc.subsetDictionary(self.covariance_dict, shared))
     # delegate the actual decision to the classifier implementation
     return self.classify(row_vector, mean_vector, covariance_vector, label)
Пример #5
0
 def train(self): #use self.training_dataset
     """Online training over self.training_dataset for self.epoch passes,
     updating self.weight_dict per example.

     Returns:
         tuple: (self.classifier_summary, train_error_vector) -- classifier
         size after every update, and the running train-error rate per
         iteration. NOTE(review): train_error_vector is re-created at the
         start of each epoch, so the returned vector covers only the LAST
         epoch -- confirm this is intended.
     """
     if len(self.weight_dict)==0:
         self.setInitialClassifier(self.training_dataset[0].copy())
     for i in range(0, self.epoch):
         train_error_vector=[]
         train_error=0
         iterations=0
         for original_row in self.training_dataset:
             iterations+=1
             if(len(original_row))<=1: #empty row comes
                 #train_error+=1
                 train_error_vector.append(train_error/iterations)
                 continue
             row=original_row.copy()
             #check and record training error, for streaming accuracy
             # NOTE(review): direct indexing assumes every non-empty row has
             # 'class_label' (KeyError otherwise); it is popped later, below.
             label=row['class_label'] #get the class label of the example
             loss, product=self.testSingle(row)
             if product<=0: # non-positive margin => misclassified
                 train_error+=1
             train_error_vector.append(train_error/iterations)
             # step size derived from the loss -- presumably a
             # passive-aggressive update; TODO confirm setParameter semantics
             tao=self.setParameter(loss, row)
             row.pop('class_label', None)
             #these dicts will be merged, needs initialization to generalize merging
             common_weight_dict={}
             new_weight_dict={}
             #Shared attributes
             if bool(misc.findCommonKeys(row, self.weight_dict))==True:
                 commonKeys=misc.findCommonKeys(row, self.weight_dict)
                 row_subset=misc.subsetDictionary(row, commonKeys)
                 weight_subset=misc.subsetDictionary(self.weight_dict, commonKeys)
                 common_weight_dict=self.learnCommon(weight_subset, row_subset, label, tao)
             #New attributes
             if bool(misc.subsetDictionary(row, misc.findDifferentKeys(row, self.weight_dict)))==True: #it means there are new attributes
                 new_attribute_dict=misc.subsetDictionary(row, misc.findDifferentKeys(row, self.weight_dict))
                 new_weight_dict=self.learnNew(new_attribute_dict, label, tao)
             #Merge mean and covariance dictionaries
             #merge means
             # NOTE(review): weights for attributes absent from this row are
             # dropped here -- the new classifier is built only from the
             # common + new attributes. Confirm this pruning is intentional.
             common_weight_dict.update(new_weight_dict)
             self.weight_dict=common_weight_dict
             #sparsify the current classifier
             self.impute() #handle overflow and underflow
             self.weight_dict=self.sparsity_step() #only works if sparsity parameter is on
             #record classifier lengths
             self.classifier_summary.append(len(self.weight_dict))

     #to plot change in classifier dimension through training, and train error for stream accuracy
     return self.classifier_summary, train_error_vector
Пример #6
0
 def test(self):  #returns average test accuracy
     """Evaluate the model on self.test_dataset.

     Counts the rows for which self.algorithm_methods.classify(...)
     returned False and reports that count divided by the dataset size,
     i.e. the fraction of examples the classifier got wrong.
     """
     misclassified = 0
     for original_row in self.test_dataset:
         row = original_row.copy()
         label = row.pop('class_label', None)  # ground-truth label
         # classify on the attributes shared between the row and the model
         shared = misc.findCommonKeys(row, self.mean_dict)
         row_vector = misc.dictToNumpyArray(misc.subsetDictionary(row, shared))
         mean_vector = misc.dictToNumpyArray(
             misc.subsetDictionary(self.mean_dict, shared))
         covariance_vector = misc.dictToNumpyArray(
             misc.subsetDictionary(self.covariance_dict, shared))
         verdict = self.algorithm_methods.classify(row_vector, mean_vector,
                                                   covariance_vector, label)
         if verdict == False:
             misclassified += 1
     # number of false classifications over all examples
     return misclassified / len(self.test_dataset)
Пример #7
0
 def train(self):  #use self.training_dataset
     """Online training over self.training_dataset for self.epoch passes,
     updating self.mean_dict and self.covariance_dict per example.

     Returns:
         tuple: (self.classifier_summary, error_rate) -- classifier size
         after every update, and self.train_error divided by
         (len(self.training_dataset) * self.epoch).
     """
     #set initial arbitrary classifier with the dimensions of first example
     #print("Train called")
     self.setInitialClassifier(self.training_dataset[0].copy())
     self.train_error = 0
     #debug
     #print("After removing:")
     #print("Length of the current dataset:"+str(len(self.training_dataset)))
     #for i in range(0,5):
     #   print("len. element "+str(i)+": "+str(len(self.training_dataset[len(self.training_dataset)-1])))
     #debug
     for i in range(0, self.epoch):
         for original_row in self.training_dataset:
             row = original_row.copy(
             )  #copy the example to not make changes on original, otherwise no labels for following heldouts
             #check and record training error, for streaming accuracy
             # a row of length 1 presumably holds only the class label
             if (len(row)) == 1:  #empty row comes
                 continue
             # testSingle works on its own copy, so 'class_label' is still
             # present in row at this point
             if self.testSingle(row) == False:
                 self.train_error += 1
             label = row.pop(
                 'class_label', None
             )  #get the class label of example and pop it from the dictionary
             #these dicts will be merged, needs initialization to generalize merging
             old_partial_mean_dict = {}
             old_partial_covariance_dict = {}
             common_mean_dict = {}
             common_covariance_dict = {}
             new_partial_mean_dict = {}
             new_partial_covariance_dict = {}
             #Shared attributes
             if bool(misc.findCommonKeys(row, self.mean_dict)) == True:
                 commonKeys = misc.findCommonKeys(row, self.mean_dict)
                 row_subset = misc.subsetDictionary(row, commonKeys)
                 mean_subset = misc.subsetDictionary(
                     self.mean_dict, commonKeys)
                 covariance_subset = misc.subsetDictionary(
                     self.covariance_dict, commonKeys)
                 common_mean_dict, common_covariance_dict, indicator = self.algorithm_methods.learnCommon(
                     mean_subset, covariance_subset, row_subset, label)
                 #if classified large margin, then dont learn new attributes, skip to next
                 if indicator == 1:
                     continue
             #New attributes
             if bool(
                     misc.subsetDictionary(
                         row, misc.findDifferentKeys(row, self.mean_dict))
             ) == True:  #it means there are new attributes
                 new_attribute_dict = misc.subsetDictionary(
                     row, misc.findDifferentKeys(row, self.mean_dict))
                 new_partial_mean_dict, new_partial_covariance_dict = self.algorithm_methods.learnNew(
                     new_attribute_dict, label)
             #Merge mean and covariance dictionaries
             #merge means
             # NOTE(review): old_partial_* start empty and are never filled
             # with the model's attributes that are absent from this row,
             # so those attributes are dropped from the classifier on every
             # update -- confirm this pruning is intentional.
             old_partial_mean_dict.update(common_mean_dict)
             old_partial_mean_dict.update(new_partial_mean_dict)
             self.mean_dict = old_partial_mean_dict
             #merge covariances
             old_partial_covariance_dict.update(common_covariance_dict)
             old_partial_covariance_dict.update(new_partial_covariance_dict)
             self.covariance_dict = old_partial_covariance_dict
             #record classifier lengths
             self.classifier_summary.append(len(self.mean_dict))
             #sparsify the current classifier
             #self.sparsity_step() #only works if sparsity parameter is on
             self.impute()  #handle overflow and underflow
     #to plot change in classifier dimension through training, and train error for stream accuracy
     return self.classifier_summary, self.train_error / (
         len(self.training_dataset) * self.epoch)
Пример #8
0
    def train(self):  #uses self.training_dataset
        """Online training over self.training_dataset for self.epoch passes,
        updating self.mean_dict and self.covariance_dict per example.

        Unlike the variant that rebuilds the model from only common + new
        attributes, this one explicitly carries over attributes of the model
        that are absent from the current row (the "old attributes" branch).

        Returns:
            tuple: (self.classifier_summary, train_error_vector) -- classifier
            size after every update, and the running train-error rate per
            iteration. NOTE(review): train_error_vector is re-created each
            epoch, so the returned vector covers only the LAST epoch --
            confirm this is intended.
        """
        if len(self.mean_dict) == 0:
            self.setInitialClassifier(self.training_dataset[0].copy())

        for i in range(0, self.epoch):
            train_error_vector = []
            train_error = 0
            iterations = 0
            for original_row in self.training_dataset:
                iterations += 1
                if (len(original_row)) <= 1:  #empty row comes
                    #train_error+=1
                    train_error_vector.append(train_error / iterations)
                    continue
                row = original_row.copy(
                )  #copy the example to not make changes on original
                if self.testSingle(row) == False:
                    train_error += 1
                train_error_vector.append(train_error / iterations)
                #init dicts
                old_partial_mean_dict = {}
                old_partial_covariance_dict = {}
                common_mean_dict = {}
                common_covariance_dict = {}
                new_partial_mean_dict = {}
                new_partial_covariance_dict = {}
                label = row.pop(
                    'class_label', None
                )  #get the class label of example and pop it from the dictionary
                #Old attributes
                # preserve model attributes that this row does not mention;
                # assumes mean_dict and covariance_dict share the same key
                # set -- TODO confirm
                if bool(
                        misc.subsetDictionary(
                            self.mean_dict,
                            misc.findDifferentKeys(self.mean_dict,
                                                   row))) == True:
                    old_partial_mean_dict = misc.subsetDictionary(
                        self.mean_dict,
                        misc.findDifferentKeys(self.mean_dict, row))
                    old_partial_covariance_dict = misc.subsetDictionary(
                        self.covariance_dict,
                        misc.findDifferentKeys(self.mean_dict, row))
                #Shared attributes
                if bool(misc.findCommonKeys(row, self.mean_dict)) == True:
                    commonKeys = misc.findCommonKeys(row, self.mean_dict)
                    row_subset = misc.subsetDictionary(row, commonKeys)
                    mean_subset = misc.subsetDictionary(
                        self.mean_dict, commonKeys)
                    covariance_subset = misc.subsetDictionary(
                        self.covariance_dict, commonKeys)
                    common_mean_dict, common_covariance_dict, large_margin = self.algorithm_methods.learnCommon(
                        mean_subset, covariance_subset, row_subset, label,
                        self.covariance_dict)
                    #if classified large margin, then dont learn new attributes, skip to next
                    # NOTE(review): this skip also bypasses the merge below,
                    # leaving the model unchanged for this row -- confirm
                    if large_margin == 1:
                        continue
                #New attributes
                if bool(
                        misc.subsetDictionary(
                            row,
                            misc.findDifferentKeys(row,
                                                   self.mean_dict))) == True:
                    new_attribute_dict = misc.subsetDictionary(
                        row, misc.findDifferentKeys(row, self.mean_dict))
                    new_partial_mean_dict, new_partial_covariance_dict = self.algorithm_methods.learnNew(
                        new_attribute_dict, label)
                #Merge mean and covariance dictionaries
                old_partial_mean_dict.update(common_mean_dict)
                old_partial_mean_dict.update(new_partial_mean_dict)
                self.mean_dict = old_partial_mean_dict
                old_partial_covariance_dict.update(common_covariance_dict)
                old_partial_covariance_dict.update(new_partial_covariance_dict)
                self.covariance_dict = old_partial_covariance_dict
                #record classifier lengths
                self.classifier_summary.append(len(self.mean_dict))
                #sparsify the current classifier
                self.impute()  #handle overflow and underflow
                if self.sparse == 1:
                    self.mean_dict = self.sparsity_step(
                    )  #only works if sparsity parameter is on

        #to plot change in classifier dimension through training, and train error for stream accuracy
        #return self.classifier_summary, self.train_error/(len(self.training_dataset)*self.epoch)
        return self.classifier_summary, train_error_vector