def __init__(self, retina_size, num_bits_addr, bleaching,
             confidence_threshold, ignore_zero_addr, set_of_classes,
             ss_confidence):
    """Store the configuration and build the underlying WiSARD.

    Every argument is kept on the instance under a private attribute.
    ``retina_size``, ``num_bits_addr``, ``bleaching``,
    ``confidence_threshold`` and ``ignore_zero_addr`` are also forwarded,
    by keyword, to the ``WiSARD`` constructor.  ``set_of_classes`` and
    ``ss_confidence`` are only stored here.

    The counter of used unlabeled examples starts at zero, and
    ``self.__setup()`` is invoked as the last step.
    """
    # Bookkeeping that is never forwarded to the wrapped network.
    self.__num_of_used_Xun = 0
    self.__set_of_classes = set_of_classes
    self.__ss_confidence = ss_confidence

    # Settings shared with the wrapped network.
    self.__retina_size = retina_size
    self.__num_bits_addr = num_bits_addr
    self.__is_bleaching = bleaching
    self.__confidence_threshold = confidence_threshold
    self.__is_ignoring_zero_addr = ignore_zero_addr

    self.__main_wisard = WiSARD(
        retina_size=retina_size,
        num_bits_addr=num_bits_addr,
        bleaching=bleaching,
        confidence_threshold=confidence_threshold,
        ignore_zero_addr=ignore_zero_addr,
    )

    self.__setup()
class SemiSupervisedWiSARD():
    """Semi-supervised wrapper around a single WiSARD network.

    Input: labeled examples and unlabeled examples - Output: classifier.

    Labeled examples are trained directly.  Each unlabeled example is first
    classified by the network trained so far, and is then trained under the
    winning class only when the winner beats the runner-up by a large
    enough relative margin (``ss_confidence``).
    """

    def __init__(self, retina_size, num_bits_addr, bleaching,
                 confidence_threshold, ignore_zero_addr, set_of_classes,
                 ss_confidence):
        """Store the configuration and create the wrapped WiSARD.

        ``retina_size``, ``num_bits_addr``, ``bleaching``,
        ``confidence_threshold`` and ``ignore_zero_addr`` are forwarded to
        the ``WiSARD`` constructor.  ``set_of_classes`` is the iterable of
        class names for which discriminators are created.  ``ss_confidence``
        is the minimum confidence required before an unlabeled example is
        used for training.
        """
        self.__retina_size = retina_size
        self.__num_bits_addr = num_bits_addr
        self.__is_bleaching = bleaching
        self.__confidence_threshold = confidence_threshold
        self.__is_ignoring_zero_addr = ignore_zero_addr
        self.__set_of_classes = set_of_classes
        self.__ss_confidence = ss_confidence
        # Number of unlabeled examples that ended up being trained.
        self.__num_of_used_Xun = 0

        self.__main_wisard = WiSARD(
            retina_size=self.__retina_size,
            num_bits_addr=self.__num_bits_addr,
            bleaching=self.__is_bleaching,
            confidence_threshold=self.__confidence_threshold,
            ignore_zero_addr=self.__is_ignoring_zero_addr)

        self.__setup()

    def fit(self, X=None, y=None, unlabeled_X=None):
        """Train on labeled data, then on confidently classified unlabeled data.

        ``X`` is a list of labeled examples and ``y`` the matching labels.
        ``unlabeled_X`` is a list of examples without labels.  Omitted
        arguments (or ``None``) are treated as empty lists.

        Raises ``Exception`` when ``X`` or ``unlabeled_X`` is not a list,
        when ``X`` and ``y`` differ in length, or when the first example of
        either list does not have ``retina_size`` elements.
        """
        # None stands in for "nothing supplied" so that no mutable default
        # object is shared between calls.
        X = [] if X is None else X
        y = [] if y is None else y
        unlabeled_X = [] if unlabeled_X is None else unlabeled_X

        if not isinstance(X, list):
            raise Exception("Type of X must be a list of examples")

        if not isinstance(unlabeled_X, list):
            raise Exception("Type of unlabeled_X must be a list of examples")

        if len(X) != len(y):
            raise Exception("Number of Examples must match number of labels")

        size_message = ("Size of example must have the same size as retina:\n"
                        "Size of example = %d, Size of Retina = %d")

        # Only the first example of each list is checked, and only when the
        # list is non-empty.  The message reports the size of the example
        # that actually failed the check.
        if X and len(X[0]) != self.__retina_size:
            raise Exception(size_message % (len(X[0]), self.__retina_size))

        if unlabeled_X and len(unlabeled_X[0]) != self.__retina_size:
            raise Exception(
                size_message % (len(unlabeled_X[0]), self.__retina_size))

        # here the fit begins
        for class_name, example in zip(y, X):
            self.__main_wisard.add_training(class_name, example)

        # here comes the unsupervised part. Learning with unlabeled data.
        for example in unlabeled_X:
            # classify() is expected to return a dictionary mapping each
            # class to its result.
            possible_classes = self.__main_wisard.classify(example)

            # (class, result) pairs sorted by result, best result last.
            ranked = sorted(possible_classes.items(),
                            key=lambda a_entry: a_entry[1])
            if not ranked:
                # Nothing to choose from, so nothing can be learned.
                continue

            best_class, best_result = ranked[-1]
            # With a single class there is no runner-up; treat it as zero.
            second_result = ranked[-2][1] if len(ranked) > 1 else 0

            if best_result != 0:
                # Relative margin between the winner and the runner-up.
                ss_confidence = (
                    1 - float(second_result) / float(best_result))
            else:
                # if best result == 0, confidence is zero
                ss_confidence = 0.0

            # Train the example under the winning class only when the
            # margin is large enough.
            if ss_confidence >= self.__ss_confidence:
                self.__main_wisard.add_training(best_class, example)
                self.__num_of_used_Xun += 1

    def __setup(self):
        """Create one discriminator per configured class name."""
        for class_name in self.__set_of_classes:
            self.__main_wisard.create_discriminator(class_name)

    def predict(self, testing_corpus):
        """Return the ``classify`` result of every example, in input order."""
        return [self.__main_wisard.classify(example)
                for example in testing_corpus]

    def get_status(self):
        """Return how many unlabeled examples have been used for training."""
        return self.__num_of_used_Xun
print("Finished reading and preprocessing data.")

# retinaLength = numberRowsTrainImages * numberColumnsTrainImages used to be
# passed to WiSARD; per the original note, the updated PyWANN no longer needs
# that argument.

# Report files:
#   predict_proba.txt    - probabilities of the wrong classifications
#   difference_proba.txt - difference between the probability of the guessed
#                          class and the expected one
# NOTE(review): neither handle is closed in this part of the script; confirm
# they are closed further down.
probFile = open("predict_proba.txt", 'w')
difFile = open("difference_proba.txt", 'w')

# Network and bookkeeping settings (bleaching is left at the library default).
numberBits, correct, numberClasses = 32, 0, 10

wisard = WiSARD.WiSARD(numberBits)

print("Started training.")
wisard.fit(trainImages, trainLabels)
print("Finished training.")

print("Started classifying.")
result = wisard.predict(testImages)
print("Finished classifying.")

# Per-class probabilities for every test image.
prob = wisard.predict_proba(testImages)

# One counter per expected class, for the frequency of wrong classifications.
wrongLabelsFreq = numberClasses * [0]
# Load the MNIST training and testing sets from the current directory.
mndata = MNIST('.')
training_images, training_labels = mndata.load_training()
testing_images, testing_labels = mndata.load_testing()

# Both image sets go through transform() with the same activation threshold.
actv_threshold = 128
tTraining_images = transform(training_images, actv_threshold)
tTesting_images = transform(testing_images, actv_threshold)
print("images loaded!")

bleaching, b_bleaching = True, 1

start_time = time.time()

# NOTE(review): `defaul_b_bleaching` is spelled this way on purpose here;
# presumably it matches the keyword name in the WiSARD signature - confirm
# before "fixing" it.
w = WiSARD.WiSARD(
    num_bits_addr,
    bleaching,
    seed=int(time.time()),
    defaul_b_bleaching=b_bleaching,
)

print("beginning training...")
# Train the discriminators on the transformed training images.
w.fit(tTraining_images, training_labels)
print("training complete!")

print("beginning tests...")
# Predict a class for every transformed test image.
result = np.array(w.predict(tTesting_images))

# Count the positions where the expected label equals the predicted one.
correct_items = np.sum(np.array(testing_labels) == result)
def wisard_training(register_list, sw, max_per_class=10):
    """Return a trained WiSARD, loaded from disk or trained from scratch.

    When ``configs.wisard_name`` is set, the network is unpickled from that
    file and the module-level ``vectorizer`` is unpickled from
    ``'vectorizer_' + configs.wisard_name``.  No training happens in that
    case.

    Otherwise a binary ``CountVectorizer`` is fitted on the descriptions of
    the selected registers, a new WiSARD is trained on the resulting
    vectors, and both objects are pickled to ``wisard<timestamp>.pkl`` and
    ``vectorizer_wisard<timestamp>.pkl`` (relative paths).

    A register is selected when its complaint-type field equals ``'1'`` and
    its key field is in ``configs.classificationList``.

    Args:
        register_list: iterable of indexable records; the fields are read
            at ``configs.complaint_type_index``, ``configs.key_index`` and
            ``configs.description_index``.
        sw: unused by this function; kept for caller compatibility.
        max_per_class: maximum number of training examples taken for each
            classification (defaults to the previously hard-coded 10).

    Returns:
        The loaded or newly trained WiSARD instance.

    Side effects:
        Rebinds the global ``vectorizer``; writes two pickle files when
        training from scratch; prints progress messages.
    """
    global vectorizer

    # Import wisard from file if the name is set.
    if configs.wisard_name:
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # configs.wisard_name at files from a trusted source.
        with open(configs.wisard_name, 'rb') as wisard_file:
            wisard = pickle.load(wisard_file)
        with open('vectorizer_' + configs.wisard_name, 'rb') as vectorizer_file:
            vectorizer = pickle.load(vectorizer_file)
        return wisard

    train_set = []
    labels = []
    count_dict = {}

    # Set SKL vectorizer.
    vectorizer = CountVectorizer(lowercase=True,
                                 ngram_range=(1, 1),
                                 min_df=0.0,
                                 max_df=0.8,
                                 binary=True)

    # Building training set.
    for register in register_list:
        if (register[configs.complaint_type_index] != '1'
                or register[configs.key_index]
                not in configs.classificationList):
            continue

        report_classification = register[configs.key_index]

        # Cap the number of examples taken per classification.
        already_taken = count_dict.get(report_classification, 0)
        if already_taken >= max_per_class:
            continue
        count_dict[report_classification] = already_taken + 1

        labels.append(report_classification)
        train_set.append(register[configs.description_index])

    print("Vectorizing sets...")
    V = vectorizer.fit_transform(train_set).toarray()

    print("Training Wisard")
    labels = np.array(labels)  # labels

    # Create the WiSARD instance.
    # NOTE(review): the positional arguments are presumably num_bits_addr=3
    # and bleaching=True - confirm against the WiSARD signature.
    wisard = WiSARD.WiSARD(3, True, ignore_zero_addr=True)

    v_1 = np.asarray(V)  # training set
    wisard.fit(v_1, labels)  # associate the labels with the training set

    # Saving wisard to file.
    file_name = ('wisard'
                 + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
                 + '.pkl')
    with open(file_name, 'wb') as output:
        pickle.dump(wisard, output, pickle.HIGHEST_PROTOCOL)
    with open('vectorizer_' + file_name, 'wb') as output:
        pickle.dump(vectorizer, output, pickle.HIGHEST_PROTOCOL)

    return wisard