print("Finished reading and preprocessing data.") # retinaLength = numberRowsTrainImages*numberColumnsTrainImages (in the updated PyWANN, passing this value as an argument to the WiSARD function isn't needed anymore) probFile = open("predict_proba.txt", 'w') # file for the probabilities of the wrong classifications difFile = open( "difference_proba.txt", 'w' ) # file for the diference between the probability of the guessed class and the expected one numberBits = 32 # bleaching = False correct = 0 numberClasses = 10 wisard = WiSARD.WiSARD(numberBits) # wisard = WiSARD.WiSARD(numberBits, bleaching) print("Started training.") wisard.fit(trainImages, trainLabels) print("Finished training.") print("Started classifying.") result = wisard.predict(testImages) print("Finished classifying.") # gets the probabilitles for each class prob = wisard.predict_proba(testImages) # to count the frequency of wrong classifications in each expected class wrongLabelsFreq = [0] * numberClasses
# --- MNIST load + WiSARD train/test chunk (PyWANN, bleaching enabled) ---
# Requires MNIST (python-mnist loader), transform(), num_bits_addr, time and
# np to be defined/imported elsewhere in this file -- not visible here.
mndata = MNIST('.')
training_images, training_labels = mndata.load_training()
testing_images, testing_labels = mndata.load_testing()

# Binarize pixel intensities against a fixed activation threshold
# (presumably pixel >= 128 -> 1, else 0; transform() is defined elsewhere -- confirm).
actv_threshold = 128
tTraining_images = transform(training_images, actv_threshold)
tTesting_images = transform(testing_images, actv_threshold)
print("images loaded!")

bleaching = True
b_bleaching = 1  # initial bleaching threshold value

start_time = time.time()
# NOTE(review): 'defaul_b_bleaching' (sic) appears to match PyWANN's own
# misspelled keyword -- do not "correct" it without checking the installed library.
w = WiSARD.WiSARD(num_bits_addr, bleaching, seed=int(time.time()),
                  defaul_b_bleaching=b_bleaching)

print("beginning training...")
# training discriminators
w.fit(tTraining_images, training_labels)
print("training complete!")

print("beginning tests...")
# Predicting class
# Result will be a dictionary using the classes as key and the WiSARD result as values
result = np.array(w.predict(tTesting_images))

# Return how many items present in both lists are equal
correct_items = np.sum(np.array(testing_labels) == result)
def wisard_training(register_list, sw, max_per_class=10):
    """Train (or load from disk) a WiSARD text classifier.

    If ``configs.wisard_name`` is set, unpickles and returns that saved model
    (also restoring the module-level ``vectorizer``). Otherwise builds a
    class-balanced training set from ``register_list``, fits a binary
    CountVectorizer plus a WiSARD discriminator, pickles both to timestamped
    files, and returns the trained model.

    Parameters:
        register_list: iterable of record sequences, indexed via the
            ``configs.*_index`` constants (schema defined elsewhere -- confirm).
        sw: unused; kept for caller compatibility.
        max_per_class: cap on training descriptions per classification key
            (was a hard-coded 10).

    Returns:
        The trained (or loaded) WiSARD instance. Side effects: rebinds the
        module-level ``vectorizer`` and, when training, writes two .pkl files.
    """
    global vectorizer

    # Load a previously pickled model (and its vectorizer) if configured.
    if configs.wisard_name:
        with open(configs.wisard_name, 'rb') as model_file:
            wisard = pickle.load(model_file)
        with open('vectorizer_' + configs.wisard_name, 'rb') as vec_file:
            vectorizer = pickle.load(vec_file)
        # print(vectorizer.vocabulary)
        return wisard

    train_set = []
    labels = []
    count_dict = {}

    # Binary bag-of-words features; max_df=0.8 drops overly common terms.
    # vectorizer = TfidfVectorizer(use_idf = True, lowercase=True, norm = 'l2', ngram_range=(1,1), min_df=0.0, max_df = 0.8)
    vectorizer = CountVectorizer(lowercase=True, ngram_range=(1, 1),
                                 min_df=0.0, max_df=0.8, binary=True)

    # Build a class-balanced training set: keep at most max_per_class
    # descriptions per classification key, only for complaint records ('1')
    # whose key is in the configured classification list.
    for register in register_list:
        if (register[configs.complaint_type_index] != '1'
                or register[configs.key_index] not in configs.classificationList):
            continue
        report_classification = register[configs.key_index]
        if count_dict.get(report_classification, 0) >= max_per_class:
            continue
        count_dict[report_classification] = count_dict.get(report_classification, 0) + 1
        labels.append(report_classification)
        train_set.append(register[configs.description_index])

    print("Vectorizing sets...")
    V = vectorizer.fit_transform(train_set).toarray()

    print("Training Wisard")
    labels = np.array(labels)  # labels
    # 3 address bits, bleaching enabled, zero addresses ignored.
    wisard = WiSARD.WiSARD(3, True, ignore_zero_addr=True)
    wisard.fit(np.array(V), labels)  # associate labels with the training vectors

    # Persist the model and its vectorizer with a timestamped filename.
    file_name = 'wisard' + datetime.datetime.now().strftime(
        "%Y-%m-%d-%H-%M-%S") + '.pkl'
    with open(file_name, 'wb') as output_file:
        pickle.dump(wisard, output_file, pickle.HIGHEST_PROTOCOL)
    with open('vectorizer_' + file_name, 'wb') as output_file:
        pickle.dump(vectorizer, output_file, pickle.HIGHEST_PROTOCOL)

    return wisard