def GetMnistData(data_path):
    """Load the MNIST idx files found under ``data_path`` and prepare them.

    Images are cast to float32 and divided by 255; no reshaping is done
    here, so they keep the layout returned by ``loadlocal_mnist``. Labels
    are converted by ``TranslateLables`` using the number of distinct
    training labels (the intended output format is one-hot, e.g.
    ``[0, 1, 0, ..., 0]``).

    Args:
        data_path: Prefix concatenated directly with each file name, so it
            must already end with a path separator.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test, num_input, num_class)``,
        where ``num_input`` is ``X_train.shape[1]`` and ``num_class`` is the
        number of unique training labels.
    """
    train_pixels, train_digits = loadlocal_mnist(
        images_path=data_path + "train-images-idx3-ubyte",
        labels_path=data_path + "train-labels-idx1-ubyte")
    test_pixels, test_digits = loadlocal_mnist(
        images_path=data_path + "t10k-images-idx3-ubyte",
        labels_path=data_path + "t10k-labels-idx1-ubyte")

    # Cast to float32 first so the scaled values stay float32.
    X_train = train_pixels.astype(np.float32) / 255.
    X_test = test_pixels.astype(np.float32) / 255.

    # The class count is derived from the training labels only.
    num_class = len(np.unique(train_digits))
    num_input = X_train.shape[1]

    Y_train = TranslateLables(train_digits, num_class)
    Y_test = TranslateLables(test_digits, num_class)

    return X_train, Y_train, X_test, Y_test, num_input, num_class
def load_mnist(path=MNIST_PATH):
    """Load the MNIST train and test splits from the prefix ``path``.

    Args:
        path: Prefix concatenated directly with each file name. Defaults to
            the module-level ``MNIST_PATH``. This argument used to be
            ignored (``MNIST_PATH`` was always used); it is now honoured.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` -- both image arrays
        come before both label arrays.
    """
    X_train, y_train = loadlocal_mnist(
        images_path=path + 'train-images.idx3-ubyte',
        labels_path=path + 'train-labels.idx1-ubyte')
    X_test, y_test = loadlocal_mnist(
        images_path=path + 't10k-images.idx3-ubyte',
        labels_path=path + 't10k-labels.idx1-ubyte')
    return X_train, X_test, y_train, y_test
def data_processing(self):
    """Load MNIST from the fixed Drive folder and store prepared arrays on ``self``.

    Sets ``self.train_x`` / ``self.test_x`` to the images passed through
    ``preprocessing.normalize`` and ``self.train_y`` / ``self.test_y`` to the
    labels encoded by a ``OneHotEncoder`` that is fitted on the training
    labels only.
    """
    raw_train_x, raw_train_y = loadlocal_mnist(
        "/content/drive/MyDrive/ass3_data/train-images.idx3-ubyte",
        "/content/drive/MyDrive/ass3_data/train-labels.idx1-ubyte")
    raw_test_x, raw_test_y = loadlocal_mnist(
        "/content/drive/MyDrive/ass3_data/t10k-images.idx3-ubyte",
        "/content/drive/MyDrive/ass3_data/t10k-labels.idx1-ubyte")

    self.train_x = preprocessing.normalize(raw_train_x)
    self.test_x = preprocessing.normalize(raw_test_x)

    # NOTE(review): newer scikit-learn releases renamed ``sparse`` to
    # ``sparse_output`` -- confirm against the version this project pins.
    encoder = OneHotEncoder(sparse=False, categories='auto')

    # The encoder expects a 2-D input, hence one label per row.
    self.train_y = encoder.fit_transform(
        raw_train_y.reshape(len(raw_train_y), -1))
    self.test_y = encoder.transform(
        raw_test_y.reshape(len(raw_test_y), -1))
def load_idx(path):
    """Read the four MNIST idx files whose names are appended to ``path``.

    Args:
        path: Prefix concatenated directly with each file name.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.
    """
    file_pairs = (
        ('train-images-idx3-ubyte', 'train-labels-idx1-ubyte'),
        ('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte'),
    )

    loaded = []
    for images_name, labels_name in file_pairs:
        images, labels = loadlocal_mnist(
            images_path=path + images_name,
            labels_path=path + labels_name)
        loaded.extend((images, labels))

    return tuple(loaded)
def load_data_fashion_mnist(train_path, test_path):
    """Load the Fashion-MNIST image and label files from two directories.

    Args:
        train_path: Directory containing ``train_image/train_image`` and
            ``train_label/train_label``.
        test_path: Directory containing ``test_image/test_image`` and
            ``test_label/test_label``.

    Returns:
        Tuple ``(train_image, train_label, test_image, test_label)``.
    """
    from mlxtend.data import loadlocal_mnist

    train_images_file = train_path + "/train_image/train_image"
    train_labels_file = train_path + "/train_label/train_label"
    train_image, train_label = loadlocal_mnist(
        images_path=train_images_file, labels_path=train_labels_file)

    test_images_file = test_path + "/test_image/test_image"
    test_labels_file = test_path + "/test_label/test_label"
    test_image, test_label = loadlocal_mnist(
        images_path=test_images_file, labels_path=test_labels_file)

    return train_image, train_label, test_image, test_label
def load_data():
    """Load the MNIST train and test splits from the current working directory.

    Returns:
        Tuple ``(X, X_test, y, y_test)`` -- both image arrays come before
        both label arrays.
    """
    train_images_file = "train-images-idx3-ubyte"
    train_labels_file = "train-labels-idx1-ubyte"
    test_images_file = "t10k-images-idx3-ubyte"
    test_labels_file = "t10k-labels-idx1-ubyte"

    X, y = loadlocal_mnist(images_path=train_images_file,
                           labels_path=train_labels_file)
    X_test, y_test = loadlocal_mnist(images_path=test_images_file,
                                     labels_path=test_labels_file)
    return X, X_test, y, y_test
def load_MNIST(data_address):
    """Load the MNIST train and test splits located under ``data_address``.

    Args:
        data_address: Directory prefix; file names are appended after a
            ``/`` separator.

    Returns:
        Tuple ``(train_images, train_labels, test_images, test_labels)``.
    """
    # NOTE(review): only the training labels are read from the
    # "MNIST dataset" subfolder; the other three files sit directly under
    # data_address. Confirm this matches the on-disk layout.
    train_images_file = data_address+'/train-images.idx3-ubyte'
    train_labels_file = data_address+'/MNIST dataset/train-labels.idx1-ubyte'
    train_images, train_labels = loadlocal_mnist(
        images_path=train_images_file, labels_path=train_labels_file)

    test_images_file = data_address+'/t10k-images.idx3-ubyte'
    test_labels_file = data_address+'/t10k-labels.idx1-ubyte'
    test_images, test_labels = loadlocal_mnist(
        images_path=test_images_file, labels_path=test_labels_file)

    return train_images, train_labels, test_images, test_labels
def prepare_mnist_data():
    """Load the MNIST idx files from the sibling ``../mnist`` directory.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.
    """
    train_split = loadlocal_mnist(
        images_path='../mnist/train-images-idx3-ubyte',
        labels_path='../mnist/train-labels-idx1-ubyte')
    X_train, y_train = train_split

    test_split = loadlocal_mnist(
        images_path='../mnist/t10k-images-idx3-ubyte',
        labels_path='../mnist/t10k-labels-idx1-ubyte')
    X_test, y_test = test_split

    return X_train, y_train, X_test, y_test
def loadMnist():
    """Load MNIST from the local ``MNIST`` folder, printing the training shapes.

    Returns:
        Tuple ``(train_images, train_labels, test_images, test_labels)``.
    """
    train_images, train_labels = loadlocal_mnist(
        images_path='MNIST/train-images-idx3-ubyte',
        labels_path='MNIST/train-labels-idx1-ubyte')

    # Report the training shapes before the test files are read.
    for loaded in (train_images, train_labels):
        print(loaded.shape)

    test_images, test_labels = loadlocal_mnist(
        images_path='MNIST/t10k-images-idx3-ubyte',
        labels_path='MNIST/t10k-labels-idx1-ubyte')

    return train_images, train_labels, test_images, test_labels
def __init__(self):
    """Load MNIST from ``./data`` and initialise the instance state.

    Sets ``On`` to True, stores the train split in ``X`` / ``y`` and the
    test split in ``X_test`` / ``y_test``, records the per-sample input
    width in ``sizeX``, fixes ``sizey`` at 10 and sets ``network`` to False.
    """
    self.On = True

    train_images, train_labels = loadlocal_mnist(
        images_path='./data/train-images.idx3-ubyte',
        labels_path='./data/train-labels.idx1-ubyte')
    self.X, self.y = train_images, train_labels

    test_images, test_labels = loadlocal_mnist(
        images_path='./data/t10k-images.idx3-ubyte',
        labels_path='./data/t10k-labels.idx1-ubyte')
    self.X_test, self.y_test = test_images, test_labels

    self.sizeX = self.X.shape[1]
    self.sizey = 10
    self.network = False
def loadMnist():
    """Load MNIST from the current working directory and scale the images.

    Images are converted to 64-bit floats and divided by 255. The original
    code used the ``np.float`` alias, which has been removed from NumPy and
    raises ``AttributeError`` there; ``np.float64`` is the same dtype.

    Returns:
        Tuple ``(data_train_norm, data_test_norm, label_train, label_test)``
        -- both image arrays come before both label arrays.
    """
    cwd = os.getcwd()

    data_train, label_train = loadlocal_mnist(
        images_path=cwd + '/train-images-idx3-ubyte',
        labels_path=cwd + '/train-labels-idx1-ubyte')
    data_test, label_test = loadlocal_mnist(
        images_path=cwd + '/t10k-images-idx3-ubyte',
        labels_path=cwd + '/t10k-labels-idx1-ubyte')

    # Normalize the data.
    data_train_norm = data_train.astype(np.float64) / 255.
    data_test_norm = data_test.astype(np.float64) / 255.

    return data_train_norm, data_test_norm, label_train, label_test
def __load_mnist(self):
    """Read the MNIST files under ``self.mnist_path`` into private attributes.

    Populates ``_x_train`` / ``_y_train`` and ``_x_test`` / ``_y_test``.
    """
    train_images_file = join(self.mnist_path, 'train-images-idx3-ubyte')
    train_labels_file = join(self.mnist_path, 'train-labels-idx1-ubyte')
    self._x_train, self._y_train = loadlocal_mnist(
        images_path=train_images_file, labels_path=train_labels_file)

    test_images_file = join(self.mnist_path, 't10k-images-idx3-ubyte')
    test_labels_file = join(self.mnist_path, 't10k-labels-idx1-ubyte')
    self._x_test, self._y_test = loadlocal_mnist(
        images_path=test_images_file, labels_path=test_labels_file)
def __init__(self, folder_path, dataset_type='train'):
    """Load one MNIST split from ``folder_path`` into ``self.x`` / ``self.y``.

    Args:
        folder_path: Directory holding the idx files; names are appended
            after a ``/`` separator.
        dataset_type: ``'train'`` selects the training files; any other
            value selects the ``t10k`` test files.
    """
    self.dataset_type = dataset_type

    if dataset_type == 'train':
        images_file = folder_path + '/train-images.idx3-ubyte'
        labels_file = folder_path + '/train-labels.idx1-ubyte'
    else:
        images_file = folder_path + '/t10k-images.idx3-ubyte'
        labels_file = folder_path + '/t10k-labels.idx1-ubyte'

    self.x, self.y = loadlocal_mnist(
        images_path=images_file, labels_path=labels_file)
def get_test_data():
    """Return the normalized test data: X holds the images, y the matching digits.

    The file names differ by platform: the dotted variants
    (``t10k-images.idx3-ubyte``) are used on Windows and the dashed variants
    everywhere else. Pixel values are converted to float and divided by 255.
    """
    if platform.system() == 'Windows':
        images_file = 't10k-images.idx3-ubyte'
        labels_file = 't10k-labels.idx1-ubyte'
    else:
        images_file = 't10k-images-idx3-ubyte'
        labels_file = 't10k-labels-idx1-ubyte'

    X, y = loadlocal_mnist(images_path=images_file, labels_path=labels_file)
    scaled = X.astype(float) / 255
    return scaled, y
def load_data_from_file():
    """Load the train and test idx files from the current working directory.

    The training images are rescaled with ``(x - 127.5) / 127.5`` (mapping
    byte values 0..255 onto [-1, 1]) and flattened to one row per image.
    The test images and both label arrays are returned exactly as loaded.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.
    """
    # load the data
    x_train, y_train = loadlocal_mnist(images_path='train-images-idx3-ubyte',
                                       labels_path='train-labels-idx1-ubyte')
    x_test, y_test = loadlocal_mnist(images_path='test-images-idx3-ubyte',
                                     labels_path='test-labels-idx1-ubyte')

    # normalize our inputs to be in the range [-1, 1]
    x_train = (x_train.astype(np.float32) - 127.5) / 127.5

    # Flatten to one row per image. The sample count comes from the data
    # and the row width is inferred, replacing the previously hard-coded
    # (7317, 10000) that failed for any other dataset size.
    x_train = x_train.reshape(x_train.shape[0], -1)

    return (x_train, y_train, x_test, y_test)
def train(model, lrate, n_epochs, optim_fun):
    """Train ``model`` one sample at a time on the local MNIST training files.

    Each epoch walks over every (image, label) pair, takes one optimizer
    step per sample using an ``nn.NLLLoss`` criterion, then prints the mean
    loss for the epoch and the elapsed time reported by ``comp_time``.

    Args:
        model: Module to train; it is moved to the module-level ``device``.
        lrate: Learning rate forwarded to the optimizer as ``lr``.
        n_epochs: Number of passes over the training data.
        optim_fun: Optimizer factory called as
            ``optim_fun(model.parameters(), lr=lrate)``.
    """
    X, y = loadlocal_mnist("mnist_data/train-images.txt",
                           "mnist_data/train-labels.txt")

    model.to(device)
    criterion = nn.NLLLoss()
    optimizer = optim_fun(model.parameters(), lr=lrate)

    for epoch in range(n_epochs):
        running_loss = 0
        epoch_started = time.time()

        for image, digit in zip(X, y):
            model.zero_grad()
            sample_loss = criterion(model(x2tensor(image)), y2tensor(digit))
            sample_loss.backward()
            optimizer.step()
            running_loss += sample_loss.item()

        mean_loss = running_loss / X.shape[0]
        print("loss[{}] = {}".format(epoch, mean_loss))
        print("------------------------------------took " + comp_time(epoch_started))
def ExtractAndReshape(imagesPath, labelsPath):
    """Load an idx image/label pair and reshape the images to 28x28.

    Args:
        imagesPath: Path of the idx images file.
        labelsPath: Path of the idx labels file.

    Returns:
        Tuple ``(images, images_labels)`` where ``images`` has shape
        ``(number_of_images, 28, 28)``.
    """
    flat_images, images_labels = loadlocal_mnist(images_path=imagesPath,
                                                 labels_path=labelsPath)
    count = len(flat_images)
    # np.array() materialises the reshaped data as its own array.
    images = np.array(flat_images.reshape(count, 28, 28))
    return images, images_labels
def read_data_normalize_and_add_bias(classes, features):
    """Load labelled data, scale the features and attach a bias via ``add_bias``.

    The features are divided by 255 and handed to ``add_bias`` together
    with the row count (presumably it prepends a column of ones to every
    row -- verify against ``add_bias``).

    Arguments:
        classes -- path of the labels file
        features -- path of the features file

    Returns:
        The result of ``add_bias`` on the scaled data, and the labels
    """
    data, labels = loadlocal_mnist(images_path=features, labels_path=classes)

    row_count = data.shape[0]
    scaled = data / 255

    return add_bias(scaled, row_count), labels
def set_train_dataset(self):
    """Load the training split from ``data/`` into ``train_data`` / ``train_label``.

    The images are stored as loaded; every raw label is passed through
    ``self.get_label`` and the results are collected into a NumPy array.
    """
    # Data source
    image = 'data/train-images.idx3-ubyte'
    label = 'data/train-labels.idx1-ubyte'

    pixels, raw_labels = loadlocal_mnist(images_path=image, labels_path=label)

    self.train_data = pixels
    self.train_label = np.array(list(map(self.get_label, raw_labels)))
def loadDataset(train_size=1000, batch_size=100, randSeed=17,
                image_path='./mnist/train-images-idx3-ubyte',
                label_path='./mnist/train-labels-idx1-ubyte'):
    """Build per-digit data loaders plus one labelled loader from the training files.

    Images are grouped by label, each group is shuffled with the seeded
    ``random`` module, and the first ``train_size`` images of every group
    are rescaled with ``(x - 128) / 128``.

    Args:
        train_size: Maximum number of images taken per digit.
        batch_size: Batch size for every returned loader.
        randSeed: Seed passed to ``random.seed`` before shuffling.
        image_path: Path of the idx images file.
        label_path: Path of the idx labels file.

    Returns:
        Tuple ``(dataLoaders, labeledDataLoader)``: a list of ten loaders
        (one per digit, images only) and one loader over all selected rows
        with the digit appended as the final column (785 columns in total).
    """
    random.seed(randSeed)
    train_images, train_labels = loadlocal_mnist(images_path=image_path,
                                                 labels_path=label_path)

    # Bucket the images by digit, then shuffle each bucket in digit order so
    # the sequence of random draws is fixed by the seed.
    buckets = [[] for _ in range(10)]
    for idx, digit in enumerate(train_labels):
        buckets[digit].append(train_images[idx])
    for bucket in buckets:
        random.shuffle(bucket)

    per_digit_loaders = []
    combined = torch.zeros((0, 785))
    for digit, bucket in enumerate(buckets):
        pixels = (torch.tensor(bucket[:train_size]) - 128.) / 128
        label_column = digit * torch.ones((pixels.shape[0], 1))
        with_label = torch.cat((pixels, label_column), 1)
        combined = torch.cat((combined, with_label), 0)
        per_digit_loaders.append(
            torch.utils.data.DataLoader(pixels,
                                        batch_size=batch_size,
                                        shuffle=True))

    labeled_loader = torch.utils.data.DataLoader(combined,
                                                 batch_size=batch_size,
                                                 shuffle=True)
    return per_digit_loaders, labeled_loader
def get_dev_accuracy(classifier, dev_size=1000, randSeed=17,
                     image_path='./mnist/train-images-idx3-ubyte',
                     label_path='./mnist/train-labels-idx1-ubyte'):
    """Score ``classifier`` on a held-out slice of the training files.

    Images are grouped by label and shuffled with the seeded ``random``
    module; the last ``dev_size`` images of every digit form the evaluation
    set, rescaled with ``(x - 128) / 128``.

    Args:
        classifier: Object whose ``predict`` takes the image tensor and
            returns a tensor of predicted digits.
        dev_size: Number of images taken from the end of each digit group.
        randSeed: Seed passed to ``random.seed`` before shuffling.
        image_path: Path of the idx images file.
        label_path: Path of the idx labels file.

    Returns:
        The fraction of predictions equal to the true digit, as a float.
    """
    random.seed(randSeed)
    train_images, train_labels = loadlocal_mnist(images_path=image_path,
                                                 labels_path=label_path)

    buckets = [[] for _ in range(10)]
    for idx, digit in enumerate(train_labels):
        buckets[digit].append(train_images[idx])
    for bucket in buckets:
        random.shuffle(bucket)

    held_out_images = []
    held_out_labels = []
    for digit, bucket in enumerate(buckets):
        held_out_images += bucket[-dev_size:]
        held_out_labels += [digit] * dev_size

    inputs = (torch.tensor(held_out_images) - 128.) / 128
    targets = torch.tensor(held_out_labels)

    guesses = classifier.predict(inputs).type(torch.uint8)
    hits = torch.eq(guesses, targets).float()
    return torch.mean(hits).item()
def _save_as_numpy(self, image_path=None, label_path=None,
                   image_save_path=None, label_save_path=None):
    """Load the MNIST idx files and persist them as ``*.npy`` files.

    The loaded arrays and both save paths are also stored on the instance.
    An array is written only when ``FileUtils.check_exist_with_message``
    returns a falsy value for its save path.

    :param image_path: MNIST image data path (extracted file)
    :param label_path: MNIST label data path (extracted file)
    :param image_save_path: destination of the image array (npy)
    :param label_save_path: destination of the label array (npy)
    """
    images, labels = loadlocal_mnist(images_path=image_path,
                                     labels_path=label_path)

    self._image_data, self._label_data = images, labels
    self._image_data_path = image_save_path
    self._label_data_path = label_save_path

    pending = (
        (image_save_path, images, "Images Already Saved!"),
        (label_save_path, labels, "Labels Already Saved!"),
    )
    for target, payload, notice in pending:
        already_saved = FileUtils.check_exist_with_message(
            file_path=target, message=notice)
        if not already_saved:
            np.save(target, payload)
def trainData():
    """Train a dense Keras classifier on the local training files and save it.

    Reads the module-level ``epochsCount``, ``optimizer`` and
    ``lossFunction`` settings, reports progress through
    ``myWin.consoleWrite``, stores the per-epoch ``"acc"`` history in the
    global ``accuracyData`` and saves the model as ``smartwardrobe.model``.
    """
    global epochsCount, optimizer, lossFunction, accuracyData

    myWin.consoleWrite("Loading training data ...")
    x_train, y_train = loadlocal_mnist(images_path="train-images-idx3-ubyte",
                                       labels_path="train-labels-idx1-ubyte")
    x_train = tf.keras.utils.normalize(x_train, axis=1)

    myWin.consoleWrite("Adding neural network layers ...")
    model = tf.keras.models.Sequential()
    layer_stack = (
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(784, activation=tf.nn.relu),
        tf.keras.layers.Dense(196, activation=tf.nn.relu),
        tf.keras.layers.Dense(10, activation=tf.nn.softmax),
    )
    for layer in layer_stack:
        model.add(layer)

    myWin.consoleWrite("Optimizer = " + str(optimizer))
    myWin.consoleWrite("Loss Function = " + str(lossFunction))
    model.compile(optimizer=optimizer,
                  loss=lossFunction,
                  metrics=["acc", "mse"])

    myWin.consoleWrite("Fitting model ...")
    history = model.fit(x_train, y_train, epochs=epochsCount)
    accuracyData = history.history["acc"]

    # Report the value each metric reached in the final epoch.
    for metric in model.metrics_names:
        final_value = history.history[metric][-1]
        myWin.consoleWrite(metric + " = " + str(round(final_value, 4)))

    model.save("smartwardrobe.model")
def loadMNIST(path):
    """Load MNIST from ``path`` and split it into train, validation and test.

    The first 55000 training samples stay in the training set; everything
    from index 55000 onwards becomes the validation set.

    Args:
        path: Prefix concatenated directly with each file name.

    Returns:
        Tuple ``(X_train, Y_train, X_validation, Y_validation, X_test,
        Y_test)``.
    """
    all_train_x, all_train_y = loadlocal_mnist(
        images_path=path + 'train-images-idx3-ubyte',
        labels_path=path + 'train-labels-idx1-ubyte')
    X_test, Y_test = loadlocal_mnist(
        images_path=path + 't10k-images-idx3-ubyte',
        labels_path=path + 't10k-labels-idx1-ubyte')

    split_at = 55000
    X_validation = all_train_x[split_at:, :]
    Y_validation = all_train_y[split_at:]
    X_train = all_train_x[:split_at, :]
    Y_train = all_train_y[:split_at]

    return X_train, Y_train, X_validation, Y_validation, X_test, Y_test
def load_mnist():
    """Load the ``t10k`` files from the fixed project data folder as 28x28 images.

    Returns:
        Tuple ``(X, y)`` where ``X`` is a ``uint8`` array of shape
        ``(number_of_images, 28, 28)`` and ``y`` holds the labels.
    """
    images_file = r"C:\data\git\Dict\Dictionary_Learning\data\t10k-images.idx3-ubyte"
    labels_file = r"C:\data\git\Dict\Dictionary_Learning\data\t10k-labels.idx1-ubyte"

    flat, y = loadlocal_mnist(images_path=images_file, labels_path=labels_file)

    image_shape = (flat.shape[0], 28, 28)
    X = np.reshape(np.uint8(flat), image_shape)
    return X, y
def get_train_validation_data(ratio):
    """Return the normalized training data split into train and validation parts.

    X holds the images and y the matching digits. The file names differ by
    platform: the dotted variants are used on Windows and the dashed
    variants everywhere else. Pixel values are converted to float and
    divided by 255.

    Args:
        ratio: Fraction of the samples that goes into the training part
            (out of the 60000 in the standard training files); the rest
            becomes the validation part.

    Returns:
        Tuple ``(X_train, y_train, X_validation, y_validation)``.
    """
    if platform.system() == 'Windows':
        images_file = 'train-images.idx3-ubyte'
        labels_file = 'train-labels.idx1-ubyte'
    else:
        images_file = 'train-images-idx3-ubyte'
        labels_file = 'train-labels-idx1-ubyte'

    X, y = loadlocal_mnist(images_path=images_file, labels_path=labels_file)
    X = X.astype(float) / 255

    # One split index for both arrays. The original recomputed it from
    # len(X) and len(y) separately and bounded the y slice by len(X); images
    # and labels are expected to have equal length, so a single boundary
    # keeps the two splits aligned by construction.
    split = int(ratio * len(X))

    X_train, X_validation = X[:split], X[split:]
    y_train, y_validation = y[:split], y[split:]
    return X_train, y_train, X_validation, y_validation
def extract_mnist_dataset():
    """Load the MNIST training split from ``hw5/dataset`` under the working directory.

    Returns:
        Tuple ``(X, Y)`` with the training images and labels.
    """
    dataset_path = os.getcwd() + "/hw5/dataset/"

    images_file = dataset_path + 'train-images.idx3-ubyte'
    labels_file = dataset_path + 'train-labels.idx1-ubyte'

    return loadlocal_mnist(images_path=images_file, labels_path=labels_file)
def read_emnist_data():
    """Load the EMNIST train and test files and prepare images and labels.

    Images are reshaped to ``(number_of_images, 28, 28)`` using Fortran
    order and divided by 255.0. Every label is reduced by 1 because the
    labels do not start at 0.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.
    """
    path = "../../../datasets/emnist"
    print("Path for EMNIST data is " + path)

    raw_train_x, raw_train_y = loadlocal_mnist(
        images_path=path + "/train-images-ubyte",
        labels_path=path + "/train-labels-ubyte")
    raw_test_x, raw_test_y = loadlocal_mnist(
        images_path=path + "/test-images-ubyte",
        labels_path=path + "/test-labels-ubyte")

    x_train = raw_train_x.reshape((len(raw_train_x), 28, 28), order='F') / 255.0
    x_test = raw_test_x.reshape((len(raw_test_x), 28, 28), order='F') / 255.0

    # Shift the labels down by one so they start at 0.
    y_train = raw_train_y - 1
    y_test = raw_test_y - 1

    return x_train, y_train, x_test, y_test
def read_test_mnist():
    """Load the ``t10k`` test files from the fixed local perceptron folder.

    Returns:
        Tuple ``(X, y)`` with the test images and labels.
    """
    images_file = '/Users/marek/marek_files/priv/mini/perceptron/mnist/t10k-images.idx3-ubyte'
    labels_file = '/Users/marek/marek_files/priv/mini/perceptron/mnist/t10k-labels.idx1-ubyte'

    test_images, test_labels = loadlocal_mnist(images_path=images_file,
                                               labels_path=labels_file)
    return test_images, test_labels
def save_mnist():
    """Write the binarised training set, label in the last column, to ``data.csv``.

    Every pixel greater than 0 becomes 1 and every other pixel becomes 0.
    Each row is written as comma-separated integers.
    """
    pixels, digits = loadlocal_mnist(images_path='train-images.idx3-ubyte',
                                     labels_path='train-labels.idx1-ubyte')

    # Labels need to be a column to sit next to the pixel matrix.
    label_column = digits.reshape(len(digits), 1)
    binary_pixels = np.where(pixels > 0, 1, 0)

    table = np.concatenate((binary_pixels, label_column), axis=1)
    np.savetxt(fname='data.csv', X=table, delimiter=',', fmt='%d')
def KNN_classifier(nearest_neighours = 3, metric = 'manhattan'):
    """Fit a k-nearest-neighbours classifier on MNIST and report test accuracy.

    The train and test idx files are read from the current working
    directory. The classifier is fitted on the training split and its
    predictions on the test split are compared with the true labels.

    The report uses ``print(...)`` calls with a single argument, which
    behave the same on Python 2 and Python 3; the original ``print``
    statements were a syntax error on Python 3.

    Args:
        nearest_neighours: Number of neighbours, forwarded as
            ``n_neighbors``.
        metric: Distance metric name, forwarded to the classifier.

    Returns:
        The fraction of test samples whose predicted label matches the true
        label.

    Raises:
        ZeroDivisionError: If the test split is empty.
    """
    cwd = os.getcwd()

    training_image_data, training_label_data = loadlocal_mnist(
        images_path=cwd+'/train-images.idx3-ubyte',
        labels_path=cwd+'/train-labels.idx1-ubyte')
    testing_image_data, testing_label_data = loadlocal_mnist(
        images_path=cwd+'/t10k-images.idx3-ubyte',
        labels_path=cwd+'/t10k-labels.idx1-ubyte')

    knn_classifier = KNeighborsClassifier(n_neighbors=nearest_neighours,
                                          metric=metric)
    knn_classifier.fit(training_image_data, training_label_data)

    predicted_labels = knn_classifier.predict(testing_image_data)
    match_count = sum(
        1 for actual, predicted in zip(testing_label_data, predicted_labels)
        if actual == predicted)

    # float() keeps this a true division on Python 2 as well.
    knn_classifier_accuracy = float(match_count) / len(testing_image_data)

    print("\nTesting for n_neighbors = "+str(nearest_neighours)+" and metric = "+str(metric))
    print("-------------------------------------------------")
    print("KNeighborsClassifier accuracy : "+ str(knn_classifier_accuracy))

    return knn_classifier_accuracy