def load_data(self, fileName, indicies):
    """Load one image or label file from the local ``images`` directory.

    The file starts with two big-endian 32-bit words: a magic number and the
    item count. Magic 2051 is treated as an image file (two more words give
    rows and cols, then one unsigned byte per pixel); magic 2049 is treated
    as a label file (one signed byte per item).

    Args:
        fileName: File name, joined onto ``./images``.
        indicies: If truthy, return ``(indices, data)`` where ``indices`` is
            ``range(size)``; otherwise return only ``data``.

    Returns:
        For magic 2051 a ``uint8`` array of shape ``(size, rows, cols)``;
        for magic 2049 an ``array('b')`` of labels; for any other magic
        number the integer ``0``. Wrapped in a tuple when ``indicies`` is
        truthy.
    """
    path = os.path.join(".", "images", fileName)
    data = 0
    with open(path, "rb") as fh:
        magic, size = struct.unpack(">II", fh.read(8))
        indices = range(size)
        if magic == 2051:
            # Magic and size are already consumed, so only rows and cols
            # (8 bytes) remain in the header.
            rows, cols = struct.unpack(">II", fh.read(8))
            raw = pyarray("B", fh.read())
            pixels = rows * cols
            data = np.zeros((size, rows, cols), dtype=np.uint8)
            for i, index in enumerate(indices):
                chunk = raw[index * pixels:(index + 1) * pixels]
                data[i] = np.array(chunk).reshape((rows, cols))
        elif magic == 2049:
            data = pyarray("b", fh.read())
    if indicies:
        return indices, data
    return data
def load_mnist(dataset="training", digits=numpy.arange(10), path=__mnist_path__):
    """Load MNIST images and labels for the requested digits.

    Args:
        dataset: ``"training"`` or ``"testing"``; selects which file pair
            under ``path`` is read.
        digits: Collection of label values to keep; samples with any other
            label are skipped.
        path: Directory containing the ``*.idx3-ubyte`` / ``*.idx1-ubyte``
            files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even if a header is truncated.
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    images = numpy.zeros((len(ind), rows, cols), dtype=numpy.uint8)
    labels = numpy.zeros((len(ind), 1), dtype=numpy.int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = numpy.array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training_data", digits=np.arange(10), path="."):
    """Load MNIST images and labels for the requested digits.

    Args:
        dataset: ``"training_data"`` or ``"test_data"``.
        digits: Collection of label values to keep.
        path: Directory containing the ``*-idx3-ubyte`` / ``*-idx1-ubyte``
            files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, row, col)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is not one of the two accepted names.
    """
    if dataset == "training_data":
        frame_image = os.path.join(path, 'train-images-idx3-ubyte')
        frame_label = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "test_data":
        frame_image = os.path.join(path, 't10k-images-idx3-ubyte')
        frame_label = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("the dataset must be 'training_data' or 'test_data'")

    with open(frame_label, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(frame_image, 'rb') as fimg:
        _, size, row, col = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    # Positions of the samples whose label is one of the requested digits.
    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)
    images = np.zeros((N, row, col), dtype=np.uint8)
    labels = np.zeros((N, 1), dtype=np.int8)
    pixels = row * col
    for i, k in enumerate(ind):
        # Image k occupies the k-th run of row*col bytes in the pixel data.
        images[i] = np.array(img[k * pixels:(k + 1) * pixels]).reshape((row, col))
        labels[i] = lbl[k]
    return images, labels
def read(digits=np.arange(10), dataset="training", path="."):
    """Import the MNIST data set from ``<path>/mnist``.

    Args:
        digits: Collection of label values to keep.
        dataset: ``"training"`` or ``"testing"``.
        path: Directory that contains the ``mnist`` sub-directory.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # Compare by value: `is` on strings only works by interning accident.
    if dataset == "training":
        fname_img = os.path.join(path, 'mnist/train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'mnist/train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 'mnist/t10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'mnist/t10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    images = np.zeros((len(ind), rows, cols), dtype=np.uint8)
    labels = np.zeros((len(ind), 1), dtype=np.int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", path="."):
    """
    Loads MNIST files into 3D numpy arrays.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py
    Source : http://g.sweyla.com/blog/2012/mnist-numpy/

    Args:
        dataset: ``"training"`` or ``"testing"``.
        path: Directory containing the ``*.idx3-ubyte`` / ``*.idx1-ubyte``
            files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape
        ``(size, rows, cols)`` and an ``int8`` array of shape ``(size, 1)``
        covering every sample in the file.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even if a header is truncated.
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    images = np.zeros((size, rows, cols), dtype=np.uint8)
    labels = np.zeros((size, 1), dtype=np.int8)
    pixels = rows * cols
    for i in range(size):
        images[i] = np.array(img[i * pixels:(i + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[i]
    return images, labels
def read(digits, dataset="training", path="data", download=True):
    """
    Loads MNIST files into flat float arrays with one-hot labels.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    The files are read from ``path``; if that fails for any reason and
    ``download`` is true, the gzipped files are fetched from
    http://yann.lecun.com/exdb/mnist/ instead.

    Args:
        digits: Collection of label values to keep.
        dataset: ``"training"`` or ``"testing"``.
        path: Directory holding the uncompressed MNIST files.
        download: When false, a local read failure is re-raised instead of
            falling back to the network.

    Returns:
        ``(images, labels)``: a float array of shape ``(N, rows * cols)``
        holding raw pixel values, and a float array of shape ``(N, 10)``
        with a 1.0 in the column of each sample's label.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # Compare by value: `is` on strings only works by interning accident.
    if dataset == "training":
        img_name = 'train-images-idx3-ubyte'
        lbl_name = 'train-labels-idx1-ubyte'
    elif dataset == "testing":
        img_name = 't10k-images-idx3-ubyte'
        lbl_name = 't10k-labels-idx1-ubyte'
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    try:
        with open(os.path.join(path, lbl_name), 'rb') as flbl:
            struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
            lbl = pyarray("b", flbl.read())
        with open(os.path.join(path, img_name), 'rb') as fimg:
            _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
            img = pyarray("B", fimg.read())
    except Exception as ex:
        # Deliberately broad: any local failure triggers the download path.
        if not download:
            raise
        print(ex)
        print("Downloading files...")
        base_url = "http://yann.lecun.com/exdb/mnist/"

        response = urllib2.urlopen(base_url + img_name + '.gz')
        compressedFile = StringIO.StringIO(response.read())
        decompressedFile = gzip.GzipFile(fileobj=compressedFile)
        _, size, rows, cols = struct.unpack(">IIII", decompressedFile.read(16))
        # Pixels are unsigned bytes; reading them as signed ("b") would turn
        # every value above 127 negative.
        img = pyarray("B", decompressedFile.read())

        response = urllib2.urlopen(base_url + lbl_name + '.gz')
        compressedFile = StringIO.StringIO(response.read())
        decompressedFile = gzip.GzipFile(fileobj=compressedFile)
        _, size = struct.unpack(">II", decompressedFile.read(8))
        lbl = pyarray("b", decompressedFile.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols
    # The builtin `float` is the same dtype as the `float_` alias, which
    # newer NumPy releases no longer provide.
    images = zeros((len(ind), pixels), dtype=float)
    labels = zeros((len(ind), 10), dtype=float)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels])
        labels[i, lbl[k]] = 1.0
    return images, labels
def _load_mnist(path, dataset="training", digits=None, asbytes=False, selection=None, return_labels=True, return_indices=False): files = { 'training': ('train-images-idx3-ubyte', 'train-labels-idx1-ubyte'), 'testing': ('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte'), } try: images_fname = os.path.join(path, files[dataset][0]) labels_fname = os.path.join(path, files[dataset][1]) except KeyError: raise ValueError("Data set must be 'testing' or 'training'") if return_labels or digits is not None: flbl = open(labels_fname, 'rb') magic_nr, size = struct.unpack(">II", flbl.read(8)) labels_raw = pyarray("b", flbl.read()) flbl.close() fimg = open(images_fname, 'rb') magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16)) images_raw = pyarray("B", fimg.read()) fimg.close() if digits: indices = [k for k in range(size) if labels_raw[k] in digits] else: indices = range(size) if selection: indices = indices[selection] images = np.zeros((len(indices), rows, cols), dtype=np.uint8) if return_labels: labels = np.zeros((len(indices)), dtype=np.int8) for i in range(len(indices)): images[i] = np.array( images_raw[indices[i] * rows * cols:(indices[i] + 1) * rows * cols]).reshape((rows, cols)) if return_labels: labels[i] = labels_raw[indices[i]] if not asbytes: images = images.astype(float) / 255.0 ret = (images, ) if return_labels: ret += (labels, ) if return_indices: ret += (indices, ) if len(ret) == 1: return ret[0] return ret
def open_mnist_files(data_fpath, labels_fpath):
    """
    Read an MNIST image file and its label file into NumPy arrays (mnist1).

    Only samples whose label is in 0..9 are kept. Returns ``(images,
    labels)``: a ``uint8`` array of shape ``(N, rows, cols)`` and a
    ``uint8`` array of shape ``(N, 1)``.

    References:
        http://g.sweyla.com/blog/2012/mnist-numpy/
    """
    import struct
    import numpy as np
    from array import array as pyarray

    with open(labels_fpath, 'rb') as label_file:
        _, n_items = struct.unpack(">II", label_file.read(8))
        raw_labels = pyarray("b", label_file.read())

    with open(data_fpath, 'rb') as image_file:
        _, n_items, rows, cols = struct.unpack(">IIII", image_file.read(16))
        raw_pixels = pyarray("B", image_file.read())

    wanted = np.arange(10)
    keep = []
    for k in range(n_items):
        if raw_labels[k] in wanted:
            keep.append(k)

    images = np.zeros((len(keep), rows, cols), dtype=np.uint8)
    labels = np.zeros((len(keep), 1), dtype=np.uint8)
    stride = rows * cols
    for row, k in enumerate(keep):
        chunk = raw_pixels[k * stride:(k + 1) * stride]
        images[row] = np.array(chunk).reshape((rows, cols))
        labels[row] = raw_labels[k]
    return images, labels
def load_mnist(dataset="training_data", digits=np.arange(9), path=".\\Mnist_by_benyuan\\MNIST_data"):
    """Load MNIST images and labels for the requested digits.

    Prints ``os.path.curdir`` and the listing of the current directory
    before reading.

    Args:
        dataset: ``"training_data"`` or ``"testing_data"``.
        digits: Collection of label values to keep.
        path: Directory containing the ``*.idx3-ubyte`` / ``*.idx1-ubyte``
            files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is not one of the two accepted names.
    """
    # NOTE(review): the default `np.arange(9)` excludes the digit 9 —
    # confirm this is intended rather than `np.arange(10)`.
    print(os.path.curdir)
    print(os.listdir(path="."))
    if dataset == "training_data":
        fname_image = os.path.join(path, 'train-images.idx3-ubyte')
        fname_label = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing_data":
        fname_image = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_label = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'training_data' or 'testing_data'")

    # Context managers close the files even if a header is truncated.
    with open(fname_label, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_image, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=np.arange(10), path=".", size=60000):
    """Loads the MNIST raw data.

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the MNIST files.
        size: Accepted for backward compatibility but not used; the sample
            count is always taken from the image file header.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and a list of ``N`` ``int8`` label scalars, where ``N`` is the
        number of samples whose label is in ``digits``.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # Courtesy of https://gist.github.com/mfathirirhas/f24d61d134b014da029a
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, count, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(count) if lbl[k] in digits]
    # Size the outputs by the number of matches, not by the file's sample
    # count; otherwise any digit filter overruns `ind`.
    N = len(ind)
    images = np.zeros((N, rows, cols), dtype=np.uint8)
    labels = np.zeros((N, 1), dtype=np.int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    labels = [label[0] for label in labels]
    return images, labels
def read(digits, dataset="training", path="."):
    """
    Loads MNIST files into a 2D image array and a 1D label array.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        digits: Collection of label values to keep.
        dataset: ``"training"`` or ``"testing"``.
        path: Directory that contains the ``data`` sub-directory.

    Returns:
        ``(images, labels)``: a ``float64`` array of shape
        ``(N, rows * cols)`` with one flattened image per row, and an
        ``int8`` array of shape ``(N,)``.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # Compare by value: `is` on strings only works by interning accident.
    if dataset == "training":
        # Resolved against `path` like the testing files; with the default
        # path "." this names the same files as the bare relative path.
        fname_img = os.path.join(path, 'data/train-images.txt')
        fname_lbl = os.path.join(path, 'data/train-labels.txt')
    elif dataset == "testing":
        fname_img = os.path.join(path, 'data/test-images.txt')
        fname_lbl = os.path.join(path, 'data/test-labels.txt')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols  # taken from the header instead of a fixed 28*28
    images = zeros((len(ind), pixels), dtype=float64)
    labels = zeros(len(ind), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels])
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=np.arange(10), path=".", size=60000):
    """Load MNIST images and labels for the requested digits.

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the MNIST files.
        size: Accepted for backward compatibility but not used; the sample
            count is always taken from the image file header.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and a list of ``N`` ``int8`` label scalars, where ``N`` is the
        number of samples whose label is in ``digits``.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        fname_img = os.path.join(path, "train-images-idx3-ubyte")
        fname_lbl = os.path.join(path, "train-labels-idx1-ubyte")
    elif dataset == "testing":
        fname_img = os.path.join(path, "t10k-images-idx3-ubyte")
        fname_lbl = os.path.join(path, "t10k-labels-idx1-ubyte")
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, "rb") as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, "rb") as fimg:
        _, count, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(count) if lbl[k] in digits]
    # Size the outputs by the number of matches, not by the file's sample
    # count; otherwise any digit filter overruns `ind`.
    N = len(ind)
    images = np.zeros((N, rows, cols), dtype=np.uint8)
    labels = np.zeros((N, 1), dtype=np.int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    labels = [label[0] for label in labels]
    return images, labels
def load(dataset="training", digits=np.arange(10)):
    """Load MNIST images and labels located via ``get_filename``.

    Args:
        dataset: ``"train"``/``"training"`` or ``"test"``/``"testing"``.
            The long spellings are accepted so that the default value
            selects the training files.
        digits: Collection of label values to keep.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is not one of the accepted names.
    """
    import struct
    from array import array as pyarray
    from numpy import array, int8, uint8, zeros

    if dataset in ("train", "training"):
        fname_img = get_filename("data/mnist/train-images-idx3-ubyte")
        fname_lbl = get_filename("data/mnist/train-labels-idx1-ubyte")
    elif dataset in ("test", "testing"):
        fname_img = get_filename("data/mnist/t10k-images-idx3-ubyte")
        fname_lbl = get_filename("data/mnist/t10k-labels-idx1-ubyte")
    else:
        # Fail clearly rather than with an unbound-variable error below.
        raise ValueError("dataset must be 'train'/'training' or 'test'/'testing'")

    with open(fname_lbl, "rb") as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, "rb") as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=range(10), path='data/3/'):
    """
    Load MNIST images and labels for the requested digits.

    Adapted from: http://cvxopt.org/applications/svm/index.html?highlight=mnist

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the MNIST files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even if a header is truncated.
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    images = np.zeros((len(ind), rows, cols), dtype=np.uint8)
    labels = np.zeros((len(ind), 1), dtype=np.int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=numpy.arange(10), path=__mnist_path__):
    """Load MNIST images and labels for the requested digits.

    Args:
        dataset: ``"training"`` or ``"testing"``; selects which file pair
            under ``path`` is read.
        digits: Collection of label values to keep; samples with any other
            label are skipped.
        path: Directory containing the ``*.idx3-ubyte`` / ``*.idx1-ubyte``
            files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        fname_img = os.path.join(path, "train-images.idx3-ubyte")
        fname_lbl = os.path.join(path, "train-labels.idx1-ubyte")
    elif dataset == "testing":
        fname_img = os.path.join(path, "t10k-images.idx3-ubyte")
        fname_lbl = os.path.join(path, "t10k-labels.idx1-ubyte")
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even if a header is truncated.
    with open(fname_lbl, "rb") as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, "rb") as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    images = numpy.zeros((len(ind), rows, cols), dtype=numpy.uint8)
    labels = numpy.zeros((len(ind), 1), dtype=numpy.int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = numpy.array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_data(training=True):
    """Pair augmented MNIST input volumes with their labels.

    Adapted from http://g.sweyla.com/blog/2012/mnist-numpy/

    Args:
        training: Read the ``train-*`` files from ``./data`` when true,
            the ``t10k-*`` files otherwise.

    Returns:
        ``zip(inputs, labels)`` where each input is the result of
        ``augment(V, 24)`` for one image scaled to ``[0, 1]`` and each label
        is a signed byte from the label file.
    """
    path = './data'
    if training:
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    else:
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')

    # Inputs
    with open(fname_img, 'rb') as fimg:
        struct.unpack(">IIII", fimg.read(16))  # skip (and length-check) header
        imgs = pyarray("B", fimg.read())

    # One image is 784 consecutive bytes.
    imgs = [imgs[n:n + 784] for n in range(0, len(imgs), 784)]
    inputs = []
    # A single Vol is reused as scratch space for every image.
    # NOTE(review): this relies on augment() returning a new object rather
    # than the Vol it was given — confirm.
    V = Vol(28, 28, 1, 0.0)
    for img in imgs:
        V.w = [(px / 255.0) for px in img]
        inputs.append(augment(V, 24))

    # Outputs
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        labels = pyarray("b", flbl.read())

    return zip(inputs, labels)
def load_mnist(dataset="training", digits=np.arange(10), path='.'):
    """Load MNIST images and labels for the requested digits.

    Args:
        dataset: ``"testing"`` selects the ``t10k-*`` files; any other
            value selects the ``train-*`` files (previously this argument
            was ignored and the training files were always read).
        digits: Collection of label values to keep.
        path: Directory containing the ``*.idx3-ubyte`` / ``*.idx1-ubyte``
            files.

    Returns:
        ``(images, labels)``: a float array of shape ``(N, rows, cols)`` and
        a float array of shape ``(N,)``, in file order.
    """
    prefix = 't10k' if dataset == "testing" else 'train'
    fname_img = os.path.join(path, prefix + '-images.idx3-ubyte')
    fname_lbl = os.path.join(path, prefix + '-labels.idx1-ubyte')

    # Context managers close the files even if a header is truncated.
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    images = np.zeros((len(ind), rows, cols))
    labels = np.zeros(len(ind))
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=range(10), path=DATA_PATH):
    """Load MNIST images and labels for the requested digits.

    Args:
        dataset: ``"training"`` or ``"testing"``; selects between the
            ``TRAIN_*_NAME`` and ``TEST_*_NAME`` file names.
        digits: Collection of label values to keep.
        path: Directory containing the files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # Set the filename
    if dataset == "training":
        fname_img = os.path.join(path, TRAIN_IMG_NAME)
        fname_lbl = os.path.join(path, TRAIN_LBL_NAME)
    elif dataset == "testing":
        fname_img = os.path.join(path, TEST_IMG_NAME)
        fname_lbl = os.path.join(path, TEST_LBL_NAME)
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even if a header is truncated.
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=np.arange(10), path="."):
    """
    Loads MNIST files into 3D numpy arrays.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: ``"training"`` (``train-*`` files) or ``"testing"``
            (``test-*`` files).
        digits: Collection of label values to keep.
        path: Directory containing the files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 'test-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'test-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even if a header is truncated.
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=np.arange(10), path='image_data'):
    """
    Loads MNIST files into a 3D numpy array.

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the MNIST files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even if a header is truncated.
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    indices = [k for k in range(size) if lbl[k] in digits]
    images = zeros((len(indices), rows, cols), dtype=uint8)
    labels = zeros((len(indices), 1), dtype=int8)
    pixels = rows * cols
    for i, ind in enumerate(indices):
        images[i] = array(img[ind * pixels:(ind + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[ind]
    return images, labels
def load_data(training=True):
    """Pair MNIST input volumes with their labels.

    Adapted from http://g.sweyla.com/blog/2012/mnist-numpy/

    Args:
        training: Read the ``train-*`` files from ``./data`` when true,
            the ``t10k-*`` files otherwise.

    Returns:
        ``zip(inputs, labels)`` where each input is a ``Vol(28, 28, 1, 0.0)``
        whose ``w`` holds one image scaled to ``[0, 1]`` and each label is a
        signed byte from the label file.
    """
    path = "./data"
    if training:
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    else:
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')

    # Inputs
    with open(fname_img, 'rb') as fimg:
        struct.unpack(">IIII", fimg.read(16))  # skip (and length-check) header
        imgs = pyarray("B", fimg.read())

    # One image is 784 consecutive bytes.
    imgs = [imgs[n:n + 784] for n in range(0, len(imgs), 784)]
    inputs = []
    for img in imgs:
        V = Vol(28, 28, 1, 0.0)
        V.w = [(px / 255.0) for px in img]
        inputs.append(V)

    # Outputs
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        labels = pyarray("b", flbl.read())

    return zip(inputs, labels)
def getData(digits, dataset="training", path="."):
    """Load MNIST as flat rows with a bias term in the first column.

    Modified from http://g.sweyla.com/blog/2012/mnist-numpy/, which returns
    a 3D array; a 2D array suits this code better.

    Args:
        digits: Collection of label values to keep.
        dataset: ``"training"`` or ``"testing"``.
        path: Directory that contains the ``rawdata`` sub-directory.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape
        ``(N, rows * cols)`` whose first column is overwritten with 1, and
        an ``int8`` array of shape ``(N, 1)``.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # SciPy's top-level NumPy aliases (sp.zeros, sp.array, ...) are
    # deprecated, so use NumPy directly.
    import numpy as np

    # Compare by value: `is` on strings only works by interning accident.
    # Paths are built with os.path.join so they work on every platform and
    # honour `path`.
    if dataset == "training":
        fname_img = os.path.join(path, 'rawdata', 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'rawdata', 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 'rawdata', 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'rawdata', 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols
    images = np.zeros((len(ind), pixels), dtype=np.uint8)
    labels = np.zeros((len(ind), 1), dtype=np.int8)
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * pixels:(k + 1) * pixels])
        # Classification is done with w*x, so the bias b is absorbed into
        # the sample as (1, x1, x2, ..., xn); this replaces the first pixel.
        images[i][0] = 1
        labels[i] = lbl[k]
    return images, labels
def load_mnist(image_file, label_file, path="."):
    """Load an MNIST image/label file pair as flat rows.

    Args:
        image_file: Image file name, relative to ``path``.
        label_file: Label file name, relative to ``path``.
        path: Directory containing both files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape
        ``(N, rows * cols)`` with one flattened image per row, and an
        ``int8`` array of shape ``(N, 1)``. Only samples labelled 0..9 are
        kept.
    """
    digits = np.arange(10)
    fname_image = os.path.join(path, image_file)
    fname_label = os.path.join(path, label_file)

    # Context managers close the files even if a header is truncated.
    with open(fname_label, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_image, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols
    images = zeros((len(ind), pixels), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((1, pixels))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=range(10), path='/Users/SwatzMac/Documents/Study/Classes/Machine Learning, Statistics and Python/Python_Programs/PCA'):
    """Load MNIST images and labels for the requested digits.

    Prints the number of selected samples as ``N = <count>``.

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the MNIST files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        # NOTE(review): these names lack the "3"/"1" after "idx" that the
        # training names have — confirm the files on disk are named this way.
        fname_img = os.path.join(path, 't10k-images-idx-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even if a header is truncated.
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)
    print('N =', N)
    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training_data", digits=np.arange(10), path="."):
    """Load MNIST images and labels from ``../database``.

    Args:
        dataset: ``"training_data"`` or ``"testing_data"``.
        digits: Collection of label values to keep.
        path: Not used; the files are always read from fixed locations
            relative to the current working directory.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is not one of the two accepted names.
    """
    if dataset == "training_data":
        fname_image = '../database/train-images-idx3-ubyte/data'
        fname_label = '../database/train-labels-idx1-ubyte/data'
    elif dataset == "testing_data":
        fname_image = '../database/t10k-images-idx3-ubyte/data'
        fname_label = '../database/t10k-labels-idx1-ubyte/data'
    else:
        raise ValueError("dataset must be 'training_data' or 'testing_data'")

    # Context managers close the files even if a header is truncated.
    with open(fname_label, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_image, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_alldata(self, fname_images, fname_labels, digits):
    """Load images and labels that can then be tested on.

    A warning is issued (and loading continues) when the label file's magic
    number is not 2049 or the image file's is not 2051.

    Args:
        fname_images: Path of the image file.
        fname_labels: Path of the label file.
        digits: Collection of label values to keep.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and a ``uint8`` array of shape ``(N, 1)``, in file order.
    """
    # Open and read the data; the context managers close the files even if
    # a header is truncated.
    with open(fname_labels, 'rb') as label_file:
        magic_number, size = struct.unpack(">II", label_file.read(8))
        if magic_number != 2049:
            warnings.warn("Wrong magic number")
        label_raw = pyarray("b", label_file.read())

    with open(fname_images, 'rb') as image_file:
        magic_number, size, rows, cols = struct.unpack(">IIII", image_file.read(16))
        if magic_number != 2051:
            warnings.warn("Wrong magic number")
        image_raw = pyarray("B", image_file.read())

    # Make an index we can iterate across to load in the values
    index = [k for k in range(size) if label_raw[k] in digits]
    length = len(index)

    # Initialize arrays in which we can store the values, set all to zeros
    images = np.zeros((length, rows, cols), dtype=np.uint8)
    labels = np.zeros((length, 1), dtype=np.uint8)

    # Make the data fit nicely into our matrix-like arrays
    pixels = rows * cols
    for i, k in enumerate(index):
        images[i] = np.array(image_raw[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = label_raw[k]
    return images, labels
def load(dataset='train', digits=np.arange(10)):
    """Load MNIST images and labels located via ``get_filename``.

    ``dataset`` is ``'train'`` or ``'test'``; ``digits`` is the collection
    of label values to keep. Returns ``(images, labels)``: a ``uint8`` array
    of shape ``(N, rows, cols)`` and an ``int8`` array of shape ``(N, 1)``.
    """
    if dataset == 'train':
        image_path = get_filename('data/mnist/train-images-idx3-ubyte')
        label_path = get_filename('data/mnist/train-labels-idx1-ubyte')
    elif dataset == "test":
        image_path = get_filename('data/mnist/t10k-images-idx3-ubyte')
        label_path = get_filename('data/mnist/t10k-labels-idx1-ubyte')

    with open(label_path, 'rb') as label_file:
        _, n_items = struct.unpack(">II", label_file.read(8))
        raw_labels = pyarray("b", label_file.read())

    with open(image_path, 'rb') as image_file:
        _, n_items, rows, cols = struct.unpack(">IIII", image_file.read(16))
        raw_pixels = pyarray("B", image_file.read())

    keep = []
    for k in range(n_items):
        if raw_labels[k] in digits:
            keep.append(k)

    images = zeros((len(keep), rows, cols), dtype=uint8)
    labels = zeros((len(keep), 1), dtype=int8)
    stride = rows * cols
    for row, k in enumerate(keep):
        chunk = raw_pixels[k * stride:(k + 1) * stride]
        images[row] = array(chunk).reshape((rows, cols))
        labels[row] = raw_labels[k]
    return images, labels
def ExtractingData(data_dir='/Users/manaswipodduturi/Documents/Research/MachineLearning/Data/MNIST'):
    """Load the MNIST training set as flat rows.

    The process working directory is changed to ``data_dir`` as a side
    effect.

    Args:
        data_dir: Directory containing ``train-images-idx3-ubyte`` and
            ``train-labels-idx1-ubyte``. Defaults to the location that used
            to be hard-coded.

    Returns:
        ``(images, labels, rows, cols)``: a ``uint8`` array of shape
        ``(N, rows * cols)``, a ``uint8`` array of shape ``(N, 1)``, and the
        image dimensions read from the header. Only samples labelled 0..9
        are kept.
    """
    os.chdir(data_dir)
    img_data = os.path.join('.', 'train-images-idx3-ubyte')
    lbl_data = os.path.join('.', 'train-labels-idx1-ubyte')

    with open(img_data, 'rb') as file_img:
        _, size, rows, cols = struct.unpack(">IIII", file_img.read(16))
        # Pixels are unsigned bytes. The signed read used before only gave
        # the right values because storing into a uint8 array wraps them.
        img = pyarray("B", file_img.read())
    with open(lbl_data, 'rb') as file_lbl:
        # The sample count used below comes from the label header.
        _, size = struct.unpack(">II", file_lbl.read(8))
        lbl = pyarray("b", file_lbl.read())

    digits = np.arange(10)
    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols
    images = np.zeros((len(ind), pixels), dtype=np.uint8)
    labels = np.zeros((len(ind), 1), dtype=np.uint8)
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * pixels:(k + 1) * pixels])
        labels[i] = lbl[k]
    return images, labels, rows, cols
def getData(digits, dataset="training", path="."):
    """Load MNIST as flat rows with a bias term in the first column.

    Modified from http://g.sweyla.com/blog/2012/mnist-numpy/, which returns
    a 3D array; a 2D array suits this code better.

    Args:
        digits: Collection of label values to keep.
        dataset: ``"training"`` or ``"testing"``.
        path: Directory that contains the ``rawdata`` sub-directory.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape
        ``(N, rows * cols)`` whose first column is overwritten with 1, and
        an ``int8`` array of shape ``(N, 1)``.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # SciPy's top-level NumPy aliases (sp.zeros, sp.array, ...) are
    # deprecated, so use NumPy directly.
    import numpy as np

    # Compare by value: `is` on strings only works by interning accident.
    # Paths are built with os.path.join so they work on every platform and
    # honour `path`.
    if dataset == "training":
        fname_img = os.path.join(path, 'rawdata', 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'rawdata', 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 'rawdata', 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'rawdata', 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols
    images = np.zeros((len(ind), pixels), dtype=np.uint8)
    labels = np.zeros((len(ind), 1), dtype=np.int8)
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * pixels:(k + 1) * pixels])
        # Classification is done with w*x, so the bias b is absorbed into
        # the sample as (1, x1, x2, ..., xn); this replaces the first pixel.
        images[i][0] = 1
        labels[i] = lbl[k]
    return images, labels
def load_feature_vector(digit, dataset="training"):
    """Load the flattened images of a single digit from the ``t10k`` files.

    Args:
        digit: Label value to select.
        dataset: Not used; the ``t10k-*`` files in the current directory
            are always read.

    Returns:
        ``(images, labels)``: a float array of shape
        ``(count, rows * cols)`` with one flattened image per row, and an
        ``int8`` array of shape ``(count, 1)``.
    """
    path = "."
    fname_image_array = os.path.join(path, 't10k-images-idx3-ubyte')
    fname_label_array = os.path.join(path, 't10k-labels-idx1-ubyte')

    # Context managers close the files even if a header is truncated.
    with open(fname_label_array, 'rb') as file_label:
        struct.unpack(">II", file_label.read(8))  # skip (and length-check) header
        label_array = pyarray("b", file_label.read())
    with open(fname_image_array, 'rb') as file_image:
        _, size, rows, cols = struct.unpack(">IIII", file_image.read(16))
        image_array = pyarray("B", file_image.read())

    matching_index_array = [k for k in range(size) if label_array[k] == digit]
    digit_cnt = len(matching_index_array)
    no_of_features = rows * cols

    images = zeros((digit_cnt, no_of_features), dtype=float)
    labels = zeros((digit_cnt, 1), dtype=int8)
    for i, k in enumerate(matching_index_array):
        start = k * no_of_features
        images[i] = numpy.array(image_array[start:start + no_of_features])
        labels[i] = label_array[k]
    return images, labels
def load_mnist(self, dataset="training", digits=np.arange(10), path="."):
    '''
    Load the requested data set and its labels.

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the ``*.idx3-ubyte`` / ``*.idx1-ubyte``
            files.

    Returns:
        ``(images, labels)``: a ``uint8`` array of shape ``(N, rows, cols)``
        and an ``int8`` array of shape ``(N, 1)``, in file order.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    '''
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even if a header is truncated.
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # skip (and length-check) header
        lbl = pyarray("b", flbl.read())
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=np.arange(10), path="."):
    """Load MNIST files into 3D numpy arrays.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: "training" or "testing"; selects the train-* or t10k-* pair.
        digits: Collection of label values to keep.
        path: Directory holding the four ``-ubyte`` files.

    Returns:
        Tuple ``(images, labels)``: a uint8 array of shape ``(N, rows, cols)``
        and an int8 array of shape ``(N, 1)``.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load(dataset="training", digits=np.arange(10)):
    """Load the images and labels of the requested digits from bundled MNIST files.

    File locations are resolved through ``get_filename``.

    Args:
        dataset: "train" or "test". "training" and "testing" are accepted as
            aliases; the default "training" previously matched no branch and
            failed with an unbound-variable error.
        digits: Collection of label values to keep.

    Returns:
        Tuple ``(images, labels)``: a uint8 array of shape ``(N, rows, cols)``
        and an int8 array of shape ``(N, 1)``.

    Raises:
        ValueError: If ``dataset`` names neither split.
    """
    import struct
    from array import array as pyarray
    from numpy import array, int8, uint8, zeros

    if dataset in ("train", "training"):
        fname_img = get_filename('data/mnist/train-images-idx3-ubyte')
        fname_lbl = get_filename('data/mnist/train-labels-idx1-ubyte')
    elif dataset in ("test", "testing"):
        fname_img = get_filename('data/mnist/t10k-images-idx3-ubyte')
        fname_lbl = get_filename('data/mnist/t10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'train' or 'test'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=arange(10), path="."):
    """Load MNIST samples as flat pixel vectors with network-ready labels.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: "training" or "testing"; selects the train-* or t10k-* pair.
        digits: Collection of label values to keep.
        path: Directory holding the ``*.idx3-ubyte`` / ``*.idx1-ubyte`` files.

    Returns:
        Tuple ``(data, Label)``. ``data`` is a list with one 1-D uint8 array of
        ``rows * cols`` pixels per sample. For "training", ``Label`` is a list
        of ``vectorized_result(y)`` values, one per label row; otherwise it is
        a 1-D array of the raw labels.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]

    # flatten() copies each image into its own row-major 1-D vector, replacing
    # the former pixel-by-pixel Python loops.
    data = [image.flatten() for image in images]

    if dataset == "training":
        Label = [vectorized_result(y) for y in labels]
    else:
        Label = labels.flatten()
    return data, Label
def load_mnist(dataset="training", digits=np.arange(10), path='.', random=False, perDigitNum=100):
    """Load MNIST files into 3D numpy arrays, in shuffled order.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: "training" or "testing"; selects the train-* or t10k-* pair.
        digits: Collection of label values to keep.
        path: Directory holding the four ``-ubyte`` files.
        random: When not equal to ``False``, at most ``perDigitNum`` samples
            are copied per digit.
        perDigitNum: Per-digit cap used when ``random`` is enabled.

    Returns:
        Tuple ``(images, labels)``: float arrays of shape ``(N, rows, cols)``
        and ``(N,)`` where ``N`` counts every sample matching ``digits``.
        The sample order is shuffled on every call. In capped mode the slots
        of skipped samples stay zero-filled (image and label), so callers must
        expect padding rows.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    counter = [0] * 10  # samples copied so far for each digit 0-9
    pixels = rows * cols

    images = zeros((len(ind), rows, cols))
    labels = zeros((len(ind)))

    shuffle(ind)  # randomise the order in which samples are visited
    capped = random != False  # same test as the original `random == False` branch
    for i, k in enumerate(ind):
        digit = lbl[k]
        if capped:
            if counter[digit] >= perDigitNum:
                continue  # slot i is intentionally left zero-filled
            counter[digit] += 1
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = digit
    return images, labels
def _read_idx_subset(image_file, label_file, digits):
    """Read one image/label file pair and keep the samples whose label is in digits."""
    # Context managers close the files even when a header fails to unpack.
    with open(label_file, 'rb') as filelabel:
        _magic, size = struct.unpack(">II", filelabel.read(8))
        labelArr = pyarray("b", filelabel.read())

    with open(image_file, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if labelArr[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = labelArr[k]
    return images, labels


def read_data(digits=np.arange(10)):
    """Load the MNIST training and test sets from the current directory.

    Args:
        digits: Collection of label values to keep in both sets.

    Returns:
        Tuple ``(train_images, train_labels, test_images, test_labels)``.
        Only the training images are scaled by 1/255 (float array); the test
        images are returned as raw uint8 values.
        NOTE(review): confirm whether the test images should be scaled too.
        Label arrays are int8 with shape ``(N, 1)``.
    """
    imageFile = './train-images.idx3-ubyte'
    labelFile = './train-labels.idx1-ubyte'
    imageFile1 = './t10k-images.idx3-ubyte'
    labelFile1 = './t10k-labels.idx1-ubyte'

    images, labels = _read_idx_subset(imageFile, labelFile, digits)
    images1, labels1 = _read_idx_subset(imageFile1, labelFile1, digits)

    # Divide by a float so the scaling is a true division on Python 2 as well.
    return images / 255.0, labels, images1, labels1
def load_mnist(dataset="training", digits=numpy.arange(10), path=__mnist_path__):
    """Fetch a subset of the MNIST training or test set.

    This is the main interface between the MNIST files and machine-learning
    code. The subset is determined by ``digits``: for example, with
    ``digits = [5, 8]`` it returns all and only the images of 5's and 8's.

    Training and test data are stored as two pairs of files, each pair holding
    the images and the labels in separate files. This function keeps the same
    distinction and returns images and labels as separate arrays; it does not
    package a case into a combined feature vector and label.

    To test that loading works, load this module and call
    ``show_avg_digit(3)``, which should produce a picture of the "average 3"
    in the training set.

    Args:
        dataset: "training" or "testing".
        digits: Collection of label values to keep.
        path: Complete path of the directory that stores the four ``-ubyte``
            files.

    Returns:
        Tuple ``(images, labels)``:

        1) images (i.e. features) - a 3-dimensional uint8 numpy array whose
           first dimension is the index of the image in the subset and whose
           remaining two dimensions are the rows and columns of each image.
        2) labels - a 2-dimensional int8 numpy array whose first dimension is
           the number of images in the subset and whose second dimension is
           always 1.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = numpy.zeros((len(ind), rows, cols), dtype=numpy.uint8)
    labels = numpy.zeros((len(ind), 1), dtype=numpy.int8)
    for i, k in enumerate(ind):
        images[i] = numpy.array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(self, dataset, mode, imgSize, digits, path):
    """Load MNIST files into 3D numpy arrays, resized to ``imgSize`` squares.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: "training" or "testing"; selects the train-* or t10k-* pair.
        mode: "BW" additionally passes every resized image through
            ``self.img2bw(image, self.bwThres)``; any other value keeps the
            resized image as is.
        imgSize: Side length of the resized output images.
        digits: Collection of label values to keep.
        path: Directory holding the ``*.idx3-ubyte`` / ``*.idx1-ubyte`` files.

    Returns:
        Tuple ``(images, labels)``: a uint8 array of shape
        ``(N, imgSize, imgSize)`` and an int8 array of shape ``(N, 1)``.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols
    to_bw = mode == "BW"

    images = np.zeros((len(ind), imgSize, imgSize), dtype=np.uint8)
    labels = np.zeros((len(ind), 1), dtype=np.int8)
    for i, k in enumerate(ind):
        tile = np.array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        # "bilinear" is imresize's default interpolation, so passing it
        # explicitly serves both the "BW" and the plain mode.
        images[i] = scipy.misc.imresize(tile, (imgSize, imgSize), "bilinear")
        if to_bw:
            images[i] = self.img2bw(images[i], self.bwThres)
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=np.arange(10), path="."):
    """Load MNIST as flat float16 feature rows with labels shifted by one.

    Note that you first need to download the files
        http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
        http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
        http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
        http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
    and unpack them.

    Args:
        dataset: "training" or "testing"; selects the train-* or t10k-* pair.
        digits: Collection of label values to keep.
        path: Directory holding the unpacked files.

    Returns:
        Tuple ``(images, labels)``. ``images`` is a float16 array of shape
        ``(N, rows * cols)`` (flattened for a plain DNN); ``labels`` is a 1-D
        int8 array holding each stored label plus one.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]

    labels += 1
    labels = labels.flatten()

    images = images.astype(np.float16)
    images = images.reshape((images.shape[0], -1))  # FLATTENING for plain DNN
    return images, labels
def getImagesArray(fname):
    """Read an image file into a 2-D array with one flattened image per row.

    Args:
        fname: Path of the image file; its 16-byte header holds four
            big-endian unsigned ints (magic, image count, rows, cols).

    Returns:
        Tuple ``(img, size_img)``: the pixel array of shape
        ``(size_img, rows * cols)`` and the image count from the header.
    """
    # The context manager closes the file even if unpacking the header fails.
    with open(fname, 'rb') as fimg:
        magic_nr, size_img, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())
    img = np.asarray(img).reshape(size_img, rows * cols)
    return img, size_img
def Load(self, filename, output_type=numpy.ubyte):
    """Load an IDX-style file of unsigned bytes into a numpy array.

    The first 4 bytes of the file are read as:
        byte 0: 0
        byte 1: 0
        byte 2: datatype enum
        byte 3: number of dimensions
    followed by one big-endian unsigned int per dimension and then the data.
    Only unsigned-byte data is supported for now; the datatype byte is read
    but not checked.

    Args:
        filename: Path of the file to read.
        output_type: dtype of the returned array.

    Returns:
        A vector when the file has one dimension; otherwise a matrix of shape
        ``(dim_sizes[0], product of the remaining dimensions)``.
    """
    print("Loading ",filename)

    # The context manager closes the file even if the header is malformed.
    with open(filename, 'rb') as fh:
        z1, z2, type_code, dimensions = struct.unpack(">BBBB", fh.read(4))

        # Read size in each dimension
        dim_sizes = [struct.unpack(">I", fh.read(4))[0] for _ in range(dimensions)]

        # One byte per element, so the element count is also the byte count.
        data_size = 1
        for extent in dim_sizes:
            data_size *= extent

        # Read the data as a C-array
        all_data = pyarray('B', fh.read(data_size))

    # Return a vector if dimensions = 1
    if dimensions == 1:
        return numpy.array(all_data, output_type)

    # Return a matrix if dimension > 1. Floor division keeps the shape
    # integral; true division yields a float that reshape() rejects.
    reshape_dimension = (dim_sizes[0], data_size // dim_sizes[0])
    return numpy.array(all_data, output_type).reshape(reshape_dimension)
def load(start, number_samples):
    """Load a contiguous window of samples from the mnist8m files.

    Args:
        start: Index of the first sample to read.
        number_samples: Number of consecutive samples to read.

    Returns:
        Tuple ``(images, labels)``. ``images`` is a float array of shape
        ``(number_samples, rows * cols)`` scaled by 1/255; ``labels`` is an
        int8 array holding the labels available in the requested window.

    Raises:
        ValueError: If the image file holds fewer pixels than requested.
    """
    file_name_img = '../dataset/infimnist/mnist8m-patterns-idx3-ubyte'
    file_name_lbl = '../dataset/infimnist/mnist8m-labels-idx1-ubyte'

    # Context managers close the files even when a header fails to unpack.
    with open(file_name_lbl, 'rb') as file_lbl:
        _magic, size = struct.unpack(">II", file_lbl.read(8))
        file_lbl.seek(start, 1)
        # frombuffer replaces the deprecated binary mode of np.fromstring;
        # copy() keeps the result writable, as fromstring's output was.
        labels = np.frombuffer(file_lbl.read(number_samples), dtype=np.int8).copy()

    with open(file_name_img, 'rb') as file_img:
        _magic, size, rows, cols = struct.unpack(">IIII", file_img.read(16))
        pixels = rows * cols
        file_img.seek(start * pixels, 1)
        # One bulk read instead of one read() call per image.
        raw = file_img.read(number_samples * pixels)

    images = np.frombuffer(raw, dtype=np.uint8).reshape(number_samples, pixels)

    # normalize
    images = images / 255.0
    return images, labels
def load_mnist_from_binary(dataset="training", digits=numpy.arange(10), path="."):
    """Load MNIST as normalised flat images with one-hot targets.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: "training" or "testing"; selects the train-* or t10k-* pair.
        digits: Collection of label values to keep.
        path: Directory holding the four ``-ubyte`` files.

    Returns:
        Tuple ``(images, targets)``. ``images`` is a float array of shape
        ``(N, rows * cols)`` scaled by 1/255; ``targets`` is an int8 array of
        shape ``(N, 10)`` with a 1 in the column of each sample's label.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    # The builtin float is the same 64-bit dtype as the numpy `float_` alias,
    # which newer numpy releases no longer provide.
    images = zeros((len(ind), pixels), dtype=float)
    targets = zeros((len(ind), 10), dtype=int8)
    for i, k in enumerate(ind):
        if i % 1000 == 0:
            logging.info("Loaded " + str(i) + " MNIST images")
        # One array division replaces the former per-pixel Python loop.
        images[i] = array(img[k * pixels:(k + 1) * pixels]) / 255.0
        targets[i][lbl[k]] = 1
    return images, targets
def gather(self, time):
    """Sample ``self.func(time)`` and append the result to ``self.arr``.

    A ``None`` sample is ignored. While ``self.arr`` is empty the storage
    layout is (re)chosen from the sample: a scalar sample gets a single
    double array (``arr_flat`` True), anything else gets one double array per
    component (``arr_flat`` False).
    """
    sample = self.func(time)
    if sample is None:
        return

    if not len(self.arr):
        is_scalar = np.shape(sample) == ()
        if is_scalar:
            self.arr = pyarray('d')
        else:
            self.arr = [pyarray('d') for _ in sample]
        self.arr_flat = is_scalar

    if not self.arr_flat:
        # One stored series per component of the sample.
        for component, series in zip(sample, self.arr):
            series.append(component)
        return

    self.arr.append(sample)
def __init__ ( self, c ):
    """Load the table ``g<c>.dat`` stored next to this module into ``self.gfolds``.

    Args:
        c: Table identifier; it is converted to ``str`` and must be one of
            ``available_c``.

    Raises:
        InvalidCError: If ``str(c)`` is not in ``available_c``.
    """
    path = os.path.dirname(__file__)
    c = str(c)
    if c not in available_c:
        raise InvalidCError(c)

    self.gfolds = pyarray('f',[])
    # open() replaces the Python-2-only file() builtin, and the context
    # manager closes the handle that was previously left open.
    with open(os.path.join(path,'g'+str(c)+'.dat'),'rb') as table:
        self.gfolds.fromfile(table,table_size)
def hexRead():
    """Print the header and the signed byte values of ``a.hex``.

    Reads ``a.hex`` from the current directory: the first 8 bytes are printed
    as two big-endian unsigned ints on one line, then every remaining byte is
    printed as a signed value on its own line.
    """
    # The context manager closes the file even if the header is too short.
    with open('a.hex', 'rb') as f:
        n, s = struct.unpack(">II", f.read(8))
        # Single-argument print() calls behave the same on Python 2 and 3.
        print("%d %d" % (n, s))
        arr = pyarray("b", f.read())
        for i in arr:
            print(i)
def solve_tsp(distances, optim_steps=3, pairs_by_dist=pairs_by_dist):
    """Given a distance matrix, finds a solution for the TSP problem.
    Returns list of vertex indices.
    Guarantees that the first index is lower than the last

    The path is built greedily: node pairs are taken in the order produced by
    ``pairs_by_dist(N, distances)`` and joined whenever both nodes still have
    a free end and belong to different segments. The result is then refined
    by up to ``optim_steps`` calls of ``optimize_solution`` and converted to a
    path by ``restore_path``.

    distances: square matrix (sequence of N rows of length N).
    optim_steps: maximum number of optimisation passes; the loop stops early
        when a pass reports zero optimisations.
    pairs_by_dist: callable ``(N, distances)`` returning the candidate pairs.
        NOTE(review): presumably sorted by increasing distance - confirm.

    Raises ValueError if any row's length differs from the number of rows.
    NOTE(review): the ordering guarantee above comes from the original
    docstring; it would have to be enforced by restore_path - confirm.
    """
    N = len(distances)
    if N == 0:
        return []
    if N == 1:
        return [0]
    for row in distances:
        if len(row) != N:
            raise ValueError("Matrix is not square")

    # State of the TSP solver algorithm.
    node_valency = pyarray('i', [2]) * N  # Initially, each node has 2 sticky ends
    # for each node, stores 1 or 2 connected nodes
    connections = [[] for i in xrange(N)]

    def join_segments(sorted_pairs):
        # segments of nodes. Initially, each segment contains only 1 node
        # segments[k] is the list object of the segment node k belongs to, so
        # two nodes share a segment exactly when their entries are the same
        # object.
        segments = [[i] for i in xrange(N)]

        def filtered_pairs():
            # Generate the pairs that can still be joined: both nodes have a
            # free end and they are not already in the same segment.
            for ij in sorted_pairs:
                i, j = ij
                if not node_valency[i] or\
                        not node_valency[j] or\
                        (segments[i] is segments[j]):
                    continue
                yield ij

        # A path through N nodes needs exactly N - 1 joins.
        for i, j in islice(filtered_pairs(), N - 1):
            node_valency[i] -= 1
            node_valency[j] -= 1
            connections[i].append(j)
            connections[j].append(i)
            # Merge segment J into segment I.
            seg_i = segments[i]
            seg_j = segments[j]
            # Always fold the shorter segment into the longer one so fewer
            # entries of `segments` need re-pointing.
            if len(seg_j) > len(seg_i):
                seg_i, seg_j = seg_j, seg_i
                i, j = j, i
            for node_idx in seg_j:
                segments[node_idx] = seg_i
            seg_i.extend(seg_j)

    join_segments(pairs_by_dist(N, distances))

    for passn in range(optim_steps):
        nopt, dtotal = optimize_solution(distances, connections)
        if nopt == 0:
            break

    path = restore_path(connections)
    return path
def load_feature_vector(digit, dataset="training"):
    """Return the flattened images and labels of one digit, logging progress.

    Reads ``t10k-images-idx3-ubyte`` and ``t10k-labels-idx1-ubyte`` from the
    current working directory, prints progress banners and keeps only the
    samples labelled ``digit``.

    Args:
        digit: Label value to select.
        dataset: Accepted for interface compatibility only; it is never
            consulted and the t10k files are always read.
            NOTE(review): the banners say "TRAINING set" although the t10k
            files are read - confirm which set is intended.

    Returns:
        Tuple ``(images, labels)``. ``images`` is a float array of shape
        ``(count, rows * cols)``; ``labels`` is an int8 array of shape
        ``(count, 1)``.
    """
    path = "."
    fname_image_array = os.path.join(path, 't10k-images-idx3-ubyte')
    fname_label_array = os.path.join(path, 't10k-labels-idx1-ubyte')

    # Single-argument print() calls behave the same on Python 2 and 3, and
    # the context managers close the files even when a header is malformed.
    with open(fname_label_array, 'rb') as file_label:
        magic_nr, size = struct.unpack(">II", file_label.read(8))
        print("********************************************")
        print(" READING CLASS LABEL of TRAINING set")
        print("********************************************")
        print("No of class labels :%d" % (size))
        label_array = pyarray("b", file_label.read())
        print(" Done reading class label from training set")

    with open(fname_image_array, 'rb') as file_image:
        magic_nr, size, rows, cols = struct.unpack(">IIII", file_image.read(16))
        print("********************************************")
        print(" READING FEATURE VECTOR of TRAINING set")
        print("********************************************")
        print("No-of-records:%d, No-of-rows:%d, No-of-col:%d" % (size, rows, cols))
        print("********************************************")
        image_array = pyarray("B", file_image.read())
        print(" Done reading feature vector from training set")

    matching_index_array = [k for k in range(size) if label_array[k] == digit]
    feature_vector = rows * cols

    images = zeros((len(matching_index_array), feature_vector), dtype=float)
    labels = zeros((len(matching_index_array), 1), dtype=int8)
    for i, k in enumerate(matching_index_array):
        start = k * feature_vector
        images[i] = numpy.array(
            image_array[start:start + feature_vector]).reshape((feature_vector))
        labels[i] = label_array[k]
    return images, labels
def read(digits, dataset = "training", path = "."):
    """Load MNIST as normalised, per-image transposed flat vectors.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        digits: Collection of label values to keep.
        dataset: "training" or "testing"; selects the train-* or t10k-* pair.
        path: Directory holding the four ``-ubyte`` files.

    Returns:
        Tuple ``(images, labels)``. ``images`` is a float array of shape
        ``(N, rows * cols)`` scaled by 1/255, each row being the transposed
        28x28 image; ``labels`` is a 1-D int array.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    # Compare by value: `is` only matched strings that happened to be the
    # same interned object as the literal.
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), pixels), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels])
        labels[i] = lbl[k]

    # Matlab patches use an odd col-major order
    images = images / 255.  # Remap between [0,1]
    for i in range(len(images)):
        # Convert them back over to row-major. The 28x28 geometry is
        # hard-coded, so other image sizes are not supported here.
        images[i, :] = images[i, :].reshape(28, 28).T.flatten()

    labels = np.fromiter(chain.from_iterable(labels), dtype='int')
    return images, labels
def read(digits, dataset="training", path="."):
    """Load MNIST files into 3D numpy arrays.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Assumes the files exist and have been gunzipped; download the 4 gz files
    from http://yann.lecun.com/exdb/mnist/

    Args:
        digits: Collection of label values to keep.
        dataset: "training" or "testing"; selects the train-* or t10k-* pair.
        path: Directory holding the four ``-ubyte`` files.

    Returns:
        Tuple ``(images, labels)``. ``images`` has shape ``(N, rows, cols)``
        and is float when the module flag ``DO_REALS`` is true, int8
        otherwise. ``labels`` is always an int8 array of shape ``(N, 1)``
        (these need to be int for H2O RF output).
        NOTE(review): in int8 mode pixel values above 127 do not fit the
        dtype - confirm this is intended.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    # Compare by value: `is` only matched strings that happened to be the
    # same interned object as the literal.
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    if DO_REALS:
        images = zeros((len(ind), rows, cols), dtype=float)
    else:
        images = zeros((len(ind), rows, cols), dtype=int8)
    labels = zeros((len(ind), 1), dtype=int8)

    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def get_MNIST_dataset(digits, dataset = "training", path = "./data"):
    """Load MNIST files into 3D numpy arrays.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    @param digits list of numbers (0 to 9)
    @param dataset "training" or "testing"
    @param path directory holding the ``*.idx3-ubyte`` / ``*.idx1-ubyte`` files
    @returns 2 numpy arrays : images of shape (N, rows, cols) and labels of
             shape (N, 1), both float64
    @raises ValueError if dataset is neither "training" nor "testing"
    """
    # Compare by value: `is` only matched strings that happened to be the
    # same interned object as the literal.
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), rows, cols), dtype="float64")
    labels = zeros((len(ind), 1), dtype="float64")
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(dataset="training", digits=np.arange(10), path=".", prop = 100):
    """Load a leading percentage of the MNIST samples into 3D numpy arrays.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: "training" or "testing"; selects the train-* or t10k-* pair.
        digits: Collection of label values to keep.
        path: Base directory; the files are read from ``<path>/../data``.
        prop: Percentage of the matching samples to return, counted from the
            start of the file.

    Returns:
        Tuple ``(images, labels)``: a uint8 array of shape ``(N, rows, cols)``
        and a flat list of ``N`` labels, where ``N`` is ``prop`` percent of
        the matching samples, rounded down.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    # Depending on whether the training or testing dataset is needed,
    # read in the appropriate images and labels.
    if dataset == "training":
        fname_img = os.path.join(path, '../data/train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, '../data/train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, '../data/t10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, '../data/t10k-labels-idx1-ubyte')
    else:
        # An improper argument was provided for the "dataset" parameter.
        raise ValueError("dataset must be 'testing' or 'training'")

    # Read in labels file; the context manager closes it on any error.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    # Read in pixel values file.
    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    # Find indices of images whose labels are in the specified digit labels.
    ind = [k for k in range(size) if lbl[k] in digits]
    N = int(len(ind) * prop / 100.)
    pixels = rows * cols

    # Generate images and labels.
    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    for i in range(N):
        k = ind[i]
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]

    # flatten labels from format [[3],[2],[5],...] to [3,2,5,...]
    labels = [label[0] for label in labels]
    return images, labels
def load_mnist(dataset="training", digits=arange(10), path="."):
    """Load gzipped MNIST files as a pixels-by-samples matrix.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py
    Adapted again from: http://g.sweyla.com/blog/2012/mnist-numpy/
    (Ethan Sargent, 7/25/16)

    Args:
        dataset: "training" or "testing"; selects the train-* or t10k-* pair.
        digits: Collection of label values to keep.
        path: Directory holding the four ``-ubyte.gz`` files.

    Returns:
        Tuple ``(images, labels)``. ``images`` is a uint8 array of shape
        ``(rows * cols, N)``: every column is one image with all of its
        pixels in one long vector. ``labels`` is an int8 column vector of
        shape ``(N, 1)``; use ``np.transpose`` to get a row vector.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte.gz')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte.gz')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte.gz')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte.gz')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the gzip streams even when a header is malformed.
    with gzip.open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with gzip.open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), pixels), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels])
        labels[i] = lbl[k]

    return np.transpose(images), labels
def load_mnist(dataset="training", digits=np.arange(10), path="."):
    """Python function for importing the MNIST data set (currently a stub).

    Both files are opened and their headers parsed, but the copy of samples
    into the result is disabled: the function returns one zero-filled image
    and one zero label, and ``digits`` is not used.
    NOTE(review): the digit-filtering copy loop was commented out in the
    original; confirm whether it should be restored.
    NOTE(review): the training image file uses a dot
    (``train-images.idx3-ubyte``) while the other names use hyphens -
    confirm against the files on disk.

    Args:
        dataset: "training" or "testing".
        digits: Unused.
        path: Directory holding the files.

    Returns:
        Tuple ``(images, labels)``: a zero uint8 array of shape
        ``(1, rows, cols)`` and a zero int8 array of shape ``(1, 1)``.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    # Compare by value: `is` only matched strings that happened to be the
    # same interned object as the literal.
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
        print("training")
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    N = 1
    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    return images, labels
def load_mnist(dataset="training_data", digits=np.arange(10), path="../mnist"):
    """Load gzipped MNIST files into 3D numpy arrays.

    Args:
        dataset: "training_data" or "testing_data"; selects the train-* or
            t10k-* pair.
        digits: Collection of label values to keep.
        path: Directory holding the four ``-ubyte.gz`` files.

    Returns:
        Tuple ``(images, labels)``: a uint8 array of shape ``(N, rows, cols)``
        and an int8 array of shape ``(N, 1)``.

    Raises:
        ValueError: If ``dataset`` is neither "training_data" nor
            "testing_data".
    """
    if dataset == "training_data":
        fname_image = os.path.join(path, 'train-images-idx3-ubyte.gz')
        fname_label = os.path.join(path, 'train-labels-idx1-ubyte.gz')
    elif dataset == "testing_data":
        fname_image = os.path.join(path, 't10k-images-idx3-ubyte.gz')
        fname_label = os.path.join(path, 't10k-labels-idx1-ubyte.gz')
    else:
        raise ValueError("dataset must be 'training_data' or 'testing_data'")

    # Context managers close the gzip streams even when a header is malformed.
    with gzip.open(fname_label, 'rb') as flbl:
        _magic, size = struct.unpack(">2I", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with gzip.open(fname_image, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_data_from_mnist():
    """Load the MNIST training and test sets from the working directory into globals.

    Reads the four ``*.idx3-ubyte`` / ``*.idx1-ubyte`` files found in
    ``os.getcwd()``, prints how long reading each image file took, and hands
    the raw arrays to ``process_labels_and_images``. Results are stored in
    the module globals ``images_train``, ``labels_train``, ``images_test``
    and ``labels_test``; ``size``, ``rows`` and ``cols`` hold the header
    values of the last file read.

    NOTE(review): pixel bytes are read as signed chars ("b"), so values above
    127 come out negative; confirm process_labels_and_images expects that.
    """
    global images_train,labels_train,images_test,labels_test,size,rows,cols

    path = os.getcwd()
    training_images_location = os.path.join(path, 'train-images.idx3-ubyte')
    training_labels_location = os.path.join(path, 'train-labels.idx1-ubyte')
    testing_images_location = os.path.join(path, 't10k-images.idx3-ubyte')
    testing_labels_location = os.path.join(path, 't10k-labels.idx1-ubyte')

    # The files are binary: "rb" is required for struct.unpack on Python 3
    # and prevents newline translation on Windows.
    with open(training_labels_location, "rb") as training_labels_file:
        magic_nr, size = struct.unpack(">II", training_labels_file.read(8))
        labels_train_data = pyarray("b", training_labels_file.read())

    print("Creating training images ")
    start_time = time.time()
    with open(training_images_location, "rb") as training_images_file:
        magic_nr, size, rows, cols = struct.unpack(">IIII", training_images_file.read(16))
        images_train_data = pyarray("b", training_images_file.read())
    end_time = time.time() - start_time
    print("It took "+ str(end_time) + " to create the training images")
    labels_train, images_train = process_labels_and_images(
        images_train_data, labels_train_data, size, rows, cols)

    with open(testing_labels_location, "rb") as testing_labels_file:
        magic_nr, size = struct.unpack(">II", testing_labels_file.read(8))
        labels_test_data = pyarray("b", testing_labels_file.read())

    print("Creating testing images ")
    start_time = time.time()
    with open(testing_images_location, "rb") as testing_images_file:
        magic_nr, size, rows, cols = struct.unpack(">IIII", testing_images_file.read(16))
        images_test_data = pyarray("b", testing_images_file.read())
    end_time = time.time() - start_time
    print("It took "+ str(end_time) + " to create the testing images")
    labels_test, images_test = process_labels_and_images(
        images_test_data, labels_test_data, size, rows, cols)
def load_mnist(dataset="training_data", digits=np.arange(10), path="."):
    """Load gzipped MNIST files into 3D numpy arrays.

    Args:
        dataset: "training_data" or "testing_data"; selects the train-* or
            t10k-* pair.
        digits: Collection of label values to keep.
        path: Directory holding the four ``-ubyte.gz`` files.

    Returns:
        Tuple ``(images, labels)``: a uint8 array of shape ``(N, rows, cols)``
        and an int8 array of shape ``(N, 1)``.

    Raises:
        ValueError: If ``dataset`` is neither "training_data" nor
            "testing_data".
    """
    if dataset == "training_data":
        fname_image = os.path.join(path, 'train-images-idx3-ubyte.gz')
        fname_label = os.path.join(path, 'train-labels-idx1-ubyte.gz')
    elif dataset == "testing_data":
        fname_image = os.path.join(path, 't10k-images-idx3-ubyte.gz')
        fname_label = os.path.join(path, 't10k-labels-idx1-ubyte.gz')
    else:
        raise ValueError("dataset must be 'training_data' or 'testing_data'")

    # Each file is opened exactly once (the label file used to be opened
    # twice) and is closed even when a header is malformed.
    with gzip.open(fname_label, 'rb') as bytestream:
        _magic, size = struct.unpack(">II", bytestream.read(8))
        lbl = pyarray("b", bytestream.read())

    with gzip.open(fname_image, 'rb') as bytestream:
        _magic, size, rows, cols = struct.unpack(">IIII", bytestream.read(16))
        img = pyarray("B", bytestream.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def load_mnist(choice,digits=np.arange(10), path="./minst_data"):
    """Load the images and labels of the requested digits from MNIST files.

    Args:
        choice: "train" or "test"; selects the train-* or t10k-* pair.
        digits: Collection of label values to keep.
        path: Directory holding the four ``-ubyte`` files.

    Returns:
        Tuple ``(images, labels)``: a uint8 array of shape ``(N, rows, cols)``
        and an int8 array of shape ``(N, 1)``.

    Raises:
        ValueError: If ``choice`` is neither "train" nor "test" (previously
            this surfaced as an unbound-variable error).
    """
    if choice == "train":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif choice == "test":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("choice must be 'train' or 'test'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
def loadMnist(dataset="training", digits=arange(10)):
    """Load a whole MNIST set as normalised flat images.

    Args:
        dataset: "training" or "testing"; selects the train-* or t10k-* pair
            under ``../Data``.
        digits: Accepted for interface compatibility; no filtering is applied
            and every sample in the file is returned.
            NOTE(review): confirm whether digit filtering was intended.

    Returns:
        Tuple ``(images, labels)``. ``images`` is a float array of shape
        ``(size, rows * cols)`` scaled by 1/255; ``labels`` is a 1-D int8
        array.

    Raises:
        ValueError: If ``dataset`` is neither "training" nor "testing".
    """
    if dataset == "training":
        fname_img = '../Data/train-images.idx3-ubyte'
        fname_lbl = '../Data/train-labels.idx1-ubyte'
    elif dataset == "testing":
        fname_img = '../Data/t10k-images.idx3-ubyte'
        fname_lbl = '../Data/t10k-labels.idx1-ubyte'
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Context managers close the files even when a header fails to unpack.
    with open(fname_lbl, 'rb') as flbl:
        _magic, size = struct.unpack(">II", flbl.read(8))
        labels_raw = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _magic, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        images_raw = pyarray("B", fimg.read())

    pixels = rows * cols
    images = zeros((size, pixels), dtype=uint8)
    labels = zeros((size), dtype=int8)
    for i in range(size):
        images[i] = array(images_raw[i * pixels:(i + 1) * pixels])
        labels[i] = labels_raw[i]

    images = images.astype(float) / 255.0
    return images, labels
def read_labels(path):
    """Read a label file into a 1-D float array.

    Args:
        path: Path of the label file; its 8-byte header holds two big-endian
            unsigned ints (magic, label count).

    Returns:
        Float array with the first ``size`` labels, where ``size`` is the
        count stored in the header.

    Raises:
        IndexError: If the file holds fewer labels than its header declares.
    """
    # The context manager closes the file even if unpacking the header fails.
    with open(path, 'rb') as f:
        magic_nr, size = struct.unpack(">II", f.read(8))
        lbl = pyarray("B", f.read())

    if len(lbl) < size:
        raise IndexError("array index out of range")

    # One bulk conversion replaces the element-by-element copy loop.
    return np.array(lbl[:size], dtype=float)