Пример #1
0
    def load_data(self, fileName, indicies):
        """Read one IDX file from the local ``images`` directory.

        The first eight bytes are parsed as two big-endian unsigned 32-bit
        integers: a magic number and the item count.  The magic number decides
        how the remainder of the file is decoded:

        * 2051 -- two more big-endian integers (rows, cols) follow, then
          unsigned pixel bytes; the result is a ``(size, rows, cols)`` uint8
          array.
        * 2049 -- the remaining bytes are returned as an ``array('b')``.
        * anything else -- ``0`` is returned as the data.

        Args:
            fileName: File name, joined onto ``./images``.
            indicies: When truthy, return ``(indices, data)`` where ``indices``
                is ``range(size)``; otherwise return ``data`` alone.

        Returns:
            ``data`` or ``(indices, data)`` as described above.
        """
        path = os.path.join(".", "images", fileName)

        data = 0
        # The context manager closes the file even if parsing fails.
        with open(path, "rb") as handle:
            magic, size = struct.unpack(">II", handle.read(8))
            indices = range(size)

            if magic == 2051:
                # Magic and size were consumed above, so only rows and cols
                # (8 bytes) remain in the header.
                rows, cols = struct.unpack(">II", handle.read(8))
                pixels = pyarray("B", handle.read())
                stride = rows * cols
                images = np.zeros((size, rows, cols), dtype=np.uint8)
                for i in indices:
                    images[i] = np.array(
                        pixels[i * stride:(i + 1) * stride]
                    ).reshape((rows, cols))
                data = images
            elif magic == 2049:
                data = pyarray("b", handle.read())

        if indicies:
            return (indices, data)
        return data
Пример #2
0
def load_mnist(dataset="training",
               digits=numpy.arange(10),
               path=__mnist_path__):
    """Load the images and labels of one MNIST split as numpy arrays.

    Args:
        dataset: ``"training"`` or ``"testing"``; selects which file pair
            under ``path`` is read.
        digits: Collection of label values to keep.
        path: Directory that holds the ``*.idx3-ubyte`` / ``*.idx1-ubyte``
            files.

    Returns:
        ``(images, labels)`` where ``images`` is a ``(N, rows, cols)`` uint8
        array and ``labels`` is a ``(N, 1)`` int8 array, ``N`` being the
        number of samples whose label is in ``digits``.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        image_name, label_name = ('train-images.idx3-ubyte',
                                  'train-labels.idx1-ubyte')
    elif dataset == "testing":
        image_name, label_name = ('t10k-images.idx3-ubyte',
                                  't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    image_path = os.path.join(path, image_name)
    label_path = os.path.join(path, label_name)

    # Label file: 8-byte big-endian header, then one signed byte per sample.
    with open(label_path, 'rb') as label_file:
        _, count = struct.unpack(">II", label_file.read(8))
        raw_labels = pyarray("b", label_file.read())

    # Image file: 16-byte big-endian header, then one unsigned byte per pixel.
    with open(image_path, 'rb') as image_file:
        _, count, rows, cols = struct.unpack(">IIII", image_file.read(16))
        raw_pixels = pyarray("B", image_file.read())

    selected = [k for k in range(count) if raw_labels[k] in digits]
    kept = len(selected)
    stride = rows * cols

    images = numpy.zeros((kept, rows, cols), dtype=numpy.uint8)
    labels = numpy.zeros((kept, 1), dtype=numpy.int8)
    for slot, k in enumerate(selected):
        start = k * stride
        images[slot] = numpy.array(
            raw_pixels[start:start + stride]).reshape((rows, cols))
        labels[slot] = raw_labels[k]

    return images, labels
Пример #3
0
def load_mnist(dataset="training_data", digits=np.arange(10), path="."):
    """Load one MNIST split, keeping only the requested digit classes.

    Args:
        dataset: ``"training_data"`` or ``"test_data"``.
        digits: Collection of label values to keep.
        path: Directory containing the ``*-idx3-ubyte`` / ``*-idx1-ubyte``
            files.

    Returns:
        ``(images, labels)``: a ``(N, row, col)`` uint8 array and a
        ``(N, 1)`` int8 array, where ``N`` is the number of kept samples.

    Raises:
        ValueError: If ``dataset`` is not one of the two accepted names.
    """
    if dataset == "training_data":
        frame_image = os.path.join(path, 'train-images-idx3-ubyte')
        frame_label = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "test_data":
        frame_image = os.path.join(path, 't10k-images-idx3-ubyte')
        frame_label = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("the dataset must be 'training_data' or 'test_data'")

    with open(frame_label, 'rb') as flbl:
        magic_nr, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(frame_image, 'rb') as fimg:
        magic_nr, size, row, col = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    # Positions of the samples whose label is one of the requested digits.
    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)
    stride = row * col

    images = np.zeros((N, row, col), dtype=np.uint8)
    labels = np.zeros((N, 1), dtype=np.int8)
    for i, k in enumerate(ind):
        # Image k occupies the flat pixel range [k*stride, (k+1)*stride).
        images[i] = np.array(img[k * stride:(k + 1) * stride]).reshape((row, col))
        labels[i] = lbl[k]
    return images, labels
Пример #4
0
def read(digits=np.arange(10), dataset="training", path="."):
    """Import one split of the MNIST data set.

    Args:
        digits: Collection of label values to keep.
        dataset: ``"training"`` or ``"testing"``.
        path: Directory that contains the ``mnist`` sub-directory with the
            data files.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
        ``(N, 1)`` int8 array, where ``N`` is the number of kept samples.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # Compare by value: `is` only works when both strings happen to be the
    # same interned object.
    if dataset == "training":
        fname_img = os.path.join(path, 'mnist/train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'mnist/train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 'mnist/t10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'mnist/t10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        magic_nr, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)
    stride = rows * cols

    images = np.zeros((N, rows, cols), dtype=np.uint8)
    labels = np.zeros((N, 1), dtype=np.int8)
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * stride:(k + 1) * stride]).reshape((rows, cols))
        labels[i] = lbl[k]

    return images, labels
Пример #5
0
def load_mnist(dataset="training", path="."):
    """Read every sample of one MNIST split into numpy arrays.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py
    Source : http://g.sweyla.com/blog/2012/mnist-numpy/

    Args:
        dataset: ``"training"`` or ``"testing"``.
        path: Directory holding the data files.

    Returns:
        ``(images, labels)``: a ``(size, rows, cols)`` uint8 array and a
        ``(size, 1)`` int8 array, with ``size``, ``rows`` and ``cols`` taken
        from the image file header.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset not in ("training", "testing"):
        raise ValueError("dataset must be 'testing' or 'training'")

    if dataset == "training":
        image_path = os.path.join(path, 'train-images.idx3-ubyte')
        label_path = os.path.join(path, 'train-labels.idx1-ubyte')
    else:
        image_path = os.path.join(path, 't10k-images.idx3-ubyte')
        label_path = os.path.join(path, 't10k-labels.idx1-ubyte')

    with open(label_path, 'rb') as label_file:
        _, count = struct.unpack(">II", label_file.read(8))
        label_bytes = pyarray("b", label_file.read())

    with open(image_path, 'rb') as image_file:
        _, count, rows, cols = struct.unpack(">IIII", image_file.read(16))
        pixel_bytes = pyarray("B", image_file.read())

    pixels_per_image = rows * cols
    images = np.zeros((count, rows, cols), dtype=np.uint8)
    labels = np.zeros((count, 1), dtype=np.int8)

    offset = 0
    for n in range(count):
        flat = np.array(pixel_bytes[offset:offset + pixels_per_image])
        images[n] = flat.reshape((rows, cols))
        labels[n] = label_bytes[n]
        offset += pixels_per_image

    return images, labels
Пример #6
0
def read(digits, dataset = "training", path = "data", download=True):
    """Load MNIST files into flat float arrays with one-hot labels.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    The files are first looked up under ``path``.  If they cannot be read
    and ``download`` is true, the gzip archives are fetched from
    http://yann.lecun.com/exdb/mnist/ instead.

    Args:
        digits: Collection of label values to keep.
        dataset: ``"training"`` or ``"testing"``.
        path: Local directory holding the uncompressed data files.
        download: When false, a local read failure is re-raised instead of
            triggering a download.

    Returns:
        ``(images, labels)``: a ``(N, rows * cols)`` float array of pixel
        values and a ``(N, 10)`` float array with ``1.0`` in the column of
        each sample's label.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # Validate up front (by value, not identity) so that a bad name is not
    # mistaken for a missing file and does not trigger a download attempt.
    if dataset == "training":
        img_name = 'train-images-idx3-ubyte'
        lbl_name = 'train-labels-idx1-ubyte'
    elif dataset == "testing":
        img_name = 't10k-images-idx3-ubyte'
        lbl_name = 't10k-labels-idx1-ubyte'
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    try:
        with open(os.path.join(path, lbl_name), 'rb') as flbl:
            magic_nr, size = struct.unpack(">II", flbl.read(8))
            lbl = pyarray("b", flbl.read())

        with open(os.path.join(path, img_name), 'rb') as fimg:
            magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
            img = pyarray("B", fimg.read())

    except (IOError, OSError, struct.error) as ex:
        if not download:
            raise
        print(ex)
        print("Downloading files...")
        url = "http://yann.lecun.com/exdb/mnist/"

        response = urllib2.urlopen(url + img_name + '.gz')
        compressedFile = StringIO.StringIO(response.read())
        decompressedFile = gzip.GzipFile(fileobj=compressedFile)
        magic_nr, size, rows, cols = struct.unpack(">IIII", decompressedFile.read(16))
        # Pixels are unsigned bytes, exactly as in the local-file branch.
        img = pyarray("B", decompressedFile.read())

        response = urllib2.urlopen(url + lbl_name + '.gz')
        compressedFile = StringIO.StringIO(response.read())
        decompressedFile = gzip.GzipFile(fileobj=compressedFile)
        magic_nr, size = struct.unpack(">II", decompressedFile.read(8))
        lbl = pyarray("b", decompressedFile.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)
    stride = rows * cols

    # The builtin float is the same 64-bit dtype the numpy `float_` alias
    # named; the alias is gone in NumPy 2.
    images = zeros((N, stride), dtype=float)
    labels = zeros((N, 10), dtype=float)
    for i, k in enumerate(ind):
        images[i] = array(img[k * stride:(k + 1) * stride]).reshape((stride))
        labels[i, lbl[k]] = 1.0

    return images, labels
Пример #7
0
def _load_mnist(path,
                dataset="training",
                digits=None,
                asbytes=False,
                selection=None,
                return_labels=True,
                return_indices=False):
    files = {
        'training': ('train-images-idx3-ubyte', 'train-labels-idx1-ubyte'),
        'testing': ('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte'),
    }

    try:
        images_fname = os.path.join(path, files[dataset][0])
        labels_fname = os.path.join(path, files[dataset][1])
    except KeyError:
        raise ValueError("Data set must be 'testing' or 'training'")

    if return_labels or digits is not None:
        flbl = open(labels_fname, 'rb')
        magic_nr, size = struct.unpack(">II", flbl.read(8))
        labels_raw = pyarray("b", flbl.read())
        flbl.close()

    fimg = open(images_fname, 'rb')
    magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
    images_raw = pyarray("B", fimg.read())
    fimg.close()

    if digits:
        indices = [k for k in range(size) if labels_raw[k] in digits]
    else:
        indices = range(size)

    if selection:
        indices = indices[selection]

    images = np.zeros((len(indices), rows, cols), dtype=np.uint8)

    if return_labels:
        labels = np.zeros((len(indices)), dtype=np.int8)
    for i in range(len(indices)):
        images[i] = np.array(
            images_raw[indices[i] * rows * cols:(indices[i] + 1) * rows *
                       cols]).reshape((rows, cols))
        if return_labels:
            labels[i] = labels_raw[indices[i]]

    if not asbytes:
        images = images.astype(float) / 255.0

    ret = (images, )
    if return_labels:
        ret += (labels, )
    if return_indices:
        ret += (indices, )

    if len(ret) == 1:
        return ret[0]
    return ret
Пример #8
0
def open_mnist_files(data_fpath, labels_fpath):
    """
    Read an image file and its label file into numpy arrays (for mnist1).

    Args:
        data_fpath: Path of the image file.
        labels_fpath: Path of the label file.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
        ``(N, 1)`` uint8 array.  Only samples whose label lies in 0..9 are
        kept.

    References:
        http://g.sweyla.com/blog/2012/mnist-numpy/
    """
    import struct
    import numpy as np
    from array import array as pyarray

    with open(labels_fpath, 'rb') as label_file:
        header = label_file.read(8)
        _, count = struct.unpack(">II", header)
        label_bytes = pyarray("b", label_file.read())

    with open(data_fpath, 'rb') as image_file:
        header = image_file.read(16)
        _, count, rows, cols = struct.unpack(">IIII", header)
        pixel_bytes = pyarray("B", image_file.read())

    wanted = np.arange(10)
    keep = []
    for k in range(count):
        if label_bytes[k] in wanted:
            keep.append(k)

    stride = rows * cols
    images = np.zeros((len(keep), rows, cols), dtype=np.uint8)
    labels = np.zeros((len(keep), 1), dtype=np.uint8)
    for slot, k in enumerate(keep):
        flat = np.array(pixel_bytes[k * stride:(k + 1) * stride])
        images[slot] = flat.reshape((rows, cols))
        labels[slot] = label_bytes[k]
    return images, labels
def load_mnist(dataset="training_data",
               digits=np.arange(9),
               path=".\\Mnist_by_benyuan\\MNIST_data"):
    """Load one MNIST split, keeping only the requested digit classes.

    Prints ``os.path.curdir`` and the listing of the current directory
    before reading anything.

    NOTE(review): the default ``digits=np.arange(9)`` covers 0..8 only, so
    samples labelled 9 are dropped by default -- confirm this is intended.

    Args:
        dataset: ``"training_data"`` or ``"testing_data"``.
        digits: Collection of label values to keep.
        path: Directory that holds the data files.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
        ``(N, 1)`` int8 array.

    Raises:
        ValueError: If ``dataset`` is not one of the two accepted names.
    """
    print(os.path.curdir)
    print(os.listdir(path="."))

    if dataset not in ("training_data", "testing_data"):
        raise ValueError("dataset must be 'training_data' or 'testing_data'")
    if dataset == "training_data":
        image_name = 'train-images.idx3-ubyte'
        label_name = 'train-labels.idx1-ubyte'
    else:
        image_name = 't10k-images.idx3-ubyte'
        label_name = 't10k-labels.idx1-ubyte'
    image_path = os.path.join(path, image_name)
    label_path = os.path.join(path, label_name)

    with open(label_path, 'rb') as label_file:
        _, total = struct.unpack(">II", label_file.read(8))
        label_bytes = pyarray("b", label_file.read())
    with open(image_path, 'rb') as image_file:
        _, total, rows, cols = struct.unpack(">IIII", image_file.read(16))
        pixel_bytes = pyarray("B", image_file.read())

    chosen = [k for k in range(total) if label_bytes[k] in digits]
    area = rows * cols
    images = zeros((len(chosen), rows, cols), dtype=uint8)
    labels = zeros((len(chosen), 1), dtype=int8)
    for slot, k in enumerate(chosen):
        images[slot] = array(
            pixel_bytes[k * area:(k + 1) * area]).reshape((rows, cols))
        labels[slot] = label_bytes[k]
    return images, labels
Пример #10
0
def load_mnist(dataset="training", digits=np.arange(10), path=".", size=60000):
    """Loads the MNIST raw data.

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the data files.
        size: Accepted for interface compatibility only; the sample count is
            always taken from the file headers, which overwrite this value.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a list of
        ``N`` int8 label scalars, where ``N`` is the number of samples whose
        label is in ``digits``.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # Courtesy of https://gist.github.com/mfathirirhas/f24d61d134b014da029a
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        magic_nr, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    # Size the outputs by the number of kept samples; using the header count
    # overruns `ind` as soon as `digits` filters anything out.
    N = len(ind)
    stride = rows * cols
    images = np.zeros((N, rows, cols), dtype=np.uint8)
    labels = np.zeros((N, 1), dtype=np.int8)
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * stride:(k + 1) * stride])\
            .reshape((rows, cols))
        labels[i] = lbl[k]
    # Flatten the (N, 1) column into a plain list of scalars.
    labels = [label[0] for label in labels]
    return images, labels
Пример #11
0
def read(digits, dataset = "training", path = "."):
    """
    Loads MNIST files into a 2D numpy array of flattened images.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        digits: Collection of label values to keep.
        dataset: ``"training"`` or ``"testing"``.
        path: Directory that contains the ``data`` sub-directory.

    Returns:
        ``(images, labels)``: a ``(N, rows * cols)`` float64 array and an
        int8 array of length ``N``.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # Compare by value; `is` depends on string interning.  Both splits are
    # resolved relative to `path`.
    if dataset == "training":
        fname_img = os.path.join(path, 'data/train-images.txt')
        fname_lbl = os.path.join(path, 'data/train-labels.txt')
    elif dataset == "testing":
        fname_img = os.path.join(path, 'data/test-images.txt')
        fname_lbl = os.path.join(path, 'data/test-labels.txt')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        magic_nr, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)
    # Row width follows the header so images of any size fit.
    stride = rows * cols

    images = zeros((N, stride), dtype=float64)
    labels = zeros(N, dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * stride:(k + 1) * stride])
        labels[i] = lbl[k]

    return images, labels
Пример #12
0
def load_mnist(dataset="training", digits=np.arange(10), path=".", size=60000):
    """Load one MNIST split, keeping only the requested digit classes.

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the data files.
        size: Accepted for interface compatibility only; the sample count
            read from the file headers overwrites it.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a list of
        ``N`` int8 label scalars, ``N`` being the number of kept samples.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        fname_img = os.path.join(path, "train-images-idx3-ubyte")
        fname_lbl = os.path.join(path, "train-labels-idx1-ubyte")
    elif dataset == "testing":
        fname_img = os.path.join(path, "t10k-images-idx3-ubyte")
        fname_lbl = os.path.join(path, "t10k-labels-idx1-ubyte")
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, "rb") as flbl:
        magic_nr, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, "rb") as fimg:
        magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    # The outputs hold one entry per kept sample, not per sample in the
    # file; otherwise indexing `ind` fails whenever `digits` filters.
    N = len(ind)
    pixels = rows * cols
    images = np.zeros((N, rows, cols), dtype=np.uint8)
    labels = np.zeros((N, 1), dtype=np.int8)
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]
    # Flatten the (N, 1) column into a plain list of scalars.
    labels = [label[0] for label in labels]
    return images, labels
Пример #13
0
    def load(dataset="training", digits=np.arange(10)):
        """Load one MNIST split located through ``get_filename``.

        Args:
            dataset: ``"train"`` / ``"training"`` for the training files or
                ``"test"`` / ``"testing"`` for the t10k files.
            digits: Collection of label values to keep.

        Returns:
            ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
            ``(N, 1)`` int8 array, ``N`` being the number of kept samples.

        Raises:
            ValueError: If ``dataset`` is not one of the accepted names.
        """
        import struct
        from array import array as pyarray
        from numpy import array, int8, uint8, zeros

        # The default value is "training", so the long names must be accepted
        # as well; previously the default matched no branch at all.
        if dataset in ("train", "training"):
            fname_img = get_filename("data/mnist/train-images-idx3-ubyte")
            fname_lbl = get_filename("data/mnist/train-labels-idx1-ubyte")
        elif dataset in ("test", "testing"):
            fname_img = get_filename("data/mnist/t10k-images-idx3-ubyte")
            fname_lbl = get_filename("data/mnist/t10k-labels-idx1-ubyte")
        else:
            raise ValueError(
                "dataset must be 'train'/'training' or 'test'/'testing'")

        with open(fname_lbl, "rb") as flbl:
            magic_nr, size = struct.unpack(">II", flbl.read(8))
            lbl = pyarray("b", flbl.read())

        with open(fname_img, "rb") as fimg:
            magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
            img = pyarray("B", fimg.read())

        ind = [k for k in range(size) if lbl[k] in digits]
        N = len(ind)
        stride = rows * cols

        images = zeros((N, rows, cols), dtype=uint8)
        labels = zeros((N, 1), dtype=int8)
        for i, k in enumerate(ind):
            images[i] = array(img[k * stride:(k + 1) * stride]).reshape((rows, cols))
            labels[i] = lbl[k]

        return images, labels
Пример #14
0
def load_mnist(dataset="training", digits=range(10), path='data/3/'):
    """
    Load one MNIST split, restricted to the given digit classes.

    Adapted from: http://cvxopt.org/applications/svm/index.html?highlight=mnist

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the data files.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
        ``(N, 1)`` int8 array.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        names = ('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        names = ('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")
    image_path = os.path.join(path, names[0])
    label_path = os.path.join(path, names[1])

    with open(label_path, 'rb') as source:
        _, n_items = struct.unpack(">II", source.read(8))
        label_data = pyarray("b", source.read())

    with open(image_path, 'rb') as source:
        _, n_items, rows, cols = struct.unpack(">IIII", source.read(16))
        pixel_data = pyarray("B", source.read())

    picks = []
    for k in range(n_items):
        if label_data[k] in digits:
            picks.append(k)

    shape = (rows, cols)
    span = rows * cols
    images = np.zeros((len(picks),) + shape, dtype=np.uint8)
    labels = np.zeros((len(picks), 1), dtype=np.int8)
    for out, k in enumerate(picks):
        images[out] = np.array(pixel_data[k * span:(k + 1) * span]).reshape(shape)
        labels[out] = label_data[k]

    return images, labels
Пример #15
0
def load_mnist(dataset="training", digits=numpy.arange(10), path=__mnist_path__):
    """Load one MNIST split, restricted to the given digit classes.

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the data files.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
        ``(N, 1)`` int8 array.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset not in ("training", "testing"):
        raise ValueError("dataset must be 'testing' or 'training'")

    if dataset == "training":
        image_path = os.path.join(path, "train-images.idx3-ubyte")
        label_path = os.path.join(path, "train-labels.idx1-ubyte")
    else:
        image_path = os.path.join(path, "t10k-images.idx3-ubyte")
        label_path = os.path.join(path, "t10k-labels.idx1-ubyte")

    with open(label_path, "rb") as stream:
        _, total = struct.unpack(">II", stream.read(8))
        label_bytes = pyarray("b", stream.read())

    with open(image_path, "rb") as stream:
        _, total, rows, cols = struct.unpack(">IIII", stream.read(16))
        pixel_bytes = pyarray("B", stream.read())

    matches = [k for k in range(total) if label_bytes[k] in digits]
    n_matches = len(matches)
    area = rows * cols

    images = numpy.zeros((n_matches, rows, cols), dtype=numpy.uint8)
    labels = numpy.zeros((n_matches, 1), dtype=numpy.int8)
    for slot in range(n_matches):
        first = matches[slot] * area
        flat = numpy.array(pixel_bytes[first:first + area])
        images[slot] = flat.reshape((rows, cols))
        labels[slot] = label_bytes[matches[slot]]

    return images, labels
Пример #16
0
def load_data(training=True):
    """Pair augmented, scaled MNIST images with their labels.

    Adapted from http://g.sweyla.com/blog/2012/mnist-numpy/

    Args:
        training: Read the ``train-*`` files when true, the ``t10k-*`` files
            otherwise; both are looked up under ``./data``.

    Returns:
        ``zip`` of the per-image results of ``augment(V, 24)`` with the raw
        label bytes.
    """
    path = './data'

    if training:
        image_name = 'train-images-idx3-ubyte'
        label_name = 'train-labels-idx1-ubyte'
    else:
        image_name = 't10k-images-idx3-ubyte'
        label_name = 't10k-labels-idx1-ubyte'
    image_path = os.path.join(path, image_name)
    label_path = os.path.join(path, label_name)

    # Inputs
    with open(image_path, 'rb') as image_file:
        magic_nr, size, rows, cols = struct.unpack(">IIII", image_file.read(16))
        pixel_bytes = pyarray("B", image_file.read())

    # Cut the flat pixel stream into consecutive 784-byte images.
    chunks = [pixel_bytes[start:start + 784]
              for start in xrange(0, len(pixel_bytes), 784)]
    inputs = []
    # A single Vol is reused: its weights are replaced for every image
    # before it is handed to augment().
    V = Vol(28, 28, 1, 0.0)
    for chunk in chunks:
        V.w = [(px / 255.0) for px in chunk]
        inputs.append(augment(V, 24))

    # Outputs
    with open(label_path, 'rb') as label_file:
        magic_nr, size = struct.unpack(">II", label_file.read(8))
        labels = pyarray("b", label_file.read())

    return zip(inputs, labels)
Пример #17
0
def load_mnist(dataset="training", digits=np.arange(10), path='.'):
    """Load one MNIST split, keeping only the requested digit classes.

    Args:
        dataset: ``"testing"`` reads the ``t10k-*`` files; any other value
            reads the ``train-*`` files.
        digits: Collection of label values to keep.
        path: Directory containing the data files.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` float array and a float
        array of length ``N`` (numpy's default dtype), ``N`` being the number
        of kept samples.
    """
    # Honour the dataset argument instead of always reading training data.
    prefix_is_test = dataset == "testing"
    if prefix_is_test:
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')

    with open(fname_lbl, 'rb') as flbl:
        magic_nr, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)
    stride = rows * cols

    images = np.zeros((N, rows, cols))
    labels = np.zeros(N)
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * stride:(k + 1) * stride]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
Пример #18
0
def load_mnist(dataset="training", digits=range(10), path=DATA_PATH):
    """Load one MNIST split, restricted to the given digit classes.

    Args:
        dataset: ``"training"`` or ``"testing"``; selects which pair of
            module-level file-name constants is used.
        digits: Collection of label values to keep.
        path: Directory containing the data files.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
        ``(N, 1)`` int8 array.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # Set the filename
    if dataset == "training":
        image_name, label_name = TRAIN_IMG_NAME, TRAIN_LBL_NAME
    elif dataset == "testing":
        image_name, label_name = TEST_IMG_NAME, TEST_LBL_NAME
    else:
        raise ValueError("dataset must be 'testing' or 'training'")
    image_path = os.path.join(path, image_name)
    label_path = os.path.join(path, label_name)

    with open(label_path, 'rb') as label_file:
        _, total = struct.unpack(">II", label_file.read(8))
        label_bytes = pyarray("b", label_file.read())

    with open(image_path, 'rb') as image_file:
        _, total, rows, cols = struct.unpack(">IIII", image_file.read(16))
        pixel_bytes = pyarray("B", image_file.read())

    keep = [k for k in range(total) if label_bytes[k] in digits]
    area = rows * cols

    images = zeros((len(keep), rows, cols), dtype=uint8)
    labels = zeros((len(keep), 1), dtype=int8)
    for slot, k in enumerate(keep):
        begin = k * area
        images[slot] = array(pixel_bytes[begin:begin + area]).reshape((rows, cols))
        labels[slot] = label_bytes[k]

    return images, labels
Пример #19
0
def load_mnist(dataset="training", digits=np.arange(10), path="."):
    """
    Read one MNIST split into 3D numpy arrays.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: ``"training"`` (``train-*`` files) or ``"testing"``
            (``test-*`` files).
        digits: Collection of label values to keep.
        path: Directory containing the data files.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
        ``(N, 1)`` int8 array.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        image_name = 'train-images-idx3-ubyte'
        label_name = 'train-labels-idx1-ubyte'
    elif dataset == "testing":
        image_name = 'test-images-idx3-ubyte'
        label_name = 'test-labels-idx1-ubyte'
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    image_path = os.path.join(path, image_name)
    label_path = os.path.join(path, label_name)

    with open(label_path, 'rb') as handle:
        _, n_samples = struct.unpack(">II", handle.read(8))
        label_buf = pyarray("b", handle.read())

    with open(image_path, 'rb') as handle:
        _, n_samples, rows, cols = struct.unpack(">IIII", handle.read(16))
        pixel_buf = pyarray("B", handle.read())

    hits = [k for k in range(n_samples) if label_buf[k] in digits]
    per_image = rows * cols

    images = zeros((len(hits), rows, cols), dtype=uint8)
    labels = zeros((len(hits), 1), dtype=int8)
    for position, k in enumerate(hits):
        lo = k * per_image
        hi = lo + per_image
        images[position] = array(pixel_buf[lo:hi]).reshape((rows, cols))
        labels[position] = label_buf[k]

    return images, labels
Пример #20
0
def load_mnist(dataset="training", digits=np.arange(10), path='image_data'):
    """Load MNIST files into a 3D numpy array.

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the data files.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
        ``(N, 1)`` int8 array.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        image_path = os.path.join(path, 'train-images-idx3-ubyte')
        label_path = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        image_path = os.path.join(path, 't10k-images-idx3-ubyte')
        label_path = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(label_path, 'rb') as label_stream:
        _, total = struct.unpack(">II", label_stream.read(8))
        label_bytes = pyarray("b", label_stream.read())

    with open(image_path, 'rb') as image_stream:
        _, total, rows, cols = struct.unpack(">IIII", image_stream.read(16))
        pixel_bytes = pyarray("B", image_stream.read())

    wanted = []
    for k in range(total):
        if label_bytes[k] in digits:
            wanted.append(k)
    count = len(wanted)
    block = rows * cols

    images = zeros((count, rows, cols), dtype=uint8)
    labels = zeros((count, 1), dtype=int8)
    for row in range(count):
        sample = wanted[row]
        flat = array(pixel_bytes[sample * block:(sample + 1) * block])
        images[row] = flat.reshape((rows, cols))
        labels[row] = label_bytes[sample]

    return images, labels
Пример #21
0
def load_data(training=True):
    """Pair scaled MNIST images, wrapped in ``Vol`` objects, with labels.

    Adapted from http://g.sweyla.com/blog/2012/mnist-numpy/

    Args:
        training: Read the ``train-*`` files when true, the ``t10k-*`` files
            otherwise; both are looked up under ``./data``.

    Returns:
        ``zip`` of one ``Vol`` per image with the raw label bytes.
    """
    path = "./data"

    if training:
        image_name = 'train-images-idx3-ubyte'
        label_name = 'train-labels-idx1-ubyte'
    else:
        image_name = 't10k-images-idx3-ubyte'
        label_name = 't10k-labels-idx1-ubyte'
    image_path = os.path.join(path, image_name)
    label_path = os.path.join(path, label_name)

    # Inputs
    with open(image_path, 'rb') as image_file:
        magic_nr, size, rows, cols = struct.unpack(">IIII", image_file.read(16))
        pixel_bytes = pyarray("B", image_file.read())

    inputs = []
    # Walk the flat pixel stream in consecutive 784-byte images.
    chunks = [pixel_bytes[start:start + 784]
              for start in xrange(0, len(pixel_bytes), 784)]
    for chunk in chunks:
        V = Vol(28, 28, 1, 0.0)
        V.w = [(px / 255.0) for px in chunk]
        inputs.append(V)

    # Outputs
    with open(label_path, 'rb') as label_file:
        magic_nr, size = struct.unpack(">II", label_file.read(8))
        labels = pyarray("b", label_file.read())

    return zip(inputs, labels)
Пример #22
0
def getData(digits, dataset="training", path="."):
    """Load MNIST samples as flat rows with a constant first column.

    This function is modified from
    http://g.sweyla.com/blog/2012/mnist-numpy/ , which returns a 3D array;
    a 2D array suits our needs better.

    Args:
        digits: Collection of label values to keep.
        dataset: ``"training"`` or ``"testing"``.
        path: Directory that contains the ``rawdata`` sub-directory.

    Returns:
        ``(images, labels)``: a ``(N, rows * cols)`` uint8 array whose first
        column is set to 1, and a ``(N, 1)`` int8 array.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    # Local import: the numpy aliases on the scipy namespace (sp.zeros,
    # sp.array, ...) are deprecated, so numpy is used directly.
    import numpy as np

    # Compare by value (`is` depends on string interning) and build the
    # paths from components so they work with any path separator.
    if dataset == "training":
        fname_img = os.path.join(path, 'rawdata', 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'rawdata', 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 'rawdata', 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'rawdata', 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        _, _ = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    # The context manager also closes the image file, which was previously
    # left open.
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)
    stride = rows * cols
    images = np.zeros((N, stride), dtype=np.uint8)
    labels = np.zeros((N, 1), dtype=np.int8)
    for i, k in enumerate(ind):
        images[i] = np.array(img[k * stride:(k + 1) * stride]).reshape((stride))
        # Because we want to classify by w*x, the bias b is absorbed into
        # the samples: (1, x1, x2, ..., xn).
        images[i][0] = 1
        labels[i] = lbl[k]
    return images, labels
Пример #23
0
def load_mnist(image_file, label_file, path="."):
    """Load an image/label file pair as flattened numpy arrays.

    Args:
        image_file: Name of the image file inside ``path``.
        label_file: Name of the label file inside ``path``.
        path: Directory containing both files.

    Returns:
        ``(images, labels)``: a ``(N, rows * cols)`` uint8 array and a
        ``(N, 1)`` int8 array.  Only samples whose label lies in 0..9 are
        kept.
    """
    accepted = np.arange(10)

    image_path = os.path.join(path, image_file)
    label_path = os.path.join(path, label_file)

    with open(label_path, 'rb') as label_stream:
        _, total = struct.unpack(">II", label_stream.read(8))
        label_bytes = pyarray("b", label_stream.read())

    with open(image_path, 'rb') as image_stream:
        _, total, rows, cols = struct.unpack(">IIII", image_stream.read(16))
        pixel_bytes = pyarray("B", image_stream.read())

    kept = [k for k in range(total) if label_bytes[k] in accepted]
    width = rows * cols

    images = zeros((len(kept), width), dtype=uint8)
    labels = zeros((len(kept), 1), dtype=int8)
    for slot, k in enumerate(kept):
        chunk = pixel_bytes[k * width:(k + 1) * width]
        images[slot] = array(chunk).reshape((1, width))
        labels[slot] = label_bytes[k]

    return images, labels
Пример #24
0
def load_mnist(dataset="training", digits=range(10), 
   path='/Users/SwatzMac/Documents/Study/Classes/Machine Learning, Statistics and Python/Python_Programs/PCA'):
    """Load one MNIST split and print how many samples were kept.

    NOTE(review): the testing file names lack the dimension digit
    (``idx-ubyte`` instead of ``idx3-ubyte`` / ``idx1-ubyte``) used by the
    training names -- confirm the files on disk are really named this way.

    Args:
        dataset: ``"training"`` or ``"testing"``.
        digits: Collection of label values to keep.
        path: Directory containing the data files.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
        ``(N, 1)`` int8 array.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor
            ``"testing"``.
    """
    if dataset == "training":
        names = ('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        names = ('t10k-images-idx-ubyte', 't10k-labels-idx-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")
    image_path = os.path.join(path, names[0])
    label_path = os.path.join(path, names[1])

    with open(label_path, 'rb') as label_file:
        _, total = struct.unpack(">II", label_file.read(8))
        label_bytes = pyarray("b", label_file.read())

    with open(image_path, 'rb') as image_file:
        _, total, rows, cols = struct.unpack(">IIII", image_file.read(16))
        pixel_bytes = pyarray("B", image_file.read())

    chosen = [k for k in range(total) if label_bytes[k] in digits]
    N = len(chosen)
    print('N =', N)

    area = rows * cols
    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    for slot, k in enumerate(chosen):
        images[slot] = array(pixel_bytes[k * area:(k + 1) * area]).reshape((rows, cols))
        labels[slot] = label_bytes[k]

    return images, labels
Пример #25
0
def load_mnist(dataset="training_data", digits=np.arange(10), path="."):
    """Load one MNIST split from the fixed ``../database`` location.

    Args:
        dataset: ``"training_data"`` or ``"testing_data"``.
        digits: Collection of label values to keep.
        path: Not used; the file locations are fixed relative to the
            current working directory.

    Returns:
        ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
        ``(N, 1)`` int8 array.

    Raises:
        ValueError: If ``dataset`` is not one of the two accepted names.
    """
    if dataset not in ("training_data", "testing_data"):
        raise ValueError("dataset must be 'training_data' or 'testing_data'")

    if dataset == "training_data":
        image_path = '../database/train-images-idx3-ubyte/data'
        label_path = '../database/train-labels-idx1-ubyte/data'
    else:
        image_path = '../database/t10k-images-idx3-ubyte/data'
        label_path = '../database/t10k-labels-idx1-ubyte/data'

    with open(label_path, 'rb') as label_stream:
        _, total = struct.unpack(">II", label_stream.read(8))
        label_bytes = pyarray("b", label_stream.read())

    with open(image_path, 'rb') as image_stream:
        _, total, rows, cols = struct.unpack(">IIII", image_stream.read(16))
        pixel_bytes = pyarray("B", image_stream.read())

    kept = [k for k in range(total) if label_bytes[k] in digits]
    area = rows * cols

    images = zeros((len(kept), rows, cols), dtype=uint8)
    labels = zeros((len(kept), 1), dtype=int8)
    for slot, k in enumerate(kept):
        start = k * area
        images[slot] = array(pixel_bytes[start:start + area]).reshape((rows, cols))
        labels[slot] = label_bytes[k]

    return images, labels
Пример #26
0
    def load_alldata(self, fname_images, fname_labels, digits):
        """Read an image file and a label file into numpy arrays.

        A warning is issued (and loading continues) when the label file's
        magic number is not 2049 or the image file's is not 2051.

        Args:
            fname_images: Path of the image file.
            fname_labels: Path of the label file.
            digits: Collection of label values to keep.

        Returns:
            ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
            ``(N, 1)`` uint8 array, ``N`` being the number of kept samples.
        """
        # Labels: 8-byte big-endian header, then one signed byte per sample.
        with open(fname_labels, 'rb') as label_stream:
            magic, count = struct.unpack(">II", label_stream.read(8))
            if magic != 2049:
                warnings.warn("Wrong magic number")
            raw_labels = pyarray("b", label_stream.read())

        # Images: 16-byte big-endian header, then one unsigned byte per pixel.
        with open(fname_images, 'rb') as image_stream:
            magic, count, rows, cols = struct.unpack(">IIII", image_stream.read(16))
            if magic != 2051:
                warnings.warn("Wrong magic number")
            raw_pixels = pyarray("B", image_stream.read())

        # Positions of the samples whose label was requested.
        keep = [k for k in range(count) if raw_labels[k] in digits]
        area = rows * cols

        images = np.zeros((len(keep), rows, cols), dtype = np.uint8)
        labels = np.zeros((len(keep), 1), dtype = np.uint8)

        for slot, k in enumerate(keep):
            flat = np.array(raw_pixels[k * area:(k + 1) * area])
            images[slot] = flat.reshape((rows, cols))
            labels[slot] = raw_labels[k]

        return images, labels
Пример #27
0
    def load(dataset='train', digits=np.arange(10)):
        """Load one MNIST split located through ``get_filename``.

        Args:
            dataset: ``'train'`` for the training files or ``"test"`` for the
                t10k files.
            digits: Collection of label values to keep.

        Returns:
            ``(images, labels)``: a ``(N, rows, cols)`` uint8 array and a
            ``(N, 1)`` int8 array, ``N`` being the number of kept samples.

        Raises:
            ValueError: If ``dataset`` is neither ``'train'`` nor ``'test'``.
        """
        if dataset == 'train':
            fname_img = get_filename('data/mnist/train-images-idx3-ubyte')
            fname_lbl = get_filename('data/mnist/train-labels-idx1-ubyte')
        elif dataset == "test":
            fname_img = get_filename('data/mnist/t10k-images-idx3-ubyte')
            fname_lbl = get_filename('data/mnist/t10k-labels-idx1-ubyte')
        else:
            # Fail with a clear message instead of an unbound-variable error
            # further down.
            raise ValueError("dataset must be 'train' or 'test'")

        with open(fname_lbl, 'rb') as flbl:
            magic_nr, size = struct.unpack(">II", flbl.read(8))
            lbl = pyarray("b", flbl.read())

        with open(fname_img, 'rb') as fimg:
            magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
            img = pyarray("B", fimg.read())

        ind = [k for k in range(size) if lbl[k] in digits]
        N = len(ind)
        stride = rows * cols

        images = zeros((N, rows, cols), dtype=uint8)
        labels = zeros((N, 1), dtype=int8)
        for i, k in enumerate(ind):
            images[i] = array(img[k * stride:(k + 1) * stride]).reshape((rows, cols))
            labels[i] = lbl[k]

        return images, labels
def ExtractingData():
    """Read the MNIST training set from a fixed local directory.

    Changes the process working directory to the hard-coded MNIST folder and
    parses ``train-images-idx3-ubyte`` and ``train-labels-idx1-ubyte`` there.

    Returns:
        ``(images, labels, rows, cols)``: ``images`` is a ``uint8`` array of
        shape ``(N, rows*cols)`` with one flattened image per row, ``labels``
        a ``uint8`` array of shape ``(N, 1)``, and ``rows``/``cols`` are the
        image dimensions taken from the image file header.
    """
    os.chdir('/Users/manaswipodduturi/Documents/Research/MachineLearning/Data/MNIST')
    image_path = os.path.join('.', 'train-images-idx3-ubyte')
    label_path = os.path.join('.', 'train-labels-idx1-ubyte')

    image_file = open(image_path, 'rb')
    _, _, rows, cols = struct.unpack(">IIII", image_file.read(16))
    # NOTE(review): pixels are parsed with the signed type code "b" and the
    # labels below with the unsigned "B"; the values are then stored into
    # uint8 arrays. Confirm the codes are not swapped by accident.
    pixel_bytes = pyarray("b", image_file.read())
    image_file.close()

    label_file = open(label_path, 'rb')
    # The sample count used below is the one from the label file header.
    _, count = struct.unpack(">II", label_file.read(8))
    label_bytes = pyarray("B", label_file.read())
    label_file.close()

    wanted = np.arange(10)
    keep = [k for k in range(count) if label_bytes[k] in wanted]
    pixels_per_image = rows * cols

    images = np.zeros((len(keep), pixels_per_image), dtype=np.uint8)
    labels = np.zeros((len(keep), 1), dtype=np.uint8)
    for slot, sample in enumerate(keep):
        start = sample * pixels_per_image
        images[slot] = np.array(pixel_bytes[start:start + pixels_per_image])
        labels[slot] = label_bytes[sample]

    return images, labels, rows, cols
Пример #29
0
def getData(digits, dataset = "training", path = "."):
    """Load MNIST samples as flattened rows whose first column is a bias term.

    Modified from http://g.sweyla.com/blog/2012/mnist-numpy/, which returns a
    3D array; here each image is flattened into one row of a 2D array.

    Args:
        digits: Collection of label values to keep.
        dataset: ``"training"`` or ``"testing"``.
        path: Not used; the files are always read from the ``rawdata``
            location given by the hard-coded file names.

    Returns:
        ``(images, labels)``: ``images`` is a ``uint8`` array of shape
        ``(N, rows*cols)`` whose column 0 is overwritten with 1, and
        ``labels`` is an ``int8`` array of shape ``(N, 1)``.

    Raises:
        ValueError: If ``dataset`` is not ``"training"`` or ``"testing"``.
    """
    # Compare by value: ``is`` only worked when the caller's string happened
    # to be the same interned object as the literal.
    if dataset == "training":
        fname_img = os.path.join('rawdata\\train-images.idx3-ubyte')
        fname_lbl = os.path.join('rawdata\\train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join('rawdata\\t10k-images.idx3-ubyte')
        fname_lbl = os.path.join('rawdata\\t10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))  # header is parsed but not used
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = sp.zeros((len(ind), pixels), dtype=sp.uint8)
    labels = sp.zeros((len(ind), 1), dtype=sp.int8)
    for i, k in enumerate(ind):
        images[i] = sp.array(img[k * pixels:(k + 1) * pixels]).reshape((pixels))
        # Classification is done with w*x, so the bias b is absorbed into the
        # sample as (1, x1, x2, ..., xn): the first pixel is replaced by 1.
        images[i][0] = 1
        labels[i] = lbl[k]
    return images, labels
Пример #30
0
def load_feature_vector(digit, dataset="training"):
    """Return the flattened images whose label equals ``digit``.

    The ``t10k`` image and label files in the current directory are always
    read; ``dataset`` is accepted but never consulted.

    Returns:
        ``(images, labels)``: ``images`` is a float array with one
        ``rows*cols`` feature vector per matching sample, ``labels`` an
        ``int8`` column vector of the same length.
    """
    base_dir = "."
    image_path = os.path.join(base_dir, 't10k-images-idx3-ubyte')
    label_path = os.path.join(base_dir, 't10k-labels-idx1-ubyte')

    label_file = open(label_path, 'rb')
    _, count = struct.unpack(">II", label_file.read(8))
    label_values = pyarray("b", label_file.read())
    label_file.close()

    image_file = open(image_path, 'rb')
    # The sample count from the image header replaces the label-file count.
    _, count, rows, cols = struct.unpack(">IIII", image_file.read(16))
    pixel_values = pyarray("B", image_file.read())
    image_file.close()

    selected = [k for k in range(count) if label_values[k] == digit]
    vector_len = rows * cols

    images = zeros((len(selected), vector_len), dtype=float)
    labels = zeros((len(selected), 1), dtype=int8)
    for row, k in enumerate(selected):
        begin = k * vector_len
        images[row] = numpy.array(pixel_values[begin:begin + vector_len]).reshape((vector_len))
        labels[row] = label_values[k]
    return images, labels
 def load_mnist(self, dataset="training", digits=np.arange(10), path="."):
     """Read one MNIST split from ``path`` and keep the requested digits.

     ``dataset`` must be "training" or "testing"; anything else raises
     ValueError. Returns ``(images, labels)`` where ``images`` is a uint8
     array of shape (N, rows, cols) and ``labels`` an int8 array of shape
     (N, 1), with N the number of samples whose label is in ``digits``.
     """
     if dataset == "training":
         names = ('train-images.idx3-ubyte', 'train-labels.idx1-ubyte')
     elif dataset == "testing":
         names = ('t10k-images.idx3-ubyte', 't10k-labels.idx1-ubyte')
     else:
         raise ValueError("dataset must be 'testing' or 'training'")
     image_path, label_path = [os.path.join(path, name) for name in names]

     # Labels: 8-byte header followed by one signed byte per sample.
     label_file = open(label_path, 'rb')
     _, count = struct.unpack(">II", label_file.read(8))
     label_bytes = pyarray("b", label_file.read())
     label_file.close()

     # Images: 16-byte header followed by one unsigned byte per pixel.
     image_file = open(image_path, 'rb')
     _, count, rows, cols = struct.unpack(">IIII", image_file.read(16))
     pixel_bytes = pyarray("B", image_file.read())
     image_file.close()

     keep = [k for k in range(count) if label_bytes[k] in digits]
     per_image = rows * cols

     images = zeros((len(keep), rows, cols), dtype=uint8)
     labels = zeros((len(keep), 1), dtype=int8)
     for slot, sample in enumerate(keep):
         chunk = pixel_bytes[sample * per_image:(sample + 1) * per_image]
         images[slot] = array(chunk).reshape((rows, cols))
         labels[slot] = label_bytes[sample]

     return images, labels
Пример #32
0
def load_mnist(dataset="training", digits=np.arange(10), path="."):
    """
    Read an MNIST split into numpy arrays, keeping only the given digits.

    Returns ``(images, labels)``: a uint8 array of shape (N, rows, cols) and
    an int8 array of shape (N, 1). Raises ValueError when ``dataset`` is not
    "training" or "testing".

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py
    """
    if dataset == "training":
        image_name, label_name = 'train-images-idx3-ubyte', 'train-labels-idx1-ubyte'
    elif dataset == "testing":
        image_name, label_name = 't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte'
    else:
        raise ValueError("dataset must be 'testing' or 'training'")
    image_path = os.path.join(path, image_name)
    label_path = os.path.join(path, label_name)

    label_stream = open(label_path, 'rb')
    _, total = struct.unpack(">II", label_stream.read(8))
    raw_labels = pyarray("b", label_stream.read())
    label_stream.close()

    image_stream = open(image_path, 'rb')
    _, total, rows, cols = struct.unpack(">IIII", image_stream.read(16))
    raw_pixels = pyarray("B", image_stream.read())
    image_stream.close()

    # Positions of the samples whose label was requested.
    chosen = [pos for pos in range(total) if raw_labels[pos] in digits]
    area = rows * cols

    images = zeros((len(chosen), rows, cols), dtype=uint8)
    labels = zeros((len(chosen), 1), dtype=int8)
    for out_pos, src_pos in enumerate(chosen):
        first = src_pos * area
        images[out_pos] = array(raw_pixels[first:first + area]).reshape((rows, cols))
        labels[out_pos] = raw_labels[src_pos]
    return images, labels
Пример #33
0
    def load(dataset="training", digits=np.arange(10)):
        """Load MNIST images and labels for one split.

        Args:
            dataset: ``"train"``/``"training"`` or ``"test"``/``"testing"``.
                The long spellings are accepted because the default value is
                ``"training"``, which the original branches never matched.
            digits: Collection of label values to keep.

        Returns:
            ``(images, labels)``: a ``uint8`` array of shape
            ``(N, rows, cols)`` and an ``int8`` array of shape ``(N, 1)``.

        Raises:
            ValueError: If ``dataset`` names neither split.
        """
        import struct
        from array import array as pyarray
        from numpy import array, int8, uint8, zeros

        if dataset in ("train", "training"):
            fname_img = get_filename('data/mnist/train-images-idx3-ubyte')
            fname_lbl = get_filename('data/mnist/train-labels-idx1-ubyte')
        elif dataset in ("test", "testing"):
            fname_img = get_filename('data/mnist/t10k-images-idx3-ubyte')
            fname_lbl = get_filename('data/mnist/t10k-labels-idx1-ubyte')
        else:
            raise ValueError("dataset must be 'train' or 'test'")

        with open(fname_lbl, 'rb') as flbl:
            _, size = struct.unpack(">II", flbl.read(8))
            lbl = pyarray("b", flbl.read())

        with open(fname_img, 'rb') as fimg:
            _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
            img = pyarray("B", fimg.read())

        ind = [k for k in range(size) if lbl[k] in digits]
        pixels = rows * cols

        images = zeros((len(ind), rows, cols), dtype=uint8)
        labels = zeros((len(ind), 1), dtype=int8)
        for i, k in enumerate(ind):
            images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
            labels[i] = lbl[k]

        return images, labels
Пример #34
0
def load_mnist(dataset="training", digits=arange(10), path="."):
    """
    Load an MNIST split as a list of flattened images plus its labels.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: "training" or "testing".
        digits: Collection of label values to keep.
        path: Directory holding the four ``*-ubyte`` files.

    Returns:
        ``(data, Label)``. ``data`` is a list with one 1-D uint8 array of
        ``rows*cols`` pixels per selected image. For "training", ``Label`` is
        a list holding ``vectorized_result(y)`` for every label row ``y``;
        for "testing" it is a 1-D int8 array of the labels.

    Raises:
        ValueError: If ``dataset`` is not "testing" or "training".
    """

    if dataset == "training":
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        _, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    images = zeros((len(ind), rows, cols), dtype=uint8)
    labels = zeros((len(ind), 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]

    # One flat copy per image; replaces the element-by-element Python loops.
    data = [image.flatten() for image in images]

    if dataset == "training":
        Label = [vectorized_result(y) for y in labels]
    else:
        Label = labels.flatten()

    return data, Label
Пример #35
0
def load_mnist(dataset="training",
               digits=np.arange(10),
               path='.',
               random=False,
               perDigitNum=100):
    """
    Load MNIST files into 3D numpy arrays, optionally capped per digit.
    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: "training" or "testing".
        digits: Collection of label values to keep.
        path: Directory holding the ``*-ubyte`` files.
        random: When true, keep at most ``perDigitNum`` samples of each
            digit. The sample order is shuffled in either mode.
        perDigitNum: Per-digit cap applied when ``random`` is true.

    Returns:
        ``(images, labels)``: float arrays of shape ``(N, rows, cols)`` and
        ``(N,)``. With ``random`` true, ``N`` counts only the samples that
        were kept; earlier versions left an all-zero image with label 0 in
        the slot of every skipped sample.

    Raises:
        ValueError: If ``dataset`` is not "testing" or "training".
    """

    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        _, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    shuffle(ind)  # shuffle the index of images

    if random:
        # Walk the shuffled indices and keep a sample only while its digit is
        # still under the cap, so the output holds no placeholder rows.
        counter = {}
        kept = []
        for k in ind:
            seen = counter.get(lbl[k], 0)
            if seen < perDigitNum:
                counter[lbl[k]] = seen + 1
                kept.append(k)
        ind = kept

    pixels = rows * cols
    images = zeros((len(ind), rows, cols))
    labels = zeros((len(ind)))
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]

    return images, labels
def read_data(digits=np.arange(10)):
    """Load the MNIST training and test sets from the current directory.

    Args:
        digits: Collection of label values to keep in both sets.

    Returns:
        ``(images, labels, images1, labels1)``. ``images`` holds the training
        images divided by 255 as floats, shape ``(N, rows, cols)``;
        ``labels`` is the matching ``int8`` array of shape ``(N, 1)``.
        ``images1``/``labels1`` are the test images (raw ``uint8``) and
        labels (``int8``).
    """
    def _load_pair(image_path, label_path):
        # Parse one label/image file pair and keep the requested digits.
        with open(label_path, 'rb') as filelabel:
            _, size = struct.unpack(">II", filelabel.read(8))
            labelArr = pyarray("b", filelabel.read())

        with open(image_path, 'rb') as fimg:
            _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
            img = pyarray("B", fimg.read())

        ind = [k for k in range(size) if labelArr[k] in digits]
        pixels = rows * cols
        pair_images = zeros((len(ind), rows, cols), dtype=uint8)
        pair_labels = zeros((len(ind), 1), dtype=int8)
        for i, k in enumerate(ind):
            pair_images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
            pair_labels[i] = labelArr[k]
        return pair_images, pair_labels

    images, labels = _load_pair('./train-images.idx3-ubyte',
                                './train-labels.idx1-ubyte')
    images1, labels1 = _load_pair('./t10k-images.idx3-ubyte',
                                  './t10k-labels.idx1-ubyte')

    # Divide by a float so the scaling is a true division on Python 2 as well.
    # NOTE(review): only the training images are scaled; the test images are
    # returned as raw uint8, exactly as before. Confirm that is intended.
    return images / 255.0, labels, images1, labels1
Пример #37
0
def load_mnist(dataset="training", digits=numpy.arange(10), path=__mnist_path__):
    """
    Main interface between the MNIST files and machine-learning code.

    Fetches a subset of the training or test set, selected by ``digits``:
    with ``digits = [5, 8]`` only the images of 5's and 8's are returned.

    ``path`` is the complete path of the directory that stores the four
    ``-ubyte`` files. Training and test data live in two pairs of files, each
    pair holding images and labels separately; that separation is kept here
    and images are never packaged together with their labels.

    Returns a pair:
    1) images (the features) - a 3-dimensional uint8 numpy array; the first
       dimension indexes the image within the subset, the other two are the
       rows and columns of each image.
    2) labels - a 2-dimensional int8 numpy array whose first dimension is the
       number of images in the subset and whose second dimension is always 1.

    Raises ValueError when ``dataset`` is neither "training" nor "testing".
    """
    if dataset == "training":
        image_name, label_name = 'train-images.idx3-ubyte', 'train-labels.idx1-ubyte'
    elif dataset == "testing":
        image_name, label_name = 't10k-images.idx3-ubyte', 't10k-labels.idx1-ubyte'
    else:
        raise ValueError("dataset must be 'testing' or 'training'")
    image_path = os.path.join(path, image_name)
    label_path = os.path.join(path, label_name)

    label_file = open(label_path, 'rb')
    _, count = struct.unpack(">II", label_file.read(8))
    label_bytes = pyarray("b", label_file.read())
    label_file.close()

    image_file = open(image_path, 'rb')
    _, count, rows, cols = struct.unpack(">IIII", image_file.read(16))
    pixel_bytes = pyarray("B", image_file.read())
    image_file.close()

    selected = [k for k in range(count) if label_bytes[k] in digits]
    area = rows * cols

    images = numpy.zeros((len(selected), rows, cols), dtype=numpy.uint8)
    labels = numpy.zeros((len(selected), 1), dtype=numpy.int8)
    for slot, sample in enumerate(selected):
        offset = sample * area
        images[slot] = numpy.array(pixel_bytes[offset:offset + area]).reshape((rows, cols))
        labels[slot] = label_bytes[sample]

    return images, labels
Пример #38
0
    def load_mnist(self, dataset, mode, imgSize, digits, path):
        """
        Load MNIST files into 3D numpy arrays, resizing every image.

        Each selected image is resized to ``imgSize`` x ``imgSize`` with
        ``scipy.misc.imresize``. When ``mode`` is "BW" the resized image is
        additionally passed through ``self.img2bw`` with ``self.bwThres``.

        Returns ``(images, labels)``: a uint8 array of shape
        (N, imgSize, imgSize) and an int8 array of shape (N, 1). Raises
        ValueError when ``dataset`` is not "training" or "testing".

        Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py
        """
        if dataset == "training":
            names = ('train-images.idx3-ubyte', 'train-labels.idx1-ubyte')
        elif dataset == "testing":
            names = ('t10k-images.idx3-ubyte', 't10k-labels.idx1-ubyte')
        else:
            raise ValueError("dataset must be 'testing' or 'training'")
        fname_img = os.path.join(path, names[0])
        fname_lbl = os.path.join(path, names[1])

        label_file = open(fname_lbl, 'rb')
        _, count = struct.unpack(">II", label_file.read(8))
        label_bytes = pyarray("b", label_file.read())
        label_file.close()

        image_file = open(fname_img, 'rb')
        _, count, rows, cols = struct.unpack(">IIII", image_file.read(16))
        pixel_bytes = pyarray("B", image_file.read())
        image_file.close()

        selected = [k for k in range(count) if label_bytes[k] in digits]
        area = rows * cols
        target_shape = (imgSize, imgSize)

        images = np.zeros((len(selected), imgSize, imgSize), dtype=np.uint8)
        labels = np.zeros((len(selected), 1), dtype=np.int8)
        binarize = mode == "BW"
        for slot, sample in enumerate(selected):
            patch = np.array(
                pixel_bytes[sample * area:(sample + 1) * area]).reshape((rows, cols))
            if binarize:
                images[slot] = scipy.misc.imresize(patch, target_shape, "bilinear")
                images[slot] = self.img2bw(images[slot], self.bwThres)
            else:
                images[slot] = scipy.misc.imresize(patch, target_shape)
            labels[slot] = label_bytes[sample]

        return images, labels
Пример #39
0
def load_mnist(dataset="training", digits=np.arange(10), path="."):
    """
    Load MNIST as flat float16 feature rows with labels shifted by one.

    The files must be downloaded and unpacked beforehand:
    http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
    http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
    http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
    http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz

    Returns ``(images, labels)``: ``images`` is a float16 array of shape
    (N, rows*cols) and ``labels`` a 1-D int8 array holding ``label + 1``.
    Raises ValueError when ``dataset`` is not "training" or "testing".
    """

    if dataset == "training":
        image_name, label_name = 'train-images-idx3-ubyte', 'train-labels-idx1-ubyte'
    elif dataset == "testing":
        image_name, label_name = 't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte'
    else:
        raise ValueError("dataset must be 'testing' or 'training'")
    image_path = os.path.join(path, image_name)
    label_path = os.path.join(path, label_name)

    label_file = open(label_path, 'rb')
    _, count = struct.unpack(">II", label_file.read(8))
    label_bytes = pyarray("b", label_file.read())
    label_file.close()

    image_file = open(image_path, 'rb')
    _, count, rows, cols = struct.unpack(">IIII", image_file.read(16))
    pixel_bytes = pyarray("B", image_file.read())
    image_file.close()

    selected = [k for k in range(count) if label_bytes[k] in digits]
    area = rows * cols

    images = zeros((len(selected), rows, cols), dtype=uint8)
    labels = zeros((len(selected), 1), dtype=int8)
    for slot, sample in enumerate(selected):
        offset = sample * area
        images[slot] = array(pixel_bytes[offset:offset + area]).reshape((rows, cols))
        labels[slot] = label_bytes[sample]

    # Shift every label up by one, then drop the column axis.
    labels += 1
    labels = labels.flatten()

    # One flat float16 feature row per image (input layout for a plain DNN).
    images = images.astype(np.float16)
    images = images.reshape((images.shape[0], -1))

    return images, labels
Пример #40
0
def getImagesArray(fname):
    """Read an IDX image file into a 2-D array with one image per row.

    Args:
        fname: Path of the image file (16-byte big-endian header followed by
            one unsigned byte per pixel).

    Returns:
        ``(img, size_img)``: ``img`` has shape ``(size_img, rows * cols)``
        and ``size_img`` is the image count read from the header.

    Raises:
        ValueError: From ``reshape`` when the payload length does not equal
            ``size_img * rows * cols``.
    """
    # The context manager closes the file even when the header is short.
    with open(fname, 'rb') as fimg:
        _, size_img, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())
    img = np.asarray(img).reshape(size_img, rows * cols)
    return img, size_img
Пример #41
0
    def Load(self, filename, output_type=numpy.ubyte):
        """Load an IDX file of unsigned bytes into a numpy array.

        Args:
            filename: Path of the IDX file.
            output_type: dtype of the returned array.

        Returns:
            A 1-D array when the file declares one dimension; otherwise a
            2-D array of shape ``(dim_sizes[0], product of the remaining
            dimension sizes)``.
        """
        print("Loading ",filename)
        with open(filename, 'rb') as fh:
            # Magic number: bytes 0 and 1 are zero, byte 2 is the data type
            # code and byte 3 the number of dimensions.
            _, _, type_code, dimensions = struct.unpack("4B", fh.read(4))

            # One big-endian 32-bit size per dimension.
            dim_sizes = [struct.unpack(">I", fh.read(4))[0] for _ in range(dimensions)]

            # Only unsigned bytes are supported for now, so the payload is
            # one byte per element; type_code is not checked.
            data_size = 1
            for extent in dim_sizes:
                data_size *= extent

            all_data = pyarray('B', fh.read(data_size))

        flat = numpy.array(all_data, output_type)
        if dimensions == 1:
            return flat

        # Row length comes from the trailing dimensions with plain integer
        # arithmetic; the earlier true division produced a float on Python 3,
        # which reshape rejects.
        row_length = 1
        for extent in dim_sizes[1:]:
            row_length *= extent
        return flat.reshape((dim_sizes[0], row_length))
Пример #42
0
def load(start, number_samples):
    """Load a contiguous window of samples from the infinite-MNIST files.

    Args:
        start: Index of the first sample to read.
        number_samples: Number of consecutive samples to read.

    Returns:
        ``(images, labels)``: ``images`` is a float array of shape
        ``(number_samples, rows*cols)`` scaled by 1/255, and ``labels`` is a
        1-D ``int8`` array read from the label file.
    """

    file_name_img = '../dataset/infimnist/mnist8m-patterns-idx3-ubyte'
    file_name_lbl = '../dataset/infimnist/mnist8m-labels-idx1-ubyte'

    with open(file_name_lbl, 'rb') as file_lbl:
        struct.unpack(">II", file_lbl.read(8))  # header is parsed but not used
        file_lbl.seek(start, 1)  # one byte per label, relative to the header end
        # frombuffer replaces the deprecated binary mode of np.fromstring;
        # copy() keeps the result writable, as fromstring's result was.
        labels = np.frombuffer(file_lbl.read(number_samples), dtype=np.int8).copy()

    with open(file_name_img, 'rb') as file_img:
        _, _, rows, cols = struct.unpack(">IIII", file_img.read(16))
        pixels = rows * cols
        file_img.seek(start * pixels, 1)
        raw = file_img.read(number_samples * pixels)

    # reshape raises ValueError when the file holds fewer samples than asked.
    images = np.frombuffer(raw, dtype=np.uint8).reshape(number_samples, pixels)

    # normalize
    images = images / 255.0

    return images, labels
Пример #43
0
def load_mnist_from_binary(dataset="training",
                           digits=numpy.arange(10),
                           path="."):
    """
    Load MNIST as scaled flat images with one-hot targets.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        dataset: "training" or "testing".
        digits: Collection of label values to keep.
        path: Directory holding the ``*-ubyte`` files.

    Returns:
        ``(images, targets)``: ``images`` is a float array of shape
        ``(N, rows*cols)`` with pixel values divided by 255, and ``targets``
        an ``int8`` array of shape ``(N, 10)`` holding a 1 in the column of
        each sample's label.

    Raises:
        ValueError: If ``dataset`` is not "testing" or "training".
    """

    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        _, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    # The builtin float is numpy's float64; the float_ alias no longer exists
    # in NumPy 2.0.
    images = zeros((len(ind), pixels), dtype=float)
    targets = zeros((len(ind), 10), dtype=int8)
    for i, k in enumerate(ind):
        if i % 1000 == 0:
            logging.info("Loaded " + str(i) + " MNIST images")
        # Scale the whole image in one vectorised division.
        images[i] = array(img[k * pixels:(k + 1) * pixels]) / 255.0
        targets[i][lbl[k]] = 1
    return images, targets
Пример #44
0
 def gather(self, time):
     """Record ``self.func(time)`` into the typed storage in ``self.arr``.

     Nothing is stored when the function returns None. While ``self.arr`` is
     empty, storage is (re)created from the shape of the value: one array of
     doubles for a scalar, or a list with one such array per element.
     ``self.arr_flat`` remembers which layout is in use.
     """
     sample = self.func(time)
     if sample is None:
         return

     if len(self.arr) == 0:
         is_scalar = np.shape(sample) == ()
         if is_scalar:
             self.arr = pyarray('d')
         else:
             self.arr = [pyarray('d') for _ in sample]
         self.arr_flat = is_scalar

     if self.arr_flat:
         self.arr.append(sample)
         return

     # One storage column per component of the sample.
     for component, column in zip(sample, self.arr):
         column.append(component)
Пример #45
0
 def __init__ ( self, c ):
     """Load the table for parameter ``c`` from the module directory.

     ``c`` is converted to a string and must be one of ``available_c``.
     ``table_size`` floats are then read from ``g<c>.dat`` next to this
     module into ``self.gfolds``.

     Raises:
         InvalidCError: If ``str(c)`` is not in ``available_c``.
     """
     path = os.path.dirname(__file__)
     c = str(c)
     if c not in available_c:
         raise InvalidCError(c)
     self.gfolds = pyarray('f',[])
     # open() works on Python 2 and 3 (the file() builtin is Python 2 only),
     # and the context manager closes the handle after the read.
     with open(os.path.join(path,'g'+str(c)+'.dat'),'rb') as table_file:
         self.gfolds.fromfile(table_file,table_size)
Пример #46
0
def hexRead():
    """Print the contents of ``a.hex`` from the current directory.

    The first eight bytes are decoded as two big-endian unsigned 32-bit
    integers and printed on one line; each remaining byte is then printed as
    a signed integer on its own line.
    """
    with open('a.hex', 'rb') as f:
        n, s = struct.unpack(">II", f.read(8))
        arr = pyarray("b", f.read())
    # Single-argument print calls give the same output on Python 2 and 3.
    print("%d %d" % (n, s))
    for i in arr:
        print(i)
def solve_tsp(distances, optim_steps=3, pairs_by_dist=pairs_by_dist):
    """Given a distance matrix, find a solution for the TSP problem.

    Args:
        distances: Square matrix (sequence of rows) of pairwise distances.
        optim_steps: Maximum number of ``optimize_solution`` passes to run.
        pairs_by_dist: Callable invoked as ``pairs_by_dist(N, distances)``;
            it must return an iterable of ``(i, j)`` vertex pairs
            (presumably ordered by increasing distance - confirm against the
            helper).

    Returns:
        List of vertex indices, as produced by ``restore_path``. ``[]`` for
        an empty matrix and ``[0]`` for a single vertex.
        NOTE(review): the original docstring promises that the first index is
        lower than the last; that ordering would have to come from
        ``restore_path``, which is not visible here.

    Raises:
        ValueError: If any row's length differs from the number of rows.
    """
    N = len(distances)
    if N == 0:
        return []
    if N == 1:
        return [0]
    for row in distances:
        if len(row) != N:
            raise ValueError("Matrix is not square")

    # State of the TSP solver algorithm.
    node_valency = pyarray('i', [2]) * N  # Initially, each node has 2 sticky ends

    # for each node, stores 1 or 2 connected nodes
    connections = [[] for i in xrange(N)]

    def join_segments(sorted_pairs):
        # segments of nodes. Initially, each segment contains only 1 node
        # segments[v] is the list object of the segment that contains v, so
        # two vertices are in the same segment iff they share that object.
        segments = [[i] for i in xrange(N)]

        def filtered_pairs():
            # Yield only the pairs that can still be joined: both vertices
            # have a free end left and they are not in the same segment.
            for ij in sorted_pairs:
                i, j = ij
                if not node_valency[i] or\
                        not node_valency[j] or\
                        (segments[i] is segments[j]):
                    continue
                yield ij

        # The generator is lazy: each pair is checked against the state left
        # by the joins made so far. At most N - 1 joins are performed.
        for i, j in islice(filtered_pairs(), N - 1):
            node_valency[i] -= 1
            node_valency[j] -= 1
            connections[i].append(j)
            connections[j].append(i)
            # Merge segment J into segment I.
            seg_i = segments[i]
            seg_j = segments[j]
            # Always fold the shorter segment into the longer one so fewer
            # entries of `segments` need to be re-pointed.
            if len(seg_j) > len(seg_i):
                seg_i, seg_j = seg_j, seg_i
                i, j = j, i
            for node_idx in seg_j:
                segments[node_idx] = seg_i
            seg_i.extend(seg_j)

    join_segments(pairs_by_dist(N, distances))

    # Run improvement passes until one reports zero optimizations (nopt == 0)
    # or the pass budget is used up; dtotal is not used here.
    for passn in range(optim_steps):
        nopt, dtotal = optimize_solution(distances, connections)
        if nopt == 0:
            break

    path = restore_path(connections)
    return path
Пример #48
0
def load_feature_vector(digit, dataset="training"):
    """Return the flattened images whose label equals ``digit``.

    The ``t10k`` image and label files in the current directory are always
    read, and progress banners are printed while doing so; ``dataset`` is
    accepted but never consulted.

    Returns:
        ``(images, labels)``: ``images`` is a float array with one
        ``rows*cols`` feature vector per matching sample and ``labels`` an
        ``int8`` array of shape ``(N, 1)``.
    """
    path="."

    fname_image_array = os.path.join(path, 't10k-images-idx3-ubyte')
    fname_label_array = os.path.join(path, 't10k-labels-idx1-ubyte')

    # NOTE(review): the banners below say "TRAINING set" although the t10k
    # files are the ones opened; the messages are kept unchanged.
    # Single-argument print calls give the same output on Python 2 and 3.
    with open(fname_label_array, 'rb') as file_label:
        magic_nr, size = struct.unpack(">II", file_label.read(8))
        print("********************************************")
        print("     READING  CLASS LABEL of TRAINING set")
        print("********************************************")
        print("No of class labels :%d" % (size))
        label_array = pyarray("b", file_label.read())
        print(" Done reading class label from training set")

    with open(fname_image_array, 'rb') as file_image:
        magic_nr, size, rows, cols = struct.unpack(">IIII", file_image.read(16))
        print("********************************************")
        print("     READING  FEATURE VECTOR of TRAINING set")
        print("********************************************")
        print("No-of-records:%d, No-of-rows:%d, No-of-col:%d" % (size, rows, cols))
        print("********************************************")
        image_array = pyarray("B", file_image.read())
        print(" Done reading feature vector from training set")

    # A single digit is matched with ==, not a collection with `in`.
    matching_index_array = [k for k in range(size) if label_array[k] == digit]
    digit_cnt = len(matching_index_array)

    feature_vector = rows * cols
    images = zeros((digit_cnt, feature_vector), dtype=float)
    labels = zeros((digit_cnt, 1), dtype=int8)

    for i, k in enumerate(matching_index_array):
        images[i] = numpy.array(
            image_array[k * feature_vector:(k + 1) * feature_vector]
        ).reshape((feature_vector))
        labels[i] = label_array[k]
    return images, labels
Пример #49
0
def read(digits, dataset = "training", path = "."):
    """
    Load MNIST as flat, scaled and transposed image rows.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        digits: Collection of label values to keep.
        dataset: "training" or "testing".
        path: Directory holding the ``*-ubyte`` files.

    Returns:
        ``(images, labels)``: ``images`` is a float array of shape
        ``(N, rows*cols)`` with values in [0, 1], each row being the
        flattened transpose of the stored image; ``labels`` is a 1-D integer
        array.

    Raises:
        ValueError: If ``dataset`` is not "testing" or "training".
    """

    # Compare by value: ``is`` only worked when the caller's string happened
    # to be the same interned object as the literal.
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        _, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)
    pixels = rows * cols

    images = zeros((N, pixels), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels])
        labels[i] = lbl[k]

    images = images / 255.  # Remap between [0,1]
    # Matlab patches use an odd col-major order, so every image is transposed
    # before being flattened again. The shape comes from the file header
    # rather than a hard-coded 28x28.
    images = images.reshape((N, rows, cols)).transpose(0, 2, 1).reshape((N, pixels))

    # Drop the column axis and widen to the platform integer type.
    labels = labels.ravel().astype('int')

    return images, labels
Пример #50
0
def read(digits, dataset="training", path="."):
    """
    Load MNIST files into 3D numpy arrays.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    The four files are assumed to exist in ``path`` already gunzipped; they
    can be downloaded from http://yann.lecun.com/exdb/mnist/

    Args:
        digits: Collection of label values to keep.
        dataset: "training" or "testing".
        path: Directory holding the ``*-ubyte`` files.

    Returns:
        ``(images, labels)``: ``images`` has shape ``(N, rows, cols)`` and is
        float when the module-level ``DO_REALS`` flag is true, int8
        otherwise; ``labels`` is an int8 array of shape ``(N, 1)``.

    Raises:
        ValueError: If ``dataset`` is not "testing" or "training".
    """

    # Compare by value: ``is`` only worked when the caller's string happened
    # to be the same interned object as the literal.
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        _, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)

    # NOTE(review): without DO_REALS the unsigned pixel bytes (0..255) are
    # stored in an int8 array, which cannot represent values above 127.
    # Confirm this is intended before relying on the pixel values.
    image_dtype = float if DO_REALS else int8
    images = zeros((N, rows, cols), dtype=image_dtype)
    # always need these to be int for H2O RF output
    labels = zeros((N, 1), dtype=int8)

    pixels = rows * cols
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]

    return images, labels
Пример #51
0
def get_MNIST_dataset(digits, dataset = "training", path = "./data"):
    """
    Load MNIST files into 3D numpy arrays of floats.

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Args:
        digits: List of numbers (0 to 9) whose samples are kept.
        dataset: "training" or "testing".
        path: Directory holding the ``*-ubyte`` files.

    Returns:
        Two numpy arrays ``(images, labels)`` of dtype float64, with shapes
        ``(N, rows, cols)`` and ``(N, 1)``.

    Raises:
        ValueError: If ``dataset`` is not "testing" or "training".
    """

    # Compare by value: ``is`` only worked when the caller's string happened
    # to be the same interned object as the literal.
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    with open(fname_lbl, 'rb') as flbl:
        _, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    pixels = rows * cols

    # Both outputs are float64 rather than the uint8/int8 of the upstream
    # loader this was adapted from.
    images = zeros((len(ind), rows, cols), dtype="float64")
    labels = zeros((len(ind), 1), dtype="float64")
    for i, k in enumerate(ind):
        images[i] = array(img[k * pixels:(k + 1) * pixels]).reshape((rows, cols))
        labels[i] = lbl[k]

    return images, labels
Пример #52
0
def load_mnist(dataset="training", digits=np.arange(10), path=".", prop = 100):
    """
    Loads MNIST files into 3D numpy arrays

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py

    Parameters:
        dataset: "training" or "testing".
        digits: collection of labels to keep.
        path: base directory; the files are looked up in "../data" relative
            to it.
        prop: percentage of the matching samples to return, taken from the
            start of the matching set. Values that would select more samples
            than exist are capped at the number of matching samples.

    Returns:
        (images, labels): images is a uint8 array of shape (N, rows, cols);
        labels is a plain list of N int8 scalars.

    Raises:
        ValueError: if dataset is neither "training" nor "testing".
    """
    # Depending on whether the training or testing dataset is needed,
    # read in the appropriate images and labels.
    if dataset == "training":
        fname_img = os.path.join(path, '../data/train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, '../data/train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, '../data/t10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, '../data/t10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Read in labels file ("with" closes the handle even if parsing fails).
    with open(fname_lbl, 'rb') as flbl:
        _, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    # Read in pixel values file.
    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    # Find indices of images whose labels are in the specified digit labels.
    ind = [k for k in range(size) if lbl[k] in digits]

    # Compute the sample count once and cap it, so prop > 100 cannot index
    # past the end of `ind`.
    N = min(len(ind), int(len(ind) * prop / 100.))

    # Generate images and labels.
    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros(N, dtype=int8)
    pixels_per_image = rows * cols
    for i in range(N):
        start = ind[i] * pixels_per_image
        images[i] = array(img[start:start + pixels_per_image]).reshape((rows, cols))
        labels[i] = lbl[ind[i]]

    # Labels are returned as a flat list [3, 2, 5, ...], not a column vector.
    return images, list(labels)
Пример #53
0
def load_mnist(dataset="training", digits=arange(10), path="."):
    """
    Read gzip-compressed MNIST idx files and return flattened samples.

    dataset selects the "training" or "testing" file pair inside `path`;
    any other value raises ValueError. Only samples whose label is in
    `digits` are kept.

    Returns (images, labels): images is a uint8 array of shape
    (rows*cols, N) with one sample per column, labels is an int8 column
    vector of shape (N, 1).

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py
    Adapted again from: http://g.sweyla.com/blog/2012/mnist-numpy/ (Ethan Sargent, 7/25/16)
    """
    if dataset not in ("training", "testing"):
        raise ValueError("dataset must be 'testing' or 'training'")

    wants_training = dataset == "training"
    image_path = os.path.join(
        path,
        'train-images-idx3-ubyte.gz' if wants_training else 't10k-images-idx3-ubyte.gz')
    label_path = os.path.join(
        path,
        'train-labels-idx1-ubyte.gz' if wants_training else 't10k-labels-idx1-ubyte.gz')

    # Labels: 8-byte header, then one signed byte per sample.
    label_stream = gzip.open(label_path, 'rb')
    magic_nr, size = struct.unpack(">II", label_stream.read(8))
    raw_labels = pyarray("b", label_stream.read())
    label_stream.close()

    # Pixels: 16-byte header, then one unsigned byte per pixel.
    pixel_stream = gzip.open(image_path, 'rb')
    magic_nr, size, rows, cols = struct.unpack(">IIII", pixel_stream.read(16))
    raw_pixels = pyarray("B", pixel_stream.read())
    pixel_stream.close()

    keep = [k for k in range(size) if raw_labels[k] in digits]
    width = rows * cols

    # Each sample is stored as one long row vector of pixels.
    images = zeros((len(keep), width), dtype=uint8)
    labels = zeros((len(keep), 1), dtype=int8)
    for row, k in enumerate(keep):
        images[row] = array(raw_pixels[k * width:(k + 1) * width])
        labels[row] = raw_labels[k]

    # Transposed so that samples run along the columns.
    return images.T, labels
Пример #54
0
def load_mnist(dataset="training", digits=np.arange(10), path="."):
    """
    Python function for importing the MNIST data set.

    NOTE: in its current state this is a stub. Digit filtering and pixel
    copying are disabled, so `digits` is ignored and the function always
    returns exactly one all-zero image and one zero label. The files are
    still opened and their headers parsed to obtain the image dimensions.

    Parameters:
        dataset: "training" or "testing".
        digits: unused while the filtering step is disabled.
        path: directory that contains the idx files.

    Returns:
        (images, labels): a uint8 zero array of shape (1, rows, cols) and an
        int8 zero array of shape (1, 1).

    Raises:
        ValueError: if dataset is neither "training" nor "testing".
    """
    # Compare by value; "is" checks identity and fails for strings that are
    # equal but not the same interned object.
    if dataset == "training":
        # NOTE(review): the image name uses ".idx3" while every other file
        # name here uses "-idx3"/"-idx1" - confirm against the files on disk.
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
        print("training")
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Only the headers are needed while the copy loop is disabled; the
    # payloads are no longer read into memory.
    with open(fname_lbl, 'rb') as flbl:
        struct.unpack(">II", flbl.read(8))

    with open(fname_img, 'rb') as fimg:
        _, _, rows, cols = struct.unpack(">IIII", fimg.read(16))

    # TODO: restore label filtering and the per-image copy; until then a
    # single blank sample is returned.
    N = 1
    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)

    return images, labels
Пример #55
0
def load_mnist(dataset="training_data", digits=np.arange(10), path="../mnist"):
    """
    Load gzip-compressed MNIST idx files into numpy arrays.

    dataset is "training_data" or "testing_data" (anything else raises
    ValueError), digits is the collection of labels to keep, and path is
    the directory holding the .gz files.

    Returns (images, labels): a uint8 array of shape (N, rows, cols) and an
    int8 array of shape (N, 1), where N is the number of kept samples.
    """
    if dataset == "training_data":
        image_file = 'train-images-idx3-ubyte.gz'
        label_file = 'train-labels-idx1-ubyte.gz'
    elif dataset == "testing_data":
        image_file = 't10k-images-idx3-ubyte.gz'
        label_file = 't10k-labels-idx1-ubyte.gz'
    else:
        raise ValueError("dataset must be 'training_data' or 'testing_data'")
    fname_image = os.path.join(path, image_file)
    fname_label = os.path.join(path, label_file)

    # Label stream: two big-endian uint32 header fields, then signed bytes.
    label_stream = gzip.open(fname_label, 'rb')
    label_header = label_stream.read(8)
    magic_nr, size = struct.unpack(">2I", label_header)
    all_labels = pyarray("b", label_stream.read())
    label_stream.close()

    # Image stream: four big-endian uint32 header fields, then unsigned bytes.
    image_stream = gzip.open(fname_image, 'rb')
    image_header = image_stream.read(16)
    magic_nr, size, rows, cols = struct.unpack(">IIII", image_header)
    all_pixels = pyarray("B", image_stream.read())
    image_stream.close()

    chosen = [k for k in range(size) if all_labels[k] in digits]
    N = len(chosen)

    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    for slot, k in enumerate(chosen):
        first = k * rows * cols
        last = (k + 1) * rows * cols
        images[slot] = array(all_pixels[first:last]).reshape((rows, cols))
        labels[slot] = all_labels[k]

    return images, labels
def load_data_from_mnist():
    """
    Read the four MNIST idx files from the current working directory and
    publish the processed results as module-level globals.

    Sets the globals images_train, labels_train, images_test and labels_test
    to whatever process_labels_and_images returns for each split. The
    globals size, rows and cols are also overwritten and end up holding the
    values from the testing image header, which is parsed last.

    Prints progress messages and how long each image file took to read.
    """
    global images_train,labels_train,images_test,labels_test,size,rows,cols
    path = os.getcwd()
    training_images_location = os.path.join(path, 'train-images.idx3-ubyte')
    training_labels_location = os.path.join(path, 'train-labels.idx1-ubyte')
    testing_images_location = os.path.join(path, 't10k-images.idx3-ubyte')
    testing_labels_location = os.path.join(path, 't10k-labels.idx1-ubyte')

    # idx files are binary: open with "rb" so struct.unpack receives raw
    # bytes and no newline/encoding translation is applied.
    with open(training_labels_location, "rb") as training_labels_file:
        magic_nr, size = struct.unpack(">II", training_labels_file.read(8))
        labels_train_data = pyarray("b", training_labels_file.read())

    print("Creating training images ")
    start_time = time.time()
    with open(training_images_location, "rb") as training_images_file:
        magic_nr, size, rows, cols = struct.unpack(">IIII", training_images_file.read(16))
        # NOTE(review): typecode "b" is signed, so pixel bytes above 127 are
        # read as negative values - confirm process_labels_and_images
        # expects that (other loaders read pixels with "B").
        images_train_data = pyarray("b", training_images_file.read())
    end_time = time.time() - start_time
    print("It took "+ str(end_time) + " to create the training images")

    labels_train,images_train = process_labels_and_images(images_train_data,labels_train_data,size,rows, cols)

    with open(testing_labels_location, "rb") as testing_labels_file:
        magic_nr, size = struct.unpack(">II", testing_labels_file.read(8))
        labels_test_data = pyarray("b", testing_labels_file.read())

    print("Creating testing images ")
    start_time = time.time()
    with open(testing_images_location, "rb") as testing_images_file:
        magic_nr, size, rows, cols = struct.unpack(">IIII", testing_images_file.read(16))
        # NOTE(review): same signed-byte caveat as for the training pixels.
        images_test_data = pyarray("b", testing_images_file.read())
    end_time = time.time() - start_time
    print("It took "+ str(end_time) + " to create the testing images")

    labels_test,images_test =process_labels_and_images(images_test_data,labels_test_data,size,rows, cols)
def load_mnist(dataset="training_data", digits=np.arange(10), path="."):
    """
    Load gzip-compressed MNIST idx files into numpy arrays.

    Parameters:
        dataset: "training_data" or "testing_data".
        digits: collection of labels to keep.
        path: directory that contains the .gz files.

    Returns:
        (images, labels): a uint8 array of shape (N, rows, cols) and an int8
        array of shape (N, 1), where N is the number of kept samples.

    Raises:
        ValueError: if dataset is not one of the two accepted names.
    """
    if dataset == "training_data":
        fname_image = os.path.join(path, 'train-images-idx3-ubyte.gz')
        fname_label = os.path.join(path, 'train-labels-idx1-ubyte.gz')
    elif dataset == "testing_data":
        fname_image = os.path.join(path, 't10k-images-idx3-ubyte.gz')
        fname_label = os.path.join(path, 't10k-labels-idx1-ubyte.gz')
    else:
        raise ValueError("dataset must be 'training_data' or 'testing_data'")

    # Each file is opened exactly once; the context manager closes it even
    # when the header cannot be parsed.
    with gzip.open(fname_label, 'rb') as bytestream:
        _, size = struct.unpack(">II", bytestream.read(8))
        lbl = pyarray("b", bytestream.read())

    with gzip.open(fname_image, 'rb') as bytestream:
        _, size, rows, cols = struct.unpack(">IIII", bytestream.read(16))
        img = pyarray("B", bytestream.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)

    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    pixels_per_image = rows * cols
    for i, k in enumerate(ind):
        start = k * pixels_per_image
        images[i] = array(img[start:start + pixels_per_image]).reshape((rows, cols))
        labels[i] = lbl[k]

    return images, labels
Пример #58
0
def load_mnist(choice,digits=np.arange(10), path="./minst_data"):
    """
    Load uncompressed MNIST idx files into numpy arrays.

    Parameters:
        choice: "train" or "test".
        digits: collection of labels to keep.
        path: directory that contains the idx files.

    Returns:
        (images, labels): a uint8 array of shape (N, rows, cols) and an int8
        array of shape (N, 1), where N is the number of kept samples.

    Raises:
        ValueError: if choice is neither "train" nor "test".
    """
    if choice == "train":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif choice == "test":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        # Fail with a clear message instead of an unbound-variable error
        # further down.
        raise ValueError("choice must be 'train' or 'test'")

    with open(fname_lbl, 'rb') as flbl:
        _, size = struct.unpack(">II", flbl.read(8))
        lbl = pyarray("b", flbl.read())

    with open(fname_img, 'rb') as fimg:
        _, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = pyarray("B", fimg.read())

    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)

    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    pixels_per_image = rows * cols
    for i, k in enumerate(ind):
        start = k * pixels_per_image
        images[i] = array(img[start:start + pixels_per_image]).reshape((rows, cols))
        labels[i] = lbl[k]
    return images, labels
Пример #59
0
def loadMnist(dataset="training", digits=arange(10)):
    """
    Load every sample of an MNIST split from ../Data as flat, scaled rows.

    dataset is "training" or "testing"; any other value raises ValueError.
    digits is accepted but not used: no label filtering is applied.

    Returns (images, labels): images is a float array of shape
    (size, rows*cols) holding the pixel bytes divided by 255.0, labels is
    an int8 array of shape (size,).
    """
    if dataset not in ("training", "testing"):
        raise ValueError("dataset must be 'testing' or 'training'")

    if dataset == "training":
        image_path = '../Data/train-images.idx3-ubyte'
        label_path = '../Data/train-labels.idx1-ubyte'
    else:
        image_path = '../Data/t10k-images.idx3-ubyte'
        label_path = '../Data/t10k-labels.idx1-ubyte'

    # Labels: 8-byte header followed by one signed byte per sample.
    label_file = open(label_path, 'rb')
    magic_nr, size = struct.unpack(">II", label_file.read(8))
    label_bytes = pyarray("b", label_file.read())
    label_file.close()

    # Pixels: 16-byte header followed by one unsigned byte per pixel.
    image_file = open(image_path, 'rb')
    magic_nr, size, rows, cols = struct.unpack(">IIII", image_file.read(16))
    pixel_bytes = pyarray("B", image_file.read())
    image_file.close()

    row_length = rows * cols
    images = zeros((size, row_length), dtype=uint8)
    labels = zeros(size, dtype=int8)

    for sample in range(size):
        begin = sample * row_length
        end = (sample + 1) * row_length
        images[sample] = array(pixel_bytes[begin:end])
        labels[sample] = label_bytes[sample]

    # Scale the byte values by 255.0 as floats.
    scaled = images.astype(float) / 255.0

    return scaled, labels
Пример #60
0
def read_labels(path):
    """
    Read an idx label file and return its labels as a numpy array.

    The first 8 bytes are parsed as two big-endian unsigned 32-bit integers
    (magic number and label count); the rest is read as unsigned bytes.

    Returns a 1-D array of length `size` with numpy's default float dtype.
    """
    handle = open(path, 'rb')
    header = handle.read(8)
    magic_nr, size = struct.unpack(">II", header)
    raw = pyarray("B", handle.read())
    handle.close()

    # Copy element by element so a file with fewer than `size` labels still
    # fails with an IndexError.
    labels = np.zeros(size)
    for position in range(size):
        labels[position] = raw[position]

    return labels