Example #1
def calc(number):
    curve = idx2numpy.convert_from_file('../MNIST/curve')
    images = idx2numpy.convert_from_file("../MNIST/spin_train_images")
    labels = idx2numpy.convert_from_file("../MNIST/train_labels")
    double = np.zeros((410, 410), dtype=np.float64)  # np.float_ was removed in NumPy 2.0
    for n in range(10000 * number, 10000 * (number + 1)):
        for i in range(400):
            for j in range(400):
                double[i][j] += images[n][curve[i][0]][
                    curve[i][1]] * images[n][curve[j][0]][curve[j][1]]
                if i == 0 and j == 0:
                    print(n)
    for i in range(400):
        for n in range(10000 * number, 10000 * (number + 1)):
            for j in range(10):
                if labels[n] == j:
                    double[i][400 + j] += images[n][curve[i][0]][curve[i][1]]
                    double[400 + j][i] += images[n][curve[i][0]][curve[i][1]]
                else:
                    double[i][400 + j] -= images[n][curve[i][0]][curve[i][1]]
                    double[400 + j][i] -= images[n][curve[i][0]][curve[i][1]]

    for i in range(410):
        for j in range(410):
            double[i][j] = double[i][j] / 60000
    idx2numpy.convert_to_file("../Data/double" + str(number), double)
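The triple loop in calc() touches each pixel pair individually. Under the same shape assumptions (images indexed through a 400-entry curve, 10 classes), the accumulations reduce to two matrix products — a sketch, not the original author's code:

import numpy as np

def calc_vectorized(images, curve, labels, number):
    # Sketch of a vectorized equivalent of calc() above (assumes the same inputs).
    sl = slice(10000 * number, 10000 * (number + 1))
    P = images[sl][:, curve[:400, 0], curve[:400, 1]].astype(np.float64)  # (10000, 400)
    # +1 where the label matches the class column, -1 otherwise
    signs = np.where(labels[sl][:, None] == np.arange(10)[None, :], 1.0, -1.0)
    double = np.zeros((410, 410))
    double[:400, :400] = P.T @ P
    double[:400, 400:] = P.T @ signs
    double[400:, :400] = double[:400, 400:].T
    return double / 60000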
Example #2
def create_some_test_images(some_number=10):
    import idx2numpy
    import gzip
    import numpy as np
    import PIL.Image
    import os

    test_images = "../t10k-images-idx3-ubyte.gz"
    test_labels = "../t10k-labels-idx1-ubyte.gz"
    EXPORT_FOLDER = "../test_images"

    imgs = idx2numpy.convert_from_file(gzip.open(test_images))
    lbls = idx2numpy.convert_from_file(gzip.open(test_labels))
    indices = np.arange(0, len(lbls))
    for i in range(10):

        ix = np.random.choice(indices[lbls == i], size=some_number)
        assert len(ix) == some_number

        for j, image_index in enumerate(ix):
            pil_image = PIL.Image.fromarray(np.squeeze(
                imgs[image_index, :, :]))
            os.makedirs(os.path.join('..', 'static', 'img', 'mnist', str(i)),
                        exist_ok=True)
            pil_image.save(f'../static/img/mnist/{i}/{j}.png')
Example #3
def main():
    # Training data
    images = idx2numpy.convert_from_file("train-images-idx3-ubyte")
    # print(images.shape)
    X = images.reshape(images.shape[0], -1)
    X = X.astype('float32')
    # print(X.shape)
    # X = (X-np.min(X,0))/(np.max(X,0)+0.0001)
    X = X / 255.0
    # X = np.matrix(X)

    # Labels; the last 10000 samples are split off as test data below
    Y = idx2numpy.convert_from_file("train-labels-idx1-ubyte")
    Y = Y.astype('int64')
    # print(Y.shape)
    X_test = X[50000:]
    Y_test = Y[50000:]
    X = X[:50000]
    Y = Y[:50000]
    sizes = [X.shape[1], 200, 100, 10]

    time_start = time.time()
    ANN_train(sizes, X, Y)
    time_end = time.time()
    print('total time cost:', time_end - time_start)
    correct = ANN_test(sizes, 'ANN.pth', X_test, Y_test)
    print('correct:', correct)
Example #4
def load_data():
    """
    Read in the training data from the data dir.

    The data comes to us from http://yann.lecun.com/exdb/mnist/
    """
    train_images = idx2numpy.convert_from_file(
        'data/train-images-idx3-ubyte').astype('float64')

    # We know from the IDX file documentation that the max value we will see
    # is 255, and we want the values of our input to be in [0, 1], so
    train_images = train_images / 255

    # # Lets look at one of the images
    # import matplotlib.pyplot as plt
    # plt.imsave("test", train_images[1, :, :])

    # Well that was fun

    # We also reshape the images so they match the first layer of the nn
    train_images = train_images.reshape(train_images.shape[0], 784, 1)

    # Labels
    # Along with the images we need the corresponding labels
    train_lables_ff = idx2numpy.convert_from_file(
        'data/train-labels-idx1-ubyte')

    # These are also in the wrong format, we need them to be activations of nodes
    train_lables = numpy.zeros([train_lables_ff.shape[0], 10, 1])

    # This may not be the most efficient solution, but it works
    for i in range(0, train_lables_ff.shape[0]):
        train_lables[i, train_lables_ff[i]] = 1

    return (train_images, train_lables)
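For reference, the label loop above collapses into a single fancy-indexing assignment — a minimal sketch reusing the names from load_data():

import numpy
# assumes train_lables_ff as loaded above
train_lables = numpy.zeros([train_lables_ff.shape[0], 10, 1])
train_lables[numpy.arange(train_lables_ff.shape[0]), train_lables_ff, 0] = 1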
Example #5
def leerTrainingSet():
    x_training = idx2numpy.convert_from_file(
        'datos/train-images-idx3-ubyte'
    )  # There are 60000 images, each 28x28
    y_training = idx2numpy.convert_from_file('datos/train-labels-idx1-ubyte')
    x_training = np.reshape(x_training, (60000, 784))
    return (x_training, y_training)
Example #6
def lire_MNIST():
    """ 
    Load the MNIST train/test images and labels, flattening each image and
    binarizing it with ``to_black_white``.

    Returns
    -------
    train_image, train_label, test_image, test_label : ndarray
        Images as matrices with one flattened, binarized sample per row;
        labels as 1-D arrays.
    """
    train_image_file = 'data/train-images-idx3-ubyte'
    train_label_file = 'data/train-labels-idx1-ubyte'
    test_image_file = 'data/t10k-images-idx3-ubyte'
    test_label_file = 'data/t10k-labels-idx1-ubyte'

    train_image = idx2numpy.convert_from_file(train_image_file)
    train_image = to_black_white(np.array([img.flatten() for img in train_image]))

    test_image = idx2numpy.convert_from_file(test_image_file)
    test_image = to_black_white(np.array([img.flatten() for img in test_image]))

    train_label = idx2numpy.convert_from_file(train_label_file)
    test_label = idx2numpy.convert_from_file(test_label_file)

    return train_image, train_label, test_image, test_label
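to_black_white is not shown in this snippet; a plausible stand-in (an assumption, not the original helper) thresholds the grayscale pixels to {0, 1}:

import numpy as np

def to_black_white(data, threshold=127):
    # Hypothetical helper: binarize grayscale pixel values.
    return (data > threshold).astype(np.uint8)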
Example #7
def labelDigitsToMultipleFiles(label_filename, instance_filename,
                               out_filename):
    dirname = os.path.dirname(os.path.dirname(__file__))
    instance_filename = dirname + instance_filename
    label_filename = dirname + label_filename
    digits = idx2numpy.convert_from_file(instance_filename)
    labels = idx2numpy.convert_from_file(label_filename)
    n = len(labels)
    c = 0
    prefix = 0
    out_file = open(dirname + "/assets/" + str(prefix) + "_" + out_filename,
                    "w+")
    for i in range(0, n):
        if (c == 19999):
            prefix += 1
            c = 0
            out_file.close()
            out_file = open(
                dirname + "/assets/" + str(prefix) + "_" + out_filename, "w+")
        out_file.write((str(labels[i]) + ","))
        for r in digits[i]:
            for x in r:
                out_file.write(str(x) + ",")
        out_file.write("\n")
        c += 1
    out_file.close()
Example #8
def load_mnist():
    X_train = idx2numpy.convert_from_file('MNIST_data/train-images.idx3-ubyte')
    train_labels = idx2numpy.convert_from_file(
        'MNIST_data/train-labels.idx1-ubyte')
    X_test = idx2numpy.convert_from_file('MNIST_data/t10k-images.idx3-ubyte')
    test_labels = idx2numpy.convert_from_file(
        'MNIST_data/t10k-labels.idx1-ubyte')

    train_images = []  # reshape train images so that the training set
    for i in range(X_train.shape[0]):  # is of shape (60000, 1, 28, 28)
        train_images.append(np.expand_dims(X_train[i], axis=0))
    train_images = np.array(train_images)

    test_images = []  # reshape test images so that the test set
    for i in range(X_test.shape[0]):  # is of shape (10000, 1, 28, 28)
        test_images.append(np.expand_dims(X_test[i], axis=0))
    test_images = np.array(test_images)

    # permute and split the training data into training and validation sets
    indices = np.random.permutation(train_images.shape[0])
    training_idx, validation_idx = indices[:55000], indices[55000:]
    train_images, validation_images = train_images[training_idx, :], train_images[validation_idx, :]
    train_labels, validation_labels = train_labels[training_idx], train_labels[validation_idx]

    return {
        'train_images': train_images,
        'train_labels': train_labels,
        'validation_images': validation_images,
        'validation_labels': validation_labels,
        'test_images': test_images,
        'test_labels': test_labels
    }
Example #9
def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    trainImages = idx2numpy.convert_from_file('train-images-idx3-ubyte')
    trainLabels = idx2numpy.convert_from_file('train-labels-idx1-ubyte')
    testImages = idx2numpy.convert_from_file('t10k-images-idx3-ubyte')
    testLabels = idx2numpy.convert_from_file('t10k-labels-idx1-ubyte')

    # Plain tuples: np.array() over (images, labels) pairs of unequal shape
    # raises an error in recent NumPy
    training_data = (trainImages[:50000], trainLabels[:50000])
    validation_data = (trainImages[50000:], trainLabels[50000:])
    test_data = (testImages, testLabels)
    return (training_data, validation_data, test_data)
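The load_data_wrapper() the docstring refers to is not included here; a sketch of what it might look like, following the format the docstring describes (an assumption, not the original source):

def load_data_wrapper():
    # Reshape images to (784, 1) column vectors and one-hot encode the training labels.
    training_data, validation_data, test_data = load_data()
    train_x = [x.reshape(784, 1) / 255.0 for x in training_data[0]]
    train_y = [np.eye(10)[y].reshape(10, 1) for y in training_data[1]]
    return list(zip(train_x, train_y)), validation_data, test_data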
Example #10
def load_full_Mnist(USE_COLAB = False, path = ''):
    """
    Function that downloads data from the MNIST dataset. (numbers)
    The following are the inputs and outputs:

    Outputs:
        train_x_full: tensor of images sampled randomly from the Mnist training dataset. (60000,28,28)
        train_y_full: tensor of labels of those sampled training images. (60000,)
        test_x_full: tensor of images sampled randomly from the Mnist test dataset. (10000,28,28)
        test_y_full: tensor of labels of those sampled testing images. (10000,)
    """
    
    
    train_images_file = 'Data/Mnist/train-images.idx3-ubyte'
    test_images_file = 'Data/Mnist/t10k-images.idx3-ubyte'
    train_labels_file = 'Data/Mnist/train-labels.idx1-ubyte'
    test_labels_file = 'Data/Mnist/t10k-labels.idx1-ubyte'
    
    if USE_COLAB:
        train_images_file = os.path.join(path,'Data/Mnist/train-images.idx3-ubyte')
        test_images_file = os.path.join(path,'Data/Mnist/t10k-images.idx3-ubyte')
        train_labels_file = os.path.join(path,'Data/Mnist/train-labels.idx1-ubyte')
        test_labels_file = os.path.join(path,'Data/Mnist/t10k-labels.idx1-ubyte')
    


    train_x_full = torch.tensor(np.array(idx2numpy.convert_from_file(train_images_file))).to(dtype = torch.float32, device = 'cpu')
    train_y_full = torch.tensor(np.array(idx2numpy.convert_from_file(train_labels_file))).to(dtype = torch.int32, device = 'cpu')
    test_x_full = torch.tensor(np.array(idx2numpy.convert_from_file(test_images_file))).to(dtype = torch.float32, device = 'cpu')
    test_y_full = torch.tensor(np.array(idx2numpy.convert_from_file(test_labels_file))).to(dtype = torch.int32, device = 'cpu')

    return train_x_full, train_y_full, test_x_full, test_y_full
Example #11
def load_data(filename):
	if ".h5" in filename[0]:
		X,Y = read_h5(filename[0])
	else:
		X = idx2numpy.convert_from_file(filename[0])
		Y = idx2numpy.convert_from_file(filename[1])
	return X,Y
Example #12
def load_mnist(data_path):
    ''' Loads the MNIST data from the base path '''

    train_img_path = '%s/train-images.idx3-ubyte' % data_path
    train_lbl_path = '%s/train-labels.idx1-ubyte' % data_path
    test_img_path = '%s/t10k-images.idx3-ubyte' % data_path
    test_lbl_path = '%s/t10k-labels.idx1-ubyte' % data_path

    def encode_one_hot(y, m, k):
        y_one_hot = np.zeros((m, k))
        y_one_hot[range(m), y] = 1
        return y_one_hot

    # get the training data
    train_img = idx2numpy.convert_from_file(train_img_path)
    m, row, col = train_img.shape
    d = row * col
    X_tr = np.reshape(train_img, (m, d)) / 255.

    train_lbl = idx2numpy.convert_from_file(train_lbl_path)
    k = max(train_lbl) + 1
    y_tr = encode_one_hot(train_lbl, m, k)

    # set the data matrix for test
    test_img = idx2numpy.convert_from_file(test_img_path)
    m_te = test_img.shape[0]
    X_te = np.reshape(test_img, (m_te, d)) / 255.  # test data matrix
    test_lbl = idx2numpy.convert_from_file(test_lbl_path)
    y_te = encode_one_hot(test_lbl, m_te, k)

    return X_tr, y_tr, X_te, y_te
Example #13
def get_dataset():
    import idx2numpy
    files = ["train-images-idx3-ubyte","train-labels-idx1-ubyte"]
    trains = idx2numpy.convert_from_file(files[0])
    labels = idx2numpy.convert_from_file(files[1])
    train,labels = convert(trains,labels)
    return train,labels
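convert is not defined in this snippet; a plausible stand-in (an assumption) flattens each image and scales it to [0, 1]:

import numpy as np

def convert(trains, labels):
    # Hypothetical helper: flatten 28x28 images and normalize to [0, 1].
    return trains.reshape(trains.shape[0], -1) / 255.0, labels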
Example #14
    def run(self):
        # Get images and labels
        img = i2n.convert_from_file('t10k-images-idx3-ubyte')
        lbl = i2n.convert_from_file('t10k-labels-idx1-ubyte')

        self.hdr.write("#define MNIST_IMAGES {\\\n")
        for image in range(self.numIm):
            for row in range(28):
                for col in range(28):
                    if self.norm:
                        temp = float(img[self.idxSt + image][row][col]) / 255.0
                        temp -= 0.1307
                        temp = temp / 0.3015
                        if temp >= 0:
                            temp = 1
                        else:
                            temp = 0
                        self.hdr.write(str(temp) + ", ")
                    else:
                        self.hdr.write(
                            str(img[self.idxSt + image][row][col]) + ", ")
            self.hdr.write("\\\n")
        self.hdr.write("}\n\n")

        self.hdr.write("#define MNIST_LABELS {\\\n")
        for image in range(self.numIm):
            self.hdr.write(str(lbl[self.idxSt + image]) + ", ")
            self.hdr.write("\\\n")
        self.hdr.write("}\n\n")
Example #15
def load_data():
    """
    Loads mnist dataset.
    """
    names = ['trainX', 'trainY', 'testX', 'testY']
    data = []
    path = easyDL.__file__.split('__init__')[0] + 'preprocessing/datasets/__datasets__/mnist/'
    if not os.path.isdir(path):
        print('downloading...')
        os.mkdir(path)
        urls = ['http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
                'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
                'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
                'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz']
        sleep(0.5)
        for i, _ in enumerate(urls):
            download_url(urls[i], path + names[i] + '.gz')
            with gzip.open(path + names[i] + '.gz', 'rb') as f_in:
                with open(path + names[i]  + '.idx', 'wb') as f_out:
                    shutil.copyfileobj(f_in,f_out)
            data.append(idx2numpy.convert_from_file(path + names[i] + '.idx'))
            os.remove(path + names[i] + '.gz')
        print('\nAll Done.')
    else:
        for name in names:
            data.append(idx2numpy.convert_from_file(path + name + '.idx'))
    
    trainX = data[0].reshape(-1, 28, 28, 1).astype(np.int32)
    testX = data[2].reshape(-1, 28, 28, 1).astype(np.int32)
    trainY = data[1].astype(np.int32)
    testY = data[3].astype(np.int32)
    
    return (trainX, trainY), (testX, testY)
Example #16
def mnist_school(classifier, samples_limit=5123):
    # Raw training, no caffe use.
    print('MNIST training started.')
    mnist_file = '/home/student/Downloads/MNIST/train-images.idx3-ubyte'
    if os.path.isfile(mnist_file):
        train_arr = idx2numpy.convert_from_file(mnist_file)
    else:
        print('Error, no file')
        return
    print('Train array loaded, size is', train_arr.shape)
    label_file = '/home/student/Downloads/MNIST/train-labels.idx1-ubyte'
    label_arr = idx2numpy.convert_from_file(label_file)
    print('Train labels loaded, size is', label_arr.shape)
    digits = set(label_arr)
    # Train for each digit
    for digit_i in digits:
        # binarize
        y = 1 * (label_arr == digit_i)[:samples_limit]
        x_train = np.vstack([i.flatten() for i in train_arr[:samples_limit]])
        classifier.fit_from_features(x_train, y, 'MNIST_' + str(digit_i))
    print('MNIST training done.')
    # Testing on last 1000 samples
    print('MNIST testing started.')
    scores = []
    for digit_i in digits:
        # binarize
        y = 1 * (label_arr == digit_i)[-1000:]
        x_train = np.vstack([i.flatten() for i in train_arr[-1000:]])
        scores.append(classifier.score(x_train, y))
    print('The mean score for MNIST Task is', np.mean(scores))
Example #17
def calcMeanAndStd(pictureFile, labelFile, out_dir):
    pictures = ~idx2numpy.convert_from_file(pictureFile)  # invert: in the raw file 0 means black and 255 white
    labels = idx2numpy.convert_from_file(labelFile)

    mean_picture = numpy.mean(pictures,0)
    std_picture  = numpy.std(pictures,0)

    pictures_by_classes = [[] for i in range(10)]
    pictures_by_classes_array = [None for i in range(10)]

    for picture,label in zip(pictures,labels):
        pictures_by_classes[label].append(picture)


    for i in range(10):
        pictures_by_classes_array[i] = numpy.array(pictures_by_classes[i])

        mean_image = Image.fromarray(numpy.mean(pictures_by_classes_array[i],axis=0).astype('uint8'),'L')
        std_image = Image.fromarray(numpy.std(pictures_by_classes_array[i],axis=0).astype('uint8'),'L')
        std_image_inv = Image.fromarray(~numpy.std(pictures_by_classes_array[i],axis=0).astype('uint8'),'L')

        mean_image.save(out_dir+os.sep + 'mean_train_' + str(i)+'.png','png')
        std_image.save(out_dir+os.sep + 'std_train_' + str(i)+'.png','png')
        std_image_inv.save(out_dir+os.sep + 'std_train_inv_' + str(i)+'.png','png')


    im = Image.fromarray(mean_picture.astype('uint8'),'L')
    im.save(out_dir+os.sep +'mean_train.png','png')

    im = Image.fromarray(std_picture.astype('uint8'),'L')
    im.save(out_dir+os.sep +'std_train.png','png')

    im = Image.fromarray(~std_picture.astype('uint8'),'L')
    im.save(out_dir+os.sep +'std_train_inv.png','png')
Example #18
def emnist_train(model):
    t_start = time.time()

    emnist_path = '../gzip/'  # trailing slash needed: the file names below are appended directly
    X_train = idx2numpy.convert_from_file(emnist_path + 'emnist-byclass-train-images-idx3-ubyte')
    y_train = idx2numpy.convert_from_file(emnist_path + 'emnist-byclass-train-labels-idx1-ubyte')

    X_test = idx2numpy.convert_from_file(emnist_path + 'emnist-byclass-test-images-idx3-ubyte')
    y_test = idx2numpy.convert_from_file(emnist_path + 'emnist-byclass-test-labels-idx1-ubyte')

    X_train = np.reshape(X_train, (X_train.shape[0], 28, 28, 1))
    X_test = np.reshape(X_test, (X_test.shape[0], 28, 28, 1))

    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, len(emnist_labels))

    k = 10
    X_train = X_train[:X_train.shape[0] // k]
    y_train = y_train[:y_train.shape[0] // k]
    X_test = X_test[:X_test.shape[0] // k]
    y_test = y_test[:y_test.shape[0] // k]

    X_train = X_train.astype(np.float32)
    X_train /= 255.0
    X_test = X_test.astype(np.float32)
    X_test /= 255.0

    y_train_cat = keras.utils.to_categorical(y_train, len(emnist_labels))
    y_test_cat = keras.utils.to_categorical(y_test, len(emnist_labels))

    learning_rate_reduction = keras.callbacks.ReduceLROnPlateau(monitor='val_acc', patience=3, verbose=1, factor=0.5, min_lr=0.00001)

    keras.backend.get_session().run(tf.global_variables_initializer())

    model.fit(X_train, y_train_cat, validation_data=(X_test, y_test_cat), callbacks=[learning_rate_reduction], batch_size=64, epochs=30)
    print("Training done, dT:", time.time() - t_start)
Example #19
def way_1():
    '''
        SOURCE : https://stackoverflow.com/questions/40427435/extract-images-from-idx3-ubyte-file-or-gzip-via-python

        installation :
            pip install idx2numpy
    '''
    NUMBER_OF_ITERATIONS = 1
    data = idx2numpy.convert_from_file(TRAIN_DATA)
    labels = idx2numpy.convert_from_file(TRAIN_LABEL)
    print(data.shape)
    print(labels.shape)
    for i in range(NUMBER_OF_ITERATIONS):
        idx = np.random.randint(0, len(data))
        digit = data[idx].reshape(data.shape[1:])
        plt.imshow(digit)
        plt.title('Real number : {}'.format(labels[idx]))
        plt.show()

    # ...
    data = idx2numpy.convert_from_file(VALIDATION_DATA)
    labels = idx2numpy.convert_from_file(VALIDATION_LABEL)
    print(data.shape)
    print(labels.shape)
    for i in range(NUMBER_OF_ITERATIONS):
        idx = np.random.randint(0, len(data))
        digit = data[idx].reshape(data.shape[1:])
        plt.imshow(digit)
        plt.title('Real number : {}'.format(labels[idx]))
        plt.show()
Example #20
def conversion(document, new_file):

    #Determining the size and magic of the document for error checking
    with open(document, 'rb') as f:
        bytes = f.read(8)
        magic, size = struct.unpack(">II", bytes)

        print(magic)
        print(size)

    # Convert the file contents to an ndarray by passing the filename
    ndarr = idx2numpy.convert_from_file(document)
    print(ndarr)

    # Convert again, this time by passing an open file object
    f_read = open(document, 'rb')
    print(f_read)

    ndarr = idx2numpy.convert_from_file(f_read)
    print(ndarr)

    write(ndarr, new_file)
    read_data = read(new_file)

    print(read_data)
Example #22
def readIDX(path):
    import idx2numpy
    ndarr = idx2numpy.convert_from_file(path)
    f_read = open(path, 'rb')
    ndarr = idx2numpy.convert_from_file(f_read)
    s = f_read.read()
    #ndarr = idx2numpy.convert_from_string(s)
    return ndarr
Example #23
    def __init__(self, data_path, label_path):
        self.data_path = data_path
        self.label_path = label_path

        self.dataset = idx2numpy.convert_from_file(self.data_path)
        self.labels = idx2numpy.convert_from_file(self.label_path)

        super(IdxFileDataset, self).__init__()
Example #24
def generate_df(data_path, labels_path):
    data_raw = idx2numpy.convert_from_file(data_path)
    labels = idx2numpy.convert_from_file(labels_path)
    df = pd.DataFrame(data_raw.reshape((data_raw.shape[0], 784)))
    df['value'] = labels
    df.loc[df['value'] == 0, df.columns[:784]] = 0
    df = df.astype(np.uint8)
    return df
Example #25
def loadData(Directory):
    x_train = idx2numpy.convert_from_file(Directory +
                                          'train-images.idx3-ubyte')
    y_train = idx2numpy.convert_from_file(Directory +
                                          'train-labels.idx1-ubyte')
    x_test = idx2numpy.convert_from_file(Directory + 't10k-images.idx3-ubyte')
    y_test = idx2numpy.convert_from_file(Directory + 't10k-labels.idx1-ubyte')
    return x_train, y_train, x_test, y_test
Example #26
def pcaReduction(trainIdxPath, testIdxPath, outTrainPath, outTestIdx,
                 outRatioFile, pcaEigenDir):
    trainData = idx2numpy.convert_from_file(trainIdxPath)
    testData = idx2numpy.convert_from_file(testIdxPath)

    shape = trainData.shape
    if (len(shape) > 1):
        trainData = trainData.reshape(shape[0], shape[1] * shape[2])

    shape = testData.shape
    if (len(shape) > 1):
        testData = testData.reshape(shape[0], shape[1] * shape[2])

    pca = PCA()

    pca.fit(trainData)

    cumSumRatio = numpy.cumsum(pca.explained_variance_ratio_)

    indOf09 = numpy.argmax(cumSumRatio >= 0.9)
    indOf095 = numpy.argmax(cumSumRatio >= 0.95)
    indOf099 = numpy.argmax(cumSumRatio >= 0.99)

    print('explained variance ratio: ',
          pca.explained_variance_ratio_[0:indOf099])

    print('cumsum of explained variance ratio: ', cumSumRatio[0:indOf099])

    print('indexes of 0.9, 095, 0.99 ', indOf09, indOf095, indOf099)

    #print('Shape of components:',pca.components_.shape)

    A = pca.components_[:, 0:indOf095 + 1]

    for i in range(0, indOf095 + 1):
        v = A[:, i]
        v = abs(v) * 255
        v = v.reshape(28, 28)
        im = Image.fromarray(~v.astype('uint8'), 'L')
        im.save(pcaEigenDir + os.sep + str(i) + '.png', 'png')

    #print('xxx',A.shape)

    trainData = numpy.dot(trainData, A)

    print('train shape', trainData.shape, trainData.dtype)

    testData = numpy.dot(testData, A)

    print('test shape', testData.shape, testData.dtype)

    with open(outTrainPath, 'wb') as f_write:
        idx2numpy.convert_to_file(f_write, trainData)

    with open(outTestIdx, 'wb') as f_write:
        idx2numpy.convert_to_file(f_write, testData)

    pickle.dump(cumSumRatio[0:indOf099], open(outRatioFile, 'wb'))
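For comparison, sklearn's own projection onto the first k principal components is pca.transform, which also centers the data first — a sketch under the shapes used above:

import numpy
# assumes `pca` fitted as in pcaReduction() and X of shape (n_samples, 784)
k = indOf095 + 1
X_proj = numpy.dot(X - pca.mean_, pca.components_[:k].T)  # same as pca.transform(X)[:, :k]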
Example #27
def readNewImage():
    index = 0
    file = './samples/train-images-idx3-ubyte/train-images.idx3-ubyte'
    label = './samples/train-labels-idx1-ubyte/train-labels.idx1-ubyte'
    arrFiles = idx2numpy.convert_from_file(file)
    arrLabels = idx2numpy.convert_from_file(label)
    for i in range(1):
        ascii_show(arrFiles[index])
        print(arrLabels[index])
Example #28
def load_mnist_data(imgPath, lblPath):

    examples = idx.convert_from_file(imgPath)
    labels = idx.convert_from_file(lblPath)

    examples = examples.reshape((examples.shape[0], 784))
    labels = labels.reshape((labels.size, 1))
    data = np.concatenate((examples, labels), axis=1)
    return data
Example #29
    def __init__(self):
        print("Loading dataset from files...")
        self._load(
            idx2numpy.convert_from_file('../data/MNISTdataset/train-images-idx3-ubyte.idx'),
            idx2numpy.convert_from_file('../data/MNISTdataset/train-labels-idx1-ubyte.idx'),
            idx2numpy.convert_from_file('../data/MNISTdataset/t10k-images-idx3-ubyte.idx'),
            idx2numpy.convert_from_file('../data/MNISTdataset/t10k-labels-idx1-ubyte.idx')
        )
        print("Dataset loaded from files.")
Example #30
    def __init__(self, image_path=None, label_path=None):
        ### Set Image/Label Path; otherwise it defaults to the handwritten digits file
        if (not image_path and not label_path):
            image_path = './lib/train-images.idx3-ubyte'
            label_path = './lib/train-labels.idx1-ubyte'
        # Import images directly with the idx2numpy library
        # Calling the images through this initializer is not recommended: these are the
        # raw image arrays, prior to preprocessing for fliffpress
        (self.images, self.labels) = (idx2numpy.convert_from_file(image_path),
                                      idx2numpy.convert_from_file(label_path))
Example #31
def Datasets():
    MaybeDownload()
    prefix = "./mnist/"
    train_im = idx2numpy.convert_from_file(prefix + "train-images-idx3-ubyte")
    test_im = idx2numpy.convert_from_file(prefix + "t10k-images-idx3-ubyte")
    train_labels = idx2numpy.convert_from_file(prefix +
                                               "train-labels-idx1-ubyte")
    test_labels = idx2numpy.convert_from_file(prefix +
                                              "t10k-labels-idx1-ubyte")
    return train_im, test_im, train_labels, test_labels
Example #32
def train_model(model):
    t_start = time.time()

    emnist_path = 'neural/emnist/database/digits/'

    X_train = idx2numpy.convert_from_file(
        emnist_path + 'emnist-digits-train-images-idx3-ubyte')
    y_train = idx2numpy.convert_from_file(
        emnist_path + 'emnist-digits-train-labels-idx1-ubyte')

    X_test = idx2numpy.convert_from_file(
        emnist_path + 'emnist-digits-test-images-idx3-ubyte')
    y_test = idx2numpy.convert_from_file(
        emnist_path + 'emnist-digits-test-labels-idx1-ubyte')

    X_train = np.reshape(X_train, (X_train.shape[0], 28, 28, 1))
    X_test = np.reshape(X_test, (X_test.shape[0], 28, 28, 1))

    # Test:
    k = 1
    X_train = X_train[:X_train.shape[0] // k]
    y_train = y_train[:y_train.shape[0] // k]
    X_test = X_test[:X_test.shape[0] // k]
    y_test = y_test[:y_test.shape[0] // k]

    # Normalize
    X_train = X_train.astype(np.float32)
    X_train /= 255.0
    X_test = X_test.astype(np.float32)
    X_test /= 255.0

    y_train_cat = keras.utils.to_categorical(y_train, len(emnist_labels))
    y_test_cat = keras.utils.to_categorical(y_test, len(emnist_labels))

    model.compile(
        optimizer='adam',  # Optimizer
        # Loss function to minimize
        loss=keras.losses.CategoricalCrossentropy(),
        # List of metrics to monitor
        metrics=[keras.metrics.CategoricalAccuracy()])

    print('# Training the model on the training data')
    history = model.fit(X_train,
                        y_train_cat,
                        batch_size=64,
                        epochs=3,
                        validation_data=(X_test, y_test_cat))

    print("Training done, dT:", time.time() - t_start)
    print('\nhistory dict:', history.history)

    # Evaluate the model on the test data using "evaluate"
    print('\n# Evaluating on the test data')
    results = model.evaluate(X_test, y_test_cat, batch_size=1024)
    print('test loss, test acc:', results)
Example #33
    def read_answer(self, number="test"):
        filename1 = self.path + "Resp/" + number + ".samples"
        filename2 = self.path + "Resp/" + number + ".occs"
        '''filename3 = self.path + "Resp/" + number + ".lens"

        lens = np.zeros((2), dtype = "int16")
        lens[0] = self.hidlen
        lens[1] = self.vislen'''

        self.answer = idx2numpy.convert_from_file(filename1)
        self.answer_occ = idx2numpy.convert_from_file(filename2)
Example #34
def prepareData():
	""" Loads the MNIST test dataset from files located in the MNIST_data folder. Inverts
	images, reformats them into a single array of pixel features, and adds each to a pandas
	dataframe. Returns the aforementioned pandas dataframe along with a numpy array of the
	accompanying labels.
	"""

	test_images_np = idx2numpy.convert_from_file('MNIST_data/t10k-images-idx3-ubyte')
	test_labels_np = idx2numpy.convert_from_file('MNIST_data/t10k-labels-idx1-ubyte')

	img_set = [[255-x for x in entry.flatten()] for entry in test_images_np]
	mnist_test_df = pd.DataFrame(img_set)

	return mnist_test_df, test_labels_np
Example #35
def prepareData():
	""" Loads the MNIST training dataset from files located in the MNIST_data folder.
	Inverts images, reformats them into a single array of pixel features, and adds
	each to a pandas dataframe. Returns the aforementioned pandas dataframe along with
	a numpy array of the accompanying labels.
	"""

	train_images_np = idx2numpy.convert_from_file('MNIST_data/train-images-idx3-ubyte')
	train_labels_np = idx2numpy.convert_from_file('MNIST_data/train-labels-idx1-ubyte')

	# Reformat from 2-d array of pixels to a 1-d array and invert so that images are dark letters on lighter background
	img_set = [[255-x for x in entry.flatten()] for entry in train_images_np]
	mnist_df = pd.DataFrame(img_set)

	return mnist_df, train_labels_np
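In both versions of prepareData() the per-pixel list comprehension can be replaced by one NumPy expression — a sketch reusing the same names:

import numpy as np
import pandas as pd
# assumes train_images_np as loaded above
img_set = 255 - train_images_np.reshape(train_images_np.shape[0], -1)
mnist_df = pd.DataFrame(img_set)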
Example #36
def load_dataset(IDX_IMG_FILE, IDX_LBL_FILE):
	imgs = idx2numpy.convert_from_file(IDX_IMG_FILE)
	lbls = idx2numpy.convert_from_file(IDX_LBL_FILE)
	dataset = []
	for i in range(len(imgs)):
		vector = []
		for row in imgs[i]:
			vector = vector + row.tolist()
		lbl = [0] * 10
		lbl[lbls[i]] = 1  # one-hot; the original assigned lbls[i] here, leaving digit 0 encoded as all zeros
		img = []
		img.append(vector)
		img.append(lbl)
		dataset.append(img)
	return dataset
Example #37
def load_dataset(IDX_IMG_FILE, IDX_LBL_FILE):
	imgs = idx2numpy.convert_from_file(IDX_IMG_FILE)
	lbls = idx2numpy.convert_from_file(IDX_LBL_FILE)
	lbls = lbls.tolist()
	train = []
	for i in range(len(imgs)):
		vector = []
		for row in imgs[i]:
			vector = vector + row.tolist()
		img = []
		img.append(vector)
		img.append(lbls[i])
		train.append(img)
	print(len(train))
	for i in range(10):
		print(train[i])
Example #38
def load_mnist(data_path):
    ''' Loads the MNIST data from the base path '''

    train_img_path = '%s/train-images.idx3-ubyte' % data_path

    # get the training data
    train_img = idx2numpy.convert_from_file(train_img_path)
    m, row, col = train_img.shape
    d = row * col
    X_tr = np.reshape(train_img, (m, d)) / 255.

    return X_tr
Example #39
    def __init__(self, filepath, labels=0):
        self.data = idx2numpy.convert_from_file(filepath)

        if labels > 0 :
            temp = np.zeros((self.data.shape[0], labels))

            for i in range(self.data.shape[0]) :
                temp[i, self.data[i]] = 1

            self.data=temp

        else :
            self.data = self.data / 256
Example #40
import numpy as np
import idx2numpy
import csv

""" The idx files for the MNIST dataset can be downloaded at
http://yann.lecun.com/exdb/mnist/.  This python script can
then be used to convert them into two csv files.  The first
containing all 70,000 images (one row per image), and the
second containing all 70,000 labels (single row). """

trainImages = idx2numpy.convert_from_file('/your/path/here/train-images.idx3-ubyte')
trainLabels = idx2numpy.convert_from_file('/your/path/here/train-labels.idx1-ubyte')
testImages = idx2numpy.convert_from_file('/your/path/here/t10k-images.idx3-ubyte')
testLabels = idx2numpy.convert_from_file('/your/path/here/t10k-labels.idx1-ubyte')

images = np.concatenate([trainImages.reshape(60000,784), testImages.reshape(10000,784)])
labels = np.concatenate([trainLabels, testLabels])

with open('/your/path/here/mnist_images.csv', 'w') as csvfile:
	writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
	for row in images:
		writer.writerow(row)
			
with open('/your/path/here/mnist_labels.csv', 'w') as csvfile:
	writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
	writer.writerow(labels)
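To load the resulting CSVs back into arrays, something like this would do (a sketch; the paths are the same placeholders as above):

import numpy as np
images = np.loadtxt('/your/path/here/mnist_images.csv', delimiter=',')
labels = np.loadtxt('/your/path/here/mnist_labels.csv', delimiter=',')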
Example #41
# fine-tuning  

import idx2numpy
import numpy as  np
from nnet import SoftmaxClassifier as scl
from nnet import Autoencoder as ae
from nnet import DeepAutoencoderClassifier as dac

# define the paths
train_img_path = '/home/avasbr/datasets/MNIST/train-images.idx3-ubyte'
train_lbl_path = '/home/avasbr/datasets/MNIST/train-labels.idx1-ubyte' 
test_img_path = '/home/avasbr/datasets/MNIST/t10k-images.idx3-ubyte' 
test_lbl_path = '/home/avasbr/datasets/MNIST/t10k-labels.idx1-ubyte'

# convert the raw images into feature vectors
train_img = idx2numpy.convert_from_file(train_img_path)
m_tr,row,col = train_img.shape
d = row*col # dimensions
X_tr = np.reshape(train_img[:m_tr],(m_tr,d)).T/255. # train data matrix
train_lbl = idx2numpy.convert_from_file(train_lbl_path)
k = max(train_lbl)+1

# set the targets for the training-set
y_tr = np.zeros((k,m_tr))
for i,idx in enumerate(train_lbl[:m_tr]):
	y_tr[idx,i] = 1

# set the data matrix for test
test_img = idx2numpy.convert_from_file(test_img_path)
m_te = test_img.shape[0]
X_te = np.reshape(test_img,(m_te,d)).T/255. # test data matrix
Example #42
#!/usr/bin/env python
#-*- coding: utf-8 -*-

from __future__ import division, print_function
import idx2numpy, gzip, sys
import numpy as np

DIM_SIZE = 28

print("Reading training data from original MNIST file ...", file=sys.stderr)
training_idx_fp = gzip.open('../data/MNIST/train-images-idx3-ubyte.gz')
training_arr = idx2numpy.convert_from_file(training_idx_fp)
training_idx_fp.close()

# convert to binary
print("Converting to binary images ...", file=sys.stderr)
training_arr = training_arr.astype(bool).astype(int)
# flatten each image
training_arr = training_arr.reshape(training_arr.shape[0], DIM_SIZE ** 2)

print("Save results in csv format for IBP noisyor ...", file=sys.stderr)
# write out csv file for ibp
header_str = ','.join(['p' + str(_) for _ in range(DIM_SIZE)])
training_ibp_fp = gzip.open('../data/MNIST/train-images-binary-ibp.csv.gz', 'w')
np.savetxt(training_ibp_fp, training_arr, fmt='%d', delimiter=',', header=header_str, comments='')
training_ibp_fp.close()

print("Save results in csv format for tIBP noisyor ...", file=sys.stderr)
# write out csv file for tibp
training_tibp_fp = gzip.open('../data/MNIST/train-images-binary-tibp.csv.gz', 'w')
training_arr_tibp = np.insert(training_arr, 0, DIM_SIZE, axis=1)
Example #43
from kmeans import K_means
from dbscan import DBScan
from idx2numpy import convert_from_file
import numpy
from scipy.spatial.distance import hamming

images = numpy.reshape(convert_from_file("train-images.idx3-ubyte"), (60000, 784)).astype("float64")
labels = convert_from_file("train-labels.idx1-ubyte")
images = numpy.multiply(images, 1 / 255)

kmeans = K_means(10, 1)
kmeans.fit(numpy.array([images[i] for i in range(5000)]))
print(kmeans.score(numpy.array([labels[i] for i in range(5000)])))

#dbscan = DBScan(4.795, 50)
#dbscan.fit(numpy.array([images[i] for i in range(1000)]))
#print(dbscan.score(numpy.array([labels[i] for i in range(1000)])))
#print(dbscan.clusters())
Example #44
import numpy as np
import idx2numpy
from PIL import Image  # plain `import Image` only works with the pre-Pillow PIL
import pickle

images = idx2numpy.convert_from_file("train-images-idx3-ubyte")
data = []
temp = []
for image in images:
    for i in image:
        for j in i:
            temp.append(j)
    data.append(temp)
    temp = []
X = np.asarray(data, "float32")
# print X.shape
# X = (X-np.min(X,0))/(np.max(X,0)+0.0001)
X = X / 255.0
# X = np.matrix(X)
Y = idx2numpy.convert_from_file("train-labels-idx1-ubyte")
X_test = X[50000:]
Y_test = Y[50000:]
X = X[:50000]
Y = Y[:50000]

sizes = [X.shape[1], 100]


class rbm:
    def __init__(self, sizes=[], learning_rate=0.01, numepochs=1):
        print "rbm init ,sizes:", sizes, ", numepochs:", numepochs
Example #45
def load_dataset(ds):
  x_path = "mnist/%s-images-idx3-ubyte" % ds
  y_path = "mnist/%s-labels-idx1-ubyte" % ds
  x = preprocess_xs(idx2numpy.convert_from_file(x_path))
  y = idx2numpy.convert_from_file(y_path)
  return (x, y)
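preprocess_xs is not shown; a plausible version (an assumption) flattens the images and scales them to [0, 1]:

import numpy as np

def preprocess_xs(xs):
    # Hypothetical helper: (n, 28, 28) uint8 -> (n, 784) floats in [0, 1]
    return xs.reshape(xs.shape[0], -1).astype(np.float64) / 255.0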
Example #46
import idx2numpy
import numpy as np
from PIL import Image
import scipy
import scipy.misc
import csv

imagesidx = idx2numpy.convert_from_file('t10k-images.idx3-ubyte')
labelidx = idx2numpy.convert_from_file('t10k-labels.idx1-ubyte')

images = []
for img, label in zip(imagesidx, labelidx):
    raw_img = scipy.misc.imresize(img, (16,16)).flatten()
    raw_img = [(255 - pixel) for pixel in raw_img]
    images.append(np.append(raw_img, label))


images = np.array(images)
images = images.astype('uint8')
print(images.dtype)
# np.savetxt("foo.csv", images, delimiter=",")
with open("foo.csv", 'w+') as f:
    csvwriter = csv.writer(f, delimiter=',')
    for image in images:
        csvwriter.writerow(image)
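scipy.misc.imresize was removed in SciPy 1.3; a close equivalent with Pillow (already imported above) would be:

# e.g., inside the loop above:
raw_img = np.array(Image.fromarray(img).resize((16, 16))).flatten()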
Example #47
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    from sklearn.ensemble import RandomForestClassifier as RFC

__author__ = 'vks'

import_labs(["task4p1/", "task3/"])
from random_forest import RandomForest
from kNN import Naive_kNN
from CV import k_fold

px_x = 2
px_y = 2

train_images = idx2numpy.convert_from_file("train-images.idx3-ubyte")
train_images_hog = [hog(img, orientations=8, pixels_per_cell=(px_x, px_y), cells_per_block=(1, 1))
                    for img in train_images]
train_labels = idx2numpy.convert_from_file("train-labels.idx1-ubyte")
test_images = idx2numpy.convert_from_file("t10k-images.idx3-ubyte")
test_images_hog = [hog(img, orientations=8, pixels_per_cell=(px_x, px_y), cells_per_block=(1, 1))
                   for img in test_images]
test_labels = idx2numpy.convert_from_file("t10k-labels.idx1-ubyte")


def shift(axis, dist, data):
    ans = [[[0] * len(data[0][0]) for i in range(len(data[0]))] for j in range(len(data))]
    for i in range(len(data)):
        size = len(ans[i])
        for j in range(size):
            for k in range(size):
Example #48
# This demo applies the sparse autoencoder to the MNIST data to learn
# pen-stroke-like features. The self-taught learning (STL) demo explores
# the use of these features for classification purposes

import idx2numpy
import numpy as np
import matplotlib.pyplot as plt
from nnet import Autoencoder as ae
from nnet.common import dataproc as dp

# define the paths
train_img_path = '/home/avasbr/datasets/MNIST/train-images.idx3-ubyte'

# convert the raw images into feature vectors
num_img = 10000
train_img = idx2numpy.convert_from_file(train_img_path)
dummy,row,col = train_img.shape
d = row*col # dimensions
X_tr = np.reshape(train_img[:num_img],(num_img,d)).T/255. # train data matrix

# Neural network initialization parameters

print('Sparse Autoencoder applied to MNIST data\n')

print('Data:')
print('------')
print('Number of samples for training:', num_img, '\n')

nnet_params = {'d':d,'n_hid':196,'decay':0.003,'beta':3,'rho':0.1}
optim_params = {'method':'L-BFGS-B','n_iter':400}
Example #49

xp = np

img_size = 48

train_size = 1711
test_size = 249
N = train_size
N_test = test_size

train_path = "./data/numbers-proceed"
test_path = "./data/mustread-proceed"


x_train = i2n.convert_from_file('./data/new/faxocr-training-48_train_images.idx3')
y_train = i2n.convert_from_file('./data/new/faxocr-training-48_train_labels.idx1').astype('int32')

x_test = i2n.convert_from_file('./data/new/faxocr-mustread-48_train_images.idx3')
y_test = i2n.convert_from_file('./data/new/faxocr-mustread-48_train_labels.idx1').astype('int32')


print(x_train.shape)
def reshape(data):
    shape = data.shape
    n_d = np.zeros((shape[0],1,shape[1],shape[2]),dtype="float32")
    size = shape[0]
    for i in range(size):
        n_d[i][0] = data[i]
    return n_d
Example #50
#!c:/Python34/python.exe

# from here: https://www.snip2code.com/Snippet/257756/Python-script-for-converting-the-MNIST-d

import numpy as np
import idx2numpy
import csv

""" The idx files for the MNIST dataset can be downloaded at
http://yann.lecun.com/exdb/mnist/.  This python script can
then be used to convert them into two csv files.  The first
containing all 70,000 images (one row per image), and the
second containing all 70,000 labels (single row). """

trainImages = idx2numpy.convert_from_file('data/train-images-idx3-ubyte')
trainLabels = idx2numpy.convert_from_file('data/train-labels-idx1-ubyte')
testImages = idx2numpy.convert_from_file('data/t10k-images-idx3-ubyte')
testLabels = idx2numpy.convert_from_file('data/t10k-labels-idx1-ubyte')

#images = np.concatenate([trainImages.reshape(60000,784), testImages.reshape(10000,784)])
#labels = np.concatenate([trainLabels, testLabels])

train_images = trainImages.reshape(60000,784)
test_images = testImages.reshape(10000,784)


with open('mnist_train_images.csv', 'w') as csvfile:
	writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
	for row in train_images:
		writer.writerow(row)
Example #51
	random_forest_classifier.fit(training_image_data_hog, training_label_data)
	random_forest_classifier_accuracy = random_forest_classifier.score(testing_image_data_hog, testing_label_data)
	print "\nRandom Forest accuracy with max_depth="+str(max_depth)+" and number of trees = "+str(number_of_trees)+ " is "+ str(random_forest_classifier_accuracy)

	if max_depth in best_accuracy_forest:
		if  best_accuracy_forest[max_depth] < random_forest_classifier_accuracy:
			best_accuracy_forest[max_depth] = random_forest_classifier_accuracy
			best_number_of_tree_forest[max_depth] = number_of_trees
	else :
		# print "\nEntered else case in forest"
		best_accuracy_forest[max_depth] = random_forest_classifier_accuracy
		best_number_of_tree_forest[max_depth] = number_of_trees

if __name__ == '__main__':
	print "Building Data set"	
	training_image_data = idx2numpy.convert_from_file("train-images.idx3-ubyte")
	training_image_data_hog = [hog(img, orientations=9, pixels_per_cell=(8,8), cells_per_block=(3, 3))
					for img in training_image_data]
	training_label_data = idx2numpy.convert_from_file("train-labels.idx1-ubyte")
	testing_image_data = idx2numpy.convert_from_file("t10k-images.idx3-ubyte")
	testing_image_data_hog = [hog(img, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(3, 3))
					for img in testing_image_data]
	testing_label_data = idx2numpy.convert_from_file("t10k-labels.idx1-ubyte")
	print "Dataset is complete"

	depth_array = [5,6,7]
	number_of_trees_array = [310,350,390]
	for depth in depth_array:
		for number_of_trees in number_of_trees_array :
			find_best_adaboost_classifier(number_of_trees,depth)
			find_best_random_forest_classifier(number_of_trees,depth)
Example #52
from idx2numpy import convert_from_file
from adaboost import AdaBoostMulticlass
from numpy import reshape, multiply

train_images = reshape(convert_from_file("train-images.idx3-ubyte").astype('float64'), (60000, 784))
train_labels = convert_from_file("train-labels.idx1-ubyte")
test_images = reshape(convert_from_file("t10k-images.idx3-ubyte").astype('float64'), (10000, 784))
test_labels = convert_from_file("t10k-labels.idx1-ubyte")

train_images = multiply(train_images, 1 / 255)
test_images = multiply(test_images, 1 / 255)

ada = AdaBoostMulticlass(1000)
ada.fit(train_images, train_labels)
score = 0
for i in range(10000):
    if ada.predict(test_images[i]) == test_labels[i]:
        score += 1

print(score / 10000)
Example #53
import idx2numpy

train_img_path = r'E:\VirtualDesktop\nnet\minist\train-images.idx3-ubyte'
train_index_path = r'E:\VirtualDesktop\nnet\minist\train-labels.idx1-ubyte'
t10k_img_path = r'E:\VirtualDesktop\nnet\minist\t10k-images.idx3-ubyte'
t10k_index_path = r'E:\VirtualDesktop\nnet\minist\t10k-labels.idx1-ubyte'

train_images = idx2numpy.convert_from_file(train_img_path)
train_labels = idx2numpy.convert_from_file(train_index_path)

print('End')
Example #54
#!/usr/bin/env python

# import array
import os
import numpy
from PIL import Image

import idx2numpy

def isint(x):
  try: int(x)
  except: return False
  else: return True

if __name__ == "__main__":
  trainingImages =  idx2numpy.convert_from_file("train-images-idx3-ubyte")
  trainingLabels =  idx2numpy.convert_from_file("train-labels-idx1-ubyte")
  testImages =  idx2numpy.convert_from_file("t10k-images-idx3-ubyte")
  testLabels =  idx2numpy.convert_from_file("t10k-labels-idx1-ubyte")

  labelSet = set()
  labelSet.update(set(trainingLabels))
  labelSet.update(set(testLabels))

  # ints = [i for i in labelSet if isint(i)]
  # labelSet.difference_update(ints)
  # labelSet = [str(j) for j in (sorted([int(i) for i in ints]) + sorted(labelSet))]
  labelSet = [str(j) for j in sorted(labelSet)]

  extension = "png"
Example #55
import numpy as np
import math
import idx2numpy as inp
from sklearn.neighbors import KNeighborsClassifier

trimgs = inp.convert_from_file('train-images.idx3-ubyte')
trlbls = inp.convert_from_file('train-labels.idx1-ubyte')
tsimgs = inp.convert_from_file('t10k-images.idx3-ubyte')
tslbls = inp.convert_from_file('t10k-labels.idx1-ubyte')
trimgs = np.resize(trimgs,(60000,784))
tsimgs = np.resize(tsimgs,(10000,784))

neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(trimgs, trlbls)

pred=neigh.predict(tsimgs)
corr=0
for i in range(10000):
    if (pred[i]==tslbls[i]):
        corr+=1
print "accuracy: ",corr/100

Example #56
    def test_correct_file_on_disk(self):
        file = os.path.join(self.files_dir, 'correct.idx')
        self.assertSequenceEqual(
            [0x0A, 0x0B, 0x0C],
            self._to_list(idx2numpy.convert_from_file(file)))
Example #57
def get_mnist_trainset():
    x_train = idx2numpy.convert_from_file('train-images.idx3-ubyte')
    x_train = np.array([x.flatten() for x in x_train])
    y_train = idx2numpy.convert_from_file('train-labels.idx1-ubyte')
    return x_train, y_train
Example #58
import idx2numpy
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA  # sklearn.lda.LDA moved here in modern scikit-learn
from multiprocessing import Pool
import pdb

imagestra = idx2numpy.convert_from_file('train-images.idx3-ubyte')
labelstra = idx2numpy.convert_from_file('train-labels.idx1-ubyte')

labelsmaptra = { x : [] for x in list( set( list(labelstra))) }
imagesArray = []
for i in range(len(imagestra)):
    imagesArray.append( imagestra[i].reshape( (1, (28*28)))[0] )

for lab, img in zip( list(labelstra), imagesArray ):
    labelsmaptra[lab].append( img )

imagestes = idx2numpy.convert_from_file('t10k-images.idx3-ubyte')
labelstes = idx2numpy.convert_from_file('t10k-labels.idx1-ubyte')

labelsmaptes = { x : [] for x in list( set( list(labelstes))) }
imagesArray = []
for i in range(len(imagestes)):
    imagesArray.append( imagestes[i].reshape( (1, (28*28)))[0] )

for lab, img, in zip( list(labelstes), imagesArray ):
    labelsmaptes[lab].append( img )

#numbers = [ (x, y) for x in range(10) for y in range(x,10) if x != y ]

def runTestPairs( e ):
Example #59
import numpy as np
import idx2numpy 
import csv

""" The idx files for the MNIST dataset can be downloaded at
http://yann.lecun.com/exdb/mnist/.  This python script can
then be used to convert them into two csv files.  The first
containing all 70,000 images (one row per image), and the
second containing all 70,000 labels (single row). """

trainImages = idx2numpy.convert_from_file('/home/simjay/workspace/NaiveBayes/others/mnist/train-images.idx3-ubyte')
trainLabels = idx2numpy.convert_from_file('/home/simjay/workspace/NaiveBayes/others/mnist/train-labels.idx1-ubyte')
testImages = idx2numpy.convert_from_file('/home/simjay/workspace/NaiveBayes/others/mnist/t10k-images.idx3-ubyte')
testLabels = idx2numpy.convert_from_file('/home/simjay/workspace/NaiveBayes/others/mnist/t10k-labels.idx1-ubyte')

trainImages = trainImages.reshape(60000, 784)
testImages = testImages.reshape(10000, 784)



with open('/home/simjay/workspace/NaiveBayes/mnistCSV/trainImages.csv', 'w') as csvfile:
	writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
	for row in trainImages:
		writer.writerow(row)

print("done")

with open('/home/simjay/workspace/NaiveBayes/mnistCSV/testImages.csv', 'w') as csvfile:
	writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)