Exemplo n.º 1
0
def get_mnist():
    """Load MNIST and return it as lists of ``(image, label)`` column pairs.

    Images are converted to ``float32`` and divided by 255, then reshaped to
    ``(784, 1)`` column vectors.  Each label becomes a ``(10, 1)`` vector of
    zeros with a single ``1`` at the index given by the digit.

    Returns:
        tuple: ``(training_data, testing_data)``; each element is a list of
        ``(x, y)`` tuples in the format described above.
    """

    def as_indicator_column(digit):
        # One (10, 1) vector per label with a 1 at position `digit`.
        column = np.zeros((10, 1))
        column[digit] = 1
        return column

    def pair_up(images, digits):
        # Encode every label first, then attach it to its reshaped image.
        targets = [as_indicator_column(digit) for digit in digits]
        return [(image.reshape(784, 1), target)
                for image, target in zip(images, targets)]

    loader = MNIST('../datasets/MNIST')
    print("Loading Data ... ")
    raw_train_x, raw_train_y = loader.load_training()  # 60000 samples
    raw_test_x, raw_test_y = loader.load_testing()  # 10000 samples

    train_x = np.asarray(raw_train_x).astype(np.float32) / 255
    train_y = np.asarray(raw_train_y).astype(np.int32)
    test_x = np.asarray(raw_test_x).astype(np.float32) / 255
    test_y = np.asarray(raw_test_y).astype(np.int32)

    print("formatting data")
    return (pair_up(train_x, train_y), pair_up(test_x, test_y))
Exemplo n.º 2
0
def MnistData(filepath=mnistpath):
    '''Load MNIST and return images and labels as pytorch tensors.

    Input
    -----

    filepath - 'string'
        Location of the MNIST dataset (defaults to the module-level
        ``mnistpath``)

    Output
    ------

    Xtrain - 'torch.tensor'
        Training images, divided by 255.0

    trainLabels - 'torch.tensor'
        Training labels

    Xtest - 'torch.tensor'
        Test images, divided by 255.0

    testLabels - 'torch.tensor'
        Test labels
    '''
    loader = MNIST(filepath)
    train_pixels, train_targets = loader.load_training()
    test_pixels, test_targets = loader.load_testing()

    # Dividing by 255.0 normalizes the pixel values.
    Xtrain = torch.tensor(train_pixels) / 255.0
    trainLabels = torch.tensor(train_targets)
    Xtest = torch.tensor(test_pixels) / 255.0
    testLabels = torch.tensor(test_targets)

    return Xtrain, trainLabels, Xtest, testLabels
Exemplo n.º 3
0
def main():
    """Run the PCA demo on one clean and one noisy MNIST training image.

    The dataset is read from ``./Data/`` and the training images are passed
    through ``preprocessing.normalize``.  The image at index 1 is handed to
    ``convert_pca``; the result is plotted with ``plot_demo`` and
    ``plot_scree``.  The same steps are then repeated on a noisy copy of the
    training images produced by ``create_noise``.
    """
    loader = MNIST('./Data/')
    raw_train, train_targets = loader.load_training()
    raw_test, test_targets = loader.load_testing()

    train_pixels = np.asarray(raw_train)
    test_pixels = np.asarray(raw_test)  # converted but not used below
    clean = preprocessing.normalize(train_pixels)
    train_targets = np.asarray(train_targets)  # converted but not used below

    # PCA of a single normalized image, shown next to the 28x28 original.
    x_hat, D = convert_pca(clean[1])
    plot_demo(np.array(clean[1]).reshape(28, 28), x_hat)

    # Scree plot of D returned by convert_pca.
    plot_scree(D)

    # Same pipeline on the noisy copy of the (un-normalized) training images.
    noisy = preprocessing.normalize(np.array(create_noise(train_pixels)))

    x_hat, D = convert_pca(noisy[1])
    plot_demo(np.array(noisy[1]).reshape(28, 28),
              x_hat,
              fname='Noise_Comparison')
    plot_scree(D, n=15, name='Scree_Plot_Noise')
Exemplo n.º 4
0
def get_mnist_data():
    """Load MNIST from ``python-mnist/data`` as NumPy arrays.

    Returns:
        tuple: ``(train_X, train_y, test_X, test_y)``.  The image arrays are
        reshaped to ``(-1, 28, 28)``; the label sequences are converted with
        ``np.array``.
    """
    loader = MNIST('python-mnist/data')
    train_pixels, train_digits = loader.load_training()
    test_pixels, test_digits = loader.load_testing()

    def as_images(flat_rows):
        # Flat pixel rows -> stack of 28x28 images.
        return np.array(flat_rows).reshape(-1, 28, 28)

    return (as_images(train_pixels), np.array(train_digits),
            as_images(test_pixels), np.array(test_digits))
Exemplo n.º 5
0
        def __init__(self, cv_iters):
            """Load all of MNIST and build boolean "digit is 8" labels.

            The training and testing splits read from ``data`` are
            concatenated.  ``self.features`` holds the images as an array and
            ``self.labels`` is a boolean column vector that is True where the
            digit equals 8.  ``self.shuffle()`` is called once loading is done.

            cv_iters: number of cross-validation folds; must be greater than 2.
            """
            assert cv_iters > 2, 'Cross validation folds must be more than 2 folds'
            self.cv_iters = cv_iters

            loader = MNIST('data')
            train_images, train_digits = loader.load_training()
            test_images, test_digits = loader.load_testing()

            # Merge both splits into one pool of samples.
            self.features = np.array(train_images + test_images)
            digit_column = np.array(train_digits + test_digits).reshape(-1, 1)
            # 6825 samples of 8 in total of 70000 samples
            self.labels = digit_column == 8

            self.shuffle()
Exemplo n.º 6
0
def load_dataset(ds_path, training=True, mnist_format=True):
    """Load images and labels from an MNIST directory or a JSON dump.

    Args:
        ds_path: Directory that contains the dataset files.
        training: Load the training split when true, the testing split
            otherwise.
        mnist_format: When true, read the data through the ``MNIST`` loader.
            Otherwise read ``training.json`` / ``testing.json`` from
            ``ds_path``; the file must hold a JSON object whose keys are
            Python-literal strings describing an image and whose values are
            the labels.

    Returns:
        tuple: ``(images, labels)`` for the requested split.
    """
    if mnist_format:
        dataset = MNIST(ds_path)
        if training:
            return (dataset.load_training()[0], dataset.train_labels)
        return (dataset.load_testing()[0], dataset.test_labels)

    # The testing branch previously opened "training.json" as well, so the
    # testing split could never be loaded from a JSON dump.
    json_name = "/training.json" if training else "/testing.json"
    with open(ds_path + json_name) as json_file:
        dataset = json.load(json_file)

    # Keys are stringified images; parse them back into Python objects.
    imgs = [ast.literal_eval(img) for img in dataset]
    labels = list(dataset.values())
    return (imgs, labels)
Exemplo n.º 7
0
def MnistData(filepath=mnistpath):
    '''Load MNIST and one-hot encode the labels of both splits.

    Input
    -----

    filepath - 'string'
        Location of the MNIST dataset (defaults to the module-level
        ``mnistpath``)

    Output
    ------

    Xtrain - 'np.array'
        Training images, divided by 255.0

    Ytrain - 'np.array'
        One-hot encoded training labels

    labels_train - 'np.array'
        Training labels as loaded

    Xtest - 'np.array'
        Test images, divided by 255.0

    Ytest - 'np.array'
        One-hot encoded test labels

    labels_test - 'np.array'
        Test labels as loaded
    '''
    loader = MNIST(filepath)
    train_pixels, train_digits = loader.load_training()
    test_pixels, test_digits = loader.load_testing()

    labels_train = np.array(train_digits)
    labels_test = np.array(test_digits)
    Xtrain = np.array(train_pixels) / 255.0  # normalize dataset
    Xtest = np.array(test_pixels) / 255.0

    num_train, _ = Xtrain.shape
    num_classes = labels_train.max() + 1  # number of classes
    num_test = len(labels_test)  # number of test observations

    def one_hot(digits, rows):
        # Row r gets a single 1 in column digits[r].
        encoded = np.zeros((rows, num_classes))
        encoded[np.arange(rows), digits] = 1
        return encoded

    Ytrain = one_hot(labels_train, num_train)
    Ytest = one_hot(labels_test, num_test)

    return Xtrain, Ytrain, labels_train, Xtest, Ytest, labels_test
Exemplo n.º 8
0
def EncodeMnist(values=(2, 7), encodingVal=(-1, 1), filepath=mnistpath):
    '''Load MNIST, keep only two digit classes and re-encode their labels.

    Filtering is delegated to ``binaryData`` and label encoding to
    ``binarycode``.

    Input
    -----
    values - 'list or tuple'
        Two values (0-9) to use for selecting binary values

    encodingVal - 'list or tuple'
        Values passed to ``binarycode`` to encode the two selected labels

    filepath - 'string'
        Location of the MNIST dataset (defaults to the module-level
        ``mnistpath``)

    Output
    ------
    XtrainC - 'np.array'
        Training data set for the selected values

    Ytrainlab - 'np.array'
        Encoded labels of the training set

    XtestC - 'np.array'
        Test data set for the selected values

    Ytestlab - 'np.array'
        Encoded labels of the test set
    '''
    loader = MNIST(filepath)
    train_pixels, train_digits = loader.load_training()
    test_pixels, test_digits = loader.load_testing()

    labels_train = np.array(train_digits)
    labels_test = np.array(test_digits)
    scaled_train = np.array(train_pixels) / 255.0  # normalize dataset
    scaled_test = np.array(test_pixels) / 255.0

    XtrainC, kept_train = binaryData(values, scaled_train, labels_train)
    XtestC, kept_test = binaryData(values, scaled_test, labels_test)

    return (XtrainC,
            binarycode(kept_train, values, encodingVal),
            XtestC,
            binarycode(kept_test, values, encodingVal))
Exemplo n.º 9
0
# Earlier toy experiment, kept for reference:
# set1 = [
# 	SimpleExample([1, 1, 1], 1),
# 	SimpleExample([0, 1, 1], 1),
# 	SimpleExample([1, 1, 0], 0),
# 	SimpleExample([0, 1, 0], 0),
# 	SimpleExample([0, 0, 1], 0)
# ]

# pn = PerceptronNetwork(3, 1)
# for i in range(100):
# 	pn.fit(set1)
# print(pn.testClassifier(set1))


# Only the testing split is loaded; the loader keeps it on
# mndata.test_images / mndata.test_labels.
mndata = MNIST(".")
# mndata.load_training()
mndata.load_testing()

# Use the first `size` test samples (full set: len(mndata.test_images)).
size = 50
examples = [
    SimpleExample(mndata.test_images[idx], mndata.test_labels[idx])
    for idx in range(size)
]

# Fit the network on these samples and report on the same samples.
pn = PerceptronNetwork(784, 10, 300)
pn.fit(examples)
print(pn.testClassifier(examples))
Exemplo n.º 10
0
import numpy as np
from deep_neural_network_batch_normalization import *
from nn_utils import *
from mnist.loader import MNIST

# Loading data.
# NOTE(review): `db` is an absolute path on the author's machine; adjust it
# before running elsewhere.
db = '/Users/aclaudioquiros/Documents/NN Data/Data/MNIST_database/'
mndata = MNIST(db)
images, labels = mndata.load_training()
images_test, labels_test = mndata.load_testing()
# Transpose so that axis 0 is the feature axis; layer_dim below uses
# images.shape[0] as the input layer size.
images = np.array(images).T
images_test = np.array(images_test).T
# onehot / normalize come from the wildcard imports above; presumably they
# one-hot encode the labels and rescale the pixels -- TODO confirm in nn_utils.
labels = onehot(labels, images.shape)
labels_test = onehot(labels_test, images_test.shape)
images = normalize(images)
images_test = normalize(images_test)

# Playing with data to over fit model.
# samples = 1e+3
# images = images[:, :int(samples)]
# labels = labels[:, :int(samples)]
# images = np.random.normal(0, 1, size=(400, 1000))
# layer_dim = [images.shape[0], 400, 400, 400, 400, 400, 400, 400, 400, 400, labels.shape[0]]
# activations = [None, 'relu', 'relu', 'relu', 'relu', 'relu', 'relu', 'relu', 'relu', 'relu', 'softmax']

# Network layout: input size, two hidden layers (125 and 40 units), and an
# output layer sized by labels.shape[0]. The activation list has one entry per
# layer, with None for the input layer.
layer_dim = [images.shape[0], 125, 40, labels.shape[0]]
activations = [None, 'relu', 'relu', 'softmax']
deep_nn = NeuralNetwork(layer_dim,
                        activations,
                        learning_rate=0.2,
                        num_iterations=1000,
from keras.layers import Dense, Activation
from keras import optimizers, regularizers
from keras.models import Sequential
import time
import numpy as np
import cv2
from mnist.loader import MNIST

m = MNIST('./data')
import os

# Set CUDA_VISIBLE_DEVICES to "1" for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

classes = list(range(10))
x_train, y_train = m.load_training()
x_test, y_test = m.load_testing()

# Everything is held as float32 arrays from here on.
x_train = np.asarray(x_train, dtype=np.float32)
y_train = np.asarray(y_train, dtype=np.float32)
x_test = np.asarray(x_test, dtype=np.float32)
y_test = np.asarray(y_test, dtype=np.float32)

n_classes = len(classes)

# 0-1 hot encoding: a / c are row indices, b / d are the integer class
# columns; the pair addresses exactly one cell per sample.
a = np.arange(y_train.shape[0], dtype=np.int64)
b = y_train.astype(np.int64).reshape(-1)
label_train = np.zeros((y_train.shape[0], n_classes))
label_train[a, b] = 1

c = np.arange(y_test.shape[0], dtype=np.int64)
d = y_test.astype(np.int64).reshape(-1)
label_test = np.zeros((y_test.shape[0], n_classes))
label_test[c, d] = 1
#%%

from mnist.loader import MNIST
import base64
import struct


#%%
# Load only the testing split from ./data.
mndata = MNIST("./data")
images, labels = mndata.load_testing()

# Report how many test images were loaded.
num_images = len(images)
print(num_images)

#%%


def map_pixel_float(p):
    """Convert pixel value ``p`` to a float and divide it by 256."""
    divisor = 256.0
    as_float = float(p)
    return as_float / divisor


#%%


def image_to_float(image):
    """Return a lazy ``map`` that applies ``map_pixel_float`` to each pixel."""
    scaled_pixels = map(map_pixel_float, image)
    return scaled_pixels


#%%

Next, we explore the use of an affine transformation of the pixel values from 
d features to p features in an effort to increase our accuracy. Finally, we 
look at how the choice of p affects the test accuracy. 

@author: Christopher Salazar
"""
import numpy as np
from mnist.loader import MNIST
import matplotlib.pyplot as plt

# Load the MNIST dataset and convert every part to a NumPy array.
mndata = MNIST(
    r'C:\Users\salez\Documents\MISE Work\CSE 546\Homework\HW1\Programming\mnist'
)
X_train, labels_train = (np.array(part) for part in mndata.load_training())
X_test, labels_test = (np.array(part) for part in mndata.load_testing())

# Normalize dataset.
X_train, X_test = X_train / 255.0, X_test / 255.0

# Number of input features per sample.
d = len(X_train[0])
# Number of outputs.
k = 10

# One-hot Y_train in the form required for the closed form solution: row i
# carries a single 1 in column labels_train[i].
n_train = len(X_train)
Y_train = np.zeros((n_train, k))
Y_train[np.arange(n_train), labels_train] = 1
Exemplo n.º 14
0
def main():
    """Compare classifiers on noisy MNIST with and without the PCA features.

    Loads MNIST from ``./Data/``, builds noisy copies of both splits with
    ``create_noise``, normalizes everything with ``preprocessing.normalize``
    and derives a second feature set via
    ``create_training_and_testing_data(..., n=15)``.  K-nearest-neighbours
    (one run), random forest (9 runs) and decision tree (9 runs) classifiers
    are then fitted on both feature sets, and the test scores of each family
    are written to a CSV file with the columns ``Scores`` and ``PCA_Scores``.
    """
    # MNIST path data
    data_path = './Data/'
    mndata = MNIST(data_path)
    images_training, labels_training = mndata.load_training()
    images_testing, labels_testing = mndata.load_testing()

    images_training = np.asarray(images_training)
    images_testing = np.asarray(images_testing)
    # Noise is added to the raw (not yet normalized) images.
    images_training_noise = create_noise(images_training)
    images_testing_noise = create_noise(images_testing)
    labels_training = np.asarray(labels_training)
    labels_testing = np.asarray(labels_testing)
    # Normalize data
    # NOTE(review): the normalized clean images are not used again in this
    # function; only the noisy variants feed the classifiers below.
    images_training = preprocessing.normalize(images_training)
    images_testing = preprocessing.normalize(images_testing)
    images_training_noise = preprocessing.normalize(images_training_noise)
    images_testing_noise = preprocessing.normalize(images_testing_noise)

    # Presumably projects both noisy splits onto n=15 PCA components --
    # TODO confirm against create_training_and_testing_data.
    images_training_pca_noise, images_testing_pca_noise = create_training_and_testing_data(
        images_training_noise,
        labels_training,
        images_testing_noise,
        labels_testing,
        n=15)

    # NOTE(review): these two deques are never filled or read below.
    pca_accuracy_list = deque()
    accuracy_list = deque()
    print('Starting classification')

    # KNN
    print('Starting KNN')
    knn_score = deque()
    knn_pca_score = deque()
    # Single run with default KNeighborsClassifier settings.
    knn = KNeighborsClassifier()
    knn = knn.fit(images_training_noise, labels_training)
    knn_score.append(knn.score(images_testing_noise, labels_testing))

    knn_pca = KNeighborsClassifier()
    knn_pca = knn_pca.fit(images_training_pca_noise, labels_training)
    knn_pca_score.append(
        knn_pca.score(images_testing_pca_noise, labels_testing))
    knn_score = np.array(knn_score)
    knn_pca_score = np.array(knn_pca_score)
    # NOTE(review): to_csv is given a path, so accuracies_df holds its return
    # value rather than the DataFrame (same pattern in the two sections below).
    accuracies_df = pd.DataFrame({
        'Scores': knn_score,
        'PCA_Scores': knn_pca_score
    }).to_csv('KNNAccuracies_noise.csv', index=False)

    # Random Forest Accuracies
    clf_score = deque()
    clf_pca_score = deque()
    # range(1, 10) gives 9 repetitions; no random_state is passed, so each
    # repetition fits a fresh, unseeded forest.
    for n in range(1, 10):
        print('Iteration {} of Random Forest Classifier'.format(n))
        clf = RandomForestClassifier(n_estimators=100)
        clf = clf.fit(images_training_noise, labels_training)
        clf_score.append(clf.score(images_testing_noise, labels_testing))

        clf_pca = RandomForestClassifier(n_estimators=100)
        clf_pca = clf_pca.fit(images_training_pca_noise, labels_training)
        clf_pca_score.append(
            clf_pca.score(images_testing_pca_noise, labels_testing))
    clf_score = np.array(clf_score)
    clf_pca_score = np.array(clf_pca_score)
    accuracies_df = pd.DataFrame({
        'Scores': clf_score,
        'PCA_Scores': clf_pca_score
    }).to_csv('RandomForestAccuracies_noise.csv', index=False)

    # Decision Tree Accuracies
    dtc_score = deque()
    dtc_pca_score = deque()
    # Again 9 repetitions, each with a newly constructed tree.
    for n in range(1, 10):
        print('Iteration {} of Decision Tree Accuracies'.format(n))
        dtc = tree.DecisionTreeClassifier()
        dtc = dtc.fit(images_training_noise, labels_training)
        dtc_score.append(dtc.score(images_testing_noise, labels_testing))

        dtc_pca = tree.DecisionTreeClassifier()
        dtc_pca = dtc_pca.fit(images_training_pca_noise, labels_training)
        dtc_pca_score.append(
            dtc_pca.score(images_testing_pca_noise, labels_testing))

    dtc_score = np.array(dtc_score)
    dtc_pca_score = np.array(dtc_pca_score)
    accuracies_df = pd.DataFrame({
        'Scores': dtc_score,
        'PCA_Scores': dtc_pca_score
    }).to_csv('DecisionTreeAccuracies_noise.csv', index=False)
Exemplo n.º 15
0
        loss = "mse"
        opt = "gd"
        io = "../networks/mnist.obj"
        graph = False
        layers = [16, 16, 10]

    print(f"learnRate: {learnRate}")
    print(f"batch: {batch}")
    print(f"runs: {runs}")
    print(f"loss: {loss}")
    print(f"optimizer: {opt}")
    print(f"io: {io}")
    print(f"graph: {graph}")
    mndata = MNIST('./samples/numbers')
    trainData, trainLabels = mndata.load_training()
    #trainData = [[(random.random()*2)-1, (random.random()*2)-1] for i in range(1000)]
    #trainLabels = [int(i[0]-0.5>i[1] or i[0]+0.5<i[1]) for i in trainData]
    input_size = len(trainData[0])
    print(f"architecture: {input_size}, {layers}\n")

    convnn = getNetwork(io, learnRate, batch, loss, opt, layers, input_size)
    convnn = train(runs, convnn, trainData, trainLabels, batch, graph)
    convnn.writeNetwork(io)
    print("training done")
    print(" ")
    testData, testLabels = mndata.load_testing()
    #testData = [[random.random()*2-1, random.random()*2-1] for i in range(20)]
    #testLabels = [int(i[0]-0.5>i[1]or i[0]+0.5<i[1]) for i in testData]
    results = test(convnn, testData, testLabels)
    print(f"final percentage: {results}")
Exemplo n.º 16
0
        return loss_history


class LinearSVM(LinearClassifier):
    """Linear classifier whose loss is delegated to ``compute_loss``."""

    def loss(self, X_batch, y_batch, reg):
        """Return ``compute_loss`` evaluated with the current weights ``self.W``."""
        weights = self.W
        return compute_loss(weights, X_batch, y_batch, reg)


# In[5]:

# Dataset location on the author's machine.
mndata = MNIST('E:/Grad School/Semester 2/ML/Homeworks/MNIST/')

# In[6]:

X_train, Y_train = mndata.load_training()  # 60000 samples
X_test, Y_test = mndata.load_testing()  # 10000 samples

# In[7]:

# Images as float32, labels as int32.
X_train = np.asarray(X_train, dtype=np.float32)
Y_train = np.asarray(Y_train, dtype=np.int32)
X_test = np.asarray(X_test, dtype=np.float32)
Y_test = np.asarray(Y_test, dtype=np.int32)

# In[8]:

# Small random initial weights: one column per class, one row per pixel.
W = 0.0001 * np.random.randn(784, 10)
plotnum1 = []

# In[9]:
Exemplo n.º 17
0
Arquivo: main.py Projeto: rlui94/MNIST
            predictions[i] = perceptrons[i].predict(data.images[n])
        prediction = np.argmax(predictions)
        if prediction == data.labels[n]:
            correct += 1
    return correct / data.size


def make_conf_matrix(data, perceptrons):
    """Print a ``CLASSES x CLASSES`` count matrix for ``perceptrons`` on ``data``.

    For every sample each perceptron's ``predict`` output is collected and the
    arg-max is taken as the predicted class.  The cell at
    ``[predicted, data.labels[sample]]`` is incremented, so rows index the
    prediction and columns the stored label.  Returns ``None``.
    """
    confusion = np.zeros((CLASSES, CLASSES))
    scores = np.zeros(CLASSES)
    for sample in range(data.size):
        for cls in range(CLASSES):
            scores[cls] = perceptrons[cls].predict(data.images[sample])
        predicted = np.argmax(scores)
        confusion[predicted, data.labels[sample]] += 1
    print(confusion)


if __name__ == '__main__':
    # Read both MNIST splits from ./images/.
    loader = MNIST('./images/')
    training_pixels, training_digits = loader.load_training()
    testing_pixels, testing_digits = loader.load_testing()

    # Wrap each split in the project's Data container.
    train_set = data.Data()
    train_set.load(60000, training_pixels, training_digits)
    test_set = data.Data()
    test_set.load(10000, testing_pixels, testing_digits)

    # One perceptron per class, each constructed with 784.
    perceps = [perceptron.Perceptron(784) for _ in range(CLASSES)]
    # weights = np.random.rand(10, 785) - .5
    train_on_set(train_set, test_set, perceps, ETA, 70)
    make_conf_matrix(test_set, perceps)
Exemplo n.º 18
0
公式リファレンス

MNISTデータを使うために "https://pypi.org/project/python-mnist/" を参照してダウンロード、インストールしておく

教師付き次元削減とMetric learning
"""
import numpy as np
from mnist.loader import MNIST
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(context='poster', style='white')

# Dataset location on the author's machine.
mndata = MNIST('/Users/hamamatsuikadaigakubyouribu/UMAP_python/python-mnist/data')
train, train_labels = mndata.load_training()
test, test_labels = mndata.load_testing()

# Stack both splits into one float64 matrix divided by 255, with the labels
# concatenated in the same order.
data = np.vstack([train, test]).astype(np.float64) / 255.0
target = np.hstack([train_labels, test_labels])

# NOTE(review): these look like Fashion-MNIST class names, not digit names --
# confirm which dataset the directory above actually contains.
classes = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]

import umap
# Build the xp/yp and xn/yn sets with createdata(1) and run crossvalid on xp/yp.
# `m`, `n`, `start` and the `time` module must already be defined earlier
# in the file.
xp, yp, xn, yn = createdata(1)
crossvalid(xp, yp, m, n, 0, 0)
end = time.time()
# Elapsed wall-clock time since `start`, printed with three decimals.
print("Time elapsed while crossvalid: {:.3f} ".format(end - start), "s")

# In[44]:

d = 784
# read data and preprocess
from mnist.loader import MNIST

mndata = MNIST('')

X_train, Y_train = mndata.load_training()
# or
X_test, Y_test = mndata.load_testing()

# Empty accumulators, left untouched here.
X_opt = np.empty((0, 784), int)
Y_opt = np.empty((0, 1), int)

# Split the training set into the digit-1 samples (xp, yp) and the digit-0
# samples (xn, yn). The previous version called np.append once per matching
# sample, which copies the whole growing array every time; boolean masks do
# the same selection in a single pass. Rows are taken in reverse index order,
# matching the old reversed loop.
_rows_reversed = np.asarray(X_train, dtype=int).reshape(-1, 784)[::-1]
_digits_reversed = np.asarray(Y_train, dtype=int)[::-1]
_is_one = _digits_reversed == 1
_is_zero = _digits_reversed == 0

xp = _rows_reversed[_is_one]
xn = _rows_reversed[_is_zero]
# With no matching sample the old loop left the label array at its initial
# (0, 1) shape; keep that so downstream code sees the same thing.
yp = _digits_reversed[_is_one] if _is_one.any() else np.empty((0, 1), int)
yn = _digits_reversed[_is_zero] if _is_zero.any() else np.empty((0, 1), int)

for i in reversed(range(len(Y_test))):