def importMNIST(folder,resolution,classes,amount,signals):
    print 'importing MNIST data...'
    if os.path.isfile('saved_DY.pkl'):
        print 'found file'
        f = open('saved_DY.pkl','rb')
        D = pickle.load(f)
        D_labels = pickle.load(f)
        Y = pickle.load(f)
        Y_labels = pickle.load(f)

        return np.matrix(D),D_labels,np.matrix(Y),Y_labels

    mndata = MNIST(folder)
    train_ims,train_labels = mndata.load_training()
    print 'training loaded'
    test_ims,test_labels = mndata.load_testing()
    print 'testing loaded'

    training_samples = resize(np.array(train_ims),resolution)
    training_labels = np.array(train_labels)
    D,D_labels = organize(training_samples,training_labels,classes,amount)
    print 'dictionary, D, made'

    random_idx = np.array(np.random.permutation(10000))[0:signals] #10000 is total signals avail

    Y = (resize(np.array(test_ims),resolution))[:,random_idx]
    Y_labels = np.array(test_labels)[random_idx]
    print 'signals, Y, made'

    saveToFile(D,D_labels,Y,Y_labels)

    return np.matrix(D),D_labels,np.matrix(Y),Y_labels
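
The saveToFile helper called above is not shown. A minimal sketch that would be consistent with the pickle load order at the top of importMNIST (D, D_labels, Y, Y_labels, file 'saved_DY.pkl') is given below; the exact behaviour of the real helper is an assumption.

import pickle

def saveToFile(D, D_labels, Y, Y_labels):
    # Write the four objects in the same order importMNIST reads them back.
    with open('saved_DY.pkl', 'wb') as f:
        pickle.dump(D, f)
        pickle.dump(D_labels, f)
        pickle.dump(Y, f)
        pickle.dump(Y_labels, f)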
Example no. 2
	def __init__(self):
		#Load MNIST dataset
		mnistData = MNIST('./mnistData')
		self.imgTrain,self.lblTrain=mnistData.load_training()
		#self.imgTrainSmpl=self.imgTrain[:50000]
		self.imgTrainSmpl = [[2.5,2.4],[0.5,0.7],[2.2,2.9],[1.9,2.2],[3.1,3.0],[2.3,2.7],[2,1.6],[1,1.1],[1.5,1.6],[1.1,0.9]]
		np.seterr(all='warn')
Example no. 3
    def __init__(self,collect_gold_standard):
        n_neighbors = 15

        mndata = MNIST('/home/ggdhines/Databases/mnist')
        training = mndata.load_training()

        digits = range(0,10)

        training_dict = {d:[] for d in digits}

        for t_index,label in enumerate(training[1]):
            training_dict[label].append(training[0][t_index])

        weight = "distance"
        self.clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weight)

        pca = PCA(n_components=50)
        self.T = pca.fit(training[0])
        reduced_training = self.T.transform(training[0])
        # print sum(pca.explained_variance_ratio_)
        # clf.fit(training[0], training[1])
        self.clf.fit(reduced_training, training[1])

        self.transcribed_digits = {d:[] for d in digits}
        self.collect_gold_standard = collect_gold_standard

        self.cells_to_process = []
        self.completed_cells = []
Example no. 4
File: ckm.py Project: Vaishaal/ckm
def load_data(dataset="mnist_small", center=False):
    '''
        @param dataset: The dataset to load
        @param random_state: random state to control random parameter

        Load a specified dataset currently only
        "mnist_small" and "mnist" are supported
    '''
    if (dataset == "mnist_small"):
        X_train = np.loadtxt("./mldata/mnist_small/X_train", delimiter=",").reshape(1540,64)
        X_test = np.loadtxt("./mldata/mnist_small/X_test", delimiter=",").reshape(257,64)
        y_train = np.loadtxt("./mldata/mnist_small/y_train", delimiter=",")
        y_test = np.loadtxt("./mldata/mnist_small/y_test", delimiter=",")
        X_train = X_train[:,:,np.newaxis]
        X_test = X_test[:,:,np.newaxis]
    elif dataset == "mnist":
        mndata = MNIST('./mldata/mnist')
        X_train, y_train = map(np.array, mndata.load_training())
        X_test, y_test = map(np.array, mndata.load_testing())
        X_train = X_train/255.0
        X_test = X_test/255.0
        X_train = X_train[:,:,np.newaxis]
        X_test = X_test[:,:,np.newaxis]
    elif dataset == "cifar":
        (X_train, y_train), (X_test, y_test) = load_cifar()

    else:
        raise Exception("Dataset not found")

    return (X_train, y_train), (X_test, y_test)
Example no. 5
def load_dataset():
    mndata = MNIST("./data/")
    X_train, labels_train = map(np.array, mndata.load_training())
    X_test, labels_test = map(np.array, mndata.load_testing())
    X_train = X_train / 255.0
    X_test = X_test / 255.0
    X_train = X_train[:, :, np.newaxis]
    X_test = X_test[:, :, np.newaxis]
    return (X_train, labels_train), (X_test, labels_test)
Example no. 6
File: MLP.py Project: whirlp00l/ML
 def load_data(self):
     mn = MNIST('.')
     mn.test()
     data = mn.train_images
     data = np.array(data)
     
     data.astype(np.float32)
     data = data/255.0
     return data
Example no. 7
    def __init__(self):
        Classifier.__init__(self)

        self.classifier = svm.SVC(gamma=0.001,probability=True)

        mndata = MNIST('/home/ggdhines/Databases/mnist')
        training = mndata.load_training()

        self.classifier.fit(training[0], training[1])
Example no. 8
    def load(self,pixels_per_cell = (8,8),cells_per_block=(3,3),orientations=9):
        '''
        Generates a Data Set

        Parameters: pixels_per_cell, cells_per_block, orientations - HOG descriptor settings

        Returns:    train_set     - Training Set of 10000 images
                    train_labels  - Training Set Labels of corresponding images
                    test_set      - Test Set of 10000 images
                    test_labels   - Test Set Labels of corresponding images
        '''
        mn = MNIST('./data')
        train_raw = mn.load_training()
        test_raw = mn.load_testing()

        print "Loaded Raw images"

        learning_set = []
        Boom = {}
        for i in range(10):
            Boom[str(i)] = []
        for i in range(0,60000):
            Boom[str(train_raw[1][i])].append(train_raw[0][i])
        for i in range(0,10000):
            Boom[str(test_raw[1][i])].append(test_raw[0][i])
        t = datetime.now().microsecond
        random.seed(t)
        [random.shuffle(Boom[str(i)]) for i in range(10)]

        print "Choosing 20000 training images uniformly randomly"

        # Descriptor Generator
        for l in range(10):
            for i in range(0,2000):
                img =  np.array(Boom[str(l)][i])
                img.shape = (28,28)
                fd, hog_image = hog(img, orientations=orientations, pixels_per_cell=pixels_per_cell,cells_per_block=cells_per_block, visualise=True)
                learning_set.append([fd,l])

        print "Data Points now chosen and Generated HOG descriptors for them"

        t = datetime.now().microsecond
        random.seed(t)
        print "Shuffling Chosen Data Set"
        random.shuffle(learning_set)

        for i in range(20000):
            self.learning_set.append(learning_set[i][0])
            self.learning_set_labels.append(learning_set[i][1])

        print "Data Loading and Distribution Succesfully done"

        self.train_set = self.learning_set[:10000]
        self.train_labels = self.learning_set_labels[:10000]
        self.test_set = self.learning_set[10000:20000]
        self.test_labels = self.learning_set_labels[10000:20000]
Example no. 9
def load_data(datadir, is_training=False):

    mn = MNIST(datadir)

    if is_training:
        img, label = mn.load_training()
    else:
        img, label = mn.load_testing()

    return img, label
Example no. 10
File: 1_a.py Project: submagr/UGP
def loaddata():
    #Loading mnist data using python-mnist library
    mnLoader = MNIST('asgndata/mnist')
    data1 = mnLoader.load_training() # train data
    data2 = mnLoader.load_testing()  # test data

    features = np.array(data1[0]+data2[0], 'int16')
    labels = np.array(data1[1]+data2[1], 'int')
    X_train, y_train, X_test, y_test = preprocessData(features, labels)
    return X_train, y_train, X_test, y_test 
Example no. 11
File: MLP.py Project: whirlp00l/ML
 def load_targets(self):
     mn = MNIST('.')
     mn.test()
     targets = []
     for t in mn.train_labels:
         #print t
         out = np.zeros(self.output)
         out[t] = 1
         targets.append(out)
     targets = np.array(targets)
     return targets
Example no. 12
 def __init__(self,k):
     #Define k value
     self.k=k
     #Load MNIST dataset
     mnistData=MNIST('./mnistData')
     self.imgTrain,self.lblTrain=mnistData.load_training()
     self.imgTest,self.lblTest=mnistData.load_testing()
     #Initialize the random centroids
     self.imgCen=[]
     for c in xrange(self.k):
         self.imgCen.append([random.randint(0,255) for d in xrange(784)])
Example no. 13
def train_rls():
    mndata = MNIST("./data")
    X_train, Y_train = mndata.load_training()
    X_test, Y_test = mndata.load_testing()
    X_train, X_test = np.array(X_train), np.array(X_test)
    #One-vs-all mapping
    Y_train = ova(Y_train)
    Y_test = ova(Y_test)
    #Train greedy RLS, select 50 features
    cb = Callback(X_test, Y_test)
    learner = GreedyRLS(X_train, Y_train, 50, callbackfun=cb)
    print("Selected features " +str(learner.selected))
Example no. 14
def run():
  TorchModel = PyTorchHelpers.load_lua_class('torch_model.lua', 'TorchModel')
  torchModel = TorchModel(backend, 28, 10)

  mndata = MNIST('../../data/mnist')
  imagesList, labelsList = mndata.load_training()
  labels = np.array(labelsList, dtype=np.uint8)
  images = np.array(imagesList, dtype=np.float32)
  labels += 1  # since torch/lua labels are 1-based
  N = labels.shape[0]
  print('loaded mnist training data')

  if numTrain > 0:
    N = min(N, numTrain)
  print('numExamples N', N)
  numBatches = N // batchSize
  for epoch in range(numEpochs):
    epochLoss = 0
    epochNumRight = 0
    for b in range(numBatches):
      res = torchModel.trainBatch(
        learningRate,
        images[b * batchSize:(b+1) * batchSize],
        labels[b * batchSize:(b+1) * batchSize])
#      print('res', res)
      numRight = res['numRight']
      loss = res['loss']
      epochNumRight += numRight
      epochLoss += loss
      print('epoch ' + str(epoch) + ' batch ' + str(b) + ' accuracy: ' + str(numRight * 100.0 / batchSize) + '%')
    print('epoch ' + str(epoch) + ' accuracy: ' + str(epochNumRight * 100.0 / N) + '%')

  print('finished training')
  print('loading test data...')
  imagesList, labelsList = mndata.load_testing()
  labels = np.array(labelsList, dtype=np.uint8)
  images = np.array(imagesList, dtype=np.float32)
  labels += 1  # since torch/lua labels are 1-based
  N = labels.shape[0]
  print('loaded mnist testing data')

  numBatches = N // batchSize
  epochLoss = 0
  epochNumRight = 0
  for b in range(numBatches):
    predictions = torchModel.predict(images[b * batchSize:(b+1) * batchSize]).asNumpyTensor().reshape(batchSize)
    labelsBatch = labels[b * batchSize:(b+1) * batchSize]
    numRight = (predictions == labelsBatch).sum()
    epochNumRight += numRight
  print('test results: accuracy: ' + str(epochNumRight * 100.0 / N) + '%')
Example no. 15
File: train.py Project: ghl3/brain
def run(args):

    nn_args = {}

    if args.output_activation:
        activation_class = getattr(activation_functions, args.output_activation)
        nn_args['output_activation'] = activation_class()

    nn = Network(args.shape, seed=42, **nn_args)

    print "Loading the training data"
    mndata = MNIST(args.training_data)
    training_data, training_labels = mndata.load_training()

    training_data = convert_training_data(training_data)
    training_labels = convert_number_labels_to_vectors(training_labels)

    fitted, epochs = nn.SGD(training_data, training_labels,
                            epochs=args.epochs,
                            mini_batch_size=args.mini_batch_size,
                            eta=args.eta,
                            save_history=args.save_epochs)

    if args.testing_data:
        print "Testing data"
        test_data, test_labels = mndata.load_testing()
        test_data = convert_training_data(test_data)
        # For evaluation, we put the index of the label
        # with the argmax
        evaluation = fitted.evaluate(test_data, test_labels,
                                     evaluator=np.argmax)
        print evaluation

    if args.save:

        label_dir = mkdir_or_temp(args.save)

        fitted_path = "{}/nn.pkl".format(label_dir)

        with open(fitted_path, 'wb') as handle:
            pickle.dump(fitted, handle)

        if epochs is not None:
            for i, epoch in enumerate(epochs):
                epoch_path = '{}/nn_epoch_{}.pkl'.format(label_dir, i)
                with open(epoch_path, 'wb') as handle:
                    pickle.dump(epoch, handle)
                    print "Saved epoch {} to {}".format(i, epoch_path)
Example no. 16
def query_digit(digit):
    host, port = "localhost", 4567
    con = httplib.HTTPConnection(host, port)
    params = json.dumps({"data": digit})
    con.request("POST", "/digit", params)
    response = con.getresponse()
    print "For digit:%s\nReceived prediction response [%s]\n" % (MNIST.display(digit), response.read())
Example no. 17
def main(kernel):
  print "Loading the data"

  mn = MNIST(DATA_PATH)

  test_img, test_label = mn.load_testing()
  train_img, train_label = mn.load_training()
  
  train_img = np.array(train_img[:SIZE_TRAIN])
  train_label = np.array(train_label[:SIZE_TRAIN])
  test_img = np.array(test_img[:SIZE_TEST])
  test_label = np.array(test_label[:SIZE_TEST])

  print "Finished loading the data"

  # Create a classifier: a support vector classifier
  if kernel == 'rbf':
    print "Training with RBF kernel - Might take a few minutes"
    classifier = svm.SVC(C=10, gamma=5e-7, kernel='rbf') 
  elif kernel == 'linear':
    print "Training with Linear kernel - Might take a few minutes"
    classifier = svm.SVC(C=1e-6, kernel='linear')
  elif kernel == 'poly':
    print "Training with Polynomial kernel - Might take a few minutes"
    #classifier = svm.SVC(C=10, gamma=1e-7, kernel='poly', degree=2)
    #classifier = svm.SVC(C=10, gamma=1e-6, kernel='poly', degree=3)
    classifier = svm.SVC(C=10, gamma=1e-6, kernel='poly', degree=4)

  # Train the classifier on the selected training subset
  classifier.fit(train_img, train_label)

  print "Classifying - Might take a few minutes"

  predicted = classifier.predict(test_img)
  print predicted

  cm = metrics.confusion_matrix(test_label, predicted)

  print("Classification report for classifier %s:\n%s\n"% (classifier, metrics.classification_report(test_label, predicted)))
  print("Confusion matrix:\n%s" % cm)

  cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

  plt.figure()
  plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix')

  print "Result: %s"%(np.trace(cm_normalized)/10)
Example no. 18
    def __init__(self):
        Classifier.__init__(self)

        n_neighbors = 25

        mndata = MNIST('/home/ggdhines/Databases/mnist')
        self.training = mndata.load_training()
        print type(self.training[0][0])

        weight = "distance"
        self.clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weight)

        pca = PCA(n_components=50)
        self.T = pca.fit(self.training[0])
        reduced_training = self.T.transform(self.training[0])
        print sum(pca.explained_variance_ratio_)
        # clf.fit(training[0], training[1])
        self.clf.fit(reduced_training, self.training[1])
Example no. 19
def load_mnist():
	download_mnist()
	mnist_data = MNIST(MNIST_PATH)
	train_imgs, train_labels = mnist_data.load_training()
	test_imgs, test_labels = mnist_data.load_testing()
	data = {}

	for i in range(len(train_imgs)):
		square = []
		for j in range(1024):
			row = j // 32
			col = j % 32
			if (row < 2 or col < 2 or row > 29 or col > 29):
				square.append(0)
			else:
				val = train_imgs[i][(row - 2) * 28 + col - 2]
				square.append(val)
		train_imgs[i] = square

	for i in range(len(test_imgs)):
		square = []
		for j in range(1024):
			row = j // 32
			col = j % 32
			if (row < 2 or col < 2 or row > 29 or col > 29):
				square.append(0)
			else:
				val = test_imgs[i][(row - 2) * 28 + col - 2]
				square.append(val)
		test_imgs[i] = square

	data["train_imgs"] = np.array(train_imgs, dtype="f").reshape(60000, 1, 32, 32)
	data["test_imgs"] = np.array(test_imgs, dtype="f").reshape(10000, 1, 32, 32)
	data["train_labels"] = np.array(train_labels)
	data["test_labels"] = np.array(test_labels)

	preprocess(data["train_imgs"], data["test_imgs"])

	data["train_no"] = 60000
	data["test_no"] = 10000

	return data
Example no. 20
def main():
    data = MNIST('./data')

    def transform(x):
        return x / 255.

    # 60,000 train samples of 28x28 grid, domain 0-255
    mnist_train_data, mnist_train_label = data.load_training()
    mnist_train_data_norm = np.array([transform(np.array(x)) for x in mnist_train_data])

    mlp_config = {'hidden_layer_sizes': (1000,),
                  'activation': 'relu',
                  'algorithm': 'adam',
                  'max_iter': 20,
                  'early_stopping': True,
                  'validation_fraction': 0.1,
                  'verbose': True
                  }
    mnist_classifier = nn.MLPClassifier(**mlp_config)
    mnist_classifier.fit(X=mnist_train_data_norm, y=mnist_train_label)

    # 10,000 test samples
    mnist_test_data, mnist_test_label = data.load_testing()
    mnist_test_data_norm = np.array([transform(np.array(x)) for x in mnist_test_data])

    prediction = mnist_classifier.predict_proba(mnist_test_data_norm)
    truth_array = [prediction[idx].argmax() == mnist_test_label[idx] for idx in range(len(prediction))]
    accuracy = float(sum(truth_array)) / float(len(truth_array))
    print "out of sample model accuracy [%s]" % accuracy

    print "serializing to pmml without transform (User defined transform not yet supported"
    pmml_path = "./model_pmml"
    if not os.path.exists(pmml_path):
        os.mkdir(pmml_path)
    sklearn2pmml(mnist_classifier, None, pmml_path + "/MLP_MNIST.pmml", with_repr=True)

    print "serializing with joblib for import in python"
    # KJS TODO: Serialize transform with the model
    pickle_path = "./model_pickle"
    if not os.path.exists(pickle_path):
        os.mkdir(pickle_path)
    joblib.dump(mnist_classifier, pickle_path + "/MLP_MNIST.pkl")
Example no. 21
class MNISTDataset(Dataset):
    def __init__(self, path):
        self.mndata = MNIST(path)
        self.images, self.labels = self.mndata.load_training()

    def nth_case(self, n):
        return self.images[n], bitvec(self.labels[n])

    @property
    def size(self):
        return len(self.images)
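
The bitvec helper used in nth_case is not defined in this snippet. A plausible minimal sketch, assuming it simply one-hot encodes the digit label as a target vector, is:

def bitvec(label, n_classes=10):
    # Assumed behaviour: one-hot encode a digit label as a list of 0/1 values.
    return [1 if i == label else 0 for i in range(n_classes)]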
Example no. 22
def query_digit(digit, host=None, port=None):
    """
    Issues HTTP POST to host, port with digit array
    Expects a digit in the response
    """
    if not host or not port:
        host, port = "localhost", 4567
    con = httplib.HTTPConnection(host, port)
    params = json.dumps({"data": digit})
    con.request("POST", "/digit", params)
    response = con.getresponse()
    print "For digit:%s\nReceived prediction response [%s]\n" % (MNIST.display(digit), response.read())
Example no. 23
def main():
    data = MNIST('./data')
    col_names = ["x" + str(x) for x in range(784)]
    # Define a transform function that will be serialized with the model
    mnist_mapper = sklearn_pandas.DataFrameMapper([(col_names, StandardScaler()), ("digit", None)])

    # 60,000 train samples of 28x28 grid, domain 0-255
    mnist_train_data, mnist_train_label = data.load_training()
    mnist_train_df = pandas.concat((pandas.DataFrame(mnist_train_data, columns=col_names),
                                    pandas.DataFrame(list(mnist_train_label), columns=["digit"])),
                                   axis=1)
    mnist_train_df_norm = mnist_mapper.fit_transform(mnist_train_df)

    mlp_config = {'hidden_layer_sizes': (1000,),
                  'activation': 'tanh',
                  'algorithm': 'adam',
                  'max_iter': 20,
                  'early_stopping': True,
                  'validation_fraction': 0.1,
                  'verbose': True
                  }
    mnist_classifier = nn.MLPClassifier(**mlp_config)
    mnist_classifier.fit(X=mnist_train_df_norm[:, 0:28 * 28], y=mnist_train_df_norm[:, 28 * 28])

    # 10,000 test samples
    mnist_test_data, mnist_test_label = data.load_testing()
    mnist_test_df = pandas.concat((pandas.DataFrame(mnist_test_data, columns=col_names),
                                   pandas.DataFrame(list(mnist_test_label), columns=["digit"])),
                                  axis=1)
    mnist_test_df_norm = mnist_mapper.transform(mnist_test_df)

    prediction = mnist_classifier.predict_proba(mnist_test_df_norm[:, 0:28 * 28])
    truth_array = [prediction[idx].argmax() == mnist_test_label[idx] for idx in range(len(prediction))]
    accuracy = float(sum(truth_array)) / float(len(truth_array))
    print "out of sample model accuracy [%s]" % accuracy
    print "serializing to pmml"
    sklearn2pmml(mnist_classifier, mnist_mapper, "MLP_MNIST.pmml", with_repr=True)
Example no. 24
def kmm_train(xtr, xte, yte, test_labels, kf, kfargs, B):
    idx_te = list()
    for i in test_labels:
        idx_te.extend(np.where(yte == i)[0])
    print len(idx_te)
    res = kmm.kmm(xtr, xte[idx_te], kf, kfargs, B)
    coef = np.array(res['x'])
    return coef


if __name__ == '__main__':
    test_labels = [1]  # Define labels in test set
    tr_p = 0.01  # Proportion of training data subsampled for computational simplicity

    mndata = MNIST('../python-mnist/data/')
    xtr, ytr = mndata.load_training()
    xte, yte = mndata.load_testing()
    idx_tr = np.where(np.random.rand(len(ytr), 1) < tr_p)[0]
    [xtr, ytr] = [np.array(xtr)[idx_tr], np.array(ytr)[idx_tr]]
    [xte, yte] = [np.array(xte), np.array(yte)]

    coef = kmm_train(xtr, xte, yte, test_labels, kernel.rbf, (15, ), 10)

    score = np.zeros([10, 1])
    for i in range(10):
        score[i] = np.mean(coef[np.where(ytr == i)])

    plt.scatter(ytr, coef)
    plt.show()
Example no. 25
 def __init__(self):
     mndata = MNIST(os.path.join(os.getcwd(), 'MNIST_samples'))  # Load the MNIST data set
     self.testing_images, self.testing_labels = mndata.load_testing()  # Load the MNIST test data set.
     self.test_size = len(self.testing_images)
     self.chosen_test_indexes = list()  # Store the random chosen indexes so tests won't repeat.
Example no. 26
    return opt_path


if __name__ == "__main__":
    # logs
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    os.chdir(logdir)
    print(os.getcwd())

    # device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # data
    HOME = os.environ['HOME']
    dataset = MNIST(os.path.join(HOME, 'datasets/MNIST/numpy'), device)

    # model
    model = Linear().to(device)

    # generate path
    w = SGDPath(model, dataset, eta, alpha0, num_iters, bs)
    np.save(w_name, w)

    what = SGDPath(model, dataset, gamma, alpha0+l2regu, num_iters, bs)
    np.save(what_name, what)

    # w = np.load(w_name)
    # what = np.load(what_name)

    # generate weight
Example no. 27
# -*- coding: utf-8 -*-
"""
Created on Sat May 16 18:34:59 2020

@author: Tejo Nutalapati
"""

import numpy as np
import matplotlib.pyplot as plt
from mnist import MNIST

mndata = MNIST('.')

train_images, train_labels = mndata.load_training()

# or
test_images, test_labels = mndata.load_testing()
"""
Train on the MNIST training set using stochastic gradient descent.
It should achieve 97-98% accuracy on the test set.
"""


class MyNeuralNet():
    # No parent class to initialize, so no super().__init__() call is needed here.

    def __init__(self, num_layers=2, num_hidden=[2, 2], learning_rate=.01):
        self.num_layers = num_layers
        self.num_hidden = num_hidden
        self.lr = learning_rate
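
The class above is cut off after __init__, so the stochastic-gradient-descent training promised by the docstring is not shown. As a purely illustrative sketch (not the author's implementation), a single SGD update for a one-hidden-layer sigmoid network with squared-error loss could look like this:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sgd_step(x, y, W1, b1, W2, b2, lr=0.01):
    # Forward pass through one hidden layer.
    h = sigmoid(W1 @ x + b1)
    o = sigmoid(W2 @ h + b2)
    # Backpropagate the squared-error loss 0.5 * ||o - y||^2.
    delta_o = (o - y) * o * (1 - o)
    delta_h = (W2.T @ delta_o) * h * (1 - h)
    # Parameter update for this single example.
    W2 -= lr * np.outer(delta_o, h)
    b2 -= lr * delta_o
    W1 -= lr * np.outer(delta_h, x)
    b1 -= lr * delta_h
    return W1, b1, W2, b2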
Example no. 28
import numpy as np
from mnist import MNIST
import math


def format(images, labels):
	data = []
	for index in range(0, len(labels)):
		input = np.array(images[index]) / 255
		output = np.zeros(10)
		output[labels[index]] = 1.0
		data.append((input, output))
	return data

print("Loading and formatting MNIST set")
mnist_set = MNIST('MNIST_data')
training_inputs, training_outputs = mnist_set.load_training()
test_inputs, test_outputs = mnist_set.load_testing()

training_data = format(training_inputs, training_outputs)
test_data = format(test_inputs, test_outputs)


mnist_nn = nn.NeuralNetwork(4, [784, 100, 50, 10])
print('Training neural network')
# train for 5 epochs with a learning rate of 0.5
mnist_nn.train(training_data, 5, 0.5)


print("Testing neural network")
numCorrect = 0
Example no. 29
import numpy as np
from mnist import MNIST
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

mntest = MNIST('./MNIST/')

mntest.load_testing()

X_test = np.asarray(mntest.test_images)
y_test = np.asarray(mntest.test_labels)
# a = [i for i in range(len(label)) if label[i] in [0,1,2,3,4,5,6,7,8,9]]
# label = label[a]
# data = data[a]

mntrain = MNIST('./MNIST/')

mntrain.load_training()

X_train = np.asarray(mntrain.train_images)
y_train = np.asarray(mntrain.train_labels)

#X_train, X_test, y_train, y_test = train_test_split(data,label,test_size = 0.3)
#net = perceptron.Perceptron(n_iter=100, eta0=0.002)
# net = mlp = MLPClassifier(verbose=10, learning_rate='adaptive')
# net = SVC(decision_function_shape='ovo',gamma=0.001)
net = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial')
Example no. 30
from mnist import MNIST
import numpy as np
import os
import collections
from pathlib import Path
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import dtypes

path = Path(__file__).parent.parent.parent
path = os.path.join(path, "..\\DL_data\\mnist")
print(path)
mndata = MNIST(path=path)

def load_data():
    """Loads the MNIST dataset.

    # Arguments
        path: path where to cache the dataset locally
            (relative to ~/.keras/datasets).

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    # path = get_file(path, origin='https://s3.amazonaws.com/img-datasets/mnist.npz')
    # f = np.load(path)
    # x_train = f['x_train']
    # y_train = f['y_train']
    # x_test = f['x_test']
    # y_test = f['y_test']
    # f.close()
    # return (x_train, y_train), (x_test, y_test)
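
The body of load_data above is entirely commented out (it sketches the Keras .npz route). A minimal alternative that returns the documented tuples using the python-mnist mndata object created above might look like this; the function name and the 28x28 reshape are assumptions made here for illustration.

def load_data_from_python_mnist():
    # Returns ((x_train, y_train), (x_test, y_test)) as numpy arrays,
    # reshaped into 28x28 images to match the docstring above.
    x_train, y_train = map(np.array, mndata.load_training())
    x_test, y_test = map(np.array, mndata.load_testing())
    return (x_train.reshape(-1, 28, 28), y_train), (x_test.reshape(-1, 28, 28), y_test)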
Example no. 31
import numpy as np
from mnist import MNIST
from numpy.core.multiarray import ndarray
from typing import Union

mndata = MNIST("/export/home/016/a0161419/le4nn/")

SIZEX, SIZEY = 28, 28
PIC_LEARN = 60000
PIC_TEST = 10000
M = 100              # There are M nodes on the intermediate layer
CLASS = 10


# ========================================
# Function Definition
# ========================================

# Sigmoid function (as activate function)
def sigmoid(t):
    # Avoid stack overflow
    return np.where(t <= -710, 0, (1 / (1 + np.exp(-t))))

# Softmax function (as activate function)
def softmax(a):
    alpha = a.max()
    den_y2 = 0
    for i in range(CLASS):
        den_y2 += np.exp(a[i] - alpha)
    y2 = np.exp(a - alpha) / den_y2
    return np.argmax(y2)
Example no. 32
def get_valid_loader(config):
    mndata = MNIST(config.mnist_path)
    data_list, label_list = mndata.load_testing()
    return get_loader(config, data_list, label_list)
Example no. 33
)
tf.app.flags.DEFINE_string('train_root_dir', '../training',
                           'Root directory to put the training data')
tf.app.flags.DEFINE_integer('log_step', 10000,
                            'Logging period in terms of iteration')

NUM_CLASSES = 10

TRAIN_FILE = 'svhn'
TEST_FILE = 'mnist'
print TRAIN_FILE + '  --------------------------------------->   ' + TEST_FILE
print TRAIN_FILE + '  --------------------------------------->   ' + TEST_FILE
print TRAIN_FILE + '  --------------------------------------->   ' + TEST_FILE

TRAIN = SVHN('data/svhn', split='train', shuffle=True)
VALID = MNIST('data/mnist', split='test', shuffle=True)
TEST = MNIST('data/mnist', split='test', shuffle=False)

FLAGS = tf.app.flags.FLAGS
MAX_STEP = 10000


def decay(start_rate, epoch, num_epochs):
    return start_rate / pow(1 + 0.001 * epoch, 0.75)


def adaptation_factor(x):
    #return 1.0
    #return 0.25
    den = 1.0 + math.exp(-10 * x)
    lamb = 2.0 / den - 1.0
Example no. 34
from mnist import MNIST
import numpy as np
import matplotlib.pyplot as plt
import pickle as cPickle

white = 0.001

mndata = MNIST('mnist/') #directory holding the MNIST dataset

mndata.load_training()
mndata.load_testing()

train_data = np.reshape(mndata.train_images, (60000, 28, 28))
test_data = np.reshape(mndata.test_images, (10000, 28, 28))

train_data = train_data / 255.
test_data = test_data / 255.

for x in range(0,len(train_data)):
	train_data[x] = np.rot90(train_data[x], 3)

for x in range(0,len(test_data)):
	test_data[x] = np.rot90(test_data[x], 3)

trn_labels = list()
tst_labels = list()

white_space=[0 for p in range(11)]
white_space[10] = 1

t = 2
Example no. 35
## if files are not in local directory then download and decompress them
for key in mnist_data_files:
    url = mnist_data_files[key]
    filename = os.path.basename(url)
    if filename.split(".")[0] not in os.listdir(mndata_dir):
        print "Downloading File: %s" % mnist_data_files[key]
        r = requests.get(mnist_data_files[key], stream=True)
        compressed_file=StringIO.StringIO()
        compressed_file.write(r.content)
        compressed_file.seek(0)
        decompressed = gzip.GzipFile(fileobj=compressed_file, mode='rb')
        with open(os.path.join(mndata_dir, filename.split(".")[0]),'wb') as handle:
            handle.write(decompressed.read())

mndata = MNIST(mndata_dir)

if os.path.exists(mndata_file):
    js = simplejson.load(open(mndata_file, 'r'))
    mndata.train_images = js['train_data']
    mndata.train_labels = js['train_labels']
    mndata.test_images = js['test_data']
    mndata.test_labels = js['test_labels']

else:
    mndata.load_training()
    mndata.load_testing()
    js = {"train_data": mndata.train_images, "train_labels": mndata.train_labels.tolist(),
          "test_data": mndata.test_images, "test_labels": mndata.test_labels.tolist()}

    simplejson.dump(js, open(mndata_file, 'w'))
Example no. 36
def main(args: argparse.Namespace) -> float:
    # Fix random seeds and threads
    tf.keras.utils.set_random_seed(args.seed)
    tf.config.threading.set_inter_op_parallelism_threads(args.threads)
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)

    # Create logdir name
    args.logdir = os.path.join(
        "logs", "{}-{}-{}".format(
            os.path.basename(globals().get("__file__", "notebook")),
            datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), ",".join(
                ("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v)
                 for k, v in sorted(vars(args).items())))))

    # Load data
    mnist = MNIST(size={"train": 5000})

    # TODO: Create the model and incorporate L2 regularization and dropout:
    # - L2 regularization:
    #   If `args.l2` is nonzero, create a `tf.keras.regularizers.L2` regularizer
    #   and use it for all kernels (but not biases) of all Dense layers.
    # - Dropout:
    #   Add a `tf.keras.layers.Dropout` with `args.dropout` rate after the Flatten
    #   layer and after each Dense hidden layer (but not after the output Dense layer).

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=[MNIST.H, MNIST.W, MNIST.C]))
    for hidden_layer in args.hidden_layers:
        model.add(tf.keras.layers.Dense(hidden_layer, activation=tf.nn.relu))
    model.add(tf.keras.layers.Dense(MNIST.LABELS, activation=tf.nn.softmax))

    # TODO: Implement label smoothing.
    # Apply the given smoothing. You will need to change the
    # `SparseCategorical{Crossentropy,Accuracy}` to `Categorical{Crossentropy,Accuracy}`
    # because `label_smoothing` is supported only by `CategoricalCrossentropy`.
    # That means you also need to modify the labels of all three datasets
    # (i.e., `mnist.{train,dev,test}.data["labels"]`) from indices of the gold class
    # to a full categorical distribution (you can use either NumPy or there is
    # a helper method also in the `tf.keras.utils` module).

    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.metrics.SparseCategoricalAccuracy(name="accuracy")],
    )

    tb_callback = tf.keras.callbacks.TensorBoard(args.logdir, histogram_freq=1)

    def evaluate_test(epoch, logs):
        if epoch + 1 == args.epochs:
            test_logs = model.evaluate(
                mnist.test.data["images"],
                mnist.test.data["labels"],
                args.batch_size,
                return_dict=True,
                verbose=0,
            )
            logs.update({
                "val_test_" + name: value
                for name, value in test_logs.items()
            })

    logs = model.fit(
        mnist.train.data["images"],
        mnist.train.data["labels"],
        batch_size=args.batch_size,
        epochs=args.epochs,
        validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]),
        callbacks=[
            tf.keras.callbacks.LambdaCallback(on_epoch_end=evaluate_test),
            tb_callback
        ],
    )

    # Return test accuracy for ReCodEx to validate
    return logs.history["val_test_accuracy"][-1]
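
The three TODOs above ask for L2 regularization, dropout, and label smoothing. A minimal sketch of one way to wire them into a Keras model of the same shape follows; the helper name and its arguments (l2, dropout, label_smoothing) are assumptions for illustration, not the assignment's reference solution.

import tensorflow as tf

def build_regularized_model(hidden_layers, l2=0.0, dropout=0.0, label_smoothing=0.0):
    # Shared L2 regularizer for all Dense kernels (biases are left unregularized).
    regularizer = tf.keras.regularizers.L2(l2) if l2 else None

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=[28, 28, 1]))  # MNIST H, W, C
    model.add(tf.keras.layers.Dropout(dropout))
    for units in hidden_layers:
        model.add(tf.keras.layers.Dense(units, activation=tf.nn.relu,
                                        kernel_regularizer=regularizer))
        model.add(tf.keras.layers.Dropout(dropout))
    # No dropout after the output layer.
    model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax,
                                    kernel_regularizer=regularizer))

    # Label smoothing needs the dense categorical loss/metric variants, so the
    # gold labels must be one-hot encoded, e.g. tf.keras.utils.to_categorical(y, 10).
    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.losses.CategoricalCrossentropy(label_smoothing=label_smoothing),
        metrics=[tf.metrics.CategoricalAccuracy(name="accuracy")],
    )
    return model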
Example no. 37
import time
import sys

from mnist import MNIST

from pylearn.neural_network import NeuralNetwork
from pylearn.preprocess import InputData

mndata = MNIST('.\data\\numbers')
mndata.load_training()
mndata.load_testing()

test_images_wrap, test_labels_wrap = InputData.image_matrix_normalizer(
    mndata.test_images, mndata.test_labels, range(10))
train_images_wrap, train_labels_wrap = InputData.image_matrix_normalizer(
    mndata.train_images, mndata.train_labels, range(10))

print('Training set size:', len(train_images_wrap))


def notifier(net, epoch_id):
    print("Epoch {0}: {1} / {2}, cost: {3}".format(
        epoch_id + 1, net.test_network(test_images_wrap, test_labels_wrap),
        len(test_images_wrap),
        net.batch_cost(test_images_wrap, test_labels_wrap)))


start_time = time.time()

net = NeuralNetwork([784, 20, 10], 3)
net.batch_size = 10
Example no. 38
from mnist import MNIST
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

print("Loading dataset...")
mndata = MNIST("./data/")
images, labels = mndata.load_training()

clf = RandomForestClassifier(n_estimators=100)

# Train on the first 10000 images:
train_x = images[:10000]
train_y = labels[:10000]

print("Train model")
clf.fit(train_x, train_y)

# Test on the next 1000 images:
test_x = images[10000:11000]
expected = labels[10000:11000].tolist()

print("Compute predictions")
predicted = clf.predict(test_x)

print("Accuracy: ", accuracy_score(expected, predicted))
Example no. 39
import mnist
from mnist import MNIST
import random
from train_model import train_
from test_model import test_
from sklearn import metrics
import matplotlib.pyplot as plt
import math
import numpy as np

mndata = MNIST('shreya')

images, labels = mndata.load_training()
# or
images1, labels1 = mndata.load_testing()


# index = random.randrange(0, len(images))  # choose an index ;-)
# print(mndata.display(images[index]))

low = [0,3]
high = [1,8]


for yo in range(2):
	C1 = low[yo]
	C2 = high[yo]
	print str(C1) + " " + str(C2)
	if(C1 == 0 and C2== 1):
		train_data=[]
		test_data=[]
Example no. 40
import numpy as np
from sklearn.linear_model import LogisticRegression
from mnist import MNIST
from sklearn.metrics import accuracy_score

# load MNIST data
datadir = "E:\\Dulieu(znz)\\Python_Pycharm\\datasets"
mnist = MNIST(datadir)
mnist.load_training()
mnist.load_testing()

X_train = np.asarray(mnist.train_images)
y_train = np.asarray(mnist.train_labels)
X_test = np.asarray(mnist.test_images)
y_test = np.asarray(mnist.test_labels)

model = LogisticRegression(C=1e5, solver="lbfgs",
                           multi_class="multinomial")  # C is inverse of lamda
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy %.2f %%" % (100 * accuracy_score(y_test, y_pred)))
Example no. 41
def training_labels():
    mndata = MNIST('samples')
    return mndata.load_training()[1]
Example no. 42
from mnist import MNIST

mndata = MNIST('samples')

images, labels = mndata.load_training()
# or
#images, labels = mndata.load_testing()
print(len(images))
Example no. 43
from mnist import MNIST
import numpy

mndata = MNIST('Data')

#load images
trainImages, trainLabels = mndata.load_training()
testImages, testLabels = mndata.load_testing()

#convert images to array in order to simplify computations
trainImages = numpy.asarray(trainImages)
testImages = numpy.asarray(testImages)

#Simple classifier
#compute the singular value decomposition of the matrix
u, s, vt = numpy.linalg.svd(trainImages - trainImages.mean(axis=0),
                            full_matrices=False)

#compute how many components should be used to achieve at least 95 % of the variance
numComponents = 0
runningTotal = 0
totalEig = sum(s)

for eigenvalue in s:
    runningTotal += eigenvalue
    numComponents += 1
    if (runningTotal / totalEig > 0.95):
        break
#compute the coordinate matrix
trainCoords = numpy.matmul(u, numpy.diag(s))
trainCoords = trainCoords[:, 0:(numComponents - 1)]
Example no. 44
from mnist import MNIST
import numpy as nump
from sklearn.manifold import TSNE
import sys
import matplotlib.pyplot as plot

nump.set_printoptions(threshold=sys.maxsize)

print("Loading the data...")

data = MNIST("../data/MNIST/")

images, labels = data.load_training()
test_images, test_labels = data.load_testing()
print(nump.array(test_labels))

zeros = nump.array([images[key] for (key, label) in enumerate(labels) if int(label) == 0])
ones = nump.array([images[key] for (key, label) in enumerate(labels) if int(label) == 1])
twos = nump.array([images[key] for (key, label) in enumerate(labels) if int(label) == 2])
threes = nump.array([images[key] for (key, label) in enumerate(labels) if int(label) == 3])
fours = nump.array([images[key] for (key, label) in enumerate(labels) if int(label) == 4])
fives = nump.array([images[key] for (key, label) in enumerate(labels) if int(label) == 5])
sixes = nump.array([images[key] for (key, label) in enumerate(labels) if int(label) == 6])
sevens = nump.array([images[key] for (key, label) in enumerate(labels) if int(label) == 7])
eights = nump.array([images[key] for (key, label) in enumerate(labels) if int(label) == 8])
nines = nump.array([images[key] for (key, label) in enumerate(labels) if int(label) == 9])
fours_sevens_eights = nump.array([images[key] for (key, label) in enumerate(labels)
                                  if int(label) == 4 or int(label) == 7 or int(label) == 8])
fours_sevens_eights_labels = nump.array([labels[key] for (key, label) in enumerate(labels)
                                         if int(label) == 4 or int(label) == 7 or int(label) == 8])
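
The snippet stops before the imported TSNE is actually used. As an illustrative continuation (not the author's code), the fours/sevens/eights subset built above could be embedded in 2-D and plotted like this:

# Hypothetical continuation: 2-D t-SNE embedding of the 4/7/8 subset (slow on ~18k points).
embedding = TSNE(n_components=2, random_state=0).fit_transform(fours_sevens_eights)
plot.scatter(embedding[:, 0], embedding[:, 1], c=fours_sevens_eights_labels, s=5)
plot.colorbar(label="digit")
plot.show()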
Example no. 45
import pyximport
pyximport.install()
import cython
import numpy as np
import matplotlib.pyplot as plt
import time
import random
import train_ml_prange

from mnist import MNIST
mndata = MNIST('/n/home04/cs205u1716/Proj/data')
images, labels = mndata.load_training()

#Build feature map
N_array = np.array([1000, 5000, 10000, 20000])  #How many images I want to load
threads_array = np.array([1, 2, 5, 10, 20])
d = 784  #Pixels of MNIST data

for ti in np.arange(len(threads_array)):
    nthreads = threads_array[ti]
    print(' ')
    print('Threads: ', nthreads)
    for ni in np.arange(len(N_array)):

        N = N_array[ni]

        start = time.time()

        #Retrieve data and labels - do preprocessing
        y_labs = labels[0:N]
Example no. 46
def load_dataset():
    data = MNIST('../../../../../dataset_imgs')
    return data.load_training()
Example no. 47
from mnist import MNIST
import pandas as pd
import pickle

mndata = MNIST('./Dataset')
images_train, labels_train = mndata.load_training()
images_test, labels_test = mndata.load_testing()

images_train = [[float(i) / 255 for i in j] for j in images_train]
images_test = [[float(i) / 255 for i in j] for j in images_test]

df_images_train = pd.DataFrame(images_train)
df_labels_train = pd.DataFrame(list(labels_train))
df_images_test = pd.DataFrame(images_test)
df_labels_test = pd.DataFrame(list(labels_test))

df_images_train.to_pickle("Dataset/images_train.pkl")
df_labels_train.to_pickle("Dataset/labels_train.pkl")
df_images_test.to_pickle("Dataset/images_test.pkl")
df_labels_test.to_pickle("Dataset/labels_test.pkl")
Example no. 48
parser.add_argument('--maxiter', default=20000, type=int)
parser.add_argument('--keep-prob', default=1.0, type=float)
parser.add_argument('--lr', default=0.07, type=float)
parser.add_argument('--lead', default=20, type=int)
parser.add_argument('--batch-size', default=20, type=int)
parser.add_argument('--frequence', default=10, type=int)
parser.add_argument('--datadir',
                    default='/Users/wjf/datasets/fashion_tf_1k_cor',
                    type=str)
parser.add_argument('--logdir', default='logs/gld_lead', type=str)

if __name__ == '__main__':
    args = parser.parse_args()

    # data loader
    train_set = MNIST(os.path.join(args.datadir, 'train.npz'))
    val_set = MNIST(os.path.join(args.datadir, 'val.npz'))
    val_loader = Loader(val_set, batch_size=500, shuffle=False)
    one_loader = Loader(train_set, batch_size=args.batch_size, shuffle=True)

    # model
    model = ConvYu()

    # summary
    _loss = tf.placeholder(tf.float32)
    _acc = tf.placeholder(tf.float32)

    train_summary_list = [
        tf.summary.scalar('loss/train', _loss),
        tf.summary.scalar('acc/train', _acc)
    ]
Example no. 49
from mnist import MNIST
import numpy as np
import scipy.misc
import random
import os

NUM_IMAGES = 60000
alphas = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
output_dir = "datasets/ULN" + str(NUM_IMAGES) + "/"
os.makedirs(output_dir)
output_filename = output_dir + "MNIST_uniform_label_noise_"

label_space = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
#Load MNIST dataset
mnist_directory = "datasets/MNIST_raw"
mndata = MNIST(mnist_directory)
images, labels = mndata.load_training()

output_x_files = {}
output_y_files = {}

BINARIZATION_THRESHOLD = 150


def img_to_str(img):
    img_str = "".join(
        list(map(lambda x: '1,' if x > BINARIZATION_THRESHOLD else '0,', img)))
    return img_str[:-1]


def str_to_img(s):
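    # The snippet is cut off here. The body below is only a guess at the intended
    # behaviour: the inverse of img_to_str above, i.e. parse the comma-separated
    # "0"/"1" tokens back into a flat list of pixel values.
    return [int(tok) for tok in s.split(",")]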
Example no. 50
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)

    # Report only errors by default
    if not args.verbose:
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

    # Create logdir name
    args.logdir = os.path.join(
        "logs", "{}-{}-{}".format(
            os.path.basename(globals().get("__file__", "notebook")),
            datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), ",".join(
                ("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value)
                 for key, value in sorted(vars(args).items())))))

    # Load data
    mnist = MNIST()

    # Create the TensorBoard writer
    writer = tf.summary.create_file_writer(args.logdir, flush_millis=10 * 1000)

    # Create the model
    model = Model(args)

    for epoch in range(args.epochs):
        # TODO: Run the `train_epoch` with `mnist.train` dataset
        model.train_epoch(mnist.train)

        # TODO: Evaluate the dev data using `evaluate` on `mnist.dev` dataset
        accuracy = model.evaluate(mnist.dev)

        print("Dev accuracy after epoch {} is {:.2f}".format(
Example no. 51
"""
This code is used for K-means Clustering
"""

# %reset
# import needed libraries, especially "minst"
import numpy as np
from mnist import MNIST
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
#from sklearn.preprocessing import normalize

from display_network import *

# load the handwritten digit images
mndata = MNIST('MNIST/')
mndata.load_testing()
X = mndata.test_images
X0 = np.asarray(X)[:1000, :] / 256.0
X = X0

# there are 10 clusters for 10 numbers from 0 to 9
K = 10
kmeans = KMeans(n_clusters=K).fit(X)

pred_label = kmeans.predict(X)

print(type(kmeans.cluster_centers_.T))
print(kmeans.cluster_centers_.T.shape)
A = display_network(kmeans.cluster_centers_.T, K, 1)
Example no. 52
torch.cuda.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
torch.backends.cudnn.deterministic = True

np.random.seed(args.seed)

n_epoch = args.max_e
valid_every = 5
start = 60  # epoch number to switch from JSA without cache to JSA with cache
net = Model().cuda()

optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)

# use MNIST-static as dataset, binarization used by (Salakhutdinov & Murray, 2008)
trainset = MNIST(fname="../data/mnist_salakhutdinov.pkl.gz",
                 which_set='train',
                 preproc=[],
                 n_datapoints=50000)
valiset = MNIST(fname="../data/mnist_salakhutdinov.pkl.gz",
                which_set='valid',
                preproc=[],
                n_datapoints=10000)
testset = MNIST(fname="../data/mnist_salakhutdinov.pkl.gz",
                which_set='test',
                preproc=[],
                n_datapoints=10000)

trainx, trainy = torch.FloatTensor(trainset.X), torch.LongTensor(
    np.arange(0, 50000)
)  #trainy records the index of each training datapoint, for recording the cache samples
validx, validy = torch.FloatTensor(valiset.X), torch.LongTensor(valiset.Y)
testx, testy = torch.FloatTensor(testset.X), torch.LongTensor(testset.Y)
Example no. 53
    r=0
    w=0
    for i in range(len(testlabels)):
        if y_pred[i] == testlabels[i]:
            r+=1
        else:
            w+=1
    print "tested ", len(testlabels), " digits"
    print "correct: ", r, "wrong: ", w, "error rate: ", float(w)*100/(r+w), "%"
    print "got correctly ", float(r)*100/(r+w), "%"



digitslst = prepareDigits()

mndata = MNIST('H:/tools/MNIST/')
trainims, trainlabels = mndata.load_training()
ims, labels = mndata.load_testing()
#print len(ims[0])


def saveIm(ims, pref, n):
    for i in range(n):
        fname=pref + str(i)+".png"
        im0 = np.array(ims[i]).reshape((28,28))
        imsave("testres/"+fname, im0)

def savePCAs(pcas):
    res = np.zeros((150, 120), dtype=np.float)
    npcas = len(pcas)
Example no. 54
import numpy as np
from mnist import MNIST
from sklearn.neighbors import NearestNeighbors
from sklearn.externals import joblib

mnist_dir = 'data/mnist/'

print("Loading")
mndata = MNIST(mnist_dir)
tr_data = mndata.load_training()
tr_data = np.asarray(tr_data[0])
tr_data = np.where(tr_data > 0, 1, 0)

k = 1
d_metric = 'l2'

print("Fitting")
neigh = NearestNeighbors(n_neighbors=k, metric=d_metric)
neigh.fit(tr_data)
joblib.dump(neigh, 'data/model/mnist_nn_model.pkl')

# for loading
# neigh = joblib.load('data/model/mnist_nn_model.pkl')
Example no. 55
from mnist import MNIST
import numpy as np

#
#  LOADING MNIST DATA SET
#

mnist_path = '/Users/beta/Documents/workspaces-classes/CS1L/project-ocr/mnist'

mndata = MNIST(mnist_path)

def to_nnet_outputs(range):
    return lambda x: [1 if x == y else 0 for y in range]

# scales a list. woot woot.
def scale(factor):
    def f(list):
        return [x * factor for x in list]
    return f
data_training = (lambda d: zip(d[1], map(scale(1.0/255.0), d[0]))) (mndata.load_training())
data_testing = (lambda d: zip(d[1], map(scale(1.0/255.0), d[0]))) (mndata.load_testing())

to_nnet_digit_outputs = to_nnet_outputs(range(0, 10))

data_training = map (lambda x: (x[0], x[1], to_nnet_digit_outputs(x[0])), data_training)
data_testing = map (lambda x: (x[0], x[1], to_nnet_digit_outputs(x[0])), data_testing)

np.random.shuffle(data_training)
np.random.shuffle(data_testing)
Example no. 56
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from mnist import MNIST

data = MNIST(data_dir='./data/MNIST_data')
print("Size of:")
print("- Training-set:\t\t{}".format(data.num_train))
print("- Validation-set:\t{}".format(data.num_val))
print("- Test-set:\t\t{}".format(data.num_test))

# The images are stored in one-dimensional arrays of this length.
img_size_flat = data.img_size_flat
print('- img_size_flat:\t{}'.format(img_size_flat))

# tuple with height and width of images used to reshape arrays.
img_shape = data.img_shape
print('- img_shape:\t\t{}'.format(img_shape))

# Number of classes, one class for each of 10 digits.
num_classes = data.num_classes
print('- num_classes:\t\t{}'.format(num_classes))

print(data.y_test[0:5, :])
print(data.y_test_cls[0:5])

# MNIST data image of shape, img_size_flat: 28 * 28 = 784
X = tf.compat.v1.placeholder(tf.float32, shape=[None, img_size_flat], name='X')
# 0 - 9 digits recognition, num_classes = 10 classes
Y = tf.compat.v1.placeholder(tf.float32, shape=[None, num_classes], name='Y')
Example no. 57
def get_train_loader(config):
    mndata = MNIST(config.mnist_path)
    data_list, label_list = mndata.load_training()
    return get_loader(config, data_list, label_list)
Example no. 58
def testConvolution():
    ''' Second thread used to break out of training loop'''
    thr = threading.Thread(target=running_func)
    thr.start()
    global running
    ''' Get training data'''
    #training_data, classes =  getData(["C:\\Users\\Ryan\\Documents\\\SWAG_TRAINING\\male\\Zoom\\F", "C:\\Users\\Ryan\\Documents\\\SWAG_TRAINING\\female\\Zoom\\F"])
    mn = MNIST()
    training_data, classes = mn.load_training()
    training_data = np.asarray(training_data)  #[0:50])
    training_data = np.reshape(training_data, [len(training_data), 28, 28])
    #training_data = training_data/training_data.max()
    #imsave("Images\\reconL0_"+str(20)+".jpg", training_data[0])

    print 'Test ConvRBM'
    rLayers = 40
    '''Conv(num_filters, filter_shape, pool_shape, binary=True, scale=0.001):'''
    #r = Convolutional(rLayers, [12,12], [2,2], False) #Convolutional(2, [3, 3], [2, 2])
    #rprev = pickle.load(open("minConvLayer1.p", "rb"))
    #print rprev.visible_to_pooled(training_data[0]).shape

    #hidden_data = rprev.visible_to_pooled(training_data[0])
    #for j in range(training_data.shape[0]):
    #    hidden_data = np.append(hidden_data, rprev.visible_to_pooled(training_data[j])[0:1], axis=0 )
    #training_data = hidden_data
    #print training_data.shape
    # Layer 2
    #r = Convolutional(rLayers, [6, 6], [2, 2], True) #pickle.load(open("convLayer1.p", "rb"))#
    #r.setUpperLayer(0, r)
    r = pickle.load(open("minConvPBS.p", "rb"))
    t = ConvolutionalTrainer(r, .5, 0, .003)
    #t.setUpperLayer()
    '''Trainer(rbm, momentum=0., l2=0., target_sparsity=None):'''
    #t = ConvolutionalTrainer(r,.5, 0, .005) #changed from .005 to .05
    saveType = "Serverlayer1"
    rLayers = r.num_filters
    print 'Training...'
    for i in range(rLayers):
        imsave(
            os.path.join("Images",
                         "weightsL" + saveType + "_" + str(i) + ".jpg"),
            r.weights[i])
    ''' Training for first layer'''
    for i in range(50):
        ''' Get NEW training data'''
        #        def trainThread():
        global avgRE, learning_rate, minRE1
        np.random.shuffle(training_data)
        for j in range(training_data.shape[0]):
            oldRE = avgRE
            ''' Slowly decrease learning rate from ~1/500th of the mean weight'''
            learning_rate = .99 * learning_rate + .01 * (
                abs(float(r.weights.mean())) / (100000 + i * i))
            t.learn(training_data[j], learning_rate, cdk=2)
            avgRE = r.get_avg_error(training_data[j])

            # If error stops decreasing over 100 iterations, break loop
            if j + i * (training_data.shape[0]) % 9999 == 1:
                oldRE = avgRE
            ''' Save minimum weights'''
            if avgRE < oldRE:
                direction = '-'

                if avgRE < minRE1:
                    minRE1 = avgRE

                    if j % 100 == 0:
                        ''' Reconstruct image for one layer'''
                        minRecon = r.reconstruct(training_data[j], 2)
                        #minRecon = minRecon / minRecon.max() * 255
                        with lock:
                            imsave(
                                os.path.join(
                                    "Images", "reconL" + saveType + "_" +
                                    str(i * 100 + j) + "_0.jpg"),
                                training_data[j])
                            imsave(
                                os.path.join(
                                    "Images", "reconL" + saveType + "_" +
                                    str(i * 100 + j) + "_1.jpg"), minRecon)

                    if j % 5 == 4:  #minRE1 < 2000 and
                        with lock:
                            print 'Saving...'
                            pickle.dump(
                                r, open("minConvLayer" + saveType + ".p",
                                        "wb"))
                            for k in range(rLayers):
                                imsave(
                                    os.path.join(
                                        "Images", "weightsL" + saveType +
                                        "_min_" + str(k) + ".jpg"),
                                    r.weights[k])
                if abs(oldRE - avgRE) < 10:
                    t.momentum = .7
            else:
                direction = '+'
            with lock:
                print i, 'Error 1: ', avgRE, direction, ' old: ', oldRE
            #if abs(oldRE - avgRE) < .01:
            #    break
            if not running:
                with lock:
                    print 'First break'
                    print 'Breaking on running (in)'
                break
    #thrs = []
    #for tt in range(1):
    #print 'Starting threads...'
    #thr_train = threading.Thread(target=trainThread)
    #thr_train.daemon = True
    #thrs.append(thr_train)
    #thr_train.start()
    #print 'Started.'
    #[x.start() for x in thrs]
    #with lock:
    #print 'Joining threads...'
    #[x.join() for x in thrs]
    #thr_train.join()

    #if abs(oldRE - avgRE) < .0001:
    #    break
        if not running:
            print 'Second break'
            print 'Breaking on running (out)'
            break
            #print 'shape: ', r.hidden_sample(training_data[j]).shape
    #with lock:
    #    print 'Joining threads...'
    #thr_train.join()
    print 'Saving layer 1 weights'
    pickle.dump(r, open("convLayer.p", "wb"))
    ''' Use the min reconstruction error weights as layer 1'''
    #if minRE1 < avgRE and minRE1 != -1:
    #    r = pickle.load(open("minConvLayer1.p", "rb"))

    # Print weights to images
    for i in range(rLayers):
        imsave(os.path.join("Images", "weightsL20_" + str(i) + ".jpg"),
               r.weights[i])
Example no. 59
import math

from mnist import MNIST
mndata = MNIST('./mnist')
train_data,train_label = mndata.load_training()
test_data,test_label = mndata.load_testing()

f3 = [[0 for x in range(28)]for x in range(5000)]

def countone(img):
	row = -1
	count = [0 for x in range(28)]
	for j in range(len(img)):
		if j%28 == 0:
			row += 1
		if img[j] > 0:
			count[row] += 1
	return count

f3_test = []
accuracy = 0

for j in range(5000):
	diff = []
	count_digit = [0 for x in range(10)]
	test_img = test_data[j]
	label = []
	f3_test = countone(test_img)
	for i in range(5000):
		img = train_data[i]
		f3[i] = countone(img)
Example no. 60
import numpy as np  # we're going to use numpy to process input and output data
import onnxruntime  # to inference ONNX models, we use the ONNX Runtime
import onnx
from onnx import numpy_helper
import urllib.request
import time
import glob
import os

### One way of loading ###
#Load sample inputs and outputs
from mnist import MNIST

mndata = MNIST('samples')

images, labels = mndata.load_testing()

print(mndata.display(images[0]))
print(mndata.display(images[1]))
print(labels[0])
print(labels[1])
#print (labels)

print(type(images[0]))
#print (images[0])
### Another way
import gzip
import numpy as np

#test inputs
f = gzip.open('t10k-images-idx3-ubyte.gz', 'rb')
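
The snippet ends right after opening the gzipped test-image file. A sketch of the usual way to finish this route (skip the 16-byte IDX header, then reinterpret the rest as 28x28 uint8 images; the label file layout is assumed to be the standard 8-byte header) is:

buf = f.read()
f.close()
# Images: 16-byte header (magic, count, rows, cols), then raw uint8 pixels.
test_images = np.frombuffer(buf, dtype=np.uint8, offset=16).reshape(-1, 28, 28)

# Labels: 8-byte header (magic, count), then one uint8 label per image.
with gzip.open('t10k-labels-idx1-ubyte.gz', 'rb') as f:
    test_labels = np.frombuffer(f.read(), dtype=np.uint8, offset=8)

print(test_images.shape, test_labels.shape)  # expected: (10000, 28, 28) (10000,)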