if __name__ == "__main__":
    # Build an augmented copy of the training set returned by load_data():
    # every source image is processed `multiplier` times through
    # transform.elastic, a random transform.rotate and transform.sigmoid,
    # and the stacked results are written with numpy.save.
    sigma, alpha = 4, 32
    train = load_data()
    multiplier = 4
    original_size = train[0].shape[0]
    final_size = multiplier * original_size
    print("Creating dataset of size {}".format(final_size))

    x = []
    # Labels are gathered in a plain list and converted once after the loop;
    # numpy.append returns a fresh copy on every call, which made the
    # original per-sample append quadratic in the dataset size.
    collected_labels = []
    for _ in range(multiplier):
        for image, label in zip(train[0], train[1]):
            image = transform.elastic(image, sigma, alpha)
            # Random angle in [-30, 30]; halved for labels 1 and 7.
            rotation = random.randint(-30, 30)
            if label in (1, 7):
                rotation /= 2
            image = transform.rotate(image, rotation)
            image = transform.sigmoid(image, 12)
            x.append(image)
            collected_labels.append(label)
            # Progress report every 1000 generated images.
            if len(x) % 1000 == 0:
                print("Image {}".format(len(x)))

    # float64 matches what appending onto an empty numpy.array([]) produced.
    y = numpy.asarray(collected_labels, dtype=numpy.float64)
    ds = (numpy.vstack(x), y)

    prefix = "mnist_elastic_{}_{}".format(sigma, alpha)
    suffix = "{}k".format(final_size // 1000)
    # NOTE(review): only the images (ds[0]) are saved in the portion of the
    # script visible here; confirm the labels in ds[1] are written elsewhere.
    numpy.save("{}_x_{}".format(prefix, suffix), ds[0])
# NOTE(review): the next line is the tail of a function whose `def` header
# lies outside this view; it is kept verbatim rather than reconstructed.
# Load the dataset # train_set, valid_set, test_set format: tuple(input, target) # input is an numpy.ndarray of 2 dimensions (a matrix) # witch row's correspond to an example. target is a # numpy.ndarray of 1 dimensions (vector)) that have the same length as # the number of rows in the input. It should give the target # target to the example with the same index in the input. dataset = '../data/mnist.pkl.gz' f = gzip.open(dataset, 'rb') train_set, valid_set, test_set = cPickle.load(f) f.close() return train_set


if __name__ == "__main__":
    # Preview script: run the first few images from load_images() through
    # transform.elastic, a random transform.rotate and transform.sigmoid,
    # and pass each original/result pair to show_image.
    preview_count = 15

    ds = load_images()
    images = ds[0]
    labels = ds[1]

    # Slicing stops the loop early instead of indexing past the end when
    # fewer than preview_count samples are available.
    previews = zip(images[:preview_count], labels[:preview_count])
    for original, raw_label in previews:
        label = int(raw_label)
        image = transform.elastic(original, 4, 32)
        # Random angle in [-30, 30]; halved for labels 1 and 7.
        rotation = random.randint(-30, 30)
        if label in (1, 7):
            rotation /= 2
        image = transform.rotate(image, rotation)
        image = transform.sigmoid(image, 12)
        show_image(original, image)