def gridfonts(*args, **kwargs):
    """
    Build the "Gridfonts" dataset.

    The ``gridfonts.npy`` file is obtained through ``get_file`` and holds
    a pair ``[letters, labels]``.  Every letter matrix is loaded twice,
    once as an input and once as a target; the labels are stored as
    strings.  Positional and keyword arguments are accepted but unused.

    Returns the populated ``cx.Dataset``.
    """
    dataset = cx.Dataset()
    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/gridfonts/gridfonts.npy"
    local_file = get_file("gridfonts.npy", origin=source)
    # NOTE(review): allow_pickle=True unpickles the downloaded file; this
    # is only safe for as long as the origin above is trusted.
    raw = np.load(local_file, allow_pickle=True)
    glyphs, names = raw[0], raw[1]  # [letters, labels]
    letters = np.array(list(glyphs))
    # Inputs and targets are separate arrays with identical content.
    targets = np.array(list(glyphs))
    labels = np.array(list(names), dtype=str)
    dataset.name = "Gridfonts"
    dataset.description = """
This dataset originates from Douglas Hofstadter's research group:

http://goosie.cogsci.indiana.edu/pub/gridfonts.data

![Gridfont Grid](https://github.com/Calysto/conx-data/raw/master/gridfonts/grid.png)

These data have been processed to make them neural network friendly:

https://github.com/Calysto/conx-data/blob/master/gridfonts/gridfonts.py

The dataset is composed of letters on a 25 row x 9 column grid. The
inputs and targets are identical, and the labels contain a string
identifying the letter.

You can read a thesis using part of this dataset here:
https://repository.brynmawr.edu/compsci_pubs/78/
"""
    dataset.load_direct([letters], [targets], [labels])
    return dataset
def cifar10(*args, **kwargs):
    """
    Build the "CIFAR-10" dataset from ``keras.datasets.cifar10``.

    The train and test splits returned by Keras are concatenated into a
    single collection.  Inputs are converted to float32 and divided by
    255, targets are produced by ``to_categorical(..., 10)``, and labels
    are the class numbers as strings.  Positional and keyword arguments
    are accepted but unused.

    Returns the populated ``cx.Dataset``.
    """
    dataset = cx.Dataset()
    from keras.datasets import cifar10 as keras_cifar10

    (x_train, y_train), (x_test, y_test) = keras_cifar10.load_data()

    inputs = np.concatenate((x_train, x_test))
    del x_train, x_test  # drop the split arrays before the float32 copy
    inputs = inputs.astype('float32')
    inputs /= 255

    class_ids = np.concatenate((y_train, y_test))
    del y_train, y_test
    targets = to_categorical(class_ids, 10)
    # Each entry of class_ids is indexed with [0] to obtain the class number.
    labels = np.array([str(row[0]) for row in class_ids], dtype=str)

    dataset.name = "CIFAR-10"
    dataset.description = """
Original source: https://www.cs.toronto.edu/~kriz/cifar.html

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10
classes, with 6000 images per class.

The classes are completely mutually exclusive. There is no overlap
between automobiles and trucks. "Automobile" includes sedans, SUVs,
things of that sort. "Truck" includes only big trucks. Neither
includes pickup trucks.
"""
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
def figure_ground_a(*args, **kwargs):
    """
    Build the "Figure-Ground A" dataset.

    The ``figure_ground_a.npy`` file is obtained through ``get_file``.
    Each record has the layout ``[[letter], [brim, body]]``: the letter
    becomes the single input bank, and the brim and body become two
    target banks.  No labels are loaded.  Positional and keyword
    arguments are accepted but unused.

    Returns the populated ``cx.Dataset``.
    """
    dataset = cx.Dataset()
    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/gridfonts/figure_ground_a.npy"
    local_file = get_file("figure_ground_a.npy", origin=source)
    # NOTE(review): allow_pickle=True unpickles the downloaded file; this
    # is only safe for as long as the origin above is trusted.
    records = np.load(local_file, allow_pickle=True)
    # Record layout: [[letter], [brim, body]]
    letters = np.array([record[0] for record in records])
    brims = np.array([record[1][0] for record in records])
    bodies = np.array([record[1][1] for record in records])
    dataset.name = "Figure-Ground A"
    dataset.description = """
This dataset (the so-called a-tabase) originates from Douglas
Hofstadter's research group:

http://goosie.cogsci.indiana.edu/pub/gridfonts.data

![Gridfont Grid](https://github.com/Calysto/conx-data/raw/master/gridfonts/grid.png)

These data (all the letter A) have been processed to make them neural
network friendly:

https://github.com/Calysto/conx-data/blob/master/gridfonts/gridfonts.py

The brim and body parts have been idenified manually. The dataset is
composed of letters on a 17 row x 9 column grid (4 lines not used on
top and another 4 not used on the bottom of each letter were removed
from the original 25x9 latter images). The inputs are composed of the
full letter. The targets are composed of a picture of the body and
the brim.

You can read a thesis using part of this dataset here:
https://repository.brynmawr.edu/compsci_pubs/78/
"""
    dataset.load_direct([letters], [brims, bodies])
    return dataset
def colors(
        *args,
        path='colors.csv',
        url="https://raw.githubusercontent.com/Calysto/conx-data/master/colors/colors.csv",
        **kwargs):
    """
    Build the "Colors" dataset from a CSV file of named RGB colors.

    Args:
        path: File name handed to ``get_file``.
        url: Origin handed to ``get_file``.
        *args, **kwargs: Accepted but unused.

    Returns:
        A ``cx.Dataset`` loaded with:

        * inputs  -- float32 ``[red, green, blue]`` rows, each channel
          divided by 255;
        * targets -- uint16 ``[n]`` rows, where ``n`` is the 1-based
          position of the color among the data rows;
        * labels  -- the color names.

    Raises:
        ValueError: If a non-empty row does not have exactly four fields
            or a channel value is not an integer.
    """
    from keras.utils import get_file

    dataset = cx.Dataset()
    local_path = get_file(path, origin=url)
    inputs = []
    labels = []
    targets = []
    # The context manager closes the file even if a row fails to parse;
    # newline="" is the mode the csv module documents for its readers.
    with open(local_path, "r", newline="") as fp:
        for row in csv.reader(fp):
            if not row:
                continue  # blank line: nothing to unpack
            name, r, g, b = row
            if name == "name":
                continue  # first line is header
            inputs.append([int(r) / 255, int(g) / 255, int(b) / 255])
            # Target is the 1-based index of this color among data rows.
            targets.append([len(labels) + 1])
            labels.append(name)
    inputs = np.array(inputs, dtype='float32')
    targets = np.array(targets, dtype='uint16')
    dataset.name = "Colors"
    dataset.description = """
Original source: https://github.com/andrewortman/colorbot

This dataset also includes some ignored in original data.

Inspired by:

* http://aiweirdness.com/tagged/paint-colors

When initially loaded, this database has the following format:

* labels: [color_name_string, ...] # order matches target
* inputs: [[red, green, blue], ...] # scaled between 0 and 1
* targets: [[int], ...] # number of label

For example:

```
>>> import conx as cx
>>> ds = cx.Dataset.get("colors")
>>> ds.labels[0], ds.inputs[0], ds.targets[0]
('tidewater', [0.7686274647712708, 0.843137264251709, 0.8352941274642944], [1])
```
"""
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
def cmu_faces_full_size(*args, path="cmu_faces_full_size.npz", **kwargs):
    """
    Build the "CMU Faces, full-size" dataset.

    ``path`` is passed to ``load_dataset_npz`` together with the
    conx-data URL of the archive; the resulting inputs and labels are
    handed to ``process_face_data``, whose return value is returned
    unchanged.  Other positional and keyword arguments are unused.
    """
    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/cmu_faces/cmu_faces_full_size.npz"
    faces = cx.Dataset()
    inputs, labels = load_dataset_npz(path, source)
    faces.name = "CMU Faces, full-size"
    faces.description = """
Original source: http://archive.ics.uci.edu/ml/datasets/cmu+face+images
"""
    return process_face_data(faces, inputs, labels)
def cifar100(*args, **kwargs):
    """
    Build the "CIFAR-100" dataset from ``keras.datasets.cifar100``.

    The train and test splits returned by Keras are concatenated into a
    single collection.  Inputs are converted to float32 and divided by
    255, targets are produced by ``to_categorical(..., 100)``, and labels
    are the class numbers as strings.  Positional and keyword arguments
    are accepted but unused.

    Returns the populated ``cx.Dataset``.
    """
    dataset = cx.Dataset()
    from keras.datasets import cifar100 as keras_cifar100

    (x_train, y_train), (x_test, y_test) = keras_cifar100.load_data()

    # Images: merge both splits, then scale by 1/255 as float32.
    inputs = np.concatenate((x_train, x_test))
    inputs = inputs.astype('float32')
    inputs /= 255

    # Classes: categorical targets plus string labels.
    class_ids = np.concatenate((y_train, y_test))
    targets = to_categorical(class_ids, 100)
    # Each entry of class_ids is indexed with [0] to obtain the class number.
    labels = np.array([str(row[0]) for row in class_ids], dtype=str)

    dataset.name = "CIFAR-100"
    dataset.description = """
Original source: https://www.cs.toronto.edu/~kriz/cifar.html

This dataset is just like the CIFAR-10, except it has 100 classes
containing 600 images each. The 100 classes in the CIFAR-100 are
grouped into 20 superclasses. Each image comes with a "fine" label
(the class to which it belongs) and a "coarse" label (the superclass
to which it belongs).

Here is the list of classes in the CIFAR-100:

Superclass | Classes
-------------------------------|-----------------------------------------------------
aquatic mammals | beaver, dolphin, otter, seal, whale
fish | aquarium fish, flatfish, ray, shark, trout
flowers | orchids, poppies, roses, sunflowers, tulips
food containers | bottles, bowls, cans, cups, plates
fruit and vegetables | apples, mushrooms, oranges, pears, sweet peppers
household electrical devices | clock, computer keyboard, lamp, telephone, television
household furniture | bed, chair, couch, table, wardrobe
insects | bee, beetle, butterfly, caterpillar, cockroach
large carnivores | bear, leopard, lion, tiger, wolf
large man-made outdoor things | bridge, castle, house, road, skyscraper
large natural outdoor scenes | cloud, forest, mountain, plain, sea
large omnivores and herbivores | camel, cattle, chimpanzee, elephant, kangaroo
medium-sized mammals | fox, porcupine, possum, raccoon, skunk
non-insect invertebrates | crab, lobster, snail, spider, worm
people | baby, boy, girl, man, woman
reptiles | crocodile, dinosaur, lizard, snake, turtle
small mammals | hamster, mouse, rabbit, shrew, squirrel
trees | maple, oak, palm, pine, willow
vehicles 1 | bicycle, bus, motorcycle, pickup truck, train
vehicles 2 | lawn-mower, rocket, streetcar, tank, tractor
"""
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
def mnist_h5(*args, **kwargs):
    """
    Load the Keras MNIST dataset from an H5 file.

    The file ``mnist.h5`` is obtained through ``get_file`` and opened
    read-only.  Its ``"inputs"``, ``"targets"`` and ``"labels"`` entries
    are attached to the dataset directly rather than copied, and the open
    file object is kept on ``dataset.h5`` (it is not closed here).
    Positional and keyword arguments are accepted but unused.

    Returns the populated ``cx.Dataset``.
    """
    import h5py

    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/mnist/mnist.h5"
    local_file = get_file("mnist.h5", origin=source)
    h5 = h5py.File(local_file, "r")

    dataset = cx.Dataset()
    # Back the dataset with the H5 entries themselves.
    dataset._inputs = h5["inputs"]
    dataset._targets = h5["targets"]
    dataset._labels = h5["labels"]
    # Keep a reference to the open file alongside the data it backs.
    dataset.h5 = h5

    dataset.name = "MNIST-H5"
    # `description` is not defined in this function; it is resolved from
    # the enclosing module scope.
    dataset.description = description
    dataset._cache_values()
    return dataset
def fingers(*args, path='fingers.npz', **kwargs):
    """
    Build the "Fingers" dataset.

    ``path`` is passed to ``load_dataset_npz`` together with the
    conx-data URL of the archive.  Inputs are converted to float32 and
    divided by 255; each label is turned into a length-6 uint8 target
    vector holding 1 at the position equal to the label and 0 elsewhere.
    Other positional and keyword arguments are unused.

    Returns the populated ``cx.Dataset``.
    """
    source = "https://raw.githubusercontent.com/Calysto/conx-data/master/fingers/fingers.npz"
    dataset = cx.Dataset()
    inputs, labels = load_dataset_npz(path, source)
    inputs = inputs.astype('float32')
    inputs /= 255

    def one_hot(label):
        # 1 where the category index equals the label, 0 elsewhere.
        return [int(label == category) for category in range(6)]

    targets = np.array([one_hot(label) for label in labels]).astype('uint8')
    dataset.name = "Fingers"
    dataset.description = """
This dataset contains 12,000 RGB images of human hands showing
different numbers of fingers, from zero to five. The same fingers are
always used to represent each number category (e.g., all images of
"two" have raised index and middle fingers). Each image is a
30 x 40 x 3 array of floating-point numbers in the range 0 to 1. The
target data consists of one-hot binary vectors of size 6
corresponding to the classification categories "zero" through
"five". There are 2000 images for each category.

Created by Shreeda Segan and Albert Yu at Sarah Lawrence College.
"""
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
def mnist(*args, **kwargs):
    """
    Build the MNIST dataset from ``keras.datasets.mnist``.

    The train and test splits returned by Keras are concatenated into a
    single collection.

    Args:
        *args, **kwargs: Accepted but unused.

    Returns:
        A ``cx.Dataset`` loaded with:

        * inputs  -- float16 images reshaped to ``(N, 28, 28, 1)`` and
          divided by 255;
        * targets -- uint8 categorical vectors over 10 classes;
        * labels  -- the digit of each image as a string.
    """
    from keras.datasets import mnist

    # input image dimensions
    img_rows, img_cols = 28, 28
    # the data, shuffled and split between train and test sets
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Add a trailing channel axis and scale the pixel values by 1/255.
    # Reshape and cast are applied once, after merging the two splits.
    images = np.concatenate((x_train, x_test))
    images = images.reshape(images.shape[0], img_rows, img_cols, 1)
    inputs = images.astype('float16') / 255
    labels = np.concatenate((y_train, y_test))  # ints, 0 to 9
    ###########################################
    # fix mis-labeled image(s) in Keras dataset
    labels[10994] = 9
    ###########################################
    # The class count is given explicitly so the target width does not
    # depend on which digits happen to occur in `labels`.
    targets = to_categorical(labels, 10).astype("uint8")
    labels = np.array([str(label) for label in labels], dtype=str)
    dataset = cx.Dataset()
    # NOTE(review): unlike mnist_h5, no name/description is set on the
    # dataset here -- confirm whether the caller fills those in.
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
shape=512, vshape=(16, 32), activation='relu', dropout=0.2)) net.add( cx.Layer("hidden2", shape=512, vshape=(16, 32), activation='relu', dropout=0.2)) net.add(cx.Layer("output", shape=10, activation='softmax')) net.connect('input', 'hidden1') net.connect('hidden1', 'hidden2') net.connect('hidden2', 'output') net.compile(loss='mean_squared_error', optimizer='sgd') ds = cx.Dataset() ds.get("mnist") net.set_dataset(ds) #net.rescale_inputs((0,255), (0,1), 'float32') #net.shuffle_dataset() ds.inputs.reshape(784) ds.slice(100) #net.set_targets_to_categories(10) ds.summary() net.train(10) #net.test()