def get_mnist(sc, mnist_path):
    """Load MNIST and return normalized train/test RDDs of BigDL Samples.

    Both splits are standardized with the *training* mean/std so that test
    data is transformed consistently. Labels are shifted by +1 because BigDL
    class targets start from 1, not 0.

    :param sc: SparkContext used to parallelize the numpy arrays.
    :param mnist_path: directory holding (or receiving) the MNIST files.
    :return: tuple (train_sample_rdd, test_sample_rdd).
    """
    train_images, train_labels = mnist.read_data_sets(mnist_path, "train")
    test_images, test_labels = mnist.read_data_sets(mnist_path, "test")

    # Statistics come from the training split only.
    mean = np.mean(train_images)
    std = np.std(train_images)

    def to_sample(pair):
        # pair is (image_ndarray, label); normalize and shift label to 1-based.
        image, label = pair
        return common.Sample.from_ndarray((image - mean) / std, label + 1)

    rdd_train_sample = sc.parallelize(train_images) \
        .zip(sc.parallelize(train_labels)) \
        .map(to_sample)
    rdd_test_sample = sc.parallelize(test_images) \
        .zip(sc.parallelize(test_labels)) \
        .map(to_sample)
    return (rdd_train_sample, rdd_test_sample)
def get_mnist(sc, data_type="train", location="/tmp/mnist"):
    """
    Get mnist dataset and parallelize into RDDs.
    Data would be downloaded automatically if it doesn't present at the
    specific location.

    :param sc: SparkContext.
    :param data_type: "train" for training data and "test" for testing data.
    :param location: Location to store mnist dataset.
    :return: RDD of (features: ndarray, label: ndarray).
    """
    features, targets = mnist.read_data_sets(location, data_type)
    feature_rdd = sc.parallelize(features)
    # Target start from 1 in BigDL
    target_rdd = sc.parallelize(targets + 1)
    return feature_rdd.zip(target_rdd)
def get_mnist(sc, data_type="train", location="/tmp/mnist"):
    """
    Download or load MNIST dataset to/from the specified path.
    Normalize and transform input data into an RDD of Sample.
    """
    from bigdl.dataset import mnist
    from bigdl.dataset.transformer import normalizer

    raw_images, raw_labels = mnist.read_data_sets(location, data_type)
    # NOTE(review): `input_shape` is resolved from the enclosing scope, not a
    # parameter — confirm it is defined (e.g. (28, 28, 1)) before calling.
    raw_images = raw_images.reshape((raw_images.shape[0],) + input_shape)

    image_rdd = sc.parallelize(raw_images)
    label_rdd = sc.parallelize(raw_labels + 1)  # Target start from 1 in BigDL

    # Normalize with the training-set statistics and wrap as BigDL Samples.
    return image_rdd.zip(label_rdd).map(
        lambda pair: Sample.from_ndarray(
            normalizer(pair[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
            pair[1]))
def get_mnist(sc, data_type="train", location="/tmp/mnist"):
    """
    Download or load MNIST dataset.
    Normalize and transform input data into an RDD of Sample.
    """
    from bigdl.dataset import mnist
    from bigdl.dataset.transformer import normalizer

    raw_images, raw_labels = mnist.read_data_sets(location, data_type)
    # NCHW layout: one channel, 28x28 pixels per digit.
    raw_images = raw_images.reshape(raw_images.shape[0], 1, 28, 28)

    image_rdd = sc.parallelize(raw_images)
    label_rdd = sc.parallelize(raw_labels + 1)  # Target start from 1 in BigDL

    # Normalize with the training-set statistics and wrap as BigDL Samples.
    return image_rdd.zip(label_rdd).map(
        lambda pair: Sample.from_ndarray(
            normalizer(pair[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
            pair[1]))
def get_mnist(sc, data_type="train", location="/tmp/mnist"):
    """
    Get and normalize the mnist data. We would download it automatically
    if the data doesn't present at the specific location.

    :param sc: SparkContext
    :param data_type: training data or testing data
    :param location: Location storing the mnist
    :return: A RDD of (features: Ndarray, label: Ndarray)
    """
    features, targets = mnist.read_data_sets(location, data_type)
    feature_rdd = sc.parallelize(features)
    # Target start from 1 in BigDL
    target_rdd = sc.parallelize(targets + 1)
    return feature_rdd.zip(target_rdd)
def main(data_num):
    """Evaluate a trained LeNet checkpoint on the first `data_num` MNIST
    test digits and print the resulting accuracy."""
    sc = init_nncontext()

    # Load the test split, normalize with training statistics, and build a
    # TFDataset of (feature, label) records for per-thread batched inference.
    images_data, labels_data = mnist.read_data_sets("/tmp/mnist", "test")
    image_rdd = sc.parallelize(images_data[:data_num])
    labels_rdd = sc.parallelize(labels_data[:data_num])
    sample_rdd = image_rdd.zip(labels_rdd).map(
        lambda rec_tuple: [normalizer(rec_tuple[0],
                                      mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                           np.array(rec_tuple[1])])
    dataset = TFDataset.from_rdd(sample_rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], [1]],
                                 types=[tf.float32, tf.int32],
                                 batch_per_thread=20)

    # Build the inference graph on the dataset tensors. A 1 marks a correct
    # prediction; averaging the column therefore gives the accuracy.
    images, labels = dataset.tensors
    labels = tf.squeeze(labels)
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10,
                                         is_training=False)
    predictions = tf.to_int32(tf.argmax(logits, axis=1))
    correct = tf.expand_dims(tf.to_int32(tf.equal(predictions, labels)),
                             axis=1)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "/tmp/lenet/model")
        predictor = TFPredictor(sess, [correct])
        accuracy = predictor.predict().mean()
        print("predict accuracy is %s" % accuracy)
def main():
    """Train LeNet on MNIST for 5 epochs with TFOptimizer and save the
    resulting TensorFlow checkpoint for the inference examples."""
    sc = init_nncontext()

    # Load the training split, normalize with training statistics, and build
    # a batched TFDataset of (feature, label) records.
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "train")
    image_rdd = sc.parallelize(images_data)
    labels_rdd = sc.parallelize(labels_data)
    rdd = image_rdd.zip(labels_rdd) \
        .map(lambda rec_tuple: [normalizer(rec_tuple[0],
                                           mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                                np.array(rec_tuple[1])])
    dataset = TFDataset.from_rdd(rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], [1]],
                                 types=[tf.float32, tf.int32],
                                 batch_size=280)

    # Build the LeNet training graph and its softmax cross-entropy loss.
    images, labels = dataset.tensors
    labels = tf.squeeze(labels)
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10,
                                         is_training=True)
    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    # Create an optimizer with training-summary logging enabled.
    optimizer = TFOptimizer(loss, Adam(1e-3))
    optimizer.set_train_summary(TrainSummary("/tmp/az_lenet", "lenet"))

    # Kick off training, one epoch per optimize() call.
    for i in range(5):
        optimizer.optimize(end_trigger=MaxEpoch(i + 1))

    saver = tf.train.Saver()
    # FIX: save to the "/tmp/lenet/model" prefix that the inference examples
    # restore from; the previous "/tmp/lenet/" prefix produced checkpoint
    # files that saver.restore(sess, "/tmp/lenet/model") could not find.
    saver.save(optimizer.sess, "/tmp/lenet/model")
def main(options, data_num):
    """Evaluate a trained LeNet checkpoint on the first `data_num` MNIST test
    digits loaded from `options.data_path` (default /tmp/mnist)."""
    data_path = '/tmp/mnist' if not options.data_path else options.data_path
    sc = init_nncontext()

    # Load the test split, normalize with training statistics, and feed the
    # ndarrays directly into a per-thread batched TFDataset.
    images_data, labels_data = mnist.read_data_sets(data_path, "test")
    images_data = (images_data[:data_num] - mnist.TRAIN_MEAN) / mnist.TRAIN_STD
    labels_data = labels_data[:data_num].astype(np.int32)
    dataset = TFDataset.from_ndarrays((images_data, labels_data),
                                      batch_per_thread=20)

    # Build the inference graph; a 1 marks a correct prediction, so the mean
    # of the `correct` column is the accuracy.
    images, labels = dataset.tensors
    labels = tf.squeeze(labels)
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10,
                                         is_training=False)
    predictions = tf.to_int32(tf.argmax(logits, axis=1))
    correct = tf.expand_dims(tf.to_int32(tf.equal(predictions, labels)),
                             axis=1)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "/tmp/lenet/model")
        predictor = TFPredictor(sess, [correct])
        accuracy = predictor.predict().mean()
        print("predict accuracy is %s" % accuracy)
def get_data(dataset):
    """Load an MNIST split and return (normalized_images, int32_labels).

    Images are standardized with the training-set mean/std regardless of
    which split is requested, so train and test are transformed identically.
    """
    from bigdl.dataset import mnist

    images, labels = mnist.read_data_sets("/tmp/mnist", dataset)
    normalized = (images - mnist.TRAIN_MEAN) / mnist.TRAIN_STD
    return (normalized, labels.astype(np.int32))
def get_data_rdd(dataset):
    """Load an MNIST split and return an RDD of images scaled to [-1, 1].

    Each record is a single-element list holding one rescaled image array;
    labels are read but discarded.
    """
    # NOTE(review): `sc` is resolved from the enclosing scope, not passed in —
    # confirm a SparkContext named `sc` exists where this is called.
    images, _labels = mnist.read_data_sets("/tmp/mnist", dataset)
    # Map raw pixel values [0, 255] to [-1, 1].
    return sc.parallelize(images).map(lambda img: [((img / 255) - 0.5) * 2])
#Train model trained_model = optimizer.optimize() # 5. Loss visualization # Let's draw the performance curve during optimization. loss = np.array(train_summary.read_scalar("Loss")) plt.figure(figsize = (12,12)) plt.plot(loss[:,0],loss[:,1],label='loss') plt.xlim(0,loss.shape[0]+10) plt.grid(True) plt.title("loss") # 6. Prediction on test dataset # Then we test our autoencoder from the previous loaded dataset, compress and reconstruct the digit images # then compare the results with the original inputs, which are also our target outputs. We are going to use # only 10 examples to demonstrate our created and trained autoencoder is working. (images, labels) = mnist.read_data_sets(mnist_path, "test") examples_to_show = 10 examples = trained_model.predict(test_data).take(examples_to_show) f, a = plt.subplots(2, examples_to_show, figsize=(examples_to_show, 2)) for i in range(examples_to_show): a[0][i].imshow(np.reshape(images[i], (28, 28))) a[1][i].imshow(np.reshape(examples[i], (28, 28)))