# Imports shared by the two main_unsupervised() variants below.
# AutoEncoder, read_data_sets_pretraining, fill_feed_dict_ae, training,
# loss_x_entropy, loss_summaries, tile_raster_images, and FLAGS are
# project-level helpers defined elsewhere in this codebase.
import numpy as np
import tensorflow as tf
from os.path import join as pjoin


# Variant 1: TensorFlow r0.x APIs (tf.train.SummaryWriter,
# tf.histogram_summary, tf.initialize_variables) and Python 2 xrange.
def main_unsupervised():
    with tf.Graph().as_default() as g:
        sess = tf.Session()

        # Network shape: input pixels -> hidden layers -> class logits.
        num_hidden = FLAGS.num_hidden_layers
        ae_hidden_shapes = [getattr(FLAGS, "hidden{0}_units".format(j + 1))
                            for j in xrange(num_hidden)]
        ae_shape = [FLAGS.image_pixels] + ae_hidden_shapes + [FLAGS.num_classes]
        ae = AutoEncoder(ae_shape, sess)

        data = read_data_sets_pretraining(FLAGS.data_dir)
        num_train = data.train.num_examples

        # Per-layer learning rates and input-corruption levels.
        learning_rates = {j: getattr(FLAGS,
                                     "pre_layer{0}_learning_rate".format(j + 1))
                          for j in xrange(num_hidden)}
        noise = {j: getattr(FLAGS, "noise_{0}".format(j + 1))
                 for j in xrange(num_hidden)}

        # Greedy layer-wise pretraining: train one hidden layer at a time.
        for i in xrange(len(ae_shape) - 2):
            n = i + 1
            with tf.variable_scope("pretrain_{0}".format(n)):
                input_ = tf.placeholder(dtype=tf.float32,
                                        shape=(FLAGS.batch_size, ae_shape[0]),
                                        name='ae_input_pl')
                target_ = tf.placeholder(dtype=tf.float32,
                                         shape=(FLAGS.batch_size, ae_shape[0]),
                                         name='ae_target_pl')
                layer = ae.pretrain_net(input_, n)

                with tf.name_scope("target"):
                    target_for_loss = ae.pretrain_net(target_, n,
                                                      is_target=True)

                loss = loss_x_entropy(layer, target_for_loss)
                train_op, global_step = training(loss, learning_rates[i], i)

                summary_dir = pjoin(FLAGS.summary_dir,
                                    'pretraining_{0}'.format(n))
                summary_writer = tf.train.SummaryWriter(
                    summary_dir, graph_def=sess.graph_def,
                    flush_secs=FLAGS.flush_secs)
                summary_vars = [ae["biases{0}".format(n)],
                                ae["weights{0}".format(n)]]
                hist_summaries = [tf.histogram_summary(v.op.name, v)
                                  for v in summary_vars]
                hist_summaries.append(loss_summaries[i])
                summary_op = tf.merge_summary(hist_summaries)

                # Initialize only the variables this layer introduces.
                vars_to_init = ae.get_variables_to_init(n)
                vars_to_init.append(global_step)
                sess.run(tf.initialize_variables(vars_to_init))

                print("\n\n")
                print("| Training Step | Cross Entropy |  Layer  |   Epoch  |")
                print("|---------------|---------------|---------|----------|")

                # Each step consumes one minibatch.
                for step in xrange(FLAGS.pretraining_epochs * num_train):
                    feed_dict = fill_feed_dict_ae(data.train, input_, target_,
                                                  noise[i])
                    _, loss_value = sess.run([train_op, loss],
                                             feed_dict=feed_dict)

                    if step % 100 == 0:
                        summary_str = sess.run(summary_op, feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, step)
                        image_summary_op = tf.image_summary(
                            "training_images",
                            tf.reshape(input_, (FLAGS.batch_size,
                                                FLAGS.image_size,
                                                FLAGS.image_size, 1)),
                            max_images=FLAGS.batch_size)
                        summary_img_str = sess.run(image_summary_op,
                                                   feed_dict=feed_dict)
                        summary_writer.add_summary(summary_img_str)

                        output = "| {0:>13} | {1:13.4f} | Layer {2} | Epoch {3} |" \
                            .format(step, loss_value, n, step // num_train + 1)
                        print(output)

            # After the first layer, dump its filters as an image summary.
            if i == 0:
                filters = sess.run(tf.identity(ae["weights1"]))
                np.save(pjoin(FLAGS.chkpt_dir, "filters"), filters)
                filters = tile_raster_images(X=filters.T,
                                             img_shape=(FLAGS.image_size,
                                                        FLAGS.image_size),
                                             tile_shape=(10, 10),
                                             output_pixel_vals=False)
                filters = np.expand_dims(np.expand_dims(filters, 0), 3)
                image_var = tf.Variable(filters)
                image_filter = tf.identity(image_var)
                sess.run(tf.initialize_variables([image_var]))
                img_filter_summary_op = tf.image_summary(
                    "first_layer_filters", image_filter)
                summary_writer.add_summary(sess.run(img_filter_summary_op))
                summary_writer.flush()

    return ae
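# Both variants pair each clean minibatch (the reconstruction target)
# with a corrupted copy (the network input) via fill_feed_dict_ae. A
# minimal sketch of what that helper could look like, assuming `noise`
# is a per-pixel drop probability for masking corruption and that the
# data object exposes a next_batch() method; the body below is
# illustrative, not this project's actual implementation.
def fill_feed_dict_ae(data_set, input_pl, target_pl, noise=None):
    input_feed, _ = data_set.next_batch(FLAGS.batch_size)
    target_feed = input_feed.copy()
    if noise:
        # Masking noise: zero each pixel independently with
        # probability `noise`, as in a denoising autoencoder.
        keep = np.random.binomial(1, 1.0 - noise, input_feed.shape)
        input_feed = input_feed * keep
    return {input_pl: input_feed, target_pl: target_feed}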
# Variant 2: the same pretraining loop ported to the TensorFlow 1.x
# summary/initializer APIs; it also saves every layer's weights and
# biases to disk, not just the first layer's filters. `sub` (the
# train/test/validation split passed to read_data_sets_pretraining)
# is assumed to be defined in the enclosing scope.
def main_unsupervised():
    with tf.Graph().as_default() as g:
        sess = tf.Session()

        num_hidden = FLAGS.num_hidden_layers
        ae_hidden_shapes = [getattr(FLAGS, "hidden{0}_units".format(j + 1))
                            for j in range(num_hidden)]
        ae_shape = [FLAGS.image_pixels] + ae_hidden_shapes + [FLAGS.num_classes]
        ae = AutoEncoder(ae_shape, sess)

        data = read_data_sets_pretraining(FLAGS.data_dir, sub['tr'],
                                          sub['te'], sub['val'], one_hot=True)
        num_train = data.train.num_examples

        learning_rates = {j: getattr(FLAGS,
                                     "pre_layer{0}_learning_rate".format(j + 1))
                          for j in range(num_hidden)}
        noise = {j: getattr(FLAGS, "noise_{0}".format(j + 1))
                 for j in range(num_hidden)}

        for i in range(len(ae_shape) - 2):
            n = i + 1
            with tf.variable_scope("pretrain_{0}".format(n)):
                input_ = tf.placeholder(dtype=tf.float32,
                                        shape=(FLAGS.batch_size, ae_shape[0]),
                                        name='ae_input_pl')
                target_ = tf.placeholder(dtype=tf.float32,
                                         shape=(FLAGS.batch_size, ae_shape[0]),
                                         name='ae_target_pl')
                layer = ae.pretrain_net(input_, n)

                with tf.name_scope("target"):
                    target_for_loss = ae.pretrain_net(target_, n,
                                                      is_target=True)

                loss = loss_x_entropy(layer, target_for_loss)
                train_op, global_step = training(loss, learning_rates[i], i)

                summary_dir = pjoin(FLAGS.summary_dir,
                                    'pretraining_{0}'.format(n))
                summary_writer = tf.summary.FileWriter(
                    summary_dir, graph=sess.graph,
                    flush_secs=FLAGS.flush_secs)
                summary_vars = [ae["biases{0}".format(n)],
                                ae["weights{0}".format(n)]]
                hist_summaries = [tf.summary.histogram(v.op.name, v)
                                  for v in summary_vars]
                hist_summaries.append(loss_summaries[i])
                summary_op = tf.summary.merge(hist_summaries)

                vars_to_init = ae.get_variables_to_init(n)
                vars_to_init.append(global_step)
                sess.run(tf.variables_initializer(vars_to_init))

                print("\n\n")
                print("| Training Step | Cross Entropy |  Layer  |   Epoch  |")
                print("|---------------|---------------|---------|----------|")

                for step in range(FLAGS.pretraining_epochs * num_train):
                    feed_dict = fill_feed_dict_ae(data.train, input_, target_,
                                                  noise[i])
                    _, loss_value = sess.run([train_op, loss],
                                             feed_dict=feed_dict)

                    if step % 5000 == 0:
                        summary_str = sess.run(summary_op, feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, step)
                        image_summary_op = tf.summary.image(
                            "training_images",
                            tf.reshape(input_, (FLAGS.batch_size,
                                                FLAGS.image_size,
                                                FLAGS.image_size, 1)),
                            max_outputs=FLAGS.batch_size)
                        summary_img_str = sess.run(image_summary_op,
                                                   feed_dict=feed_dict)
                        summary_writer.add_summary(summary_img_str)

                        output = "| {0:>13} | {1:13.4f} | Layer {2} | Epoch {3} |" \
                            .format(step, loss_value, n, step // num_train + 1)
                        print(output)

            # Save this layer's learned weights and biases to disk.
            filters = sess.run(tf.identity(ae["weights" + str(n)]))
            np.save(pjoin(FLAGS.chkpt_dir, "filters" + str(n)), filters)
            filters_biases = sess.run(tf.identity(ae["biases" + str(n)]))
            np.save(pjoin(FLAGS.chkpt_dir, "biases" + str(n)), filters_biases)

            # For the first layer, also log the filters as an image grid.
            if i == 0:
                filters = tile_raster_images(X=filters.T,
                                             img_shape=(FLAGS.image_size,
                                                        FLAGS.image_size),
                                             tile_shape=(10, 10),
                                             output_pixel_vals=False)
                filters = np.expand_dims(np.expand_dims(filters, 0), 3)
                image_var = tf.Variable(filters)
                image_filter = tf.identity(image_var)
                sess.run(tf.variables_initializer([image_var]))
                img_filter_summary_op = tf.summary.image(
                    "first_layer_filters", image_filter)
                summary_writer.add_summary(sess.run(img_filter_summary_op))
                summary_writer.flush()

    return ae
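# Both variants also share loss_x_entropy for the reconstruction loss.
# A minimal sketch in the TF 1.x API, assuming `output` is a sigmoid
# activation in (0, 1); the clipping epsilon is an assumption for
# numerical safety, not necessarily what this project uses.
def loss_x_entropy(output, target):
    with tf.name_scope("xentropy_loss"):
        # Clip to keep log() finite at the extremes.
        out = tf.clip_by_value(output, 1e-10, 1.0 - 1e-10)
        # Pixel-wise cross-entropy, summed per example and
        # averaged over the batch.
        xent = -tf.reduce_sum(target * tf.log(out) +
                              (1.0 - target) * tf.log(1.0 - out), axis=1)
        return tf.reduce_mean(xent, name='xentropy_mean')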
# Theano denoising-autoencoder demo from the Deep Learning Tutorials.
# load_data, tile_raster_images, and the dA class are the tutorial's
# own helpers (logistic_sgd.py, utils.py, dA.py).
from __future__ import print_function

import os
import sys
import timeit

import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

try:
    import PIL.Image as Image
except ImportError:
    import Image

from logistic_sgd import load_data
from utils import tile_raster_images


def test_dA(learning_rate=0.1, training_epochs=15,
            dataset='mnist.pkl.gz',
            batch_size=20, output_folder='dA_plots'):
    """
    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
                          autoencoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset
    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
            n_visible=28 * 28, n_hidden=500)
    cost, updates = da.get_cost_updates(corruption_level=0.,
                                        learning_rate=learning_rate)

    train_da = theano.function(
        [index], cost, updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = timeit.default_timer()

    ############
    # TRAINING #
    ############
    # go through training epochs
    for epoch in range(training_epochs):
        # go through training set
        c = []
        for batch_index in range(n_train_batches):
            c.append(train_da(batch_index))
        print('Training epoch %d, cost ' % epoch, numpy.mean(c))

    end_time = timeit.default_timer()
    training_time = end_time - start_time

    print(('The no corruption code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % (training_time / 60.)), file=sys.stderr)

    image = Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(28, 28), tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')

    #####################################
    # BUILDING THE MODEL CORRUPTION 30% #
    #####################################
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
            n_visible=28 * 28, n_hidden=500)
    cost, updates = da.get_cost_updates(corruption_level=0.3,
                                        learning_rate=learning_rate)

    train_da = theano.function(
        [index], cost, updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = timeit.default_timer()

    ############
    # TRAINING #
    ############
    # go through training epochs
    for epoch in range(training_epochs):
        # go through training set
        c = []
        for batch_index in range(n_train_batches):
            c.append(train_da(batch_index))
        print('Training epoch %d, cost ' % epoch, numpy.mean(c))

    end_time = timeit.default_timer()
    training_time = end_time - start_time

    print(('The 30% corruption code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % (training_time / 60.)), file=sys.stderr)

    image = Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(28, 28), tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_30.png')

    os.chdir('../')
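# The only difference between the two runs above is corruption_level,
# which the dA class turns into masking noise on the input before
# encoding. A sketch of that corruption step (a method on dA) using the
# same theano_rng, following the Deep Learning Tutorials' dA and shown
# here as a sketch of the technique rather than the verbatim source:
def get_corrupted_input(self, input, corruption_level):
    # Keep each visible unit with probability 1 - corruption_level,
    # zeroing the rest (masking noise); kept units are unchanged.
    return self.theano_rng.binomial(size=input.shape, n=1,
                                    p=1 - corruption_level,
                                    dtype=theano.config.floatX) * input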