def test_create_session(self):
    with pytest.raises(TypeError,
                       match='`lock_memory` must be True, False or float'):
        _ = create_session(lock_memory='')

    # test with default options
    session = create_session()
    self.assertFalse(session._config.gpu_options.allow_growth)
    self.assertFalse(session._config.log_device_placement)
    self.assertTrue(session._config.allow_soft_placement)

    # test with various options
    session = create_session(lock_memory=0.5,
                             log_device_placement=True,
                             allow_soft_placement=False)
    self.assertEqual(
        session._config.gpu_options.per_process_gpu_memory_fraction, .5)
    self.assertTrue(session._config.log_device_placement)
    self.assertFalse(session._config.allow_soft_placement)

    # test with lock_memory = False
    session = create_session(lock_memory=False)
    self.assertTrue(session._config.gpu_options.allow_growth)
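# Usage sketch for `create_session`, restating only what the test above
# asserts: with default options `allow_growth` and `log_device_placement`
# are off while `allow_soft_placement` is on; `lock_memory=False` turns on
# `allow_growth`; a float caps `per_process_gpu_memory_fraction`; any other
# `lock_memory` value raises a TypeError. The variable names are illustrative.
session = create_session(lock_memory=0.5,            # reserve 50% of GPU memory
                         log_device_placement=True,
                         allow_soft_placement=False)
growing_session = create_session(lock_memory=False)  # grow GPU memory on demand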
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    is_training = tf.placeholder(
        dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)
    multi_gpu = MultiGPU(disable_prebuild=False)

    # build the model
    grads = []
    losses = []
    test_nlls = []
    test_lbs = []
    batch_size = get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x] in multi_gpu.data_parallel(
            batch_size, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                with arg_scope([p_net, q_net], is_training=is_training,
                               channels_last=True):
                    _ = q_net(dev_input_x).chain(
                        p_net, latent_names=['z'],
                        observed={'x': dev_input_x})
            else:
                with arg_scope([p_net, q_net],
                               is_training=is_training,
                               channels_last=multi_gpu.channels_last(dev)):
                    # derive the loss and lower-bound for training
                    train_q_net = q_net(dev_input_x)
                    train_chain = train_q_net.chain(
                        p_net, latent_names=['z'], latent_axis=0,
                        observed={'x': dev_input_x})

                    dev_vae_loss = tf.reduce_mean(
                        train_chain.vi.training.sgvb())
                    dev_loss = dev_vae_loss + regularization_loss()
                    losses.append(dev_loss)

                    # derive the nll and logits output for testing
                    test_q_net = q_net(dev_input_x, n_z=config.test_n_z)
                    test_chain = test_q_net.chain(
                        p_net, latent_names=['z'], latent_axis=0,
                        observed={'x': dev_input_x})

                    dev_test_nll = -tf.reduce_mean(
                        test_chain.vi.evaluation.is_loglikelihood())
                    dev_test_lb = tf.reduce_mean(
                        test_chain.vi.lower_bound.elbo())
                    test_nlls.append(dev_test_nll)
                    test_lbs.append(dev_test_lb)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(
                    optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, test_lb, test_nll] = \
        multi_gpu.average([losses, test_lbs, test_nlls], batch_size)
    train_op = multi_gpu.apply_grads(
        grads=multi_gpu.average_grads(grads),
        optimizer=optimizer,
        control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # derive the plotting function
    work_dev = multi_gpu.work_devices[0]
    with tf.device(work_dev), tf.name_scope('plot_x'):
        plot_p_net = p_net(
            n_z=100, is_training=is_training,
            channels_last=multi_gpu.channels_last(work_dev))
        x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']),
                             (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(10, 10),
                results=results)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = load_mnist()
    train_flow = bernoulli_flow(
        x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(
        x_test, config.test_batch_size, sample_now=True)

    with create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with TrainLoop(params,
                       var_groups=['q_net', 'p_net'],
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(
                loop, train_op, [input_x], train_flow,
                feed_dict={learning_rate: learning_rate_var,
                           is_training: True},
                metrics={'loss': loss}
            )
            trainer.anneal_after(
                learning_rate_var,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq
            )
            evaluator = Evaluator(
                loop,
                metrics={'test_nll': test_nll, 'test_lb': test_lb},
                inputs=[input_x],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time'
            )
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
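# A hypothetical configuration sketch for the multi-GPU VAE `main()` above.
# The attribute names are exactly the ones that function reads; the class
# itself, the default values and the `format_config()` stand-in are
# illustrative assumptions, not part of the original code.
class VAEConfig(object):
    x_dim = 784                  # flattened 28x28 binarized MNIST
    test_n_z = 500               # importance samples for the test NLL
    batch_size = 128
    test_batch_size = 128
    max_epoch = 3000
    max_step = None
    initial_lr = 0.001
    lr_anneal_factor = 0.5
    lr_anneal_epoch_freq = 300
    lr_anneal_step_freq = None
    write_summary = False

    def format_config(self):
        # stand-in for whatever pretty-printer the real config object provides
        return '\n'.join(
            '{}={}'.format(k, v) for k, v in sorted(vars(type(self)).items())
            if not k.startswith('_') and not callable(v))


# Example invocation (assumed entry point):
# main(VAEConfig(), './results/vae')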
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    is_training = tf.placeholder(
        dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32,
                                   name='learning_rate')
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)

    # build the model
    with arg_scope([q_net, p_net], is_training=is_training):
        # derive the loss and lower-bound for training
        train_q_net = q_net(input_x, n_samples=config.train_n_samples)
        train_chain = train_q_net.chain(
            p_net, latent_names=['y', 'z'], latent_axis=0,
            observed={'x': input_x})

        if config.train_n_samples is None:
            baseline = reinforce_baseline_net(input_x)
            vae_loss = tf.reduce_mean(
                train_chain.vi.training.reinforce(baseline=baseline))
        else:
            vae_loss = tf.reduce_mean(train_chain.vi.training.vimco())
        loss = vae_loss + regularization_loss()

        # derive the nll and logits output for testing
        test_q_net = q_net(input_x, n_samples=config.test_n_samples)
        test_chain = test_q_net.chain(
            p_net, latent_names=['y', 'z'], latent_axis=0,
            observed={'x': input_x})
        test_nll = -tf.reduce_mean(
            test_chain.vi.evaluation.is_loglikelihood())

        # derive the classifier via q(y|x)
        q_y_given_x = tf.argmax(test_q_net['y'].distribution.logits,
                                axis=-1)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(
            tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plot_x'):
        plot_p_net = p_net(
            observed={'y': tf.range(config.n_clusters, dtype=tf.int32)},
            n_z=10,
            is_training=is_training)
        x_plots = tf.reshape(
            tf.transpose(bernoulli_as_pixel(plot_p_net['x']), (1, 0, 2)),
            (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(config.n_clusters, 10),
                results=results)

    # derive the final un-supervised classifier
    c_classifier = ClusteringClassifier(config.n_clusters, 10)

    def train_classifier(loop):
        df = bernoulli_flow(
            x_train, config.batch_size, shuffle=False, skip_incomplete=False)
        with loop.timeit('cls_train_time'):
            [c_pred] = collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=df,
                feed_dict={is_training: False}
            )
            c_classifier.fit(c_pred, y_train)
            print(c_classifier.describe())

    def evaluate_classifier(loop):
        with loop.timeit('cls_test_time'):
            [c_pred] = collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=test_flow,
                feed_dict={is_training: False}
            )
            y_pred = c_classifier.predict(c_pred)
            cls_metrics = {'test_acc': accuracy_score(y_test, y_pred)}
            loop.collect_metrics(cls_metrics)
            results.update_metrics(cls_metrics)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = load_mnist()
    train_flow = bernoulli_flow(
        x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(
        x_test, config.test_batch_size, sample_now=True)

    with create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with TrainLoop(params,
                       var_groups=['p_net', 'q_net',
                                   'gaussian_mixture_prior'],
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(
                loop, train_op, [input_x], train_flow,
                feed_dict={learning_rate: learning_rate_var,
                           is_training: True},
                metrics={'loss': loss}
            )
            trainer.anneal_after(
                learning_rate_var,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq
            )
            evaluator = Evaluator(
                loop,
                metrics={'test_nll': test_nll},
                inputs=[input_x],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time'
            )
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(train_classifier, loop), freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(evaluate_classifier, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    with codecs.open('cluster_classifier.txt', 'wb', 'utf-8') as f:
        f.write(c_classifier.describe())
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None, config.x_dim), name='input_x')
    input_y = tf.placeholder(
        dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(
        dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)

    # derive the loss, output and accuracy
    logits = model(input_x, is_training=is_training)
    softmax_loss = softmax_classification_loss(logits, input_y)
    loss = softmax_loss + regularization_loss()
    y = softmax_classification_output(logits)
    acc = classification_accuracy(y, input_y)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(
            tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        load_cifar10(x_shape=(config.x_dim,), normalize_x=True)
    train_flow = DataFlow.arrays(
        [x_train, y_train], config.batch_size, shuffle=True,
        skip_incomplete=True)
    test_flow = DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(
                loop, train_op, [input_x, input_y], train_flow,
                feed_dict={learning_rate: learning_rate_var,
                           is_training: True},
                metrics={'loss': loss, 'acc': acc}
            )
            trainer.anneal_after(
                learning_rate_var,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq
            )
            evaluator = Evaluator(
                loop,
                metrics={'test_acc': acc},
                inputs=[input_x, input_y],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time'
            )
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None,) + config.x_shape, name='input_x')
    input_y = tf.placeholder(
        dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(
        dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)
    multi_gpu = MultiGPU()

    # build the model
    grads = []
    losses = []
    y_list = []
    acc_list = []
    batch_size = get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x, dev_input_y] in \
            multi_gpu.data_parallel(batch_size, [input_x, input_y]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                _ = model(dev_input_x, is_training, channels_last=True)
            else:
                # derive the loss, output and accuracy
                dev_logits = model(
                    dev_input_x,
                    is_training=is_training,
                    channels_last=multi_gpu.channels_last(dev))
                dev_softmax_loss = \
                    softmax_classification_loss(dev_logits, dev_input_y)
                dev_loss = dev_softmax_loss + regularization_loss()
                dev_y = softmax_classification_output(dev_logits)
                dev_acc = classification_accuracy(dev_y, dev_input_y)
                losses.append(dev_loss)
                y_list.append(dev_y)
                acc_list.append(dev_acc)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(
                    optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, acc] = multi_gpu.average([losses, acc_list], batch_size)
    [y] = multi_gpu.concat([y_list])
    train_op = multi_gpu.apply_grads(
        grads=multi_gpu.average_grads(grads),
        optimizer=optimizer,
        control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        load_cifar10(x_shape=config.x_shape, normalize_x=True)
    train_flow = DataFlow.arrays(
        [x_train, y_train], config.batch_size, shuffle=True,
        skip_incomplete=True)
    test_flow = DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(
                loop, train_op, [input_x, input_y], train_flow,
                feed_dict={learning_rate: learning_rate_var,
                           is_training: True},
                metrics={'loss': loss, 'acc': acc}
            )
            trainer.anneal_after(
                learning_rate_var,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq
            )
            evaluator = Evaluator(
                loop,
                metrics={'test_acc': acc},
                inputs=[input_x, input_y],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time'
            )
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
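# A hypothetical configuration sketch for the two CIFAR-10 classifier
# `main()` functions above. The attribute names mirror what those functions
# read (the dense variant uses `x_dim`, the multi-GPU variant uses
# `x_shape`); the values and the class itself are illustrative assumptions.
# A `format_config()` stand-in like the one sketched after the VAE example
# would also be needed.
class ClassifierConfig(object):
    x_shape = (32, 32, 3)        # multi-GPU variant: raw CIFAR-10 image shape
    x_dim = 32 * 32 * 3          # dense variant: flattened pixels
    batch_size = 64
    test_batch_size = 256
    max_epoch = 500
    max_step = None
    initial_lr = 0.001
    lr_anneal_factor = 0.5
    lr_anneal_epoch_freq = 100
    lr_anneal_step_freq = None
    write_summary = False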
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    is_training = tf.placeholder(
        dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)

    # build the model
    with arg_scope([q_net, p_net], is_training=is_training):
        # derive the loss and lower-bound for training
        train_q_net = q_net(input_x)
        train_chain = train_q_net.chain(
            p_net, latent_names=['z'], latent_axis=0,
            observed={'x': input_x})

        baseline = baseline_net(input_x)
        cost, baseline_cost = \
            train_chain.vi.training.reinforce(baseline=baseline)
        loss = regularization_loss() + tf.reduce_mean(cost + baseline_cost)

        # derive the nll and logits output for testing
        test_q_net = q_net(input_x, n_z=config.test_n_z)
        test_chain = test_q_net.chain(
            p_net, latent_names=['z'], latent_axis=0,
            observed={'x': input_x})
        test_nll = -tf.reduce_mean(
            test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(
            tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plot_x'):
        plot_p_net = p_net(n_z=100, is_training=is_training)
        x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']),
                             (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            session = get_default_session_or_error()
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(10, 10),
                results=results)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = load_mnist()
    train_flow = bernoulli_flow(
        x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(
        x_test, config.test_batch_size, sample_now=True)

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(
                loop, train_op, [input_x], train_flow,
                feed_dict={learning_rate: learning_rate_var,
                           is_training: True},
                metrics={'loss': loss}
            )
            trainer.anneal_after(
                learning_rate_var,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq
            )
            evaluator = Evaluator(
                loop,
                metrics={'test_nll': test_nll, 'test_lb': test_lb},
                inputs=[input_x],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time'
            )
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()