def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)

    # build the posterior flow
    with tf.variable_scope('posterior_flow'):
        flows = []
        for i in range(config.n_flows):
            flows.append(spt.layers.ActNorm())
            flows.append(spt.layers.CouplingLayer(
                tf.make_template('coupling', coupling_layer_shift_and_scale,
                                 create_scope_now_=True),
                scale_type='exp'))
            flows.append(spt.layers.InvertibleDense())
        posterior_flow = spt.layers.SequentialFlow(flows=flows)

    # derive the initialization op
    with tf.name_scope('initialization'), \
            arg_scope([spt.layers.act_norm], initializing=True), \
            spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_q_net = q_net(input_x, posterior_flow)
        init_chain = init_q_net.chain(p_net, observed={'x': input_x})
        init_loss = tf.reduce_mean(init_chain.vi.training.sgvb())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, posterior_flow)
        train_chain = train_q_net.chain(p_net, observed={'x': input_x})
        vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_z)
        test_chain = test_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        test_nll = -tf.reduce_mean(
            test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(n_z=100)
        x_plots = tf.reshape(
            bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(10, 10))

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(x_shape=[784])
    train_flow = bernoulli_flow(
        x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(
        x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # initialize the network
        spt.utils.ensure_variables_initialized()
        for [batch_x] in train_flow:
            print('Network initialization loss: {:.6g}'.format(
                session.run(init_loss, {input_x: batch_x})))
            print('')
            break

        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['p_net', 'q_net', 'posterior_flow'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(
                loop, train_op, [input_x], train_flow,
                metrics={'loss': loss},
                summaries=tf.summary.merge_all(spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(
                learning_rate,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_nll': test_nll, 'test_lb': test_lb},
                inputs=[input_x],
                data_flow=test_flow,
                time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
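
# NOTE: `coupling_layer_shift_and_scale` is referenced above (via
# `tf.make_template`) but not defined in this file.  A minimal sketch of such
# a shift-and-scale network is given below; the hidden size, depth and use of
# `spt.layers.dense` are illustrative assumptions, not the original
# definition.
def coupling_layer_shift_and_scale(x1, n2):
    # compute hidden features from the first half of the coupling input
    h = x1
    for _ in range(2):
        h = spt.layers.dense(h, 500, activation_fn=tf.nn.leaky_relu)
    # derive a `shift` and a pre-activation `scale` of size `n2` for the
    # second half; `CouplingLayer` applies `scale_type` ('exp'/'sigmoid')
    shift = spt.layers.dense(h, n2, name='shift')
    scale = spt.layers.dense(h, n2, name='scale')
    return shift, scale
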
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None,) + config.x_shape, name='input_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)

    # derive the loss for initializing
    with tf.name_scope('initialization'), \
            arg_scope([p_net, q_net], is_initializing=True), \
            spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_q_net = q_net(input_x)
        init_chain = init_q_net.chain(p_net, observed={'x': input_x})
        init_loss = tf.reduce_mean(init_chain.vi.training.sgvb())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'), \
            arg_scope([p_net, q_net], is_training=True):
        train_q_net = q_net(input_x)
        train_chain = train_q_net.chain(p_net, observed={'x': input_x})
        train_loss = (tf.reduce_mean(train_chain.vi.training.sgvb()) +
                      tf.losses.get_regularization_loss())

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        input_original_x = tf.placeholder(
            dtype=tf.float32, shape=(None,) + config.x_shape,
            name='input_original_x')
        test_q_net = q_net(input_x, n_z=config.test_n_z)
        test_chain = test_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        test_nll = -tf.reduce_mean(
            test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

        # bernoulli test: clamp the Bernoulli probabilities away from 0 and 1
        # for numerical stability, then recover the clamped logits (clipping
        # the raw logits to (1e-7, 1 - 1e-7) would distort them instead)
        logits = test_chain.model['x'].distribution.logits
        probs = tf.clip_by_value(tf.sigmoid(logits),
                                 clip_value_min=1e-7,
                                 clip_value_max=1 - 1e-7)
        logits = tf.log(probs / (1. - probs))
        labels = tf.tile(
            tf.expand_dims(tf.to_float(test_chain.model['x']), axis=0),
            tf.concat([[config.test_n_z],
                       [1] * spt.utils.get_rank(test_chain.model['x'])],
                      axis=0))
        log_px_given_z = tf.reduce_sum(
            -tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                     logits=logits),
            axis=list(range(-len(config.x_shape), 0)))
        vi = spt.VariationalInference(
            log_joint=log_px_given_z + test_chain.model['z'].log_prob(),
            latent_log_probs=[test_q_net['z'].log_prob()],
            axis=0)
        adv_test_nll = -tf.reduce_mean(vi.evaluation.is_loglikelihood())
        adv_test_lb = tf.reduce_mean(vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        params = tf.trainable_variables()
        optimizer = tf.train.AdamOptimizer(learning_rate)
        grads = optimizer.compute_gradients(train_loss, params)
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        x_plots = tf.reshape(
            bernoulli_as_pixel(p_net(n_z=100)['x']), (-1,) + config.x_shape)

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(10, 10),
                results=results,
                channels_last=config.channels_last,
            )

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(x_shape=config.x_shape)
    train_flow = bernoulli_flow(
        x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(
        x_test, config.test_batch_size, sample_now=True)
    # pair each sampled (binarized) batch with the original images, matching
    # the evaluator inputs [input_x, input_original_x] below (the labels
    # `y_test` do not belong in this flow)
    bernoulli_test_flow = spt.DataFlow.arrays(
        [x_test], config.test_batch_size)
    bernoulli_test_flow = spt.DataFlow.gather(
        [test_flow, bernoulli_test_flow])

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        for [x] in train_flow:
            print('Network initialized, first-batch loss is {:.6g}.\n'.format(
                session.run(init_loss, feed_dict={input_x: x})))
            break

        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['q_net', 'p_net'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(
                loop, train_op, [input_x], train_flow,
                metrics={'loss': train_loss},
                summaries=tf.summary.merge_all(spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(
                learning_rate,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_nll': test_nll, 'test_lb': test_lb},
                inputs=[input_x],
                data_flow=test_flow,
                time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            # an earlier variant, kept for reference:
            # bernoulli_evaluator = spt.Evaluator(
            #     loop,
            #     metrics={'adv_test_nll': adv_test_nll,
            #              'adv_test_lb': adv_test_lb,
            #              'log_px_given_z': tf.reduce_mean(log_px_given_z)},
            #     inputs=[input_x],
            #     data_flow=bernoulli_test_flow,
            #     time_metric_name='test_time'
            # )
            bernoulli_evaluator = spt.Evaluator(
                loop,
                metrics={'adv_test_nll': adv_test_nll,
                         'adv_test_lb': adv_test_lb},
                inputs=[input_x, input_original_x],
                data_flow=bernoulli_test_flow,
                time_metric_name='test_time')
            bernoulli_evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(
                    bernoulli_evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=1)
            trainer.evaluate_after_epochs(bernoulli_evaluator, freq=1)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=1)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None, config.x_dim), name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)

    # derive the loss, output and accuracy
    logits = model(input_x)
    cls_loss = tf.losses.sparse_softmax_cross_entropy(input_y, logits)
    loss = cls_loss + tf.losses.get_regularization_loss()
    y = spt.ops.softmax_classification_output(logits)
    acc = spt.ops.classification_accuracy(y, input_y)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(normalize_x=True)
    train_flow = spt.DataFlow.arrays(
        [x_train, y_train], config.batch_size, shuffle=True,
        skip_incomplete=True)
    test_flow = spt.DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with spt.utils.create_session().as_default(), \
            train_flow.threaded(5) as train_flow:
        # train the network
        with spt.TrainLoop(
                params,
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(
                loop, train_op, [input_x, input_y], train_flow,
                metrics={'loss': loss, 'acc': acc})
            trainer.anneal_after(
                learning_rate,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_acc': acc},
                inputs=[input_x, input_y],
                data_flow=test_flow,
                time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)

    # derive the output for initialization
    with tf.name_scope('initialization'):
        init_q_net = q_net(input_x, is_initializing=True)
        init_chain = init_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x},
            is_initializing=True)
        init_lb = tf.reduce_mean(init_chain.vi.lower_bound.elbo())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x)
        train_chain = train_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, n_z=config.test_n_z)
        test_chain = test_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        test_nll = -tf.reduce_mean(
            test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(n_z=100)
        x_plots = tf.reshape(
            bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(10, 10),
                results=results)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = spt.datasets.load_mnist()
    train_flow = bernoulli_flow(
        x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(
        x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        for [x] in train_flow:
            lb_out = session.run(init_lb, feed_dict={input_x: x})
            print('Network initialized, first-batch lower-bound is '
                  '{:.2g}.\n'.format(lb_out))
            break

        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['q_net', 'p_net'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(
                loop, train_op, [input_x], train_flow,
                metrics={'loss': loss})
            trainer.anneal_after(
                learning_rate,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_nll': test_nll, 'test_lb': test_lb},
                inputs=[input_x],
                data_flow=test_flow,
                time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
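
# NOTE: `bernoulli_flow` is a repo helper that is not shown in this file.
# From the call sites above, it dynamically binarizes MNIST pixels into
# Bernoulli samples, either eagerly (`sample_now=True`, so the test set is
# binarized once) or lazily per mini-batch.  A rough sketch under those
# assumptions (the name and internals here are illustrative, not the
# original implementation):
def bernoulli_flow_sketch(x, batch_size, shuffle=False, skip_incomplete=False,
                          sample_now=False):
    p = np.asarray(x, dtype=np.float32) / 255.0  # pixel -> probability
    if sample_now:
        # sample the binarization once, so every epoch sees the same bits
        x_binary = np.random.binomial(1, p).astype(np.int32)
        return spt.DataFlow.arrays([x_binary], batch_size, shuffle=shuffle,
                                   skip_incomplete=skip_incomplete)
    # otherwise re-sample the binarization for every mini-batch
    flow = spt.DataFlow.arrays([p], batch_size, shuffle=shuffle,
                               skip_incomplete=skip_incomplete)
    return flow.map(lambda b: (np.random.binomial(1, b).astype(np.int32),))
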
def main(trainpath, normalpath, abnormalpath, outputpath):
    if config.debug_level == -1:
        spt.utils.set_assertion_enabled(False)
    elif config.debug_level == 1:
        spt.utils.set_check_numerics(True)
        # spt.utils.set_assertion_enabled(False)

    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # input and output files; adjust the output locations as needed
    train_file = trainpath
    normal_file = normalpath
    abnormal_file = abnormalpath
    output_file = os.path.join(
        'webankdata',
        '{}_{}.csv'.format(config.flow_type or 'vae', outputpath))
    valid_file = os.path.join(
        'webankdata',
        'v{}_{}.csv'.format(config.flow_type or 'vae', outputpath))

    # read data
    (x_train, y_train), (x_test, y_test), flows_test = \
        get_data_vae(train_file, normal_file, abnormal_file)
    config.x_dim = x_train.shape[1]
    # config.z_dim = get_z_dim(x_train.shape[1])

    all_len = x_train.shape[0]
    print('original data size: %s' % all_len)
    # dump the first 30 rows for a quick sanity check
    for i in range(30):
        print(list(x_train[i]))

    valid_rate = 0.1
    x_train, x_valid = train_test_split(x_train, test_size=valid_rate)
    # x_valid = x_train
    print('%s for validation, %s for training v2'
          % (x_valid.shape[0], x_train.shape[0]))
    print('%s for test' % x_test.shape[0])
    print('x_dim: %s z_dim: %s' % (config.x_dim, config.z_dim))

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None, config.x_dim), name='input_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)

    # build the posterior flow
    if config.flow_type is None:
        posterior_flow = None
    elif config.flow_type == 'planar_nf':
        posterior_flow = \
            spt.layers.planar_normalizing_flows(config.n_planar_nf_layers)
    else:
        assert config.flow_type == 'rnvp'
        with tf.variable_scope('posterior_flow'):
            flows = []
            for i in range(config.n_rnvp_layers):
                flows.append(spt.layers.ActNorm())
                flows.append(spt.layers.CouplingLayer(
                    tf.make_template('coupling',
                                     coupling_layer_shift_and_scale,
                                     create_scope_now_=True),
                    scale_type='sigmoid'))
                flows.append(
                    spt.layers.InvertibleDense(strict_invertible=True))
            posterior_flow = spt.layers.SequentialFlow(flows=flows)

    # derive the initialization op
    with tf.name_scope('initialization'), \
            arg_scope([spt.layers.act_norm], initializing=True):
        init_q_net = q_net(input_x, posterior_flow)
        init_chain = init_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        init_loss = tf.reduce_mean(init_chain.vi.training.sgvb())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, posterior_flow)
        train_chain = train_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_z)
        test_chain = test_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        test_logp = test_chain.vi.evaluation.is_loglikelihood()
        test_nll = -tf.reduce_mean(test_logp)
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        clipped_grads = []
        for grad, var in grads:
            if grad is not None and var is not None:
                if config.norm_clip is not None:
                    grad = tf.clip_by_norm(grad, config.norm_clip)
                clipped_grads.append((grad, var))
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(clipped_grads)

    train_flow = spt.DataFlow.arrays(
        [x_train], config.batch_size, shuffle=True, skip_incomplete=True)
    valid_flow = spt.DataFlow.arrays([x_valid], config.test_batch_size)
    test_flow = spt.DataFlow.arrays([x_test], config.test_batch_size)

    # model file
    # model_name = ''
    model_name = os.path.join(
        'webankdata',
        'md_{}_{}.model'.format(config.flow_type or 'vae',
                                outputpath.split('.')[0]))

    with spt.utils.create_session().as_default() as session:
        var_dict = spt.utils.get_variables_as_dict()
        saver = spt.VariableSaver(var_dict, model_name)
        # if os.path.exists(model_name):
        if False:  # always retrain; flip this to restore a saved model
            print('%s exists' % model_name)
            saver.restore()
        else:
            print('no model here, and start training')
            # initialize the network
            spt.utils.ensure_variables_initialized()
            for [batch_x] in train_flow:
                print('Network initialization loss: {:.6g}'.format(
                    session.run(init_loss, {input_x: batch_x})))
                print('')
                break

            # train the network
            with spt.TrainLoop(
                    params,
                    var_groups=['p_net', 'q_net', 'posterior_flow'],
                    max_epoch=config.max_epoch,
                    max_step=config.max_step,
                    early_stopping=True,
                    valid_metric_name='valid_loss',
                    valid_metric_smaller_is_better=True) as loop:
                trainer = spt.Trainer(
                    loop, train_op, [input_x], train_flow,
                    metrics={'loss': loss})
                trainer.anneal_after(
                    learning_rate,
                    epochs=config.lr_anneal_epoch_freq,
                    steps=config.lr_anneal_step_freq)
                evaluator = spt.Evaluator(
                    loop,
                    metrics={'valid_loss': test_nll},
                    inputs=[input_x],
                    data_flow=valid_flow,
                    time_metric_name='valid_time')
                trainer.evaluate_after_epochs(evaluator, freq=10)
                trainer.log_after_epochs(freq=1)
                trainer.run()
            saver.save()

        # get the answer
        print('start testing')
        start = time.time()
        test_ans = collect_outputs([test_logp], [input_x], test_flow)[0] \
            / config.x_dim
        end = time.time()
        print('test time: ', end - start)
        pd.DataFrame(
            {'id': flows_test, 'label': y_test, 'score': test_ans}) \
            .to_csv(output_file, index=False)

        valid_ans = collect_outputs([test_logp], [input_x], valid_flow)[0] \
            / config.x_dim
        pd.DataFrame({'score': valid_ans}).to_csv(valid_file, index=False)
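
# NOTE: unlike the other entry points, this `main` takes file paths as
# arguments instead of parsing `sys.argv` itself.  A hypothetical
# command-line wrapper (the flag names here are assumptions, not the repo's
# actual launcher) might look like:
if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--train', dest='trainpath', required=True)
    parser.add_argument('--normal', dest='normalpath', required=True)
    parser.add_argument('--abnormal', dest='abnormalpath', required=True)
    parser.add_argument('--output', dest='outputpath', required=True)
    args = parser.parse_args()
    main(args.trainpath, args.normalpath, args.abnormalpath, args.outputpath)
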
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None, config.x_dim), name='input_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, n_samples=config.train_n_samples)
        train_chain = train_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        if config.vi_algorithm == 'reinforce':
            baseline = reinforce_baseline_net(input_x)
            vae_loss = tf.reduce_mean(
                train_chain.vi.training.reinforce(baseline=baseline))
        else:
            assert config.vi_algorithm == 'vimco'
            vae_loss = tf.reduce_mean(train_chain.vi.training.vimco())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, n_samples=config.test_n_samples)
        test_chain = test_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        test_nll = -tf.reduce_mean(
            test_chain.vi.evaluation.is_loglikelihood())

        # derive the classifier via q(y|x)
        q_y_given_x = tf.argmax(test_q_net['y'].distribution.logits,
                                axis=-1, name='q_y_given_x')

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(
            observed={'y': tf.range(config.n_clusters, dtype=tf.int32)},
            n_z=10)
        x_plots = tf.reshape(
            tf.transpose(bernoulli_as_pixel(plot_p_net['x']), (1, 0, 2)),
            (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(config.n_clusters, 10),
                results=results)

    # derive the final un-supervised classifier
    c_classifier = ClusteringClassifier(config.n_clusters, 10)

    def train_classifier(loop):
        df = bernoulli_flow(
            x_train, config.batch_size, shuffle=False, skip_incomplete=False)
        with loop.timeit('cls_train_time'):
            [c_pred] = spt.evaluation.collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=df,
            )
            c_classifier.fit(c_pred, y_train)
            print(c_classifier.describe())

    def evaluate_classifier(loop):
        with loop.timeit('cls_test_time'):
            [c_pred] = spt.evaluation.collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=test_flow,
            )
            y_pred = c_classifier.predict(c_pred)
            cls_metrics = {'test_acc': accuracy_score(y_test, y_pred)}
            loop.collect_metrics(cls_metrics)
            results.update_metrics(cls_metrics)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(x_shape=[784])
    train_flow = bernoulli_flow(
        x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    test_flow = bernoulli_flow(
        x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['p_net', 'q_net', 'gaussian_mixture_prior'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(
                loop, train_op, [input_x], train_flow,
                metrics={'loss': loss},
                summaries=tf.summary.merge_all(spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(
                learning_rate,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_nll': test_nll},
                inputs=[input_x],
                data_flow=test_flow,
                time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(plot_samples, loop), freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(train_classifier, loop), freq=10)
            trainer.evaluate_after_epochs(
                functools.partial(evaluate_classifier, loop), freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    with codecs.open('cluster_classifier.txt', 'wb', 'utf-8') as f:
        f.write(c_classifier.describe())
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
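
# NOTE: `ClusteringClassifier` is defined elsewhere in the repo.  Judging
# from its use above (`fit(c_pred, y_train)` / `predict(c_pred)`), it maps
# each posterior cluster to a class label by majority vote.  A minimal
# sketch of that idea (hypothetical, not the original implementation):
class ClusteringClassifierSketch(object):

    def __init__(self, n_clusters, n_classes):
        self.n_classes = n_classes
        self.cluster_to_label = np.zeros([n_clusters], dtype=np.int32)

    def fit(self, c_pred, y_true):
        # assign each cluster the most frequent true label among its members
        for c in range(len(self.cluster_to_label)):
            members = y_true[c_pred == c]
            if len(members) > 0:
                self.cluster_to_label[c] = np.bincount(
                    members, minlength=self.n_classes).argmax()

    def predict(self, c_pred):
        return self.cluster_to_label[c_pred]

    def describe(self):
        return 'cluster -> label: {}'.format(self.cluster_to_label.tolist())
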
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting/sample', exist_ok=True)
    results.make_dirs('plotting/z_plot', exist_ok=True)
    results.make_dirs('plotting/train.reconstruct', exist_ok=True)
    results.make_dirs('plotting/test.reconstruct', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    posterior_flow = spt.layers.planar_normalizing_flows(
        config.nf_layers, name='posterior_flow')

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None,) + config.x_shape, name='input_x')
    # original (un-noised) images, consumed by `real_energy` below
    input_origin_x = tf.placeholder(
        dtype=tf.float32, shape=(None,) + config.x_shape,
        name='input_origin_x')
    warm = tf.placeholder(dtype=tf.float32, shape=(), name='warm')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)
    beta = tf.Variable(initial_value=0.0, dtype=tf.float32, name='beta',
                       trainable=True)

    # derive the loss for initializing
    with tf.name_scope('initialization'), \
            arg_scope([spt.layers.act_norm], initializing=True), \
            spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_pn_net = p_net(n_z=config.train_n_pz, beta=beta)
        init_q_net = q_net(input_x, posterior_flow, n_z=config.train_n_qz)
        init_p_net = p_net(observed={'x': input_x, 'z': init_q_net['z']},
                           n_z=config.train_n_qz, beta=beta)
        init_loss = sum(get_all_loss(init_q_net, init_p_net, init_pn_net))

    # derive the loss and lower-bound for training
    with tf.name_scope('training'), \
            arg_scope([batch_norm], training=True):
        train_pn_theta = p_net(n_z=config.train_n_pz, beta=beta)
        train_pn_omega = p_omega_net(n_z=config.train_n_pz, beta=beta)
        train_log_Z = spt.ops.log_mean_exp(
            -train_pn_theta['z'].log_prob().energy -
            train_pn_theta['z'].log_prob())
        train_q_net = q_net(input_x, posterior_flow, n_z=config.train_n_qz)
        train_p_net = p_net(observed={'x': input_x, 'z': train_q_net['z']},
                            n_z=config.train_n_qz, beta=beta,
                            log_Z=train_log_Z)

        VAE_loss, _, VAE_G_loss, VAE_D_real = get_all_loss(
            train_q_net, train_p_net, train_pn_theta, warm)
        _, D_loss, G_loss, D_real = get_all_loss(
            train_q_net, train_p_net, train_pn_omega, warm)

        VAE_loss += tf.losses.get_regularization_loss()
        VAE_G_loss += tf.losses.get_regularization_loss()
        D_loss += tf.losses.get_regularization_loss()
        G_loss += tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_qz)
        # test_pd_net = p_net(n_z=config.test_n_pz // 20, mcmc_iterator=20,
        #                     beta=beta, log_Z=get_log_Z())
        test_pn_net = p_net(n_z=config.test_n_pz, mcmc_iterator=0, beta=beta,
                            log_Z=get_log_Z())
        test_chain = test_q_net.chain(
            p_net, observed={'x': input_x}, n_z=config.test_n_qz,
            latent_axis=0, beta=beta, log_Z=get_log_Z())
        test_recon = tf.reduce_mean(test_chain.model['x'].log_prob())
        test_mse = tf.reduce_sum(
            (tf.round(test_chain.model['x'].distribution.mean * 128 + 127.5) -
             tf.round(test_chain.model['x'] * 128 + 127.5)) ** 2,
            axis=[-1, -2, -3])  # (sample_dim, batch_dim, x_sample_dim)
        test_mse = tf.reduce_min(test_mse, axis=[0])
        test_mse = tf.reduce_mean(
            tf.reduce_mean(
                tf.reshape(test_mse, (-1, config.test_x_samples,)),
                axis=-1))
        test_nll = -tf.reduce_mean(
            spt.ops.log_mean_exp(
                tf.reshape(
                    test_chain.vi.evaluation.is_loglikelihood(),
                    (-1, config.test_x_samples,)
                ),
                axis=-1)
        ) + config.x_shape_multiple * np.log(128.0)
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

        vi = spt.VariationalInference(
            log_joint=(
                test_chain.model['x'].log_prob() +
                test_chain.model['z'].distribution.log_prob(
                    test_chain.model['z'], group_ndims=1,
                    y=test_chain.model['x']
                ).log_energy_prob),
            latent_log_probs=[test_q_net['z'].log_prob()],
            axis=0)
        adv_test_nll = -tf.reduce_mean(
            spt.ops.log_mean_exp(
                tf.reshape(
                    vi.evaluation.is_loglikelihood(),
                    (-1, config.test_x_samples,)
                ),
                axis=-1)
        ) + config.x_shape_multiple * np.log(128.0)
        adv_test_lb = tf.reduce_mean(vi.lower_bound.elbo())

        real_energy = tf.reduce_mean(D_psi(input_origin_x))
        reconstruct_energy = tf.reduce_mean(
            D_psi(test_chain.model['x'].distribution.mean))
        pd_energy = tf.reduce_mean(
            D_psi(test_pn_net['x'].distribution.mean) *
            tf.exp(test_pn_net['z'].log_prob().log_energy_prob -
                   test_pn_net['z'].log_prob()))
        pn_energy = tf.reduce_mean(
            D_psi(test_pn_net['x'].distribution.mean))
        log_Z_compute_op = spt.ops.log_mean_exp(
            -test_pn_net['z'].log_prob().energy -
            test_pn_net['z'].log_prob())
        kl_adv_and_gaussian = tf.reduce_mean(
            test_pn_net['z'].log_prob() -
            test_pn_net['z'].log_prob().log_energy_prob)

    xi_node = get_var('p_net/xi')

    # derive the optimizer
    with tf.name_scope('optimizing'):
        VAE_params = (tf.trainable_variables('q_net') +
                      tf.trainable_variables('G_theta') +
                      tf.trainable_variables('beta') +
                      tf.trainable_variables('posterior_flow') +
                      tf.trainable_variables('p_net/xi'))
        D_params = tf.trainable_variables('D_psi')
        VAE_G_params = tf.trainable_variables('G_theta')
        G_params = tf.trainable_variables('G_omega')

        print("========VAE_params=========")
        print(VAE_params)
        print("========D_params=========")
        print(D_params)
        print("========G_params=========")
        print(G_params)

        with tf.variable_scope('VAE_optimizer'):
            VAE_optimizer = tf.train.AdamOptimizer(learning_rate)
            VAE_grads = VAE_optimizer.compute_gradients(VAE_loss, VAE_params)
        with tf.variable_scope('VAE_G_optimizer'):
            VAE_G_optimizer = tf.train.AdamOptimizer(
                learning_rate, beta1=0.5, beta2=0.999)
            VAE_G_grads = VAE_G_optimizer.compute_gradients(
                VAE_G_loss, VAE_G_params)
        with tf.variable_scope('D_optimizer'):
            D_optimizer = tf.train.AdamOptimizer(
                learning_rate, beta1=0.5, beta2=0.999)
            D_grads = D_optimizer.compute_gradients(D_loss, D_params)
        with tf.variable_scope('G_optimizer'):
            G_optimizer = tf.train.AdamOptimizer(
                learning_rate, beta1=0.5, beta2=0.999)
            G_grads = G_optimizer.compute_gradients(G_loss, G_params)
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            VAE_train_op = VAE_optimizer.apply_gradients(VAE_grads)
            # apply VAE_G_grads with the optimizer that computed them
            VAE_G_train_op = VAE_G_optimizer.apply_gradients(VAE_G_grads)
            G_train_op = G_optimizer.apply_gradients(G_grads)
            D_train_op = D_optimizer.apply_gradients(D_grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        sample_n_z = 100
        gan_plots = tf.reshape(
            p_omega_net(n_z=sample_n_z, beta=beta)['x'].distribution.mean,
            (-1,) + config.x_shape)
        initial_z = q_net(gan_plots, posterior_flow)['z']
        gan_plots = 256.0 * gan_plots / 2 + 127.5
        initial_z = tf.expand_dims(initial_z, axis=1)
        plot_net = p_net(n_z=sample_n_z, mcmc_iterator=20, beta=beta,
                         initial_z=initial_z)
        plot_history_e_z = plot_net['z'].history_e_z
        plot_history_z = plot_net['z'].history_z
        plot_history_pure_e_z = plot_net['z'].history_pure_e_z
        plot_history_ratio = plot_net['z'].history_ratio
        x_plots = 256.0 * tf.reshape(
            plot_net['x'].distribution.mean,
            (-1,) + config.x_shape) / 2 + 127.5
        reconstruct_q_net = q_net(input_x, posterior_flow)
        reconstruct_z = reconstruct_q_net['z']
        reconstruct_plots = 256.0 * tf.reshape(
            p_net(observed={'z': reconstruct_z},
                  beta=beta)['x'].distribution.mean,
            (-1,) + config.x_shape
        ) / 2 + 127.5
        plot_reconstruct_energy = D_psi(reconstruct_plots)
        gan_plots = tf.clip_by_value(gan_plots, 0, 255)
        x_plots = tf.clip_by_value(x_plots, 0, 255)
        reconstruct_plots = tf.clip_by_value(reconstruct_plots, 0, 255)

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            try:
                # plot reconstructions on training data
                for [x] in reconstruct_train_flow:
                    x_samples = uniform_sampler.sample(x)
                    images = np.zeros((300,) + config.x_shape, dtype=np.uint8)
                    images[::3, ...] = np.round(256.0 * x / 2 + 127.5)
                    images[1::3, ...] = np.round(
                        256.0 * x_samples / 2 + 127.5)
                    images[2::3, ...] = np.round(session.run(
                        reconstruct_plots, feed_dict={input_x: x}))
                    batch_reconstruct_z = session.run(
                        reconstruct_z, feed_dict={input_x: x})
                    # print(np.mean(batch_reconstruct_z ** 2, axis=-1))
                    save_images_collection(
                        images=images,
                        filename='plotting/train.reconstruct/{}.png'.format(
                            loop.epoch),
                        grid_size=(20, 15),
                        results=results,
                    )
                    break

                # plot reconstructions on test data
                for [x] in reconstruct_test_flow:
                    x_samples = uniform_sampler.sample(x)
                    images = np.zeros((300,) + config.x_shape, dtype=np.uint8)
                    images[::3, ...] = np.round(256.0 * x / 2 + 127.5)
                    images[1::3, ...] = np.round(
                        256.0 * x_samples / 2 + 127.5)
                    images[2::3, ...] = np.round(session.run(
                        reconstruct_plots, feed_dict={input_x: x}))
                    save_images_collection(
                        images=images,
                        filename='plotting/test.reconstruct/{}.png'.format(
                            loop.epoch),
                        grid_size=(20, 15),
                        results=results,
                    )
                    break

                # plot samples
                [images, gan_images, batch_history_e_z, batch_history_z,
                 batch_history_pure_e_z, batch_history_ratio] = session.run(
                    [x_plots, gan_plots, plot_history_e_z, plot_history_z,
                     plot_history_pure_e_z, plot_history_ratio])
                # print(batch_history_e_z)
                # print(np.mean(batch_history_z ** 2, axis=-1))
                # print(batch_history_pure_e_z)
                # print(batch_history_ratio)
                save_images_collection(
                    images=np.round(gan_images),
                    filename='plotting/sample/gan-{}.png'.format(loop.epoch),
                    grid_size=(10, 10),
                    results=results,
                )
                save_images_collection(
                    images=np.round(images),
                    filename='plotting/sample/{}.png'.format(loop.epoch),
                    grid_size=(10, 10),
                    results=results,
                )
            except Exception as e:
                print(e)

    # prepare for training and testing data
    (_x_train, _y_train), (_x_test, _y_test) = \
        spt.datasets.load_cifar10(x_shape=config.x_shape)
    # train_flow = bernoulli_flow(
    #     x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    x_train = (_x_train - 127.5) / 256.0 * 2
    x_test = (_x_test - 127.5) / 256.0 * 2
    uniform_sampler = UniformNoiseSampler(
        -1.0 / 256.0, 1.0 / 256.0, dtype=np.float64)  # np.float is deprecated
    train_flow = spt.DataFlow.arrays(
        [x_train], config.batch_size, shuffle=True, skip_incomplete=True)
    # train_flow = train_flow.map(uniform_sampler)
    gan_train_flow = spt.DataFlow.arrays(
        [np.concatenate([x_train, x_test], axis=0)],
        config.batch_size, shuffle=True, skip_incomplete=True)
    gan_train_flow = gan_train_flow.map(uniform_sampler)
    reconstruct_train_flow = spt.DataFlow.arrays(
        [x_train], 100, shuffle=True, skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays(
        [x_test], 100, shuffle=True, skip_incomplete=False)
    test_flow = spt.DataFlow.arrays(
        [np.repeat(x_test, config.test_x_samples, axis=0)],
        config.test_batch_size)
    # pair each noised batch with its original images, so that both
    # `input_x` and `input_origin_x` can be fed during evaluation
    test_flow = test_flow.map(lambda x: (uniform_sampler.sample(x), x))

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        for [x] in train_flow:
            print('Network initialized, first-batch loss is {:.6g}.\n'.format(
                session.run(init_loss, feed_dict={input_x: x})))
            break

        # if config.z_dim == 512:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/48/19/6f3b6c3ef49ded8ba2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 1024:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/cd/19/6f9d69b5d1931e67e2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 2048:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/4d/19/6f9d69b5d19398c8c2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 3072:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/5d/19/6f9d69b5d1936fb2d2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # else:
        restore_checkpoint = None

        # train the network
        with spt.TrainLoop(
                tf.trainable_variables(),
                var_groups=['q_net', 'p_net', 'posterior_flow',
                            'G_theta', 'D_psi'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False,
                checkpoint_dir=results.system_path('checkpoint'),
                checkpoint_epoch_freq=100,
                restore_checkpoint=restore_checkpoint) as loop:
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_nll': test_nll,
                         'test_lb': test_lb,
                         'adv_test_nll': adv_test_nll,
                         'adv_test_lb': adv_test_lb,
                         'reconstruct_energy': reconstruct_energy,
                         'real_energy': real_energy,
                         'pd_energy': pd_energy,
                         'pn_energy': pn_energy,
                         'test_recon': test_recon,
                         'kl_adv_and_gaussian': kl_adv_and_gaussian,
                         'test_mse': test_mse},
                inputs=[input_x, input_origin_x],
                data_flow=test_flow,
                time_metric_name='test_time')

            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()

            epoch_iterator = loop.iter_epochs()
            n_critical = config.n_critical

            # adversarial training
            for epoch in epoch_iterator:
                step_iterator = MyIterator(train_flow)
                while step_iterator.has_next:
                    if epoch <= config.warm_up_start:
                        # discriminator training
                        for step, [x] in loop.iter_steps(
                                limited(step_iterator, n_critical)):
                            [_, batch_D_loss, batch_D_real] = session.run(
                                [D_train_op, D_loss, D_real],
                                feed_dict={input_x: x})
                            loop.collect_metrics(D_loss=batch_D_loss)
                            loop.collect_metrics(D_real=batch_D_real)

                        # generator training
                        [_, batch_G_loss] = session.run(
                            [G_train_op, G_loss], feed_dict={})
                        loop.collect_metrics(G_loss=batch_G_loss)
                    else:
                        # vae training
                        for step, [x] in loop.iter_steps(
                                limited(step_iterator, n_critical)):
                            # NOTE: `train_recon`, `train_recon_energy`,
                            # `train_kl` and `train_grad_penalty` must be
                            # provided by the loss-building code elsewhere
                            # in the repo; they are not defined above.
                            [_, batch_VAE_loss, beta_value, xi_value,
                             batch_train_recon, batch_train_recon_energy,
                             batch_VAE_D_real, batch_train_kl,
                             batch_train_grad_penalty] = session.run(
                                [VAE_train_op, VAE_loss, beta, xi_node,
                                 train_recon, train_recon_energy, VAE_D_real,
                                 train_kl, train_grad_penalty],
                                feed_dict={
                                    input_x: x,
                                    warm: 1.0  # min(1.0, 1.0 * epoch / config.warm_up_epoch)
                                })
                            loop.collect_metrics(
                                batch_VAE_loss=batch_VAE_loss)
                            loop.collect_metrics(xi=xi_value)
                            loop.collect_metrics(beta=beta_value)
                            loop.collect_metrics(
                                train_recon=batch_train_recon)
                            loop.collect_metrics(
                                train_recon_energy=batch_train_recon_energy)
                            loop.collect_metrics(D_real=batch_VAE_D_real)
                            loop.collect_metrics(train_kl=batch_train_kl)
                            loop.collect_metrics(
                                train_grad_penalty=batch_train_grad_penalty)
                            # loop.print_logs()

                        # generator training
                        # [_, batch_VAE_G_loss] = session.run(
                        #     [VAE_G_train_op, VAE_G_loss], feed_dict={})
                        # loop.collect_metrics(VAE_G_loss=batch_VAE_G_loss)

                # `lr_anneal_epoch_freq` holds a collection of epochs here
                if epoch in config.lr_anneal_epoch_freq:
                    learning_rate.anneal()

                if epoch == config.warm_up_start:
                    learning_rate.set(config.initial_lr)

                if epoch % config.plot_epoch_freq == 0:
                    plot_samples(loop)

                if epoch % config.test_epoch_freq == 0:
                    log_Z_list = []
                    for i in range(config.log_Z_times):
                        log_Z_list.append(session.run(log_Z_compute_op))
                    # scipy.misc.logsumexp was removed; use scipy.special
                    from scipy.special import logsumexp
                    log_Z = (logsumexp(np.asarray(log_Z_list)) -
                             np.log(config.log_Z_times))
                    get_log_Z().set(log_Z)
                    print('log_Z_list:{}'.format(log_Z_list))
                    print('log_Z:{}'.format(log_Z))

                    with loop.timeit('eval_time'):
                        evaluator.run()

                    # sample as many images as the training set, then compute
                    # FID and Inception Score against the original data
                    dataset_img = _x_train
                    sample_img = []
                    for i in range(len(x_train) // 100 + 1):
                        sample_img.append(session.run(gan_plots))
                    sample_img = np.concatenate(
                        sample_img, axis=0).astype('uint8')
                    sample_img = sample_img[:len(dataset_img)]
                    sample_img = np.asarray(sample_img)
                    FID = get_fid(sample_img, dataset_img)
                    IS_mean, IS_std = get_inception_score(sample_img)
                    loop.collect_metrics(FID=FID)
                    loop.collect_metrics(IS=IS_mean)

                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
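
# NOTE: `MyIterator` and `limited` are repo helpers used by the adversarial
# training loop above but not defined in this file.  Minimal versions that
# are consistent with that usage (assumptions, not the original code):
class MyIterator(object):
    """Iterator wrapper that exposes a `has_next` flag by pre-fetching."""

    def __init__(self, iterable):
        self._iterator = iter(iterable)
        self.has_next = True
        self._advance()

    def _advance(self):
        try:
            self._next_item = next(self._iterator)
        except StopIteration:
            self.has_next = False

    def __iter__(self):
        return self

    def __next__(self):
        if not self.has_next:
            raise StopIteration()
        item = self._next_item
        self._advance()
        return item


def limited(iterator, n):
    """Yield at most `n` items from `iterator`, leaving the rest in place."""
    for i, item in enumerate(iterator):
        yield item
        if i + 1 >= n:
            break
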
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting/sample', exist_ok=True)
    results.make_dirs('plotting/z_plot', exist_ok=True)
    results.make_dirs('plotting/train.reconstruct', exist_ok=True)
    results.make_dirs('plotting/test.reconstruct', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    posterior_flow = spt.layers.planar_normalizing_flows(
        config.nf_layers, name='posterior_flow')

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None,) + config.x_shape, name='input_x')
    input_origin_x = tf.placeholder(
        dtype=tf.float32, shape=(None,) + config.x_shape,
        name='input_origin_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)
    beta = tf.Variable(initial_value=0.0, dtype=tf.float32, name='beta',
                       trainable=True)

    # derive the nll and logits output for testing
    with tf.name_scope('testing'), \
            arg_scope([batch_norm], training=True):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_qz)
        # test_pd_net = p_net(n_z=config.test_n_pz // 20, mcmc_iterator=20,
        #                     beta=beta, log_Z=get_log_Z())
        test_pn_net = p_net(n_z=config.test_n_pz, mcmc_iterator=0, beta=beta,
                            log_Z=get_log_Z())
        test_p_net = p_net(observed={'z': test_q_net['z']},
                           n_z=config.test_n_qz, beta=beta, log_Z=get_log_Z())
        pn_abs = tf.abs(
            tf.reduce_mean(D_psi(test_pn_net['x']), axis=0) -
            D_psi(test_pn_net['x'].distribution.mean))
        print(pn_abs)  # prints the symbolic tensor (shape/dtype debugging)
        pn_abs = tf.reduce_mean(pn_abs)
        p_abs = tf.abs(
            tf.reduce_mean(D_psi(test_p_net['x']), axis=0) -
            D_psi(test_p_net['x'].distribution.mean))
        p_abs = tf.reduce_mean(p_abs)

    xi_node = get_var('p_net/xi')

    # prepare for training and testing data
    (_x_train, _y_train), (_x_test, _y_test) = \
        spt.datasets.load_cifar10(x_shape=config.x_shape)
    x_train = (_x_train - 127.5) / 256.0 * 2
    x_test = (_x_test - 127.5) / 256.0 * 2
    # uniform_sampler = UniformNoiseSampler(-1.0 / 256.0, 1.0 / 256.0,
    #                                       dtype=np.float)
    train_flow = spt.DataFlow.arrays(
        [x_train, x_train], config.test_batch_size)
    random_train_flow = spt.DataFlow.arrays(
        [x_train, x_train], config.test_batch_size, shuffle=True)
    reconstruct_train_flow = spt.DataFlow.arrays(
        [x_train], 100, shuffle=True, skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays(
        [x_test], 100, shuffle=True, skip_incomplete=False)
    test_flow = spt.DataFlow.arrays(
        [x_test, x_test], config.test_batch_size)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        # for [x, origin_x] in train_flow:
        #     print('Network initialized, first-batch loss is {:.6g}.\n'.
        #           format(session.run(init_loss,
        #                              feed_dict={input_x: x,
        #                                         input_origin_x: origin_x})))
        #     break

        # if config.z_dim == 512:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/48/19/6f3b6c3ef49ded8ba2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 1024:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/cd/19/6f9d69b5d1931e67e2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 2048:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/4d/19/6f9d69b5d19398c8c2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 3072:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/5d/19/6f9d69b5d1936fb2d2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # else:
        restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/3d/0c/d445f4f80a9fee59aed5/checkpoint/checkpoint/checkpoint.dat-312000'

        # evaluate the restored network
        with spt.TrainLoop(
                tf.trainable_variables(),
                var_groups=['q_net', 'p_net', 'posterior_flow',
                            'G_theta', 'D_psi', 'G_omega', 'D_kappa'],
                max_epoch=config.max_epoch + 10,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False,
                checkpoint_dir=results.system_path('checkpoint'),
                checkpoint_epoch_freq=100,
                restore_checkpoint=restore_checkpoint) as loop:
            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()

            epoch_iterator = loop.iter_epochs()
            evaluator = spt.Evaluator(
                loop,
                metrics={'pn_abs': pn_abs, 'p_abs': p_abs},
                inputs=[input_x, input_origin_x],
                data_flow=train_flow,
                time_metric_name='test_time')

            for epoch in epoch_iterator:
                evaluator.run()
                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None, config.x_dim), name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)
    multi_gpu = MultiGPU()

    # build the model
    grads = []
    losses = []
    y_list = []
    acc_list = []
    batch_size = spt.utils.get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x, dev_input_y] in multi_gpu.data_parallel(
            batch_size, [input_x, input_y]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                _ = model(dev_input_x, is_training, channels_last=True)
            else:
                # derive the loss, output and accuracy
                dev_logits = model(
                    dev_input_x, is_training=is_training,
                    channels_last=multi_gpu.channels_last(dev))
                dev_cls_loss = tf.losses.sparse_softmax_cross_entropy(
                    dev_input_y, dev_logits)
                dev_loss = dev_cls_loss + tf.losses.get_regularization_loss()
                dev_y = spt.ops.softmax_classification_output(dev_logits)
                dev_acc = spt.ops.classification_accuracy(dev_y, dev_input_y)
                losses.append(dev_loss)
                y_list.append(dev_y)
                acc_list.append(dev_acc)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(
                    optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, acc] = multi_gpu.average([losses, acc_list], batch_size)
    [y] = multi_gpu.concat([y_list])
    train_op = multi_gpu.apply_grads(
        grads=multi_gpu.average_grads(grads),
        optimizer=optimizer,
        control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(x_shape=[784], normalize_x=True)
    train_flow = spt.DataFlow.arrays(
        [x_train, y_train], config.batch_size, shuffle=True,
        skip_incomplete=True)
    test_flow = spt.DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with spt.utils.create_session().as_default(), \
            train_flow.threaded(5) as train_flow:
        # train the network
        with spt.TrainLoop(
                params,
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(
                loop, train_op, [input_x, input_y], train_flow,
                feed_dict={is_training: True},
                metrics={'loss': loss, 'acc': acc},
                summaries=tf.summary.merge_all(spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(
                learning_rate,
                epochs=config.lr_anneal_epoch_freq,
                steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(
                loop,
                metrics={'test_acc': acc},
                inputs=[input_x, input_y],
                data_flow=test_flow,
                feed_dict={is_training: False},
                time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()