def run_experiment(option, use_basic_dataset):
    TOKEN_EMB_SIZE = 54
    BATCH_SIZE = 128
    if use_basic_dataset:
        sequence_cap = 56
    else:
        sequence_cap = 130
    X_SHAPE = (sequence_cap, TOKEN_EMB_SIZE)

    # set up pipeline
    print('Setting up data pipeline')
    NUMBER_BATCHES = 1000
    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path='simple_models_{}_{}_{}'.format(
            'basic' if use_basic_dataset else 'standard',
            NUMBER_BATCHES,
            BATCH_SIZE
        ),
        huzzer_kwargs=huzzer_kwargs
    )
    queue = build_single_output_queue(
        datasource,
        output_shape=(BATCH_SIZE, sequence_cap, TOKEN_EMB_SIZE),
        type=tf.uint8
    )
    raw_input_sequences = queue.dequeue(name='encoder_input')
    input_sequences = tf.cast(raw_input_sequences, tf.float32)

    if option.startswith('simple_'):
        z_size = int(option.split('_')[-1])
        build_simple_network2(input_sequences, X_SHAPE, latent_dim=z_size, kl_limit=0.0)
    elif option == 'conv':
        z_size = 128
        build_special_conv4_final(
            input_sequences,
            X_SHAPE,
            z_size,
            filter_length=3,
            num_filters=128,
        )
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = 'experiments/VAE_baseline/{}{}'.format(
        'basic_' if use_basic_dataset else '', option
    )
    sv = Supervisor(logdir=logdir, save_summaries_secs=10, save_model_secs=120)

    # Get a TensorFlow session managed by the supervisor.
    with sv.managed_session() as sess:
        # Use the session to train the graph.
        for i in range(20000):
            if sv.should_stop():
                exit()
            sess.run('train_on_batch')

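# Note on the training loop above: sess.run() accepts a string and resolves it
# against the default graph, so sess.run('train_on_batch') only works if the
# network-builder functions (build_simple_network2 / build_special_conv4_final)
# register a training op under that exact name; that is assumed here, not shown
# in the listing. A minimal, self-contained sketch of the same run-by-name
# pattern follows (every identifier in it is illustrative, not part of the
# experiment code):
import tensorflow as tf


def _run_op_by_name_demo():
    """Standalone illustration: build a tiny graph whose train op is named
    'train_on_batch', then drive it by name, as run_experiment does above."""
    with tf.Graph().as_default():
        w = tf.Variable(3.0, name='w')
        loss = tf.square(w, name='loss')
        tf.train.GradientDescentOptimizer(0.1).minimize(loss, name='train_on_batch')
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run('train_on_batch')   # string fetches are looked up by op name
            print(sess.run('loss:0'))    # tensors are addressed as '<op_name>:0'
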
def run_experiment(option, use_basic_dataset):
    sequence_cap = 56 if use_basic_dataset else 130

    print('Setting up data pipeline...')
    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path='attention_models_{}_{}_{}'.format(
            'basic' if use_basic_dataset else 'standard',
            NUMBER_BATCHES,
            BATCH_SIZE
        ),
        huzzer_kwargs=huzzer_kwargs
    )
    queue = build_single_output_queue(
        datasource,
        output_shape=(BATCH_SIZE, sequence_cap, TOKEN_EMB_SIZE),
        type=tf.uint8
    )
    raw_input_sequences = queue.dequeue(name='input_sequence')
    sequence_lengths = get_sequence_lengths(
        tf.cast(raw_input_sequences, tf.int32)
    )
    input_sequences = tf.cast(raw_input_sequences, tf.float32)

    print('Building model..')
    if option.startswith('attention1'):
        z_size = int(option.split('_')[-1])
        encoder_output = build_single_program_encoder(
            input_sequences, sequence_lengths, z_size
        )
        z_resampled = resampling(encoder_output)
        decoder_output, _ = build_attention1_decoder(
            z_resampled, sequence_lengths, sequence_cap, TOKEN_EMB_SIZE
        )
        cross_entropy_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                decoder_output, input_sequences, sequence_lengths, sequence_cap
            )
        )
        kl_loss = tf.reduce_mean(kl_divergence(encoder_output))
    else:
        print('INVALID OPTION')
        exit(1)

    total_loss_op = kl_loss + cross_entropy_loss
    tf.summary.scalar('cross_entropy_loss', cross_entropy_loss)
    tf.summary.scalar('kl_loss', kl_loss)
    tf.summary.scalar('total_loss', total_loss_op)

    logdir = os.path.join(BASEDIR, ('basic_' if use_basic_dataset else '') + option)

    optimizer = tf.train.AdamOptimizer(1e-3)
    print('creating train op...')
    train_op = slim.learning.create_train_op(total_loss_op, optimizer)

    print('starting supervisor...')
    sv = Supervisor(logdir=logdir, save_model_secs=300, save_summaries_secs=60)

    print('training...')
    with sv.managed_session() as sess:
        while not sv.should_stop():
            total_loss, _ = sess.run([total_loss_op, train_op])

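# Unlike the first listing, the function above relies on module-level
# definitions that are not shown: BATCH_SIZE, NUMBER_BATCHES, TOKEN_EMB_SIZE,
# BASEDIR, BASIC_DATASET_ARGS, plus the TensorFlow imports. A minimal sketch of
# such a preamble follows; every value marked "assumed" or "hypothetical" is a
# placeholder for illustration, not necessarily what the original script used.
import logging
import os

import numpy as np
import tensorflow as tf

slim = tf.contrib.slim
Supervisor = tf.train.Supervisor

TOKEN_EMB_SIZE = 54                      # one-hot token embedding size (matches the first listing)
BATCH_SIZE = 128                         # assumed: matches the first listing
NUMBER_BATCHES = 1000                    # assumed: matches the first listing
BASEDIR = 'experiments/VAE_attention'    # hypothetical log/checkpoint root
BASIC_DATASET_ARGS = {}                  # assumed: huzzer kwargs for the simplified dataset
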
def run_experiment(option):
    BATCH_SIZE = 128
    X_SHAPE = (128, 54)

    # set up pipeline
    print('Setting up data pipeline')
    data_pipeline = one_hot_token_pipeline(for_cnn=False, length=128)

    # Function to pass into the queue: each call generates the next batch of
    # one-hot token sequences from deterministic code seeds.
    batch_index = 0

    def get_batch():
        nonlocal batch_index
        if batch_index % 100 == 0:
            logging.info('{} examples used'.format(batch_index * BATCH_SIZE))
        code_seeds = [
            str(i) for i in range(
                batch_index * BATCH_SIZE, (batch_index + 1) * BATCH_SIZE
            )
        ]
        batch = np.array(data_pipeline[code_seeds])
        batch_index += 1
        return batch

    # use the queue for training
    queue = build_single_output_queue(get_batch, (BATCH_SIZE, *X_SHAPE))
    x = queue.dequeue(name='encoder_input')

    if option == 'simple':
        print("the 'simple' option no longer works; a small refactor would be needed")
        tensor_names = build_simple_network(x, BATCH_SIZE, (256, 54))
    elif option == 'conv1':
        tensor_names = build_conv1(x, (128, 54))
    elif option == 'conv2':
        tensor_names = build_conv2(x, (128, 54), 32)
    elif option == 'conv3':
        tensor_names = build_conv3(x, (128, 54), 64)
    elif option == 'conv4':
        tensor_names = build_conv4(x, (128, 54), 64)
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = 'experiments/VAE_baseline/{}'.format(option)
    sv = Supervisor(logdir=logdir, save_summaries_secs=20, save_model_secs=120)

    # Get a TensorFlow session managed by the supervisor.
    with sv.managed_session() as sess:
        # Use the session to train the graph.
        while not sv.should_stop():
            sess.run('train_on_batch')

def run_experiment(option):
    BATCH_SIZE = 32

    if option == 'mnist_digits':
        gen = mnist_unlabeled_generator(BATCH_SIZE, for_cnn=True)
        batch_shape = gen()[0].shape
        print('batch shape is : {}'.format(batch_shape))
        get_batch = lambda: gen()[0]
        queue = build_single_output_queue(get_batch, batch_shape)
        x = queue.dequeue(name='real_input')
        training_ops = build_mnist_gan_for_training(x)
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = 'experiments/GAN_baseline/{}'.format(option)
    sv = Supervisor(logdir=logdir, save_summaries_secs=20, save_model_secs=120)

    # Get a TensorFlow session managed by the supervisor.
    with sv.managed_session() as sess:
        # Use the session to train the graph.
        d_loss = 5
        g_loss = 4
        steps_without_d_training = 0
        i = 0
        while not sv.should_stop():
            # Train the discriminator only while it is doing worse than the
            # generator (or once enough steps have passed).
            # if d_loss > 0.5 or steps_without_d_training > 50:
            if d_loss > g_loss or i > 6000:
                steps_without_d_training = 0
                d_loss = sess.run(training_ops['train_discriminator'])
            # else:
            #     steps_without_d_training += 1
            #     sess.run(training_ops['train_discriminator'])
            g_loss = sess.run(training_ops['train_generator'])
            i += 1  # step counter (never incremented in the original, so the i > 6000 branch could not trigger)

def run_experiment(option):
    BATCH_SIZE = 128
    X_SHAPE = (128, 54)

    # set up pipeline
    print('Setting up data pipeline')
    NUMBER_BATCHES = 500
    dataset = one_hot_token_dataset(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=128,
        cache_path='one_hot_token_haskell_batch{}_number{}'.format(
            BATCH_SIZE, NUMBER_BATCHES
        )
    )

    def get_batch():
        return dataset()[0]

    # use the queue for training
    queue = build_single_output_queue(get_batch, (BATCH_SIZE, *X_SHAPE))
    x = queue.dequeue(name='encoder_input')

    if option == 'simple':
        tensor_names = build_simple_network2(x, X_SHAPE, 32)
    elif option == 'simple_double_latent':
        tensor_names = build_simple_network2(x, X_SHAPE, 64)
    elif option == 'simple_256':
        tensor_names = build_simple_network2(x, X_SHAPE, 256)
    elif option == 'simple_1024':
        tensor_names = build_simple_network2(x, X_SHAPE, 1024)
    elif option == 'simple_8192':
        tensor_names = build_simple_network2(x, X_SHAPE, 8192)
    elif option == 'conv_special':
        tensor_names = build_special_conv(x, X_SHAPE, 64)
    elif option == 'conv_special_low_kl':
        tensor_names = build_special_conv_low_kl(x, X_SHAPE, 64)
    elif option == 'conv_special2':
        tensor_names = build_special_conv2(x, X_SHAPE, 64)
    elif option == 'conv_special2_l1':
        tensor_names = build_special_conv2_l1(x, X_SHAPE, 64)
    elif option == 'conv_special2_l1_128':
        tensor_names = build_special_conv2_l1(x, X_SHAPE, 128)
    # conv3 is conv2 but with an initial filter length of 5 instead of 1
    elif option == 'conv_special3_l1_128':
        tensor_names = build_special_conv2_l1(x, X_SHAPE, 128, filter_length=5)
    elif option == 'conv_special3_l1_256':
        tensor_names = build_special_conv2_l1(x, X_SHAPE, 256, filter_length=5)
    elif option == 'conv_special3_l1_128f_256':
        tensor_names = build_special_conv2_l1(
            x, X_SHAPE, 256, filter_length=5, num_filters=128
        )
    elif option == 'conv_special3_big_l1_512':
        tensor_names = build_special_conv2_l1(x, X_SHAPE, 512, filter_length=10)
    elif option == 'conv_special4_l1_1024':
        tensor_names = build_special_conv4_l1(
            x, X_SHAPE, 1024, filter_length=3, num_filters=256
        )
    elif option == 'conv_special4_l1_2048_f5':
        tensor_names = build_special_conv4_l1(
            x, X_SHAPE, 1024, filter_length=5, num_filters=256
        )
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = 'experiments/VAE_baseline/{}_sss'.format(option)
    sv = Supervisor(
        logdir=logdir,
        save_summaries_secs=20,
        save_model_secs=120
    )

    # Get a TensorFlow session managed by the supervisor.
    with sv.managed_session() as sess:
        # Use the session to train the graph.
        while not sv.should_stop():
            sess.run('train_on_batch')

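# How an experiment is selected at the command line is not shown in these
# listings. The wrapper below is a hypothetical entry point, included only to
# illustrate how an option string such as 'conv_special2_l1' reaches
# run_experiment; it is not the original script's interface.
if __name__ == '__main__':
    import sys

    if len(sys.argv) != 2:
        print('usage: python vae_baseline.py <option>')
        exit(1)
    run_experiment(sys.argv[1])
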
def run_experiment(option, use_basic_dataset):
    assert os.path.isdir(os.path.join(BASEDIR, 'pretrained_weights')), \
        'weights files are missing'
    sequence_cap = 56 if use_basic_dataset else 130

    print('Setting up data pipeline...')
    huzzer_kwargs = BASIC_DATASET_ARGS if use_basic_dataset else {}
    datasource = one_hot_token_random_batcher(
        BATCH_SIZE,
        NUMBER_BATCHES,
        length=sequence_cap,
        cache_path='attention_models_{}_{}_{}'.format(
            'basic' if use_basic_dataset else 'standard',
            NUMBER_BATCHES,
            BATCH_SIZE
        ),
        huzzer_kwargs=huzzer_kwargs
    )
    queue = build_single_output_queue(
        datasource,
        output_shape=(BATCH_SIZE, sequence_cap, TOKEN_EMB_SIZE),
        type=tf.uint8
    )
    raw_input_sequences = queue.dequeue(name='input_sequence')
    real_sequence_lengths = get_sequence_lengths(
        tf.cast(raw_input_sequences, tf.int32)
    )
    real_input_sequences = tf.cast(raw_input_sequences, tf.float32)

    print('Building model..')
    if option.startswith('attention1_gan_no_pretrain'):
        z_size = int(option.split('_')[-1])
        random_vector = tf.random_normal(
            dtype=tf.float32,
            shape=[BATCH_SIZE, z_size],
            mean=0,
            stddev=0.1  # because that is what we will use when generating
        )
        # we do not know the length of the generated code beforehand, so we pass in
        # sequence lengths of `sequence_cap`
        full_lengths = tf.constant(
            [sequence_cap for _ in range(BATCH_SIZE)],
            dtype=tf.float32,
            name='generator_lengths'
        )

        # create the scaling constant k_t
        k_t = tf.Variable(0., trainable=False, name='k_t')

        # generator gets restored weights, and so does the discriminator
        with tf.variable_scope('generator'):
            unnormalized_generated_programs, _ = build_attention1_decoder(
                random_vector, full_lengths, sequence_cap, TOKEN_EMB_SIZE
            )
            generated_programs = tf.nn.softmax(
                unnormalized_generated_programs, dim=-1, name='generated_programs'
            )
            generated_lengths = get_sequence_lengths(generated_programs, epsilon=0.01)

        with tf.variable_scope('discriminator'):
            sequence_lengths = tf.concat(
                [generated_lengths, real_sequence_lengths], axis=0
            )
            encoder_output = build_single_program_encoder(
                tf.concat([generated_programs, real_input_sequences], axis=0),
                sequence_lengths,
                z_size
            )
            # get the values corresponding to mus from the encoder output
            assert encoder_output.get_shape()[1].value == 2 * z_size
            encoded_v = encoder_output[:, :z_size]
            reconstructed, _ = build_attention1_decoder(
                encoded_v, sequence_lengths, sequence_cap, TOKEN_EMB_SIZE
            )
            # these are the unnormalized token probabilities for g and d
            generated_reconstructed = reconstructed[:BATCH_SIZE]
            real_reconstructed = reconstructed[BATCH_SIZE:]

        generator_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                unnormalized_token_probs=generated_reconstructed,
                input_sequences=generated_programs,
                sequence_lengths=generated_lengths,
                max_length=sequence_cap
            )
        )
        real_loss = tf.reduce_mean(
            ce_loss_for_sequence_batch(
                unnormalized_token_probs=real_reconstructed,
                input_sequences=real_input_sequences,
                sequence_lengths=generated_lengths,  # note: reuses the generated lengths; real_sequence_lengths may have been intended
                max_length=sequence_cap
            )
        )
        discriminator_loss = real_loss - (k_t * generator_loss)

        optimizer = tf.train.AdamOptimizer(1e-5)
        print('creating discriminator train op...')
        d_train_op = slim.learning.create_train_op(discriminator_loss, optimizer)

        optimizer = tf.train.AdamOptimizer(1e-5)
        print('creating generator train op...')
        g_train_op = slim.learning.create_train_op(generator_loss, optimizer)

        balance = GAMMA * real_loss - generator_loss
        measure = real_loss + tf.abs(balance)

        # update k_t
        with tf.control_dependencies([d_train_op, g_train_op]):
            k_update = tf.assign(
                k_t, tf.clip_by_value(k_t + LAMBDA * balance, 0, 1)
            )

        # example_summary_op = tf.summary.merge([
        #     tf.summary.image("G", tf.expand_dims(generated_programs, -1)),
        #     tf.summary.image("AE_G", tf.expand_dims(
        #         tf.nn.softmax(generated_reconstructed, dim=-1), axis=-1
        #     )),
        #     tf.summary.image("AE_x", tf.expand_dims(
        #         tf.nn.softmax(real_reconstructed, dim=-1), axis=-1
        #     ))
        # ])

        perf_summary_op = tf.summary.merge([
            tf.summary.scalar("loss/discriminator_loss", discriminator_loss),
            tf.summary.scalar("loss/real_loss", real_loss),
            tf.summary.scalar("loss/generator_loss", generator_loss),
            tf.summary.scalar("misc/measure", measure),
            tf.summary.scalar("misc/k_t", k_t),
            tf.summary.scalar("misc/balance", balance),
        ])
    else:
        print('INVALID OPTION')
        exit(1)

    logdir = os.path.join(
        BASEDIR, ('basic_' if use_basic_dataset else '') + option + '_gan'
    )

    # build the model and initialise weights so supervisor can start where we left off
    # if not os.path.isdir(logdir):
    #     mkdir_p(logdir)
    #     with tf.Session() as sess:
    #         print('saving initial pretrained weights')
    #         with tf.variable_scope('', reuse=True):
    #             discriminator_vars = [
    #                 tf.get_variable('discriminator/decoder_fully_connected/bias'),
    #                 tf.get_variable('discriminator/decoder_fully_connected/weights'),
    #                 tf.get_variable('discriminator/decoder_rnn/lstm_cell/biases'),
    #                 tf.get_variable('discriminator/decoder_rnn/lstm_cell/weights'),
    #                 tf.get_variable('discriminator/rnn/lstm_cell/biases'),
    #                 tf.get_variable('discriminator/rnn/lstm_cell/weights'),
    #                 tf.get_variable('discriminator/simple_attention/bias'),
    #                 tf.get_variable('discriminator/simple_attention/weights'),
    #             ]
    #             generator_vars = [
    #                 tf.get_variable('generator/decoder_fully_connected/bias'),
    #                 tf.get_variable('generator/decoder_fully_connected/weights'),
    #                 tf.get_variable('generator/decoder_rnn/lstm_cell/biases'),
    #                 tf.get_variable('generator/decoder_rnn/lstm_cell/weights'),
    #                 tf.get_variable('generator/simple_attention/bias'),
    #                 tf.get_variable('generator/simple_attention/weights'),
    #             ]
    #
    #         discriminator_saver = tf.train.Saver(discriminator_vars)
    #         generator_saver = tf.train.Saver(generator_vars)
    #         sess.run(tf.global_variables_initializer())
    #         discriminator_saver.restore(
    #             sess,
    #             os.path.join(BASEDIR, 'pretrained_weights', 'discriminator_weights.cpkt')
    #         )
    #         generator_saver.restore(
    #             sess,
    #             os.path.join(BASEDIR, 'pretrained_weights', 'generator_weights.cpkt')
    #         )
    #
    #         saver = tf.train.Saver()
    #         saver.save(sess, os.path.join(logdir, 'model.cpkt-0'))

    print('starting supervisor...')
    sv = Supervisor(
        logdir=logdir,
        save_model_secs=300,
        save_summaries_secs=60,
        summary_op=perf_summary_op
    )

    print('training...')
    with sv.managed_session() as sess:
        global_step = -1
        while not sv.should_stop():
            ops = {
                'k_update': k_update,
                'measure': measure,
                'd_train_op': d_train_op,
                'g_train_op': g_train_op,
                'global_step': sv.global_step
            }
            # if global_step % 200 == 0:
            #     ops.update({'images': example_summary_op})
            results = sess.run(ops)
            global_step = results['global_step']  # keep the local counter in sync with the graph's global step

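# The GAN listing above appears to follow the BEGAN training scheme: the
# discriminator is an autoencoder, discriminator_loss = real_loss - k_t * generator_loss,
# and the balancing term is updated as
#     k_{t+1} = clip(k_t + LAMBDA * (GAMMA * real_loss - generator_loss), 0, 1),
# with measure = real_loss + |GAMMA * real_loss - generator_loss| tracked for
# monitoring. GAMMA, LAMBDA, BATCH_SIZE, NUMBER_BATCHES, TOKEN_EMB_SIZE and
# BASEDIR are module-level constants not shown in the listing; the assignments
# below are plausible placeholders (GAMMA and LAMBDA use the BEGAN paper's
# defaults), not necessarily the values used in these experiments.
GAMMA = 0.5                              # assumed: BEGAN diversity ratio
LAMBDA = 0.001                           # assumed: BEGAN k_t learning rate
BATCH_SIZE = 128                         # assumed: matches the VAE listings
NUMBER_BATCHES = 1000                    # assumed
TOKEN_EMB_SIZE = 54                      # one-hot token embedding size
BASEDIR = 'experiments/GAN_attention'    # hypothetical log/checkpoint root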