def train(config):

    # Initialize the text dataset
    dataset = TextDataset(config.txt_file)

    # Initialize the model
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                dropout_keep_prob=config.dropout_keep_prob)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    with tf.name_scope('input'):
        inputs = tf.placeholder(tf.int32,
                                shape=[config.batch_size, config.seq_length],
                                name='inputs')
        labels = tf.placeholder(tf.int32,
                                shape=[config.batch_size, config.seq_length],
                                name='labels')
        input_sample = tf.placeholder(tf.int32,
                                      shape=[config.batch_size, 1],
                                      name='input_sample')
        # LSTM state placeholder: [num_layers, 2 (c, h), batch_size, num_hidden]
        state = tf.placeholder(tf.float32,
                               [config.lstm_num_layers, 2,
                                config.batch_size, config.lstm_num_hidden],
                               name='state')

    # Convert the state placeholder into the tuple format expected by the LSTM
    layer = tf.unstack(state, axis=0)
    rnn_tuple_state = tuple(
        tf.nn.rnn_cell.LSTMStateTuple(layer[i][0], layer[i][1])
        for i in range(config.lstm_num_layers))

    # Logits
    with tf.name_scope('logits'):
        logits, _ = model._build_model(inputs, rnn_tuple_state)

    # Loss
    with tf.name_scope('loss'):
        loss = model._compute_loss(logits, labels)
        tf.summary.scalar('loss', loss)

    # Generate text
    with tf.name_scope('sample_logits'):
        sample_logits, final_state = model._build_model(input_sample,
                                                        rnn_tuple_state)

    # Predictions
    with tf.name_scope('predictions'):
        predictions = model.predictions(sample_logits)

    global_step = tf.Variable(0, trainable=False, name='global_step')

    # Decaying learning rate
    decaying_learning_rate = tf.train.exponential_decay(
        config.learning_rate, global_step, config.learning_rate_step,
        config.learning_rate_decay, name='decaying_eta')
    tf.add_to_collection('learning_rate', decaying_learning_rate)

    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(decaying_learning_rate)

    # Compute the gradients for each variable, clip them and apply the update
    grads_and_vars = optimizer.compute_gradients(loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped, variables),
                                                   global_step=global_step)

    merged = tf.summary.merge_all()
    test_writer = tf.summary.FileWriter(config.summary_path + '/test',
                                        graph=tf.get_default_graph())

    # Initial zero state
    init_state = np.zeros((config.lstm_num_layers, 2, config.batch_size,
                           config.lstm_num_hidden))

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    for train_step in range(int(config.train_steps)):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here.
        #######################################################################
        x_train, y_train = dataset.batch(config.batch_size, config.seq_length)

        # Train
        sess.run(apply_gradients_op,
                 feed_dict={inputs: x_train,
                            labels: y_train,
                            state: init_state})

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:
            train_loss, summary = sess.run([loss, merged],
                                           feed_dict={inputs: x_train,
                                                      labels: y_train,
                                                      state: init_state})
            test_writer.add_summary(summary, train_step)
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Loss = {}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"),
                      train_step + 1, int(config.train_steps),
                      config.batch_size, examples_per_second, train_loss))

        if train_step % config.sample_every == 0:
            # Start each sampled sentence from a random character
            sample_inputs = np.random.randint(0, dataset.vocab_size,
                                              size=config.batch_size)
            new_sample = np.reshape(sample_inputs, (sample_inputs.shape[0], 1))
            new_sentence = np.empty([config.batch_size, config.seq_length])

            # Generate a new sentence of length seq_length, one character at a time
            for i in range(config.seq_length):
                if i == 0:
                    # First step: feed the all-zero initial state
                    pred, final = sess.run([predictions, final_state],
                                           feed_dict={input_sample: new_sample,
                                                      state: init_state})
                else:
                    # Later steps: feed the saved LSTM state back into the model
                    pred, final = sess.run([predictions, final_state],
                                           feed_dict={input_sample: new_sample,
                                                      state: final})
                new_sample = pred.T
                new_sentence[:, i] = new_sample.squeeze()

            for idx, elem in enumerate(new_sentence):
                # .encode('utf-8') is kept to avoid encoding errors when running
                # on SURFsara; it can be dropped for nicer-looking output.
                print('Sentence {}:{} {}'.format(
                    idx,
                    dataset.convert_to_string(sample_inputs)[idx].encode('utf-8'),
                    dataset.convert_to_string(elem).encode('utf-8')))

    test_writer.close()
    sess.close()

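
# A minimal sketch (not part of the original listing) of the state-feeding
# pattern used above: a [num_layers, 2, batch, hidden] placeholder is unstacked
# into one LSTMStateTuple per layer, so a plain numpy array can be passed
# between sess.run calls. All names, shapes and default values below are
# illustrative assumptions, not the assignment's API.
def _lstm_state_feed_sketch(num_layers=2, batch_size=4, num_hidden=8):
    import numpy as np
    import tensorflow as tf

    state_ph = tf.placeholder(tf.float32,
                              [num_layers, 2, batch_size, num_hidden],
                              name='state_sketch')
    per_layer = tf.unstack(state_ph, axis=0)
    tuple_state = tuple(
        tf.nn.rnn_cell.LSTMStateTuple(per_layer[i][0], per_layer[i][1])
        for i in range(num_layers))
    # The matching feedable value is a numpy array of the same shape,
    # e.g. the all-zero initial state fed on the first sampling step.
    zero_state = np.zeros((num_layers, 2, batch_size, num_hidden),
                          dtype=np.float32)
    return state_ph, tuple_state, zero_state
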
def train(config):

    tf.reset_default_graph()

    # Initialize the text dataset
    dataset = TextDataset(config.txt_file, config.clean_data)

    # Initialize the model
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                embed_dim=config.embed_dim,
                                decoding_model=config.decoding_mode)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    # Decoding ops: continue a warm-up sequence, and decode from random characters
    warmup_seq = tf.placeholder(dtype=tf.int32,
                                shape=(None, 1),
                                name='warmup_decoding_sequences')
    warmup_decodes = model.decode_warmup(warmup_seq, config.decode_length)

    init_decode_char = tf.placeholder(dtype=tf.int32,
                                      shape=(config.num_rand_samples,),
                                      name='rand_init_decoding')
    random_decodes = model.decode(decode_batch_size=config.num_rand_samples,
                                  init_input=init_decode_char,
                                  decode_length=config.decode_length,
                                  init_state=None)

    # Reproducibility
    # tf.set_random_seed(42)
    # np.random.seed(42)

    # Utility vars and ops
    gpu_opts = tf.GPUOptions(
        per_process_gpu_memory_fraction=config.gpu_mem_frac, allow_growth=True)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts))
    global_step = tf.Variable(0, trainable=False, name='global_step')

    # Logging
    train_logdir = os.path.join(config.summary_path,
                                '{}_train'.format(config.model_name))
    train_log_writer = init_summary_writer(session, train_logdir)

    # Define the optimizer
    if config.optimizer.lower() == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=config.learning_rate,
            decay=config.learning_rate_decay)
    elif config.optimizer.lower() == 'adam':
        optimizer = tf.train.AdamOptimizer(config.learning_rate)

    # Compute the gradients for each variable, clip them and apply the update
    grads_and_vars = optimizer.compute_gradients(model.loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped, variables),
                                                   global_step=global_step)

    # Checkpointing
    saver = tf.train.Saver(max_to_keep=50)
    save_path = os.path.join(config.checkpoint_path,
                             '{}/model.ckpt'.format(config.model_name))
    _ensure_path_exists(save_path)

    # Summaries
    summary_op = tf.summary.merge_all()

    session.run(fetches=[
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    ])

    for train_step in range(int(config.train_steps)):

        # dim: [batch_size, time_step]
        batch_inputs, batch_labels = dataset.batch(
            batch_size=config.batch_size, seq_length=config.seq_length)

        # Time-major: [time_step, batch_size]
        batch_inputs = batch_inputs.T

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here.
        #######################################################################
        train_feed = {model.inputs: batch_inputs, model.labels: batch_labels}
        fetches = [model.loss, apply_gradients_op]

        if train_step % config.print_every == 0:
            fetches += [summary_op]
            loss, _, summary = session.run(feed_dict=train_feed, fetches=fetches)
            train_log_writer.add_summary(summary, train_step)
        else:
            loss, _ = session.run(feed_dict=train_feed, fetches=fetches)

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Loss = {}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"),
                      train_step + 1, int(config.train_steps),
                      config.batch_size, examples_per_second, loss))

        # Decode
        if train_step % config.sample_every == 0:

            # Random character sampling
            print('Random character sampling')
            rand_chars = np.random.choice(a=dataset.vocab_size,
                                          size=config.num_rand_samples)
            decode_feed = {init_decode_char: rand_chars}
            decoded_tokens = session.run(fetches=[random_decodes],
                                         feed_dict=decode_feed)[0]
            decoded_tokens = np.array(decoded_tokens).T

            for i in range(decoded_tokens.shape[0]):
                print('{}|{}'.format(
                    dataset._ix_to_char[rand_chars[i]],
                    dataset.convert_to_string(decoded_tokens[i, :])))

            # Warm-up sequence sampling
            print('Warmup sequence sampling')
            warmups = [
                'Welcome to the planet Earth ',
                'Human beings grew up in forests ',
                'Satan said ',
                'God is not ',
                'theory of evolution ',
                'whole groups of species '
            ]
            for warmup in warmups:
                warmup_tokens = np.array([
                    dataset._char_to_ix[x] for x in warmup.lower()
                    if x in dataset._char_to_ix
                ]).reshape((-1, 1))
                feed = {warmup_seq: warmup_tokens}
                decoded_tokens = session.run(fetches=[warmup_decodes],
                                             feed_dict=feed)[0]
                print('{}|{}'.format(
                    warmup,
                    dataset.convert_to_string(
                        decoded_tokens.squeeze().tolist())))

        if train_step % config.checkpoint_every == 0:
            saver.save(session, save_path=save_path)

    train_log_writer.close()

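
# A minimal sketch (an illustrative assumption, not the actual TextDataset
# class) of the char<->index vocabulary that the _char_to_ix / _ix_to_char /
# convert_to_string calls above rely on: characters map to integer ids for the
# model, and sampled ids map back to text for printing.
def _char_vocab_sketch(text='hello world'):
    chars = sorted(set(text))
    char_to_ix = {c: i for i, c in enumerate(chars)}
    ix_to_char = {i: c for i, c in enumerate(chars)}
    tokens = [char_to_ix[c] for c in text if c in char_to_ix]
    decoded = ''.join(ix_to_char[i] for i in tokens)  # round-trips back to `text`
    return tokens, decoded
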
def train(config):

    # Initialize the text dataset
    dataset = TextDataset(config.txt_file)

    # Initialize the model
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                dropout_keep_prob=config.dropout_keep_prob,
                                prediction_mode=config.prediction_mode)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    # Placeholders for model sampling
    init_sample_char = tf.placeholder(dtype=tf.int32,
                                      shape=(config.num_samples,))
    seq_samples = model._sample(init_input=init_sample_char,
                                num_samples=config.num_samples,
                                sample_length=config.sample_length,
                                init_state=None)

    init_sentence = tf.placeholder(dtype=tf.int32, shape=(None, 1))
    completed_sentence = model._complete_sentence(init_sentence,
                                                  config.sample_length)

    gpu_opts = tf.GPUOptions(
        per_process_gpu_memory_fraction=config.gpu_mem_frac, allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts))

    # Setup global step
    global_step = tf.Variable(0, trainable=False, name='global_step')

    # Define the optimizer
    if config.optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=config.learning_rate,
            decay=config.learning_rate_decay)
    elif config.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(config.learning_rate)

    # Compute the gradients for each variable, clip them and apply the update
    grads_and_vars = optimizer.compute_gradients(model._loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped, variables),
                                                   global_step=global_step)

    # Saver
    saver = tf.train.Saver(max_to_keep=50)
    save_path = os.path.join(config.save_path,
                             '{}/model.ckpt'.format(config.name))
    _check_path(save_path)

    # Initialization
    init_op = tf.global_variables_initializer()
    local_init_op = tf.local_variables_initializer()
    sess.run(fetches=[init_op, local_init_op])

    # Define summary operation
    summary_op = tf.summary.merge_all()

    # Logs
    train_log_path = os.path.join(config.summary_path, '{}'.format(config.name))
    _check_path(train_log_path)
    train_log_writer = tf.summary.FileWriter(train_log_path, graph=sess.graph)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    print(" ******* DICTIONARY ******* ")
    print(dataset._ix_to_char)

    for train_step in range(int(config.train_steps)):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here.
        #######################################################################
        x, y = dataset.batch(batch_size=config.batch_size,
                             seq_length=config.seq_length)

        tr_feed = {model._inputs: x, model._targets: y}
        fetches = [apply_gradients_op, model._loss]

        if train_step % config.print_every == 0:
            fetches += [summary_op]
            _, train_loss, summary = sess.run(feed_dict=tr_feed, fetches=fetches)
            train_log_writer.add_summary(summary, train_step)
        else:
            _, train_loss = sess.run(feed_dict=tr_feed, fetches=fetches)

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Loss = {:.4f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"),
                      train_step, int(config.train_steps),
                      config.batch_size, examples_per_second, train_loss))

        # Sample sentences from the model
        if train_step % config.sample_every == 0:

            # Random initial character
            init_chars = np.random.choice(a=dataset.vocab_size,
                                          size=config.num_samples)
            sampled_seq = sess.run(fetches=[seq_samples],
                                   feed_dict={init_sample_char: init_chars})[0]
            sampled_seq = np.array(sampled_seq).T

            print("\n ******* Random Initial Character *******")
            for i in range(config.num_samples):
                print('{} - {}|{}'.format(
                    i, dataset._ix_to_char[init_chars[i]],
                    dataset.convert_to_string(sampled_seq[i, :])))

            # Custom sentences to complete
            custom_inits = [
                'To be, or not to be, that is the question: Whether ',
                'History will be kind to me for I intend to ',
                'Hansel and Gr',
                'Democracy is ',
                'Let T be a bounded linear operator in V, a vector space.',
                'Mas vale pajaro en mano que ver un ciento v'
            ]

            print("\n ******* Sentence Completion *******")
            for init_seq in custom_inits:
                init_vec = np.array([
                    dataset._char_to_ix[x] for x in init_seq
                    if x in dataset._char_to_ix
                ]).reshape((-1, 1))
                sampled_seq = sess.run(fetches=[completed_sentence],
                                       feed_dict={init_sentence: init_vec})[0]
                print('{}|{}'.format(
                    init_seq,
                    dataset.convert_to_string(sampled_seq.squeeze().tolist())))
            print("\n")

        # Save checkpoint
        if train_step % config.save_every == 0 and train_step > 1:
            saver.save(sess, save_path=save_path)

    train_log_writer.close()

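
# A minimal numpy sketch (illustrative only, not the assignment's
# implementation) of two decoding choices that a prediction_mode /
# decoding_mode switch like the ones used above typically selects between:
# greedy argmax versus sampling from a temperature-scaled softmax. The mode
# names and the temperature default are assumptions made for this example.
def _next_char_sketch(logits, mode='sample', temperature=1.0):
    import numpy as np

    scaled = np.asarray(logits, dtype=np.float64) / temperature
    probs = np.exp(scaled - scaled.max())  # stable softmax
    probs /= probs.sum()
    if mode == 'greedy':
        return int(np.argmax(probs))  # most likely character id
    return int(np.random.choice(len(probs), p=probs))  # sampled character id
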