def run_training(data):
    # Build the graph, run N_EPOCH epochs of training on 32x32 RGB batches, and
    # periodically log, evaluate and checkpoint. Uses the pre-1.0 TensorFlow API
    # (tf.merge_all_summaries, tf.initialize_all_variables, tf.train.SummaryWriter)
    # and relies on module-level BATCH_SIZE, N_EPOCH, DS_SIZE, SUMMARY_DIR,
    # CHECKPOINT_DIR, graph, fill_feed_dict and do_eval.
    with tf.Graph().as_default():
        global_step = tf.Variable(0, name='global_step', trainable=False)
        images_pl = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 32, 32, 3])
        labels_pl = tf.placeholder(tf.int32, shape=[BATCH_SIZE])

        # Model, loss, optimizer and evaluation ops.
        logits = graph.inference(images_pl)
        loss = graph.loss(logits, labels_pl)
        train_op = graph.train(loss, global_step)
        eval_correct = graph.evaluate(logits, labels_pl)

        summary_op = tf.merge_all_summaries()
        saver = tf.train.Saver(tf.all_variables())
        init = tf.initialize_all_variables()

        sess = tf.Session()
        sess.run(init)
        summary_writer = tf.train.SummaryWriter(SUMMARY_DIR, sess.graph)

        max_steps = N_EPOCH * (DS_SIZE // BATCH_SIZE)
        for step in range(max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(data.train, images_pl, labels_pl)
            _, loss_val = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time
            assert not np.isnan(loss_val), 'Model diverged with loss = NaN'

            # Report progress and write summaries every 10 steps and at the last step.
            if step % 10 == 0 or step == max_steps - 1:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_val, duration))
                if step > 0:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

            if step > 0:
                # Evaluate more frequently early in training, then every 1000 steps.
                if step < 1000 and step % 200 == 0:
                    print('Training Data Eval:')
                    do_eval(sess, eval_correct, images_pl, labels_pl, data.train)
                    print('Validation Data Eval:')
                    do_eval(sess, eval_correct, images_pl, labels_pl, data.validation)
                if step % 1000 == 0 or step == max_steps - 1:
                    print('Training Data Eval:')
                    do_eval(sess, eval_correct, images_pl, labels_pl, data.train)
                    print('Validation Data Eval:')
                    do_eval(sess, eval_correct, images_pl, labels_pl, data.validation)
                if step == max_steps - 1:
                    print('Test Data Eval:')
                    do_eval(sess, eval_correct, images_pl, labels_pl, data.test)
                # Save the model checkpoint periodically.
                if step % 1000 == 0 or step == max_steps - 1:
                    checkpoint_path = CHECKPOINT_DIR
                    saver.save(sess, checkpoint_path, global_step=step)
def run_training(data):
    # Variant of run_training for 512x512 RGB inputs: a fixed learning rate is passed
    # to graph.train(), only trainable variables are checkpointed, and summaries are
    # written to the local "summary" directory. Also uses the pre-1.0 TensorFlow API.
    with tf.Graph().as_default():
        images_pl = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 512, 512, 3])
        labels_pl = tf.placeholder(tf.int32, shape=[BATCH_SIZE])

        logits = graph.inference(images_pl)
        loss = graph.loss(logits, labels_pl)
        train_op = graph.train(loss, 0.0001)
        eval_correct = graph.evaluate(logits, labels_pl)

        saver = tf.train.Saver(tf.trainable_variables())
        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        sess = tf.Session()
        sess.run(init)

        # Debugging snippet kept from the original (disabled):
        # c, d = data.train.next_batch(BATCH_SIZE)
        # a = sess.run(logits, feed_dict={images_pl: c})
        # print(a)

        summary_writer = tf.train.SummaryWriter("summary", sess.graph)

        max_steps = N_EPOCH * (DS_SIZE // BATCH_SIZE)
        for step in range(max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(data.train, images_pl, labels_pl)
            _, loss_val = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time
            assert not np.isnan(loss_val), 'Model diverged with loss = NaN'

            # Report progress and write summaries every 10 steps and at the last step.
            if step % 10 == 0 or step == max_steps - 1:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_val, duration))
                if step > 0:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

            # Checkpoint and evaluate every 100 steps and at the last step.
            if step % 100 == 0 or step == max_steps - 1:
                save_path = saver.save(sess, "model.ckpt")
                print("Model saved in file: %s" % save_path)
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_pl, labels_pl, data.train)
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_pl, labels_pl, data.validation)
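# Illustrative usage (added sketch, not from the original module): either run_training
# variant above expects a dataset object whose .train/.validation/.test splits are
# consumable by fill_feed_dict() and do_eval(). `load_datasets` below is a hypothetical
# placeholder for whatever loader this project actually uses.
def example_run_training():
    data = load_datasets()  # hypothetical loader returning train/validation/test splits
    run_training(data)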
def train(self, correct_sequences, noise_sequences, weights, learning_rate, embedding_learning_rate):
    # Run one training step on a batch of correct/noise sequence pairs, update the
    # trainer's statistics, and apply per-row SGD updates to the embedding table.
    r = graph.train(self.embeds(correct_sequences), self.embeds(noise_sequences),
                    learning_rate * weights[0])
    (dcorrect_inputss, dnoise_inputss, losss, unpenalized_losss, l1penaltys,
     correct_scores, noise_scores) = r

    to_normalize = set()
    for ecnt in range(len(correct_sequences)):
        (loss, unpenalized_loss, correct_score, noise_score) = \
            (losss[ecnt], unpenalized_losss[ecnt], correct_scores[ecnt], noise_scores[ecnt])

        # The L1 penalty is either a scalar zero or one value per example.
        if l1penaltys.shape == ():
            assert l1penaltys == 0
            l1penalty = 0
        else:
            l1penalty = l1penaltys[ecnt]

        correct_sequence = correct_sequences[ecnt]
        noise_sequence = noise_sequences[ecnt]
        dcorrect_inputs = [d[ecnt] for d in dcorrect_inputss]
        dnoise_inputs = [d[ecnt] for d in dnoise_inputss]

        self.trainer.update(loss, correct_score, noise_score, unpenalized_loss, l1penalty)

        # All example weights must be identical; scale the embedding learning rate
        # by that shared weight.
        for w in weights:
            assert w == weights[0]
        embedding_learning_rate = embedding_learning_rate * weights[0]

        if loss == 0:
            # A zero loss should come with zero gradients for every input embedding.
            for di in dcorrect_inputs + dnoise_inputs:
                assert (di == 0).all()
        else:
            # Plain SGD step on each embedding row touched by this example.
            for (i, di) in zip(correct_sequence, dcorrect_inputs):
                assert di.shape == (self.parameters.embedding_size,)
                self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                if NORMALIZE_EMBEDDINGS:
                    to_normalize.add(i)
            for (i, di) in zip(noise_sequence, dnoise_inputs):
                assert di.shape == (self.parameters.embedding_size,)
                self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                if NORMALIZE_EMBEDDINGS:
                    to_normalize.add(i)

    # Re-normalize only the embedding rows that were updated.
    if len(to_normalize) > 0:
        self.parameters.normalize(list(to_normalize))
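# The loop above applies a plain per-row SGD step to the embedding table and then
# re-normalizes only the rows it touched. Below is a standalone numpy sketch of that
# pattern, added for illustration; the function name, shapes, and the unit-L2-norm
# choice are assumptions, not the project's actual normalize() implementation.
import numpy as np

def sgd_update_embeddings(embeddings, sequence, gradients, lr, normalize=True):
    """Subtract lr * grad from each embedding row indexed by `sequence`,
    then renormalize the touched rows to unit L2 norm."""
    touched = set()
    for i, grad in zip(sequence, gradients):
        embeddings[i] -= lr * grad
        touched.add(i)
    if normalize and touched:
        rows = list(touched)
        norms = np.linalg.norm(embeddings[rows], axis=1, keepdims=True)
        embeddings[rows] /= np.maximum(norms, 1e-12)
    return embeddings

# Example: a 5-word vocabulary with 3-dimensional embeddings, updating rows 2 and 4.
emb = np.random.randn(5, 3)
grads = [np.ones(3), 0.5 * np.ones(3)]
sgd_update_embeddings(emb, sequence=[2, 4], gradients=grads, lr=0.01)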
def main(job_id, params):
    # Configure and launch NMT training from a params dict (Spearmint-style job entry).
    # re_load = False
    # save_file_name = 'bpe2char_biscale_decoder_adam'
    save_file_name = 'planning_10_true'

    source_dataset = params['train_data_path'] + params['source_dataset']
    target_dataset = params['train_data_path'] + params['target_dataset']
    valid_source_dataset = params['dev_data_path'] + params['valid_source_dataset']
    valid_target_dataset = params['dev_data_path'] + params['valid_target_dataset']
    source_dictionary = params['train_data_path'] + params['source_dictionary']
    target_dictionary = params['train_data_path'] + params['target_dictionary']

    decoder_type = params['decoder_type']
    # Note: this overrides the save_file_name assigned above.
    save_file_name = 'bpe2char_{}_adam.last'.format(decoder_type)
    print('We are using the decoder: {}'.format(decoder_type))

    # Map each layer type to the names of its parameter-initializer and apply function.
    layers = {
        'ff': ('param_init_fflayer', 'fflayer'),
        'fff': ('param_init_ffflayer', 'ffflayer'),
        'gru': ('param_init_gru', 'gru_layer'),
        '{}'.format(decoder_type): ('param_init_{}'.format(decoder_type),
                                    '{}_decoder'.format(decoder_type)),
    }

    re_load = params['reload'] == 'True'
    print(params, params['save_path'], save_file_name)

    validerr = train(
        max_epochs=int(params['max_epochs']),
        patience=int(params['patience']),
        dim_word=int(params['dim_word']),
        dim_word_src=int(params['dim_word_src']),
        save_path=params['save_path'],
        save_file_name=save_file_name,
        re_load=re_load,
        enc_dim=int(params['enc_dim']),
        dec_dim=int(params['dec_dim']),
        n_words=int(params['n_words']),
        n_words_src=int(params['n_words_src']),
        decay_c=float(params['decay_c']),
        lrate=float(params['learning_rate']),
        optimizer=params['optimizer'],
        maxlen=int(params['maxlen']),
        maxlen_trg=int(params['maxlen_trg']),
        maxlen_sample=int(params['maxlen_sample']),
        batch_size=int(params['batch_size']),
        valid_batch_size=int(params['valid_batch_size']),
        sort_size=int(params['sort_size']),
        validFreq=int(params['validFreq']),
        dispFreq=int(params['dispFreq']),
        saveFreq=int(params['saveFreq']),
        sampleFreq=int(params['sampleFreq']),
        clip_c=int(params['clip_c']),
        datasets=[source_dataset, target_dataset],
        valid_datasets=[valid_source_dataset, valid_target_dataset],
        dictionaries=[source_dictionary, target_dictionary],
        use_dropout=int(params['use_dropout']),
        source_word_level=int(params['source_word_level']),
        target_word_level=int(params['target_word_level']),
        layers=layers,
        save_every_saveFreq=1,
        save_burn_in=0,
        use_bpe=1,
        init_params=init_params,
        build_model=build_model,
        build_sampler=build_sampler,
        gen_sample=gen_sample,
        c_lb=float(params['c_lb']),
        st_estimator=params['st_estimator'],
        use_gate=params['use_gate'] == 'True',
        learn_t=params['learn_t'] == 'True',
        plan_step=int(params['plan_step']),
        shuffle_dataset=params['shuffle_dataset'] == 'True',
        only_use_w=params['only_use_w'] == 'True',
        nb_cumulate=int(params['nb_cumulate']),
        repeat_actions=params['repeat_actions'] == 'True',
        decoder_type=decoder_type,
        layer_norm=False if 'layer_norm' not in params else params['layer_norm'] == 'True')
    return validerr
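# The `layers` dict in main() maps each layer type to the *names* of its
# parameter-initializer and apply function, presumably so the training code can look
# layers up by string. Below is a minimal standalone sketch of that name-based dispatch
# pattern; the registry, layer functions and get_layer() here are illustrative stand-ins,
# not the project's actual implementations.
def param_init_fflayer(params, prefix='ff'):
    # Hypothetical initializer: register this layer's parameters under `prefix`.
    params[prefix + '_W'] = 'W-matrix-placeholder'
    return params

def fflayer(params, state_below, prefix='ff'):
    # Hypothetical apply function: compute the layer's output from its inputs.
    return ('apply', prefix, state_below)

_layer_registry = {'ff': ('param_init_fflayer', 'fflayer')}

def get_layer(name):
    # Resolve the registered function names to callables in this module's namespace.
    init_name, apply_name = _layer_registry[name]
    return globals()[init_name], globals()[apply_name]

# Usage: initialize the layer's parameters, then apply it.
init_fn, apply_fn = get_layer('ff')
ff_params = init_fn({}, prefix='ff')
ff_output = apply_fn(ff_params, state_below='x', prefix='ff')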