def _internal_eval(model, global_step, sess, iterator, iterator_feed_dict,
                   summary_writer, label):
    """Computing perplexity."""
    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)
    ppl = model_helper.compute_perplexity(model, sess, label)
    utils.add_summary(summary_writer, global_step, "%s_ppl" % label, ppl)
    return ppl
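# Note: `utils.add_summary` is used throughout these snippets but not defined here.
# A minimal sketch of such a helper, assuming a TF1-style `tf.summary.FileWriter`,
# might look like the following; the module name `utils` and the exact signature are
# assumptions inferred from the call sites, not the original implementation.
import tensorflow as tf

def add_summary(summary_writer, global_step, tag, value):
    """Write a single scalar value to TensorBoard under the given tag."""
    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
    summary_writer.add_summary(summary, global_step)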
def _external_eval(model, global_step, sess, hparams, iterator,
                   iterator_feed_dict, tgt_file, lbl_file, label,
                   summary_writer, save_on_best):
    """External evaluation such as BLEU and ROUGE scores."""
    out_dir = hparams.out_dir
    decode = global_step > 0
    if decode:
        utils.print_out("# External evaluation, global step %d" % global_step)

    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)

    slot_output = os.path.join(out_dir, "slot_output_%s" % label)
    intent_output = os.path.join(out_dir, "intent_output_%s" % label)
    scores = nmt_utils.decode_and_evaluate(
        label,
        model,
        sess,
        slot_output,
        intent_output,
        ref_file=tgt_file,
        ref_lbl_file=lbl_file,
        metrics=hparams.metrics,
        subword_option=hparams.subword_option,
        beam_width=hparams.beam_width,
        tgt_eos=hparams.eos,
        task=hparams.task,
        decode=decode,
        infer_mode=hparams.infer_mode)

    # Save on best metrics
    if decode:
        for metric in hparams.metrics:
            best_metric_label = "best_" + metric
            utils.add_summary(summary_writer, global_step,
                              "%s_%s" % (label, metric), scores[metric])
            # metric: larger is better
            if save_on_best and scores[metric] > getattr(hparams, best_metric_label):
                setattr(hparams, best_metric_label, scores[metric])
                model.saver.save(
                    sess,
                    os.path.join(
                        getattr(hparams, best_metric_label + "_dir"), "translate.ckpt"),
                    global_step=model.global_step)
        utils.save_hparams(out_dir, hparams)
    return scores
def train(train_loader, model, criterion, optimizer, epoch):
    global global_step
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    batch_t0 = time.time()
    num_batches = 0
    for i, (input, target, video_names, frame_indexes) in enumerate(train_loader):
        num_batches += 1
        input = input.cuda(async=True)
        target = target.cuda(async=True)
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data[0], target.size(0))
        top1.update(prec1[0], target.size(0))
        top5.update(prec5[0], target.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if global_step % args.log_interval == 0:
            add_summary(tf_summary_writer, tag="Train_Prec1", raw_value=prec1[0], global_step=global_step)
            add_summary(tf_summary_writer, tag="Train_Prec5", raw_value=prec5[0], global_step=global_step)
            add_summary(tf_summary_writer, tag="Train_Loss", raw_value=loss.data[0], global_step=global_step)
            add_summary(tf_summary_writer, tag="Learning_Rate", raw_value=lr, global_step=global_step)

            elapsed_time = str(datetime.timedelta(seconds=time.time() - zero_time))
            batch_time = time.time() - batch_t0
            batch_t0 = time.time()
            ms_per_batch = 1000 * (batch_time / num_batches)
            num_batches = 0
            print 'elapsed_time : {} | epoch {:3d} | {:6d}/{:6d} batches | lr {:02.6f} | ms/batch {:8.2f} | ' \
                  'loss {:5.2f} | top1 {:8.2f} | top5 {:8.2f}'.format(
                      elapsed_time, epoch, i, len(train_loader), lr, ms_per_batch,
                      loss.data[0], prec1[0], prec5[0])

        global_step += 1

    return losses.avg, top1.avg, top5.avg
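# The `train` loop above relies on two helpers that are not shown in this snippet.
# The sketches below are assumptions inferred from the call sites: `AverageMeter`
# follows the familiar running-average helper from the PyTorch ImageNet example,
# and `add_summary` wraps a TF1 summary writer to log raw scalar values.
import tensorflow as tf

class AverageMeter(object):
    """Keeps a running sum/count and exposes the current and average value."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def add_summary(writer, tag, raw_value, global_step):
    """Log one scalar (e.g. loss or precision) to TensorBoard."""
    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=raw_value)])
    writer.add_summary(summary, global_step)
    writer.flush()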
def _train(model_name, model, dataset, summary_writer, init):
    best_loss = 1e20
    batch = dataset.get_batch(dataset.train)
    epoch = init['epoch']
    worse_step = init['worse_step']
    logger.info("epoch {}".format(epoch))

    if model.get_global_step() > config.num_training_step or worse_step > model.early_stopping:
        return

    while True:
        try:
            batchInput = dataset.next_batch(batch)
            global_step, loss, train_summary = model.train(batchInput)
            if global_step % config.steps_per_stat == 0:
                summary_writer.add_summary(train_summary, global_step)
                summary_writer.flush()
                logger.info("{} step : {:.5f}".format(global_step, loss))
        except tf.errors.OutOfRangeError:
            eval_loss = evaluate(model, dataset, dataset.dev)
            utils.add_summary(summary_writer, global_step, "dev_loss", eval_loss)
            logger.info("dev loss : {:.5f}".format(eval_loss))
            if eval_loss < best_loss:
                worse_step = 0
                best_loss = eval_loss
                prefix = config.checkpoint_dir + "/" + model_name + config.best_model_dir
                model.best_saver.save(model.sess, prefix + "/best_{}".format(epoch),
                                      global_step=global_step)
            else:
                worse_step += 1
                prefix = config.checkpoint_dir + "/" + model_name + config.tmp_model_dir
                model.tmp_saver.save(model.sess, prefix + "/tmp_{}".format(epoch),
                                     global_step=global_step)
            if global_step > config.num_training_step or worse_step > model.early_stopping:
                break
            else:
                batch = dataset.get_batch(dataset.train)
                epoch += 1
                logger.info("\nepoch {}".format(epoch))
def fit(self, env, num_iterations, max_episode_length=None):
    """Fit your model to the provided environment.

    It's a good idea to print out things like loss, average reward,
    Q-values, etc. to see if your agent is actually improving.

    You should probably also periodically save your network weights and
    any other useful info.

    This is where you should sample actions from your network, collect
    experience samples and add them to your replay memory, and update
    your network parameters.

    Parameters
    ----------
    env: gym.Env
      This is your Atari environment. You should wrap the environment
      using the wrap_atari_env function in the utils.py
    num_iterations: int
      How many samples/updates to perform.
    max_episode_length: int
      How long a single episode should last before the agent resets.
      Can help exploration.
    """
    ses = tf.get_default_session()
    writer = tf.summary.FileWriter(self.output_path, ses)
    writer.add_graph(tf.get_default_graph())
    self.policy = LinearDecayGreedyEpsilonPolicy()
    n_action = env.action_space.n
    self.n_action = n_action
    it = 0
    epi_num = 0

    if self.use_replay_and_target_fixing == False:
        epi_reward = 0
        epi_length = 0
        state = env.reset()
        state = self.preprocessor.process_state_for_network(state)
        his_state = self.his_preprocessor.process_state_for_network(state)
        while True:
            it += 1
            if it % 1000 == 0:
                print 'it: ', it
            epi_length += 1
            action = self.select_action(his_state, self.q_network, self.policy)
            next_s, r, done, info = env.step(action)
            epi_reward += r
            r = self.preprocessor.process_reward(r)
            if done:
                epi_num += 1
                y = r
                self.q_network.train_on_batch(
                    [np.array([his_state]), np.array([action])],
                    np.array([[y] * n_action]))
                state = env.reset()
                self.his_preprocessor.reset()
                state = self.preprocessor.process_state_for_network(state)
                his_state = self.his_preprocessor.process_state_for_network(state)
                utils.add_summary(epi_num, 'reward_vs_episode', epi_reward, writer)
                utils.add_summary(epi_num, 'length_vs_episode', epi_length, writer)
                utils.add_summary(it, 'reward_vs_step', epi_reward, writer)
                utils.add_summary(it, 'length_vs_step', epi_length, writer)
                epi_reward = 0
                epi_length = 0
                if epi_num % 100 == 0:
                    print 'epi: ', epi_num, ' it: ', it
                    evaluate_reward, evaluate_epi_length = self.evaluate(
                        env, 20, video_path_suffix='episode-' + str(epi_num))
                    utils.add_summary(epi_num, 'evaluate_reward_vs_episode',
                                      evaluate_reward, writer)
                    utils.add_summary(epi_num, 'evaluate_length_vs_episode',
                                      evaluate_epi_length, writer)
                    utils.add_summary(it, 'evaluate_reward_vs_step',
                                      evaluate_reward, writer)
                    utils.add_summary(it, 'evaluate_length_vs_step',
                                      evaluate_epi_length, writer)
                if it >= num_iterations:
                    self.q_network.save_weights(self.weight_file_name)
                    break
            else:
                old_his = his_state
                state = next_s
                state = self.preprocessor.process_state_for_network(state)
                his_state = self.his_preprocessor.process_state_for_network(state)
                y = r + self.gamma * max(
                    self.calc_q_values(his_state, self.q_network))
                self.q_network.train_on_batch(
                    [np.array([old_his]), np.array([action])],
                    np.array([[y] * n_action]))
    else:
        # use replay memory and target fixing
        it, epi_num = self.burn_samples(env)
        while it < num_iterations:
            epi_num += 1
            epi_reward = 0
            epi_length = 0
            state = env.reset()
            self.his_preprocessor.reset()
            while True:
                # start an episode
                state = self.preprocessor.process_state_for_network(state)
                his_state = self.his_preprocessor.process_state_for_network(state)
                action = self.select_action(his_state, self.q_network, self.policy)
                next_state, reward, is_terminal, info = env.step(action)
                epi_reward += reward
                epi_length += 1
                reward = self.preprocessor.process_reward(reward)
                it += 1
                if it % 1000 == 0:
                    print 'it: ', it
                self.memory.append(state, action, reward, is_terminal)
                state = next_state
                if it % self.train_freq == 0:
                    self.update_policy()
                if it % self.target_update_freq == 0:
                    utils.get_hard_target_model_updates(
                        self.q_network2, self.q_network)
                if it % self.save_freq == 0:
                    self.q_network.save_weights(self.weight_file_name)
                if is_terminal:
                    # add summaries to tensorboard
                    utils.add_summary(epi_num, 'reward_vs_episode', epi_reward, writer)
                    utils.add_summary(epi_num, 'length_vs_episode', epi_length, writer)
                    utils.add_summary(it, 'reward_vs_step', epi_reward, writer)
                    utils.add_summary(it, 'length_vs_step', epi_length, writer)
                    if epi_num % 100 == 0:
                        print 'epi: ', epi_num, ' it: ', it
                        evaluate_reward, evaluate_epi_length = self.evaluate(
                            env, 20, video_path_suffix='episode-' + str(epi_num))
                        utils.add_summary(epi_num, 'evaluate_reward_vs_episode',
                                          evaluate_reward, writer)
                        utils.add_summary(epi_num, 'evaluate_length_vs_episode',
                                          evaluate_epi_length, writer)
                        utils.add_summary(it, 'evaluate_reward_vs_step',
                                          evaluate_reward, writer)
                        utils.add_summary(it, 'evaluate_length_vs_step',
                                          evaluate_epi_length, writer)
                    break
        self.q_network.save_weights(self.weight_file_name)
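# `utils.get_hard_target_model_updates` is referenced above but not shown. A
# minimal sketch, assuming both arguments are Keras models, simply copies the
# online network's weights into the target network; the name and argument order
# are taken from the call site, while the body is an assumption.
def get_hard_target_model_updates(target, source):
    """Hard update: overwrite the target network's weights with the source's."""
    target.set_weights(source.get_weights())
    return target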
zero_time = time.time()
print '-------------- New training session ----------------'
for epoch in range(0, args.epochs):
    epoch_start_time = time.time()
    lr = adjust_learning_rate(lstm_optimizer, epoch, args.lr)

    # train_loss, train_prec1
    train_scores = train(train_loader, lstm_model, lstm_criterion, lstm_optimizer, epoch)

    if (epoch + 1) % 1 == 0:
        # valid_loss, valid_prec1, valid_map
        valid_scores = validate(valid_loader, lstm_model, valid_criterion,
                                valid_func, valid_targets)
        add_summary(tf_summary_writer, tag="Val_Loss", raw_value=valid_scores[0], global_step=global_step)
        add_summary(tf_summary_writer, tag="Val_Prec1", raw_value=valid_scores[1], global_step=global_step)
        add_summary(tf_summary_writer, tag="Val_Prec5", raw_value=valid_scores[2], global_step=global_step)
        add_summary(tf_summary_writer, tag="Val_MAP", raw_value=valid_scores[3], global_step=global_step)

        print('-' * 89)
        print('| VAL : end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'top1 {:8.2f} | top5 {:8.2f} | MAP {:8.2f}'.format(
                  epoch, (time.time() - epoch_start_time),
                  valid_scores[0], valid_scores[1], valid_scores[2], valid_scores[3]))
        print('-' * 89)

        # test_loss, test_prec1, test_map
        test_scores = validate(test_loader, lstm_model, valid_criterion,
                               valid_func, test_targets)
        add_summary(tf_summary_writer, tag="Test_Loss", raw_value=test_scores[0], global_step=global_step)
        add_summary(tf_summary_writer, tag="Test_Prec1", raw_value=test_scores[1], global_step=global_step)
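# `adjust_learning_rate` is called above but not defined in this fragment. A
# plausible sketch, assuming the common step-decay schedule (divide the base
# learning rate by 10 every 30 epochs; the decay factor and interval are
# assumptions, not taken from the original code):
def adjust_learning_rate(optimizer, epoch, base_lr):
    """Set the learning rate on every parameter group and return it."""
    lr = base_lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr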
def add_info_summaries(summary_writer, global_step, info):
    """Add stuff in info to summaries."""
    excluded_list = ["learning_rate"]
    for key in info:
        if key not in excluded_list:
            utils.add_summary(summary_writer, global_step, key, info[key])