Example #1
def _internal_eval(model, global_step, sess, iterator, iterator_feed_dict,
                   summary_writer, label):
    """Computing perplexity."""
    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)
    ppl = model_helper.compute_perplexity(model, sess, label)
    utils.add_summary(summary_writer, global_step, "%s_ppl" % label, ppl)
    return ppl
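Example #1 above (and several of the examples below) calls utils.add_summary with a (summary_writer, global_step, tag, value) signature. The helper itself is not shown on this page; a minimal sketch of such a function, assuming the TF1 tf.Summary protobuf API (the body below is an assumption, not the projects' actual code):

import tensorflow as tf

def add_summary(summary_writer, global_step, tag, value):
    """Write a single scalar (tag, value) pair to the event file at global_step."""
    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
    summary_writer.add_summary(summary, global_step)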
Example #2
def _external_eval(model, global_step, sess, hparams, iterator,
                   iterator_feed_dict, tgt_file, lbl_file, label,
                   summary_writer, save_on_best):
    """External evaluation such as BLEU and ROUGE scores."""
    out_dir = hparams.out_dir
    decode = global_step > 0

    if decode:
        utils.print_out("# External evaluation, global step %d" % global_step)

    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)

    slot_output = os.path.join(out_dir, "slot_output_%s" % label)
    intent_output = os.path.join(out_dir, "intent_output_%s" % label)
    scores = nmt_utils.decode_and_evaluate(
        label,
        model,
        sess,
        slot_output,
        intent_output,
        ref_file=tgt_file,
        ref_lbl_file=lbl_file,
        metrics=hparams.metrics,
        subword_option=hparams.subword_option,
        beam_width=hparams.beam_width,
        tgt_eos=hparams.eos,
        task=hparams.task,
        decode=decode,
        infer_mode=hparams.infer_mode)
    # Save on best metrics
    if decode:
        for metric in hparams.metrics:
            best_metric_label = "best_" + metric

            utils.add_summary(summary_writer, global_step,
                              "%s_%s" % (label, metric), scores[metric])
            # metric: larger is better
            if save_on_best and scores[metric] > getattr(
                    hparams, best_metric_label):
                setattr(hparams, best_metric_label, scores[metric])
                model.saver.save(sess,
                                 os.path.join(
                                     getattr(hparams,
                                             best_metric_label + "_dir"),
                                     "translate.ckpt"),
                                 global_step=model.global_step)
        utils.save_hparams(out_dir, hparams)
    return scores
Example #3
def train(train_loader, model, criterion, optimizer, epoch):
  global global_step
  losses = AverageMeter()
  top1 = AverageMeter()
  top5 = AverageMeter()

  # switch to train mode
  model.train()
  batch_t0 = time.time()
  num_batches = 0
  for i, (input, target, video_names, frame_indexes) in enumerate(train_loader):
    num_batches += 1

    input = input.cuda(async=True)
    target = target.cuda(async=True)
    input_var = torch.autograd.Variable(input)
    target_var = torch.autograd.Variable(target)

    # compute output
    output = model(input_var)
    loss = criterion(output, target_var)

    # measure accuracy and record loss
    prec1, prec5 = accuracy(output.data, target, topk=(1,5))
    losses.update(loss.data[0], target.size(0))
    top1.update(prec1[0], target.size(0))
    top5.update(prec5[0], target.size(0))

    # compute gradient and do SGD step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if global_step%args.log_interval == 0:
      add_summary(tf_summary_writer, tag="Train_Prec1", raw_value=prec1[0], global_step=global_step)
      add_summary(tf_summary_writer, tag="Train_Prec5", raw_value=prec5[0], global_step=global_step)
      add_summary(tf_summary_writer, tag="Train_Loss", raw_value=loss.data[0], global_step=global_step)
      add_summary(tf_summary_writer, tag="Learning_Rate", raw_value=lr, global_step=global_step)

      elapsed_time = str(datetime.timedelta(seconds=time.time()-zero_time))
      batch_time = time.time() - batch_t0 
      batch_t0 = time.time()
      ms_per_batch = 1000 * (batch_time / num_batches)
      num_batches = 0
      print 'elapsed_time : {} | epoch {:3d} | {:6d}/{:6d} batches | lr {:02.6f} | ms/batch {:8.2f} | ' \
            'loss {:5.2f} | top1 {:8.2f} | top5 {:8.2f}'.format(elapsed_time, 
                                                                epoch, 
                                                                i, 
                                                                len(train_loader),
                                                                lr,
                                                                ms_per_batch,
                                                                loss.data[0],
                                                                prec1[0],
                                                                prec5[0])
                              
    global_step += 1

  return losses.avg, top1.avg, top5.avg 
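Examples #3 and #6 log PyTorch training metrics to TensorBoard through a different, keyword-style helper, add_summary(writer, tag=..., raw_value=..., global_step=...). A minimal sketch under that assumed signature, again built on the TF1 summary protobuf (hypothetical, not the project's own code):

import tensorflow as tf

def add_summary(writer, tag, raw_value, global_step):
    """Log one scalar from the PyTorch training loop to a tf.summary.FileWriter."""
    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=float(raw_value))])
    writer.add_summary(summary, global_step)
    writer.flush()  # flush so the point appears in TensorBoard without waiting for the buffer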
Example #4
def _train(model_name, model, dataset, summary_writer, init):
    best_loss = 1e20
    batch = dataset.get_batch(dataset.train)
    epoch = init['epoch']
    worse_step = init['worse_step']
    logger.info("epoch {}".format(epoch))
    if (model.get_global_step() > config.num_training_step
            or worse_step > model.early_stopping):
        return
    while True:
        try:
            batchInput = dataset.next_batch(batch)
            global_step, loss, train_summary = model.train(batchInput)

            if global_step % config.steps_per_stat == 0:
                summary_writer.add_summary(train_summary, global_step)
                summary_writer.flush()
                logger.info("{} step : {:.5f}".format(global_step, loss))
        except tf.errors.OutOfRangeError:
            eval_loss = evaluate(model, dataset, dataset.dev)
            utils.add_summary(summary_writer, global_step, "dev_loss",
                              eval_loss)
            logger.info("dev loss : {:.5f}".format(eval_loss))

            if eval_loss < best_loss:
                worse_step = 0
                best_loss = eval_loss
                prefix = config.checkpoint_dir + "/" + model_name + config.best_model_dir
                model.best_saver.save(model.sess,
                                      prefix + "/best_{}".format(epoch),
                                      global_step=global_step)
            else:
                worse_step += 1
                prefix = config.checkpoint_dir + "/" + model_name + config.tmp_model_dir
                model.tmp_saver.save(model.sess,
                                     prefix + "/tmp_{}".format(epoch),
                                     global_step=global_step)
            if global_step > config.num_training_step or worse_step > model.early_stopping:
                break
            else:
                batch = dataset.get_batch(dataset.train)
            epoch += 1
            logger.info("\nepoch {}".format(epoch))
Example #5
File: dqn.py  Project: Ontree/QLearning
    def fit(self, env, num_iterations, max_episode_length=None):
        """Fit your model to the provided environment.

        It's a good idea to print out things like loss, average reward,
        Q-values, etc. to see if your agent is actually improving.

        You should probably also periodically save your network
        weights and any other useful info.

        This is where you should sample actions from your network,
        collect experience samples and add them to your replay memory,
        and update your network parameters.

        Parameters
        ----------
        env: gym.Env
          This is your Atari environment. You should wrap the
          environment using the wrap_atari_env function in
          utils.py.
        num_iterations: int
          How many samples/updates to perform.
        max_episode_length: int
          How long a single episode should last before the agent
          resets. Can help exploration.
        """
        ses = tf.get_default_session()
        writer = tf.summary.FileWriter(self.output_path, ses)
        writer.add_graph(tf.get_default_graph())

        self.policy = LinearDecayGreedyEpsilonPolicy()
        n_action = env.action_space.n
        self.n_action = n_action
        it = 0
        epi_num = 0
        if not self.use_replay_and_target_fixing:
            epi_reward = 0
            epi_length = 0
            state = env.reset()
            state = self.preprocessor.process_state_for_network(state)
            his_state = self.his_preprocessor.process_state_for_network(state)
            while True:
                it += 1
                if it % 1000 == 0:
                    print 'it: ', it
                epi_length += 1
                action = self.select_action(his_state, self.q_network,
                                            self.policy)
                next_s, r, done, info = env.step(action)
                epi_reward += r
                r = self.preprocessor.process_reward(r)
                if done:
                    epi_num += 1
                    y = r
                    self.q_network.train_on_batch(
                        [np.array([his_state]),
                         np.array([action])], np.array([[y] * n_action]))
                    state = env.reset()
                    self.his_preprocessor.reset()
                    state = self.preprocessor.process_state_for_network(state)
                    his_state = self.his_preprocessor.process_state_for_network(
                        state)

                    utils.add_summary(epi_num, 'reward_vs_episode', epi_reward,
                                      writer)
                    utils.add_summary(epi_num, 'length_vs_episode', epi_length,
                                      writer)
                    utils.add_summary(it, 'reward_vs_step', epi_reward, writer)
                    utils.add_summary(it, 'length_vs_step', epi_length, writer)
                    epi_reward = 0
                    epi_length = 0
                    if epi_num % 100 == 0:
                        print 'epi: ', epi_num, '  it: ', it
                        evaluate_reward, evaluate_epi_length = self.evaluate(
                            env,
                            20,
                            video_path_suffix='episode-' + str(epi_num))
                        utils.add_summary(epi_num,
                                          'evaluate_reward_vs_episode',
                                          evaluate_reward, writer)
                        utils.add_summary(epi_num,
                                          'evaluate_length_vs_episode',
                                          evaluate_epi_length, writer)
                        utils.add_summary(it, 'evaluate_reward_vs_step',
                                          evaluate_reward, writer)
                        utils.add_summary(it, 'evaluate_length_vs_step',
                                          evaluate_epi_length, writer)

                    if it >= num_iterations:
                        self.q_network.save_weights(self.weight_file_name)
                        break
                else:
                    old_his = his_state
                    state = next_s
                    state = self.preprocessor.process_state_for_network(state)
                    his_state = self.his_preprocessor.process_state_for_network(
                        state)
                    y = r + self.gamma * max(
                        self.calc_q_values(his_state, self.q_network))
                    self.q_network.train_on_batch(
                        [np.array([old_his]),
                         np.array([action])], np.array([[y] * n_action]))
        else:  #use replay memory and target fixing
            it, epi_num = self.burn_samples(env)
            while it < num_iterations:
                epi_num += 1
                epi_reward = 0
                epi_length = 0
                state = env.reset()
                self.his_preprocessor.reset()
                while True:  # start an episode
                    state = self.preprocessor.process_state_for_network(state)
                    his_state = self.his_preprocessor.process_state_for_network(
                        state)
                    action = self.select_action(his_state, self.q_network,
                                                self.policy)
                    next_state, reward, is_terminal, info = env.step(action)
                    epi_reward += reward
                    epi_length += 1
                    reward = self.preprocessor.process_reward(reward)
                    it += 1
                    if it % 1000 == 0:
                        print 'it: ', it
                    self.memory.append(state, action, reward, is_terminal)
                    state = next_state
                    if it % self.train_freq == 0:
                        self.update_policy()
                    if it % self.target_update_freq == 0:
                        utils.get_hard_target_model_updates(
                            self.q_network2, self.q_network)
                    if it % self.save_freq == 0:
                        self.q_network.save_weights(self.weight_file_name)
                    if is_terminal:  # add summaries to tensorboard
                        utils.add_summary(epi_num, 'reward_vs_episode',
                                          epi_reward, writer)
                        utils.add_summary(epi_num, 'length_vs_episode',
                                          epi_length, writer)
                        utils.add_summary(it, 'reward_vs_step', epi_reward,
                                          writer)
                        utils.add_summary(it, 'length_vs_step', epi_length,
                                          writer)
                        if epi_num % 100 == 0:
                            print 'epi: ', epi_num, '  it: ', it
                            evaluate_reward, evaluate_epi_length = self.evaluate(
                                env,
                                20,
                                video_path_suffix='episode-' + str(epi_num))
                            utils.add_summary(epi_num,
                                              'evaluate_reward_vs_episode',
                                              evaluate_reward, writer)
                            utils.add_summary(epi_num,
                                              'evaluate_length_vs_episode',
                                              evaluate_epi_length, writer)
                            utils.add_summary(it, 'evaluate_reward_vs_step',
                                              evaluate_reward, writer)
                            utils.add_summary(it, 'evaluate_length_vs_step',
                                              evaluate_epi_length, writer)
                        break

            self.q_network.save_weights(self.weight_file_name)
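The replay-memory branch above refreshes the target network with utils.get_hard_target_model_updates(self.q_network2, self.q_network) every target_update_freq steps. A minimal sketch of such a hard update for Keras models, assuming the (target, source) argument order used in that call (the helper body is an assumption):

def get_hard_target_model_updates(target, source):
    """Copy the online network's weights into the target network (hard target fixing)."""
    target.set_weights(source.get_weights())
    return target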
Example #6
zero_time = time.time()
print '-------------- New training session ----------------'
for epoch in range(0, args.epochs):
  epoch_start_time = time.time()

  lr = adjust_learning_rate(lstm_optimizer, epoch, args.lr)

  # train_loss, train_prec1, train_prec5
  train_scores = train(train_loader, lstm_model, lstm_criterion, 
                        lstm_optimizer, epoch)

  if (epoch+1) % 1 == 0:
    # valid_loss, valid_prec1, valid_prec5, valid_map
    valid_scores = validate(valid_loader, lstm_model, valid_criterion, 
                            valid_func, valid_targets)  
    add_summary(tf_summary_writer, tag="Val_Loss", raw_value=valid_scores[0], global_step=global_step)
    add_summary(tf_summary_writer, tag="Val_Prec1", raw_value=valid_scores[1], global_step=global_step)
    add_summary(tf_summary_writer, tag="Val_Prec5", raw_value=valid_scores[2], global_step=global_step)
    add_summary(tf_summary_writer, tag="Val_MAP", raw_value=valid_scores[3], global_step=global_step)

    print('-' * 89)
    print('| VAL : end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
            'top1 {:8.2f} | top5 {:8.2f} | MAP {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                        valid_scores[0], valid_scores[1], valid_scores[2], valid_scores[3]))
    print('-' * 89)

    # test_loss, test_prec1, test_prec5, test_map
    test_scores = validate(test_loader, lstm_model, valid_criterion, 
                            valid_func, test_targets)
    add_summary(tf_summary_writer, tag="Test_Loss", raw_value=test_scores[0], global_step=global_step)
    add_summary(tf_summary_writer, tag="Test_Prec1", raw_value=test_scores[1], global_step=global_step)
Example #7
def add_info_summaries(summary_writer, global_step, info):
    """Add stuffs in info to summaries."""
    excluded_list = ["learning_rate"]
    for key in info:
        if key not in excluded_list:
            utils.add_summary(summary_writer, global_step, key, info[key])
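A usage sketch for add_info_summaries, assuming info is a plain dict of scalar training statistics and that utils.add_summary behaves like the scalar-summary helper sketched under Example #1 (the log directory and values here are hypothetical):

import tensorflow as tf

writer = tf.summary.FileWriter("/tmp/train_logs")  # hypothetical log dir
info = {"train_ppl": 12.3, "speed": 1500.0, "learning_rate": 1e-3}
add_info_summaries(writer, global_step=1000, info=info)
# "learning_rate" is skipped because it is in excluded_list; the other keys
# are written as scalar summaries named "train_ppl" and "speed".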