Exemplo n.º 1
0
 def lap(self, actors, replay_buffer, num_train, factor, prev_steps):
     """Print throughput for the interval since ``self.t`` and update totals.

     The elapsed time is ``time.time() - self.t``; it is added to
     ``self.total_time``.  Three rates are derived over that interval, each
     multiplied by ``factor``: training samples, actor steps, and
     replay-buffer insertions.  Every printed line is prefixed with
     ``"Eval "`` when ``self.iseval`` is truthy.

     This method does not reassign ``self.t``; the caller is presumably
     expected to reset it elsewhere before the next lap -- TODO confirm.

     Args:
         actors: forwarded to ``get_num_acts`` together with ``prev_steps``.
         replay_buffer: object providing ``num_add()`` (cumulative count
             used for the insertion rate) and ``size()`` (printed as-is).
         num_train: number of training samples processed in this interval.
         factor: multiplier applied to all three rates.
         prev_steps: forwarded to ``get_num_acts``.

     Raises:
         ZeroDivisionError: if the measured interval is exactly zero.
     """
     elapsed = time.time() - self.t
     self.total_time += elapsed

     # Cumulative counters; rates come from the delta against the values
     # stored on the previous lap.
     acts_so_far = get_num_acts(actors, prev_steps)
     act_rate = factor * (acts_so_far - self.num_act) / elapsed
     adds_so_far = replay_buffer.num_add()
     buffer_rate = factor * (adds_so_far - self.num_buffer) / elapsed
     train_rate = factor * num_train / elapsed

     # Pick the full set of message templates once instead of branching
     # before every print.
     if self.iseval:
         speed_fmt = "Eval Speed: train: %.1f, act: %.1f, buffer_add: %.1f, buffer_size: %d"
         time_fmt = "Eval Total Time: %s, %ds"
         sample_fmt = "Eval Total Sample: train: %s, act: %s"
     else:
         speed_fmt = "Speed: train: %.1f, act: %.1f, buffer_add: %.1f, buffer_size: %d"
         time_fmt = "Total Time: %s, %ds"
         sample_fmt = "Total Sample: train: %s, act: %s"

     print(speed_fmt % (train_rate, act_rate, buffer_rate, replay_buffer.size()))

     # Remember the counters so the next lap can compute its own deltas.
     self.num_act = acts_so_far
     self.num_buffer = adds_so_far
     self.num_train += num_train

     print(time_fmt % (common_utils.sec2str(self.total_time), self.total_time))
     print(
         sample_fmt
         % (
             common_utils.num2str(self.num_train),
             common_utils.num2str(self.num_act),
         )
     )
Exemplo n.º 2
0
 def lap2(self, actors, num_buffer, num_train):
     """Print throughput for the interval since ``self.t`` and update totals.

     Unlike a variant that queries a replay buffer, this method takes the
     cumulative insertion count directly as ``num_buffer``.  The elapsed
     time is ``time.time() - self.t`` and is added to ``self.total_time``.
     ``self.t`` itself is not reassigned here.

     Args:
         actors: forwarded to ``get_num_acts`` to obtain the cumulative
             actor step count.
         num_buffer: cumulative buffer insertion count supplied by the
             caller; compared against the value stored on the last lap.
         num_train: number of training samples processed in this interval.

     Raises:
         ZeroDivisionError: if the measured interval is exactly zero.
     """
     elapsed = time.time() - self.t
     self.total_time += elapsed

     # Rates are deltas of cumulative counters over the elapsed interval.
     acts_so_far = get_num_acts(actors)
     act_rate = (acts_so_far - self.num_act) / elapsed
     buffer_rate = (num_buffer - self.num_buffer) / elapsed
     train_rate = num_train / elapsed

     speed_line = "Speed: train: %.1f, act: %.1f, buffer_add: %.1f" % (
         train_rate,
         act_rate,
         buffer_rate,
     )
     print(speed_line)

     # Store the counters for the next lap's delta computation.
     self.num_act = acts_so_far
     self.num_buffer = num_buffer
     self.num_train += num_train

     time_line = "Total Time: %s, %ds" % (
         common_utils.sec2str(self.total_time),
         self.total_time,
     )
     print(time_line)

     sample_line = "Total Sample: train: %s, act: %s" % (
         common_utils.num2str(self.num_train),
         common_utils.num2str(self.num_act),
     )
     print(sample_line)
Exemplo n.º 3
0
            if stopwatch is not None:
                torch.cuda.synchronize()
                stopwatch.time("backprop & update")

            replay_buffer.update_priority(priority)

            if stopwatch is not None:
                stopwatch.time("updating priority")

            stat["loss"].feed(loss.detach().item())
            stat["grad_norm"].feed(g_norm)

        epoch_t = time.time() - t
        train_time += epoch_t
        print("epoch: %d, time: %.1fs, total time(train): %s" %
              (epoch, epoch_t, common_utils.sec2str(train_time)))
        tachometer.lap(actors, replay_buffer, args.epoch_len * args.batchsize)
        if stopwatch is not None:
            stopwatch.summary()

        context.pause()
        eval_locker.update_model(agent)
        score = evaluate(
            args.game,
            args.num_eval_game,
            eval_locker,
            actor_cls,
            epoch * args.num_eval_game + 1,
            args.max_frame,
            0,
            terminal_on_life_loss=bool(args.one_life),