import time

import common_utils


def lap(self, actors, replay_buffer, num_train, factor=1, prev_steps=0):
    # elapsed wall-clock time since the last start()/lap reset
    t = time.time() - self.t
    self.total_time += t
    num_act = get_num_acts(actors, prev_steps)
    act_rate = factor * (num_act - self.num_act) / t
    num_buffer = replay_buffer.num_add()
    buffer_rate = factor * (num_buffer - self.num_buffer) / t
    train_rate = factor * num_train / t
    # the eval variant differs only in the print prefix
    prefix = "Eval " if self.iseval else ""
    print(
        "%sSpeed: train: %.1f, act: %.1f, buffer_add: %.1f, buffer_size: %d"
        % (prefix, train_rate, act_rate, buffer_rate, replay_buffer.size())
    )
    self.num_act = num_act
    self.num_buffer = num_buffer
    self.num_train += num_train
    print(
        "%sTotal Time: %s, %ds"
        % (prefix, common_utils.sec2str(self.total_time), self.total_time)
    )
    print(
        "%sTotal Sample: train: %s, act: %s"
        % (
            prefix,
            common_utils.num2str(self.num_train),
            common_utils.num2str(self.num_act),
        )
    )
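# Sketch (assumption): the rest of the class is not shown in this excerpt.
# lap()/lap2() read self.t, self.total_time, self.num_act, self.num_buffer,
# self.num_train and self.iseval, so the constructor presumably zeroes those
# counters, and a start() method like the hypothetical one below resets the
# lap timer at the beginning of each measured interval:
def start(self):
    self.t = time.time()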
def lap2(self, actors, num_buffer, num_train):
    # variant of lap() for callers that track the buffer count themselves
    t = time.time() - self.t
    self.total_time += t
    num_act = get_num_acts(actors)
    act_rate = (num_act - self.num_act) / t
    # num_buffer = replay_buffer.num_add()
    buffer_rate = (num_buffer - self.num_buffer) / t
    train_rate = num_train / t
    print(
        "Speed: train: %.1f, act: %.1f, buffer_add: %.1f"
        % (train_rate, act_rate, buffer_rate)
    )
    self.num_act = num_act
    self.num_buffer = num_buffer
    self.num_train += num_train
    print(
        "Total Time: %s, %ds"
        % (common_utils.sec2str(self.total_time), self.total_time)
    )
    print(
        "Total Sample: train: %s, act: %s"
        % (common_utils.num2str(self.num_train), common_utils.num2str(self.num_act))
    )
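# Sketch (assumption): get_num_acts and the common_utils helpers are not
# defined in this excerpt. Plausible minimal implementations, inferred from
# the call sites above (the actor.num_act() API is hypothetical):

def get_num_acts(actors, prev_steps=0):
    # total environment steps taken across all actors, plus any steps
    # accumulated before the current run (e.g. when resuming)
    return prev_steps + sum(actor.num_act() for actor in actors)


def sec2str(seconds):
    # what common_utils.sec2str presumably does: format a duration as H:MM:SS
    seconds = int(seconds)
    h, rem = divmod(seconds, 3600)
    m, s = divmod(rem, 60)
    return "%d:%02d:%02d" % (h, m, s)


def num2str(n):
    # what common_utils.num2str presumably does: compact counts, e.g. 1.5M
    for suffix, div in (("B", 1e9), ("M", 1e6), ("K", 1e3)):
        if n >= div:
            return "%.1f%s" % (n / div, suffix)
    return str(n)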
# (excerpt from the training-loop body)
if stopwatch is not None:
    torch.cuda.synchronize()
    stopwatch.time("backprop & update")

replay_buffer.update_priority(priority)
if stopwatch is not None:
    stopwatch.time("updating priority")

stat["loss"].feed(loss.detach().item())
stat["grad_norm"].feed(g_norm)

epoch_t = time.time() - t
train_time += epoch_t
print(
    "epoch: %d, time: %.1fs, total time(train): %s"
    % (epoch, epoch_t, common_utils.sec2str(train_time))
)
tachometer.lap(actors, replay_buffer, args.epoch_len * args.batchsize)
if stopwatch is not None:
    stopwatch.summary()

context.pause()
eval_locker.update_model(agent)
score = evaluate(
    args.game,
    args.num_eval_game,
    eval_locker,
    actor_cls,
    epoch * args.num_eval_game + 1,
    args.max_frame,
    0,
    terminal_on_life_loss=bool(args.one_life),
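# Sketch (assumption): the stopwatch used above is not shown. A minimal
# named-section timer consistent with the stopwatch.time(name) and
# stopwatch.summary() calls could look like this:

from collections import defaultdict


class Stopwatch:
    def __init__(self):
        self.last = time.time()
        self.times = defaultdict(float)

    def time(self, name):
        # charge the time elapsed since the previous checkpoint to `name`
        now = time.time()
        self.times[name] += now - self.last
        self.last = now

    def summary(self):
        # print per-section totals, then reset for the next epoch
        total = sum(self.times.values())
        for name, elapsed in self.times.items():
            print(
                "%s: %.2fs (%.1f%%)"
                % (name, elapsed, 100 * elapsed / max(total, 1e-8))
            )
        self.times.clear()
        self.last = time.time()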