class TensorboardCallback(Callback):
    """Log per-epoch metrics to two loggers and periodically save the model.

    Two ``Logger`` instances are created, one writing to ``<path>/training``
    and one to ``<path>/validation``. At the end of every epoch the epoch
    duration ("Speed"), the sparse categorical accuracy (as a percentage) and
    the loss are logged. Every ``save_period`` epochs ``self.model`` is saved
    to ``<path>/save_<epoch>.h5`` and older numbered ``.h5`` saves found under
    ``path`` are deleted.
    """

    def __init__(self, path, args=None, events_dir=None, max_step=None,
                 save_period=10):
        """Create the log directories, the loggers and optional history.

        Args:
            path: Root directory for the logs and the model saves.
            args: Optional mapping of run parameters. When truthy, it is
                rendered as a bullet list and logged as the 'Description'
                text of both loggers.
            events_dir: Optional directory scanned for existing events files
                whose content is copied into the new loggers.
            max_step: Forwarded to ``Logger.copyFrom`` as ``max_step``.
                Previous events are only copied when both ``events_dir`` and
                ``max_step`` are truthy.
            save_period: The model is saved every ``save_period`` epochs.
        """
        # NOTE(review): assumes the base class initialiser takes no arguments
        # (true for the Keras ``Callback``) -- confirm against the import.
        super().__init__()

        self.save_period = save_period
        self.path = path
        # Defined up front so on_epoch_end never hits a missing attribute.
        self.starttime = time()

        train_dir = os.path.join(path, 'training')
        os.makedirs(train_dir, exist_ok=True)
        self.train_logger = Logger(train_dir)

        valid_dir = os.path.join(path, 'validation')
        os.makedirs(valid_dir, exist_ok=True)
        self.valid_logger = Logger(valid_dir)

        if args:
            text = 'Parameters\n---------\n' + ''.join(
                '- ' + str(key) + ' = ' + str(val) + '\n'
                for key, val in args.items())
            self.train_logger.log_text('Description', text)
            self.valid_logger.log_text('Description', text)

        if events_dir and max_step:
            self._copy_previous_events(events_dir, max_step)

    def _copy_previous_events(self, events_dir, max_step):
        """Copy earlier training/validation events into the new loggers."""
        # The second element returned by scan_dir is treated as a list of
        # file paths; only files named 'events*' are of interest.
        events_files = [
            candidate for candidate in scan_dir(events_dir, '')[1]
            if os.path.basename(candidate).startswith('events')
        ]

        # Map the parent directory name to the events file it contains. When
        # several files share a parent name the last one wins.
        found = {}
        for events_file in events_files:
            parent_dir = os.path.dirname(events_file).split(os.sep)[-1]
            if parent_dir in ('training', 'validation'):
                found[parent_dir] = events_file

        # Copy only what was actually found, so a missing events file no
        # longer fails on an unbound variable.
        if 'training' in found:
            self.train_logger.copyFrom(found['training'], max_step=max_step)
        if 'validation' in found:
            self.valid_logger.copyFrom(found['validation'], max_step=max_step)

    def on_epoch_begin(self, epoch, logs=None):
        """Record the wall-clock time at which the epoch starts."""
        self.starttime = time()

    def on_epoch_end(self, epoch, logs=None):
        """Log the epoch metrics and save the model when the period is hit.

        Args:
            epoch: Zero-based epoch index, used as the logging step.
            logs: Metrics mapping. Must contain 'loss' and
                'sparse_categorical_accuracy'. The 'val_'-prefixed entries
                are logged only when 'val_loss' is present.

        Raises:
            KeyError: If a required training metric is missing from ``logs``.
        """
        logs = {} if logs is None else logs
        # Measure once so both loggers report the same duration.
        elapsed = time() - self.starttime

        self.train_logger.log_scalar("Speed", elapsed, epoch)
        self.train_logger.log_scalar(
            "sparse_categorical_accuracy_%",
            logs['sparse_categorical_accuracy'] * 100, epoch)
        self.train_logger.log_scalar("loss", logs['loss'], epoch)

        self.valid_logger.log_scalar("Speed", elapsed, epoch)
        # Validation metrics may be absent (e.g. training without validation
        # data); skip them instead of aborting the run with a KeyError.
        if 'val_loss' in logs:
            self.valid_logger.log_scalar(
                "sparse_categorical_accuracy_%",
                logs['val_sparse_categorical_accuracy'] * 100, epoch)
            self.valid_logger.log_scalar("loss", logs['val_loss'], epoch)

        # Model save
        if (epoch + 1) % self.save_period == 0:
            # self.model is expected to be attached by the training framework.
            self.model.save(
                os.path.join(self.path, 'save_' + str(epoch) + '.h5'))
            self._remove_old_saves(epoch)

    def _remove_old_saves(self, epoch):
        """Delete '.h5' saves whose trailing '_<number>' is below ``epoch``."""
        _, old_saves = scan_dir(self.path, '.h5')
        for save in old_saves:
            try:
                # '<...>_<number>.h5' -> <number>
                saved_epoch = int(save.split('.')[-2].split('_')[-1])
                if saved_epoch < epoch:
                    os.remove(save)
            except (ValueError, IndexError, OSError):
                # Best-effort cleanup: skip files that do not follow the
                # naming scheme or cannot be removed, without swallowing
                # KeyboardInterrupt/SystemExit as a bare except would.
                continue
sparse=args.sparse) eval_env = create_environment(args.env, n_env=args.n_proc, seed=42, size=args.size, sparse=args.sparse) is_mario = True if 'Mario' in args.env else False norm_input = True # Logger TB_LOGGER = Logger(sett.LOGPATH) print('Torch Device: %s' % sett.device) # Store HYPER in the log for key, value in args._get_kwargs(): TB_LOGGER.log_text(tag=str(key), value=[str(value)], step=0) obs = env.reset() # Setup Model n_actions = env.action_space.n if env.action_space.shape == ( ) else env.action_space.shape[0] n_state = env.observation_space.n if env.observation_space.shape == ( ) else env.observation_space.shape conv = True if isinstance(n_state, tuple) else False if args.use_baseline: dqn = DQN(state_dim=n_state, tau=args.tau, action_dim=n_actions,