# --- Cart pole: evaluate a trained PPO policy and plot the rollout ---

import argparse
import datetime
import os

import numpy as np
import torch
import torch.nn as nn
from ruamel.yaml import YAML, dump, RoundTripDumper

# rslgym-specific names used below (rslgym_module, cart_pole_example_env,
# VecEnvPython) come from the rslgym package; their import lines are not
# part of this excerpt.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=0, help='random seed in [0, 2**32)')
    parser.add_argument('-w', '--weight_dir', type=str, default='', help='path to trained weights')
    parser.add_argument('-i', '--iteration', type=int, default=0, help='algorithm iteration to load')
    parser.add_argument('-s', '--seconds', type=int, default=10, help='testing duration in seconds')
    args = parser.parse_args()

    weight_dir = args.weight_dir
    iteration = args.iteration

    task_path = os.path.dirname(os.path.realpath(__file__))
    rsc_path = task_path + "/../rsc"

    save_path = os.path.join(weight_dir,
                             'testing_' + str(iteration),
                             datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(save_path)

    # find the config file that was saved alongside the weights
    cfg_abs_path = None
    for file in os.listdir(weight_dir):
        if file.startswith('cfg'):
            cfg_abs_path = weight_dir + '/' + file

    # config; a single environment is enough for testing
    cfg = YAML().load(open(cfg_abs_path, 'r'))
    cfg['environment']['num_envs'] = 1

    impl = cart_pole_example_env(rsc_path, dump(cfg['environment'], Dumper=RoundTripDumper))
    env = VecEnvPython(impl)

    actor_net = rslgym_module.MLP([32, 32], nn.Tanh,
                                  env.observation_space.shape[0],
                                  env.action_space.shape[0])
    actor = rslgym_module.Actor(
        actor_net,
        rslgym_module.MultivariateGaussianDiagonalCovariance(env.action_space.shape[0], 1.0),
        env.observation_space.shape[0],
        env.action_space.shape[0],
        'cpu')

    # load the trained actor weights
    snapshot = torch.load(weight_dir + '/snapshot' + str(iteration) + '.pt')
    actor.load_state_dict(snapshot['actor_state_dict'])

    if cfg['environment']['render']:
        env.wrapper.showWindow()
    if cfg['environment']['record_video']:
        env.start_recording_video(save_path + '/test.mp4')

    test_steps = int(args.seconds / cfg['environment']['control_dt'])
    torch.manual_seed(args.seed)

    # one dummy step so the environment info is populated before the rollout
    act = np.ndarray(shape=(1, env.wrapper.getActionDim()), dtype=np.float32)
    _, _, _, new_info = env.step(act, visualize=cfg['environment']['render'])

    # containers for analysis
    actions = np.zeros(shape=(2, test_steps), dtype=np.float32)
    obs = np.zeros(shape=(4, test_steps), dtype=np.float32)

    ob = env.reset()
    try:
        for i in range(test_steps):
            if i % 100 == 0:
                env.reset()
            act = actor.noiseless_action(ob).cpu().detach().numpy()
            ob, rew, done, info = env.step(act, visualize=cfg['environment']['render'])
            obs[:, i] = ob
            actions[0, i] = info['action']
            actions[1, i] = act
    except KeyboardInterrupt:
        pass
    finally:
        if cfg['environment']['record_video']:
            env.stop_recording_video()
        if cfg['environment']['render']:
            env.wrapper.hideWindow()

        import matplotlib
        matplotlib.use('TkAgg')
        import matplotlib.pyplot as plt

        plt.figure()
        plt.plot(actions[0, :], label='applied action')
        plt.plot(actions[1, :], label='nn action')
        plt.grid()
        plt.legend()

        plt.figure()
        plt.plot(obs[0, :], label='cart pos')
        plt.plot(obs[2, :], label='cart vel')
        plt.grid()
        plt.legend()

        plt.figure()
        plt.plot(obs[1, :], label='pend pos')
        plt.plot(obs[3, :], label='pend vel')
        plt.grid()
        plt.legend()

        plt.show(block=False)
        input('press [ENTER] to exit')
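# Assumed entry point (not part of the original excerpt); the invocation
# below is a hypothetical example:
#   python <this_script>.py -w runs/rsl_ppo/<run_dir> -i 1000 -s 10
if __name__ == '__main__':
    main()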
# --- Cart pole: train a PPO policy ---

import argparse
import math
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from ruamel.yaml import YAML, dump, RoundTripDumper
from scipy.signal import savgol_filter

# rslgym-specific names used below (rslgym_module, PPO, ConfigurationSaver,
# cart_pole_example_env, VecEnvPython) come from the rslgym package; their
# import lines are not part of this excerpt.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg_name', type=str, default='/cfg.yaml', help='configuration file')
    parser.add_argument('--gpu', type=int, default=0, help='gpu id (-1 for cpu)')
    args = parser.parse_args()

    cfg_name = args.cfg_name
    # -1 selects the CPU, any id >= 0 selects that GPU
    device = args.gpu if args.gpu >= 0 else 'cpu'

    task_path = os.path.dirname(os.path.realpath(__file__))
    rsc_path = task_path + "/../rsc"
    env_path = task_path + "/.."
    cfg_abs_path = task_path + "/../" + cfg_name
    log_dir = os.path.join(task_path, 'runs/rsl_ppo')

    save_items = [env_path + '/Environment.hpp', cfg_abs_path]
    cfg_saver = ConfigurationSaver(log_dir, save_items, args)

    # config
    cfg = YAML().load(open(cfg_abs_path, 'r'))

    impl = cart_pole_example_env(rsc_path, dump(cfg['environment'], Dumper=RoundTripDumper))
    env = VecEnvPython(impl)

    n_steps = math.floor(cfg['environment']['max_time'] / cfg['environment']['control_dt'])
    total_steps_per_episode = n_steps * cfg['environment']['num_envs']

    torch.manual_seed(cfg['environment']['seed'])

    obs_size = env.observation_space.shape[0]
    action_size = env.action_space.shape[0]

    actor_net = rslgym_module.MLP([32, 32], nn.Tanh, obs_size, action_size, 0.5)
    critic_net = rslgym_module.MLP([32, 32], nn.Tanh, obs_size, 1, 0.5)
    actor_dist = rslgym_module.MultivariateGaussianDiagonalCovariance(action_size, 1.0)
    actor = rslgym_module.Actor(actor_net, actor_dist, obs_size, action_size, device)
    critic = rslgym_module.Critic(critic_net, obs_size, device)

    ppo_training = PPO(actor=actor,
                       critic=critic,
                       num_envs=cfg['environment']['num_envs'],
                       num_transitions_per_env=n_steps,
                       num_learning_epochs=cfg['algorithm']['num_epochs'],
                       gamma=cfg['algorithm']['discount_factor'],
                       lam=cfg['algorithm']['gae_lam'],
                       entropy_coef=cfg['algorithm']['ent_coef'],
                       num_mini_batches=cfg['algorithm']['num_mini_batches'],
                       device=device,
                       log_dir=cfg_saver.data_dir,
                       mini_batch_sampling="in_order",
                       learning_rate=cfg['algorithm']['learning_rate'])

    avg_rewards = []
    fig, ax = plt.subplots()

    for update in range(cfg['algorithm']['total_algo_updates']):
        start = time.time()
        obs = env.reset()
        reward_ll_sum = 0
        done_sum = 0

        # per-env episode length: counts steps since the latest "done";
        # an env can terminate multiple times, the count resets on each done
        ep_len = np.zeros(shape=env.num_envs)

        # every 20 updates: visualize and record one rollout, then save a snapshot
        if update % 20 == 0:
            env.show_window()
            env.start_recording_video(cfg_saver.data_dir + "/" + str(update) + ".mp4")
            for step in range(n_steps):
                action_ll, _ = actor.sample(torch.from_numpy(obs).to(ppo_training.device))
                obs, reward_ll, dones, _ = env.step(action_ll.cpu().detach().numpy(), True)
            ppo_training.save_training(cfg_saver.data_dir, update, update)
            obs = env.reset()
            env.stop_recording_video()
            env.hide_window()

        # collect one rollout of experience
        for step in range(n_steps):
            actor_obs = obs
            critic_obs = obs
            action = ppo_training.observe(actor_obs)
            obs, reward, dones, _ = env.step(action, False)
            ep_len[~dones] += 1
            ep_len[dones] = 0
            ppo_training.step(value_obs=critic_obs, rews=reward, dones=dones, infos=[])
            done_sum = done_sum + sum(dones)
            reward_ll_sum = reward_ll_sum + sum(reward)

        ppo_training.update(actor_obs=obs,
                            value_obs=obs,
                            log_this_iteration=update % 10 == 0,
                            update=update)
        end = time.time()

        average_ll_performance = reward_ll_sum / total_steps_per_episode
        average_dones = done_sum / total_steps_per_episode
        avg_rewards.append(average_ll_performance)
        avg_ep_leng = ep_len.mean()

        ppo_training.writer.add_scalar('Policy/average_reward', average_ll_performance, update)
        ppo_training.writer.add_scalar('Policy/average_dones', average_dones, update)
        ppo_training.writer.add_scalar('Training/elapsed_time_episode', end - start, update)
        ppo_training.writer.add_scalar('Training/fps', total_steps_per_episode / (end - start), update)
        ppo_training.writer.add_scalar('Policy/avg_ep_len', avg_ep_leng, update)

        print('----------------------------------------------------')
        print('{:>6}th iteration'.format(update))
        print('{:<40} {:>6}'.format("average ll reward: ", '{:0.10f}'.format(average_ll_performance)))
        print('{:<40} {:>6}'.format("dones: ", '{:0.6f}'.format(average_dones)))
        print('{:<40} {:>6}'.format("avg_ep_len: ", '{:0.6f}'.format(avg_ep_leng)))
        print('{:<40} {:>6}'.format("time elapsed in this iteration: ", '{:6.4f}'.format(end - start)))
        print('{:<40} {:>6}'.format("fps: ", '{:6.0f}'.format(total_steps_per_episode / (end - start))))
        print('----------------------------------------------------\n')

        # plot the learning curve, smoothed once enough data points exist
        if update > 100 and len(avg_rewards) > 100:
            ax.plot(range(len(avg_rewards)), savgol_filter(avg_rewards, 51, 3))
        else:
            ax.plot(range(len(avg_rewards)), avg_rewards)
        fig.savefig(cfg_saver.data_dir + '/demo.png', bbox_inches='tight')
        ax.clear()
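# Assumed entry point (not part of the original excerpt); hypothetical
# invocation, training on the CPU:
#   python <this_script>.py --cfg_name /cfg.yaml --gpu -1
if __name__ == '__main__':
    main()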
# --- OpenAI Gym: evaluate a trained policy and plot the rollout ---

import argparse
import datetime
import os

import gym
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from gym import wrappers
from ruamel.yaml import YAML

# rslgym-specific names used below (rslgym_module) come from the rslgym
# package; their import lines are not part of this excerpt.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=0, help='random seed in [0, 2**32)')
    parser.add_argument('-w', '--weight_dir', type=str, default='', help='path to trained weights')
    parser.add_argument('-i', '--iteration', type=int, default=0, help='algorithm iteration to load')
    parser.add_argument('-s', '--seconds', type=int, default=10, help='testing duration')
    args = parser.parse_args()

    weight_dir = args.weight_dir
    iteration = args.iteration

    save_path = os.path.join(weight_dir,
                             'testing_' + str(iteration),
                             datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(save_path)

    # find the config file that was saved alongside the weights
    cfg_abs_path = None
    for file in os.listdir(weight_dir):
        if file.startswith('cfg'):
            cfg_abs_path = weight_dir + '/' + file

    # config
    cfg = YAML().load(open(cfg_abs_path, 'r'))

    # single env for testing
    test_env = gym.make(cfg['environment']['env_name'])
    test_env.seed(cfg['environment']['seed'])

    # https://github.com/openai/gym/issues/1925
    if cfg['environment']['record_video']:
        test_env = wrappers.Monitor(test_env, save_path, force=True,
                                    video_callable=lambda episode: True)

    obs_space = test_env.observation_space
    action_space = test_env.action_space
    actor_architecture = [64, 64]

    # obs_normalizer = normalizer.RunningMeanStd(shape=[obs_space.low.size])
    obs_normalizer = None

    torch.manual_seed(cfg['environment']['seed'])

    actor_net = nn.Sequential(
        rslgym_module.EmpiricalNormalization([obs_space.low.size]),
        rslgym_module.MLP(actor_architecture, nn.LeakyReLU,
                          obs_space.low.size, action_space.low.size))
    actor = rslgym_module.Actor(
        actor_net,
        rslgym_module.MultivariateGaussianDiagonalCovariance(action_space.low.size, 1.0),
        obs_space.low.size,
        action_space.low.size,
        'cpu')

    # load actor weights
    snapshot = torch.load(weight_dir + '/snapshot' + str(iteration) + '.pt')
    actor.load_state_dict(snapshot['actor_state_dict'])

    test_steps = test_env.spec.max_episode_steps
    torch.manual_seed(args.seed)

    # containers for analysis
    actions = np.zeros(shape=(action_space.low.size, test_steps), dtype=np.float32)
    obs = np.zeros(shape=(obs_space.low.size, test_steps), dtype=np.float32)
    rews = np.zeros(shape=(1, test_steps), dtype=np.float32)

    ob = test_env.reset()
    ob = np.array(ob).reshape(1, -1).astype(np.float32)
    try:
        for i in range(test_steps):
            act = actor.noiseless_action(ob).cpu().detach().numpy()
            ob, r, done, info = test_env.step(act[0])
            ob = np.array(ob).reshape(1, -1).astype(np.float32)
            if cfg['environment']['render']:
                test_env.render()
            obs[:, i] = ob
            actions[:, i] = act
            rews[:, i] = r
            if done:
                break
    except KeyboardInterrupt:
        pass
    finally:
        if cfg['environment']['record_video']:
            # close the video recording wrapper
            test_env.close()

        plt.figure()
        for i in range(action_space.low.size):
            plt.plot(actions[i, :], label='ac_' + str(i))
        plt.grid()
        plt.legend()

        plt.figure()
        for i in range(obs_space.low.size):
            plt.plot(obs[i, :], label='ob_' + str(i))
        plt.grid()
        plt.legend()

        plt.figure()
        plt.plot(rews[0, :], label='reward')
        plt.grid()
        plt.legend()

        plt.show(block=False)
        input('press [ENTER] to exit')
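# Assumed entry point (not part of the original excerpt); hypothetical
# invocation:
#   python <this_script>.py -w runs/rsl_ppo/<run_dir> -i 500
if __name__ == '__main__':
    main()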
# --- ANYmal: train a TRPO policy ---

import argparse
import math
import os
import time

import numpy as np
import torch
import torch.nn as nn
from ruamel.yaml import YAML, dump, RoundTripDumper

# rslgym-specific names used below (rslgym_module, TRPO, ConfigurationSaver,
# anymal_example_env, VecEnvPython) come from the rslgym package; their
# import lines are not part of this excerpt.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=0, help='random seed in [0, 2**32)')
    parser.add_argument('--cfg_name', type=str, default='/cfg_trpo.yaml', help='configuration file')
    parser.add_argument('--gpu', type=int, default=0, help='gpu id (-1 for cpu)')
    args = parser.parse_args()

    cfg_name = args.cfg_name
    # -1 selects the CPU, any id >= 0 selects that GPU
    device = args.gpu if args.gpu >= 0 else 'cpu'

    task_path = os.path.dirname(os.path.realpath(__file__))
    rsc_path = task_path + "/../rsc"
    env_path = task_path + "/.."
    cfg_abs_path = task_path + "/.." + cfg_name
    log_dir = os.path.join(task_path, 'runs/rsl_trpo')

    save_items = [env_path + '/Environment.hpp', cfg_abs_path, os.path.realpath(__file__)]
    cfg_saver = ConfigurationSaver(log_dir, save_items, args)

    # config
    cfg = YAML().load(open(cfg_abs_path, 'r'))

    impl = anymal_example_env(rsc_path, dump(cfg['environment'], Dumper=RoundTripDumper))
    env = VecEnvPython(impl)

    n_steps = math.floor(cfg['environment']['max_time'] / cfg['environment']['control_dt'])
    total_steps_per_episode = n_steps * cfg['environment']['num_envs']

    torch.manual_seed(args.seed)

    actor_net = rslgym_module.MLP([256, 128], nn.Tanh,
                                  env.observation_space.shape[0],
                                  env.action_space.shape[0],
                                  init_scale=1.4)
    critic_net = rslgym_module.MLP([256, 128], nn.Tanh,
                                   env.observation_space.shape[0],
                                   1,
                                   init_scale=1.4)
    actor = rslgym_module.Actor(
        actor_net,
        rslgym_module.MultivariateGaussianDiagonalCovariance(env.action_space.shape[0], 1.0),
        env.observation_space.shape[0],
        env.action_space.shape[0],
        device)
    critic = rslgym_module.Critic(critic_net, env.observation_space.shape[0], device)

    agent = TRPO(
        actor=actor,
        critic=critic,
        num_envs=cfg['environment']['num_envs'],
        num_transitions_per_env=n_steps,
        critic_learning_epochs=cfg['algorithm']['critic_learning']['epochs'],
        critic_learning_rate=cfg['algorithm']['critic_learning']['learning_rate'],
        critic_mini_batches=cfg['algorithm']['critic_learning']['num_mini_batches'],
        max_d_kl=cfg['algorithm']['max_kld'],
        gamma=cfg['algorithm']['discount_factor'],
        lam=cfg['algorithm']['gae_lam'],
        entropy_coef=cfg['algorithm']['entropy_coef'],
        device=device,
        log_dir=cfg_saver.data_dir,
        mini_batch_sampling="in_order"
    )

    avg_rewards = []
    for update in range(cfg['algorithm']['total_algorithm_updates']):
        start = time.time()
        obs = env.reset()
        reward_ll_sum = 0
        ep_len = np.zeros(shape=env.num_envs)
        ep_len_collected = []

        # periodically visualize (and optionally record) one rollout, then save a snapshot
        if update % cfg['environment']['eval_every_n'] == 0:
            env.show_window()
            if cfg['environment']['record_video']:
                env.start_recording_video(cfg_saver.data_dir + "/" + str(update) + ".mp4")
            for step in range(n_steps):
                action_ll, _ = actor.sample(torch.from_numpy(obs).to(agent.device))
                obs, reward_ll, dones, info = env.step(action_ll.cpu().detach().numpy(), True)
            agent.save_training(cfg_saver.data_dir, update, update)
            obs = env.reset()
            if cfg['environment']['record_video']:
                env.stop_recording_video()
            env.hide_window()

        # collect one rollout of experience
        for step in range(n_steps):
            actor_obs = obs
            critic_obs = obs
            action = agent.observe(actor_obs)
            obs, reward, dones, info = env.step(action, False)
            agent.step(value_obs=critic_obs, rews=reward, dones=dones, infos=[])
            reward_ll_sum = reward_ll_sum + sum(reward)

            ep_len += 1
            if any(dones):
                ep_len_collected += list(ep_len[dones])
                ep_len[dones] = 0
            if step == n_steps - 1:
                # only episodes still running at rollout end that span the full
                # rollout are counted, so the average episode length is approximate
                for length in list(ep_len):
                    if length == n_steps:
                        ep_len_collected.append(length)

        agent.update(actor_obs=obs,
                     value_obs=obs,
                     log_this_iteration=update % 10 == 0,
                     update=update)
        end = time.time()

        # keep exploration noise from collapsing (12 = anymal action dimension)
        actor.distribution.enforce_minimum_std((torch.ones(12) * 0.2).to(device))

        average_ll_performance = reward_ll_sum / total_steps_per_episode
        avg_rewards.append(average_ll_performance)
        if len(ep_len_collected) > 0:
            avg_ep_leng = sum(ep_len_collected) / len(ep_len_collected)
        else:
            avg_ep_leng = n_steps

        agent.writer.add_scalar('Policy/average_reward', average_ll_performance, update)
        agent.writer.add_scalar('Training/elapsed_time_episode', end - start, update)
        agent.writer.add_scalar('Training/fps', total_steps_per_episode / (end - start), update)
        agent.writer.add_scalar('Policy/avg_ep_len', avg_ep_leng, update)

        print('----------------------------------------------------')
        print('{:>6}th iteration'.format(update))
        print('{:<40} {:>6}'.format("average ll reward: ", '{:0.10f}'.format(average_ll_performance)))
        print('{:<40} {:>6}'.format("avg_ep_len: ", '{:0.6f}'.format(avg_ep_leng)))
        print('{:<40} {:>6}'.format("time elapsed in this iteration: ", '{:6.4f}'.format(end - start)))
        print('{:<40} {:>6}'.format("fps: ", '{:6.0f}'.format(total_steps_per_episode / (end - start))))
        print('{:<40} {:>6}'.format("std: ", '{}'.format(actor.distribution.log_std.exp())))
        print('----------------------------------------------------\n')
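# Assumed entry point (not part of the original excerpt); hypothetical
# invocation, training on GPU 0:
#   python <this_script>.py --cfg_name /cfg_trpo.yaml --gpu 0
if __name__ == '__main__':
    main()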
# --- OpenAI Gym: train a PPO policy with multiprocess vectorized envs ---

import argparse
import functools
import os
import time

import gym
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from gym import wrappers
from ruamel.yaml import YAML
from scipy.signal import savgol_filter

# rslgym-specific names used below (rslgym_module, PPO, ConfigurationSaver,
# MultiprocessVectorEnv) come from the rslgym package; their import lines
# are not part of this excerpt.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg_name', type=str, default='/cfg.yaml', help='configuration file')
    parser.add_argument('--gpu', type=int, default=0, help='gpu id (-1 for cpu)')
    args = parser.parse_args()

    cfg_name = args.cfg_name
    # -1 selects the CPU, any id >= 0 selects that GPU
    device = args.gpu if args.gpu >= 0 else 'cpu'

    task_path = os.path.dirname(os.path.realpath(__file__))
    cfg_abs_path = task_path + "/../" + cfg_name
    log_dir = os.path.join(task_path, 'runs/rsl_ppo')

    save_items = [cfg_abs_path]
    cfg_saver = ConfigurationSaver(log_dir, save_items, args)

    # config
    cfg = YAML().load(open(cfg_abs_path, 'r'))

    num_envs = cfg['environment']['num_envs']
    # give every worker process its own seed
    process_seeds = np.arange(num_envs) + cfg['environment']['seed'] * num_envs
    assert process_seeds.max() < 2 ** 32

    def make_env(process_idx, test):
        env = gym.make(cfg['environment']['env_name'])
        process_seed = int(process_seeds[process_idx])
        env.seed(process_seed)
        return env

    def make_batch_env(test, n_envs):
        return MultiprocessVectorEnv(
            [functools.partial(make_env, idx, test) for idx in range(n_envs)]
        )

    # batch env for training
    env = make_batch_env(False, num_envs)

    # single env for testing
    test_env = gym.make(cfg['environment']['env_name'])
    test_env.seed(cfg['environment']['seed'])

    # https://github.com/openai/gym/issues/1925#issuecomment-753465510
    if cfg['environment']['record_video']:
        test_env = wrappers.Monitor(test_env, cfg_saver.data_dir, force=True,
                                    video_callable=lambda episode: True)

    max_episode_steps = test_env.spec.max_episode_steps
    obs_space = test_env.observation_space
    action_space = test_env.action_space
    total_steps = max_episode_steps * num_envs

    actor_architecture = [64, 64]
    value_net_architecture = [64, 64]

    torch.manual_seed(cfg['environment']['seed'])

    actor_net = nn.Sequential(
        rslgym_module.EmpiricalNormalization([obs_space.low.size]),
        rslgym_module.MLP(actor_architecture, nn.LeakyReLU,
                          obs_space.low.size, action_space.low.size)
    )
    critic_net = nn.Sequential(
        rslgym_module.EmpiricalNormalization([obs_space.low.size]),
        rslgym_module.MLP(value_net_architecture, nn.LeakyReLU,
                          obs_space.low.size, 1)
    )
    actor = rslgym_module.Actor(
        actor_net,
        rslgym_module.MultivariateGaussianDiagonalCovariance(action_space.low.size, 1.0),
        obs_space.low.size,
        action_space.low.size,
        device)
    critic = rslgym_module.Critic(critic_net, obs_space.low.size, device)

    agent = PPO(actor=actor,
                critic=critic,
                num_envs=num_envs,
                num_transitions_per_env=max_episode_steps,
                num_learning_epochs=cfg['algorithm']['num_epochs'],
                learning_rate=cfg['algorithm']['learning_rate'],
                gamma=cfg['algorithm']['discount_factor'],
                lam=cfg['algorithm']['gae_lam'],
                entropy_coef=cfg['algorithm']['ent_coef'],
                num_mini_batches=cfg['algorithm']['num_mini_batches'],
                device=device,
                log_dir=cfg_saver.data_dir,
                mini_batch_sampling='in_order')

    def obs_to_numpy(obs):
        return np.array(obs).reshape(len(obs), -1).astype(np.float32)

    avg_rewards = []
    fig, ax = plt.subplots()

    env.reset()
    obs = obs_to_numpy(env.get_observation())
    episode_len = np.zeros(num_envs, dtype="i")

    for update in range(cfg['algorithm']['total_algo_updates']):
        ax.set(xlabel='iteration', ylabel='avg performance', title='average performance')
        ax.grid()
        start = time.time()
        reward_ll_sum = 0
        done_sum = 0
        average_dones = 0.

        # evaluate the current policy on the test env and save a snapshot
        if update % 50 == 0:
            obs_sample = test_env.reset()
            obs_sample = np.array(obs_sample).reshape(1, -1).astype(np.float32)
            for step in range(max_episode_steps):
                action = agent.observe(obs_sample)
                obs_sample, r, dones, _ = test_env.step(action[0])
                obs_sample = np.array(obs_sample).reshape(1, -1).astype(np.float32)
                if cfg['environment']['render']:
                    test_env.render()
                if dones:
                    # reset and stop after the first episode
                    obs_sample = test_env.reset()
                    obs_sample = np.array(obs_sample).reshape(1, -1).astype(np.float32)
                    break
            agent.save_training(cfg_saver.data_dir, update, update)

        # collect one rollout of experience
        for step in range(cfg['environment']['steps_per_env_and_episode']):
            episode_len += 1
            actor_obs = obs
            critic_obs = obs
            action = agent.observe(actor_obs)
            reward, dones, infos = env.step(action)
            obs = obs_to_numpy(env.get_observation())
            reward = np.array(reward)
            dones = np.array(dones)

            # an env ends when it reports done or hits the episode length limit
            resets = episode_len == max_episode_steps
            end = np.logical_or(resets, dones)
            not_end = np.logical_not(end)
            episode_len[end] = 0

            agent.step(value_obs=critic_obs, rews=reward, dones=dones, infos=[])
            done_sum = done_sum + sum(dones)
            reward_ll_sum = reward_ll_sum + sum(reward)

            # reset the envs that ended; a True mask entry skips the reset
            env.reset(not_end)
            obs = obs_to_numpy(env.get_observation())

        agent.update(actor_obs=obs,
                     value_obs=obs,
                     log_this_iteration=update % 10 == 0,
                     update=update)
        end = time.time()

        average_ll_performance = reward_ll_sum / total_steps
        average_dones = done_sum / total_steps
        avg_rewards.append(average_ll_performance)

        # keep exploration noise from collapsing
        actor.distribution.enforce_minimum_std((torch.ones(action_space.low.size) * 0.2).to(device))

        # plot the learning curve, smoothed once enough data points exist
        if update > 100 and len(avg_rewards) > 100:
            ax.plot(range(len(avg_rewards)), savgol_filter(avg_rewards, 51, 3))
        else:
            ax.plot(range(len(avg_rewards)), avg_rewards)
        fig.savefig(cfg_saver.data_dir + '/demo.png', bbox_inches='tight')
        ax.clear()

        print('----------------------------------------------------')
        print('{:>6}th iteration'.format(update))
        print('{:<40} {:>6}'.format("average ll reward: ", '{:0.10f}'.format(average_ll_performance)))
        print('{:<40} {:>6}'.format("dones: ", '{:0.6f}'.format(average_dones)))
        print('{:<40} {:>6}'.format("time elapsed in this iteration: ", '{:6.4f}'.format(end - start)))
        print('{:<40} {:>6}'.format("fps: ", '{:6.0f}'.format(total_steps / (end - start))))
        print('std: ')
        print(np.exp(actor.distribution.log_std.cpu().detach().numpy()))
        print('----------------------------------------------------\n')
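# Assumed entry point (not part of the original excerpt); hypothetical
# invocation:
#   python <this_script>.py --cfg_name /cfg.yaml --gpu -1
if __name__ == '__main__':
    main()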
# --- ANYmal: evaluate a trained PPO policy ---

import argparse
import datetime
import os

import numpy as np
import torch
import torch.nn as nn
from ruamel.yaml import YAML, dump, RoundTripDumper

# rslgym-specific names used below (rslgym_module, anymal_example_env,
# VecEnvPython) come from the rslgym package; their import lines are not
# part of this excerpt.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=0, help='random seed in [0, 2**32)')
    parser.add_argument('-w', '--weight_dir', type=str, default='', help='path to trained weights')
    parser.add_argument('-i', '--iteration', type=int, default=0, help='algorithm iteration to load')
    parser.add_argument('-s', '--seconds', type=int, default=10, help='testing duration in seconds')
    args = parser.parse_args()

    weight_dir = args.weight_dir
    iteration = args.iteration

    task_path = os.path.dirname(os.path.realpath(__file__))
    rsc_path = task_path + "/../rsc"

    save_path = os.path.join(weight_dir,
                             'testing_' + str(iteration),
                             datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(save_path)

    # find the PPO config file that was saved alongside the weights
    cfg_abs_path = None
    for file in os.listdir(weight_dir):
        if file.startswith('cfg_ppo'):
            cfg_abs_path = weight_dir + '/' + file

    # config; a single environment is enough for testing
    cfg = YAML().load(open(cfg_abs_path, 'r'))
    cfg['environment']['num_envs'] = 1

    impl = anymal_example_env(rsc_path, dump(cfg['environment'], Dumper=RoundTripDumper))
    env = VecEnvPython(impl)

    obs_size = env.observation_space.shape[0]
    action_size = env.action_space.shape[0]

    actor_net = nn.Sequential(
        rslgym_module.EmpiricalNormalization([obs_size]),
        rslgym_module.MLP([256, 128], nn.Tanh, obs_size, action_size, init_scale=1.4))
    actor = rslgym_module.Actor(
        actor_net,
        rslgym_module.MultivariateGaussianDiagonalCovariance(action_size, 1.0),
        obs_size,
        action_size,
        'cpu')

    # load the trained actor weights
    snapshot = torch.load(weight_dir + '/snapshot' + str(iteration) + '.pt')
    actor.load_state_dict(snapshot['actor_state_dict'])

    if cfg['environment']['render']:
        env.wrapper.showWindow()
    if cfg['environment']['record_video']:
        env.start_recording_video(save_path + '/test.mp4')

    test_steps = int(args.seconds / cfg['environment']['control_dt'])
    torch.manual_seed(args.seed)

    # one dummy step so the environment info is populated before the rollout
    act = np.ndarray(shape=(1, env.wrapper.getActionDim()), dtype=np.float32)
    _, _, _, new_info = env.step(act, visualize=cfg['environment']['render'])

    ob = env.reset()
    try:
        for i in range(test_steps):
            if i % 100 == 0:
                env.reset()
            act = actor.noiseless_action(ob).cpu().detach().numpy()
            ob, rew, done, info = env.step(act, visualize=cfg['environment']['render'])
    except KeyboardInterrupt:
        pass
    finally:
        if cfg['environment']['record_video']:
            env.stop_recording_video()
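# Assumed entry point (not part of the original excerpt); hypothetical
# invocation:
#   python <this_script>.py -w runs/rsl_ppo/<run_dir> -i 2000 -s 20
if __name__ == '__main__':
    main()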