# NOTE(review): this chunk was collapsed onto one physical line; indentation below is
# reconstructed from the statement syntax — verify against the original file.
# The first six lines are the TAIL of an evaluation function whose `def` is above this
# chunk (it rolls out a policy in `_env` and accumulates the episode return).
        # Gym 4-tuple step API: (observation, reward, done, info); info is discarded.
        state, reward, done, _ = _env.step(action.cpu().numpy()[0])
        total_reward += reward
        # Re-wrap the raw observation as a (1, obs_dim) tensor for the next action query.
        state = agent.Tensor([state])
        if done:
            break
    return total_reward

# ---- test-harness setup: build one env/agent pair per environment name ----
test_episodes = 100
for env_name in [args.env]:#os.listdir(base_dir):
    env = NormalizedActions(gym.make(env_name))
    # Evaluation-only agent: train_mode=False; replay buffer, optimizer choice and
    # exploration (epsilon=0, alpha=0) are disabled since weights are loaded later.
    agent = DDPG(beta=0.9, epsilon=0, learning_rate=1e-4, gamma=0.99, tau=0.01,
                 hidden_size_dim0=args.hidden_size, hidden_size_dim1=args.hidden_size,
                 num_inputs=env.observation_space.shape[0], action_space=env.action_space,
                 train_mode=False, alpha=0, replay_size=0, optimizer = 0,
                 two_player=args.two_player, normalize_obs=True)
    # Uniform [-1, 1] noise source (used as the adversary/action perturbation).
    noise = uniform.Uniform(agent.Tensor([-1.0]), agent.Tensor([1.0]))
    # Snapshot of the nominal body masses so they can be perturbed and restored.
    # NOTE(review): env.env.env.model assumes a doubly-wrapped MuJoCo env — confirm
    # the wrapper depth matches the gym version in use.
    basic_bm = copy.deepcopy(env.env.env.model.body_mass.copy())
    env_dir = base_dir + env_name + '/'
    # Walk the result directory tree: optimizer / action-noise type / run subdirs.
    for optimizer in [args.optimizer]: #['RMSprop', 'SGLD_thermal_0.01', 'SGLD_thermal_0.001', 'SGLD_thermal_0.0001', 'SGLD_thermal_1e-05']:
        for noise_type in [args.action_noise]:
            noise_dir = env_dir + optimizer + '/' + noise_type + '/nr_mdp_' + str(args.alpha) + '_1/'
            if os.path.exists(noise_dir):
                for subdir in sorted(os.listdir(noise_dir)):
                    results = {}
                    run_number = 0
                    # NOTE(review): `dir` shadows the builtin of the same name.
                    dir = noise_dir + subdir #+ '/' + str(run_number)
                    print(dir)
                    # Condition continues past this chunk (trailing backslash) —
                    # the rest of the `if` is outside the visible source.
                    if os.path.exists(noise_dir + subdir)\
# NOTE(review): this chunk was collapsed onto one physical line; indentation below is
# reconstructed from the statement syntax — verify against the original file.
def reset_noise(a, a_noise):
    """Reset the action-noise process, if one is configured.

    Parameters
    ----------
    a : agent (unused here; kept for call-site symmetry)
    a_noise : noise process with a ``reset()`` method, or None
    """
    if a_noise is not None:
        a_noise.reset()

# ---- training-loop setup (top-level script statements) ----
total_steps = 0
print(base_dir)
if args.num_steps is not None:
    # Exactly one of num_steps / num_epochs may be given; derive epoch count
    # from the total step budget.
    assert args.num_epochs is None
    nb_epochs = int(args.num_steps) // (args.num_epochs_cycles * args.num_rollout_steps)
else:
    nb_epochs = 500
# Initial observations, wrapped as (1, obs_dim) tensors.
# NOTE(review): `eval_env` and `normalnoise` are defined outside this chunk — confirm.
state = agent.Tensor([env.reset()])
eval_state = agent.Tensor([eval_env.reset()])
eval_reward = 0
episode_reward = 0
agent.train()
reset_noise(agent, normalnoise)
# Optional live plotting; `vis is None` disables all visualization downstream.
if args.visualize:
    vis = visdom.Visdom(env=base_dir)
else:
    vis = None
train_steps = 0
ratio = args.ratio + 1