예제 #1
0
def get_obs_shape(obs_space):
    """Collect the shape(s) of an observation space into an ``AttrDict``.

    When ``obs_space`` has a ``spaces`` attribute, every ``(key, space)``
    pair from ``obs_space.spaces.items()`` contributes ``space.shape`` under
    the same key. Otherwise the result holds a single entry, ``obs``, set to
    ``obs_space.shape``.
    """
    shapes = AttrDict()

    if not hasattr(obs_space, 'spaces'):
        # Plain space: expose its shape under the single key ``obs``.
        shapes.obs = obs_space.shape
        return shapes

    # Composite space: one entry per named sub-space.
    for name, sub_space in obs_space.spaces.items():
        shapes[name] = sub_space.shape

    return shapes
예제 #2
0
    def test_performance(self):
        """Run both training paths on randomly generated experience.

        Builds one synthetic batch of ``params.num_envs * params.rollout``
        samples and passes it to ``train_feed_dict`` and ``train_dataset``,
        each first with ``use_gpu=False`` and then with ``use_gpu=True``.
        The environment is closed even when one of the runs raises, so a
        failing run does not leave it open.
        """
        params = AgentPPO.Params('test_performance')
        params.ppo_epochs = 2
        params.rollout = 16
        env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))

        try:
            observation_shape = env.observation_space.shape
            experience_size = params.num_envs * params.rollout

            # Generate random data; every array has ``experience_size`` rows.
            data = AttrDict()
            data.obs = np.random.normal(
                size=(experience_size, ) + observation_shape,
            )
            # Integer actions drawn from {0, 1, 2}.
            # NOTE(review): assumes the env has at least 3 discrete
            # actions -- confirm against the action space.
            data.act = np.random.randint(0, 3, size=[experience_size])
            data.old_prob = np.random.uniform(0, 1, size=[experience_size])
            data.adv = np.random.normal(size=[experience_size])
            data.ret = np.random.normal(size=[experience_size])

            self.train_feed_dict(env, data, params, use_gpu=False)
            self.train_feed_dict(env, data, params, use_gpu=True)
            self.train_dataset(env, data, params, use_gpu=False)
            self.train_dataset(env, data, params, use_gpu=True)
        finally:
            # Release the environment regardless of the outcome above.
            env.close()