def get_obs_shape(obs_space):
    """Return an AttrDict mapping observation names to their shapes.

    Dict-like spaces (anything exposing a ``.spaces`` attribute) contribute
    one entry per sub-space; a plain space is stored under the single
    key ``obs``.
    """
    shapes = AttrDict()
    if hasattr(obs_space, 'spaces'):
        # composite (dict) observation space: one shape per named sub-space
        for name, subspace in obs_space.spaces.items():
            shapes[name] = subspace.shape
    else:
        # single-tensor observation space
        shapes.obs = obs_space.shape
    return shapes
def test_performance(self):
    """Benchmark PPO training throughput on synthetic rollout data.

    Builds a randomly generated experience batch shaped like real rollout
    data, then runs both training code paths (feed_dict and dataset),
    each on CPU and on GPU, against a real Doom environment instance.
    """
    params = AgentPPO.Params('test_performance')
    params.ppo_epochs = 2
    params.rollout = 16

    env = make_doom_env(doom_env_by_name(TEST_ENV_NAME))
    obs_shape = env.observation_space.shape
    num_samples = params.num_envs * params.rollout

    # Synthesize a random experience batch with the expected shapes.
    # NOTE(review): action values are drawn from [0, 3) — presumably the
    # env's action count; confirm against env.action_space.
    data = AttrDict()
    data.obs = np.random.normal(size=(num_samples,) + obs_shape)
    data.act = np.random.randint(0, 3, size=[num_samples])
    data.old_prob = np.random.uniform(0, 1, size=[num_samples])
    data.adv = np.random.normal(size=[num_samples])
    data.ret = np.random.normal(size=[num_samples])

    # Exercise both training implementations on CPU first, then GPU.
    for on_gpu in (False, True):
        self.train_feed_dict(env, data, params, use_gpu=on_gpu)
    for on_gpu in (False, True):
        self.train_dataset(env, data, params, use_gpu=on_gpu)

    env.close()