Exemplo n.º 1
0
 def test_rms_normalizer_box(self):
     """Exercise RMSNormalizer on a uint8 image Box and on a float32 Box.

     For the uint8 box the normalizer is expected to be disabled and fixed,
     and normalize() must hand the batch back unchanged. For the float32
     box the running statistics are checked against numpy after one and
     after two updates, and a save/load round trip is verified.
     """
     shape = (64, 64, 3)
     n_samples = 3

     def draw_batch(box):
         # Sample n_samples observations and stack them into a single batch.
         return utils.stack_obs([box.sample() for _ in range(n_samples)], box)

     # ---- uint8 image box ----
     image_box = gym.spaces.Box(low=np.zeros(shape),
                                high=np.ones(shape) * 255,
                                dtype=np.uint8)
     image_norm = utils.RMSNormalizer(image_box)
     self.assertFalse(image_norm.enabled)
     self.assertTrue(image_norm.fixed)
     image_batch = draw_batch(image_box)
     image_out = image_norm.normalize(image_batch)
     self.assertArrayEqual(image_batch, image_out)

     # ---- float32 (non-image) box ----
     float_box = gym.spaces.Box(low=np.zeros(shape),
                                high=np.ones(shape),
                                dtype=np.float32)
     normalizer = utils.RMSNormalizer(float_box)
     self.assertTrue(normalizer.enabled)
     self.assertFalse(normalizer.fixed)

     first = draw_batch(float_box)
     normalizer.update(first)
     first_out = normalizer.normalize(first)
     expected_mean = np.mean(first, axis=0)
     expected_var = np.var(first, axis=0)
     self.assertArrayClose(normalizer.rms.mean, expected_mean)
     self.assertArrayClose(normalizer.rms.var, expected_var)
     # The reference normalization uses float32 machine epsilon in the
     # denominator.
     eps = np.finfo(np.float32).eps.item()
     expected_first = (first - expected_mean) / np.sqrt(expected_var + eps)
     self.assertArrayClose(expected_first, first_out, decimal=3)

     # ---- second update: statistics must cover both batches ----
     second = draw_batch(float_box)
     normalizer.update(second)
     second_out = normalizer.normalize(second)
     both = np.concatenate((first, second), axis=0)
     expected_mean = np.mean(both, axis=0)
     expected_var = np.var(both, axis=0)
     self.assertArrayClose(normalizer.rms.mean, expected_mean)
     self.assertArrayClose(normalizer.rms.var, expected_var)
     expected_second = (second - expected_mean) / np.sqrt(expected_var + eps)
     self.assertArrayClose(expected_second, second_out, decimal=3)

     # ---- save / load round trip ----
     with tempfile.NamedTemporaryFile() as tmp:
         normalizer.save(tmp.name)
         restored = utils.RMSNormalizer(float_box).load(tmp.name)
         restored.fixed = True
         restored.enabled = True
     # With the restored normalizer enabled, output matches the reference.
     self.assertArrayClose(expected_second, restored.normalize(second),
                           decimal=3)
     # With it disabled, the batch must pass through unchanged.
     restored.enabled = False
     self.assertArrayClose(second, restored.normalize(second), decimal=3)
Exemplo n.º 2
0
 def test_stack_obs_box(self):
     """Check stack_obs on a Box space and the inputs it must reject."""
     shape = (64, 64, 3)
     box = gym.spaces.Box(low=np.zeros(shape),
                          high=np.ones(shape) * 255,
                          dtype=np.uint8)
     first = box.sample()
     second = box.sample()
     pair = (first, second)
     result = utils.stack_obs(pair, box)
     self.assertArrayEqual(result, np.stack(pair))
     # A bare (unbatched) observation and an empty list are both expected
     # to raise ValueError.
     for bad_input in (first, []):
         with self.assertRaises(ValueError):
             utils.stack_obs(bad_input, box)
Exemplo n.º 3
0
 def test_stack_obs_tuple(self):
     """Check stack_obs on a Tuple space and its rejection of a Dict space.

     Two samples from a Tuple(Box, MultiBinary) space are stacked and each
     component of the result is compared with np.stack of the matching
     components. Passing the same tuple observations together with a Dict
     space is expected to raise ValueError.
     """
     space1 = gym.spaces.Box(low=np.zeros((64,64,3)),
                             high=np.ones((64,64,3))*255,
                             dtype=np.uint8)
     space2 = gym.spaces.MultiBinary(5)
     space = gym.spaces.Tuple((space1, space2))
     obs1 = space.sample()
     obs2 = space.sample()
     stacked = utils.stack_obs((obs1, obs2), space)
     self.assertArrayEqual(stacked[0], np.stack((obs1[0], obs2[0])))
     self.assertArrayEqual(stacked[1], np.stack((obs1[1], obs2[1])))
     # test exceptions
     # Build the Dict space outside the assertRaises block so that only the
     # stack_obs call itself can satisfy the expected ValueError; an error
     # raised while constructing the fixture must not make the test pass.
     dict_space = gym.spaces.Dict({'pov': space1, 'vec': space2})
     with self.assertRaises(ValueError):
         utils.stack_obs((obs1, obs2), dict_space)
Exemplo n.º 4
0
 def test_running_mean_std_3d(self):
     """Check RunningMeanStd statistics after one update with a 3-D batch."""
     shape = (64, 64, 3)
     box = gym.spaces.Box(low=np.ones(shape) * 0.0,
                          high=np.ones(shape) * 1.0,
                          dtype=np.float32)
     n_samples = 3
     samples = [box.sample() for _ in range(n_samples)]
     batch = utils.stack_obs(samples, box)
     # Positional constructor arguments, named mean / std by this test.
     init_mean, init_std = 0.0, 1.0
     tracker = utils.RunningMeanStd(init_mean, init_std)
     tracker.update(batch)
     # After a single update the tracked statistics should match the
     # per-element statistics numpy computes over the batch axis.
     expected_mean = np.mean(batch, axis=0)
     expected_var = np.var(batch, axis=0)
     self.assertArrayClose(tracker.mean, expected_mean, decimal=6)
     self.assertArrayClose(tracker.var, expected_var, decimal=6)
Exemplo n.º 5
0
 def test_preprocess_observation_box_bounded(self):
     """Compare preprocess_observation with utils.normalize on a bounded Box.

     The comparison is made for a single observation and then for a
     stacked batch of eight observations.
     """
     shape = (64, 64, 3)
     box = gym.spaces.Box(low=np.zeros(shape),
                          high=np.ones(shape) * 255,
                          dtype=np.uint8)

     def check(observation):
         # Reference: rescale from the box bounds to the range [0, 1].
         expected = utils.normalize(observation, box.low, box.high, 0., 1.)
         actual = utils.preprocess_observation(observation, box)
         self.assertArrayClose(expected, actual)

     # single observation
     check(box.sample())
     # batch
     n_samples = 8
     samples = [box.sample() for _ in range(n_samples)]
     check(utils.stack_obs(samples, box))
Exemplo n.º 6
0
 def test_preprocess_observation_discrete(self):
     """Check that Discrete observations are preprocessed to one-hot vectors.

     Covers a single sample and a stacked batch of eight samples.
     """
     n_classes = 5
     discrete = gym.spaces.Discrete(n_classes)

     # single observation: expected one-hot vector of length n_classes
     sample = discrete.sample()
     expected = np.zeros((n_classes,), dtype=np.float32)
     expected[sample] = 1.0
     actual = utils.preprocess_observation(sample, discrete)
     self.assertArrayClose(expected, actual)

     # batch: one one-hot row per stacked sample
     n_samples = 8
     samples = [discrete.sample() for _ in range(n_samples)]
     batch = utils.stack_obs(samples, discrete)
     rows = np.arange(batch.size)
     expected = np.zeros((batch.size, n_classes), dtype=np.float32)
     expected[rows, batch] = 1.0
     actual = utils.preprocess_observation(batch, discrete)
     self.assertArrayClose(expected, actual)
Exemplo n.º 7
0
 def test_preprocess_observation_multidiscrete(self):
     """Check MultiDiscrete observations become concatenated one-hot vectors.

     Each sub-dimension occupies its own slice of the expected vector, so
     every component index is shifted by the summed sizes of the
     sub-dimensions before it. Covers a single sample and a batch of eight.
     """
     dims = [4, 7]
     multi = gym.spaces.MultiDiscrete(dims)
     total_dim = np.sum(dims)
     # Start index of each sub-dimension inside the concatenated vector.
     offsets = np.cumsum([0] + dims)[:-1]

     # single observation
     sample = multi.sample()
     expected = np.zeros((total_dim,), dtype=np.float32)
     expected[sample + offsets] = 1.0
     actual = utils.preprocess_observation(sample, multi)
     self.assertArrayClose(expected, actual)

     # batch
     n_samples = 8
     samples = [multi.sample() for _ in range(n_samples)]
     batch = utils.stack_obs(samples, multi)
     expected = np.zeros((n_samples, total_dim), dtype=np.float32)
     for row, hot_indices in zip(range(n_samples), batch + offsets):
         expected[row, hot_indices] = 1.0
     actual = utils.preprocess_observation(batch, multi)
     self.assertArrayClose(expected, actual)