def test_create(self):
    """Check the classes returned by ``create_buffer`` and an add/read round trip.

    A plain buffer and a ``prioritized=True`` buffer are built from equal
    specs with ``next_of="obs"``.  The test asserts their exact types, then
    verifies that a stored observation comes back from ``sample`` and that
    ``next_obs`` read through ``_encode_sample`` matches what was added.
    """
    capacity = 256
    frame_shape = (4, 84, 84)
    n_actions = 3

    def fresh_env_dict():
        # A new dict per call: the two buffers never share a spec object.
        return {
            "obs": {"shape": frame_shape},
            "act": {"shape": n_actions},
            "rew": {},
            "done": {},
        }

    rb = create_buffer(capacity, env_dict=fresh_env_dict(), next_of="obs")
    per = create_buffer(capacity,
                        env_dict=fresh_env_dict(),
                        next_of="obs",
                        prioritized=True)

    self.assertIs(type(rb), ReplayBuffer)
    self.assertIs(type(per), PrioritizedReplayBuffer)

    obs = np.random.random(frame_shape)
    transition = {
        "obs": obs,
        "act": np.ones(n_actions),
        "rew": 1,
        "next_obs": obs,
        "done": 0,
    }
    # Stored data is compared against the observation with a leading axis.
    expected = obs.reshape((-1, *obs.shape))

    # First transition: read the observation back through sample().
    rb.add(**transition)
    per.add(**transition)

    sampled = rb.sample(1)["obs"]
    sampled_per = per.sample(1)["obs"]

    np.testing.assert_allclose(sampled, expected)
    np.testing.assert_allclose(sampled_per, expected)

    # Second transition: read next_obs at index 0 through _encode_sample().
    rb.add(**transition)
    per.add(**transition)

    following = rb._encode_sample(0)["next_obs"]
    following_per = per._encode_sample(0)["next_obs"]

    np.testing.assert_allclose(following, expected)
    np.testing.assert_allclose(following_per, expected)
def test_issue51(self):
    """Check that ``next_obs`` read back at index 0 equals the value added.

    The buffer is created with ``next_of="obs"`` and a single transition is
    stored whose ``next_obs`` is ``obs + 1``.
    """
    capacity = 256
    obs_dim = 15
    n_actions = 3

    spec = {
        "obs": {"shape": obs_dim},
        "act": {"shape": n_actions},
        "rew": {},
        "done": {},
    }
    rb = create_buffer(capacity, env_dict=spec, next_of="obs")

    current = np.arange(obs_dim)
    action = np.ones(n_actions)
    following = current + 1

    rb.add(obs=current, act=action, rew=1, next_obs=following, done=0)

    stored = rb._encode_sample(0)["next_obs"][0]
    np.testing.assert_allclose(stored, following)
def test_episode_termination(self):
    """Fill a stack-compressed buffer with ten episodes and check every frame.

    Episode ``i`` stores observations whose entries all equal ``i``: thirty
    steps with ``done=0`` followed by one step with ``done=1``, after which
    ``on_episode_end`` is called.  Every observation read back over indices
    ``0 .. buffer_size - 1`` must be equivalent to its own first slice.
    """
    capacity = 256
    frame_shape = (4, 84, 84)
    n_actions = 3

    spec = {
        "obs": {"shape": frame_shape, "dtype": np.ubyte},
        "act": {"shape": n_actions},
        "rew": {},
        "done": {},
    }
    rb = create_buffer(capacity, spec, next_of="obs", stack_compress="obs")

    unit_frame = np.ones(frame_shape, dtype=np.ubyte)
    action = np.ones(n_actions)
    reward = 0

    # Thirty non-terminal steps, then exactly one terminal step per episode.
    done_flags = [0] * 30 + [1]

    for episode in range(10):
        for flag in done_flags:
            rb.add(obs=unit_frame * episode,
                   act=action,
                   rew=reward,
                   next_obs=unit_frame * episode,
                   done=flag)
        rb.on_episode_end()

    batch = rb._encode_sample(range(capacity))
    for frame in batch["obs"]:
        self.assertTrue(np.array_equiv(frame, frame[0]))
def test_BatchSampling(self):
    """Smoke test: sample a batch of 64 from a prioritized buffer with ``Nstep``.

    Only one transition is added before ``sample`` is called; the test makes
    no assertion and passes as long as nothing raises.
    """
    capacity = 256
    frame_shape = (84, 84, 3)
    n_actions = 1
    n_samples = 64

    spec = {
        "obs": {"shape": frame_shape},
        "act": {"shape": n_actions},
        "rew": {},
        "next_obs": {"shape": frame_shape},
        "done": {},
    }
    nstep_config = {"size": 4, "rew": "rew", "next": "next_obs"}
    rb = create_buffer(capacity, spec, prioritized=True, Nstep=nstep_config)

    current = np.ones(frame_shape, dtype=np.double)
    following = np.zeros_like(current)

    rb.add(obs=current, act=2, rew=0.5, next_obs=following, done=0)

    rb.sample(n_samples)
def test_default_dtype(self):
    """Check that a field declared without a dtype uses ``default_dtype``.

    ``"done"`` is declared with an empty spec and the buffer is created with
    ``default_dtype=np.float32``; the sampled value must have that dtype.
    """
    capacity = 256
    rb = create_buffer(capacity, {"done": {}}, default_dtype=np.float32)

    rb.add(done=1)

    sampled_done = rb.sample(1)["done"][0]
    self.assertEqual(sampled_done.dtype, np.float32)
def test_large_size(self):
    """Smoke test: build a prioritized buffer for 210x160x3 observations.

    The buffer is only constructed; nothing is added or asserted.

    NOTE(review): a second ``test_large_size`` is defined further down in
    this source.  If both belong to the same class the later definition
    replaces this one -- confirm whether that is intended.
    """
    capacity = 256
    frame_shape = (210, 160, 3)
    n_actions = 4

    options = {
        "obs_shape": frame_shape,
        "act_dim": n_actions,
        "is_discrete_action": True,
        "prioritized": True,
    }
    rb = create_buffer(capacity, **options)
def test_large_size(self):
    """Smoke test: build a prioritized buffer from an ndarray shape and read index 0.

    ``obs_shape`` is passed as a NumPy array rather than a tuple, and
    ``_encode_sample`` is called once on the still-empty buffer.  Nothing is
    asserted; the test passes as long as nothing raises.

    NOTE(review): an earlier ``test_large_size`` is defined in this source.
    If both belong to the same class this definition replaces the earlier
    one -- confirm whether that is intended.
    """
    capacity = 256
    frame_shape = np.array((210, 160, 3))
    n_actions = 4

    options = {
        "obs_shape": frame_shape,
        "act_dim": n_actions,
        "is_discrete_action": True,
        "prioritized": True,
    }
    rb = create_buffer(capacity, **options)

    rb._encode_sample(0)
def cb_type(**kwargs):
    """Return the class of the buffer that ``create_buffer`` builds for ``kwargs``.

    ``size``, ``obs_dim`` and ``act_dim`` are not parameters; they are looked
    up in the surrounding scope when the function is called.
    """
    buffer = create_buffer(
        size,
        {
            "obs": {"shape": obs_dim},
            "act": {"shape": act_dim},
            "rew": {},
            "next_obs": {"shape": obs_dim},
            "done": {},
        },
        **kwargs,
    )
    return type(buffer)
def test_stack(self):
    """Check that overlapping stacked frames survive ``stack_compress``.

    One long random array is generated whose last axis is ``buffer_size``
    entries longer than a single observation.  Transition ``i`` stores the
    window starting at ``i`` as ``obs`` and the window starting at ``i + 1``
    as ``next_obs``.  Every stored index is then read back and compared
    against the same windows.
    """
    capacity = 256
    frame_shape = (16, 16, 4)
    n_actions = 5
    axis = -1

    spec = {
        "obs": {"shape": frame_shape},
        "act": {"shape": n_actions},
        "rew": {},
        "done": {},
    }
    rb = create_buffer(capacity, spec, next_of="obs", stack_compress="obs")

    # Extend the stacking axis so that `capacity` shifted windows fit.
    long_shape = np.array(frame_shape, copy=True)
    long_shape[axis] += capacity
    frames = np.random.random(long_shape)

    depth = frame_shape[axis]

    def window(start):
        # `depth` consecutive slices along the stacking axis, from `start`.
        return frames.take(np.arange(start, start + depth), axis=axis)

    action = np.ones(n_actions)
    reward = 0.5
    terminal = 0

    for i in range(capacity):
        rb.add(obs=window(i),
               act=action,
               rew=reward,
               next_obs=window(i + 1),
               done=terminal)

    for i in range(capacity):
        np.testing.assert_allclose(rb._encode_sample(i)["obs"][0],
                                   window(i))
        np.testing.assert_allclose(rb._encode_sample(i)["next_obs"][0],
                                   window(i + 1))
def __init__(
    self,
    obs_spec: Space,
    act_spec: Space,
    _capacity: int = int(1e6),
    _batch_size: int = 128,
    _device: str = "cpu",
):
    """Set up flatten/unflatten helpers and the backing cpprb buffer.

    Args:
        obs_spec: Space passed to ``Flatten`` / ``Unflatten`` for observations.
        act_spec: Space passed to ``Flatten`` / ``Unflatten`` for actions.
        _capacity: Size handed to ``cpprb.create_buffer``.
        _batch_size: Stored as ``self.batch_size``.
        _device: Device string converted with ``torch.device`` and stored as
            ``self.device``.
    """
    self.obs_flat = Flatten(obs_spec)
    self.obs_unflat = Unflatten(obs_spec)
    self.act_flat = Flatten(act_spec)
    self.act_unflat = Unflatten(act_spec)

    def field(dtype, shape):
        # Each call returns a new dict, so no two fields share a spec object.
        return {"dtype": dtype, "shape": shape}

    # Observation and action widths come from the flatteners' `after_dim`.
    spec = {
        "obs": field(np.float32, self.obs_flat.after_dim),
        "act": field(np.float32, self.act_flat.after_dim),
        "index": field(np.int64, 1),
        "next_obs": field(np.float32, self.obs_flat.after_dim),
        "rew": field(np.float32, 1),
        "done": field(np.float32, 1),
    }

    self.buffer: cpprb.ReplayBuffer = cpprb.create_buffer(_capacity, spec)
    self.batch_size = _batch_size
    self.device = torch.device(_device)
def test_RGB_screen_obs(self):
    """Check shape and content of image observations in a prioritized buffer.

    One transition with an all-ones ``obs`` and an all-zeros ``next_obs`` of
    shape ``(84, 84, 3)`` is stored, then both fields are read back at index
    0 and compared against the originals.
    """
    capacity = 256
    frame_shape = (84, 84, 3)
    n_actions = 1

    spec = {
        "obs": {"shape": frame_shape},
        "act": {"shape": n_actions},
        "rew": {},
        "next_obs": {"shape": frame_shape},
        "done": {},
    }
    rb = create_buffer(capacity, spec, prioritized=True)

    current = np.ones(frame_shape, dtype=np.double)
    following = np.zeros_like(current)

    rb.add(obs=current, act=2, rew=0.5, next_obs=following, done=0)

    # The index is passed as a zero-dimensional array, one per read.
    stored_obs = rb._encode_sample(np.array(0))["obs"]
    stored_next = rb._encode_sample(np.array(0))["next_obs"]

    checks = ((current, stored_obs), (following, stored_next))
    for expected, stored in checks:
        self.assertEqual(frame_shape, stored[0].shape)
        np.testing.assert_allclose(expected, stored[0])
target_model = clone_model(model) optimizer = Adam() tensorboard_callback = TensorBoard(logdir, histogram_freq=1) model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy']) rb = create_buffer(1e6, { "obs": { "shape": observation.shape }, "act": { "shape": 1, "dtype": np.ubyte }, "rew": {}, "next_obs": { "shape": observation.shape }, "done": {} }, prioritized=prioritized) action_index = np.arange(env.action_space.n).reshape(1, -1) # Bootstrap for n_episode in range(1000): observation = env.reset() for t in range(500): action = env.action_space.sample() # Random Action