Пример #1
0
Файл: v8.py Проект: ymd-h/cpprb
    def test_create(self):
        """Create a plain and a prioritized buffer and read observations back.

        Both buffers are built by ``create_buffer`` from the same layout
        (``obs``/``act``/``rew``/``done`` with ``next_of="obs"``). The test
        asserts the concrete buffer classes, then checks ``obs`` returned by
        ``sample`` and ``next_obs`` returned by ``_encode_sample``.
        """
        capacity = 256
        frame_shape = (4, 84, 84)
        n_actions = 3

        def layout():
            # Fresh dict on every call, so each buffer receives its own spec.
            return {
                "obs": {"shape": frame_shape},
                "act": {"shape": n_actions},
                "rew": {},
                "done": {},
            }

        rb = create_buffer(capacity, env_dict=layout(), next_of="obs")
        per = create_buffer(capacity,
                            env_dict=layout(),
                            next_of="obs",
                            prioritized=True)

        self.assertIs(type(rb), ReplayBuffer)
        self.assertIs(type(per), PrioritizedReplayBuffer)

        obs = np.random.random(frame_shape)
        step = dict(obs=obs, act=np.ones(n_actions), rew=1, next_obs=obs, done=0)
        # Expected value: the single observation with a leading batch axis.
        expected = obs.reshape((-1, *obs.shape))

        buffers = (rb, per)

        for buf in buffers:
            buf.add(**step)

        sampled = [buf.sample(1)["obs"] for buf in buffers]
        for got in sampled:
            np.testing.assert_allclose(got, expected)

        for buf in buffers:
            buf.add(**step)

        successors = [buf._encode_sample(0)["next_obs"] for buf in buffers]
        for got in successors:
            np.testing.assert_allclose(got, expected)
Пример #2
0
Файл: v8.py Проект: ymd-h/cpprb
    def test_issue51(self):
        """Check that a 1-D ``next_obs`` is read back unchanged at index 0.

        A single transition is stored whose ``next_obs`` equals ``obs + 1``;
        the first row of ``_encode_sample(0)["next_obs"]`` must match it.
        """
        capacity = 256
        obs_len = 15
        n_actions = 3

        spec = {
            "obs": {"shape": obs_len},
            "act": {"shape": n_actions},
            "rew": {},
            "done": {},
        }
        rb = create_buffer(capacity, env_dict=spec, next_of="obs")

        state = np.arange(obs_len)
        action = np.ones(n_actions)
        successor = state + 1

        rb.add(obs=state, act=action, rew=1, next_obs=successor, done=0)

        stored = rb._encode_sample(0)["next_obs"]
        np.testing.assert_allclose(stored[0], successor)
Пример #3
0
Файл: v8.py Проект: ymd-h/cpprb
    def test_episode_termination(self):
        """Check stack-compressed observations across episode boundaries.

        Ten episodes are written. Each consists of 30 non-terminal steps,
        one terminal step (``done=1``) and a call to ``on_episode_end()``,
        i.e. 310 transitions go into a buffer of size 256. Every frame of
        episode ``i`` is filled with the constant ``i``, so each observation
        read back must be identical along its first axis.
        """
        buffer_size = 256
        obs_shape = (4, 84, 84)
        act_dim = 3
        n_episodes = 10
        steps_before_done = 30

        rb = create_buffer(buffer_size, {
            "obs": {
                "shape": obs_shape,
                "dtype": np.ubyte
            },
            "act": {
                "shape": act_dim
            },
            "rew": {},
            "done": {}
        },
                           next_of="obs",
                           stack_compress="obs")

        obs = np.ones(obs_shape, dtype=np.ubyte)
        act = np.ones(act_dim)
        rew = 0

        for i in range(n_episodes):
            # The frame is constant within an episode; build it once.
            frame = obs * i
            for _ in range(steps_before_done):
                rb.add(obs=frame, act=act, rew=rew, next_obs=frame, done=0)
            # Terminal step, followed by the explicit end-of-episode signal.
            rb.add(obs=frame, act=act, rew=rew, next_obs=frame, done=1)
            rb.on_episode_end()

        s = rb._encode_sample(range(buffer_size))

        for o in s["obs"]:
            # All slices along the first axis must equal the first slice.
            self.assertTrue(np.array_equiv(o, o[0]))
Пример #4
0
    def test_BatchSampling(self):
        """Request a batch from a prioritized N-step buffer after one ``add``.

        There are no assertions: the test passes when ``add`` and
        ``sample(batch_size)`` complete without raising.
        """
        capacity = 256
        frame_shape = (84, 84, 3)
        batch_size = 64

        frame_spec = {"shape": frame_shape}
        env_dict = {
            "obs": dict(frame_spec),
            "act": {"shape": 1},
            "rew": {},
            "next_obs": dict(frame_spec),
            "done": {},
        }
        nstep = {"size": 4, "rew": "rew", "next": "next_obs"}

        rb = create_buffer(capacity, env_dict, prioritized=True, Nstep=nstep)

        ones = np.ones(frame_shape, dtype=np.double)
        zeros = np.zeros_like(ones)

        rb.add(obs=ones, act=2, rew=0.5, next_obs=zeros, done=0)

        rb.sample(batch_size)
Пример #5
0
Файл: v8.py Проект: ymd-h/cpprb
    def test_default_dtype(self):
        """Check that ``default_dtype`` applies to a field declared without dtype."""
        capacity = 256
        wanted = np.float32

        rb = create_buffer(capacity, {"done": {}}, default_dtype=wanted)
        rb.add(done=1)

        batch = rb.sample(1)
        self.assertEqual(batch["done"][0].dtype, wanted)
Пример #6
0
    def test_large_size(self):
        """Create a prioritized buffer for large image observations.

        There are no assertions: the test passes when ``create_buffer``
        returns without raising.
        """
        capacity = 256
        options = {
            "obs_shape": (210, 160, 3),
            "act_dim": 4,
            "is_discrete_action": True,
            "prioritized": True,
        }

        rb = create_buffer(capacity, **options)
Пример #7
0
    def test_large_size(self):
        """Create a prioritized buffer with an ndarray shape and encode index 0.

        There are no assertions: the test passes when ``create_buffer`` and
        ``_encode_sample(0)`` complete without raising.
        """
        capacity = 256
        options = {
            "obs_shape": np.array((210, 160, 3)),
            "act_dim": 4,
            "is_discrete_action": True,
            "prioritized": True,
        }

        rb = create_buffer(capacity, **options)
        rb._encode_sample(0)
Пример #8
0
 def cb_type(**kwargs):
     """Return the class of the buffer ``create_buffer`` builds for ``kwargs``.

     ``size``, ``obs_dim`` and ``act_dim`` are not defined here; they are
     read from an outer scope.
     """
     env_dict = {
         "obs": {"shape": obs_dim},
         "act": {"shape": act_dim},
         "rew": {},
         "next_obs": {"shape": obs_dim},
         "done": {},
     }
     buffer = create_buffer(size, env_dict, **kwargs)
     return type(buffer)
Пример #9
0
Файл: v8.py Проект: ymd-h/cpprb
    def test_stack(self):
        """Fill a stack-compressed buffer with sliding windows and verify them.

        One long random array is created whose last axis is ``buffer size``
        entries longer than a single observation. Step ``i`` stores the
        window starting at ``i`` as ``obs`` and the window starting at
        ``i + 1`` as ``next_obs``; afterwards every index is read back with
        ``_encode_sample`` and compared against the same windows.
        """
        capacity = 256
        frame_shape = (16, 16, 4)
        n_actions = 5
        axis = -1
        depth = frame_shape[axis]

        env_dict = {
            "obs": {"shape": frame_shape},
            "act": {"shape": n_actions},
            "rew": {},
            "done": {},
        }
        rb = create_buffer(capacity,
                           env_dict,
                           next_of="obs",
                           stack_compress="obs")

        long_shape = np.array(frame_shape, copy=True)
        long_shape[axis] += capacity
        frames = np.random.random(long_shape)
        action = np.ones(n_actions)

        def window(start):
            # ``depth`` consecutive slices of ``frames`` along the stack axis.
            return frames.take(np.arange(start, start + depth), axis=axis)

        for step in range(capacity):
            rb.add(obs=window(step),
                   act=action,
                   rew=0.5,
                   next_obs=window(step + 1),
                   done=0)

        for step in range(capacity):
            np.testing.assert_allclose(rb._encode_sample(step)["obs"][0],
                                       window(step))
            np.testing.assert_allclose(rb._encode_sample(step)["next_obs"][0],
                                       window(step + 1))
Пример #10
0
    def __init__(
        self,
        obs_spec: Space,
        act_spec: Space,
        _capacity: int = int(1e6),
        _batch_size: int = 128,
        _device: str = "cpu",
    ):
        """Set up flatten/unflatten helpers and the backing cpprb buffer.

        Args:
            obs_spec: Space passed to ``Flatten``/``Unflatten`` for observations.
            act_spec: Space passed to ``Flatten``/``Unflatten`` for actions.
            _capacity: Size handed to ``cpprb.create_buffer``.
            _batch_size: Stored as ``self.batch_size``.
            _device: Device string wrapped in ``torch.device``.
        """
        self.obs_flat = Flatten(obs_spec)
        self.obs_unflat = Unflatten(obs_spec)
        self.act_flat = Flatten(act_spec)
        self.act_unflat = Unflatten(act_spec)

        def field(dtype, shape):
            # One entry of the buffer layout.
            return {"dtype": dtype, "shape": shape}

        flat_obs_dim = self.obs_flat.after_dim
        flat_act_dim = self.act_flat.after_dim

        spec = {
            "obs": field(np.float32, flat_obs_dim),
            "act": field(np.float32, flat_act_dim),
            "index": field(np.int64, 1),
            "next_obs": field(np.float32, flat_obs_dim),
            "rew": field(np.float32, 1),
            "done": field(np.float32, 1),
        }

        self.buffer: cpprb.ReplayBuffer = cpprb.create_buffer(_capacity, spec)
        self.batch_size = _batch_size
        self.device = torch.device(_device)
Пример #11
0
    def test_RGB_screen_obs(self):
        """Store one RGB-shaped transition and read ``obs``/``next_obs`` back.

        The buffer is prioritized and declares ``next_obs`` explicitly. Both
        the shape and the values of the first returned row are checked for
        ``obs`` (all ones) and ``next_obs`` (all zeros).
        """
        capacity = 256
        frame_shape = (84, 84, 3)

        env_dict = {
            "obs": {"shape": frame_shape},
            "act": {"shape": 1},
            "rew": {},
            "next_obs": {"shape": frame_shape},
            "done": {},
        }
        rb = create_buffer(capacity, env_dict, prioritized=True)

        ones = np.ones(frame_shape, dtype=np.double)
        zeros = np.zeros_like(ones)

        rb.add(obs=ones, act=2, rew=0.5, next_obs=zeros, done=0)

        got_obs = rb._encode_sample(np.array(0))["obs"]
        got_next = rb._encode_sample(np.array(0))["next_obs"]

        for expected, got in ((ones, got_obs), (zeros, got_next)):
            self.assertEqual(frame_shape, got[0].shape)
            np.testing.assert_allclose(expected, got[0])
Пример #12
0
# `target_model` is produced by `clone_model(model)`.
target_model = clone_model(model)

optimizer = Adam()
tensorboard_callback = TensorBoard(logdir, histogram_freq=1)

model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

# The capacity is an element count, so pass an int rather than the float
# literal 1e6; this avoids relying on an implicit float-to-int conversion
# inside the buffer constructor.
rb = create_buffer(int(1e6), {
    "obs": {
        "shape": observation.shape
    },
    "act": {
        "shape": 1,
        "dtype": np.ubyte
    },
    "rew": {},
    "next_obs": {
        "shape": observation.shape
    },
    "done": {}
},
                   prioritized=prioritized)

# Row vector 0..n-1 with one column per discrete action.
action_index = np.arange(env.action_space.n).reshape(1, -1)

# Bootstrap
for n_episode in range(1000):
    observation = env.reset()
    for t in range(500):
        action = env.action_space.sample()  # Random Action