Example #1
    def test_per_nstep(self):
        """
        PrioritizedReplayBuffer.on_episode_end() ignores Exception

        Ref: https://gitlab.com/ymd_h/cpprb/-/issues/111
        """

        rb = PrioritizedReplayBuffer(32,
                                     {"rew": {}, "done": {}},
                                     Nstep={"size": 4,
                                            "rew": "rew",
                                            "gamma": 0.5})

        # One 11-step episode: ten intermediate transitions plus a terminal one.
        for _ in range(10):
            rb.add(rew=0.5, done=0.0)

        rb.add(rew=0.5, done=1.0)
        rb.on_episode_end()

        s = rb.sample(16)

        # With Nstep enabled, sampled batches include a "discounts" key.
        self.assertIn("discounts", s)
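
With N-step mode, cpprb accumulates the discounted n-step return into the sampled "rew" and reports the remaining discount factor for each transition under "discounts". A minimal sketch of how such a batch could feed an N-step TD target; the placeholder q_next values are an assumption for illustration, not part of the test:

import numpy as np

s = rb.sample(16)
q_next = np.zeros(16)  # placeholder for max_a Q(next_obs, a)

# Use the sampled per-transition discounts rather than a fixed gamma**n,
# since an episode end can truncate the n-step window.
target = (s["rew"].ravel()
          + s["discounts"].ravel() * (1.0 - s["done"].ravel()) * q_next)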
Example #2
    def test_save_cache_with_stack_compress(self):
        # Note: the original wrote 'shape': (3), which is just the int 3;
        # (3,) makes the intended 1-D shape explicit.
        rb = PrioritizedReplayBuffer(32,
                                     env_dict={"done": {"dtype": "bool"},
                                               "a": {"shape": (3,)}},
                                     stack_compress="a")

        # Three-step episode; consecutive values of "a" overlap by two
        # elements, which is exactly what stack_compress deduplicates.
        a = np.array([0, 1, 2])
        for i in range(3):
            done = i == 2
            rb.add(a=a, done=done)
            if done:
                rb.on_episode_end()
            a += 1
        # A further add after the episode must not corrupt the cached episode.
        rb.add(a=np.ones(3), done=False)

        a_ = rb.get_all_transitions()["a"]

        np.testing.assert_allclose(
            a_,
            np.asarray([[0., 1., 2.],
                        [1., 2., 3.],
                        [2., 3., 4.],
                        [1., 1., 1.]]))
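
stack_compress targets frame-stacking setups, where consecutive observations share most of their last-axis slices; cpprb stores the overlapping slices once and reconstructs the full stacks when read back. A minimal sketch under that assumption (the buffer size and frame shape are illustrative, not from the test):

import numpy as np
from cpprb import ReplayBuffer

# Four stacked 84x84 frames per observation; the stacking axis must be last.
rb = ReplayBuffer(256,
                  env_dict={"obs": {"shape": (84, 84, 4)}},
                  stack_compress="obs")

stack = np.zeros((84, 84, 4))
for t in range(5):
    rb.add(obs=stack)
    # Slide the window by one frame, as a frame-stacking wrapper would.
    stack = np.concatenate([stack[..., 1:],
                            np.full((84, 84, 1), t + 1.0)], axis=-1)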
Example #3
observation = env.reset()

# Warm-up: fill the replay buffer with random-policy transitions
for n_step in range(100):
    action = env.action_space.sample()  # Random Action
    next_observation, reward, done, info = env.step(action)
    rb.add(obs=observation,
           act=action,
           rew=reward,
           next_obs=next_observation,
           done=done)
    observation = next_observation
    if done:
        observation = env.reset()
        rb.on_episode_end()
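
PrioritizedReplayBuffer.sample() also returns importance-sampling "weights" and the sampled "indexes"; after computing TD errors, updated priorities go back in through update_priorities(). A minimal, self-contained sketch of that loop (the buffer layout and the random TD errors are placeholders, not the training code that follows):

import numpy as np
from cpprb import PrioritizedReplayBuffer

rb_demo = PrioritizedReplayBuffer(1024,
                                  {"obs": {"shape": (4,)}, "act": {},
                                   "rew": {}, "next_obs": {"shape": (4,)},
                                   "done": {}})
for _ in range(64):
    rb_demo.add(obs=np.zeros(4), act=0, rew=0.0,
                next_obs=np.zeros(4), done=0.0)

sample = rb_demo.sample(32, beta=0.4)
# A real agent derives TD errors from its Q-network; random values here.
td_error = np.random.rand(32)
rb_demo.update_priorities(sample["indexes"], np.abs(td_error) + 1e-6)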

n_episode = 0
observation = env.reset()
for n_step in range(N_iteration):

    # ε-greedy exploration: random action with probability `egreedy`,
    # otherwise the greedy action from the Q-network.
    if np.random.rand() < egreedy:
        action = env.action_space.sample()
    else:
        Q = tf.squeeze(model(observation.reshape(1, -1)))
        action = np.argmax(Q)

    next_observation, reward, done, info = env.step(action)
    rb.add(obs=observation,
           act=action,
           rew=reward,