Exemplo n.º 1
0
class ExpReplayHelper(AgentHelper):
    """
    Agent helper that stores experiences in a replay buffer.

    Experiences handed to add_experience() are kept in a ReplayBuffer and
    later drawn back out through sample_experiences().

    NOTE(review): the original description says a separate thread is started
    to run learn(); no threading code lives in this class, so presumably that
    is handled by AgentHelper -- confirm against the base class.
    """

    def __init__(self, name, communicator, buffer_capacity, num_experiences,
                 sample_interval=5, num_seqs=1):
        """
        Set up the base helper and create the replay buffer.

        ``name``, ``communicator`` and ``sample_interval`` are forwarded
        unchanged to AgentHelper. ``buffer_capacity`` is passed to the
        ReplayBuffer constructor. ``num_experiences`` and ``num_seqs`` are
        stored and later used as the arguments of ReplayBuffer.sample().
        """
        super(ExpReplayHelper, self).__init__(
            name, communicator, sample_interval)
        # Storage backing the experience replay.
        self.replay_buffer = ReplayBuffer(buffer_capacity)
        # Sampling settings, consumed by sample_experiences().
        self.num_experiences, self.num_seqs = num_experiences, num_seqs

    @staticmethod
    def exp_replay():
        """Report that this helper uses experience replay (always True)."""
        return True

    def add_experience(self, e):
        """Append the experience ``e`` to the replay buffer."""
        self.replay_buffer.add(e)

    def sample_experiences(self):
        """
        Return whatever the replay buffer's sample() yields when called with
        the stored ``num_experiences`` and ``num_seqs``.
        """
        buf = self.replay_buffer
        return buf.sample(self.num_experiences, self.num_seqs)
Exemplo n.º 2
0
 def __init__(self, name, communicator, buffer_capacity, num_experiences,
              sample_interval=5, num_seqs=1):
     """
     Set up the base helper and create the replay buffer.

     ``name``, ``communicator`` and ``sample_interval`` are forwarded
     unchanged to the parent initializer. ``buffer_capacity`` is passed to
     the ReplayBuffer constructor. ``num_experiences`` and ``num_seqs`` are
     only stored on the instance here.
     """
     super(ExpReplayHelper, self).__init__(
         name, communicator, sample_interval)
     # Storage backing the experience replay.
     self.replay_buffer = ReplayBuffer(buffer_capacity)
     # Sampling settings kept for later use.
     self.num_experiences, self.num_seqs = num_experiences, num_seqs
Exemplo n.º 3
0
 def test_single_instance_replay_buffer(self):
     """
     Fill a ReplayBuffer well past its capacity and check sampling.

     After every add() the buffer length must equal
     ``min(items_added, capacity)``. Once at least two items are stored,
     every sampled sequence must have exactly two entries, must not start
     on an entry whose end-of-episode flag equals 1, and must hold two
     consecutive actions.
     """
     capacity = 30
     episode_len = 4
     buf = ReplayBuffer(capacity)
     # range() instead of xrange(): same iteration, but also valid on
     # Python 3, where xrange no longer exists.
     for i in range(10 * capacity):
         # Experience layout: (obs, reward, action, episode_end).
         # The action is the running index i, so consecutive experiences
         # carry consecutive action values; every episode_len-th
         # experience is flagged as an episode end.
         buf.add((np.zeros(10), i * 0.5, i, (i + 1) % episode_len == 0))
         # check the circular queue in the buffer: length grows by one
         # per add() until it saturates at capacity
         self.assertEqual(len(buf), min(i + 1, capacity))
         if len(buf) < 2:  # need at least two elements
             continue
         # NOTE(review): the original comment here said sampling "should
         # raise error when trying to pick up the last element", but no
         # exception is asserted; the checks below only validate the
         # sequences that sample() returns. The meaning of the trailing
         # 0 argument is not visible here -- confirm against ReplayBuffer.
         exp_seqs = buf.sample(capacity, self.is_episode_end, 0)
         for exp_seq in exp_seqs:
             self.assertEqual(len(exp_seq), 2)
             # the first entry must not be flagged as an episode end
             self.assertNotEqual(exp_seq[0][3], 1)
             # the two entries must be adjacent in insertion order
             self.assertEqual(exp_seq[1][2], exp_seq[0][2] + 1)