def _dataloader(self) -> DataLoader:
        """Build the training DataLoader backed by a freshly populated replay buffer.

        A new ``MultiStepBuffer`` is created, warmed up with
        ``warm_start_size`` experiences via ``populate``, and exposed to the
        loader through an ``ExperienceSourceDataset`` wrapping ``train_batch``.
        """
        self.buffer = MultiStepBuffer(self.replay_size, self.n_steps)
        self.populate(self.warm_start_size)
        self.dataset = ExperienceSourceDataset(self.train_batch)

        return DataLoader(dataset=self.dataset, batch_size=self.batch_size)
# Example 2
    def test_sample_3_step(self):
        """Test that the final output of the 3-step sample is correct.

        With rewards (0, 1, 1) and a 0.9 discount the accumulated reward is
        0 + 0.9 * 1 + 0.9 ** 2 * 1 = 1.71.
        """
        self.buffer = MultiStepBuffer(buffer_size=10, n_step=3)

        # Fix: the third transition is experience03 (previously experience02
        # was appended twice; both are constructed identically in setUp, but
        # the test's intent is a 3-experience sequence 01, 02, 03).
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)
        self.buffer.append(self.experience03)

        reward_gt = 1.71  # 0 + 0.9 * 1 + 0.81 * 1

        batch = self.buffer.sample(1)

        self.assertEqual(batch[0].all(), self.experience01.state.all())
        self.assertEqual(batch[1], self.experience01.action)
        self.assertEqual(batch[2], reward_gt)
        self.assertEqual(batch[3], self.experience02.done)
        self.assertEqual(batch[4].all(), self.experience02.new_state.all())
# Example 3
    def test_get_transition_info_3_step(self):
        """Test that the accumulated experience is correct with multi step."""
        self.buffer = MultiStepBuffer(buffer_size=10, n_step=3)

        # Fix: the third append must be experience03 -- the expected reward
        # below is computed from experience03.reward (previously experience02
        # was appended twice; both are identical, so behavior is unchanged).
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)
        self.buffer.append(self.experience03)

        reward, next_state, done = self.buffer.get_transition_info()

        # Discounted 3-step return with gamma = 0.9, truncated when `done` is set.
        reward_01 = self.experience02.reward + 0.9 * self.experience03.reward * (
            1 - done)
        reward_gt = self.experience01.reward + 0.9 * reward_01 * (1 - done)

        self.assertEqual(reward, reward_gt)
        self.assertEqual(next_state.all(), self.next_state_02.all())
        self.assertEqual(self.experience03.done, done)
    def test_get_transition_info_3_step(self):
        """Test that the accumulated experience is correct with multi step."""
        self.buffer = MultiStepBuffer(capacity=10, n_steps=3, gamma=self.gamma)

        # Fix: the third append must be experience03 -- the expected reward
        # below is computed from experience03.reward (previously experience02
        # was appended twice; both are identical, so behavior is unchanged).
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)
        self.buffer.append(self.experience03)

        # The buffer stores the already-accumulated multi-step transition.
        reward = self.buffer.buffer[0].reward
        next_state = self.buffer.buffer[0].new_state
        done = self.buffer.buffer[0].done

        # Discounted 3-step return, truncated when `done` is set.
        reward_01 = self.experience02.reward + self.gamma * self.experience03.reward * (1 - done)
        reward_gt = self.experience01.reward + self.gamma * reward_01 * (1 - done)

        self.assertEqual(reward, reward_gt)
        self.assertEqual(next_state.all(), self.next_state_02.all())
        self.assertEqual(self.experience03.done, done)
    def setUp(self) -> None:
        """Create a gamma-discounted 2-step buffer and three dummy experiences."""
        self.gamma = 0.9
        self.buffer = MultiStepBuffer(capacity=10, n_steps=2, gamma=self.gamma)

        # First transition: all-zero state, action, reward and done flag.
        self.state = np.zeros([32, 32])
        self.next_state = np.zeros([32, 32])
        self.action = np.zeros([1])
        self.reward = np.zeros([1])
        self.done = np.zeros([1])

        # Second transition: all-ones state, action and reward; not done.
        self.state_02 = np.ones([32, 32])
        self.next_state_02 = np.ones([32, 32])
        self.action_02 = np.ones([1])
        self.reward_02 = np.ones([1])
        self.done_02 = np.zeros([1])

        self.experience01 = Experience(self.state, self.action, self.reward, self.done, self.next_state)
        # experience02 and experience03 are deliberately identical.
        self.experience02 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
        self.experience03 = Experience(self.state_02, self.action_02, self.reward_02, self.done_02, self.next_state_02)
# Example 6
    def setUp(self) -> None:
        """Create a 2-step buffer and three dummy experiences for the tests."""
        self.buffer = MultiStepBuffer(buffer_size=10, n_step=2)

        # First transition: all-zero state, action, reward and done flag.
        self.state = np.zeros([32, 32])
        self.next_state = np.zeros([32, 32])
        self.action = np.zeros([1])
        self.reward = np.zeros([1])
        self.done = np.zeros([1])

        # Second transition: all-ones state, action and reward; not done.
        self.state_02 = np.ones([32, 32])
        self.next_state_02 = np.ones([32, 32])
        self.action_02 = np.ones([1])
        self.reward_02 = np.ones([1])
        self.done_02 = np.zeros([1])

        self.experience01 = Experience(self.state, self.action, self.reward,
                                       self.done, self.next_state)
        # experience02 and experience03 are deliberately identical.
        self.experience02 = Experience(self.state_02, self.action_02,
                                       self.reward_02, self.done_02,
                                       self.next_state_02)
        self.experience03 = Experience(self.state_02, self.action_02,
                                       self.reward_02, self.done_02,
                                       self.next_state_02)
# Example 7
class TestMultiStepReplayBuffer(TestCase):
    """Tests for ``MultiStepBuffer``'s n-step experience accumulation."""

    def setUp(self) -> None:
        """Create a 2-step buffer and three dummy experiences."""
        self.buffer = MultiStepBuffer(buffer_size=10, n_step=2)

        # First transition is all zeros; second/third are all ones (not done).
        self.state = np.zeros([32, 32])
        self.state_02 = np.ones([32, 32])
        self.next_state = np.zeros([32, 32])
        self.next_state_02 = np.ones([32, 32])
        self.action = np.zeros([1])
        self.action_02 = np.ones([1])
        self.reward = np.zeros([1])
        self.reward_02 = np.ones([1])
        self.done = np.zeros([1])
        self.done_02 = np.zeros([1])

        self.experience01 = Experience(self.state, self.action, self.reward,
                                       self.done, self.next_state)
        # experience02 and experience03 are deliberately identical.
        self.experience02 = Experience(self.state_02, self.action_02,
                                       self.reward_02, self.done_02,
                                       self.next_state_02)
        self.experience03 = Experience(self.state_02, self.action_02,
                                       self.reward_02, self.done_02,
                                       self.next_state_02)

    def test_append_single_experience_less_than_n(self):
        """
        If a single experience is added and n > 1 nothing should be added to the buffer as it is waiting experiences
        to equal n
        """
        self.assertEqual(len(self.buffer), 0)

        self.buffer.append(self.experience01)

        self.assertEqual(len(self.buffer), 0)

    def test_append_single_experience(self):
        """
        If a single experience is added and n > 1 nothing should be added to the buffer as it is waiting experiences
        to equal n; the pending experience sits in the short n-step buffer.
        """
        self.assertEqual(len(self.buffer), 0)

        self.buffer.append(self.experience01)

        self.assertEqual(len(self.buffer), 0)
        self.assertEqual(len(self.buffer.n_step_buffer), 1)

    def test_append_single_experience2(self):
        """
        If a single experience is added and the number of experiences collected >= n, the multi step experience should
        be added to the full buffer.
        """
        self.assertEqual(len(self.buffer), 0)

        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)

        self.assertEqual(len(self.buffer), 1)
        self.assertEqual(len(self.buffer.n_step_buffer), 2)

    def test_sample_single_experience(self):
        """If only a single experience was added, sampling should raise."""
        self.buffer.append(self.experience01)

        with self.assertRaises(Exception) as context:
            _ = self.buffer.sample(batch_size=1)

        self.assertIsInstance(context.exception, Exception)

    def test_sample_multi_experience(self):
        """Once n experiences are collected, sampling yields a full multi-step transition."""
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)

        batch = self.buffer.sample(batch_size=1)

        next_state = batch[4]
        self.assertEqual(next_state.all(), self.next_state_02.all())

    def test_get_transition_info_2_step(self):
        """Test that the accumulated 2-step experience is correct."""
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)

        reward, next_state, done = self.buffer.get_transition_info()

        # Discounted 2-step return with gamma = 0.9, truncated on `done`.
        reward_gt = self.experience01.reward + (
            0.9 * self.experience02.reward) * (1 - done)

        self.assertEqual(reward, reward_gt)
        self.assertEqual(next_state.all(), self.next_state_02.all())
        self.assertEqual(self.experience02.done, done)

    def test_get_transition_info_3_step(self):
        """Test that the accumulated experience is correct with multi step."""
        self.buffer = MultiStepBuffer(buffer_size=10, n_step=3)

        # Fix: the third append must be experience03 -- the expected reward
        # below is computed from experience03.reward (previously experience02
        # was appended twice; both are identical, so behavior is unchanged).
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)
        self.buffer.append(self.experience03)

        reward, next_state, done = self.buffer.get_transition_info()

        # Discounted 3-step return with gamma = 0.9, truncated on `done`.
        reward_01 = self.experience02.reward + 0.9 * self.experience03.reward * (
            1 - done)
        reward_gt = self.experience01.reward + 0.9 * reward_01 * (1 - done)

        self.assertEqual(reward, reward_gt)
        self.assertEqual(next_state.all(), self.next_state_02.all())
        self.assertEqual(self.experience03.done, done)

    def test_sample_3_step(self):
        """Test that final output of the 3 step sample is correct."""
        self.buffer = MultiStepBuffer(buffer_size=10, n_step=3)

        # Fix: third transition is experience03 (previously experience02 was
        # appended twice; the two are constructed identically in setUp).
        self.buffer.append(self.experience01)
        self.buffer.append(self.experience02)
        self.buffer.append(self.experience03)

        reward_gt = 1.71  # 0 + 0.9 * 1 + 0.9 ** 2 * 1

        batch = self.buffer.sample(1)

        self.assertEqual(batch[0].all(), self.experience01.state.all())
        self.assertEqual(batch[1], self.experience01.action)
        self.assertEqual(batch[2], reward_gt)
        self.assertEqual(batch[3], self.experience02.done)
        self.assertEqual(batch[4].all(), self.experience02.new_state.all())