def test_push_valid(self):
        """Pushing one item onto a new buffer takes ``len`` from 0 to 1."""
        replay = ExperienceReplayBuffer(self.BUFFER_SIZE)
        # Nothing has been stored yet.
        self.assertEqual(replay.len, 0)

        replay.push(self.sarsa_one)
        self.assertEqual(replay.len, 1)
    def test_sample_no_padding_no_sample_less_no_pop(self):
        """Check that ``sample`` returns stored items without removing them.

        Two items are pushed into an unpadded buffer and sampled with the
        default ``sample_less`` setting. A single-item sample is then expected
        to yield ``sarsa_one`` and, after a third push, ``sarsa_two``.
        """
        ebuffer = ExperienceReplayBuffer(self.BUFFER_SIZE)
        self.assertEqual(ebuffer.len, 0)

        ebuffer.push(self.sarsa_one)
        ebuffer.push(self.sarsa_two)

        batch = ebuffer.sample(self.BUFFER_SIZE)

        self.assertEqual(len(batch), self.BUFFER_SIZE)

        for act, exp in zip(batch, [self.sarsa_one, self.sarsa_two]):
            np.testing.assert_array_equal(act.state, exp.state)

        # Sampling without popping must leave the stored items in place.
        self.assertEqual(ebuffer.len, self.BUFFER_SIZE)

        batch_two = ebuffer.sample(1)

        # Check the size of the batch just requested, not the earlier one.
        self.assertEqual(len(batch_two), 1)

        act = batch_two[0]

        exp = self.sarsa_one

        np.testing.assert_array_equal(act.state, exp.state)

        # NOTE(review): presumably the buffer is full here, so this push
        # displaces sarsa_one -- confirm against ExperienceReplayBuffer.
        ebuffer.push(self.sarsa_three)

        batch_three = ebuffer.sample(1)

        act = batch_three[0]

        exp = self.sarsa_two

        np.testing.assert_array_equal(act.state, exp.state)
    def test_sample_no_padding_no_less_pop(self):
        """Verify ``sample_and_pop`` returns the pushed items and empties the buffer."""
        replay = ExperienceReplayBuffer(self.BUFFER_SIZE)
        self.assertEqual(replay.len, 0)

        # Keep the pushed items around so the popped batch can be compared
        # against them in insertion order.
        pushed = [self.sarsa_one, self.sarsa_two]
        for transition in pushed:
            replay.push(transition)

        popped = replay.sample_and_pop(self.BUFFER_SIZE)
        for got, want in zip(popped, pushed):
            np.testing.assert_array_equal(got.state, want.state)

        # Popping must have removed everything that was stored.
        self.assertEqual(replay.len, 0)
    def test_sample_padding_no_less_pop(self):
        """Verify ``sample_and_pop`` on a padded buffer, then on a real item.

        After ``pad`` the buffer is expected to report ``BUFFER_SIZE`` items
        whose states all equal ``[0.0]``; popping them empties the buffer. A
        subsequently pushed item must then be returned by the next pop.
        """
        replay = ExperienceReplayBuffer(self.BUFFER_SIZE)
        column_dims = {
            "state_col_dim": 1,
            "action_col_dim": 1,
            "reward_col_dim": 1,
            "next_state_col_dim": 1,
        }
        pad(replay, 2, **column_dims)
        self.assertEqual(replay.len, self.BUFFER_SIZE)

        padded = replay.sample_and_pop(self.BUFFER_SIZE)
        zero_state = np.array([0.0])
        for entry in padded:
            np.testing.assert_array_equal(entry.state, zero_state)
        self.assertEqual(replay.len, 0)

        # With the padding gone, a real item goes in and comes back out.
        replay.push(self.sarsa_one)
        self.assertEqual(replay.len, 1)

        popped = replay.sample_and_pop(1)
        np.testing.assert_array_equal(popped[0].state, self.sarsa_one.state)
        self.assertEqual(replay.len, 0)
    def test_sample_padding_less_pop(self):
        """Check ``sample_and_pop`` on a padded buffer when asked for too many.

        Requesting ``BUFFER_SIZE + 1`` items must raise ``ValueError`` by
        default. With ``sample_less=True`` the same request must succeed,
        leave the buffer empty, and return items whose states equal ``[0.0]``.
        """
        ebuffer = ExperienceReplayBuffer(self.BUFFER_SIZE)
        pad(ebuffer,
            2,
            state_col_dim=1,
            action_col_dim=1,
            reward_col_dim=1,
            next_state_col_dim=1)
        self.assertEqual(ebuffer.len, self.BUFFER_SIZE)

        # assertRaises fails with a descriptive message if nothing is raised
        # and lets any unexpected exception type surface with its traceback.
        with self.assertRaises(ValueError):
            ebuffer.sample_and_pop(self.BUFFER_SIZE + 1)

        batch = ebuffer.sample_and_pop(self.BUFFER_SIZE + 1, sample_less=True)

        self.assertEqual(ebuffer.len, 0)

        for b in batch:
            np.testing.assert_array_equal(b.state, np.array([0.0]))
    def test_sample_no_padding_less_pop(self):
        """Check ``sample_and_pop`` on an under-filled, unpadded buffer.

        With a single stored item, requesting ``BUFFER_SIZE`` items must raise
        ``ValueError`` by default. With ``sample_less=True`` the request must
        succeed, empty the buffer, and return the stored item first.
        """
        ebuffer = ExperienceReplayBuffer(self.BUFFER_SIZE)

        self.assertEqual(ebuffer.len, 0)

        ebuffer.push(self.sarsa_one)

        # assertRaises fails with a descriptive message if nothing is raised
        # and lets any unexpected exception type surface with its traceback.
        with self.assertRaises(ValueError):
            ebuffer.sample_and_pop(self.BUFFER_SIZE)

        batch = ebuffer.sample_and_pop(self.BUFFER_SIZE, sample_less=True)

        self.assertEqual(ebuffer.len, 0)

        np.testing.assert_array_equal(batch[0].state, self.sarsa_one.state)
    def test_sample_no_padding_less_no_pop(self):
        """Check ``sample`` on an unpadded buffer when asked for too many.

        With two stored items, requesting ``BUFFER_SIZE + 1`` items must raise
        ``ValueError`` by default. With ``sample_less=True`` the request must
        return ``BUFFER_SIZE`` items matching the pushed ones in order.
        """
        ebuffer = ExperienceReplayBuffer(self.BUFFER_SIZE)
        self.assertEqual(ebuffer.len, 0)

        ebuffer.push(self.sarsa_one)
        ebuffer.push(self.sarsa_two)

        # assertRaises fails with a descriptive message if nothing is raised
        # and lets any unexpected exception type surface with its traceback.
        with self.assertRaises(ValueError):
            ebuffer.sample(self.BUFFER_SIZE + 1)

        batch = ebuffer.sample(self.BUFFER_SIZE + 1, sample_less=True)

        self.assertEqual(len(batch), self.BUFFER_SIZE)

        for act, exp in zip(batch, [self.sarsa_one, self.sarsa_two]):
            np.testing.assert_array_equal(act.state, exp.state)