def setUp(self) -> None:
        self.state = np.random.rand(32, 32)
        self.next_state = np.random.rand(32, 32)
        self.action = np.ones([1])
        self.reward = np.ones([1])
        self.done = np.zeros([1])
        self.experience = Experience(self.state, self.action, self.reward, self.done, self.next_state)

        self.source = Mock()
        self.source.step = Mock(return_value=(self.experience, torch.tensor(0), False))
        self.warm_start = 10
        self.buffer = ReplayBuffer(20)
        for _ in range(self.warm_start):
            self.buffer.append(self.experience)
示例#2
0
    def _dataloader(self) -> DataLoader:
        """Initialize the Replay Buffer dataset used for retrieving experiences"""
        self.buffer = ReplayBuffer(self.replay_size)
        self.populate(self.warm_start_size)

        self.dataset = ExperienceSourceDataset(self.train_batch)
        return DataLoader(dataset=self.dataset, batch_size=self.batch_size)
    def prepare_data(self) -> None:
        """Initialize the Replay Buffer dataset used for retrieving experiences"""
        self.source = ExperienceSource(self.env, self.agent)
        self.buffer = ReplayBuffer(self.replay_size)
        self.populate(self.warm_start_size)

        self.dataset = RLDataset(self.buffer, self.sample_len)
    def prepare_data(self) -> None:
        """Initialize the Replay Buffer dataset used for retrieving experiences"""
        device = torch.device(self.trainer.root_gpu) if self.trainer.num_gpus >= 1 else self.device
        self.source = ExperienceSource(self.env, self.agent, device)
        self.buffer = ReplayBuffer(self.replay_size)
        self.populate(self.warm_start_size)

        self.dataset = RLDataset(self.buffer, self.sample_len)
示例#5
0
    def _dataloader(self) -> DataLoader:
        """Initialize the Replay Buffer dataset used for retrieving experiences"""
        self.buffer = ReplayBuffer(self.replay_size)
        self.populate(self.warm_start_size)

        dataset = RLDataset(self.buffer, self.sample_len)
        dataloader = DataLoader(
            dataset=dataset,
            batch_size=self.batch_size,
        )
        return dataloader
示例#6
0
class TestReplayBuffer(TestCase):
    def setUp(self) -> None:
        self.state = np.random.rand(32, 32)
        self.next_state = np.random.rand(32, 32)
        self.action = np.ones([1])
        self.reward = np.ones([1])
        self.done = np.zeros([1])
        self.experience = Experience(self.state, self.action, self.reward,
                                     self.done, self.next_state)

        self.source = Mock()
        self.source.step = Mock(return_value=(self.experience, torch.tensor(0),
                                              False))
        self.warm_start = 10
        self.buffer = ReplayBuffer(20)
        for _ in range(self.warm_start):
            self.buffer.append(self.experience)

    def test_replay_buffer_append(self):
        """Test that you can append to the replay buffer"""

        self.assertEqual(len(self.buffer), self.warm_start)

        self.buffer.append(self.experience)

        self.assertEqual(len(self.buffer), self.warm_start + 1)

    def test_replay_buffer_populate(self):
        """Tests that the buffer is populated correctly with warm_start"""
        self.assertEqual(len(self.buffer.buffer), self.warm_start)

    def test_replay_buffer_update(self):
        """Tests that buffer append works correctly"""
        batch_size = 3
        self.assertEqual(len(self.buffer.buffer), self.warm_start)
        for i in range(batch_size):
            self.buffer.append(self.experience)
        self.assertEqual(len(self.buffer.buffer), self.warm_start + batch_size)

    def test_replay_buffer_sample(self):
        """Test that you can sample from the buffer and the outputs are the correct shape"""
        batch_size = 3

        for i in range(10):
            self.buffer.append(self.experience)

        batch = self.buffer.sample(batch_size)

        self.assertEqual(len(batch), 5)

        # states
        states = batch[0]
        self.assertEqual(states.shape, (batch_size, 32, 32))
        # action
        actions = batch[1]
        self.assertEqual(actions.shape, (batch_size, 1))
        # reward
        rewards = batch[2]
        self.assertEqual(rewards.shape, (batch_size, 1))
        # dones
        dones = batch[3]
        self.assertEqual(dones.shape, (batch_size, 1))
        # next states
        next_states = batch[4]
        self.assertEqual(next_states.shape, (batch_size, 32, 32))