def test_buffer_done_conversion(self):
    """Check the summed last column of ``sample`` for all-False and all-True fills.

    The buffer is filled to capacity twice: first with ``False`` as the final
    ``add`` argument, then with ``True`` (going by the test name, this is the
    "done" flag). After each fill, a sample of ``capacity`` entries is drawn
    and the fifth returned array must sum to 0 and to ``capacity``
    respectively.
    """
    capacity = 5
    n_actions = 3
    n_obs = 1
    buf = Buffer(capacity, n_actions, n_obs)

    # (flag passed to add, expected sum of the sampled flag column)
    scenarios = ((False, 0), (True, capacity))
    for flag, expected_total in scenarios:
        for _ in range(capacity):
            obs = np.ones(n_obs)
            act = np.ones(n_actions)
            next_obs = np.ones(n_obs)
            buf.add(obs, act, 1, next_obs, flag)

        _, _, _, _, flags = buf.sample(capacity)
        self.assertEqual(np.sum(flags), expected_total)
def test_buffer_overriding(self):
    """Check that a second full pass of ``add`` calls replaces the first.

    The buffer is filled to capacity with entries whose every field is 1,
    then again with entries whose every field is 2. After each fill, a
    sample of ``capacity`` entries is drawn and the third returned array
    (the value passed as the third ``add`` argument) must sum to
    ``capacity * value``.
    """
    capacity = 5
    n_actions = 3
    n_obs = 1
    buf = Buffer(capacity, n_actions, n_obs)

    for value in (1, 2):
        for _ in range(capacity):
            buf.add(
                np.ones(n_obs) * value,
                np.ones(n_actions) * value,
                value,
                np.ones(n_obs) * value,
                False,
            )

        _, _, third_column, _, _ = buf.sample(capacity)
        # The sum only matches if every sampled entry comes from the latest fill.
        self.assertEqual(np.sum(third_column), capacity * value)
def test_sample_stochasticity(self):
    """Check that ``sample`` returns consistent rows and draws every slot evenly.

    Entry ``i`` is stored with the value ``i`` in every field, so the ``r``
    value of a sampled row identifies which entry was drawn. Over many
    draws the test verifies that:

    * each sampled row is internally consistent (``s0``, ``a`` and ``s1``
      all equal ``r``, and ``d`` is all zeros);
    * the total number of draws equals ``sample_count * sample_size``;
    * every one of the ``size`` entries was drawn within ``margin`` of the
      expected uniform count.
    """
    size = 5
    action_space_size = 3
    observation_space_size = 1
    b = Buffer(size, action_space_size, observation_space_size)
    for i in range(size):
        b.add(
            np.ones(observation_space_size) * i,
            np.ones(action_space_size) * i,
            i,
            np.ones(observation_space_size) * i,
            False)

    # counters[k] is the number of times entry k has been drawn.
    counters = np.zeros(size)
    sample_size = 2
    sample_count = 10000
    for _ in range(sample_count):
        s0, a, r, s1, d = b.sample(sample_size)
        self.assert_np_array_equal(d, np.zeros((sample_size, 1)))
        # r broadcasts across the columns of each expected array.
        self.assert_np_array_equal(
            np.ones((sample_size, observation_space_size)) * r, s0)
        self.assert_np_array_equal(
            np.ones((sample_size, observation_space_size)) * r, s1)
        self.assert_np_array_equal(
            np.ones((sample_size, action_space_size)) * r, a)
        indexes = r.reshape((sample_size, )).astype(int)
        # bincount handles repeated indices within one sample, which a
        # plain fancy-index increment (counters[indexes] += 1) would not.
        counters += np.bincount(indexes, minlength=size)

    counters_sum = counters.sum()
    self.assertEqual(counters_sum, sample_count * sample_size)

    # Average number of times each index should appear in a uniform draw.
    num_per_bin = counters_sum / size

    # Check that EVERY bin is within the margin. Iterating over
    # range(sample_size) here would only cover the first two bins.
    margin = 0.1
    upper_bound = num_per_bin * (1. + margin)
    lower_bound = num_per_bin * (1 - margin)
    for i in range(size):
        self.assertLess(counters[i], upper_bound)
        self.assertGreater(counters[i], lower_bound)
def test_sample(self):
    """Check the shapes and the flag dtype of the arrays returned by ``sample``.

    Two identical entries are added, a batch of two is sampled, and each of
    the five returned arrays is checked to have ``batch`` rows and the
    expected number of columns. The last array must also be ``float32``.
    """
    capacity = 2
    n_actions = 3
    n_obs = 1
    buf = Buffer(capacity, n_actions, n_obs)

    for _ in range(2):
        buf.add(np.ones(n_obs), np.ones(n_actions), 0, np.ones(n_obs), False)

    batch = 2
    s0, a, r, s1, d = buf.sample(batch)

    # (returned array, expected number of columns), in assertion order.
    expected_columns = (
        (s0, n_obs),
        (s1, n_obs),
        (a, n_actions),
        (r, 1),
        (d, 1),
    )
    for array, columns in expected_columns:
        self.assertEqual(array.shape, (batch, columns))

    self.assertEqual(d.dtype, np.float32)