class MyTestCase(unittest.TestCase):
    """Tests for ``Simulator._format_buffer`` against a pickled replay buffer.

    Each replay-buffer entry is indexed as
    ``(observation, action, next_observation, reward)`` at positions 0-3.
    The formatted "previous" arrays are expected to hold, per row, the last
    ``history`` entries flattened newest-first.
    """

    # Positions of each field inside one replay-buffer entry.
    _OBS, _ACT, _NEXT_OBS, _REWARD = 0, 1, 2, 3

    # (first replay-buffer index of the window, expected row in the output).
    # NOTE(review): the 200 -> 197 spacing presumably reflects 200-step
    # episodes yielding 200 - history + 1 windows each; the values themselves
    # are the ones the test has always checked -- confirm against Simulator.
    _CHECKPOINTS = ((0, 0), (200, 197), (400, 394))

    def setUp(self):
        """Load the replay-buffer fixture and build the simulator under test."""
        self.history = 4
        # NOTE: pickle.load can execute arbitrary code; this is only
        # acceptable because the file is a trusted, repository-local fixture.
        with open('./data/replay_buffer.pkl', 'rb') as file:
            self.replay_buffer = pickle.load(file)
        env = gym.make('Pendulum-v0')
        self.simulator = Simulator(self.history, env)

    def _stacked_window(self, start, field):
        """Return ``field`` of ``history`` consecutive entries, flattened.

        The window covers buffer indices ``start .. start + history - 1`` and
        is walked from the newest entry back to the oldest.
        """
        newest = start + self.history - 1
        return np.array([
            self.replay_buffer[i][field]
            for i in range(newest, start - 1, -1)
        ]).reshape(-1)

    def test_format_buffer(self):
        """Spot-check every output of ``_format_buffer`` at three windows."""
        # Separate into previous observations, previous actions,
        # next observations and the reward for the next observation.
        p_obs, p_a, n_o, p_r = self.simulator._format_buffer(
            self.replay_buffer)

        # Stacked (history-long) inputs: previous observations and actions.
        for label, field, formatted in (
                ('observations', self._OBS, p_obs),
                ('actions', self._ACT, p_a)):
            for start, row in self._CHECKPOINTS:
                expected = self._stacked_window(start, field)
                self.assertTrue(
                    np.array_equal(expected, formatted[row]),
                    'previous %s mismatch at row %d' % (label, row))

        # Single-step targets, taken from the newest entry of each window.
        for label, field, formatted in (
                ('next observation', self._NEXT_OBS, n_o),
                ('reward', self._REWARD, p_r)):
            for start, row in self._CHECKPOINTS:
                expected = self.replay_buffer[start + self.history - 1][field]
                self.assertTrue(
                    np.array_equal(expected, formatted[row]),
                    '%s mismatch at row %d' % (label, row))

    def test_train(self):
        """Placeholder: training is not covered yet."""
        pass