def test_compute_returns_shape_does_not_match_error(self):
    """compute_returns must raise AssertionError for inconsistent shapes.

    Each case is a (bootstrap_value, rewards, terminals) triple whose
    shapes disagree with one another; every call uses gamma = 0.9.
    """
    mismatched_cases = [
        # scalar bootstrap, 1-D rewards/terminals of different lengths
        (0.0, np.arange(5), np.arange(6)),
        (0.0, np.arange(6), np.arange(5)),
        # 1-D bootstrap, 2-D rewards/terminals with different row counts
        (np.arange(5), np.ones((6, 5)), np.ones((5, 5))),
        (np.arange(5), np.ones((5, 5)), np.ones((6, 5))),
        # rewards/terminals agree, but bootstrap length (6) does not match
        # their second dimension (5)
        (np.arange(6), np.ones((5, 5)), np.ones((5, 5))),
    ]
    for bootstrap_value, rewards, terminals in mismatched_cases:
        # The exception info object is not inspected, so it is not bound.
        with pytest.raises(AssertionError):
            compute_returns(bootstrap_value, rewards, terminals, 0.9)
def fetch(self, gamma, lam):
    """Assemble a training batch from the stored steps.

    At least two steps must be stored (asserted via ``self.size() > 1``).
    The first ``size() - 1`` entries of the observation, action, value and
    log-probability buffers are returned; rewards and terminals are taken
    one step later (indices ``1 .. size() - 1``). The value stored at the
    final index is handed to ``compute_returns`` and ``compute_gae`` as the
    bootstrap value.

    Args:
        gamma: forwarded to ``compute_returns`` and ``compute_gae``
            (presumably the discount factor -- confirm against those
            helpers).
        lam: forwarded to ``compute_gae`` (presumably the GAE lambda --
            confirm against that helper).

    Returns:
        dict with keys ``obs_t``, ``actions_t``, ``log_probs_t``,
        ``returns_t``, ``advantages_t`` (normalized to zero mean and unit
        standard deviation, up to the 1e-8 stabilizer) and ``values_t``.
    """
    assert self.size() > 1

    horizon = self.size() - 1
    current = slice(None, horizon)      # steps 0 .. horizon - 1
    following = slice(1, horizon + 1)   # steps 1 .. horizon

    observations = np.array(self.obs_t)[current]
    actions = np.array(self.actions_t)[current]
    values = np.array(self.values_t)[current]
    log_probs = np.array(self.log_probs_t)[current]

    next_rewards = np.array(self.rewards_t)[following]
    next_terminals = np.array(self.terminals_t)[following]

    # Value recorded at the last stored step, used to bootstrap.
    last_value = self.values_t[horizon]

    returns = compute_returns(last_value, next_rewards, next_terminals, gamma)
    raw_advantages = compute_gae(
        last_value, next_rewards, values, next_terminals, gamma, lam)

    # normalize advantage; the epsilon guards against a zero std
    adv_mean = np.mean(raw_advantages)
    adv_std = np.std(raw_advantages)
    advantages = (raw_advantages - adv_mean) / (adv_std + 1e-8)

    batch = {
        'obs_t': observations,
        'actions_t': actions,
        'log_probs_t': log_probs,
        'returns_t': returns,
        'advantages_t': advantages,
        'values_t': values
    }
    return batch
def test_compute_returns_one_d_array(self):
    """Check compute_returns on 1-D rewards/terminals with a scalar bootstrap.

    With gamma = 0.9 the expected values are, from the last step backwards:
        3.9 = 3.0 + 0.9 * 1.0   (bootstrap value)
        2.0 = 2.0               (terminal flag set, nothing carried over)
        2.8 = 1.0 + 0.9 * 2.0
    """
    bootstrap_value = 1.0
    rewards = np.array([1.0, 2.0, 3.0])
    terminals = np.array([0.0, 1.0, 0.0])
    answer = np.array([2.8, 2.0, 3.9])

    returns = compute_returns(bootstrap_value, rewards, terminals, 0.9)

    # Compare with a tolerance: the expected values are decimal literals and
    # the computation involves float multiplication, so exact `==` is fragile.
    self.assertTrue(
        np.allclose(returns, answer),
        msg='expected {}, got {}'.format(answer, returns))
def test_compute_returns_two_d_array(self):
    """Check compute_returns on 2-D rewards/terminals with a 1-D bootstrap.

    With gamma = 0.9 the expected values are, column by column:
        last row:  2.9 = 2.0 + 0.9 * 1.0, 3.0 (terminal), 4.0 = 4.0 + 0.9 * 0.0
        first row: 3.61 = 1.0 + 0.9 * 2.9, 4.7 = 2.0 + 0.9 * 3.0, 3.0 (terminal)
    """
    bootstrap_value = np.array([1.0, 2.0, 0.0])
    rewards = np.array([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]])
    terminals = np.array([[0.0, 0.0, 1.0], [0.0, 1.0, 0.0]])
    answer = np.array([[3.61, 4.7, 3.0], [2.9, 3.0, 4.0]])

    returns = compute_returns(bootstrap_value, rewards, terminals, 0.9)

    # Compare with a tolerance: values such as 1.0 + 0.9 * 2.9 are not
    # guaranteed to be bit-identical to the literal 3.61, so exact `==`
    # would make the test depend on rounding details.
    self.assertTrue(
        np.allclose(returns, answer),
        msg='expected {}, got {}'.format(answer, returns))
def test_compute_returns_not_ndarray_error(self):
    """compute_returns must raise AssertionError for non-ndarray inputs.

    Each case is a (bootstrap_value, rewards, terminals) triple; every call
    uses gamma = 0.9.
    """
    invalid_cases = [
        # terminals is a plain range instead of an ndarray
        (0.0, np.arange(5), range(5)),
        # rewards is a plain range instead of an ndarray
        (0.0, range(5), np.arange(5)),
        # scalar bootstrap combined with 2-D rewards/terminals
        # (presumably the bootstrap must itself be an ndarray in the 2-D
        # case -- confirm against compute_returns)
        (0.0, np.ones((5, 5)), np.ones((5, 5))),
    ]
    for bootstrap_value, rewards, terminals in invalid_cases:
        # The exception info object is not inspected, so it is not bound.
        with pytest.raises(AssertionError):
            compute_returns(bootstrap_value, rewards, terminals, 0.9)