def test_shaping_reward_is_shaping(self):
    """A Reward built with a non-empty shaping tuple reports is_shaping()."""
    base, shaping = (1, 2), (3, 4)
    self.assertTrue(Reward(base, shaping).is_shaping())
def test_base_reward_is_not_shaping(self):
    """A Reward with an empty shaping tuple is not a shaping reward."""
    base, shaping = (1, 2), ()
    self.assertFalse(Reward(base, shaping).is_shaping())
def test_shaping_reward_non_shaping_reward(self):
    """assessment_reward() averages only the base components, ignoring shaping."""
    base, shaping = (1, 2), (3, 4)
    reward = Reward(base, shaping)
    # Expected value is the mean of the base components.
    expected = sum(base) / len(base)
    self.assertAlmostEqual(expected, reward.assessment_reward())
def _store_reward(self, reward: rewards.Reward, sim: Simulation):
    """Record the latest agent and assessment reward values on the simulation.

    :param reward: the Reward whose components are stored
    :param sim: the Simulation, written via item assignment at the
        keys held in self.last_agent_reward / self.last_assessment_reward
    """
    agent_value = reward.agent_reward()
    assessment_value = reward.assessment_reward()
    sim[self.last_assessment_reward] = assessment_value
    sim[self.last_agent_reward] = agent_value
def assess(self, state: State, prev_state: State, is_terminal: bool) -> Reward:
    """Calculates a Reward from the state transition.

    Combines the base reward components with the potential-based
    shaping components computed from the same transition.
    """
    base_components = self._base_rewards(state, prev_state, is_terminal)
    shaping_components = self._potential_based_rewards(state, prev_state, is_terminal)
    return Reward(base_components, shaping_components)
def assess(self, state: State, prev_state: State, is_terminal: bool) -> Reward:
    """Return a constant zero Reward regardless of the transition.

    The base reward is the single component (0,) and there is no
    shaping component.
    """
    return Reward((0,), ())
def test_init_error_on_empty_base_reward(self):
    """Constructing a Reward with no base components raises ValueError."""
    shaping = (1, 2)
    with self.assertRaises(ValueError):
        Reward((), shaping)