Пример #1
0
    def test_shaping_reward_is_shaping(self):
        """A Reward built with non-empty shaping components reports is_shaping()."""
        base_components, shaping_components = (1, 2), (3, 4)

        shaped = Reward(base_components, shaping_components)

        self.assertTrue(shaped.is_shaping())
Пример #2
0
    def test_base_reward_is_not_shaping(self):
        """A Reward built with an empty shaping tuple does not report is_shaping()."""
        # Base components (1, 2), no shaping components at all.
        unshaped = Reward((1, 2), ())

        self.assertFalse(unshaped.is_shaping())
Пример #3
0
    def test_shaping_reward_non_shaping_reward(self):
        """assessment_reward() matches the mean of the base components.

        Shaping components are supplied as well; the expected value is
        computed from the base components only.
        """
        base_components = (1, 2)
        shaping_components = (3, 4)
        total = sum(base_components)
        count = len(base_components)
        mean_of_base = total / count

        shaped = Reward(base_components, shaping_components)

        self.assertAlmostEqual(mean_of_base, shaped.assessment_reward())
Пример #4
0
 def _store_reward(self, reward: rewards.Reward, sim: Simulation):
     """Record the agent and assessment reward values of `reward` in `sim`.

     The values are written under the keys `self.last_agent_reward` and
     `self.last_assessment_reward` respectively.
     """
     agent_value = reward.agent_reward()
     sim[self.last_agent_reward] = agent_value

     assessment_value = reward.assessment_reward()
     sim[self.last_assessment_reward] = assessment_value
Пример #5
0
 def assess(self, state: State, prev_state: State, is_terminal: bool) -> Reward:
     """Build the Reward for the transition from `prev_state` to `state`.

     The Reward combines the results of `_base_rewards` and
     `_potential_based_rewards`, both evaluated on the same transition.
     """
     transition = (state, prev_state, is_terminal)
     base_part = self._base_rewards(*transition)
     shaping_part = self._potential_based_rewards(*transition)
     return Reward(base_part, shaping_part)
Пример #6
0
 def assess(self, state: State, prev_state: State, is_terminal: bool) -> Reward:
     """Return a constant Reward, ignoring the transition arguments.

     The Reward has a single base component of 0 and no shaping components.
     """
     return Reward((0,), ())
Пример #7
0
    def test_init_error_on_empty_base_reward(self):
        """Constructing a Reward with no base components raises ValueError."""
        no_base_components = ()
        shaping_components = (1, 2)

        # Callable form: assertRaises invokes Reward with the given arguments.
        self.assertRaises(ValueError, Reward, no_base_components, shaping_components)