Exemplo n.º 1
0
    def test_calculate_multiple_base_components(self):
        """Check the reward produced from several base components and no shaping.

        The test is repeated for both ``positive_rewards`` settings. The
        expected reward is the mean of the component values, each shifted down
        by one when ``positive_rewards`` is False. With no shaping components
        configured, the agent reward and the assessment reward are expected to
        be identical.
        """
        component_values = [.1, .2, .4]

        for positive_rewards in (True, False):
            msg = f'positive reward {positive_rewards}'
            components = tuple(
                stubs.ConstantRewardComponentStub(value)
                for value in component_values)
            assessor = self.get_assessor(components,
                                         positive_rewards=positive_rewards)
            # NOTE(review): other tests in this file obtain their state from
            # get_dummy_state(); confirm get_dummy_state_class() is intended.
            state = self.get_dummy_state_class()

            reward = assessor.assess(state, state, True)

            # negative-reward mode shifts every component value down by one
            offset = 0 if positive_rewards else 1
            expected_values = [
                component.get_return_value() - offset
                for component in components
            ]
            expected_reward = sum(expected_values) / len(expected_values)

            # no shaping components, so both reward views share one expectation
            self.assertAlmostEqual(expected_reward,
                                   reward.agent_reward(),
                                   msg=msg)
            self.assertAlmostEqual(expected_reward,
                                   reward.assessment_reward(),
                                   msg=msg)
Exemplo n.º 2
0
    def test_init_throws_error_on_empty_base_components_non_empty_shaping_components(
            self):
        """Building an assessor with shaping components but no base components must raise ValueError."""
        no_base_components = ()
        only_shaping_components = (stubs.ConstantRewardComponentStub(), )

        # callable form: assertRaises invokes get_assessor with these arguments
        self.assertRaises(ValueError, self.get_assessor, no_base_components,
                          only_shaping_components)
Exemplo n.º 3
0
    def test_calculate_with_shaping_components(self):
        """Check agent and assessment rewards with one base and one shaping component.

        The test is repeated for both ``positive_rewards`` settings. The base
        component always returns zero; the shaping component maps two distinct
        states to a low and a high potential.

        For a non-terminal step from the low to the high potential state, the
        expected agent reward is the mean of the configured base reward and
        the potential increase. For a terminal step, the expectation is
        computed as if the potential of the final state were zero. In both
        cases the assessment reward is expected to equal the configured base
        reward alone.
        """
        num_state_vars = 3
        base_reward = 0
        low_potential, high_potential = 0.5, 1.0
        terminal_potential = 0.0

        for positive_rewards in (True, False):
            msg = f'positive reward {positive_rewards}'
            DummyState, _ = stubs.FlightTaskStub.get_dummy_state_class_and_properties(
                num_state_vars)
            base_component = stubs.ConstantRewardComponentStub(base_reward)

            # two distinguishable states that the shaping stub maps to the
            # low and the high potential respectively
            state_low_potential = DummyState(*[1.0] * num_state_vars)
            state_high_potential = DummyState(*[2.0] * num_state_vars)
            shape_component = stubs.RewardComponentStub({
                state_low_potential: low_potential,
                state_high_potential: high_potential,
            })

            assessor = self.get_assessor((base_component, ),
                                         (shape_component, ),
                                         positive_rewards=positive_rewards)

            if positive_rewards:
                base_reward_as_configured = base_reward
            else:
                base_reward_as_configured = base_reward - 1

            # (terminal flag, potential the final state is expected to count as)
            scenarios = (
                (False, high_potential),
                (True, terminal_potential),
            )
            for terminal, final_potential in scenarios:
                reward = assessor.assess(state_high_potential,
                                         state_low_potential, terminal)

                expected_shaping_reward = (
                    base_reward_as_configured +
                    (final_potential - low_potential)) / 2
                expected_non_shaping_reward = base_reward_as_configured

                self.assertAlmostEqual(expected_shaping_reward,
                                       reward.agent_reward(),
                                       msg=msg)
                self.assertAlmostEqual(expected_non_shaping_reward,
                                       reward.assessment_reward(),
                                       msg=msg)
Exemplo n.º 4
0
    def assess_reward_for_potential_change_with_dependency(
            self, state_potential: float, prev_state_potential: float,
            dependency_potential: float, positive_rewards: bool):
        """
        Assess a non-terminal transition whose shaping component has a dependency.

        A shaping component reports ``prev_state_potential`` for the previous
        state and ``state_potential`` for the new state. A second shaping
        component, registered as a dependency of the first, reports
        ``dependency_potential`` for both states.

        :param state_potential: potential the main shaping stub gives the new state
        :param prev_state_potential: potential the main shaping stub gives the
            previous state
        :param dependency_potential: constant potential of the dependency stub
        :param positive_rewards: forwarded to the assessor under test
        :return: whatever ``assess`` returns for the transition
        """
        num_state_vars = 3

        DummyState, _ = stubs.FlightTaskStub.get_dummy_state_class_and_properties(
            num_state_vars)

        # the base reward should come out as zero so only shaping matters;
        # presumably the stub value of 1 cancels the shift applied when
        # positive_rewards is False
        base_component = stubs.ConstantRewardComponentStub(
            0 if positive_rewards else 1)

        # two distinct states to key the potential lookups on
        state = DummyState(*[1.0] * num_state_vars)
        prev_state = DummyState(*[2.0] * num_state_vars)
        potential_map = {
            state: state_potential,
            prev_state: prev_state_potential,
        }

        shape_component = stubs.RewardComponentStub(potential_map)
        # the dependency reports the same potential for every known state
        dependant_shape_component = stubs.RewardComponentStub(
            dict.fromkeys(potential_map, dependency_potential))

        assessor = self.get_assessor(
            (base_component, ), (shape_component, dependant_shape_component),
            potential_dependency_map={
                shape_component: (dependant_shape_component, )
            },
            positive_rewards=positive_rewards)

        is_terminal = False
        return assessor.assess(state, prev_state, is_terminal)
Exemplo n.º 5
0
    def test_calculate_with_shaping_components(self):
        """Check rewards when constant base and constant shaping components are combined.

        The test is repeated for both ``positive_rewards`` settings. The
        expected agent reward is the mean over all base and shaping values;
        the expected assessment reward is the mean over the base values only.
        When ``positive_rewards`` is False only the base values are shifted
        down by one before averaging.
        """
        base_reward_vals = [.0, .1]
        shaping_reward_vals = [.2, .3]

        for positive_rewards in (True, False):
            msg = f'positive reward {positive_rewards}'
            base_components = tuple(
                stubs.ConstantRewardComponentStub(base_val)
                for base_val in base_reward_vals)
            shape_components = tuple(
                stubs.ConstantRewardComponentStub(shaping_val)
                for shaping_val in shaping_reward_vals)
            assessor = self.get_assessor(base_components,
                                         shape_components,
                                         positive_rewards=positive_rewards)
            state = self.get_dummy_state()

            reward = assessor.assess(state, state, True)

            # only the base values are shifted in negative-reward mode
            if positive_rewards:
                effective_base_vals = base_reward_vals
            else:
                effective_base_vals = [
                    base_val - 1 for base_val in base_reward_vals
                ]
            combined_vals = effective_base_vals + shaping_reward_vals

            expected_shaping_reward = sum(combined_vals) / len(combined_vals)
            expected_non_shaping_reward = sum(effective_base_vals) / len(
                effective_base_vals)

            self.assertAlmostEqual(expected_shaping_reward,
                                   reward.agent_reward(),
                                   msg=msg)
            self.assertAlmostEqual(expected_non_shaping_reward,
                                   reward.assessment_reward(),
                                   msg=msg)
Exemplo n.º 6
0
    def test_calculate_single_base_component(self):
        """Check the reward produced from a single base component and no shaping.

        The test is repeated for both ``positive_rewards`` settings. With
        ``positive_rewards`` True the expected reward is the component's return
        value; otherwise it is that value shifted down by one. With no shaping
        components configured, the agent reward and the assessment reward are
        expected to be identical.
        """
        for positive_rewards in (True, False):
            msg = f'positive reward {positive_rewards}'
            component = stubs.ConstantRewardComponentStub()
            assessor = self.get_assessor(base_components=(component, ),
                                         positive_rewards=positive_rewards)
            state = self.get_dummy_state()

            reward = assessor.assess(state, state, True)

            if positive_rewards:
                expected_shaping_reward = component.get_return_value()
            else:
                # Shift down by one, as the other assessor tests expect.
                # `1 - value` is non-negative for values in [0, 1] and only
                # coincides with `value - 1` when the value is exactly 1.
                expected_shaping_reward = component.get_return_value() - 1

            expected_non_shaping_reward = expected_shaping_reward  # should be same because not shaping
            self.assertAlmostEqual(expected_shaping_reward,
                                   reward.agent_reward(),
                                   msg=msg)
            self.assertAlmostEqual(expected_non_shaping_reward,
                                   reward.assessment_reward(),
                                   msg=msg)