Exemplo n.º 1
0
 def compute_reward(self, rl_actions, **kwargs):
     """Combine velocity and progress rewards with an action penalty.

     The result is ``desired_velocity`` plus ``rl_forward_progress``
     (gain 0.1) minus ``boolean_action_penalty`` (gain 1.0). The penalty
     is evaluated on the rounded, absolute values of the odd-indexed
     entries of ``rl_actions``, limited to the first ``num_rl_vehicles``
     entries.
     """
     rl_count = self.k.vehicle.num_rl_vehicles

     # Odd-indexed entries are presumably the lane-change components of
     # an interleaved action vector -- TODO confirm against the action
     # space definition.
     rounded = np.round(rl_actions[1::2])
     lane_change_magnitudes = np.abs(rounded[:rl_count])

     velocity_term = rewards.desired_velocity(self)
     progress_term = rewards.rl_forward_progress(self, gain=0.1)
     action_cost = rewards.boolean_action_penalty(
         lane_change_magnitudes, gain=1.0)

     return velocity_term + progress_term - action_cost
Exemplo n.º 2
0
    def compute_reward(self, rl_actions, **kwargs):
        """Return the negated delay minus a penalty on thresholded actions.

        When ``self.test`` is truthy the reward is always 0. Otherwise the
        reward is ``-min_delay_unscaled`` minus ``boolean_action_penalty``
        (gain 1.0) evaluated on ``rl_actions >= 0.5``.
        """
        # Test mode short-circuits before any reward term is evaluated.
        if self.test:
            return 0

        delay = rewards.min_delay_unscaled(self)
        active = rl_actions >= 0.5
        action_cost = rewards.boolean_action_penalty(active, gain=1.0)
        return -delay - action_cost
Exemplo n.º 3
0
    def compute_reward(self, rl_actions, **kwargs):
        """Return the delay-based reward, less a penalty on thresholded actions.

        The action penalty (``boolean_action_penalty`` with gain 2, applied to
        ``rl_actions >= 0.5``) is skipped when ``rl_actions`` is None. The
        negated ``min_delay_unscaled`` term is always added; outside of
        evaluation mode (``self.env_params.evaluate`` falsy) the
        ``penalize_standstill`` term (gain 0.2) is added as well.

        The computed reward is reported through the ``logging`` module at
        DEBUG level rather than printed, so stdout is not flooded on every
        call.
        """
        import logging

        r = 0
        if rl_actions is not None:
            r = -rewards.boolean_action_penalty(rl_actions >= 0.5, gain=2)

        delay_term = -rewards.min_delay_unscaled(self)
        if self.env_params.evaluate:
            r += delay_term
        else:
            # Sum the two terms first so the floating-point grouping matches
            # r + (delay + standstill).
            r += delay_term + rewards.penalize_standstill(self, gain=0.2)

        logging.getLogger(__name__).debug(
            "Reward computed: %s, rl_actions: %s", r, rl_actions)
        return r
Exemplo n.º 4
0
 def compute_reward(self, rl_actions, **kwargs):
     """Return the delay-based reward for the current step.

     In evaluation mode (``self.env_params.evaluate`` truthy) the reward is
     just ``-min_delay_unscaled``. Otherwise it is ``-min_delay_unscaled``
     minus ``boolean_action_penalty`` on ``rl_actions >= 0.5`` (gain 0.01)
     minus ``waiting_penalty`` (gain 0.01).
     """
     if self.env_params.evaluate:
         return -rewards.min_delay_unscaled(self)

     return (
         -rewards.min_delay_unscaled(self)
         - rewards.boolean_action_penalty(rl_actions >= 0.5, gain=0.01)
         - rewards.waiting_penalty(self, gain=0.01))
Exemplo n.º 5
0
    def test_boolean_action_penalty(self):
        """Check boolean_action_penalty against gain times the True count."""
        # Each case pairs an action list with its number of True entries.
        cases = (
            ([False, False, False, False, False], 0),
            ([True, False, False, False, False], 1),
            ([True, False, False, True, False], 2),
        )
        for actions, active_count in cases:
            for gain in (1, 2):
                self.assertEqual(
                    boolean_action_penalty(actions, gain=gain),
                    gain * active_count)
Exemplo n.º 6
0
 def compute_reward(self, rl_actions, **kwargs):
     """Return the negated ``boolean_action_penalty`` (gain 1.0).

     The penalty is evaluated on ``rl_actions >= 0.5``, so the reward
     discourages traffic light changes from occurring.
     """
     change_requested = rl_actions >= 0.5
     change_cost = rewards.boolean_action_penalty(change_requested, gain=1.0)
     return -change_cost