Example #1
    def compute_reward(self, rl_actions, **kwargs):
        """See class definition."""
        if self.env_params.evaluate:
            # evaluation mode: report the negated total-delay metric directly
            return -rewards.min_delay(self)
        else:
            # training mode: offset the delay cost against a fixed baseline
            # and penalize vehicles standing still
            return (500.0 - rewards.min_delay(self) +
                    rewards.penalize_standstill(self, gain=0.2))
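Flow's penalize_standstill helper is not reproduced in this listing. As a rough, self-contained sketch of the shape of such a term (the standstill_penalty name and logic are illustrative stand-ins, not Flow's API), it can be read as a gain-scaled count of stopped vehicles, returned negative so that adding it reduces the reward:

    import numpy as np

    def standstill_penalty(speeds, gain=0.2):
        """Illustrative stand-in: gain-scaled count of fully stopped vehicles."""
        speeds = np.asarray(speeds)
        return -gain * np.sum(speeds == 0)

    # three stopped vehicles out of five -> penalty of about -0.6
    print(standstill_penalty([0.0, 0.0, 0.0, 4.2, 7.1]))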
Example #2
    def test_min_delay(self):
        """Test the min_delay method."""
        # try the case of an environment with no vehicles
        vehicles = VehicleParams()
        env, _ = ring_road_exp_setup(vehicles=vehicles)

        # check that the reward function returns 0 in the case of no vehicles
        self.assertEqual(min_delay(env), 0)

        # try the case of multiple vehicles
        vehicles = VehicleParams()
        vehicles.add("test", num_vehicles=10)
        env, _ = ring_road_exp_setup(vehicles=vehicles)

        # check the min_delay upon reset
        self.assertAlmostEqual(min_delay(env), 0)

        # change the speed of one vehicle
        env.k.vehicle.test_set_speed("test_0", 10)

        # check the min_delay with the new speed
        self.assertAlmostEqual(min_delay(env), 0.0333333333333)
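A side note on the floating-point comparison above: unittest's assertAlmostEqual rounds the difference of its two arguments to 7 decimal places by default, which is why the truncated literal passes against the repeating decimal it abbreviates. A standalone demonstration (not part of the Flow test suite):

    import unittest

    class AlmostEqualDemo(unittest.TestCase):
        def test_truncated_repeating_decimal(self):
            # the default places=7 tolerance absorbs the truncation error
            self.assertAlmostEqual(0.0333333333333, 1 / 30)

    if __name__ == "__main__":
        unittest.main()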
Example #3
    def compute_reward(self, rl_actions, **kwargs):
        """Reward function for the RL agent(s).

        MUST BE implemented in new environments.
        Defaults to 0 for non-implemented environments.

        Parameters
        ----------
        rl_actions : array_like
            actions performed by rl vehicles
        kwargs : dict
            other parameters of interest. Contains a "fail" element, which
            is True if a vehicle crashed, and False otherwise

        Returns
        -------
        reward : float or list of float
        """

        # rl_actions is None during the warmup steps; no rewards to return
        if rl_actions is None:
            return {}

        rewards = {}
        for rl_id in self.k.vehicle.get_rl_ids():
            if self.env_params.evaluate:
                # reward is speed of vehicle if we are in evaluation mode
                reward = self.k.vehicle.get_speed(rl_id)
            elif kwargs['fail']:
                # reward is 0 if a collision occurred
                reward = 0
            else:
                # Reward function used to encourage minimization of total delay.
                cost1 = min_delay(self)

                # Penalty for vehicles at a standstill (computed over all
                # vehicles); the more standstill vehicles, the higher the cost
                cost2 = penalize_standstill(self)

                # todo: add selfish penalization for current agent being still

                # Calculate the average delay for the current vehicle (Selfishness)
                cost3 = avg_delay_specified_vehicles(self, rl_id)

                # get the type of the agent (coop or not)
                rl_type = self.k.vehicle.get_type(rl_id)

                # then get the coop weight
                w = self.k.vehicle.type_parameters.get(rl_type).get(
                    'cooperative_weight')

                # estimate the coop part of the reward
                coop_reward = (cost1 + cost2) * w

                # jerk-related reward factor, penalizing abrupt changes in acceleration
                jerk = self.k.vehicle.get_jerk(rl_id)

                # getting scaling factor for jerk
                scaling_factor = self.env_params.additional_params["max_accel"] \
                                 - self.env_params.additional_params["max_decel"]
                scaling_factor /= self.sim_params.sim_step

                # squared jerk, normalized to [0, 1]; the larger the jerk,
                # the larger the penalty subtracted below
                jerk = pow(jerk, 2) / pow(scaling_factor, 2)

                # maximum penalization can be 4
                reward = max(Params.baseline - coop_reward - cost3 - jerk, 0)

                if Params.debug:
                    print(termcolor.colored(
                        f"\nReward for agent {rl_id} is: {reward}", "yellow"))

            rewards[rl_id] = reward
        return rewards
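The jerk normalization is the least obvious step above. The standalone sketch below (a hypothetical jerk_penalty helper with assumed parameter values) isolates the arithmetic: the worst-case change in acceleration over one simulation step bounds the jerk, so the squared ratio lands in [0, 1] for in-range jerks. Note that the example above subtracts max_decel when building the scaling factor, which spans the full acceleration range only if max_decel is stored as a negative value; the sketch assumes a positive magnitude and adds instead:

    def jerk_penalty(jerk, max_accel, max_decel, sim_step):
        # worst-case jerk: swinging from full braking to full throttle
        # (or back) within a single simulation step
        scaling_factor = (max_accel + max_decel) / sim_step
        # squared jerk, normalized by the worst case
        return jerk ** 2 / scaling_factor ** 2

    # assumed values: +/-3 m/s^2 acceleration range, 0.1 s step
    print(jerk_penalty(jerk=15.0, max_accel=3.0, max_decel=3.0, sim_step=0.1))
    # -> 0.0625, i.e. (15 / 60) ** 2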
Example #4
    def compute_reward(self, rl_actions, **kwargs):
        # reward is the raw min_delay metric (note: not negated here)
        reward = rewards.min_delay(self)
        return reward
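For context on where compute_reward is invoked: in a gym-style environment the reward is computed once per step, after the actions have been applied. The following is a schematic sketch only (not Flow's actual Env.step; the placeholder bodies and the crash flag are simplified assumptions), but it matches the docstring contract from Example #3, where the environment passes a "fail" keyword that the reward function can use to zero out rewards after a collision:

    class SchematicEnv:
        """Schematic stand-in for an RL traffic env, to show the call order."""

        def apply_rl_actions(self, rl_actions):
            pass  # would forward the accelerations to the simulator

        def get_state(self):
            return []  # would return the observation vector

        def compute_reward(self, rl_actions, **kwargs):
            return 0.0 if kwargs["fail"] else 1.0  # placeholder reward

        def step(self, rl_actions):
            self.apply_rl_actions(rl_actions)  # advance the simulation
            obs = self.get_state()
            crash = False  # would come from the simulator's collision check
            reward = self.compute_reward(rl_actions, fail=crash)
            return obs, reward, crash, {}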