def _add_automated_vehicles(self):
    """Replace a portion of vehicles with automated vehicles."""
    penetration = self.env_params.additional_params["rl_penetration"]

    # Sort the initial vehicles by their positions.
    sorted_vehicles = sorted(
        self.k.vehicle.get_ids(),
        key=lambda x: self.k.vehicle.get_x_by_id(x))

    # Replace every nth vehicle with an RL vehicle.
    for lane in range(self._num_lanes):
        sorted_vehicles_lane = [
            veh for veh in sorted_vehicles if get_lane(self, veh) == lane]

        if isinstance(self.k.network.network, I210SubNetwork):
            # Choose a random starting position to allow for stochasticity.
            i = random.randint(0, int(1 / penetration) - 1)
        else:
            i = 0

        for veh_id in sorted_vehicles_lane:
            self.k.vehicle.set_vehicle_type(veh_id, "human")

            i += 1
            if i % int(1 / penetration) == 0:
                # Don't add vehicles past the control range.
                pos = self.k.vehicle.get_x_by_id(veh_id)
                if pos < self._control_range[1]:
                    self.k.vehicle.set_vehicle_type(veh_id, "rl")
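# A worked example of the selection above (illustrative numbers only): with
# rl_penetration = 0.2, int(1 / penetration) == 5, so i % 5 == 0 marks every
# fifth vehicle in the lane for replacement. On the I210SubNetwork the counter
# starts at a random offset in [0, 4], which shifts which member of each group
# of five is promoted to the "rl" type across rollouts. (`random` and
# `I210SubNetwork` are assumed to be imported at module level.)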
def _add_automated_vehicles(self):
    """Replace a portion of vehicles with automated vehicles."""
    penetration = self.env_params.additional_params["rl_penetration"]

    # Sort the initial vehicles by their positions.
    sorted_vehicles = sorted(
        self.k.vehicle.get_ids(),
        key=lambda x: self.k.vehicle.get_x_by_id(x))

    # Replace every nth vehicle with an RL vehicle.
    for lane in range(self._num_lanes):
        sorted_vehicles_lane = [
            veh for veh in sorted_vehicles if get_lane(self, veh) == lane]

        for i, veh_id in enumerate(sorted_vehicles_lane):
            if (i + 1) % int(1 / penetration) == 0:
                # Don't add vehicles past the control range.
                pos = self.k.vehicle.get_x_by_id(veh_id)
                if pos < self._control_range[1]:
                    self.k.vehicle.set_vehicle_type(veh_id, "rl")
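# Illustrative sketch of the index arithmetic used above (standalone, not part
# of the environment): for rl_penetration = 0.25,
#   period = int(1 / 0.25)                           # -> 4
#   [i for i in range(12) if (i + 1) % period == 0]  # -> [3, 7, 11]
# i.e. every fourth vehicle in each lane, counting from the lane's lowest
# position as given by get_x_by_id, is switched to the "rl" type.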
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    # In case no vehicles were available in the current step, pass an
    # empty reward dict.
    if rl_actions is None:
        return {}

    reward = {}

    # Collect the names of the vehicles within the control range.
    control_min = self._control_range[0]
    control_max = self._control_range[1]
    veh_ids = [
        veh_id for veh_id in self.k.vehicle.get_ids()
        if control_min <= self.k.vehicle.get_x_by_id(veh_id) <= control_max
    ]

    for lane in range(self._num_lanes):
        # Collect the names of all vehicles on the given lane, while
        # taking into account edges with an extra lane.
        veh_ids_lane = [v for v in veh_ids if get_lane(self, v) == lane]

        # Collect the names of the RL vehicles on the lane.
        rl_ids = [veh for veh in self.rl_ids() if veh in veh_ids_lane]

        # Collect only the actions that correspond to this lane.
        rl_actions_lane = {
            key: rl_actions[key] for key in rl_actions if key in rl_ids}

        # Compute the reward for the given lane.
        reward["lane_{}".format(lane)] = self._compute_reward_util(
            rl_actions=rl_actions_lane,
            veh_ids=veh_ids_lane,
            rl_ids=rl_ids,
            **kwargs)

    return reward
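# Shape of the value returned above (illustrative values, assuming a two-lane
# network): compute_reward yields one scalar per lane, e.g.
#   {"lane_0": 0.42, "lane_1": 0.37}
# where each entry is the output of self._compute_reward_util restricted to
# the vehicles, RL vehicles, and actions on that lane.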
def additional_command(self):
    """See parent class.

    Here, the operations are done at a per-lane level.
    """
    for lane in range(self._num_lanes):
        # Collect the names of the RL vehicles on the given lane, while
        # taking into account edges with an extra lane.
        rl_ids = [
            veh for veh in self.k.vehicle.get_rl_ids()
            if get_lane(self, veh) == lane
        ]

        # Update the RL lists.
        self.rl_queue[lane], self.rl_veh[lane], self.removed_veh = \
            update_rl_veh(
                self,
                rl_queue=self.rl_queue[lane],
                rl_veh=self.rl_veh[lane],
                removed_veh=self.removed_veh,
                control_range=self._control_range,
                num_rl=self.num_rl,
                rl_ids=reversed(sorted(
                    rl_ids, key=self.k.vehicle.get_x_by_id)),
            )

        # Specify actions for the uncontrolled RL vehicles based on
        # human-driven dynamics.
        for veh_id in list(set(rl_ids) - set(self.rl_veh[lane])):
            self._rl_controller.veh_id = veh_id
            acceleration = self._rl_controller.get_action(self)
            self.k.vehicle.apply_acceleration(veh_id, acceleration)

    # Specify observed vehicles.
    for veh_id in self.leader + self.follower:
        self.k.vehicle.set_observed(veh_id)
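# Note on the bookkeeping above (a sketch of the assumed semantics, inferred
# from the argument names): update_rl_veh moves RL vehicles between three
# per-lane containers -- rl_queue (entered the network, awaiting control),
# rl_veh (currently controlled, capped at num_rl), and removed_veh (exited
# the control range) -- and any RL vehicle not in rl_veh falls back to the
# human-driven accelerations produced by self._rl_controller.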