def test_desired_velocity(self): """Test the desired_velocity methods.""" vehicles = Vehicles() vehicles.add("test", num_vehicles=10) env_params = EnvParams(additional_params={ "target_velocity": 10, "max_accel": 1, "max_decel": 1 }) env, scenario = ring_road_exp_setup(vehicles=vehicles, env_params=env_params) # check that the fail attribute leads to a zero return self.assertEqual(desired_velocity(env, fail=True), 0) # check the average speed upon reset self.assertEqual(desired_velocity(env, fail=False), 0) # change the speed of one vehicle env.vehicles.test_set_speed("test_0", 10) # check the new average speed self.assertAlmostEqual(desired_velocity(env, fail=False), 0.05131670)
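# The asserted value above can be reproduced by hand. A minimal, hypothetical
# sketch (not part of the test suite), assuming desired_velocity implements an
# L2-norm reward of the form max(||v_des|| - ||v - v_des||, 0) / ||v_des||
# over all 10 vehicles, with "test_0" at 10 m/s and the rest at rest:
import numpy as np

target_velocity = 10
speeds = np.array([10.] + [0.] * 9)
max_cost = np.linalg.norm([target_velocity] * len(speeds))  # sqrt(10 * 10**2)
cost = np.linalg.norm(speeds - target_velocity)             # sqrt(9 * 10**2)
print(max(max_cost - cost, 0) / max_cost)                   # ~0.0513167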
def compute_reward(self, rl_actions, **kwargs): """ :param rl_actions: The output of the PPO network based on the last steps observation per agent :param kwargs: Can contain fail, to indicate that a crash happened :return: The individual reward for every agent """ # if a crash occurred everybody gets 0 if kwargs['fail']: return {rl_id: 0 for rl_id in self.k.vehicle.get_rl_ids()} # Average outflow over last 10 steps, divided 2000 * scaling. # TODO: NEXT: try own reward computation outflow_reward = 2 / 3 * self.k.vehicle.get_rl_outflow_rate( 10 * self.sim_step) / (2000.0 * self.scaling) rl_agent_rewards = {} if rl_actions: for rl_id in self.k.vehicle.get_rl_ids(): # Reward desired velocity in own edge + the total outflow edge_num = self.k.vehicle.get_edge(rl_id) rl_agent_rewards[rl_id] = 1 / 3 * rewards.desired_velocity( self, edge_list=[edge_num ], use_only_rl_ids=True) + outflow_reward return rl_agent_rewards
def compute_reward(self, rl_actions, **kwargs): """See class definition.""" if self.env_params.evaluate: return np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids())) else: # return a reward of 0 if a collision occurred if kwargs["fail"]: return 0 # reward high system-level velocities cost1 = rewards.desired_velocity(self, fail=kwargs["fail"]) # penalize small time headways cost2 = 0 t_min = 1 # smallest acceptable time headway for rl_id in self.rl_veh: lead_id = self.k.vehicle.get_leader(rl_id) if lead_id not in ["", None] \ and self.k.vehicle.get_speed(rl_id) > 0: t_headway = max( self.k.vehicle.get_headway(rl_id) / self.k.vehicle.get_speed(rl_id), 0) cost2 += min((t_headway - t_min) / t_min, 0) if "max_inflow" in self.env_params.additional_params.keys(): #print("max_inflow specified ") max_inflow = self.env_params.additional_params["max_inflow"] else: max_inflow = 2200 InflowScale = rewards.optimize_inflow(self, max_flow=max_inflow, timespan=500) # weights for cost1, cost2, and cost3, respectively eta1, eta2 = 1.00, 0.00 reward = max(eta1 * cost1 + eta2 * cost2, 0) * InflowScale return reward
def compute_reward(self, rl_actions, **kwargs): """See class definition.""" num_rl = self.k.vehicle.num_rl_vehicles lane_change_acts = np.abs(np.round(rl_actions[1::2])[:num_rl]) return (rewards.desired_velocity(self) + rewards.rl_forward_progress( self, gain=0.1) - rewards.boolean_action_penalty( lane_change_acts, gain=1.0))
def compute_reward(self, rl_actions, **kwargs):
    current_rl_vehs = self.rl_veh
    edges = []
    for veh_id in current_rl_vehs:
        edge = self.k.vehicle.get_edge(veh_id)
        if edge not in edges:
            edges.append(edge)

    interested_vehs = self.k.vehicle.get_ids_by_edge(edges)
    if len(interested_vehs) > 0:
        cost1 = rewards.desired_velocity(self, fail=kwargs["fail"],
                                         edge_list=edges)

        # penalize small time headways
        cost2 = 0
        t_min = 1  # smallest acceptable time headway
        for rl_id in self.rl_veh:
            lead_id = self.k.vehicle.get_leader(rl_id)
            if lead_id not in ["", None] \
                    and self.k.vehicle.get_speed(rl_id) > 0:
                t_headway = max(
                    self.k.vehicle.get_headway(rl_id) /
                    self.k.vehicle.get_speed(rl_id), 0)
                cost2 += min((t_headway - t_min) / t_min, 0)

        # weights for cost1 and cost2, respectively
        eta1, eta2 = 1.00, 0.10
        return max(eta1 * cost1 + eta2 * cost2, 0)
    else:
        return 0
def compute_reward(self, rl_actions=None, rl_states=None, **kwargs): """See class definition.""" if rl_actions == None or rl_actions == {}: return {}, {} rew = {} info = {} cost1 = rewards.desired_velocity(self, fail=kwargs["fail"]) mean_vel = np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids())) outflow = self.k.vehicle.get_outflow_rate(100) if outflow == None: outflow = 0.0 # penalize small time headways t_min = self.env_params.additional_params[ "t_min"] # smallest acceptable time headway for rl_id in self.k.vehicle.get_rl_ids(): if rl_id in rl_actions: state = rl_states[rl_id] action = rl_actions[rl_id] reward = self.calculate_reward(state, action) else: reward = 0.0 # add the other information cost2 = 0.0 current_edge = self.k.vehicle.get_edge(rl_id) lead_id = self.k.vehicle.get_leader(rl_id) if lead_id not in ["", None] \ and self.k.vehicle.get_speed(rl_id) > 0: t_headway = max( self.k.vehicle.get_headway(rl_id) / self.k.vehicle.get_speed(rl_id), 0) cost2 += min((t_headway - t_min) / t_min, 0.0) info.update({ rl_id: { 'cost1': cost1, 'cost2': cost2, 'mean_vel': mean_vel, "outflow": outflow } }) # update reward rew.update({rl_id: reward}) if kwargs["fail"]: rew.update({rl_id: 0.0}) info.update({ rl_id: { 'cost1': cost1, 'cost2': cost2, 'mean_vel': mean_vel, "outflow": outflow } }) return rew, info
def compute_reward(self, rl_actions, **kwargs): """See class definition.""" # Compute the common reward. reward = rewards.desired_velocity(self, fail=kwargs['fail']) # Reward is shared by all agents. return {key: reward for key in self.k.vehicle.get_rl_ids()}
def get_state(self):
    """See class definition."""
    obs = {}

    # normalizing constants
    max_speed = self.k.network.max_speed()
    target_speed = desired_velocity(self)

    for rl_id in self.k.vehicle.get_rl_ids():
        this_speed = self.k.vehicle.get_speed(rl_id)

        lead_speeds = self.k.vehicle.get_lane_leaders_speed(rl_id)
        lead_headways = self.k.vehicle.get_lane_headways(rl_id)
        follower_speeds = self.k.vehicle.get_lane_followers_speed(rl_id)
        follower_headways = self.k.vehicle.get_lane_tailways(rl_id)

        if len(lead_headways) > 1:
            observation = np.array([
                this_speed, target_speed, max_speed,
                lead_headways[0], lead_headways[1], lead_headways[2],
                lead_speeds[0], lead_speeds[1], lead_speeds[2],
                follower_headways[0], follower_headways[1],
                follower_headways[2],
                follower_speeds[0], follower_speeds[1], follower_speeds[2]
            ])
        else:
            observation = np.array([
                this_speed, target_speed, max_speed,
                lead_headways[0], lead_headways[0], lead_headways[0],
                lead_speeds[0], lead_speeds[0], lead_speeds[0],
                follower_headways[0], follower_headways[0],
                follower_headways[0],
                follower_speeds[0], follower_speeds[0], follower_speeds[0]
            ])

        obs.update({rl_id: observation})

    return obs["rl_0"]
def compute_reward(self, rl_actions, **kwargs): # """See class definition.""" # # return a reward of 0 if a collision occurred # if kwargs["fail"]: # return 0 # # # reward high system-level velocities, but according to their L_2 norm, # # which is bad, since encourages increase in high-speeds more than in # # low-speeds and is not the real-reward # # # #return rewards.desired_velocity(self, fail=kwargs["fail"]) # veh_ids = self.vehicles.get_ids() # vel = np.array(self.vehicles.get_speed(veh_ids)) # num_vehicles = len(veh_ids) # # if any(vel < -100): # return 0. # # target_vel = self.env_params.additional_params['target_velocity'] # max_reward = np.array([target_vel]) # print("max_reward " + str(max_reward)) # # reward = np.sum(vel) / num_vehicles # print("reward " + str(reward)) # # #return reward / max_reward # return reward """See class definition.""" # return a reward of 0 if a collision occurred if kwargs["fail"]: return 0 # reward high system-level velocities return rewards.desired_velocity(self, fail=kwargs["fail"])
def compute_reward(self, rl_actions, **kwargs): """See class definition.""" if self.env_params.evaluate: return np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids())) else: # return a reward of 0 if a collision occurred if kwargs["fail"]: return 0 # reward high system-level velocities cost1 = rewards.desired_velocity(self, fail=kwargs["fail"]) # penalize small time headways cost2 = 0 t_min = 1 # smallest acceptable time headway for rl_id in self.rl_veh: lead_id = self.k.vehicle.get_leader(rl_id) if lead_id not in ["", None] \ and self.k.vehicle.get_speed(rl_id) > 0: t_headway = max( self.k.vehicle.get_headway(rl_id) / self.k.vehicle.get_speed(rl_id), 0) cost2 += min((t_headway - t_min) / t_min, 0) # weights for cost1, cost2, and cost3, respectively eta1, eta2 = 1.00, 0.0 return max(eta1 * cost1, 0)
def compute_reward(self, state, rl_actions, **kwargs): """ See parent class """ # reward desired velocity reward = rewards.desired_velocity(self, fail=kwargs["fail"]) return reward
def compute_reward(self, state, rl_actions, **kwargs):
    # compute the system-level performance of vehicles from a velocity
    # perspective
    reward = rewards.desired_velocity(self, fail=kwargs["fail"])

    # punish excessive lane changes by reducing the reward by a set value
    # every time an rl car changes lanes (10% of max reward)
    for veh_id in self.vehicles.get_rl_ids():
        if self.vehicles.get_state(veh_id, "last_lc") == self.time_counter:
            reward -= 0.1

    return reward
def compute_reward(self, rl_actions, **kwargs): """The agents receives opposing speed rewards. The agent receives the class definition reward, the adversary receives the negative of the agent reward """ if self.env_params.evaluate: reward = np.mean(self.vehicles.get_speed(self.vehicles.get_ids())) return {'av': reward, 'adversary': -reward} else: reward = rewards.desired_velocity(self, fail=kwargs['fail']) return {'av': reward, 'adversary': -reward}
def compute_reward(self, state, rl_actions, **kwargs):
    vel_reward = rewards.desired_velocity(self, fail=kwargs["fail"])

    # Use a similar weighting of the headway reward as the velocity reward
    max_cost = np.array([self.env_params.additional_params[
        "target_velocity"]] * self.vehicles.num_vehicles)
    max_cost = np.linalg.norm(max_cost)
    normalization = self.scenario.length / self.vehicles.num_vehicles
    headway_reward = 0.2 * max_cost * rewards.penalize_headway_variance(
        self.vehicles, self.sorted_extra_data, normalization)

    return vel_reward + headway_reward
def compute_reward(self, rl_actions, **kwargs): """See class definition.""" # Compute the common reward. reward = rewards.desired_velocity(self, fail=kwargs['fail']) #reward_dict = {key: reward for key in self.k.vehicle.get_rl_ids()} max_speed = self.k.network.max_speed() reward_dict = {} penalty_coef = 0.5 fixed_reward = 0.05 for rl_id in self.k.vehicle.get_rl_ids(): if self.k.vehicle.get_speed(rl_id) < -2 * max_speed: # this condition occurs if there is a crash, applies penalty proportionate to size of system reward_dict[rl_id] = -penalty_coef * len( self.k.vehicle.get_rl_ids()) else: reward_dict[rl_id] = reward + fixed_reward # Reward is shared by all agents. return reward_dict
def compute_reward(self, rl_actions, **kwargs): """See class definition.""" if self.env_params.evaluate: return np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids())) else: # return a reward of 0 if a collision occurred if kwargs["fail"]: return 0 # reward high system-level velocities cost1 = rewards.desired_velocity(self, fail=kwargs["fail"]) # encourage rl to move cost2 = rewards.rl_forward_progress(self, gain=1) / (30 * self.num_rl) # weights for cost1, cost2, and cost3, respectively eta1, eta2 = 0.50, 0.50 return max(eta1 * cost1 + eta2 * cost2, 0)
def compute_reward(self, state, rl_actions, **kwargs): """ See parent class The reward function is negative norm of the difference between the velocities of each vehicle, and the target velocity. Also, a small penalty is added for rl lane changes in order to encourage mimizing lane-changing action. """ # compute the system-level performance of vehicles from a velocity # perspective reward = rewards.desired_velocity(self, fail=kwargs["fail"]) # punish excessive lane changes by reducing the reward by a set value # every time an rl car changes lanes for veh_id in self.rl_ids: if self.vehicles.get_state(veh_id, "last_lc") == self.timer: reward -= 1 return reward
def compute_reward(self, rl_actions, **kwargs): """See class definition.""" # in the warmup steps if rl_actions is None: return {} rewards = {} for rl_id in self.k.vehicle.get_rl_ids(): if self.env_params.evaluate: # reward is speed of vehicle if we are in evaluation mode reward = self.k.vehicle.get_speed(rl_id) elif kwargs['fail']: # reward is 0 if a collision occurred reward = 0 else: # reward high system-level velocities actual_speed = self.k.vehicle.get_speed(rl_id) # cost1 = desired_velocity(self, fail=kwargs['fail']) / actual_speed if actual_speed > 0 else 0 cost1 = (actual_speed / desired_velocity(self, fail=kwargs['fail'])) * 10 # penalize small time headways cost2 = 0 t_min = 1 # smallest acceptable time headway lead_id = self.k.vehicle.get_leader(rl_id) if lead_id not in ["", None ] and self.k.vehicle.get_speed(rl_id) > 0: t_headway = max( self.k.vehicle.get_headway(rl_id) / self.k.vehicle.get_speed(rl_id), 0) cost2 += min((t_headway - t_min) / t_min, 0) # weights for cost1, cost2, and cost3, respectively eta1, eta2 = 1.00, 0.10 reward = max(eta1 * cost1 + eta2 * cost2, 0) rewards[rl_id] = reward return rewards
def compute_reward(self, rl_actions, **kwargs):
    return_rewards = {}
    # in the warmup steps, rl_actions is None
    if rl_actions:
        # for rl_id, actions in rl_actions.items():
        for rl_id in self.k.vehicle.get_rl_ids():
            reward = 0
            # If there is a collision all agents get no reward
            if not kwargs['fail']:
                # Reward desired velocity in own edge
                edge_num = self.k.vehicle.get_edge(rl_id)
                reward += rewards.desired_velocity(
                    self, fail=kwargs['fail'], edge_list=[edge_num],
                    use_only_rl_ids=True)

                # Punish own lane changing
                if rl_id in rl_actions:
                    reward -= abs(rl_actions[rl_id][1])

            return_rewards[rl_id] = reward

    return return_rewards
def test_desired_velocity(self): """Test the desired_velocity method.""" vehicles = VehicleParams() vehicles.add("test", num_vehicles=10) env_params = EnvParams( additional_params={ "target_velocity": np.sqrt(10), "max_accel": 1, "max_decel": 1, "sort_vehicles": False }) env, _, _ = ring_road_exp_setup(vehicles=vehicles, env_params=env_params) # check that the fail attribute leads to a zero return self.assertEqual(desired_velocity(env, fail=True), 0) # check the average speed upon reset self.assertEqual(desired_velocity(env, fail=False), 0) # check the average speed upon reset with a subset of edges self.assertEqual( desired_velocity(env, edge_list=["bottom"], fail=False), 0) # change the speed of one vehicle env.k.vehicle.test_set_speed("test_0", np.sqrt(10)) # check the new average speed self.assertAlmostEqual(desired_velocity(env, fail=False), 1 - np.sqrt(90) / 10) # check the new average speed for a subset of edges self.assertAlmostEqual( desired_velocity(env, edge_list=["bottom"], fail=False), 1 - np.sqrt(20) / np.sqrt(30)) # change the speed of one of the vehicles outside the edge list env.k.vehicle.test_set_speed("test_8", 10) # check that the new average speed is the same as before self.assertAlmostEqual( desired_velocity(env, edge_list=["bottom"], fail=False), 1 - np.sqrt(20) / np.sqrt(30))
def compute_reward(self, state, rl_actions, **kwargs):
    # return a reward of 0 if a collision occurred
    if kwargs["fail"]:
        return 0

    # reward high system-level velocities
    cost1 = rewards.desired_velocity(self, fail=kwargs["fail"])

    # penalize small time headways
    cost2 = 0
    t_min = 1  # smallest acceptable time headway
    for rl_id in self.rl_veh:
        lead_id = self.vehicles.get_leader(rl_id)
        if lead_id not in ["", None] \
                and self.vehicles.get_speed(rl_id) > 0:
            t_headway = max(
                self.vehicles.get_headway(rl_id) /
                self.vehicles.get_speed(rl_id), 0)
            cost2 += min(t_headway - t_min, 0)

    # weights for cost1 and cost2, respectively
    eta1, eta2 = 1.00, 0.10
    return eta1 * cost1 + eta2 * cost2
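# A quick numeric illustration of the headway penalty above (hypothetical
# values, unrelated to any particular simulation): with t_min = 1 s, a
# follower at 10 m/s that is 5 m behind its leader contributes -0.5, while
# one 20 m behind (t_headway = 2 s) contributes 0.
t_min = 1  # smallest acceptable time headway, in seconds
for headway_m, speed_mps in [(5.0, 10.0), (20.0, 10.0)]:
    t_headway = max(headway_m / speed_mps, 0)
    print(min(t_headway - t_min, 0))  # -0.5, then 0.0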
def compute_reward(self, state, rl_actions, **kwargs): return rewards.desired_velocity(self, fail=kwargs["fail"])
def compute_reward(self, rl_actions, **kwargs): """See class definition.""" if self.env_params.evaluate: return -rewards.min_delay_unscaled(self) else: return rewards.desired_velocity(self, fail=kwargs["fail"])
def compute_reward(self, rl_actions, **kwargs): return rewards.desired_velocity(self, edge_list=["edge2", "edge3"])
def compute_reward(self, state, rl_actions, **kwargs):
    if self.env_params.evaluate:
        return rewards.min_delay_unscaled(self)
    else:
        return rewards.desired_velocity(self, fail=kwargs["fail"])
def compute_reward(self, rl_actions, **kwargs): """See class definition.""" if self.env_params.evaluate: return np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids())) else: return rewards.desired_velocity(self, fail=kwargs['fail'])
def compute_reward(self, rl_actions=None, rl_states=None, **kwargs): """See class definition.""" if self.env_params.evaluate: return np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids())) else: ## return a reward of 0 if a collision occurred #if kwargs["fail"]: # return 0 rew = {} info = {} scale = self.env_params.additional_params["reward_scale"] # weights for cost1, cost2, and cost3, respectively eta1 = self.env_params.additional_params["eta1"] eta2 = self.env_params.additional_params["eta2"] eta3 = self.env_params.additional_params["eta3"] FLOW_RATE = self.env_params.additional_params["FLOW_RATE"] # reward high system-level velocities cost1 = rewards.desired_velocity(self, fail=kwargs["fail"]) # print("cost1: {}".format(cost1)) mean_vel = np.mean( self.k.vehicle.get_speed(self.k.vehicle.get_ids())) outflow = self.k.vehicle.get_outflow_rate(100) if outflow == None: outflow = 0.0 # penalize small time headways t_min = self.env_params.additional_params[ "t_min"] # smallest acceptable time headway for rl_id in self.k.vehicle.get_rl_ids(): cost2 = 0.0 current_edge = self.k.vehicle.get_edge(rl_id) # don't control cars in inflow edge, # if 'inflow' in current_edge: # continue lead_id = self.k.vehicle.get_leader(rl_id) if lead_id not in ["", None] \ and self.k.vehicle.get_speed(rl_id) > 0: t_headway = max( self.k.vehicle.get_headway(rl_id) / self.k.vehicle.get_speed(rl_id), 0) cost2 += min((t_headway - t_min) / t_min, 0.0) rew.update({ rl_id: max( eta1 * cost1 + eta2 * cost2 + eta3 * outflow / FLOW_RATE, 0.0) * scale - 1.0 }) info.update({ rl_id: { 'cost1': cost1, 'cost2': cost2, 'mean_vel': mean_vel, "outflow": outflow } }) if kwargs["fail"]: rew.update({rl_id: 0.0}) info.update({ rl_id: { 'cost1': cost1, 'cost2': cost2, 'mean_vel': mean_vel, "outflow": outflow } }) return rew, info
def compute_reward(self, state, rl_actions, **kwargs):
    if self.env_params.evaluate:
        return np.mean(self.vehicles.get_speed(self.vehicles.get_ids()))
    else:
        return rewards.desired_velocity(self, fail=kwargs["fail"])