def generate_logs(self, observation, highwayenv_score): ego_state = observation.ego_vehicle_state start = observation.ego_vehicle_state.mission.start goal = observation.ego_vehicle_state.mission.goal path = get_path_to_goal(goal=goal, paths=observation.waypoint_paths, start=start) closest_wp, _ = get_closest_waypoint( num_lookahead=100, goal_path=path, ego_position=ego_state.position, ego_heading=ego_state.heading, ) signed_dist_from_center = closest_wp.signed_lateral_error( ego_state.position) lane_width = closest_wp.lane_width * 0.5 ego_dist_center = signed_dist_from_center / lane_width linear_jerk = np.linalg.norm(ego_state.linear_jerk) angular_jerk = np.linalg.norm(ego_state.angular_jerk) # Distance to goal ego_2d_position = ego_state.position[0:2] goal_dist = distance.euclidean(ego_2d_position, goal.position) angle_error = closest_wp.relative_heading( ego_state.heading) # relative heading radians [-pi, pi] # number of violations ( ego_num_violations, social_num_violations, ) = ego_social_safety( observation, d_min_ego=1.0, t_c_ego=1.0, d_min_social=1.0, t_c_social=1.0, ignore_vehicle_behind=True, ) info = dict( position=ego_state.position, speed=ego_state.speed, steering=ego_state.steering, heading=ego_state.heading, dist_center=abs(ego_dist_center), start=start, goal=goal, closest_wp=closest_wp, events=observation.events, ego_num_violations=ego_num_violations, social_num_violations=social_num_violations, goal_dist=goal_dist, linear_jerk=np.linalg.norm(ego_state.linear_jerk), angular_jerk=np.linalg.norm(ego_state.angular_jerk), env_score=self.ultra_scores.reward_adapter(observation, highwayenv_score), ) return info
def extract_closest_waypoint( ego_goal_path, ego_position, ego_heading, num_lookahead=100 ): return get_closest_waypoint( num_lookahead=num_lookahead, goal_path=ego_goal_path, ego_position=ego_position, ego_heading=ego_heading, )
def observation_adapter(self, env_observation): ego_state = env_observation.ego_vehicle_state start = env_observation.ego_vehicle_state.mission.start goal = env_observation.ego_vehicle_state.mission.goal path = get_path_to_goal( goal=goal, paths=env_observation.waypoint_paths, start=start ) closest_wp, _ = get_closest_waypoint( num_lookahead=num_lookahead, goal_path=path, ego_position=ego_state.position, ego_heading=ego_state.heading, ) signed_dist_from_center = closest_wp.signed_lateral_error(ego_state.position) lane_width = closest_wp.lane_width * 0.5 ego_dist_center = signed_dist_from_center / lane_width relative_goal_position = np.asarray(goal.position[0:2]) - np.asarray( ego_state.position[0:2] ) relative_goal_position_rotated = rotate2d_vector( relative_goal_position, -ego_state.heading ) state = dict( speed=ego_state.speed, relative_goal_position=relative_goal_position_rotated, distance_from_center=ego_dist_center, steering=ego_state.steering, angle_error=closest_wp.relative_heading(ego_state.heading), social_vehicles=env_observation.neighborhood_vehicle_states, road_speed=closest_wp.speed_limit, # ---------- # dont normalize the following, start=start.position, goal=goal.position, heading=ego_state.heading, goal_path=path, ego_position=ego_state.position, waypoint_paths=env_observation.waypoint_paths, events=env_observation.events, ) return state # ego=ego, env_observation=env_observation)
def reward_adapter(observation, reward): env_reward = reward ego_events = observation.events ego_observation = observation.ego_vehicle_state start = observation.ego_vehicle_state.mission.start goal = observation.ego_vehicle_state.mission.goal path = get_path_to_goal( goal=goal, paths=observation.waypoint_paths, start=start ) linear_jerk = np.linalg.norm(ego_observation.linear_jerk) angular_jerk = np.linalg.norm(ego_observation.angular_jerk) # Distance to goal ego_2d_position = ego_observation.position[0:2] goal_dist = distance.euclidean(ego_2d_position, goal.position) closest_wp, _ = get_closest_waypoint( num_lookahead=num_lookahead, goal_path=path, ego_position=ego_observation.position, ego_heading=ego_observation.heading, ) angle_error = closest_wp.relative_heading( ego_observation.heading ) # relative heading radians [-pi, pi] # Distance from center signed_dist_from_center = closest_wp.signed_lateral_error( observation.ego_vehicle_state.position ) lane_width = closest_wp.lane_width * 0.5 ego_dist_center = signed_dist_from_center / lane_width # number of violations (ego_num_violations, social_num_violations,) = ego_social_safety( observation, d_min_ego=1.0, t_c_ego=1.0, d_min_social=1.0, t_c_social=1.0, ignore_vehicle_behind=True, ) speed_fraction = max(0, ego_observation.speed / closest_wp.speed_limit) ego_step_reward = 0.02 * min(speed_fraction, 1) * np.cos(angle_error) ego_speed_reward = min( 0, (closest_wp.speed_limit - ego_observation.speed) * 0.01 ) # m/s ego_collision = len(ego_events.collisions) > 0 ego_collision_reward = -1.0 if ego_collision else 0.0 ego_off_road_reward = -1.0 if ego_events.off_road else 0.0 ego_off_route_reward = -1.0 if ego_events.off_route else 0.0 ego_wrong_way = -0.02 if ego_events.wrong_way else 0.0 ego_goal_reward = 0.0 ego_time_out = 0.0 ego_dist_center_reward = -0.002 * min(1, abs(ego_dist_center)) ego_angle_error_reward = -0.005 * max(0, np.cos(angle_error)) ego_reached_goal = 1.0 if ego_events.reached_goal else 0.0 ego_safety_reward = -0.02 if ego_num_violations > 0 else 0 social_safety_reward = -0.02 if social_num_violations > 0 else 0 ego_lat_speed = 0.0 # -0.1 * abs(long_lat_speed[1]) ego_linear_jerk = -0.0001 * linear_jerk ego_angular_jerk = -0.0001 * angular_jerk * math.cos(angle_error) env_reward /= 100 # DG: Different speed reward ego_speed_reward = -0.1 if speed_fraction >= 1 else 0.0 ego_speed_reward += -0.01 if speed_fraction < 0.01 else 0.0 rewards = sum( [ ego_goal_reward, ego_collision_reward, ego_off_road_reward, ego_off_route_reward, ego_wrong_way, ego_speed_reward, # ego_time_out, ego_dist_center_reward, ego_angle_error_reward, ego_reached_goal, ego_step_reward, env_reward, # ego_linear_jerk, # ego_angular_jerk, # ego_lat_speed, # ego_safety_reward, # social_safety_reward, ] ) return rewards
def adapt( observation: Observation, reward: float, info: Dict[str, Any] ) -> Dict[str, Any]: """Adapts a raw environment observation, an environment reward, and info about the agent from the environment into custom information about the agent. The raw observation from the environment must include the ego vehicle's state, events, waypoint paths, and neighborhood vehicles. See smarts.core.sensors for more information on the Observation type. Args: observation (Observation): The raw environment observation received from SMARTS. reward (float): The environment reward received from SMARTS. info (dict): Information about the agent received from SMARTS. Returns: dict: The adapted information. A dictionary containing the same information as the original info argument, but also including a "logs" key containing more information about the agent. """ ego_state = observation.ego_vehicle_state start = observation.ego_vehicle_state.mission.start goal = observation.ego_vehicle_state.mission.goal path = get_path_to_goal(goal=goal, paths=observation.waypoint_paths, start=start) closest_wp, _ = get_closest_waypoint( num_lookahead=100, goal_path=path, ego_position=ego_state.position, ego_heading=ego_state.heading, ) signed_dist_from_center = closest_wp.signed_lateral_error(ego_state.position) lane_width = closest_wp.lane_width * 0.5 ego_dist_center = signed_dist_from_center / lane_width linear_jerk = np.linalg.norm(ego_state.linear_jerk) angular_jerk = np.linalg.norm(ego_state.angular_jerk) # Distance to goal ego_2d_position = ego_state.position[0:2] goal_dist = distance.euclidean(ego_2d_position, goal.position) angle_error = closest_wp.relative_heading( ego_state.heading ) # relative heading radians [-pi, pi] # number of violations (ego_num_violations, social_num_violations,) = ego_social_safety( observation, d_min_ego=1.0, t_c_ego=1.0, d_min_social=1.0, t_c_social=1.0, ignore_vehicle_behind=True, ) info["logs"] = dict( position=ego_state.position, speed=ego_state.speed, steering=ego_state.steering, heading=ego_state.heading, dist_center=abs(ego_dist_center), start=start, goal=goal, closest_wp=closest_wp, events=observation.events, ego_num_violations=ego_num_violations, social_num_violations=social_num_violations, goal_dist=goal_dist, linear_jerk=np.linalg.norm(ego_state.linear_jerk), angular_jerk=np.linalg.norm(ego_state.angular_jerk), env_score=default_reward_adapter.adapt(observation, reward), ) return info
def adapt(observation: Observation) -> Dict[str, np.ndarray]: """Adapts a raw environment observation into a dictionary of numpy.ndarrays. The raw observation from the environment must include the ego vehicle's state, events, waypoint paths, and neighborhood vehicles. See smarts.core.sensors for more information on the Observation type. Args: observation (Observation): The raw environment observation received from SMARTS. Returns: dict: A dictionary with two keys, "low_dim_states" and "social_vehicles". The value of "low_dim_states" is a numpy.ndarray with shape (_SIZE,), and the value of "social_vehicles" is a numpy.ndarray with shape (_CAPACITY, _FEATURES). """ observation = copy.deepcopy(observation) ego_position = observation.ego_vehicle_state.position ego_heading = observation.ego_vehicle_state.heading ego_speed = observation.ego_vehicle_state.speed ego_steering = observation.ego_vehicle_state.steering ego_start = observation.ego_vehicle_state.mission.start ego_goal = observation.ego_vehicle_state.mission.goal ego_waypoints = observation.waypoint_paths ego_goal_path = get_path_to_goal(goal=ego_goal, paths=ego_waypoints, start=ego_start) ego_closest_waypoint, ego_lookahead_waypoints = get_closest_waypoint( num_lookahead=_WAYPOINTS, goal_path=ego_goal_path, ego_position=ego_position, ego_heading=ego_heading, ) ego_lookahead_waypoints = np.hstack(ego_lookahead_waypoints) signed_distance_from_center = ego_closest_waypoint.signed_lateral_error( ego_position) lane_width = ego_closest_waypoint.lane_width * 0.5 ego_distance_from_center = signed_distance_from_center / lane_width ego_relative_rotated_goal_position = rotate2d_vector( np.asarray(ego_goal.position[0:2]) - np.asarray(ego_position[0:2]), -ego_heading, ) observation_dict = dict( speed=ego_speed, relative_goal_position=ego_relative_rotated_goal_position, distance_from_center=ego_distance_from_center, steering=ego_steering, angle_error=ego_closest_waypoint.relative_heading(ego_heading), road_speed=ego_closest_waypoint.speed_limit, start=ego_start.position, goal=ego_goal.position, heading=ego_heading, goal_path=ego_goal_path, ego_position=ego_position, waypoint_paths=ego_waypoints, events=observation.events, waypoints_lookahead=ego_lookahead_waypoints, ) normalized_observation = [ _normalize(key, observation_dict[key]) for key in _NORMALIZATION_VALUES.keys() ] low_dim_states = np.concatenate( [ value if isinstance(value, collections.abc.Iterable) else np.asarray([value]) for value in normalized_observation ], axis=-1, ) # Adapt the social vehicles. social_vehicles = observation.neighborhood_vehicle_states if len(social_vehicles) == 0: # There are no social vehicles. Create an empty array with the correct # number of features so it can be padded. social_vehicles = np.empty((0, _FEATURES)) else: # Sort by distance to the ego vehicle. social_vehicles.sort( key=lambda vehicle: _get_distance(vehicle, ego_position), reverse=False, ) # Extract the state of each social vehicle. social_vehicles = np.asarray([ _extract_social_vehicle_state( social_vehicle=social_vehicle, ego_position=ego_position, ego_heading=ego_heading, ) for social_vehicle in social_vehicles ]) if len(social_vehicles) < _CAPACITY: # Pad with zero vectors if we don't have enough social vehicles. remain = _CAPACITY - len(social_vehicles) empty_social_vehicles = np.zeros(shape=(remain, _FEATURES)) social_vehicles = np.concatenate( (social_vehicles, empty_social_vehicles)) elif len(social_vehicles) > _CAPACITY: # Remove extra social vehicles if there were too many in the observation. social_vehicles = social_vehicles[:_CAPACITY] vector_observation = { "low_dim_states": low_dim_states.astype(np.float32), "social_vehicles": social_vehicles.astype(np.float32), } return vector_observation
def adapt(observation: Observation, reward: float) -> float: """Adapts a raw environment observation and an environment reward to a custom reward of type float. The raw observation from the environment must include the ego vehicle's state, events, and waypoint paths. See smarts.core.sensors for more information on the Observation type. Args: observation (Observation): The raw environment observation received from SMARTS. reward (float): The environment reward received from SMARTS. Returns: float: The adapted, custom reward which includes aspects of the ego vehicle's state and the ego vehicle's mission progress, in addition to the environment reward. """ env_reward = reward ego_events = observation.events ego_observation = observation.ego_vehicle_state start = observation.ego_vehicle_state.mission.start goal = observation.ego_vehicle_state.mission.goal path = get_path_to_goal(goal=goal, paths=observation.waypoint_paths, start=start) linear_jerk = np.linalg.norm(ego_observation.linear_jerk) angular_jerk = np.linalg.norm(ego_observation.angular_jerk) # Distance to goal ego_2d_position = ego_observation.position[0:2] goal_dist = distance.euclidean(ego_2d_position, goal.position) closest_wp, _ = get_closest_waypoint( num_lookahead=_WAYPOINTS, goal_path=path, ego_position=ego_observation.position, ego_heading=ego_observation.heading, ) angle_error = closest_wp.relative_heading( ego_observation.heading) # relative heading radians [-pi, pi] # Distance from center signed_dist_from_center = closest_wp.signed_lateral_error( observation.ego_vehicle_state.position) lane_width = closest_wp.lane_width * 0.5 ego_dist_center = signed_dist_from_center / lane_width # NOTE: This requires the NeighborhoodVehicles interface. # # number of violations # (ego_num_violations, social_num_violations,) = ego_social_safety( # observation, # d_min_ego=1.0, # t_c_ego=1.0, # d_min_social=1.0, # t_c_social=1.0, # ignore_vehicle_behind=True, # ) speed_fraction = max(0, ego_observation.speed / closest_wp.speed_limit) ego_step_reward = 0.02 * min(speed_fraction, 1) * np.cos(angle_error) ego_speed_reward = min( 0, (closest_wp.speed_limit - ego_observation.speed) * 0.01) # m/s ego_collision = len(ego_events.collisions) > 0 ego_collision_reward = -1.0 if ego_collision else 0.0 ego_off_road_reward = -1.0 if ego_events.off_road else 0.0 ego_off_route_reward = -1.0 if ego_events.off_route else 0.0 ego_wrong_way = -0.02 if ego_events.wrong_way else 0.0 ego_goal_reward = 0.0 ego_time_out = 0.0 ego_dist_center_reward = -0.002 * min(1, abs(ego_dist_center)) ego_angle_error_reward = -0.005 * max(0, np.cos(angle_error)) ego_reached_goal = 1.0 if ego_events.reached_goal else 0.0 # NOTE: This requires the NeighborhoodVehicles interface. # ego_safety_reward = -0.02 if ego_num_violations > 0 else 0 # NOTE: This requires the NeighborhoodVehicles interface. # social_safety_reward = -0.02 if social_num_violations > 0 else 0 ego_lat_speed = 0.0 # -0.1 * abs(long_lat_speed[1]) ego_linear_jerk = -0.0001 * linear_jerk ego_angular_jerk = -0.0001 * angular_jerk * math.cos(angle_error) env_reward /= 100 # DG: Different speed reward ego_speed_reward = -0.1 if speed_fraction >= 1 else 0.0 ego_speed_reward += -0.01 if speed_fraction < 0.01 else 0.0 rewards = sum([ ego_goal_reward, ego_collision_reward, ego_off_road_reward, ego_off_route_reward, ego_wrong_way, ego_speed_reward, # ego_time_out, ego_dist_center_reward, ego_angle_error_reward, ego_reached_goal, ego_step_reward, env_reward, # ego_linear_jerk, # ego_angular_jerk, # ego_lat_speed, # ego_safety_reward, # social_safety_reward, ]) return rewards
def preprocess_state( state, state_description, convert_action_func, observation_num_lookahead, social_capacity, social_vehicle_config, prev_action, normalize=False, unsqueeze=False, device=None, draw=False, ): state = state.copy() images = {} for k in state_description["images"]: image = torch.from_numpy(state[k]) image = image.unsqueeze(0) if unsqueeze else image image = image.to(device) if device else image image = normalize_im(image) if normalize else image images[k] = image if "action" in state: state["action"] = convert_action_func(state["action"]) # ------------------------------------- # filter lookaheads from goal_path _, lookahead_wps = get_closest_waypoint( num_lookahead=observation_num_lookahead, goal_path=state["goal_path"], ego_position=state["ego_position"], ego_heading=state["heading"], ) state["waypoints_lookahead"] = np.hstack(lookahead_wps) # ------------------------------------- # keep prev_action state["action"] = prev_action # ------------------------------------- # normalize states and concat normalized = [ _normalize(key, state[key]) for key in state_description["low_dim_states"] ] low_dim_states = [ val if isinstance(val, Iterable) else np.asarray([val]).astype(np.float32) for val in normalized ] low_dim_states = torch.cat( [torch.from_numpy(e).float() for e in low_dim_states], dim=-1 ) low_dim_states = low_dim_states.unsqueeze(0) if unsqueeze else low_dim_states low_dim_states = low_dim_states.to(device) if device else low_dim_states # ------------------------------------- # apply social vehicle encoder # only process if state is not encoded already state["social_vehicles"] = ( get_social_vehicles( ego_vehicle_pos=state["ego_position"], ego_vehicle_heading=state["heading"], neighborhood_vehicles=state["social_vehicles"], social_vehicle_config=social_vehicle_config, waypoint_paths=state["waypoint_paths"], ) if social_capacity > 0 else [] ) # check if any social capacity is 0 social_vehicle_dimension = state_description["social_vehicles"] social_vehicles = torch.empty(0, 0) if social_vehicle_dimension: social_vehicles = torch.from_numpy(np.asarray(state["social_vehicles"])).float() social_vehicles = social_vehicles.reshape((-1, social_vehicle_dimension)) social_vehicles = social_vehicles.unsqueeze(0) if unsqueeze else social_vehicles social_vehicles = social_vehicles.to(device) if device else social_vehicles out = { "images": images, "low_dim_states": low_dim_states, "social_vehicles": social_vehicles, } return out