Exemplo n.º 1
0
    def generate_logs(self, observation, highwayenv_score):
        ego_state = observation.ego_vehicle_state
        start = observation.ego_vehicle_state.mission.start
        goal = observation.ego_vehicle_state.mission.goal
        path = get_path_to_goal(goal=goal,
                                paths=observation.waypoint_paths,
                                start=start)
        closest_wp, _ = get_closest_waypoint(
            num_lookahead=100,
            goal_path=path,
            ego_position=ego_state.position,
            ego_heading=ego_state.heading,
        )
        signed_dist_from_center = closest_wp.signed_lateral_error(
            ego_state.position)
        lane_width = closest_wp.lane_width * 0.5
        ego_dist_center = signed_dist_from_center / lane_width

        linear_jerk = np.linalg.norm(ego_state.linear_jerk)
        angular_jerk = np.linalg.norm(ego_state.angular_jerk)

        # Distance to goal
        ego_2d_position = ego_state.position[0:2]
        goal_dist = distance.euclidean(ego_2d_position, goal.position)

        angle_error = closest_wp.relative_heading(
            ego_state.heading)  # relative heading radians [-pi, pi]

        # number of violations
        (
            ego_num_violations,
            social_num_violations,
        ) = ego_social_safety(
            observation,
            d_min_ego=1.0,
            t_c_ego=1.0,
            d_min_social=1.0,
            t_c_social=1.0,
            ignore_vehicle_behind=True,
        )

        info = dict(
            position=ego_state.position,
            speed=ego_state.speed,
            steering=ego_state.steering,
            heading=ego_state.heading,
            dist_center=abs(ego_dist_center),
            start=start,
            goal=goal,
            closest_wp=closest_wp,
            events=observation.events,
            ego_num_violations=ego_num_violations,
            social_num_violations=social_num_violations,
            goal_dist=goal_dist,
            linear_jerk=np.linalg.norm(ego_state.linear_jerk),
            angular_jerk=np.linalg.norm(ego_state.angular_jerk),
            env_score=self.ultra_scores.reward_adapter(observation,
                                                       highwayenv_score),
        )
        return info
Exemplo n.º 2
0
    def observation_adapter(self, env_observation):
        ego_state = env_observation.ego_vehicle_state
        start = env_observation.ego_vehicle_state.mission.start
        goal = env_observation.ego_vehicle_state.mission.goal
        path = get_path_to_goal(
            goal=goal, paths=env_observation.waypoint_paths, start=start
        )
        closest_wp, _ = get_closest_waypoint(
            num_lookahead=num_lookahead,
            goal_path=path,
            ego_position=ego_state.position,
            ego_heading=ego_state.heading,
        )
        signed_dist_from_center = closest_wp.signed_lateral_error(ego_state.position)
        lane_width = closest_wp.lane_width * 0.5
        ego_dist_center = signed_dist_from_center / lane_width

        relative_goal_position = np.asarray(goal.position[0:2]) - np.asarray(
            ego_state.position[0:2]
        )
        relative_goal_position_rotated = rotate2d_vector(
            relative_goal_position, -ego_state.heading
        )

        state = dict(
            speed=ego_state.speed,
            relative_goal_position=relative_goal_position_rotated,
            distance_from_center=ego_dist_center,
            steering=ego_state.steering,
            angle_error=closest_wp.relative_heading(ego_state.heading),
            social_vehicles=env_observation.neighborhood_vehicle_states,
            road_speed=closest_wp.speed_limit,
            # ----------
            # dont normalize the following,
            start=start.position,
            goal=goal.position,
            heading=ego_state.heading,
            goal_path=path,
            ego_position=ego_state.position,
            waypoint_paths=env_observation.waypoint_paths,
            events=env_observation.events,
        )
        return state  # ego=ego, env_observation=env_observation)
Exemplo n.º 3
0
    def reward_adapter(observation, reward):
        env_reward = reward
        ego_events = observation.events
        ego_observation = observation.ego_vehicle_state
        start = observation.ego_vehicle_state.mission.start
        goal = observation.ego_vehicle_state.mission.goal
        path = get_path_to_goal(
            goal=goal, paths=observation.waypoint_paths, start=start
        )

        linear_jerk = np.linalg.norm(ego_observation.linear_jerk)
        angular_jerk = np.linalg.norm(ego_observation.angular_jerk)

        # Distance to goal
        ego_2d_position = ego_observation.position[0:2]
        goal_dist = distance.euclidean(ego_2d_position, goal.position)

        closest_wp, _ = get_closest_waypoint(
            num_lookahead=num_lookahead,
            goal_path=path,
            ego_position=ego_observation.position,
            ego_heading=ego_observation.heading,
        )
        angle_error = closest_wp.relative_heading(
            ego_observation.heading
        )  # relative heading radians [-pi, pi]

        # Distance from center
        signed_dist_from_center = closest_wp.signed_lateral_error(
            observation.ego_vehicle_state.position
        )
        lane_width = closest_wp.lane_width * 0.5
        ego_dist_center = signed_dist_from_center / lane_width

        # number of violations
        (ego_num_violations, social_num_violations,) = ego_social_safety(
            observation,
            d_min_ego=1.0,
            t_c_ego=1.0,
            d_min_social=1.0,
            t_c_social=1.0,
            ignore_vehicle_behind=True,
        )

        speed_fraction = max(0, ego_observation.speed / closest_wp.speed_limit)
        ego_step_reward = 0.02 * min(speed_fraction, 1) * np.cos(angle_error)
        ego_speed_reward = min(
            0, (closest_wp.speed_limit - ego_observation.speed) * 0.01
        )  # m/s
        ego_collision = len(ego_events.collisions) > 0
        ego_collision_reward = -1.0 if ego_collision else 0.0
        ego_off_road_reward = -1.0 if ego_events.off_road else 0.0
        ego_off_route_reward = -1.0 if ego_events.off_route else 0.0
        ego_wrong_way = -0.02 if ego_events.wrong_way else 0.0
        ego_goal_reward = 0.0
        ego_time_out = 0.0
        ego_dist_center_reward = -0.002 * min(1, abs(ego_dist_center))
        ego_angle_error_reward = -0.005 * max(0, np.cos(angle_error))
        ego_reached_goal = 1.0 if ego_events.reached_goal else 0.0
        ego_safety_reward = -0.02 if ego_num_violations > 0 else 0
        social_safety_reward = -0.02 if social_num_violations > 0 else 0
        ego_lat_speed = 0.0  # -0.1 * abs(long_lat_speed[1])
        ego_linear_jerk = -0.0001 * linear_jerk
        ego_angular_jerk = -0.0001 * angular_jerk * math.cos(angle_error)
        env_reward /= 100
        # DG: Different speed reward
        ego_speed_reward = -0.1 if speed_fraction >= 1 else 0.0
        ego_speed_reward += -0.01 if speed_fraction < 0.01 else 0.0

        rewards = sum(
            [
                ego_goal_reward,
                ego_collision_reward,
                ego_off_road_reward,
                ego_off_route_reward,
                ego_wrong_way,
                ego_speed_reward,
                # ego_time_out,
                ego_dist_center_reward,
                ego_angle_error_reward,
                ego_reached_goal,
                ego_step_reward,
                env_reward,
                # ego_linear_jerk,
                # ego_angular_jerk,
                # ego_lat_speed,
                # ego_safety_reward,
                # social_safety_reward,
            ]
        )
        return rewards
Exemplo n.º 4
0
 def extract_ego_goal_path(ego_goal, ego_waypoints, ego_start):
     return get_path_to_goal(goal=ego_goal, paths=ego_waypoints, start=ego_start)
Exemplo n.º 5
0
def adapt(
    observation: Observation, reward: float, info: Dict[str, Any]
) -> Dict[str, Any]:
    """Adapts a raw environment observation, an environment reward, and info about the
    agent from the environment into custom information about the agent.

    The raw observation from the environment must include the ego vehicle's state,
    events, waypoint paths, and neighborhood vehicles. See smarts.core.sensors for more
    information on the Observation type.

    Args:
        observation (Observation): The raw environment observation received from SMARTS.
        reward (float): The environment reward received from SMARTS.
        info (dict): Information about the agent received from SMARTS.

    Returns:
        dict: The adapted information. A dictionary containing the same information as
            the original info argument, but also including a "logs" key containing more
            information about the agent.
    """
    ego_state = observation.ego_vehicle_state
    start = observation.ego_vehicle_state.mission.start
    goal = observation.ego_vehicle_state.mission.goal
    path = get_path_to_goal(goal=goal, paths=observation.waypoint_paths, start=start)
    closest_wp, _ = get_closest_waypoint(
        num_lookahead=100,
        goal_path=path,
        ego_position=ego_state.position,
        ego_heading=ego_state.heading,
    )
    signed_dist_from_center = closest_wp.signed_lateral_error(ego_state.position)
    lane_width = closest_wp.lane_width * 0.5
    ego_dist_center = signed_dist_from_center / lane_width

    linear_jerk = np.linalg.norm(ego_state.linear_jerk)
    angular_jerk = np.linalg.norm(ego_state.angular_jerk)

    # Distance to goal
    ego_2d_position = ego_state.position[0:2]
    goal_dist = distance.euclidean(ego_2d_position, goal.position)

    angle_error = closest_wp.relative_heading(
        ego_state.heading
    )  # relative heading radians [-pi, pi]

    # number of violations
    (ego_num_violations, social_num_violations,) = ego_social_safety(
        observation,
        d_min_ego=1.0,
        t_c_ego=1.0,
        d_min_social=1.0,
        t_c_social=1.0,
        ignore_vehicle_behind=True,
    )

    info["logs"] = dict(
        position=ego_state.position,
        speed=ego_state.speed,
        steering=ego_state.steering,
        heading=ego_state.heading,
        dist_center=abs(ego_dist_center),
        start=start,
        goal=goal,
        closest_wp=closest_wp,
        events=observation.events,
        ego_num_violations=ego_num_violations,
        social_num_violations=social_num_violations,
        goal_dist=goal_dist,
        linear_jerk=np.linalg.norm(ego_state.linear_jerk),
        angular_jerk=np.linalg.norm(ego_state.angular_jerk),
        env_score=default_reward_adapter.adapt(observation, reward),
    )

    return info
def adapt(observation: Observation) -> Dict[str, np.ndarray]:
    """Adapts a raw environment observation into a dictionary of numpy.ndarrays.

    The raw observation from the environment must include the ego vehicle's state,
    events, waypoint paths, and neighborhood vehicles. See smarts.core.sensors for more
    information on the Observation type.

    Args:
        observation (Observation): The raw environment observation received from SMARTS.

    Returns:
        dict: A dictionary with two keys, "low_dim_states" and "social_vehicles". The
            value of "low_dim_states" is a numpy.ndarray with shape (_SIZE,), and the
            value of "social_vehicles" is a numpy.ndarray with shape
            (_CAPACITY, _FEATURES).
    """
    observation = copy.deepcopy(observation)
    ego_position = observation.ego_vehicle_state.position
    ego_heading = observation.ego_vehicle_state.heading
    ego_speed = observation.ego_vehicle_state.speed
    ego_steering = observation.ego_vehicle_state.steering
    ego_start = observation.ego_vehicle_state.mission.start
    ego_goal = observation.ego_vehicle_state.mission.goal
    ego_waypoints = observation.waypoint_paths

    ego_goal_path = get_path_to_goal(goal=ego_goal,
                                     paths=ego_waypoints,
                                     start=ego_start)
    ego_closest_waypoint, ego_lookahead_waypoints = get_closest_waypoint(
        num_lookahead=_WAYPOINTS,
        goal_path=ego_goal_path,
        ego_position=ego_position,
        ego_heading=ego_heading,
    )
    ego_lookahead_waypoints = np.hstack(ego_lookahead_waypoints)

    signed_distance_from_center = ego_closest_waypoint.signed_lateral_error(
        ego_position)
    lane_width = ego_closest_waypoint.lane_width * 0.5
    ego_distance_from_center = signed_distance_from_center / lane_width

    ego_relative_rotated_goal_position = rotate2d_vector(
        np.asarray(ego_goal.position[0:2]) - np.asarray(ego_position[0:2]),
        -ego_heading,
    )

    observation_dict = dict(
        speed=ego_speed,
        relative_goal_position=ego_relative_rotated_goal_position,
        distance_from_center=ego_distance_from_center,
        steering=ego_steering,
        angle_error=ego_closest_waypoint.relative_heading(ego_heading),
        road_speed=ego_closest_waypoint.speed_limit,
        start=ego_start.position,
        goal=ego_goal.position,
        heading=ego_heading,
        goal_path=ego_goal_path,
        ego_position=ego_position,
        waypoint_paths=ego_waypoints,
        events=observation.events,
        waypoints_lookahead=ego_lookahead_waypoints,
    )
    normalized_observation = [
        _normalize(key, observation_dict[key])
        for key in _NORMALIZATION_VALUES.keys()
    ]
    low_dim_states = np.concatenate(
        [
            value if isinstance(value, collections.abc.Iterable) else
            np.asarray([value]) for value in normalized_observation
        ],
        axis=-1,
    )

    # Adapt the social vehicles.
    social_vehicles = observation.neighborhood_vehicle_states

    if len(social_vehicles) == 0:
        # There are no social vehicles. Create an empty array with the correct
        # number of features so it can be padded.
        social_vehicles = np.empty((0, _FEATURES))
    else:
        # Sort by distance to the ego vehicle.
        social_vehicles.sort(
            key=lambda vehicle: _get_distance(vehicle, ego_position),
            reverse=False,
        )
        # Extract the state of each social vehicle.
        social_vehicles = np.asarray([
            _extract_social_vehicle_state(
                social_vehicle=social_vehicle,
                ego_position=ego_position,
                ego_heading=ego_heading,
            ) for social_vehicle in social_vehicles
        ])

    if len(social_vehicles) < _CAPACITY:
        # Pad with zero vectors if we don't have enough social vehicles.
        remain = _CAPACITY - len(social_vehicles)
        empty_social_vehicles = np.zeros(shape=(remain, _FEATURES))
        social_vehicles = np.concatenate(
            (social_vehicles, empty_social_vehicles))
    elif len(social_vehicles) > _CAPACITY:
        # Remove extra social vehicles if there were too many in the observation.
        social_vehicles = social_vehicles[:_CAPACITY]

    vector_observation = {
        "low_dim_states": low_dim_states.astype(np.float32),
        "social_vehicles": social_vehicles.astype(np.float32),
    }
    return vector_observation
Exemplo n.º 7
0
def adapt(observation: Observation, reward: float) -> float:
    """Adapts a raw environment observation and an environment reward to a custom reward
    of type float.

    The raw observation from the environment must include the ego vehicle's state,
    events, and waypoint paths. See smarts.core.sensors for more information on the
    Observation type.

    Args:
        observation (Observation): The raw environment observation received from SMARTS.
        reward (float): The environment reward received from SMARTS.

    Returns:
        float: The adapted, custom reward which includes aspects of the ego vehicle's
            state and the ego vehicle's mission progress, in addition to the environment
            reward.
    """
    env_reward = reward
    ego_events = observation.events
    ego_observation = observation.ego_vehicle_state
    start = observation.ego_vehicle_state.mission.start
    goal = observation.ego_vehicle_state.mission.goal
    path = get_path_to_goal(goal=goal,
                            paths=observation.waypoint_paths,
                            start=start)

    linear_jerk = np.linalg.norm(ego_observation.linear_jerk)
    angular_jerk = np.linalg.norm(ego_observation.angular_jerk)

    # Distance to goal
    ego_2d_position = ego_observation.position[0:2]
    goal_dist = distance.euclidean(ego_2d_position, goal.position)

    closest_wp, _ = get_closest_waypoint(
        num_lookahead=_WAYPOINTS,
        goal_path=path,
        ego_position=ego_observation.position,
        ego_heading=ego_observation.heading,
    )
    angle_error = closest_wp.relative_heading(
        ego_observation.heading)  # relative heading radians [-pi, pi]

    # Distance from center
    signed_dist_from_center = closest_wp.signed_lateral_error(
        observation.ego_vehicle_state.position)
    lane_width = closest_wp.lane_width * 0.5
    ego_dist_center = signed_dist_from_center / lane_width

    # NOTE: This requires the NeighborhoodVehicles interface.
    # # number of violations
    # (ego_num_violations, social_num_violations,) = ego_social_safety(
    #     observation,
    #     d_min_ego=1.0,
    #     t_c_ego=1.0,
    #     d_min_social=1.0,
    #     t_c_social=1.0,
    #     ignore_vehicle_behind=True,
    # )

    speed_fraction = max(0, ego_observation.speed / closest_wp.speed_limit)
    ego_step_reward = 0.02 * min(speed_fraction, 1) * np.cos(angle_error)
    ego_speed_reward = min(
        0, (closest_wp.speed_limit - ego_observation.speed) * 0.01)  # m/s
    ego_collision = len(ego_events.collisions) > 0
    ego_collision_reward = -1.0 if ego_collision else 0.0
    ego_off_road_reward = -1.0 if ego_events.off_road else 0.0
    ego_off_route_reward = -1.0 if ego_events.off_route else 0.0
    ego_wrong_way = -0.02 if ego_events.wrong_way else 0.0
    ego_goal_reward = 0.0
    ego_time_out = 0.0
    ego_dist_center_reward = -0.002 * min(1, abs(ego_dist_center))
    ego_angle_error_reward = -0.005 * max(0, np.cos(angle_error))
    ego_reached_goal = 1.0 if ego_events.reached_goal else 0.0
    # NOTE: This requires the NeighborhoodVehicles interface.
    # ego_safety_reward = -0.02 if ego_num_violations > 0 else 0
    # NOTE: This requires the NeighborhoodVehicles interface.
    # social_safety_reward = -0.02 if social_num_violations > 0 else 0
    ego_lat_speed = 0.0  # -0.1 * abs(long_lat_speed[1])
    ego_linear_jerk = -0.0001 * linear_jerk
    ego_angular_jerk = -0.0001 * angular_jerk * math.cos(angle_error)
    env_reward /= 100
    # DG: Different speed reward
    ego_speed_reward = -0.1 if speed_fraction >= 1 else 0.0
    ego_speed_reward += -0.01 if speed_fraction < 0.01 else 0.0

    rewards = sum([
        ego_goal_reward,
        ego_collision_reward,
        ego_off_road_reward,
        ego_off_route_reward,
        ego_wrong_way,
        ego_speed_reward,
        # ego_time_out,
        ego_dist_center_reward,
        ego_angle_error_reward,
        ego_reached_goal,
        ego_step_reward,
        env_reward,
        # ego_linear_jerk,
        # ego_angular_jerk,
        # ego_lat_speed,
        # ego_safety_reward,
        # social_safety_reward,
    ])
    return rewards