Example #1
    def generate_logs(self, observation, highwayenv_score):
        ego_state = observation.ego_vehicle_state
        start = observation.ego_vehicle_state.mission.start
        goal = observation.ego_vehicle_state.mission.goal
        path = get_path_to_goal(goal=goal,
                                paths=observation.waypoint_paths,
                                start=start)
        closest_wp, _ = get_closest_waypoint(
            num_lookahead=100,
            goal_path=path,
            ego_position=ego_state.position,
            ego_heading=ego_state.heading,
        )
        signed_dist_from_center = closest_wp.signed_lateral_error(
            ego_state.position)
        lane_width = closest_wp.lane_width * 0.5
        ego_dist_center = signed_dist_from_center / lane_width

        linear_jerk = np.linalg.norm(ego_state.linear_jerk)
        angular_jerk = np.linalg.norm(ego_state.angular_jerk)

        # Distance to goal
        ego_2d_position = ego_state.position[0:2]
        goal_dist = distance.euclidean(ego_2d_position, goal.position)

        angle_error = closest_wp.relative_heading(
            ego_state.heading)  # relative heading radians [-pi, pi]

        # number of violations
        (
            ego_num_violations,
            social_num_violations,
        ) = ego_social_safety(
            observation,
            d_min_ego=1.0,
            t_c_ego=1.0,
            d_min_social=1.0,
            t_c_social=1.0,
            ignore_vehicle_behind=True,
        )

        info = dict(
            position=ego_state.position,
            speed=ego_state.speed,
            steering=ego_state.steering,
            heading=ego_state.heading,
            dist_center=abs(ego_dist_center),
            start=start,
            goal=goal,
            closest_wp=closest_wp,
            events=observation.events,
            ego_num_violations=ego_num_violations,
            social_num_violations=social_num_violations,
            goal_dist=goal_dist,
            linear_jerk=linear_jerk,
            angular_jerk=angular_jerk,
            env_score=self.ultra_scores.reward_adapter(observation,
                                                       highwayenv_score),
        )
        return info
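A minimal usage sketch for the log method above, assuming a Gym-style SMARTS environment loop; the env, agent_id, agent, and action names are assumptions for illustration and are not part of the example.

# Hypothetical step loop: generate_logs() is called with the raw observation and
# the environment reward, and a few of the returned fields are printed.
observations, rewards, dones, infos = env.step({agent_id: action})
logs = agent.generate_logs(observations[agent_id], rewards[agent_id])
print(
    f"goal_dist={logs['goal_dist']:.2f} m, "
    f"dist_center={logs['dist_center']:.3f}, "
    f"ego_num_violations={logs['ego_num_violations']}"
)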
Example #2
def extract_closest_waypoint(
    ego_goal_path, ego_position, ego_heading, num_lookahead=100
):
    return get_closest_waypoint(
        num_lookahead=num_lookahead,
        goal_path=ego_goal_path,
        ego_position=ego_position,
        ego_heading=ego_heading,
    )
Example #3
    def observation_adapter(self, env_observation):
        ego_state = env_observation.ego_vehicle_state
        start = env_observation.ego_vehicle_state.mission.start
        goal = env_observation.ego_vehicle_state.mission.goal
        path = get_path_to_goal(
            goal=goal, paths=env_observation.waypoint_paths, start=start
        )
        closest_wp, _ = get_closest_waypoint(
            num_lookahead=num_lookahead,  # assumed to be a module-level constant in the source file
            goal_path=path,
            ego_position=ego_state.position,
            ego_heading=ego_state.heading,
        )
        signed_dist_from_center = closest_wp.signed_lateral_error(ego_state.position)
        lane_width = closest_wp.lane_width * 0.5
        ego_dist_center = signed_dist_from_center / lane_width

        relative_goal_position = np.asarray(goal.position[0:2]) - np.asarray(
            ego_state.position[0:2]
        )
        relative_goal_position_rotated = rotate2d_vector(
            relative_goal_position, -ego_state.heading
        )

        state = dict(
            speed=ego_state.speed,
            relative_goal_position=relative_goal_position_rotated,
            distance_from_center=ego_dist_center,
            steering=ego_state.steering,
            angle_error=closest_wp.relative_heading(ego_state.heading),
            social_vehicles=env_observation.neighborhood_vehicle_states,
            road_speed=closest_wp.speed_limit,
            # ----------
            # don't normalize the following:
            start=start.position,
            goal=goal.position,
            heading=ego_state.heading,
            goal_path=path,
            ego_position=ego_state.position,
            waypoint_paths=env_observation.waypoint_paths,
            events=env_observation.events,
        )
        return state
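A sketch of how adapters like these are typically registered with a SMARTS AgentSpec. The import paths and AgentType choice are assumptions and differ across SMARTS versions, and the adapters are passed as plain functions here rather than the bound methods shown in the examples above.

# Hypothetical wiring of observation and reward adapters into an AgentSpec.
from smarts.core.agent import AgentSpec
from smarts.core.agent_interface import AgentInterface, AgentType

agent_spec = AgentSpec(
    interface=AgentInterface.from_type(AgentType.Full, max_episode_steps=1000),
    observation_adapter=observation_adapter,  # e.g. Example #3
    reward_adapter=reward_adapter,            # e.g. Example #4
)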
Example #4
File: adapter.py Project: zbzhu99/SMARTS
    def reward_adapter(observation, reward):
        env_reward = reward
        ego_events = observation.events
        ego_observation = observation.ego_vehicle_state
        start = observation.ego_vehicle_state.mission.start
        goal = observation.ego_vehicle_state.mission.goal
        path = get_path_to_goal(
            goal=goal, paths=observation.waypoint_paths, start=start
        )

        linear_jerk = np.linalg.norm(ego_observation.linear_jerk)
        angular_jerk = np.linalg.norm(ego_observation.angular_jerk)

        # Distance to goal
        ego_2d_position = ego_observation.position[0:2]
        goal_dist = distance.euclidean(ego_2d_position, goal.position)

        closest_wp, _ = get_closest_waypoint(
            num_lookahead=num_lookahead,
            goal_path=path,
            ego_position=ego_observation.position,
            ego_heading=ego_observation.heading,
        )
        angle_error = closest_wp.relative_heading(
            ego_observation.heading
        )  # relative heading radians [-pi, pi]

        # Distance from center
        signed_dist_from_center = closest_wp.signed_lateral_error(
            observation.ego_vehicle_state.position
        )
        lane_width = closest_wp.lane_width * 0.5
        ego_dist_center = signed_dist_from_center / lane_width

        # number of violations
        (ego_num_violations, social_num_violations,) = ego_social_safety(
            observation,
            d_min_ego=1.0,
            t_c_ego=1.0,
            d_min_social=1.0,
            t_c_social=1.0,
            ignore_vehicle_behind=True,
        )

        speed_fraction = max(0, ego_observation.speed / closest_wp.speed_limit)
        ego_step_reward = 0.02 * min(speed_fraction, 1) * np.cos(angle_error)
        ego_speed_reward = min(
            0, (closest_wp.speed_limit - ego_observation.speed) * 0.01
        )  # m/s
        ego_collision = len(ego_events.collisions) > 0
        ego_collision_reward = -1.0 if ego_collision else 0.0
        ego_off_road_reward = -1.0 if ego_events.off_road else 0.0
        ego_off_route_reward = -1.0 if ego_events.off_route else 0.0
        ego_wrong_way = -0.02 if ego_events.wrong_way else 0.0
        ego_goal_reward = 0.0
        ego_time_out = 0.0
        ego_dist_center_reward = -0.002 * min(1, abs(ego_dist_center))
        ego_angle_error_reward = -0.005 * max(0, np.cos(angle_error))
        ego_reached_goal = 1.0 if ego_events.reached_goal else 0.0
        ego_safety_reward = -0.02 if ego_num_violations > 0 else 0
        social_safety_reward = -0.02 if social_num_violations > 0 else 0
        ego_lat_speed = 0.0  # -0.1 * abs(long_lat_speed[1])
        ego_linear_jerk = -0.0001 * linear_jerk
        ego_angular_jerk = -0.0001 * angular_jerk * math.cos(angle_error)
        env_reward /= 100
        # DG: Different speed reward
        ego_speed_reward = -0.1 if speed_fraction >= 1 else 0.0
        ego_speed_reward += -0.01 if speed_fraction < 0.01 else 0.0

        rewards = sum(
            [
                ego_goal_reward,
                ego_collision_reward,
                ego_off_road_reward,
                ego_off_route_reward,
                ego_wrong_way,
                ego_speed_reward,
                # ego_time_out,
                ego_dist_center_reward,
                ego_angle_error_reward,
                ego_reached_goal,
                ego_step_reward,
                env_reward,
                # ego_linear_jerk,
                # ego_angular_jerk,
                # ego_lat_speed,
                # ego_safety_reward,
                # social_safety_reward,
            ]
        )
        return rewards
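A quick numeric check of the shaping terms above (illustrative values, not part of the adapter), taking angle_error = 0, speed_fraction = 0.5, and the ego centered in its lane.

import numpy as np

ego_step_reward = 0.02 * min(0.5, 1) * np.cos(0.0)       # +0.01
ego_dist_center_reward = -0.002 * min(1, abs(0.0))       #  0.0
ego_angle_error_reward = -0.005 * max(0, np.cos(0.0))    # -0.005
# The terminal terms dominate: collision, off-road, and off-route are -1.0
# each, reaching the goal is +1.0, and wrong-way driving is -0.02 per step.
print(ego_step_reward + ego_dist_center_reward + ego_angle_error_reward)  # 0.005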
Example #5
def adapt(
    observation: Observation, reward: float, info: Dict[str, Any]
) -> Dict[str, Any]:
    """Adapts a raw environment observation, an environment reward, and info about the
    agent from the environment into custom information about the agent.

    The raw observation from the environment must include the ego vehicle's state,
    events, waypoint paths, and neighborhood vehicles. See smarts.core.sensors for more
    information on the Observation type.

    Args:
        observation (Observation): The raw environment observation received from SMARTS.
        reward (float): The environment reward received from SMARTS.
        info (dict): Information about the agent received from SMARTS.

    Returns:
        dict: The adapted information. A dictionary containing the same information as
            the original info argument, but also including a "logs" key containing more
            information about the agent.
    """
    ego_state = observation.ego_vehicle_state
    start = observation.ego_vehicle_state.mission.start
    goal = observation.ego_vehicle_state.mission.goal
    path = get_path_to_goal(goal=goal, paths=observation.waypoint_paths, start=start)
    closest_wp, _ = get_closest_waypoint(
        num_lookahead=100,
        goal_path=path,
        ego_position=ego_state.position,
        ego_heading=ego_state.heading,
    )
    signed_dist_from_center = closest_wp.signed_lateral_error(ego_state.position)
    lane_width = closest_wp.lane_width * 0.5
    ego_dist_center = signed_dist_from_center / lane_width

    linear_jerk = np.linalg.norm(ego_state.linear_jerk)
    angular_jerk = np.linalg.norm(ego_state.angular_jerk)

    # Distance to goal
    ego_2d_position = ego_state.position[0:2]
    goal_dist = distance.euclidean(ego_2d_position, goal.position)

    angle_error = closest_wp.relative_heading(
        ego_state.heading
    )  # relative heading radians [-pi, pi]

    # number of violations
    (ego_num_violations, social_num_violations,) = ego_social_safety(
        observation,
        d_min_ego=1.0,
        t_c_ego=1.0,
        d_min_social=1.0,
        t_c_social=1.0,
        ignore_vehicle_behind=True,
    )

    info["logs"] = dict(
        position=ego_state.position,
        speed=ego_state.speed,
        steering=ego_state.steering,
        heading=ego_state.heading,
        dist_center=abs(ego_dist_center),
        start=start,
        goal=goal,
        closest_wp=closest_wp,
        events=observation.events,
        ego_num_violations=ego_num_violations,
        social_num_violations=social_num_violations,
        goal_dist=goal_dist,
        linear_jerk=linear_jerk,
        angular_jerk=angular_jerk,
        env_score=default_reward_adapter.adapt(observation, reward),
    )

    return info
Example #6
def adapt(observation: Observation) -> Dict[str, np.ndarray]:
    """Adapts a raw environment observation into a dictionary of numpy.ndarrays.

    The raw observation from the environment must include the ego vehicle's state,
    events, waypoint paths, and neighborhood vehicles. See smarts.core.sensors for more
    information on the Observation type.

    Args:
        observation (Observation): The raw environment observation received from SMARTS.

    Returns:
        dict: A dictionary with two keys, "low_dim_states" and "social_vehicles". The
            value of "low_dim_states" is a numpy.ndarray with shape (_SIZE,), and the
            value of "social_vehicles" is a numpy.ndarray with shape
            (_CAPACITY, _FEATURES).
    """
    observation = copy.deepcopy(observation)
    ego_position = observation.ego_vehicle_state.position
    ego_heading = observation.ego_vehicle_state.heading
    ego_speed = observation.ego_vehicle_state.speed
    ego_steering = observation.ego_vehicle_state.steering
    ego_start = observation.ego_vehicle_state.mission.start
    ego_goal = observation.ego_vehicle_state.mission.goal
    ego_waypoints = observation.waypoint_paths

    ego_goal_path = get_path_to_goal(goal=ego_goal,
                                     paths=ego_waypoints,
                                     start=ego_start)
    ego_closest_waypoint, ego_lookahead_waypoints = get_closest_waypoint(
        num_lookahead=_WAYPOINTS,
        goal_path=ego_goal_path,
        ego_position=ego_position,
        ego_heading=ego_heading,
    )
    ego_lookahead_waypoints = np.hstack(ego_lookahead_waypoints)

    signed_distance_from_center = ego_closest_waypoint.signed_lateral_error(
        ego_position)
    lane_width = ego_closest_waypoint.lane_width * 0.5
    ego_distance_from_center = signed_distance_from_center / lane_width

    ego_relative_rotated_goal_position = rotate2d_vector(
        np.asarray(ego_goal.position[0:2]) - np.asarray(ego_position[0:2]),
        -ego_heading,
    )

    observation_dict = dict(
        speed=ego_speed,
        relative_goal_position=ego_relative_rotated_goal_position,
        distance_from_center=ego_distance_from_center,
        steering=ego_steering,
        angle_error=ego_closest_waypoint.relative_heading(ego_heading),
        road_speed=ego_closest_waypoint.speed_limit,
        start=ego_start.position,
        goal=ego_goal.position,
        heading=ego_heading,
        goal_path=ego_goal_path,
        ego_position=ego_position,
        waypoint_paths=ego_waypoints,
        events=observation.events,
        waypoints_lookahead=ego_lookahead_waypoints,
    )
    normalized_observation = [
        _normalize(key, observation_dict[key])
        for key in _NORMALIZATION_VALUES.keys()
    ]
    low_dim_states = np.concatenate(
        [
            value if isinstance(value, collections.abc.Iterable) else
            np.asarray([value]) for value in normalized_observation
        ],
        axis=-1,
    )

    # Adapt the social vehicles.
    social_vehicles = observation.neighborhood_vehicle_states

    if len(social_vehicles) == 0:
        # There are no social vehicles. Create an empty array with the correct
        # number of features so it can be padded.
        social_vehicles = np.empty((0, _FEATURES))
    else:
        # Sort by distance to the ego vehicle.
        social_vehicles.sort(
            key=lambda vehicle: _get_distance(vehicle, ego_position),
            reverse=False,
        )
        # Extract the state of each social vehicle.
        social_vehicles = np.asarray([
            _extract_social_vehicle_state(
                social_vehicle=social_vehicle,
                ego_position=ego_position,
                ego_heading=ego_heading,
            ) for social_vehicle in social_vehicles
        ])

    if len(social_vehicles) < _CAPACITY:
        # Pad with zero vectors if we don't have enough social vehicles.
        remain = _CAPACITY - len(social_vehicles)
        empty_social_vehicles = np.zeros(shape=(remain, _FEATURES))
        social_vehicles = np.concatenate(
            (social_vehicles, empty_social_vehicles))
    elif len(social_vehicles) > _CAPACITY:
        # Remove extra social vehicles if there were too many in the observation.
        social_vehicles = social_vehicles[:_CAPACITY]

    vector_observation = {
        "low_dim_states": low_dim_states.astype(np.float32),
        "social_vehicles": social_vehicles.astype(np.float32),
    }
    return vector_observation
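A small consumer sketch for the adapter above; raw_observation is an assumed smarts.core.sensors.Observation instance, and _CAPACITY and _FEATURES are the module-level constants referenced in the code.

# Hypothetical usage: adapt a raw observation and check the documented shapes.
vector_obs = adapt(raw_observation)
low_dim_states = vector_obs["low_dim_states"]    # shape (_SIZE,), float32
social_vehicles = vector_obs["social_vehicles"]  # shape (_CAPACITY, _FEATURES), float32
assert social_vehicles.shape == (_CAPACITY, _FEATURES)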
Example #7
def adapt(observation: Observation, reward: float) -> float:
    """Adapts a raw environment observation and an environment reward to a custom reward
    of type float.

    The raw observation from the environment must include the ego vehicle's state,
    events, and waypoint paths. See smarts.core.sensors for more information on the
    Observation type.

    Args:
        observation (Observation): The raw environment observation received from SMARTS.
        reward (float): The environment reward received from SMARTS.

    Returns:
        float: The adapted, custom reward which includes aspects of the ego vehicle's
            state and the ego vehicle's mission progress, in addition to the environment
            reward.
    """
    env_reward = reward
    ego_events = observation.events
    ego_observation = observation.ego_vehicle_state
    start = observation.ego_vehicle_state.mission.start
    goal = observation.ego_vehicle_state.mission.goal
    path = get_path_to_goal(goal=goal,
                            paths=observation.waypoint_paths,
                            start=start)

    linear_jerk = np.linalg.norm(ego_observation.linear_jerk)
    angular_jerk = np.linalg.norm(ego_observation.angular_jerk)

    # Distance to goal
    ego_2d_position = ego_observation.position[0:2]
    goal_dist = distance.euclidean(ego_2d_position, goal.position)

    closest_wp, _ = get_closest_waypoint(
        num_lookahead=_WAYPOINTS,
        goal_path=path,
        ego_position=ego_observation.position,
        ego_heading=ego_observation.heading,
    )
    angle_error = closest_wp.relative_heading(
        ego_observation.heading)  # relative heading radians [-pi, pi]

    # Distance from center
    signed_dist_from_center = closest_wp.signed_lateral_error(
        observation.ego_vehicle_state.position)
    lane_width = closest_wp.lane_width * 0.5
    ego_dist_center = signed_dist_from_center / lane_width

    # NOTE: This requires the NeighborhoodVehicles interface.
    # # number of violations
    # (ego_num_violations, social_num_violations,) = ego_social_safety(
    #     observation,
    #     d_min_ego=1.0,
    #     t_c_ego=1.0,
    #     d_min_social=1.0,
    #     t_c_social=1.0,
    #     ignore_vehicle_behind=True,
    # )

    speed_fraction = max(0, ego_observation.speed / closest_wp.speed_limit)
    ego_step_reward = 0.02 * min(speed_fraction, 1) * np.cos(angle_error)
    ego_speed_reward = min(
        0, (closest_wp.speed_limit - ego_observation.speed) * 0.01)  # m/s
    ego_collision = len(ego_events.collisions) > 0
    ego_collision_reward = -1.0 if ego_collision else 0.0
    ego_off_road_reward = -1.0 if ego_events.off_road else 0.0
    ego_off_route_reward = -1.0 if ego_events.off_route else 0.0
    ego_wrong_way = -0.02 if ego_events.wrong_way else 0.0
    ego_goal_reward = 0.0
    ego_time_out = 0.0
    ego_dist_center_reward = -0.002 * min(1, abs(ego_dist_center))
    ego_angle_error_reward = -0.005 * max(0, np.cos(angle_error))
    ego_reached_goal = 1.0 if ego_events.reached_goal else 0.0
    # NOTE: This requires the NeighborhoodVehicles interface.
    # ego_safety_reward = -0.02 if ego_num_violations > 0 else 0
    # NOTE: This requires the NeighborhoodVehicles interface.
    # social_safety_reward = -0.02 if social_num_violations > 0 else 0
    ego_lat_speed = 0.0  # -0.1 * abs(long_lat_speed[1])
    ego_linear_jerk = -0.0001 * linear_jerk
    ego_angular_jerk = -0.0001 * angular_jerk * math.cos(angle_error)
    env_reward /= 100
    # DG: Different speed reward
    ego_speed_reward = -0.1 if speed_fraction >= 1 else 0.0
    ego_speed_reward += -0.01 if speed_fraction < 0.01 else 0.0

    rewards = sum([
        ego_goal_reward,
        ego_collision_reward,
        ego_off_road_reward,
        ego_off_route_reward,
        ego_wrong_way,
        ego_speed_reward,
        # ego_time_out,
        ego_dist_center_reward,
        ego_angle_error_reward,
        ego_reached_goal,
        ego_step_reward,
        env_reward,
        # ego_linear_jerk,
        # ego_angular_jerk,
        # ego_lat_speed,
        # ego_safety_reward,
        # social_safety_reward,
    ])
    return rewards
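The safety terms in this variant are commented out because they need neighborhood vehicle observations. Below is a sketch, not taken from the example, of requesting those observations through the agent interface so ego_social_safety() could be re-enabled; exact field names may differ between SMARTS versions.

# Hypothetical agent interface requesting waypoints and neighborhood vehicles.
from smarts.core.agent_interface import AgentInterface, NeighborhoodVehicles, Waypoints
from smarts.core.controllers import ActionSpaceType

interface = AgentInterface(
    max_episode_steps=1000,
    waypoints=Waypoints(lookahead=50),
    neighborhood_vehicles=NeighborhoodVehicles(radius=50.0),
    action=ActionSpaceType.Continuous,
)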
Example #8
def preprocess_state(
    state,
    state_description,
    convert_action_func,
    observation_num_lookahead,
    social_capacity,
    social_vehicle_config,
    prev_action,
    normalize=False,
    unsqueeze=False,
    device=None,
    draw=False,
):
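    """Converts an adapted state dictionary into torch tensors: image observations
    are optionally normalized, lookahead waypoints are filtered from the goal path,
    low-dimensional states are normalized and concatenated, and social vehicle
    states are extracted and reshaped. Returns a dict with "images",
    "low_dim_states", and "social_vehicles" keys.
    """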
    state = state.copy()
    images = {}
    for k in state_description["images"]:
        image = torch.from_numpy(state[k])
        image = image.unsqueeze(0) if unsqueeze else image
        image = image.to(device) if device else image
        image = normalize_im(image) if normalize else image
        images[k] = image

    if "action" in state:
        state["action"] = convert_action_func(state["action"])

    # -------------------------------------
    # filter lookaheads from goal_path
    _, lookahead_wps = get_closest_waypoint(
        num_lookahead=observation_num_lookahead,
        goal_path=state["goal_path"],
        ego_position=state["ego_position"],
        ego_heading=state["heading"],
    )
    state["waypoints_lookahead"] = np.hstack(lookahead_wps)

    # -------------------------------------
    # keep prev_action
    state["action"] = prev_action

    # -------------------------------------
    # normalize states and concat
    normalized = [
        _normalize(key, state[key]) for key in state_description["low_dim_states"]
    ]

    low_dim_states = [
        val if isinstance(val, Iterable) else np.asarray([val]).astype(np.float32)
        for val in normalized
    ]
    low_dim_states = torch.cat(
        [torch.from_numpy(e).float() for e in low_dim_states], dim=-1
    )
    low_dim_states = low_dim_states.unsqueeze(0) if unsqueeze else low_dim_states
    low_dim_states = low_dim_states.to(device) if device else low_dim_states

    # -------------------------------------
    # apply social vehicle encoder
    # only process if state is not encoded already
    state["social_vehicles"] = (
        get_social_vehicles(
            ego_vehicle_pos=state["ego_position"],
            ego_vehicle_heading=state["heading"],
            neighborhood_vehicles=state["social_vehicles"],
            social_vehicle_config=social_vehicle_config,
            waypoint_paths=state["waypoint_paths"],
        )
        if social_capacity > 0
        else []
    )
    # check if any social capacity is 0
    social_vehicle_dimension = state_description["social_vehicles"]
    social_vehicles = torch.empty(0, 0)

    if social_vehicle_dimension:
        social_vehicles = torch.from_numpy(np.asarray(state["social_vehicles"])).float()
        social_vehicles = social_vehicles.reshape((-1, social_vehicle_dimension))
    social_vehicles = social_vehicles.unsqueeze(0) if unsqueeze else social_vehicles
    social_vehicles = social_vehicles.to(device) if device else social_vehicles

    out = {
        "images": images,
        "low_dim_states": low_dim_states,
        "social_vehicles": social_vehicles,
    }
    return out