Example #1
0
    def observe(self) -> np.ndarray:
        if not self.env.road:
            return np.zeros(self.space().shape)

        if self.absolute:
            raise NotImplementedError()
        else:
            # Add nearby traffic
            self.grid.fill(0)
            df = pd.DataFrame.from_records(
                [v.to_dict(self.observer_vehicle) for v in self.env.road.vehicles])
            # Normalize
            df = self.normalize(df)
            # Fill-in features
            for layer, feature in enumerate(self.features):
                for _, vehicle in df.iterrows():
                    x, y = vehicle["x"], vehicle["y"]
                    # Recover unnormalized coordinates for cell index
                    if "x" in self.features_range:
                        x = utils.lmap(x, [-1, 1], [self.features_range["x"][0], self.features_range["x"][1]])
                    if "y" in self.features_range:
                        y = utils.lmap(y, [-1, 1], [self.features_range["y"][0], self.features_range["y"][1]])
                    cell = (int((x - self.grid_size[0, 0]) / self.grid_step[0]),
                            int((y - self.grid_size[1, 0]) / self.grid_step[1]))
                    if 0 <= cell[1] < self.grid.shape[-2] and 0 <= cell[0] < self.grid.shape[-1]:
                        self.grid[layer, cell[1], cell[0]] = vehicle[feature]
            # Clip
            obs = np.clip(self.grid, -1, 1)
            return obs
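
Note: every snippet in this list leans on utils.lmap from highway-env for linear range mapping. A minimal stand-alone sketch consistent with how it is used above (straight two-point linear interpolation, no clipping):

def lmap(v, x, y):
    # Linearly map v from the source range x = [x0, x1] to the target range y = [y0, y1].
    return y[0] + (v - x[0]) * (y[1] - y[0]) / (x[1] - x[0])

# e.g. recovering an unnormalized x coordinate, as in the loop above
# (the [-100, 100] features range is illustrative only):
x = lmap(0.5, [-1, 1], [-100.0, 100.0])   # -> 50.0
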
Example #2
0
    def _reward(self, action: Action) -> float:
        """
        The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions.
        :param action: the last action performed
        :return: the corresponding reward
        """
        neighbours = self.road.network.all_side_lanes(self.vehicle.lane_index)
        lane = self.vehicle.target_lane_index[2] if isinstance(self.vehicle, ControlledVehicle) \
            else self.vehicle.lane_index[2]
        scaled_speed = utils.lmap(self.vehicle.speed,
                                  self.config["reward_speed_range"], [0, 1])

        lane_change = action == 0 or action == 2

        reward = \
            + self.config["collision_reward"] * self.vehicle.crashed \
            + self.config["right_lane_reward"] * lane / max(len(neighbours) - 1, 1) \
            + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1) \
            + self.config["lane_change_reward"] * lane_change

        reward = utils.lmap(
            reward,
            [
                self.config[
                    "collision_reward"],  # + self.config["lane_change_reward"]
                self.config["high_speed_reward"] +
                self.config["right_lane_reward"]
            ],
            [0, 1])
        reward = 0 if not self.vehicle.on_road else reward
        return reward
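
To make the final lmap rescaling in this _reward concrete, here is a small worked example with purely illustrative weights (not the library defaults): collision_reward = -1, right_lane_reward = 0.1, high_speed_reward = 0.4, lane_change_reward = 0.

# Non-crashed vehicle in the rightmost of three lanes, at the top of the speed range:
raw = (-1 * 0) + 0.1 * (2 / 2) + 0.4 * 1.0 + 0 * 1      # = 0.5
# Rescale from [collision_reward, high_speed_reward + right_lane_reward] = [-1, 0.5] to [0, 1]:
normalized = (raw - (-1)) / (0.5 - (-1))                 # = 1.0, i.e. lmap(raw, [-1, 0.5], [0, 1])
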
Example #3
0
    def _agent_reward_type_1(self, vehicle, action):
        """
               The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions.
               :param action: the last action performed
               :return: the corresponding reward
               """
        neighbours = self.env.road.network.all_side_lanes(vehicle.lane_index)
        lane = vehicle.target_lane_index[2] if isinstance(vehicle, ControlledVehicle) \
            else vehicle.lane_index[2]
        scaled_speed = utils.lmap(vehicle.speed, self.reward_speed_range,
                                  [0, 1])

        # TODO: this must be read from action.py, to determine if it is a lane change or not,
        #  for now it's hard-coded to 0 and 2
        lane_change = (action == 0 or action == 2)

        reward = \
            + self.collision_reward * vehicle.crashed \
            + self.on_desired_lane_reward * lane / max(len(neighbours) - 1, 1) \
            + self.high_speed_reward * np.clip(scaled_speed, 0, 1) \
            # + self.lane_change_reward * lane_change

        if self.normalize_reward:
            reward = utils.lmap(reward, [
                self.collision_reward + self.lane_change_reward,
                self.high_speed_reward + self.on_desired_lane_reward
            ], [0, 1])

        reward = 0 if not vehicle.on_road else reward
        return reward
Example #4
0
 def act(self, action: np.ndarray) -> None:
     # if self.clip:
     #     action = np.clip(action, -1, 1)
     if self.longitudinal and self.lateral:
         self.controlled_vehicle.act({
             "acceleration": action[0],
             #    utils.lmap(action[0], [-1, 1], self.acceleration_range)
             "steering": action[1],
             #  utils.lmap(action[1], [-1, 1], self.steering_range),
         })
     elif self.longitudinal:
         self.controlled_vehicle.act({
             "acceleration":
             utils.lmap(action[0], [-1, 1], self.acceleration_range),
             "steering":
             0,
         })
     elif self.lateral:
         self.controlled_vehicle.act({
             "acceleration":
             0,
             "steering":
             utils.lmap(action[0], [-1, 1], self.steering_range)
         })
     self.last_action = action
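
The commented-out lmap calls in the first branch hint at the intended rescaling of a [-1, 1] policy output into physical units. A hedged sketch of that mapping, with assumed acceleration_range and steering_range values (the real ranges come from the action configuration):

import numpy as np
from highway_env import utils

acceleration_range = [-5.0, 5.0]              # assumed range, in m/s^2
steering_range = [-np.pi / 4, np.pi / 4]      # assumed range, in rad

action = np.array([0.2, -1.0])                # raw policy output in [-1, 1]
acceleration = utils.lmap(action[0], [-1, 1], acceleration_range)   # 1.0 m/s^2
steering = utils.lmap(action[1], [-1, 1], steering_range)           # -pi/4 rad
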
Example #5
0
    def _agent_reward(self, action: int, vehicle: Vehicle) -> float:
        scaled_speed = utils.lmap(self.vehicle.speed, self.config["reward_speed_range"], [0, 1])
        reward = self.config["collision_reward"] * vehicle.crashed \
                 + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1)

        reward = self.config["arrived_reward"] if self.has_arrived(vehicle) else reward
        if self.config["normalize_reward"]:
            reward = utils.lmap(reward, [self.config["collision_reward"], self.config["arrived_reward"]], [0, 1])
        return reward
Example #6
0
    def _agent_reward_highway_coop(self, vehicle, action):
        """
               The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions.
               :param action: the last action performed
               :return: the corresponding reward
               """
        neighbours = self.env.road.network.all_side_lanes(vehicle.lane_index)
        lane = vehicle.target_lane_index[2] if isinstance(vehicle, ControlledVehicle) \
            else vehicle.lane_index[2]
        reward_speed_range = [vehicle.SPEED_MIN, vehicle.SPEED_MAX]
        scaled_speed = utils.lmap(vehicle.speed, reward_speed_range, [0, 1])

        # TODO: this must be read from action.py, to determine if it is a lane change or not,
        #  for now it's hard-coded to 0 and 2
        lane_change = (action == 0 or action == 2)

        collision_reward_value = self.collision_reward * vehicle.crashed
        on_desired_lane_reward_value = self.on_desired_lane_reward * lane / max(
            len(neighbours) - 1, 1)
        high_speed_reward_value = self.high_speed_reward * np.clip(
            scaled_speed, 0, 1)
        lane_change_reward_value = self.lane_change_reward * lane_change
        # cooperative_reward_value_avg_speeds = self.cooperative_reward_value * \
        #                                       np.clip(self.avg_speeds, 0, 1)
        cooperative_reward_value_avg_speeds = 0
        reward = \
            collision_reward_value \
            + on_desired_lane_reward_value \
            + high_speed_reward_value \
            + lane_change_reward_value \
            + cooperative_reward_value_avg_speeds

        if self.normalize_reward:
            reward = utils.lmap(reward, [
                self.collision_reward + self.lane_change_reward,
                self.high_speed_reward + self.on_desired_lane_reward
            ], [0, 1])

        reward = 0 if not vehicle.on_road else reward

        reward_info = {
            "reward_crashed": collision_reward_value,
            "reward_on_lane": on_desired_lane_reward_value,
            "reward_scaled_speed": high_speed_reward_value,
            "reward_lane_change": lane_change_reward_value,
            "reward_distance": 0,
            "reward_avg_speeds": cooperative_reward_value_avg_speeds,
            "reward_has_merged": 0,
            "reward_distance_merge_vehicle": 0,
            # "normalized_timestep_reward": reward
            # "vehilce_id": vehicle.id
        }

        return reward, reward_info
Example #7
0
    def observe(self) -> np.ndarray:
        if not self.env.road:
            return np.zeros(self.space().shape)

        if self.absolute:
            raise NotImplementedError()
        else:
            # Initialize empty data
            self.grid.fill(np.nan)

            # Get nearby traffic data
            df = pd.DataFrame.from_records([
                v.to_dict(self.observer_vehicle)
                for v in self.env.road.vehicles
            ])
            # Normalize
            df = self.normalize(df)
            # Fill-in features
            for layer, feature in enumerate(self.features):
                if feature in df.columns:  # A vehicle feature
                    for _, vehicle in df.iterrows():
                        x, y = vehicle["x"], vehicle["y"]
                        # Recover unnormalized coordinates for cell index
                        if "x" in self.features_range:
                            x = utils.lmap(x, [-1, 1], [
                                self.features_range["x"][0],
                                self.features_range["x"][1]
                            ])
                        if "y" in self.features_range:
                            y = utils.lmap(y, [-1, 1], [
                                self.features_range["y"][0],
                                self.features_range["y"][1]
                            ])
                        cell = self.pos_to_index((x, y),
                                                 relative=not self.absolute)
                        if 0 <= cell[1] < self.grid.shape[-2] and 0 <= cell[
                                0] < self.grid.shape[-1]:
                            self.grid[layer, cell[1],
                                      cell[0]] = vehicle[feature]
                elif feature == "on_road":
                    self.fill_road_layer_by_lanes(layer)

            obs = self.grid

            if self.clip:
                obs = np.clip(obs, -1, 1)

            if self.as_image:
                obs = ((np.clip(obs, -1, 1) + 1) / 2 * 255).astype(np.uint8)

            obs = np.nan_to_num(obs).astype(self.space().dtype)

            return obs
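
Example #7 delegates the cell lookup to self.pos_to_index, while Example #1 computes it inline. A minimal sketch of such an index computation, assuming grid bounds in grid_size ([[x_min, x_max], [y_min, y_max]]) and cell sizes in grid_step (relative/rotated-frame handling omitted):

import numpy as np

def pos_to_index(position, grid_size, grid_step):
    # Map an (x, y) position to (x_cell, y_cell) indices of the occupancy grid.
    x, y = position
    return (int(np.floor((x - grid_size[0, 0]) / grid_step[0])),
            int(np.floor((y - grid_size[1, 0]) / grid_step[1])))

grid_size = np.array([[-20.0, 20.0], [-10.0, 10.0]])
grid_step = (5.0, 5.0)
pos_to_index((7.0, -3.0), grid_size, grid_step)   # -> (5, 1)
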
Example #8
0
def v_lmap(s, road_r, max_v):
    s = s.reshape(4, -1)
    x = s[0, :]
    y = s[1, :]
    theta = s[2, :]
    v = s[3, :]
    for i in range(50):
        x[i] = lmap(x[i], [-1, 1], [-(road_r + 4), (road_r + 4)])
        y[i] = lmap(y[i], [-1, 1], [0, 2 * (road_r + 4)])
        theta[i] = lmap(theta[i], [-1, 1], [-2 * np.pi, 2 * np.pi])
        v[i] = lmap(v[i], [-1, 1], [-max_v, max_v])
    return x, y, theta, v
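
A small usage sketch of v_lmap, assuming the flattened state holds 4 channels (x, y, theta, v) of 50 samples each, which is what reshape(4, -1) together with the range(50) loop implies:

import numpy as np

s = np.random.uniform(-1, 1, size=4 * 50)   # normalized trajectory, flattened
road_r, max_v = 30.0, 20.0                  # illustrative road radius and speed bound
x, y, theta, v = v_lmap(s, road_r, max_v)
# x in [-(road_r + 4), road_r + 4], y in [0, 2 * (road_r + 4)], each of shape (50,)
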
Example #9
0
    def _agent_reward_type_2(self, vehicle, action):
        """
               The reward is defined to foster driving at high speed, on a given target lane, and to avoid collisions.
               :param action: the last action performed
               :return: the corresponding reward
               """
        lane = vehicle.target_lane_index[2] if isinstance(vehicle, ControlledVehicle) \
            else vehicle.lane_index[2]
        scaled_speed = utils.lmap(vehicle.speed, self.reward_speed_range,
                                  [0, 1])

        # TODO: this must be read from action.py, to determine if it is a lane change or not,
        #  for now it's hard-coded to 0 and 2
        lane_change = (action == 0 or action == 2)
        on_lane = (lane == self.target_lane)

        distances = []
        distance = 0
        for v in self.env.controlled_vehicles:
            if v is not vehicle:
                longitudinal_distance = vehicle.front_distance_to(v)
                # distance = np.linalg.norm(v.position - vehicle.position)
                distances.append(longitudinal_distance)
        if self.distance_reward_type == "min":
            distance_to_neighbor_car = min(np.abs(distances))
            distance = utils.lmap(
                distance_to_neighbor_car,
                [Vehicle.LENGTH, self.env.PERCEPTION_DISTANCE], [0, 1])
        elif self.distance_reward_type == "avg":
            average_distance = np.average(np.abs(distances))
            distance = utils.lmap(
                average_distance,
                [Vehicle.LENGTH, self.env.PERCEPTION_DISTANCE], [0, 1])

        reward = \
            + self.collision_reward * vehicle.crashed \
            + self.on_desired_lane_reward * on_lane \
            + self.high_speed_reward * np.clip(scaled_speed, 0, 1) \
            + self.lane_change_reward * lane_change \
            + self.distance_reward * distance

        if self.normalize_reward:
            reward = utils.lmap(reward, [
                self.collision_reward + self.lane_change_reward,
                self.high_speed_reward + self.on_desired_lane_reward
            ], [0, 1])

        reward = 0 if not vehicle.on_road else reward
        return reward
Example #10
0
    def normalize_obs(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Normalize the observation values.

        For now, assume that the road is straight along the x axis.
        :param Dataframe df: observation data
        """
        if not self.features_range:
            side_lanes = self.env.road.network.all_side_lanes(
                self.observer_vehicle.lane_index)
            self.features_range = {
                "x": [-5.0 * MDPVehicle.SPEED_MAX, 5.0 * MDPVehicle.SPEED_MAX],
                "y": [
                    -AbstractLane.DEFAULT_WIDTH * len(side_lanes),
                    AbstractLane.DEFAULT_WIDTH * len(side_lanes)
                ],
                "vx": [-2 * MDPVehicle.SPEED_MAX, 2 * MDPVehicle.SPEED_MAX],
                "vy": [-2 * MDPVehicle.SPEED_MAX, 2 * MDPVehicle.SPEED_MAX]
            }
        for feature, f_range in self.features_range.items():
            if feature in df:
                df[feature] = utils.lmap(df[feature], [f_range[0], f_range[1]],
                                         [-1, 1])
                if self.clip:
                    df[feature] = np.clip(df[feature], -1, 1)
        return df
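
A quick usage sketch of the column-wise normalization above, on a hand-built DataFrame with assumed feature ranges (all values illustrative):

import numpy as np
import pandas as pd
from highway_env import utils

features_range = {"x": [-150.0, 150.0], "vx": [-40.0, 40.0]}
df = pd.DataFrame({"x": [30.0, -75.0], "vx": [20.0, 60.0]})

for feature, f_range in features_range.items():
    df[feature] = utils.lmap(df[feature], f_range, [-1, 1])
    df[feature] = np.clip(df[feature], -1, 1)   # with clipping enabled

# x becomes [0.2, -0.5]; vx becomes [0.5, 1.0] (the 60 m/s entry is clipped from 1.5)
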
Example #11
0
    def _reward(self, action: int) -> float:
        """
        The vehicle is rewarded for driving with high speed on lanes to the right and avoiding collisions

        But an additional altruistic penalty is also suffered if any vehicle on the merging lane has a low speed.

        :param action: the action performed
        :return: the reward of the state-action transition
        """
        action_reward = {0: self.LANE_CHANGE_REWARD,
                         1: 0,
                         2: self.LANE_CHANGE_REWARD,
                         3: 0,
                         4: 0}
        reward = self.COLLISION_REWARD * self.vehicle.crashed \
                 + self.RIGHT_LANE_REWARD * self.vehicle.lane_index[2] / 1 \
                 + self.HIGH_SPEED_REWARD * self.vehicle.speed_index / (self.vehicle.SPEED_COUNT - 1)

        # Altruistic penalty
        for vehicle in self.road.vehicles:
            if vehicle.lane_index == ("b", "c", 2) and isinstance(vehicle, ControlledVehicle):
                reward += self.MERGING_SPEED_REWARD * \
                          (vehicle.target_speed - vehicle.speed) / vehicle.target_speed

        return utils.lmap(action_reward[action] + reward,
                          [self.COLLISION_REWARD + self.MERGING_SPEED_REWARD,
                            self.HIGH_SPEED_REWARD + self.RIGHT_LANE_REWARD],
                          [0, 1])
Example #12
0
    def _reward(self, action: int) -> float:
        """
        The vehicle is rewarded for driving with high speed on lanes to the right and avoiding collisions

        But an additional altruistic penalty is also suffered if any vehicle on the merging lane has a low speed.

        :param action: the action performed
        :return: the reward of the state-action transition
        """
        action_reward = {
            0: self.config["lane_change_reward"],
            1: 0,
            2: self.config["lane_change_reward"],
            3: 0,
            4: 0
        }
        reward = self.config["collision_reward"] * self.vehicle.crashed \
            + self.config["right_lane_reward"] * self.vehicle.lane_index[2] / 1 \
            + self.config["high_speed_reward"] * self.vehicle.speed_index / (self.vehicle.target_speeds.size - 1)

        # Altruistic penalty
        for vehicle in self.road.vehicles:
            if vehicle.lane_index == ("b", "c", 2) and isinstance(
                    vehicle, ControlledVehicle):
                reward += self.config["merging_speed_reward"] * \
                          (vehicle.target_speed - vehicle.speed) / vehicle.target_speed

        return utils.lmap(action_reward[action] + reward, [
            self.config["collision_reward"] +
            self.config["merging_speed_reward"],
            self.config["high_speed_reward"] + self.config["right_lane_reward"]
        ], [0, 1])
Example #13
0
    def _agent_reward_intersection(self, vehicle, action):
        """
               The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions.
               :param action: the last action performed
               :return: the corresponding reward
               """
        neighbours = self.env.road.network.all_side_lanes(vehicle.lane_index)
        reward_speed_range = [vehicle.SPEED_MIN, vehicle.SPEED_MAX]
        scaled_speed = utils.lmap(vehicle.speed, reward_speed_range, [0, 1])

        collision_reward_value = self.collision_reward * vehicle.crashed
        high_speed_reward_value = self.high_speed_reward * np.clip(
            scaled_speed, 0, 1)
        arrived_reward_value = self.arrived_reward if self.env.has_arrived_intersection(
            vehicle) else 0

        # cooperative_reward_value_avg_speeds = self.cooperative_reward_value * \
        #                                       np.clip(self.avg_speeds, 0, 1)
        cooperative_reward_value_avg_speeds = 0
        reward = \
            collision_reward_value \
            + high_speed_reward_value \
            + cooperative_reward_value_avg_speeds\
            + arrived_reward_value

        if self.normalize_reward:
            reward = utils.lmap(reward, [
                self.collision_reward,
                self.high_speed_reward + self.arrived_reward
            ], [0, 1])

        reward = 0 if not vehicle.on_road else reward

        reward_info = {
            "reward_crashed": collision_reward_value,
            "reward_on_lane": 0,
            "reward_scaled_speed": high_speed_reward_value,
            "reward_lane_change": 0,
            "reward_distance": 0,
            "reward_avg_speeds": cooperative_reward_value_avg_speeds,
            "reward_has_merged": 0,
            "reward_distance_merge_vehicle": 0,
            # "normalized_timestep_reward": reward
            # "vehilce_id": vehicle.id
        }

        return reward, reward_info
Example #14
0
 def _reward(self, action: int) -> float:
     reward = self.COLLISION_REWARD * self.vehicle.crashed \
              + self.HIGH_SPEED_REWARD * self.vehicle.speed_index / max(self.vehicle.SPEED_COUNT - 1, 1) \
              + self.LANE_CHANGE_REWARD * (action in [0, 2])
     return utils.lmap(reward, [
         self.COLLISION_REWARD + self.LANE_CHANGE_REWARD,
         self.HIGH_SPEED_REWARD
     ], [0, 1])
Example #15
0
    def _agent_reward_simple(self, vehicle, action):
        """
               The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions.
               :param action: the last action performed
               :return: the corresponding reward
               """
        reward_speed_range = [vehicle.SPEED_MIN, vehicle.SPEED_MAX]
        scaled_speed = utils.lmap(vehicle.speed, reward_speed_range, [0, 1])

        collision_reward = -2
        high_speed_reward = 1
        destination_reward = 4
        collision_reward_value = collision_reward * vehicle.crashed
        high_speed_reward_value = high_speed_reward * np.clip(
            scaled_speed, 0, 1)
        final_steps = self.env.steps >= self.env.config["duration"] - 1
        arrive_destination = True if final_steps else False
        destination_reward_value = destination_reward * arrive_destination
        reward = \
            collision_reward_value \
            + high_speed_reward_value \
            + destination_reward_value

        if self.normalize_reward:
            reward = utils.lmap(
                reward,
                [collision_reward, high_speed_reward + destination_reward],
                [0, 1])

        # reward = 0 if not vehicle.on_road else reward

        reward_info = {
            "reward_crashed": collision_reward_value,
            "reward_on_lane": 0,
            "reward_scaled_speed": high_speed_reward_value,
            "reward_lane_change": 0,
            "reward_distance": 0,
            "reward_avg_speeds": 0,
            "reward_has_merged": 0,
            "reward_distance_merge_vehicle": 0,
            # "normalized_timestep_reward": reward
            # "vehilce_id": vehicle.id
        }

        return reward, reward_info
Example #16
0
 def _reward(self, action: int) -> float:
     lane_change = action == 0 or action == 2
     reward = self.COLLISION_REWARD * self.vehicle.crashed \
              + self.HIGH_SPEED_REWARD * MDPVehicle.get_speed_index(self.vehicle) / max(MDPVehicle.SPEED_COUNT - 1, 1) \
              + self.LANE_CHANGE_REWARD * lane_change
     return utils.lmap(reward, [
         self.COLLISION_REWARD + self.LANE_CHANGE_REWARD,
         self.HIGH_SPEED_REWARD
     ], [0, 1])
Example #17
0
 def _reward(self, action: int) -> float:
     lane_change = action == 0 or action == 2
     reward = self.config["collision_reward"] * self.vehicle.crashed \
         + self.config["high_speed_reward"] * \
              MDPVehicle.get_speed_index(self.vehicle) / max(MDPVehicle.SPEED_COUNT - 1, 1) \
         + self.config["lane_change_reward"] * lane_change
     return utils.lmap(reward,
                       [self.config["collision_reward"] + self.config["lane_change_reward"],
                        self.config["high_speed_reward"]], [0, 1])
Example #18
0
 def _agent_reward(self, action: int, vehicle: Vehicle) -> float:
     reward = self.config["collision_reward"] * vehicle.crashed  #\
     #+ self.HIGH_SPEED_REWARD * (vehicle.speed_index == vehicle.SPEED_COUNT - 1)
     reward = self.ARRIVED_REWARD if self.has_arrived(vehicle) else reward
     if self.config["normalize_reward"]:
         reward = utils.lmap(
             reward, [self.config["collision_reward"], self.ARRIVED_REWARD],
             [0, 1])
     return reward
Example #19
0
 def _reward(self, action: int) -> float:
     """
     The vehicle is rewarded for driving with high speed and collision avoidance.
     :param action: the action performed
     :return: the reward of the state-action transition
     """
     neighbours = self.road.network.all_side_lanes(self.vehicle.lane_index)
     lane = self.vehicle.lane_index[2]
     scaled_speed = utils.lmap(self.vehicle.speed, self.config["reward_speed_range"], [0, 1])
     reward = \
         + self.config["collision_reward"] * self.vehicle.crashed \
         + self.config["left_lane_reward"] * lane / max(len(neighbours) - 1, 1) \
         + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1)
     reward = utils.lmap(reward,
                         [self.config["collision_reward"],
                          self.config["high_speed_reward"] + self.config["left_lane_reward"]], [0, 1])
     reward = 0 if not self.vehicle.on_road else reward
     return reward
Example #20
0
def get_hat(env, z):
    p = env.vehicle.position
    i_h = env.vehicle.heading
    i_s = env.vehicle.speed
    road_r = env.config["radius"]
    temp_n = [
        lmap(p[0], [-(road_r + 4), (road_r + 4)], [-1, 1]),
        lmap(p[1], [0, 2 * (road_r + 4)], [-1, 1]),
        lmap(i_h, [-2 * np.pi, 2 * np.pi], [-1, 1]),
        lmap(i_s, [-max_v, max_v], [-1, 1])
    ]
    x_road, y_road, x_road_n, y_road_n = env.vehicle.target_lane_position(
        p, road_r)
    s0 = temp_n
    s0 = np.array(s0)
    s0 = torch.Tensor(s0)
    s0 = s0.unsqueeze(0)
    ss = x_road_n + y_road_n
    ss = np.array(ss)
    ss = torch.Tensor(ss)
    ss = ss.unsqueeze(0)

    label = z
    s_hat, _, _, _, _ = clpp(s0, ss, label, em)
    print("ok")
    u_hat = clde(s0, ss, hidden)
    s_hat = s_hat.squeeze(0)
    u_hat = u_hat.squeeze(0)
    s0 = s0.squeeze(0)
    s_hat = s_hat.detach().cpu().numpy()
    u_hat = u_hat.detach().cpu().numpy()
    s0 = s0.detach().cpu().numpy()

    x_f = s_hat.reshape(4, 50)[:, -1]
    print(x_f)
    # print("原本的终端是 ", x_f)
    # x_f_local_x, x_f_local_y = env.road.network.get_lane(v_lane_id).local_coordinates(np.array([x_f[0], x_f[1]]))
    # x_f_local_y = np.clip(x_f_local_y, -4, 4)
    # x_f_xy = env.road.network.get_lane(v_lane_id).position(x_f_local_x, x_f_local_y)
    # x_f[0] = x_f_xy[0]
    # x_f[1] = x_f_xy[1]
    # x_f[2] = env.road.network.get_lane(v_lane_id).heading_at(x_f_xy[0])
    # print("现在的终端是 ", x_f)
    return s0, ss, s_hat, u_hat, x_f
Example #21
0
    def _reward(self, action) -> float:
        """
        The vehicle is rewarded for driving with high speed on lanes to the right and avoiding collisions

        But an additional altruistic penalty is also suffered if any vehicle on the merging lane has a low speed.

        :param action: the action performed
        :return: the reward of the state-action transition
        """
        '''
        action_reward = {0: self.LANE_CHANGE_REWARD,
                         1: 0,
                         2: self.LANE_CHANGE_REWARD,
                         3: 0,
                         4: 0}
        '''

        if self.vehicle.speed < 0:
            return self.COLLISION_REWARD * self.vehicle.crashed - self.HIGH_SPEED_REWARD

        current_lane = self.vehicle.lane
        longi, lat = current_lane.local_coordinates(self.vehicle.position)
        heading_reward = 1 - np.abs(current_lane.heading_at(longi) - self.vehicle.heading)
        lane_center_reward = 1 - (lat/current_lane.width)**2
        scaled_speed = utils.lmap(self.vehicle.speed, [20, 30], [0, 1])
        reward = self.COLLISION_REWARD * self.vehicle.crashed \
                 + self.RIGHT_LANE_REWARD * self.vehicle.lane_index[2] / 1 \
                 + self.HIGH_SPEED_REWARD * np.clip(scaled_speed, -1, 1) \
                 + self.LANE_HEADING_REWARD * heading_reward \
                 + self.LANE_CENTERING_REWARD * lane_center_reward
                 # + self.HIGH_SPEED_REWARD * self.vehicle.speed_index / (self.vehicle.SPEED_COUNT - 1)

        # Altruistic penalty
        for vehicle in self.road.vehicles:
            if vehicle.lane_index == ("b", "c", 2) and isinstance(vehicle, ControlledVehicle):
                reward += self.MERGING_SPEED_REWARD * \
                          (vehicle.target_speed - vehicle.speed) / vehicle.target_speed

        return utils.lmap(reward,
                          [self.COLLISION_REWARD + self.MERGING_SPEED_REWARD,
                           self.HIGH_SPEED_REWARD + self.RIGHT_LANE_REWARD +
                           self.LANE_HEADING_REWARD + self.LANE_CENTERING_REWARD],
                          [0, 1])
Example #22
0
    def _agent_reward_roundabout(self, vehicle, action):
        neighbours = self.env.road.network.all_side_lanes(vehicle.lane_index)
        lane_change = action == 0 or action == 2

        reward_speed_range = [vehicle.SPEED_MIN, vehicle.SPEED_MAX]
        scaled_speed = utils.lmap(vehicle.speed, reward_speed_range, [0, 1])

        collision_reward_value = self.collision_reward * vehicle.crashed
        high_speed_reward_value = self.high_speed_reward * np.clip(
            scaled_speed, 0, 1)
        lane_change_reward_value = self.lane_change_reward * lane_change

        # cooperative_reward_value_avg_speeds = self.cooperative_reward_value * \
        #                                       np.clip(self.avg_speeds, 0, 1)
        cooperative_reward_value_avg_speeds = 0
        reward = \
            collision_reward_value \
            + high_speed_reward_value \
            + cooperative_reward_value_avg_speeds\
            + lane_change_reward_value

        if self.normalize_reward:
            reward = utils.lmap(reward, [
                self.collision_reward + self.lane_change_reward,
                self.high_speed_reward
            ], [0, 1])

        # reward = 0 if not vehicle.on_road else reward

        reward_info = {
            "reward_crashed": collision_reward_value,
            "reward_on_lane": 0,
            "reward_scaled_speed": high_speed_reward_value,
            "reward_lane_change": lane_change_reward_value,
            "reward_distance": 0,
            "reward_avg_speeds": cooperative_reward_value_avg_speeds,
            "reward_has_merged": 0,
            "reward_distance_merge_vehicle": 0,
            # "normalized_timestep_reward": reward
            # "vehilce_id": vehicle.id
        }

        return reward, reward_info
Example #23
0
 def _reward(self, action: np.ndarray) -> float:
     _, lateral = self.vehicle.lane.local_coordinates(self.vehicle.position)
     lane_centering_reward = 1 / (
         1 + self.config["lane_centering_cost"] * lateral**2)
     action_reward = self.config["action_reward"] * np.linalg.norm(action)
     reward = lane_centering_reward \
         + action_reward \
         + self.config["collision_reward"] * self.vehicle.crashed
     reward = reward if self.vehicle.on_road else self.config[
         "collision_reward"]
     return utils.lmap(reward, [self.config["collision_reward"], 1], [0, 1])
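
To give the lane-centering term above some intuition: with an assumed lane_centering_cost of 4 (an illustrative value, not a guaranteed default), the reward decays quadratically with the lateral offset from the lane center.

lane_centering_cost = 4.0
for lateral in (0.0, 0.5, 1.0, 2.0):                      # lateral offset in meters
    r = 1.0 / (1.0 + lane_centering_cost * lateral ** 2)
    print(f"lateral={lateral:+.1f} m -> lane_centering_reward={r:.3f}")
# 1.000, 0.500, 0.200, 0.059
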
Example #24
0
    def _reward(self, action: Action) -> float:
        """
        The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions.
        :param action: the last action performed
        :return: the corresponding reward
        """
        lane_change = 0 if action == 2 or action == 0 else 1
        neighbours = self.env.road.network.all_side_lanes(
            self.env.vehicle.lane_index)
        lane = self.env.vehicle.target_lane_index[2] if isinstance(self.env.vehicle, ControlledVehicle) \
            else self.env.vehicle.lane_index[2]
        # if self.env.vehicle.crashed:
        #     reward = self.env.config["collision_reward"]*100
        # else:
        #     reward = 10
        # + self.env.RIGHT_LANE_REWARD * lane*10 / max(len(neighbours) - 1, 1) \
        scaled_speed = utils.lmap(self.env.vehicle.speed,
                                  self.env.config["reward_speed_range"],
                                  [0, 1])
        # print(scaled_speed, self.env.config["reward_speed_range"])
        # print(scaled_speed)
        # print(self.env.RIGHT_LANE_REWARD*lane)
        # print(self.env.config["collision_reward"])
        crashed = -1 if self.env.vehicle.crashed else 1
        reward = \
            + 0.05*lane_change \
            + 0.2 * lane / max(len(neighbours) - 1, 1) \
            + 0.65 * scaled_speed \
            + crashed*1.1

        # print(reward)
        # reward = self.env.config["collision_reward"] if self.env.vehicle.crashed else reward
        reward = utils.lmap(reward, [-2, 2], [-1, 1])
        # print(reward)
        # reward = utils.lmap(reward,
        #                 [self.env.config["collision_reward"]*5, (self.env.HIGH_SPEED_REWARD + self.env.RIGHT_LANE_REWARD)*5],
        #                 [0, 5])
        # print(reward)
        # print(self.env.vehicle.speed, reward)
        reward = 0 if not self.env.vehicle.on_road else reward
        return reward
Example #25
0
 def target_lane_position(self, initial_position, road_r):
     target_lane = self.road.network.get_lane(self.target_lane_index)
     lane_coords = target_lane.local_coordinates(initial_position)
     # x_r = lane_coords[0] + np.arange(0, 301, 10)
     # lane_x = [target_lane.position(item, 0)[0] for item in x_r]
     # lane_y = [0 for item in x_r]
     x_r = lane_coords[0]
     y_r = 0
     lane_x = []
     lane_x_n = []
     lane_y = []
     lane_y_n = []
     for item in np.arange(0, 101, 10):
         xx, yy = target_lane.position(x_r + item, 0)
         xx_n = lmap(xx, [-(road_r + 4), (road_r + 4)], [-1, 1])
         yy_n = lmap(yy, [0, 2 * (road_r + 4)], [-1, 1])
         lane_x.append(xx)
         lane_x_n.append(xx_n)
         lane_y.append(yy)
         lane_y_n.append(yy_n)
     return lane_x, lane_y, lane_x_n, lane_y_n
Example #26
0
    def _reward(self, action: Action) -> float:
        """
        The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions.
        :param action: the last action performed
        :return: the corresponding reward
        """
        lane_index = self.vehicle.target_lane_index if isinstance(self.vehicle, ControlledVehicle) \
            else self.vehicle.lane_index
        scaled_speed = utils.lmap(self.vehicle.speed,
                                  self.config["reward_speed_range"], [0, 1])
        reward = self.config["collision_reward"] * self.vehicle.crashed \
                 + self.config["goal_reward"] * self._is_success() \
                 + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1) \
                 + self.config["right_lane_reward"] * lane_index[-1]

        reward = utils.lmap(
            reward,
            [self.config["collision_reward"], self.config["goal_reward"]],
            [0, 1])
        reward = np.clip(reward, 0, 1)
        return reward
Example #27
0
    def _reward(self, action: Action) -> float:
        """
        The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions.
        :param action: the last action performed
        :return: the corresponding reward
        """
        reward = utils.lmap(self.vehicle.speed,
                            self.config["reward_speed_range"], [0, 1])
        if self.vehicle.crashed:
            reward = self.config["collision_reward"] * reward

        return reward
Example #28
0
    def get_avg_speeds(self, sympathy_flag=False, scaled_speed_flag=False):
        speeds = []
        if sympathy_flag:
            for vehicle in self.env.road.vehicles:
                if scaled_speed_flag:
                    scaled_speed = utils.lmap(vehicle.speed,
                                              self.reward_speed_range, [0, 1])
                else:
                    scaled_speed = vehicle.speed
                speeds.append(scaled_speed)
        else:
            for vehicle in self.env.controlled_vehicles:
                if scaled_speed_flag:
                    scaled_speed = utils.lmap(vehicle.speed,
                                              self.reward_speed_range, [0, 1])
                else:
                    scaled_speed = vehicle.speed

                speeds.append(scaled_speed)

        return np.mean(speeds)
Example #29
0
    def get_sum_of_speeds(self, sympathy_flag=False, scaled_speed_flag=False):
        sum_of_speeds = 0
        if sympathy_flag:
            for vehicle in self.env.road.vehicles:
                if scaled_speed_flag:
                    scaled_speed = utils.lmap(vehicle.speed,
                                              self.reward_speed_range, [0, 1])
                else:
                    scaled_speed = vehicle.speed

                sum_of_speeds += scaled_speed
        else:
            for vehicle in self.env.controlled_vehicles:
                if scaled_speed_flag:
                    scaled_speed = utils.lmap(vehicle.speed,
                                              self.reward_speed_range, [0, 1])
                else:
                    scaled_speed = vehicle.speed

                sum_of_speeds += scaled_speed

        return sum_of_speeds
Example #30
0
 def normalize_obs(self, observation: np.ndarray) -> np.ndarray:
     """
         Normalize the observation values.
     :param ndarray observation: observation data
     """
     if self.features_range is None:
         side_lanes = self.env.road.network.all_side_lanes(
             self.env.vehicle.lane_index)
         self.features_range = {
             "lane_index": [0, len(side_lanes) - 1],
             "x": [-5.0 * MDPVehicle.SPEED_MAX, 5.0 * MDPVehicle.SPEED_MAX],
             "vx": [-2 * MDPVehicle.SPEED_MAX, 2 * MDPVehicle.SPEED_MAX]
         }
     observation[0,
                 0] = utils.lmap(observation[0, 0],
                                 self.features_range['lane_index'], [0, 1])
     observation[1:, 0] = utils.lmap(observation[1:, 0],
                                     self.features_range['x'], [-1, 1])
     observation[:, 1] = utils.lmap(observation[:, 1],
                                    self.features_range['vx'], [-1, 1])
     observation = np.clip(observation, -1, 1)
     return observation