class RLManager: def __init__(self, event: Scenario): self.eps_time = 40 self.reward_manager = reward_selector(event) self.event = event self.state_manager = StateManager(event) def terminate(self, env_desc: EnvironmentState) -> bool: """ A function which returns true if the episode must terminate. This is decided by the following conditions. * Has the vehicle collinded? * Is the episode time elapsed more than the threshold self.eps_time? * Is the maneouver completed? Args: env_desc: (EnvironmentState) ROS Message for the environment Returns: true if episode must terminate """ if self.event == Scenario.LANE_CHANGE: # return true if any of the conditions described in the description is true return env_desc.reward.collision or \ env_desc.reward.path_planner_terminate or \ env_desc.reward.time_elapsed > self.eps_time elif self.event == Scenario.PEDESTRIAN: # return true if vehicle has moved past the pedestrian # treat a pedestrian as a vehicle object ped_vehicle = VehicleState() relative_pose = VehicleState() # if there is a pedestrian closeby then check if we have passed him if env_desc.nearest_pedestrian.exist: # populate the "pedestrian vehicle" parameters ped_vehicle.vehicle_location = env_desc.nearest_pedestrian.pedestrian_location ped_vehicle.vehicle_speed = env_desc.nearest_pedestrian.pedestrian_speed relative_pose = convertToLocal(env_desc.cur_vehicle_state,ped_vehicle) if relative_pose.vehicle_location.x < -10: return True # usual conditions return env_desc.reward.collision or \ env_desc.reward.path_planner_terminate or \ env_desc.reward.time_elapsed > self.eps_time def convertDecision(self, action: int) -> RLDecision: """ Converts the action in int into an RLDecision enum Args: :param action: (int) neural network decision given as an argmax Returns: RLDecision enum """ if action == RLDecision.CONSTANT_SPEED.value: return RLDecision.CONSTANT_SPEED elif action == RLDecision.ACCELERATE.value: return RLDecision.ACCELERATE elif action == RLDecision.DECELERATE.value: return RLDecision.DECELERATE elif action == RLDecision.SWITCH_LANE.value: return RLDecision.SWITCH_LANE else: logging.error("Bug in decision conversion") raise RuntimeError("Invalid action given") def rewardCalculation(self) -> np.ndarray: raise NotImplementedError() def makeStateVector(self, env_desc: EnvironmentState, local: bool = False) -> np.ndarray: """ Creates a state embedding Args: :param env_desc: (EnvironmentState) A ROS Message describing the state :param local: (bool) Flag to select the frame of reference for the embedding Returns: A state embedding vector (np.ndarray) """ return self.state_manager.embedState(env_desc, self.event, local)