Example #1
def race_line_heading(params):
    current_position = Point(params['x'], params['y'])
    race_line = LineString(RACE_LINE_WAYPOINTS)
    current_ndist = race_line.project(current_position, normalized=True)
    if params['is_reversed']:
        next_index = bisect.bisect_left(g_race_line_dists, current_ndist) - 1
        prev_index = next_index + 1
        if next_index == -1: next_index = len(g_race_line_dists) - 1
    else:
        next_index = bisect.bisect_right(g_race_line_dists, current_ndist)
        prev_index = next_index - 1
        if next_index == len(g_race_line_dists): next_index = 0
    # Target heading in Euler reference coordinates
    #print("nearest race line [%d,%d] at (%0.2f,%0.2f),(%0.2f,%0.2f)" % (prev_index, next_index, RACE_LINE_WAYPOINTS[prev_index][0], RACE_LINE_WAYPOINTS[prev_index][1], RACE_LINE_WAYPOINTS[next_index][0], RACE_LINE_WAYPOINTS[next_index][1]))
    target_heading = angle_of_vector(
        (RACE_LINE_WAYPOINTS[prev_index], RACE_LINE_WAYPOINTS[next_index]))

    #heading_with_steering = params['heading'] + params['steering_angle']
    #if heading_with_steering > 180: heading_with_steering -= 360
    #if heading_with_steering < -180: heading_with_steering += 360

    heading_delta = abs(target_heading - params['heading'])
    if heading_delta > 180: heading_delta = abs(heading_delta - 360)
    #print("heading %0.2f target_heading %0.2f delta %0.2f" % (params['heading'], target_heading, heading_delta))

    heading_factor = 1.0 - heading_delta / 90.0
    #print("heading_factor = %0.2f" % heading_factor)
    return max(heading_factor, 0.0)
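The function above relies on three module-level names that are not shown: RACE_LINE_WAYPOINTS, g_race_line_dists, and angle_of_vector. A minimal, runnable sketch of how they might be set up and how the function is called; the square loop, the angle_of_vector helper, and the params values are hypothetical stand-ins:

import bisect
import math

from shapely.geometry import Point, LineString

# Hypothetical closed race line (first point repeated at the end).
RACE_LINE_WAYPOINTS = [(0, 0), (4, 0), (4, 4), (0, 4), (0, 0)]

# Normalized distance of each waypoint along the line, mirroring how
# center_dists is built in the environment classes below.
_line = LineString(RACE_LINE_WAYPOINTS)
g_race_line_dists = [_line.project(Point(p), normalized=True)
                     for p in _line.coords[:-1]] + [1.0]

def angle_of_vector(segment):
    # Assumed helper: heading of a two-point segment in degrees, [-180, 180].
    (x1, y1), (x2, y2) = segment
    return math.degrees(math.atan2(y2 - y1, x2 - x1))

params = {'x': 2.0, 'y': 0.1, 'heading': 10.0, 'is_reversed': False}
print(race_line_heading(params))  # ~0.89: heading is 10 degrees off the race line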
Example #2
class DeepRacerEnv(gym.Env):
    def __init__(self):

        self.sampling_rate = 30.0

        self.sampling_sleep = (1.0/self.sampling_rate)

        self.sampling_rates = [30.0, 30.0]
        self.sampling_rate_index = 0

        self.latencies = [10.0, 20.0, 40.0, 60.0, 80.0, 100.0, 120.0]

        self.latency_index = 0

        self.latency_max_num_steps = 500  # number of steps for which the latency stays fixed; it changes on reset or when the episode is done

        self.latency_steps = 0

        self.latency = 10.0  # starting latency in milliseconds

        self.model_running_time = (2.0/1000.0)  # model inference time (2 ms)


        screen_height = TRAINING_IMAGE_SIZE[1]
        screen_width = TRAINING_IMAGE_SIZE[0]

        self.on_track = 0
        self.progress = 0
        self.yaw = 0
        self.x = 0
        self.y = 0
        self.z = 0
        self.distance_from_center = 0
        self.distance_from_border_1 = 0
        self.distance_from_border_2 = 0
        self.steps = 0
        self.progress_at_beginning_of_race = 0

        self.reverse_dir = False
        self.start_ndist = 0.0

        # actions -> steering angle, throttle
        self.action_space = spaces.Box(low=np.array([-1, 0]), high=np.array([+1, +1]), dtype=np.float32)

        # given image from simulator
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(screen_height, screen_width, 1), dtype=np.uint8)

        self.allow_servo_step_signals = True

        #stores the time when camera images are received
        self.cam_update_time=[]

        #stores the time when consecutive actions are sent
        self.cons_action_send_time=[]

        #stores the time when progress updates are received
        self.progress_update_time = []

        #folder location to store the debug data
        self.debug_data_folder = []

        self.debug_index = 0


        if node_type == SIMULATION_WORKER:
            # ROS initialization
            rospy.init_node('rl_coach', anonymous=True)


            self.ack_publisher = rospy.Publisher('/vesc/low_level/ackermann_cmd_mux/output',
                                                 AckermannDriveStamped, queue_size=100)
            self.racecar_service = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState)

            self.clear_forces_client = rospy.ServiceProxy('/gazebo/clear_joint_forces',
                                                          JointRequest)


            # Subscribe to ROS topics and register callbacks
            rospy.Subscriber('/progress', Progress, self.callback_progress)
            rospy.Subscriber('/camera/zed/rgb/image_rect_color', sensor_image, self.callback_image)

            self.world_name = 'hard_track'  # rospy.get_param('WORLD_NAME')
            self.set_waypoints()

        waypoints = self.waypoints
        is_loop = np.all(waypoints[0,:] == waypoints[-1,:])
        if is_loop:
            self.center_line = LinearRing(waypoints[:,0:2])

        else:
            self.center_line = LineString(waypoints[:,0:2])

        self.center_dists = [self.center_line.project(Point(p), normalized=True) for p in self.center_line.coords[:-1]] + [1.0]
        self.track_length = self.center_line.length



        self.reward_in_episode = 0
        self.prev_progress = 0
        self.steps = 0

        # Create the publishers for sending speed and steering info to the car
        self.velocity_pub_dict = OrderedDict()
        self.steering_pub_dict = OrderedDict()

        for topic in VELOCITY_TOPICS:
            self.velocity_pub_dict[topic] = rospy.Publisher(topic, Float64, queue_size=1)

        for topic in STEERING_TOPICS:
            self.steering_pub_dict[topic] = rospy.Publisher(topic, Float64, queue_size=1)

    def get_data_debug(self):
        print("center_line",self.center_line)
        print("track_length",self.track_length)

    def reset(self,inp_x=1.75,inp_y=0.6):
        if node_type == SAGEMAKER_TRAINING_WORKER:
            return self.observation_space.sample()
        #print('Total Reward Reward=%.2f' % self.reward_in_episode,
        #      'Total Steps=%.2f' % self.steps)
        #self.send_reward_to_cloudwatch(self.reward_in_episode)

        self.reward_in_episode = 0
        self.reward = None
        self.done = False
        self.next_state = None
        self.image = None
        self.steps = 0
        self.prev_progress = 0

        # Reset car in Gazebo
        self.send_action(0, 0)  # set the throttle to 0


        self.racecar_reset(0, 0)

        self.infer_reward_state(0, 0)

        self.cam_update_time = []
        self.cons_action_send_time = []
        self.progress_update_time = []
        self.debug_index += 1

        return self.next_state


    def add_latency_to_image(self,observation):

        observation = observation.reshape(observation.shape[0],observation.shape[1],1)

        #print('Set latency is:',self.latency*self.latency_max)
        observation[119, 159, 0] = int(self.latency)

        #setting the sampling rate
        observation[119, 158, 0] = int(self.sampling_rate)

        #print(observation[119, 159, 0],observation[119, 158, 0] )

        return observation
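    # Note: the latency (in ms) and the sampling rate (in Hz) are encoded into
    # the two bottom-right pixels of the grayscale observation (row 119,
    # columns 159 and 158 of a 160x120 image), so the downstream model can
    # condition on them without an extra input channel.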

    def convert_rgb_to_gray(self, observation):
        r, g, b = observation[:, :, 0], observation[:, :, 1], observation[:, :, 2]
        observation = 0.2989 * r + 0.5870 * g + 0.1140 * b
        return observation





    def set_next_state(self):
        if self.image is not None:

            #t1 = time.time()

            image_data = self.image

            # Read the image and resize to get the state
            #print(image_data.width, image_data.height)
            image = Image.frombytes('RGB', (image_data.width, image_data.height), image_data.data, 'raw', 'RGB', 0, 1)
            image = image.resize(TRAINING_IMAGE_SIZE, resample=2)
            image = np.array(image)

            #image = do_randomization(image)

            image = self.convert_rgb_to_gray(image)
            image = self.add_latency_to_image(image)

            self.next_state = image



    def racecar_reset(self, ndist, next_index):
        rospy.wait_for_service('gazebo/set_model_state')

        #random_start = random.random()

        prev_index, next_index = self.find_prev_next_waypoints(self.start_ndist)

        # Compute the starting position and heading
        #start_point = self.center_line.interpolate(ndist, normalized=True)
        start_point = self.center_line.interpolate(self.start_ndist, normalized=True)
        start_yaw = math.atan2(self.center_line.coords[next_index][1] - start_point.y,
                               self.center_line.coords[next_index][0] - start_point.x)
        start_quaternion = Rotation.from_euler('zyx', [start_yaw, 0, 0]).as_quat()

        # Construct the model state and send to Gazebo
        model_state = ModelState()
        model_state.model_name = 'racecar'
        model_state.pose.position.x = start_point.x
        model_state.pose.position.y = start_point.y
        model_state.pose.position.z = 0
        model_state.pose.orientation.x = start_quaternion[0]
        model_state.pose.orientation.y = start_quaternion[1]
        model_state.pose.orientation.z = start_quaternion[2]
        model_state.pose.orientation.w = start_quaternion[3]
        model_state.twist.linear.x = 0
        model_state.twist.linear.y = 0
        model_state.twist.linear.z = 0
        model_state.twist.angular.x = 0
        model_state.twist.angular.y = 0
        model_state.twist.angular.z = 0

        self.racecar_service(model_state)

        for joint in EFFORT_JOINTS:
            self.clear_forces_client(joint)


        # keep track of where the car should start next episode
        self.reverse_dir = not self.reverse_dir
        self.start_ndist = (self.start_ndist + ROUND_ROBIN_ADVANCE_DIST) % 1.0

        self.progress_at_beginning_of_race = self.progress



    def find_prev_next_waypoints(self, ndist):
        if self.reverse_dir:
            next_index = bisect.bisect_left(self.center_dists, ndist) - 1
            prev_index = next_index + 1
            if next_index == -1: next_index = len(self.center_dists) - 1
        else:
            next_index = bisect.bisect_right(self.center_dists, ndist)
            prev_index = next_index - 1
            if next_index == len(self.center_dists): next_index = 0
        return prev_index, next_index


    def step(self, action):

        self.latency_steps += 1

        #print('latency set in env:',self.latency)

        # bookkeeping: when the action was sent
        #self.cons_action_send_time.append([self.steps,time.time()])

        latency = (self.latency - 2.0)/1000.0
        # 2 ms, the average default latency observed on the training machine, is subtracted

        if latency > 0.001:
            time.sleep(latency)
        else:
            latency = 0.0


        # Initialize next state, reward, done flag
        self.next_state = None
        self.reward = None
        self.done = False

        # Send this action to Gazebo and increment the step count
        self.steering_angle = float(action[0])
        self.speed = float(action[1])

        self.send_action(self.steering_angle, self.speed)
        self.steps += 1

        # sleep to control the sampling rate
        to_sleep = (self.sampling_sleep - self.model_running_time - latency)
        if to_sleep > 0.001:
            time.sleep(to_sleep)


#         if self.latency_steps == self.latency_max_num_steps:

#             #update the latency
#             self.latency_index = (self.latency_index+1) % (len(self.latencies))
#             self.latency = self.latencies[self.latency_index]


#             #update the sampling rate
#             self.sampling_rate_index  = random.randint(0,1)
#             self.sampling_rate = self.sampling_rates[self.sampling_rate_index]

#             self.sampling_sleep = (1.0/self.sampling_rate)

#             if (self.latency/1000.0)> self.sampling_sleep: # match sampling input to the model and latency
#                  self.sampling_rate = 1000.0/self.latency


#             self.latency_steps = 0


        # Compute the next state and reward
        self.infer_reward_state(self.steering_angle, self.speed)


        return self.next_state, self.reward, self.done, {}


    def send_action(self, steering_angle, speed):
        # Simple v/r computes the desired wheel rpm
        wheel_rpm = speed/WHEEL_RADIUS

        for _, pub in self.velocity_pub_dict.items():
            pub.publish(wheel_rpm)

        for _, pub in self.steering_pub_dict.items():
            pub.publish(steering_angle)


    def callback_image(self, data):
        self.image = data

        # bookkeeping: when the image was received
        #self.cam_update_time.append([self.steps,time.time()])


    def callback_progress(self, data):
        self.on_track = not (data.off_track)
        self.progress = data.progress
        self.yaw = data.yaw
        self.x = data.x
        self.y = data.y
        self.z = data.z
        self.distance_from_center = data.distance_from_center
        self.distance_from_border_1 = data.distance_from_border_1
        self.distance_from_border_2 = data.distance_from_border_2

        # bookkeeping: when the progress update was received
        #self.progress_update_time.append([self.steps,time.time()])




    def reward_function (self, on_track, x, y, distance_from_center,
    throttle, steering, track_width):

        marker_1 = 0.1 * track_width
        marker_2 = 0.15 * track_width
        marker_3 = 0.20 * track_width

        reward = (track_width - distance_from_center) #max reward = 0.44


        if distance_from_center >= 0.0 and distance_from_center <= marker_1:
            reward = reward * 2.5 #0.90, 0.44 max is scaled to 1.0
        elif distance_from_center <= marker_2:
            reward = reward * 1.33 #0.85, 0.375 max is scaled to 0.5
        elif distance_from_center <= marker_3:
            reward = reward * 0.71  #0.80, 0.352 max is scaled to 0.25
        else:
            reward = 0.001  # may go close to off track

        # penalize reward for the car taking slow actions

        if throttle < 1.6 and reward>0:
            reward *= 0.95

        if throttle < 1.4 and reward>0:
            reward *= 0.95


        return float(reward)
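    # Worked example (illustrative numbers, assuming track_width = 0.44 as in
    # the hard track below): with distance_from_center = 0.02 the car is
    # within marker_1 (0.044), so reward = (0.44 - 0.02) * 2.5 = 1.05; at
    # distance_from_center = 0.06 it falls in the marker_2 band (<= 0.066),
    # so reward = (0.44 - 0.06) * 1.33 ~= 0.51. A throttle below 1.4 triggers
    # both slow-action penalties, scaling the reward by 0.95 * 0.95 ~= 0.90.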


    def infer_reward_state(self, steering_angle, throttle):

        # state has to be set first, because we need the most accurate reward signal
        self.set_next_state()


        on_track = self.on_track

        done = False

        if on_track != 1:
            reward = CRASHED
            done = True

        else:
            reward = self.reward_function(on_track, self.x, self.y, self.distance_from_center,
                                          throttle, steering_angle, self.road_width)


        # restart the episode after 500 steps
        if self.steps == 500:
            done = True

            if reward > 0:  # car has not crashed
                reward = reward * 5.0  # bonus for completing 500 steps


        self.reward_in_episode += reward
        self.reward = reward
        self.done = done






    def set_waypoints(self):
        if self.world_name.startswith(MEDIUM_TRACK_WORLD):
            self.waypoints = vertices = np.zeros((8, 2))
            self.road_width = 0.50
            vertices[0][0] = -0.99; vertices[0][1] = 2.25;
            vertices[1][0] = 0.69;  vertices[1][1] = 2.26;
            vertices[2][0] = 1.37;  vertices[2][1] = 1.67;
            vertices[3][0] = 1.48;  vertices[3][1] = -1.54;
            vertices[4][0] = 0.81;  vertices[4][1] = -2.44;
            vertices[5][0] = -1.25; vertices[5][1] = -2.30;
            vertices[6][0] = -1.67; vertices[6][1] = -1.64;
            vertices[7][0] = -1.73; vertices[7][1] = 1.63;
        elif self.world_name.startswith(EASY_TRACK_WORLD):
            self.waypoints = vertices = np.zeros((2, 2))
            self.road_width = 0.90
            vertices[0][0] = -1.08;   vertices[0][1] = -0.05;
            vertices[1][0] =  1.08;   vertices[1][1] = -0.05;
        else:
            self.waypoints = vertices = np.zeros((30, 2))
            self.road_width = 0.44
            vertices[0][0] = 1.5;     vertices[0][1] = 0.58;
            vertices[1][0] = 5.5;     vertices[1][1] = 0.58;
            vertices[2][0] = 5.6;     vertices[2][1] = 0.6;
            vertices[3][0] = 5.7;     vertices[3][1] = 0.65;
            vertices[4][0] = 5.8;     vertices[4][1] = 0.7;
            vertices[5][0] = 5.9;     vertices[5][1] = 0.8;
            vertices[6][0] = 6.0;     vertices[6][1] = 0.9;
            vertices[7][0] = 6.08;    vertices[7][1] = 1.1;
            vertices[8][0] = 6.1;     vertices[8][1] = 1.2;
            vertices[9][0] = 6.1;     vertices[9][1] = 1.3;
            vertices[10][0] = 6.1;    vertices[10][1] = 1.4;
            vertices[11][0] = 6.07;   vertices[11][1] = 1.5;
            vertices[12][0] = 6.05;   vertices[12][1] = 1.6;
            vertices[13][0] = 6;      vertices[13][1] = 1.7;
            vertices[14][0] = 5.9;    vertices[14][1] = 1.8;
            vertices[15][0] = 5.75;   vertices[15][1] = 1.9;
            vertices[16][0] = 5.6;    vertices[16][1] = 2.0;
            vertices[17][0] = 4.2;    vertices[17][1] = 2.02;
            vertices[18][0] = 4;      vertices[18][1] = 2.1;
            vertices[19][0] = 2.6;    vertices[19][1] = 3.92;
            vertices[20][0] = 2.4;    vertices[20][1] = 4;
            vertices[21][0] = 1.2;    vertices[21][1] = 3.95;
            vertices[22][0] = 1.1;    vertices[22][1] = 3.92;
            vertices[23][0] = 1;      vertices[23][1] = 3.88;
            vertices[24][0] = 0.8;    vertices[24][1] = 3.72;
            vertices[25][0] = 0.6;    vertices[25][1] = 3.4;
            vertices[26][0] = 0.58;   vertices[26][1] = 3.3;
            vertices[27][0] = 0.57;   vertices[27][1] = 3.2;
            vertices[28][0] = 1;      vertices[28][1] = 1;
            vertices[29][0] = 1.25;   vertices[29][1] = 0.7;

    def get_closest_waypoint(self):
        res = 0
        index = 0
        x = self.x
        y = self.y
        minDistance = float('inf')
        for row in self.waypoints:
            distance = math.sqrt((row[0] - x) * (row[0] - x) + (row[1] - y) * (row[1] - y))
            if distance < minDistance:
                minDistance = distance
                res = index
            index = index + 1
        return res
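The normalized-distance bookkeeping above makes the waypoint lookup a pure bisect problem. Here is a standalone sketch of the same prev/next logic with hypothetical distances (in the environment, center_dists comes from projecting each waypoint onto the center line):

import bisect

center_dists = [0.0, 0.25, 0.5, 0.75, 1.0]

def find_prev_next(ndist, reverse_dir=False):
    if reverse_dir:
        next_index = bisect.bisect_left(center_dists, ndist) - 1
        prev_index = next_index + 1
        if next_index == -1: next_index = len(center_dists) - 1
    else:
        next_index = bisect.bisect_right(center_dists, ndist)
        prev_index = next_index - 1
        if next_index == len(center_dists): next_index = 0
    return prev_index, next_index

print(find_prev_next(0.30))                    # (1, 2): between waypoints 1 and 2
print(find_prev_next(0.30, reverse_dir=True))  # (2, 1): the pair flips when reversed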
class DeepRacerRacetrackEnv(gym.Env):
    def __init__(self):

        # Create the observation space
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(TRAINING_IMAGE_SIZE[1],
                                                   TRAINING_IMAGE_SIZE[0], 3),
                                            dtype=np.uint8)
        # Create the action space
        self.action_space = spaces.Box(low=np.array([-1, 0]),
                                       high=np.array([+1, +1]),
                                       dtype=np.float32)

        if node_type == SIMULATION_WORKER:

            # ROS initialization
            rospy.init_node('rl_coach', anonymous=True)

            # wait for required services
            rospy.wait_for_service(
                '/deepracer_simulation_environment/get_waypoints')
            rospy.wait_for_service(
                '/deepracer_simulation_environment/reset_car')
            rospy.wait_for_service('/gazebo/get_model_state')
            rospy.wait_for_service('/gazebo/get_link_state')
            rospy.wait_for_service('/gazebo/clear_joint_forces')

            self.get_model_state = rospy.ServiceProxy(
                '/gazebo/get_model_state', GetModelState)
            self.get_link_state = rospy.ServiceProxy('/gazebo/get_link_state',
                                                     GetLinkState)
            self.clear_forces_client = rospy.ServiceProxy(
                '/gazebo/clear_joint_forces', JointRequest)
            self.reset_car_client = rospy.ServiceProxy(
                '/deepracer_simulation_environment/reset_car', ResetCarSrv)
            get_waypoints_client = rospy.ServiceProxy(
                '/deepracer_simulation_environment/get_waypoints',
                GetWaypointSrv)

            # Create the publishers for sending speed and steering info to the car
            self.velocity_pub_dict = OrderedDict()
            self.steering_pub_dict = OrderedDict()

            for topic in VELOCITY_TOPICS:
                self.velocity_pub_dict[topic] = rospy.Publisher(topic,
                                                                Float64,
                                                                queue_size=1)

            for topic in STEERING_TOPICS:
                self.steering_pub_dict[topic] = rospy.Publisher(topic,
                                                                Float64,
                                                                queue_size=1)

            # Read in parameters
            self.world_name = rospy.get_param('WORLD_NAME')
            self.job_type = rospy.get_param('JOB_TYPE')
            self.aws_region = rospy.get_param('AWS_REGION')
            self.metrics_s3_bucket = rospy.get_param('METRICS_S3_BUCKET')
            self.metrics_s3_object_key = rospy.get_param(
                'METRICS_S3_OBJECT_KEY')
            self.metrics = []
            self.simulation_job_arn = 'arn:aws:robomaker:' + self.aws_region + ':' + \
                                      rospy.get_param('ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID') + \
                                      ':simulation-job/' + rospy.get_param('AWS_ROBOMAKER_SIMULATION_JOB_ID')

            if self.job_type == TRAINING_JOB:
                from custom_files.customer_reward_function import reward_function
                self.reward_function = reward_function
                self.metric_name = rospy.get_param('METRIC_NAME')
                self.metric_namespace = rospy.get_param('METRIC_NAMESPACE')
                self.training_job_arn = rospy.get_param('TRAINING_JOB_ARN')
                self.target_number_of_episodes = rospy.get_param(
                    'NUMBER_OF_EPISODES')
                self.target_reward_score = rospy.get_param(
                    'TARGET_REWARD_SCORE')
            else:
                from markov.defaults import reward_function
                self.reward_function = reward_function
                self.number_of_trials = 0
                self.target_number_of_trials = rospy.get_param(
                    'NUMBER_OF_TRIALS')

            # Request the waypoints
            waypoints = None
            try:
                resp = get_waypoints_client()
                waypoints = np.array(resp.waypoints).reshape(
                    resp.row, resp.col)
            except Exception as ex:
                utils.json_format_logger(
                    "Unable to retrieve waypoints: {}".format(ex),
                    **utils.build_system_error_dict(
                        utils.SIMAPP_ENVIRONMENT_EXCEPTION,
                        utils.SIMAPP_EVENT_ERROR_CODE_500))

            is_loop = np.all(waypoints[0, :] == waypoints[-1, :])
            if is_loop:
                self.center_line = LinearRing(waypoints[:, 0:2])
                self.inner_border = LinearRing(waypoints[:, 2:4])
                self.outer_border = LinearRing(waypoints[:, 4:6])
                self.road_poly = Polygon(self.outer_border,
                                         [self.inner_border])
            else:
                self.center_line = LineString(waypoints[:, 0:2])
                self.inner_border = LineString(waypoints[:, 2:4])
                self.outer_border = LineString(waypoints[:, 4:6])
                self.road_poly = Polygon(
                    np.vstack(
                        (self.outer_border, np.flipud(self.inner_border))))
            self.center_dists = [
                self.center_line.project(Point(p), normalized=True)
                for p in self.center_line.coords[:-1]
            ] + [1.0]
            self.track_length = self.center_line.length
            # Queue used to maintain image consumption synchronicity
            self.image_queue = queue.Queue(IMG_QUEUE_BUF_SIZE)
            rospy.Subscriber('/camera/zed/rgb/image_rect_color', sensor_image,
                             self.callback_image)

            # Initialize state data
            self.episodes = 0
            self.start_ndist = 0.0
            self.reverse_dir = False
            self.change_start = rospy.get_param(
                'CHANGE_START_POSITION', (self.job_type == TRAINING_JOB))
            self.alternate_dir = rospy.get_param('ALTERNATE_DRIVING_DIRECTION',
                                                 False)
            self.is_simulation_done = False
            self.steering_angle = 0
            self.speed = 0
            self.action_taken = 0
            self.prev_progress = 0
            self.prev_point = Point(0, 0)
            self.prev_point_2 = Point(0, 0)
            self.next_state = None
            self.reward = None
            self.reward_in_episode = 0
            self.done = False
            self.steps = 0
            self.simulation_start_time = 0
            self.allow_servo_step_signals = False

    def reset(self):
        if node_type == SAGEMAKER_TRAINING_WORKER:
            return self.observation_space.sample()

        # Simulation is done - so RoboMaker will start to shut down the app.
        # Until RoboMaker shuts down the app, do nothing, or else the metrics may show unexpected data.
        if (node_type == SIMULATION_WORKER) and self.is_simulation_done:
            while True:
                time.sleep(1)

        self.steering_angle = 0
        self.speed = 0
        self.action_taken = 0
        self.prev_progress = 0
        self.prev_point = Point(0, 0)
        self.prev_point_2 = Point(0, 0)
        self.next_state = None
        self.reward = None
        self.reward_in_episode = 0
        self.done = False
        # Reset the car and record the simulation start time
        if self.allow_servo_step_signals:
            self.send_action(0, 0)

        self.racecar_reset()
        self.steps = 0
        self.simulation_start_time = time.time()
        self.infer_reward_state(0, 0)

        return self.next_state

    def set_next_state(self):
        # Make sure the first image is the starting image
        image_data = self.image_queue.get(block=True, timeout=None)
        # Read the image and resize to get the state
        image = Image.frombytes('RGB', (image_data.width, image_data.height),
                                image_data.data, 'raw', 'RGB', 0, 1)
        image = image.resize(TRAINING_IMAGE_SIZE, resample=2)
        self.next_state = np.array(image)

    def racecar_reset(self):
        try:
            for joint in EFFORT_JOINTS:
                self.clear_forces_client(joint)
            prev_index, next_index = self.find_prev_next_waypoints(
                self.start_ndist)
            self.reset_car_client(self.start_ndist, next_index)
            # First clear the queue so that we set the state to the start image
            _ = self.image_queue.get(block=True, timeout=None)
            self.set_next_state()

        except Exception as ex:
            utils.json_format_logger(
                "Unable to reset the car: {}".format(ex),
                **utils.build_system_error_dict(
                    utils.SIMAPP_ENVIRONMENT_EXCEPTION,
                    utils.SIMAPP_EVENT_ERROR_CODE_500))

    def set_allow_servo_step_signals(self, allow_servo_step_signals):
        self.allow_servo_step_signals = allow_servo_step_signals

    def step(self, action):
        if node_type == SAGEMAKER_TRAINING_WORKER:
            return self.observation_space.sample(), 0, False, {}

        # Initialize next state, reward, done flag
        self.next_state = None
        self.reward = None
        self.done = False

        # Send this action to Gazebo and increment the step count
        self.steering_angle = float(action[0])
        self.speed = float(action[1])
        if self.allow_servo_step_signals:
            self.send_action(self.steering_angle, self.speed)
        self.steps += 1

        # Compute the next state and reward
        self.infer_reward_state(self.steering_angle, self.speed)
        return self.next_state, self.reward, self.done, {}

    def callback_image(self, data):
        try:
            self.image_queue.put_nowait(data)
        except queue.Full:
            pass
        except Exception as ex:
            utils.json_format_logger(
                "Error retrieving frame from gazebo: {}".format(ex),
                **utils.build_system_error_dict(
                    utils.SIMAPP_ENVIRONMENT_EXCEPTION,
                    utils.SIMAPP_EVENT_ERROR_CODE_500))

    def send_action(self, steering_angle, speed):
        # Simple v/r computes the desired wheel rpm
        wheel_rpm = speed / WHEEL_RADIUS

        for _, pub in self.velocity_pub_dict.items():
            pub.publish(wheel_rpm)

        for _, pub in self.steering_pub_dict.items():
            pub.publish(steering_angle)

    def infer_reward_state(self, steering_angle, speed):
        try:
            self.set_next_state()
        except Exception as ex:
            utils.json_format_logger(
                "Unable to retrieve image from queue: {}".format(ex),
                **utils.build_system_error_dict(
                    utils.SIMAPP_ENVIRONMENT_EXCEPTION,
                    utils.SIMAPP_EVENT_ERROR_CODE_500))

        # Read model state from Gazebo
        model_state = self.get_model_state('racecar', '')
        model_orientation = Rotation.from_quat([
            model_state.pose.orientation.x, model_state.pose.orientation.y,
            model_state.pose.orientation.z, model_state.pose.orientation.w
        ])
        model_location = np.array([
            model_state.pose.position.x,
            model_state.pose.position.y,
            model_state.pose.position.z]) + \
            model_orientation.apply(RELATIVE_POSITION_OF_FRONT_OF_CAR)
        model_point = Point(model_location[0], model_location[1])
        model_heading = model_orientation.as_euler('zyx')[0]

        # Read the wheel locations from Gazebo
        left_rear_wheel_state = self.get_link_state('racecar::left_rear_wheel',
                                                    '')
        left_front_wheel_state = self.get_link_state(
            'racecar::left_front_wheel', '')
        right_rear_wheel_state = self.get_link_state(
            'racecar::right_rear_wheel', '')
        right_front_wheel_state = self.get_link_state(
            'racecar::right_front_wheel', '')
        wheel_points = [
            Point(left_rear_wheel_state.link_state.pose.position.x,
                  left_rear_wheel_state.link_state.pose.position.y),
            Point(left_front_wheel_state.link_state.pose.position.x,
                  left_front_wheel_state.link_state.pose.position.y),
            Point(right_rear_wheel_state.link_state.pose.position.x,
                  right_rear_wheel_state.link_state.pose.position.y),
            Point(right_front_wheel_state.link_state.pose.position.x,
                  right_front_wheel_state.link_state.pose.position.y)
        ]

        # Project the current location onto the center line and find nearest points
        current_ndist = self.center_line.project(model_point, normalized=True)
        prev_index, next_index = self.find_prev_next_waypoints(current_ndist)
        distance_from_prev = model_point.distance(
            Point(self.center_line.coords[prev_index]))
        distance_from_next = model_point.distance(
            Point(self.center_line.coords[next_index]))
        closest_waypoint_index = (
            prev_index, next_index)[distance_from_next < distance_from_prev]

        # Compute distance from center and road width
        nearest_point_center = self.center_line.interpolate(current_ndist,
                                                            normalized=True)
        nearest_point_inner = self.inner_border.interpolate(
            self.inner_border.project(nearest_point_center))
        nearest_point_outer = self.outer_border.interpolate(
            self.outer_border.project(nearest_point_center))
        distance_from_center = nearest_point_center.distance(model_point)
        distance_from_inner = nearest_point_inner.distance(model_point)
        distance_from_outer = nearest_point_outer.distance(model_point)
        track_width = nearest_point_inner.distance(nearest_point_outer)
        is_left_of_center = (distance_from_outer < distance_from_inner) if self.reverse_dir \
            else (distance_from_inner < distance_from_outer)

        # Convert current progress to be [0,100] starting at the initial waypoint
        if self.reverse_dir:
            current_progress = self.start_ndist - current_ndist
        else:
            current_progress = current_ndist - self.start_ndist
        if current_progress < 0.0: current_progress = current_progress + 1.0
        current_progress = 100 * current_progress
        if current_progress < self.prev_progress:
            # Either: (1) we wrapped around and have finished the track,
            delta1 = current_progress + 100 - self.prev_progress
            # or (2) for some reason the car went backwards (this should be rare)
            delta2 = self.prev_progress - current_progress
            current_progress = (self.prev_progress, 100)[delta1 < delta2]

        # Car is off track if all wheels are outside the borders
        wheel_on_track = [self.road_poly.contains(p) for p in wheel_points]
        all_wheels_on_track = all(wheel_on_track)
        any_wheels_on_track = any(wheel_on_track)

        # Compute the reward
        if any_wheels_on_track:
            done = False
            params = {
                'all_wheels_on_track': all_wheels_on_track,
                'x': model_point.x,
                'y': model_point.y,
                'heading': model_heading * 180.0 / math.pi,
                'distance_from_center': distance_from_center,
                'progress': current_progress,
                'steps': self.steps,
                'speed': speed,
                'steering_angle': steering_angle * 180.0 / math.pi,
                'track_width': track_width,
                'waypoints': list(self.center_line.coords),
                'closest_waypoints': [prev_index, next_index],
                'is_left_of_center': is_left_of_center,
                'is_reversed': self.reverse_dir
            }
            try:
                reward = float(self.reward_function(params))
            except Exception as e:
                utils.json_format_logger(
                    "Exception {} in customer reward function. Job failed!".
                    format(e),
                    **utils.build_user_error_dict(
                        utils.SIMAPP_SIMULATION_WORKER_EXCEPTION,
                        utils.SIMAPP_EVENT_ERROR_CODE_400))
                traceback.print_exc()
                sys.exit(1)
        else:
            done = True
            reward = CRASHED

        # Reset if the car position hasn't changed in the last 2 steps
        prev_pnt_dist = min(model_point.distance(self.prev_point),
                            model_point.distance(self.prev_point_2))

        if prev_pnt_dist <= 0.0001 and self.steps % NUM_STEPS_TO_CHECK_STUCK == 0:
            done = True
            reward = CRASHED  # stuck

        # Simulation jobs are done when progress reaches 100
        if current_progress >= 100:
            done = True

        # Keep data from the previous step around
        self.prev_point_2 = self.prev_point
        self.prev_point = model_point
        self.prev_progress = current_progress

        # Set the reward and done flag
        self.reward = reward
        self.reward_in_episode += reward
        self.done = done

        # Trace logs to help us debug and visualize the training runs
        # btown TODO: This should be written to S3, not to CWL.
        logger.info(
            'SIM_TRACE_LOG:%d,%d,%.4f,%.4f,%.4f,%.2f,%.2f,%d,%.4f,%s,%s,%.4f,%d,%.2f,%s\n'
            %
            (self.episodes, self.steps, model_location[0], model_location[1],
             model_heading, self.steering_angle, self.speed, self.action_taken,
             self.reward, self.done, all_wheels_on_track, current_progress,
             closest_waypoint_index, self.track_length, time.time()))

        metrics = {
            "episodes": self.episodes,
            "steps": self.steps,
            "x": model_location[0],
            "y": model_location[1],
            "heading": model_heading,
            "steering_angle": self.steering_angle,
            "speed": self.speed,
            "action": self.action_taken,
            "reward": self.reward,
            "done": self.done,
            "all_wheels_on_track": all_wheels_on_track,
            "current_progress": current_progress,
            "closest_waypoint_index": closest_waypoint_index,
            "track_length": self.track_length,
            "time": time.time()
        }
        self.log_to_datadog(metrics)

        # Terminate this episode when ready
        if done and node_type == SIMULATION_WORKER:
            self.finish_episode(current_progress)

    def find_prev_next_waypoints(self, ndist):
        if self.reverse_dir:
            next_index = bisect.bisect_left(self.center_dists, ndist) - 1
            prev_index = next_index + 1
            if next_index == -1: next_index = len(self.center_dists) - 1
        else:
            next_index = bisect.bisect_right(self.center_dists, ndist)
            prev_index = next_index - 1
            if next_index == len(self.center_dists): next_index = 0
        return prev_index, next_index

    def stop_car(self):
        self.steering_angle = 0
        self.speed = 0
        self.action_taken = 0
        self.send_action(0, 0)
        self.racecar_reset()

    def finish_episode(self, progress):
        # Increment episode count, update start position and direction
        self.episodes += 1
        if self.change_start:
            self.start_ndist = (self.start_ndist +
                                ROUND_ROBIN_ADVANCE_DIST) % 1.0
        if self.alternate_dir:
            self.reverse_dir = not self.reverse_dir
        # Reset the car
        self.stop_car()

        # Update metrics based on job type
        if self.job_type == TRAINING_JOB:
            self.send_reward_to_cloudwatch(self.reward_in_episode)
            self.update_training_metrics()
            self.write_metrics_to_s3()
            if self.is_training_done():
                self.cancel_simulation_job()
        elif self.job_type == EVALUATION_JOB:
            self.number_of_trials += 1
            self.update_eval_metrics(progress)
            self.write_metrics_to_s3()

    def update_eval_metrics(self, progress):
        eval_metric = {}
        eval_metric['completion_percentage'] = int(progress)
        eval_metric['metric_time'] = int(round(time.time() * 1000))
        eval_metric['start_time'] = int(
            round(self.simulation_start_time * 1000))
        eval_metric['elapsed_time_in_milliseconds'] = int(
            round((time.time() - self.simulation_start_time) * 1000))
        eval_metric['trial'] = int(self.number_of_trials)
        self.metrics.append(eval_metric)

    def update_training_metrics(self):
        training_metric = {}
        training_metric['reward_score'] = int(round(self.reward_in_episode))
        training_metric['metric_time'] = int(round(time.time() * 1000))
        training_metric['start_time'] = int(
            round(self.simulation_start_time * 1000))
        training_metric['elapsed_time_in_milliseconds'] = int(
            round((time.time() - self.simulation_start_time) * 1000))
        training_metric['episode'] = int(self.episodes)
        self.metrics.append(training_metric)

    def write_metrics_to_s3(self):
        session = boto3.session.Session()
        s3_client = session.client('s3', region_name=self.aws_region)
        metrics_body = json.dumps({'metrics': self.metrics})
        s3_client.put_object(Bucket=self.metrics_s3_bucket,
                             Key=self.metrics_s3_object_key,
                             Body=bytes(metrics_body, encoding='utf-8'))

    def is_training_done(self):
        if ((self.target_number_of_episodes > 0) and (self.target_number_of_episodes == self.episodes)) or \
           ((isinstance(self.target_reward_score, (int, float))) and (self.target_reward_score <= self.reward_in_episode)):
            self.is_simulation_done = True
        return self.is_simulation_done

    def cancel_simulation_job(self):
        session = boto3.session.Session()
        robomaker_client = session.client('robomaker',
                                          region_name=self.aws_region)
        robomaker_client.cancel_simulation_job(job=self.simulation_job_arn)

    def log_to_datadog(self, metrics):
        for metric, value in metrics.items():
            if isinstance(value, (int, float)):
                api.Metric.send(metric=metric,
                                points=value,
                                tags=self.job_type + "-" +
                                SIMULATION_START_TIMESTAMP)

    def send_reward_to_cloudwatch(self, reward):
        session = boto3.session.Session()
        cloudwatch_client = session.client('cloudwatch',
                                           region_name=self.aws_region)
        cloudwatch_client.put_metric_data(MetricData=[
            {
                'MetricName':
                self.metric_name,
                'Dimensions': [
                    {
                        'Name': 'TRAINING_JOB_ARN',
                        'Value': self.training_job_arn
                    },
                ],
                'Unit':
                'None',
                'Value':
                reward
            },
        ],
                                          Namespace=self.metric_namespace)
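The progress bookkeeping in infer_reward_state is worth seeing in isolation: progress is measured from start_ndist, wrapped into [0, 1), scaled to [0, 100], and never allowed to move backwards. A minimal sketch with illustrative values:

def normalize_progress(current_ndist, start_ndist, prev_progress, reverse_dir=False):
    if reverse_dir:
        current_progress = start_ndist - current_ndist
    else:
        current_progress = current_ndist - start_ndist
    if current_progress < 0.0:
        current_progress += 1.0
    current_progress *= 100
    if current_progress < prev_progress:
        # Either we wrapped past the finish line (delta1 small) or the car
        # moved backwards (delta2 small); keep the more plausible reading.
        delta1 = current_progress + 100 - prev_progress
        delta2 = prev_progress - current_progress
        current_progress = (prev_progress, 100)[delta1 < delta2]
    return current_progress

print(normalize_progress(0.05, 0.10, 94.0))  # 95.0: normal forward progress
print(normalize_progress(0.11, 0.10, 99.5))  # 100: wrapped past the start point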
Example #4
class DeepRacerRacetrackEnv(gym.Env):
    def __init__(self):

        # Create the observation space
        img_width = TRAINING_IMAGE_SIZE[0]
        img_height = TRAINING_IMAGE_SIZE[1]
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(img_height, img_width, 3),
                                            dtype=np.uint8)

        # Create the action space
        self.action_space = spaces.Box(low=np.array([-1, 0]),
                                       high=np.array([+1, +1]),
                                       dtype=np.float32)

        if node_type == SIMULATION_WORKER:

            # ROS initialization
            rospy.init_node('rl_coach', anonymous=True)
            rospy.Subscriber('/camera/zed/rgb/image_rect_color', sensor_image,
                             self.callback_image)
            self.ack_publisher = rospy.Publisher(
                '/vesc/low_level/ackermann_cmd_mux/output',
                AckermannDriveStamped,
                queue_size=100)
            self.set_model_state = rospy.ServiceProxy(
                '/gazebo/set_model_state', SetModelState)
            self.get_model_state = rospy.ServiceProxy(
                '/gazebo/get_model_state', GetModelState)
            self.get_link_state = rospy.ServiceProxy('/gazebo/get_link_state',
                                                     GetLinkState)

            # Read in parameters
            self.world_name = rospy.get_param('WORLD_NAME')
            self.job_type = rospy.get_param('JOB_TYPE')
            self.aws_region = rospy.get_param('AWS_REGION')
            self.metrics_s3_bucket = rospy.get_param('METRICS_S3_BUCKET')
            self.metrics_s3_object_key = rospy.get_param(
                'METRICS_S3_OBJECT_KEY')
            self.metrics = []
            self.simulation_job_arn = 'arn:aws:robomaker:' + self.aws_region + ':' + \
                                      rospy.get_param('ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID') + \
                                      ':simulation-job/' + rospy.get_param('AWS_ROBOMAKER_SIMULATION_JOB_ID')

            if self.job_type == TRAINING_JOB:
                from custom_files.customer_reward_function import reward_function
                self.reward_function = reward_function
                self.metric_name = rospy.get_param('METRIC_NAME')
                self.metric_namespace = rospy.get_param('METRIC_NAMESPACE')
                self.training_job_arn = rospy.get_param('TRAINING_JOB_ARN')
                self.target_number_of_episodes = rospy.get_param(
                    'NUMBER_OF_EPISODES')
                self.target_reward_score = rospy.get_param(
                    'TARGET_REWARD_SCORE')
            else:
                from markov.defaults import reward_function
                self.reward_function = reward_function
                self.number_of_trials = 0
                self.target_number_of_trials = rospy.get_param(
                    'NUMBER_OF_TRIALS')

            # Read in the waypoints
            BUNDLE_CURRENT_PREFIX = os.environ.get("BUNDLE_CURRENT_PREFIX",
                                                   None)
            if not BUNDLE_CURRENT_PREFIX:
                raise ValueError("Cannot get BUNDLE_CURRENT_PREFIX")
            route_file_name = os.path.join(BUNDLE_CURRENT_PREFIX, 'install',
                                           'deepracer_simulation', 'share',
                                           'deepracer_simulation', 'routes',
                                           '{}.npy'.format(self.world_name))
            waypoints = np.load(route_file_name)
            self.is_loop = np.all(waypoints[0, :] == waypoints[-1, :])
            if self.is_loop:
                self.center_line = LinearRing(waypoints[:, 0:2])
                self.inner_border = LinearRing(waypoints[:, 2:4])
                self.outer_border = LinearRing(waypoints[:, 4:6])
                self.road_poly = Polygon(self.outer_border,
                                         [self.inner_border])
            else:
                self.center_line = LineString(waypoints[:, 0:2])
                self.inner_border = LineString(waypoints[:, 2:4])
                self.outer_border = LineString(waypoints[:, 4:6])
                self.road_poly = Polygon(
                    np.vstack(
                        (self.outer_border, np.flipud(self.inner_border))))
            self.center_dists = [
                self.center_line.project(Point(p), normalized=True)
                for p in self.center_line.coords[:-1]
            ] + [1.0]
            self.track_length = self.center_line.length

            # Initialize state data
            self.episodes = 0
            self.start_dist = 0.0
            self.round_robin = (self.job_type == TRAINING_JOB)
            self.is_simulation_done = False
            self.image = None
            self.steering_angle = 0
            self.speed = 0
            self.action_taken = 0
            self.prev_progress = 0
            self.prev_point = Point(0, 0)
            self.prev_point_2 = Point(0, 0)
            self.next_state = None
            self.reward = None
            self.reward_in_episode = 0
            self.done = False
            self.steps = 0
            self.simulation_start_time = 0
            self.reverse_dir = False

    def reset(self):
        if node_type == SAGEMAKER_TRAINING_WORKER:
            return self.observation_space.sample()

        # Simulation is done - so RoboMaker will start to shut down the app.
        # Until RoboMaker shuts down the app, do nothing, or else the metrics may show unexpected data.
        if (node_type == SIMULATION_WORKER) and self.is_simulation_done:
            while True:
                time.sleep(1)

        self.image = None
        self.steering_angle = 0
        self.speed = 0
        self.action_taken = 0
        self.prev_progress = 0
        self.prev_point = Point(0, 0)
        self.prev_point_2 = Point(0, 0)
        self.next_state = None
        self.reward = None
        self.reward_in_episode = 0
        self.done = False

        # Reset the car and record the simulation start time
        self.send_action(0, 0)
        self.racecar_reset()
        time.sleep(SLEEP_AFTER_RESET_TIME_IN_SECOND)
        self.steps = 0
        self.simulation_start_time = time.time()

        # Compute the initial state
        self.infer_reward_state(0, 0)
        return self.next_state

    def racecar_reset(self):
        rospy.wait_for_service('/gazebo/set_model_state')

        # Compute the starting position and heading
        next_point_index = bisect.bisect(self.center_dists, self.start_dist)
        start_point = self.center_line.interpolate(self.start_dist,
                                                   normalized=True)
        start_yaw = math.atan2(
            self.center_line.coords[next_point_index][1] - start_point.y,
            self.center_line.coords[next_point_index][0] - start_point.x)
        start_quaternion = Rotation.from_euler('zyx',
                                               [start_yaw, 0, 0]).as_quat()

        # Construct the model state and send to Gazebo
        modelState = ModelState()
        modelState.model_name = 'racecar'
        modelState.pose.position.x = start_point.x
        modelState.pose.position.y = start_point.y
        modelState.pose.position.z = 0
        modelState.pose.orientation.x = start_quaternion[0]
        modelState.pose.orientation.y = start_quaternion[1]
        modelState.pose.orientation.z = start_quaternion[2]
        modelState.pose.orientation.w = start_quaternion[3]
        modelState.twist.linear.x = 0
        modelState.twist.linear.y = 0
        modelState.twist.linear.z = 0
        modelState.twist.angular.x = 0
        modelState.twist.angular.y = 0
        modelState.twist.angular.z = 0
        self.set_model_state(modelState)

    def step(self, action):
        if node_type == SAGEMAKER_TRAINING_WORKER:
            return self.observation_space.sample(), 0, False, {}

        # Initialize next state, reward, done flag
        self.next_state = None
        self.reward = None
        self.done = False

        # Send this action to Gazebo and increment the step count
        self.steering_angle = float(action[0])
        self.speed = float(action[1])
        self.send_action(self.steering_angle, self.speed)
        time.sleep(SLEEP_BETWEEN_ACTION_AND_REWARD_CALCULATION_TIME_IN_SECOND)
        self.steps += 1

        # Compute the next state and reward
        self.infer_reward_state(self.steering_angle, self.speed)
        return self.next_state, self.reward, self.done, {}

    def callback_image(self, data):
        self.image = data

    def send_action(self, steering_angle, speed):
        ack_msg = AckermannDriveStamped()
        ack_msg.header.stamp = rospy.Time.now()
        ack_msg.drive.steering_angle = steering_angle
        ack_msg.drive.speed = speed
        self.ack_publisher.publish(ack_msg)

    def infer_reward_state(self, steering_angle, speed):
        rospy.wait_for_service('/gazebo/get_model_state')
        rospy.wait_for_service('/gazebo/get_link_state')

        # Wait till we have an image from the camera
        # btown TODO: Incorporate feedback from callejae@ here (CR-6434645 rev1)
        while not self.image:
            time.sleep(SLEEP_WAITING_FOR_IMAGE_TIME_IN_SECOND)

        # Read model state from Gazebo
        model_state = self.get_model_state('racecar', '')
        model_orientation = Rotation.from_quat([
            model_state.pose.orientation.x, model_state.pose.orientation.y,
            model_state.pose.orientation.z, model_state.pose.orientation.w
        ])
        model_location = np.array([
            model_state.pose.position.x,
            model_state.pose.position.y,
            model_state.pose.position.z]) + \
            model_orientation.apply(RELATIVE_POSITION_OF_FRONT_OF_CAR)
        model_point = Point(model_location[0], model_location[1])
        model_heading = model_orientation.as_euler('zyx')[0]

        # Read the wheel locations from Gazebo
        left_rear_wheel_state = self.get_link_state('racecar::left_rear_wheel',
                                                    '')
        left_front_wheel_state = self.get_link_state(
            'racecar::left_front_wheel', '')
        right_rear_wheel_state = self.get_link_state(
            'racecar::right_rear_wheel', '')
        right_front_wheel_state = self.get_link_state(
            'racecar::right_front_wheel', '')
        wheel_points = [
            Point(left_rear_wheel_state.link_state.pose.position.x,
                  left_rear_wheel_state.link_state.pose.position.y),
            Point(left_front_wheel_state.link_state.pose.position.x,
                  left_front_wheel_state.link_state.pose.position.y),
            Point(right_rear_wheel_state.link_state.pose.position.x,
                  right_rear_wheel_state.link_state.pose.position.y),
            Point(right_front_wheel_state.link_state.pose.position.x,
                  right_front_wheel_state.link_state.pose.position.y)
        ]

        # Project the current location onto the center line and find nearest points
        current_dist = self.center_line.project(model_point, normalized=True)
        next_waypoint_index = max(
            0,
            min(bisect.bisect(self.center_dists, current_dist),
                len(self.center_dists) - 1))
        prev_waypoint_index = next_waypoint_index - 1
        distance_from_next = model_point.distance(
            Point(self.center_line.coords[next_waypoint_index]))
        distance_from_prev = model_point.distance(
            Point(self.center_line.coords[prev_waypoint_index]))
        closest_waypoint_index = (
            prev_waypoint_index,
            next_waypoint_index)[distance_from_next < distance_from_prev]

        # Compute distance from center and road width
        nearest_point_center = self.center_line.interpolate(current_dist,
                                                            normalized=True)
        nearest_point_inner = self.inner_border.interpolate(
            self.inner_border.project(nearest_point_center))
        nearest_point_outer = self.outer_border.interpolate(
            self.outer_border.project(nearest_point_center))
        distance_from_center = nearest_point_center.distance(model_point)
        distance_from_inner = nearest_point_inner.distance(model_point)
        distance_from_outer = nearest_point_outer.distance(model_point)
        track_width = nearest_point_inner.distance(nearest_point_outer)
        is_left_of_center = (distance_from_outer < distance_from_inner) if self.reverse_dir \
            else (distance_from_inner < distance_from_outer)

        # Convert current progress to be [0,100] starting at the initial waypoint
        current_progress = current_dist - self.start_dist
        if current_progress < 0.0: current_progress = current_progress + 1.0
        current_progress = 100 * current_progress
        if current_progress < self.prev_progress:
            # Either: (1) we wrapped around and have finished the track,
            delta1 = current_progress + 100 - self.prev_progress
            # or (2) for some reason the car went backwards (this should be rare)
            delta2 = self.prev_progress - current_progress
            current_progress = (self.prev_progress, 100)[delta1 < delta2]

        # Car is off track if all wheels are outside the borders
        wheel_on_track = [self.road_poly.contains(p) for p in wheel_points]
        all_wheels_on_track = all(wheel_on_track)
        any_wheels_on_track = any(wheel_on_track)

        # Compute the reward
        if any_wheels_on_track:
            done = False
            params = {
                'all_wheels_on_track': all_wheels_on_track,
                'x': model_point.x,
                'y': model_point.y,
                'heading': model_heading * 180.0 / math.pi,
                'distance_from_center': distance_from_center,
                'progress': current_progress,
                'steps': self.steps,
                'speed': speed,
                'steering_angle': steering_angle * 180.0 / math.pi,
                'track_width': track_width,
                'waypoints': list(self.center_line.coords),
                'closest_waypoints':
                [prev_waypoint_index, next_waypoint_index],
                'is_left_of_center': is_left_of_center,
                'is_reversed': self.reverse_dir
            }
            reward = self.reward_function(params)
        else:
            done = True
            reward = CRASHED

        # Reset if the car position hasn't changed in the last 2 steps
        if min(model_point.distance(self.prev_point),
               model_point.distance(self.prev_point_2)) <= 0.0001:
            done = True
            reward = CRASHED  # stuck

        # Simulation jobs are done when progress reaches 100
        if current_progress >= 100:
            done = True

        # Keep data from the previous step around
        self.prev_point_2 = self.prev_point
        self.prev_point = model_point
        self.prev_progress = current_progress

        # Read the image and resize to get the state
        image = Image.frombytes('RGB', (self.image.width, self.image.height),
                                self.image.data, 'raw', 'RGB', 0, 1)
        image = image.resize(TRAINING_IMAGE_SIZE, resample=2)  # 2 == Image.BILINEAR
        state = np.array(image)

        # Set the next state, reward, and done flag
        self.next_state = state
        self.reward = reward
        self.reward_in_episode += reward
        self.done = done

        # Trace logs to help us debug and visualize the training runs
        # btown TODO: This should be written to S3, not to CWL.
        stdout_ = 'SIM_TRACE_LOG:%d,%d,%.4f,%.4f,%.4f,%.2f,%.2f,%d,%.4f,%s,%s,%.4f,%d,%.2f,%s\n' % (
            self.episodes, self.steps, model_location[0], model_location[1],
            model_heading, self.steering_angle, self.speed, self.action_taken,
            self.reward, self.done, all_wheels_on_track, current_progress,
            closest_waypoint_index, self.track_length, time.time())
        print(stdout_)
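        # A rendered trace line looks like (all values made up):
        # SIM_TRACE_LOG:5,120,3.1416,-1.2000,0.7854,0.15,2.00,3,1.0000,False,True,42.5000,17,17.66,1573077263.4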

        # Terminate this episode when ready
        if self.done and node_type == SIMULATION_WORKER:
            self.finish_episode(current_progress)

    def finish_episode(self, progress):

        # Stop the car from moving
        self.send_action(0, 0)

        # Increment episode count, update start dist for round robin
        self.episodes += 1
        if self.round_robin:
            self.start_dist = (self.start_dist +
                               ROUND_ROBIN_ADVANCE_DIST) % 1.0

        # Update metrics based on job type
        if self.job_type == TRAINING_JOB:
            self.send_reward_to_cloudwatch(self.reward_in_episode)
            self.update_training_metrics()
            self.write_metrics_to_s3()
            if self.is_training_done():
                self.cancel_simulation_job()
        elif self.job_type == EVALUATION_JOB:
            self.number_of_trials += 1
            self.update_eval_metrics(progress)
            self.write_metrics_to_s3()
            if self.is_evaluation_done():
                self.cancel_simulation_job()

    def update_eval_metrics(self, progress):
        eval_metric = {}
        eval_metric['completion_percentage'] = int(progress)
        eval_metric['metric_time'] = int(round(time.time() * 1000))
        eval_metric['start_time'] = int(
            round(self.simulation_start_time * 1000))
        eval_metric['elapsed_time_in_milliseconds'] = int(
            round((time.time() - self.simulation_start_time) * 1000))
        eval_metric['trial'] = int(self.number_of_trials)
        self.metrics.append(eval_metric)

    def update_training_metrics(self):
        training_metric = {}
        training_metric['reward_score'] = int(round(self.reward_in_episode))
        training_metric['metric_time'] = int(round(time.time() * 1000))
        training_metric['start_time'] = int(
            round(self.simulation_start_time * 1000))
        training_metric['elapsed_time_in_milliseconds'] = int(
            round((time.time() - self.simulation_start_time) * 1000))
        training_metric['episode'] = int(self.episodes)
        self.metrics.append(training_metric)

    def write_metrics_to_s3(self):
        session = boto3.session.Session()
        s3_url = os.environ.get('S3_ENDPOINT_URL')
        s3_client = session.client('s3',
                                   region_name=self.aws_region,
                                   endpoint_url=s3_url)
        metrics_body = json.dumps({'metrics': self.metrics})
        s3_client.put_object(Bucket=self.metrics_s3_bucket,
                             Key=self.metrics_s3_object_key,
                             Body=bytes(metrics_body, encoding='utf-8'))
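        # The uploaded body is a single JSON document, e.g. (values made up):
        # {"metrics": [{"reward_score": 123, "metric_time": 1573077263000,
        #               "start_time": 1573077000000,
        #               "elapsed_time_in_milliseconds": 263000, "episode": 7}]}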

    def is_evaluation_done(self):
        if ((self.target_number_of_trials > 0)
                and (self.target_number_of_trials == self.number_of_trials)):
            self.is_simulation_done = True
        return self.is_simulation_done

    def is_training_done(self):
        if ((self.target_number_of_episodes > 0) and (self.target_number_of_episodes == self.episodes)) or \
           ((self.is_number(self.target_reward_score)) and (self.target_reward_score <= self.reward_in_episode)):
            self.is_simulation_done = True
        return self.is_simulation_done

    def is_number(self, value_to_check):
        try:
            float(value_to_check)
            return True
        except (ValueError, TypeError):
            return False
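        # Examples: is_number("2.5") and is_number(3) return True, while
        # is_number("reward") and is_number(None) return False (the latter
        # because float(None) raises TypeError, which is caught above).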

    def cancel_simulation_job(self):
        self.send_action(0, 0)
        session = boto3.session.Session()
        robomaker_client = session.client('robomaker',
                                          region_name=self.aws_region)
        robomaker_client.cancel_simulation_job(job=self.simulation_job_arn)

    def send_reward_to_cloudwatch(self, reward):
        isLocal = os.environ.get("LOCAL")
        if isLocal is None:
            session = boto3.session.Session()
            cloudwatch_client = session.client('cloudwatch',
                                               region_name=self.aws_region)
            cloudwatch_client.put_metric_data(
                MetricData=[{
                    'MetricName': self.metric_name,
                    'Dimensions': [{
                        'Name': 'TRAINING_JOB_ARN',
                        'Value': self.training_job_arn
                    }],
                    'Unit': 'None',
                    'Value': reward
                }],
                Namespace=self.metric_namespace)
        else:
            print("{}: {}".format(self.metric_name, reward))
Example #5
class DeepRacerRacetrackEnv(gym.Env):
    def __init__(self):

        # Create the observation space
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(TRAINING_IMAGE_SIZE[1],
                                                   TRAINING_IMAGE_SIZE[0], 3),
                                            dtype=np.uint8)
        # Create the action space
        self.action_space = spaces.Box(low=np.array([-1, 0]),
                                       high=np.array([+1, +1]),
                                       dtype=np.float32)

        if node_type == SIMULATION_WORKER:

            # ROS initialization
            rospy.init_node('rl_coach', anonymous=True)

            # wait for required services
            rospy.wait_for_service(
                '/deepracer_simulation_environment/get_waypoints')
            rospy.wait_for_service(
                '/deepracer_simulation_environment/reset_car')
            rospy.wait_for_service('/gazebo/get_model_state')
            rospy.wait_for_service('/gazebo/get_link_state')
            rospy.wait_for_service('/gazebo/clear_joint_forces')
            rospy.wait_for_service('/gazebo/get_physics_properties')
            rospy.wait_for_service('/gazebo/set_physics_properties')

            self.get_model_state = rospy.ServiceProxy(
                '/gazebo/get_model_state', GetModelState)
            self.get_link_state = rospy.ServiceProxy('/gazebo/get_link_state',
                                                     GetLinkState)
            self.clear_forces_client = rospy.ServiceProxy(
                '/gazebo/clear_joint_forces', JointRequest)
            self.reset_car_client = rospy.ServiceProxy(
                '/deepracer_simulation_environment/reset_car', ResetCarSrv)
            get_waypoints_client = rospy.ServiceProxy(
                '/deepracer_simulation_environment/get_waypoints',
                GetWaypointSrv)
            get_physics_properties = rospy.ServiceProxy(
                '/gazebo/get_physics_properties', GetPhysicsProperties)
            set_physics_properties = rospy.ServiceProxy(
                '/gazebo/set_physics_properties', SetPhysicsProperties)

            # Create the publishers for sending speed and steering info to the car
            self.velocity_pub_dict = OrderedDict()
            self.steering_pub_dict = OrderedDict()

            for topic in VELOCITY_TOPICS:
                self.velocity_pub_dict[topic] = rospy.Publisher(topic,
                                                                Float64,
                                                                queue_size=1)

            for topic in STEERING_TOPICS:
                self.steering_pub_dict[topic] = rospy.Publisher(topic,
                                                                Float64,
                                                                queue_size=1)

            # Read in parameters
            self.world_name = rospy.get_param('WORLD_NAME')
            self.job_type = rospy.get_param('JOB_TYPE')
            self.aws_region = rospy.get_param('AWS_REGION')
            self.metrics_s3_bucket = rospy.get_param('METRICS_S3_BUCKET')
            self.metrics_s3_object_key = rospy.get_param(
                'METRICS_S3_OBJECT_KEY')
            self.aws_endpoint_url = rospy.get_param('AWS_ENDPOINT_URL')
            self.metrics = []
            self.simulation_job_arn = 'arn:aws:robomaker:' + self.aws_region + ':' + \
                                      rospy.get_param('ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID') + \
                                      ':simulation-job/' + rospy.get_param('AWS_ROBOMAKER_SIMULATION_JOB_ID')
            self.max_update_rate = rospy.get_param('SIM_UPDATE_RATE')

            # Set the simulation rate
            try:
                resp = utils.gazebo_service_call(get_physics_properties)
                # resp is a gazebo_msgs/GetPhysicsProperties response:
                # time_step, pause, max_update_rate, gravity (Vector3),
                # ode_config (gazebo_msgs/ODEPhysics), success, status_message

                update_msg = SetPhysicsPropertiesRequest()
                update_msg.time_step = resp.time_step
                update_msg.max_update_rate = self.max_update_rate
                update_msg.gravity = resp.gravity
                update_msg.ode_config = resp.ode_config
                # The SetPhysicsProperties request mirrors those fields
                # (time_step, max_update_rate, gravity, ode_config); its
                # response carries a success flag.
                resp = utils.gazebo_service_call(set_physics_properties,
                                                 update_msg)

            except Exception as ex:
                utils.json_format_logger(
                    "Unable to set physics update rate: {}".format(ex),
                    **utils.build_system_error_dict(
                        utils.SIMAPP_ENVIRONMENT_EXCEPTION,
                        utils.SIMAPP_EVENT_ERROR_CODE_500))

            # Setup SIM_TRACE_LOG data upload to s3
            self.setup_simtrace_data_upload_to_s3()

            if self.job_type == TRAINING_JOB:
                from custom_files.customer_reward_function import reward_function
                self.reward_function = reward_function
                self.target_number_of_episodes = rospy.get_param(
                    'NUMBER_OF_EPISODES')
                self.target_reward_score = rospy.get_param(
                    'TARGET_REWARD_SCORE')
            else:
                from markov.defaults import reward_function
                self.reward_function = reward_function
                self.number_of_trials = 0
                self.target_number_of_trials = rospy.get_param(
                    'NUMBER_OF_TRIALS')

            # Request the waypoints
            waypoints = None
            try:
                resp = utils.gazebo_service_call(get_waypoints_client)
                waypoints = np.array(resp.waypoints).reshape(
                    resp.row, resp.col)
            except Exception as ex:
                utils.json_format_logger(
                    "Unable to retrieve waypoints: {}".format(ex),
                    **utils.build_system_error_dict(
                        utils.SIMAPP_ENVIRONMENT_EXCEPTION,
                        utils.SIMAPP_EVENT_ERROR_CODE_500))

            is_loop = np.all(waypoints[0, :] == waypoints[-1, :])
            if is_loop:
                self.center_line = LinearRing(waypoints[:, 0:2])
                self.inner_border = LinearRing(waypoints[:, 2:4])
                self.outer_border = LinearRing(waypoints[:, 4:6])
                self.road_poly = Polygon(self.outer_border,
                                         [self.inner_border])
            else:
                self.center_line = LineString(waypoints[:, 0:2])
                self.inner_border = LineString(waypoints[:, 2:4])
                self.outer_border = LineString(waypoints[:, 4:6])
                self.road_poly = Polygon(
                    np.vstack(
                        (self.outer_border, np.flipud(self.inner_border))))
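            # Shapely semantics: for a looped track, Polygon(outer, [inner])
            # is the outer ring with the inner ring cut out as a hole, so
            # road_poly.contains(p) is True only between the two borders. For
            # an open track, stacking the outer border with the reversed inner
            # border traces one simple closed band around the road surface.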
            self.center_dists = [
                self.center_line.project(Point(p), normalized=True)
                for p in self.center_line.coords[:-1]
            ] + [1.0]
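            # Example (hypothetical square loop): projecting each of the four
            # corner waypoints gives center_dists like [0.0, 0.25, 0.5, 0.75];
            # the duplicated closing coordinate is dropped and 1.0 appended, so
            # bisect lookups over normalized distance cover all of [0, 1].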
            self.track_length = self.center_line.length
            # Queue used to maintain image consumption synchronicity
            self.image_queue = queue.Queue(IMG_QUEUE_BUF_SIZE)
            rospy.Subscriber('/camera/zed/rgb/image_rect_color', sensor_image,
                             self.callback_image)

            # Initialize state data
            self.episodes = 0
            self.start_ndist = rospy.get_param('START_POSITION', 0.0)
            self.reverse_dir = False
            self.change_start = rospy.get_param(
                'CHANGE_START_POSITION', (self.job_type == TRAINING_JOB))
            self.alternate_dir = rospy.get_param('ALTERNATE_DRIVING_DIRECTION',
                                                 False)
            self.is_simulation_done = False
            self.steering_angle = 0
            self.speed = 0
            self.action_taken = 0
            self.prev_progress = 0
            self.prev_point = Point(0, 0)
            self.prev_point_2 = Point(0, 0)
            self.next_state = None
            self.reward = None
            self.reward_in_episode = 0
            self.done = False
            self.steps = 0
            self.simulation_start_time = 0
            self.allow_servo_step_signals = False
            #self.metadata = {'render.modes': ['rgb_array']}

    def setup_simtrace_data_upload_to_s3(self):
        if node_type == SIMULATION_WORKER:
            # Start a timer to upload SIM_TRACE data to S3 before the simapp
            # exits. There is not enough time to upload the data once RoboMaker
            # begins shutting down, so schedule the upload 300 seconds before
            # the job's maximum duration elapses; 300 seconds is an arbitrary
            # margin, based on various launched jobs, that provides enough
            # time for the upload.
            global simapp_data_upload_timer

            # CT: Use MAX_JOB_DURATION env var instead
            #session = boto3.session.Session()
            #robomaker_client = session.client('robomaker', region_name=self.aws_region, endpoint_url=self.aws_endpoint_url)
            #result = robomaker_client.describe_simulation_job(
            #    job=self.simulation_job_arn
            #)
            result = {
                "maxJobDurationInSeconds": rospy.get_param('MAX_JOB_DURATION'),
                "lastStartedAt": datetime.datetime.now(),
                "lastUpdatedAt": datetime.datetime.now()
            }
            logger.info("robomaker job description: {}".format(result))
            self.simapp_upload_duration = result[
                'maxJobDurationInSeconds'] - SIMAPP_DATA_UPLOAD_TIME_TO_S3
            start = 0
            if self.job_type == TRAINING_JOB:
                logger.info("simapp training job started_at={}".format(
                    result['lastStartedAt']))
                start = result['lastStartedAt']
                self.simtrace_s3_endpoint_url = rospy.get_param(
                    'AWS_ENDPOINT_URL')
                self.simtrace_s3_bucket = rospy.get_param(
                    'SAGEMAKER_SHARED_S3_BUCKET')
                self.simtrace_s3_key = os.path.join(
                    rospy.get_param('SAGEMAKER_SHARED_S3_PREFIX'),
                    TRAINING_SIMTRACE_DATA_S3_OBJECT_KEY)
            else:
                logger.info("simapp evaluation job started_at={}".format(
                    result['lastUpdatedAt']))
                start = result['lastUpdatedAt']
                self.simtrace_s3_endpoint_url = rospy.get_param(
                    'AWS_ENDPOINT_URL')
                self.simtrace_s3_bucket = rospy.get_param('MODEL_S3_BUCKET')
                self.simtrace_s3_key = os.path.join(
                    rospy.get_param('MODEL_S3_PREFIX'),
                    EVALUATION_SIMTRACE_DATA_S3_OBJECT_KEY)
            end = start + datetime.timedelta(
                seconds=self.simapp_upload_duration)
            now = datetime.datetime.now(
                tz=end.tzinfo)  # use the same tzinfo as the robomaker timestamps
            self.simapp_data_upload_time = (end - now).total_seconds()
            logger.info(
                "simapp job started_at={} now={} end={} upload_data_in={} sec".
                format(start, now, end, self.simapp_data_upload_time))
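            # Arithmetic sketch (hypothetical values): with MAX_JOB_DURATION of
            # 28800 s and SIMAPP_DATA_UPLOAD_TIME_TO_S3 of 300 s, the upload
            # duration is 28500 s; if the job started 600 s ago, the timer
            # fires in 27900 s, i.e. 300 s before the job would be killed.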
            simapp_data_upload_timer = threading.Timer(
                self.simapp_data_upload_time, simapp_data_upload_timer_expiry)
            simapp_data_upload_timer.daemon = True
            simapp_data_upload_timer.start()
            logger.info("Timer with {} seconds is {}".format(
                self.simapp_data_upload_time,
                simapp_data_upload_timer.is_alive()))

            #setup to upload SIM_TRACE_DATA to S3
            self.simtrace_data = DeepRacerRacetrackSimTraceData(
                self.simtrace_s3_bucket, self.simtrace_s3_key,
                self.simtrace_s3_endpoint_url)

    #def render(self, mode='rgb_array'):
    #    if mode == 'rgb_array':
    #        if self.next_state is None:
    #            image = np.empty(shape=(TRAINING_IMAGE_SIZE[1], TRAINING_IMAGE_SIZE[0], 3), dtype=np.uint8)
    #        else:
    #            image = self.next_state
    #        return image
    #    else:
    #        return NotImplementedError

    def reset(self):
        if node_type == SAGEMAKER_TRAINING_WORKER:
            return self.observation_space.sample()

        # The simulation is done, so RoboMaker will start shutting down the
        # app. Until it does, do nothing more, or the metrics may show
        # unexpected data.
        if (node_type == SIMULATION_WORKER) and self.is_simulation_done:
            while True:
                time.sleep(1)

        self.steering_angle = 0
        self.speed = 0
        self.action_taken = 0
        self.prev_progress = 0
        self.prev_point = Point(0, 0)
        self.prev_point_2 = Point(0, 0)
        self.next_state = None
        self.reward = None
        self.reward_in_episode = 0
        self.done = False
        # Reset the car and record the simulation start time
        if self.allow_servo_step_signals:
            self.send_action(0, 0)

        self.racecar_reset()
        self.steps = 0
        self.simulation_start_time = time.time()
        self.infer_reward_state(0, 0)

        return self.next_state

    def set_next_state(self):
        # Make sure the first image is the starting image
        image_data = self.image_queue.get(block=True, timeout=None)
        # Read the image and resize to get the state
        image = Image.frombytes('RGB', (image_data.width, image_data.height),
                                image_data.data, 'raw', 'RGB', 0, 1)
        image = image.resize(TRAINING_IMAGE_SIZE, resample=2)  # 2 == Image.BILINEAR

        ### IMAGE FILTERING ###
        changes = list()
        probability = 0.75
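        # Each np.random.choice(2, p=(0.25, 0.75)) draw below returns 1 (apply
        # the perturbation) with probability 0.75, independently per filter,
        # so e.g. all four perturbations fire together about 0.75**4 ~ 32% of
        # the time.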

        # contrast: multiplier in RGB space
        if np.random.choice(2, p=(1.0 - probability, probability)):
            changes.append('Contrast')
            contrast_mult = np.random.exponential()
            new_contrast_array = np.array(image, 'float') * contrast_mult
            new_contrast_array = np.clip(new_contrast_array, 0, 255)
            image = Image.fromarray(new_contrast_array.astype('uint8'))

        ### BEGIN HSV SPACE ###

        # HSV conversion
        hsv_image = image.convert(mode='HSV')
        h, s, v = hsv_image.split()

        # hue: rotation
        if np.random.choice(2, p=(1.0 - probability, probability)):
            changes.append("Hue")
            # PIL uses 0..255 for Hue range
            hue_delta = np.random.randint(0, 255)
            hue_array = np.array(
                h, dtype='float')  # convert to float to allow overflow
            new_hue_array = (hue_array + hue_delta) % 256
            h = Image.fromarray(new_hue_array.astype('uint8'), 'L')

        # saturation: multiplier
        if np.random.choice(2, p=(1.0 - probability, probability)):
            changes.append("Saturation")
            sat_mult = np.random.uniform(0.0, 1.0)
            sat_array = np.array(s, dtype='float')
            new_sat_array = sat_array * sat_mult
            new_sat_array = np.clip(new_sat_array, 0, 255)
            s = Image.fromarray(new_sat_array.astype('uint8'), 'L')

        # brightness: delta
        if np.random.choice(2, p=(1.0 - probability, probability)):
            changes.append("Brightness")
            bright_delta = np.random.randint(-127, 127)
            bright_array = np.array(v, dtype='float')
            new_bright_array = bright_array + bright_delta
            new_bright_array = np.clip(new_bright_array, 0, 255)
            v = Image.fromarray(new_bright_array.astype('uint8'), 'L')

        final_image = Image.merge('HSV', (h, s, v))
        final_image = final_image.convert(mode='RGB')

        ### END HSV SPACE ###

        if False:
            # Debug-only branch: hide the print here so it's only invoked when we don't care about performance
            print("ObservationColorPerturbation Changes: %s" %
                  ','.join(changes))
            fname = 'color_perturb_filter_%f' % time.time()
            np.save(os.path.join("/opt/ml/model", fname),
                    np.array(final_image))

        self.next_state = np.array(final_image)

    def racecar_reset(self):
        try:
            for joint in EFFORT_JOINTS:
                utils.gazebo_service_call(self.clear_forces_client, joint)
            prev_index, next_index = self.find_prev_next_waypoints(
                self.start_ndist)
            utils.gazebo_service_call(self.reset_car_client, self.start_ndist,
                                      next_index)
            # First clear the queue so that we set the state to the start image
            _ = self.image_queue.get(block=True, timeout=None)
            self.set_next_state()

        except Exception as ex:
            utils.json_format_logger(
                "Unable to reset the car: {}".format(ex),
                **utils.build_system_error_dict(
                    utils.SIMAPP_ENVIRONMENT_EXCEPTION,
                    utils.SIMAPP_EVENT_ERROR_CODE_500))

    def set_allow_servo_step_signals(self, allow_servo_step_signals):
        self.allow_servo_step_signals = allow_servo_step_signals

    def step(self, action):
        if node_type == SAGEMAKER_TRAINING_WORKER:
            return self.observation_space.sample(), 0, False, {}

        # Initialize next state, reward, done flag
        self.next_state = None
        self.reward = None
        self.done = False

        # Send this action to Gazebo and increment the step count
        self.steering_angle = float(action[0])
        self.speed = max(float(0.0), float(action[1]))
        if self.allow_servo_step_signals:
            self.send_action(self.steering_angle, self.speed)
        self.steps += 1

        # Compute the next state and reward
        self.infer_reward_state(self.steering_angle, self.speed)
        return self.next_state, self.reward, self.done, {}

    def callback_image(self, data):
        try:
            self.image_queue.put_nowait(data)
        except queue.Full:
            # Only warn if it's the middle of an episode, not during training
            if self.allow_servo_step_signals:
                logger.info("Warning: dropping image due to queue full")
        except Exception as ex:
            utils.json_format_logger(
                "Error retrieving frame from gazebo: {}".format(ex),
                **utils.build_system_error_dict(
                    utils.SIMAPP_ENVIRONMENT_EXCEPTION,
                    utils.SIMAPP_EVENT_ERROR_CODE_500))

    def send_action(self, steering_angle, speed):
        # Simple v = w * r relation computes the desired wheel angular velocity
        wheel_rpm = speed / WHEEL_RADIUS

        for _, pub in self.velocity_pub_dict.items():
            pub.publish(wheel_rpm)

        for _, pub in self.steering_pub_dict.items():
            pub.publish(steering_angle)
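        # Example (hypothetical wheel radius): with WHEEL_RADIUS = 0.03 m and
        # speed = 2.0 m/s, the published value is 2.0 / 0.03 ~ 66.7 rad/s;
        # despite the variable name, v / r yields an angular velocity in
        # rad/s rather than revolutions per minute.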

    def infer_reward_state(self, steering_angle, speed):
        try:
            self.set_next_state()
        except Exception as ex:
            utils.json_format_logger(
                "Unable to retrieve image from queue: {}".format(ex),
                **utils.build_system_error_dict(
                    utils.SIMAPP_ENVIRONMENT_EXCEPTION,
                    utils.SIMAPP_EVENT_ERROR_CODE_500))

        # Read model state from Gazebo
        model_state = utils.gazebo_service_call(self.get_model_state,
                                                'racecar', '')

        model_orientation = Rotation.from_quat([
            model_state.pose.orientation.x, model_state.pose.orientation.y,
            model_state.pose.orientation.z, model_state.pose.orientation.w
        ])
        model_location = np.array([
            model_state.pose.position.x,
            model_state.pose.position.y,
            model_state.pose.position.z]) + \
            model_orientation.apply(RELATIVE_POSITION_OF_FRONT_OF_CAR)
        model_point = Point(model_location[0], model_location[1])
        model_heading = model_orientation.as_euler('zyx')[0]
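        # scipy's as_euler('zyx') returns [yaw, pitch, roll] in radians, so
        # index 0 is the heading about the z axis. Example: the quaternion
        # [0, 0, 0.7071, 0.7071] (a 90-degree turn about z) yields a
        # model_heading of about pi/2.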

        # Read the wheel locations from Gazebo
        left_rear_wheel_state = utils.gazebo_service_call(
            self.get_link_state, 'racecar::left_rear_wheel', '')
        left_front_wheel_state = utils.gazebo_service_call(
            self.get_link_state, 'racecar::left_front_wheel', '')
        right_rear_wheel_state = utils.gazebo_service_call(
            self.get_link_state, 'racecar::right_rear_wheel', '')
        right_front_wheel_state = utils.gazebo_service_call(
            self.get_link_state, 'racecar::right_front_wheel', '')
        wheel_points = [
            Point(left_rear_wheel_state.link_state.pose.position.x,
                  left_rear_wheel_state.link_state.pose.position.y),
            Point(left_front_wheel_state.link_state.pose.position.x,
                  left_front_wheel_state.link_state.pose.position.y),
            Point(right_rear_wheel_state.link_state.pose.position.x,
                  right_rear_wheel_state.link_state.pose.position.y),
            Point(right_front_wheel_state.link_state.pose.position.x,
                  right_front_wheel_state.link_state.pose.position.y)
        ]

        # Project the current location onto the center line and find nearest points
        current_ndist = self.center_line.project(model_point, normalized=True)
        prev_index, next_index = self.find_prev_next_waypoints(current_ndist)
        distance_from_prev = model_point.distance(
            Point(self.center_line.coords[prev_index]))
        distance_from_next = model_point.distance(
            Point(self.center_line.coords[next_index]))
        closest_waypoint_index = (next_index
                                  if distance_from_next < distance_from_prev
                                  else prev_index)

        # Compute distance from center and road width
        nearest_point_center = self.center_line.interpolate(current_ndist,
                                                            normalized=True)
        nearest_point_inner = self.inner_border.interpolate(
            self.inner_border.project(nearest_point_center))
        nearest_point_outer = self.outer_border.interpolate(
            self.outer_border.project(nearest_point_center))
        distance_from_center = nearest_point_center.distance(model_point)
        distance_from_inner = nearest_point_inner.distance(model_point)
        distance_from_outer = nearest_point_outer.distance(model_point)
        track_width = nearest_point_inner.distance(nearest_point_outer)
        is_left_of_center = (distance_from_outer < distance_from_inner) if self.reverse_dir \
            else (distance_from_inner < distance_from_outer)

        # Convert current progress to be [0,100] starting at the initial waypoint
        if self.reverse_dir:
            current_progress = self.start_ndist - current_ndist
        else:
            current_progress = current_ndist - self.start_ndist
        if current_progress < 0.0: current_progress = current_progress + 1.0
        current_progress = 100 * current_progress
        if current_progress < self.prev_progress:
            # Either: (1) we wrapped around and have finished the track,
            delta1 = current_progress + 100 - self.prev_progress
            # or (2) for some reason the car went backwards (this should be rare)
            delta2 = self.prev_progress - current_progress
            current_progress = 100 if delta1 < delta2 else self.prev_progress

        # Car is off track if all wheels are outside the borders
        wheel_on_track = [self.road_poly.contains(p) for p in wheel_points]
        all_wheels_on_track = all(wheel_on_track)
        any_wheels_on_track = any(wheel_on_track)

        # Simulation elapsed time, which may be faster or slower than wall time
        simulation_time = rospy.get_rostime()

        # Compute the reward
        reward = 0.0
        if any_wheels_on_track:
            done = False
            params = {
                'all_wheels_on_track': all_wheels_on_track,
                'x': model_point.x,
                'y': model_point.y,
                'heading': model_heading * 180.0 / math.pi,
                'distance_from_center': distance_from_center,
                'progress': current_progress,
                'steps': self.steps,
                'speed': speed,
                'steering_angle': steering_angle * 180.0 / math.pi,
                'track_width': track_width,
                'waypoints': list(self.center_line.coords),
                'closest_waypoints': [prev_index, next_index],
                'is_left_of_center': is_left_of_center,
                'is_reversed': self.reverse_dir,
                'action': self.action_taken
            }
            try:
                reward = float(self.reward_function(params))
            except Exception as e:
                utils.json_format_logger(
                    "Error in the reward function: {}".format(e),
                    **utils.build_user_error_dict(
                        utils.SIMAPP_SIMULATION_WORKER_EXCEPTION,
                        utils.SIMAPP_EVENT_ERROR_CODE_400))
                traceback.print_exc()
                utils.simapp_exit_gracefully()
        else:
            done = True
            reward = CRASHED

        # Reset if the car position hasn't changed in the last 2 steps; this
        # is only evaluated every NUM_STEPS_TO_CHECK_STUCK steps
        prev_pnt_dist = min(model_point.distance(self.prev_point),
                            model_point.distance(self.prev_point_2))

        if prev_pnt_dist <= 0.0001 and self.steps % NUM_STEPS_TO_CHECK_STUCK == 0:
            done = True
            reward = CRASHED  # stuck

        # Simulation jobs are done when progress reaches 100
        if current_progress >= 100:
            done = True

        # Keep data from the previous step around
        self.prev_point_2 = self.prev_point
        self.prev_point = model_point
        self.prev_progress = current_progress

        # Set the reward and done flag
        self.reward = reward
        self.reward_in_episode += reward
        self.done = done

        # Trace logs to help us debug and visualize the training runs
        # btown TODO: This should be written to S3, not to CWL.
        logger.info(
            'SIM_TRACE_LOG:%d,%d,%.4f,%.4f,%.4f,%.2f,%.2f,%d,%.4f,%s,%s,%.4f,%d,%.2f,%s,%.4f\n'
            %
            (self.episodes, self.steps, model_location[0], model_location[1],
             model_heading, self.steering_angle, self.speed, self.action_taken,
             self.reward, self.done, all_wheels_on_track, current_progress,
             closest_waypoint_index, self.track_length, time.time(),
             float(simulation_time.secs) + float(simulation_time.nsecs) / 1e9))

        #build json record of the reward metrics
        reward_metrics = OrderedDict()
        reward_metrics['episode'] = self.episodes
        reward_metrics['steps'] = self.steps
        reward_metrics['X'] = model_location[0]
        reward_metrics['Y'] = model_location[1]
        reward_metrics['yaw'] = model_heading
        reward_metrics['steer'] = self.steering_angle
        reward_metrics['throttle'] = self.speed
        reward_metrics['action'] = self.action_taken
        reward_metrics['reward'] = self.reward
        reward_metrics['done'] = self.done
        reward_metrics['all_wheels_on_track'] = all_wheels_on_track
        reward_metrics['progress'] = current_progress
        reward_metrics['closest_waypoint'] = closest_waypoint_index
        reward_metrics['track_len'] = self.track_length
        reward_metrics['tstamp'] = time.time()
        self.simtrace_data.write_simtrace_data(reward_metrics)

        # Terminate this episode when ready
        if done and node_type == SIMULATION_WORKER:
            self.finish_episode(current_progress)

    def find_prev_next_waypoints(self, ndist):
        if self.reverse_dir:
            next_index = bisect.bisect_left(self.center_dists, ndist) - 1
            prev_index = next_index + 1
            if next_index == -1: next_index = len(self.center_dists) - 1
        else:
            next_index = bisect.bisect_right(self.center_dists, ndist)
            prev_index = next_index - 1
            if next_index == len(self.center_dists): next_index = 0
        return prev_index, next_index
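        # Worked example (hypothetical values): with center_dists
        # [0.0, 0.25, 0.5, 0.75, 1.0] and ndist = 0.6, driving forward gives
        # prev_index = 2 and next_index = 3; with reverse_dir the roles swap,
        # giving prev_index = 3 and next_index = 2, and the wrap cases keep
        # both indices on the ring when ndist falls before the first waypoint
        # or past the last one.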

    def stop_car(self):
        self.steering_angle = 0
        self.speed = 0
        self.action_taken = 0
        self.send_action(0, 0)
        self.racecar_reset()

    def finish_episode(self, progress):
        # Increment episode count, update start position and direction
        self.episodes += 1
        if self.change_start:
            self.start_ndist = (self.start_ndist +
                                ROUND_ROBIN_ADVANCE_DIST) % 1.0
        if self.alternate_dir:
            self.reverse_dir = not self.reverse_dir
        # Reset the car
        self.stop_car()

        # upload SIM_TRACE data to S3
        self.simtrace_data.upload_to_s3(self.episodes)

        # Update metrics based on job type
        if self.job_type == TRAINING_JOB:
            self.update_training_metrics(progress)
            self.write_metrics_to_s3()
            if self.is_training_done():
                self.cancel_simulation_job()
        elif self.job_type == EVALUATION_JOB:
            self.number_of_trials += 1
            self.update_eval_metrics(progress)
            self.write_metrics_to_s3()

    def update_eval_metrics(self, progress):
        eval_metric = {}
        eval_metric['completion_percentage'] = int(progress)
        eval_metric['metric_time'] = int(round(time.time() * 1000))
        eval_metric['start_time'] = int(
            round(self.simulation_start_time * 1000))
        eval_metric['elapsed_time_in_milliseconds'] = int(
            round((time.time() - self.simulation_start_time) * 1000))
        eval_metric['trial'] = int(self.number_of_trials)
        self.metrics.append(eval_metric)

    def update_training_metrics(self, progress):
        training_metric = {}
        training_metric['reward_score'] = int(round(self.reward_in_episode))
        training_metric['metric_time'] = int(round(time.time() * 1000))
        training_metric['start_time'] = int(
            round(self.simulation_start_time * 1000))
        training_metric['elapsed_time_in_milliseconds'] = int(
            round((time.time() - self.simulation_start_time) * 1000))
        training_metric['episode'] = int(self.episodes)
        training_metric['completion_percentage'] = int(progress)
        self.metrics.append(training_metric)

    def write_metrics_to_s3(self):
        session = boto3.session.Session()
        s3_client = session.client('s3',
                                   region_name=self.aws_region,
                                   endpoint_url=self.aws_endpoint_url)
        metrics_body = json.dumps({'metrics': self.metrics})
        s3_client.put_object(Bucket=self.metrics_s3_bucket,
                             Key=self.metrics_s3_object_key,
                             Body=bytes(metrics_body, encoding='utf-8'))

    def is_training_done(self):
        if ((self.target_number_of_episodes > 0) and (self.target_number_of_episodes == self.episodes)) or \
           ((isinstance(self.target_reward_score, (int, float))) and (self.target_reward_score <= self.reward_in_episode)):
            self.is_simulation_done = True
        return self.is_simulation_done

    def cancel_simulation_job(self):
        logger.info(
            "cancel_simulation_job: make sure to shutdown simapp first")
        simapp_shutdown()