def compute_score(self, states_list, timestep=0, print_states=True, print_additionnal_info=True):
    """
    Available information:
        x                  : horizontal position
        y                  : vertical position
        angle              : angle relative to the vertical (negative = right, positive = left)
        first_leg_contact  : left leg touches the ground
        second_leg_contact : right leg touches the ground
        throttle           : throttle intensity
        gimbal             : gimbal angle relative to the rocket axis
        velocity_x         : horizontal velocity (negative = going left, positive = going right)
        velocity_y         : vertical velocity (negative = going down, positive = going up)
        angular_velocity   : angular velocity (negative = turning anti-clockwise, positive = turning clockwise)
        distance           : distance from the center of the ship
        velocity           : norm of the velocity vector (velocity_x, velocity_y)
        landed             : both legs touching the ground
        landed_full        : both legs touching the ground for one second (60 frames)
        states             : dictionary containing all variables in the state vector, for display purposes
        additionnal_information : dictionary containing additional information, for display purposes
    """
    # states information extraction
    (
        x,
        y,
        angle,
        first_leg_contact,
        second_leg_contact,
        throttle,
        gimbal,
        velocity_x,
        velocity_y,
        angular_velocity,
        distance,
        velocity,
        landed,
        landed_full,
        states,
        additionnal_information,
    ) = info_extractor(states_list, self.env)

    # Score only counts once the rocket has stayed down for 60 frames
    # (i.e. landed_full); it is then 1 - |x|, so the closer to the pad
    # center, the higher the score.
    if self.env.environment.landed_ticks > 59:
        score = 1 - abs(x)
    else:
        score = 0
    return score
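# --- Usage sketch (added for illustration, not part of the original exercise) ---
# How compute_score might be called at the end of an episode to evaluate an agent.
# `agent`, `env`, `env.reset()` and `env.step()` are assumed names here; only
# `compute_score` above comes from this file.
#
# states_list = env.reset()
# for t in range(max_steps):
#     action = agent.act(states_list)
#     states_list, done = env.step(action)
#     if done:
#         break
# episode_score = agent.compute_score(states_list, timestep=t)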
def reward_function(self, states_list, timestep=0, print_states=True, print_additionnal_info=True):
    ########## WORK NEEDED #############
    ### You need to shape the reward ###
    ####################################
    """
    Available information:
        x                  : horizontal position
        y                  : vertical position
        angle              : angle relative to the vertical (negative = right, positive = left)
        first_leg_contact  : left leg touches the ground
        second_leg_contact : right leg touches the ground
        throttle           : throttle intensity
        gimbal             : gimbal angle relative to the rocket axis
        velocity_x         : horizontal velocity (negative = going left, positive = going right)
        velocity_y         : vertical velocity (negative = going down, positive = going up)
        angular_velocity   : angular velocity (negative = turning anti-clockwise, positive = turning clockwise)
        distance           : distance from the center of the ship
        velocity           : norm of the velocity vector (velocity_x, velocity_y)
        landed             : both legs touching the ground
        landed_full        : both legs touching the ground for one second (60 frames)
        states             : dictionary containing all variables in the state vector, for display purposes
        additionnal_information : dictionary containing additional information, for display purposes

    **Hints**
    Be careful with the sign of the different variables.
    Go on and shape the reward!
    """
    # states information extraction
    (
        x,
        y,
        angle,
        first_leg_contact,
        second_leg_contact,
        throttle,
        gimbal,
        velocity_x,
        velocity_y,
        angular_velocity,
        distance,
        velocity,
        landed,
        landed_full,
        states,
        additionnal_information,
    ) = info_extractor(states_list, self.env)

    ######## REWARD SHAPING ###########

    # state variables for reward (groundcontact feeds the commented-out variant below)
    groundcontact = first_leg_contact or second_leg_contact
    eps = 1e-6  # guards the divisions by abs(y) when the rocket reaches ground level
    reward = 0

    # rewards while the landing is not (yet) complete: penalize tilt and horizontal
    # offset, throttle use and speed (both scaled up as altitude shrinks), plus a
    # speed penalty that grows with elapsed time
    if not landed_full:
        reward = (0 - abs(angle) - abs(x)
                  - abs(throttle) / (abs(y) + eps)
                  - abs(velocity) / (abs(y) + eps)
                  - velocity * timestep)
        reward = reward / 100
        print('\ry: {}'.format(y), end='')
        # print('\rflying: {}'.format(reward), end='')

    # earlier attempt, kept for reference:
    # if groundcontact:
    #     # case in which the rocket landed (one or both legs) but didn't stabilize (broken).
    #     # -> we set the reward to 0.5 (as ground contact is good) and subtract a value depending
    #     # on angle, horizontal distance, velocity and angular velocity, i.e. the variables we want
    #     # to bring to 0 (ingredients for a successful landing!). We clip this value to 1, so we
    #     # don't go under -0.5.
    #     reward = 1 - min(1, (abs(x) - abs(angle) - abs(angular_velocity)
    #                          - abs(angle * angular_velocity) - abs(throttle) / abs(y)) / 100)
    #     print('\rlanded improperly: {}'.format(reward), end='')
    # else:
    #     # case in which the rocket is still flying.
    #     # -> we want to incentivize the rocket to go towards the center and to stabilize, so we
    #     # start from reward = 0 and subtract a value that we want to be minimized. We clip
    #     # this value to make sure the reward doesn't go under -1.
    #     reward = 0 - (((abs(x) + abs(angle) + abs(angular_velocity)
    #                     + abs(angle * angular_velocity)
    #                     + abs(throttle) / abs(y)) / 100) * np.log(timestep))
    #     print('\rflying: {}'.format(reward), end='')

    # and now the reward in case of success
    if landed_full:
        reward = 10000
        print('\rlanded properly: {}'.format(reward), end='')
    # if distance > 0:
    #     # case in which the rocket didn't land in the center.
    #     # -> it's a success: we set the reward to 1 and subtract a value depending on
    #     # the distance from the center of the platform, but not going under 0
    #     reward += 10000  # - abs(x)**2
    #     print('\rlanded uncentered: {}'.format(reward), end='')
    # else:
    #     # full successful landing, right in the center!
    #     # -> highest reward, +1
    #     reward += 10000
    #     print('\rlanded perfectly: {}'.format(reward), end='')

    # reward = np.clip(reward, -1, 1)  # just in case - normally it should already be clipped above

    display_info(states, additionnal_information, reward, timestep, verbose=False)

    return reward
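# --- Note on the shaping strategy (illustrative sketch, assumed names) ---
# The final version at the end of this file replaces raw per-step penalties with
# potential-based shaping: build a potential phi(s) that scores how "good" a state
# is, then reward the change between consecutive steps. Shaping rewards of the
# form gamma * phi(s') - phi(s) are known to preserve the optimal policy
# (Ng et al., 1999). A minimal, generic sketch of the pattern:

class PotentialShaper:
    def __init__(self, potential, gamma=1.0):
        self.potential = potential  # callable: state -> float, higher = better
        self.gamma = gamma
        self.prev_phi = None

    def reset(self):
        self.prev_phi = None  # clear at every episode start

    def shape(self, state):
        # per-step shaping bonus: improvement of the potential since last step
        phi = self.potential(state)
        bonus = 0.0 if self.prev_phi is None else self.gamma * phi - self.prev_phi
        self.prev_phi = phi
        return bonus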
def reward_function(self, states_list, timestep=0, print_states=True, print_additionnal_info=True):
    ########## WORK NEEDED #############
    ### You need to shape the reward ###
    ####################################
    """
    Available information:
        x                  : horizontal position
        y                  : vertical position
        angle              : angle relative to the vertical (negative = right, positive = left)
        first_leg_contact  : left leg touches the ground
        second_leg_contact : right leg touches the ground
        throttle           : throttle intensity
        gimbal             : gimbal angle relative to the rocket axis
        velocity_x         : horizontal velocity (negative = going left, positive = going right)
        velocity_y         : vertical velocity (negative = going down, positive = going up)
        angular_velocity   : angular velocity (negative = turning anti-clockwise, positive = turning clockwise)
        distance           : distance from the center of the ship
        velocity           : norm of the velocity vector (velocity_x, velocity_y)
        landed             : both legs touching the ground
        landed_full        : both legs touching the ground for one second (60 frames)
        states             : dictionary containing all variables in the state vector, for display purposes
        additionnal_information : dictionary containing additional information, for display purposes

    **Hints**
    Be careful with the sign of the different variables.
    Go on and shape the reward!
    """
    # states information extraction
    (
        x,
        y,
        angle,
        first_leg_contact,
        second_leg_contact,
        throttle,
        gimbal,
        velocity_x,
        velocity_y,
        angular_velocity,
        distance,
        velocity,
        landed,
        landed_full,
        states,
        additionnal_information,
    ) = info_extractor(states_list, self.env)

    ######## REWARD SHAPING ###########

    # reward definition (per timestep): you have to fill it in!
    reward = -1

    display_info(states, additionnal_information, reward, timestep, verbose=False)

    return reward
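# --- One possible starting point for the stub above (a sketch, not the official
# solution): penalize, every step, the quantities a good landing drives to zero,
# and pay a terminal bonus once landed_full is reached. The weights are guesses.
#
# if landed_full:
#     reward = 100.0
# else:
#     reward = -(abs(x) + abs(angle) + abs(velocity) + abs(angular_velocity)) / 100.0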
def reward_function(self, states_list, timestep=0, print_states=True, print_additionnal_info=True):
    ########## WORK NEEDED #############
    ### You need to shape the reward ###
    ####################################
    """
    Available information:
        x                  : horizontal position
        y                  : vertical position
        angle              : angle relative to the vertical (negative = right, positive = left)
        first_leg_contact  : left leg touches the ground
        second_leg_contact : right leg touches the ground
        throttle           : throttle intensity
        gimbal             : gimbal angle relative to the rocket axis
        velocity_x         : horizontal velocity (negative = going left, positive = going right)
        velocity_y         : vertical velocity (negative = going down, positive = going up)
        angular_velocity   : angular velocity (negative = turning anti-clockwise, positive = turning clockwise)
        distance           : distance from the center of the ship
        velocity           : norm of the velocity vector (velocity_x, velocity_y)
        landed             : both legs touching the ground
        landed_full        : both legs touching the ground for one second (60 frames)
        states             : dictionary containing all variables in the state vector, for display purposes
        additionnal_information : dictionary containing additional information, for display purposes

    **Hints**
    Be careful with the sign of the different variables.
    Go on and shape the reward!
    """
    # states information extraction
    (
        x,
        y,
        angle,
        first_leg_contact,
        second_leg_contact,
        throttle,
        gimbal,
        velocity_x,
        velocity_y,
        angular_velocity,
        distance,
        velocity,
        landed,
        landed_full,
        states,
        additionnal_information,
    ) = info_extractor(states_list, self.env)

    # debug prints, kept for reference:
    # if timestep % 10 == 0:
    #     print(f"velocity_y {velocity_y}")
    #     print(f"angle {angle}")

    ######## REWARD SHAPING ###########

    # Potential-based shaping: build a potential `shape` from the current state,
    # then reward the improvement (shape - prev_shape) between consecutive steps.
    shape = 0
    reward = 0

    # penalty on bad position, speed and attitude: the quantities a good landing
    # drives to zero
    shape -= (
        0.1 * abs(distance)
        + 0.5 * abs(velocity)
        + 5 * abs(angle)
        + 0.15 * abs(angular_velocity)
        + 10 * abs(x)
        + 0.5 * max(velocity_y - y, 0)  # upward velocity exceeding the altitude (discourages climbing, especially near the ground)
        # + 0.1 * max(velocity - y, 0)
    )

    # reward ground contact, even in partial-failure scenarios (one leg only)
    shape += 0.1 * (float(first_leg_contact) + float(second_leg_contact))

    # per-step reward = improvement of the potential since the previous step
    if self.prev_shape is not None:
        reward += shape - self.prev_shape
    self.prev_shape = shape

    reward = np.clip(reward, -1, 1)

    # terminal bonus for a full landing, decreasing with the distance from the pad center
    if landed_full:
        reward = 100 - 100 * abs(x)

    display_info(states, additionnal_information, reward, timestep, verbose=False)

    return reward
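# --- Training-loop caveat (sketch, assumed names) ---
# This last version keeps state across calls in self.prev_shape, so the shaping
# baseline must be cleared whenever the environment is reset; otherwise the first
# step of a new episode is shaped against the last step of the previous one.
#
# for episode in range(num_episodes):
#     states_list = env.reset()
#     agent.prev_shape = None  # restart potential-based shaping from scratch
#     ...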