def test_tolerance_unknown_sigmoid(self):
    with self.assertRaisesWithLiteralMatch(
            ValueError, "Unknown sigmoid type 'unsupported_sigmoid'."):
        rewards.tolerance(0, bounds=(0, 1), margin=.1,
                          sigmoid="unsupported_sigmoid")

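# Illustrative sketch (not part of the original tests) of the
# `rewards.tolerance` contract the snippets in this file rely on. Signature
# per dm_control's utils/rewards.py: tolerance(x, bounds=(0.0, 0.0),
# margin=0.0, sigmoid='gaussian', value_at_margin=0.1). The reward is 1
# inside `bounds` and decays through the chosen sigmoid to `value_at_margin`
# at a distance of `margin` outside them. Assumes
# `from dm_control.utils import rewards`, as elsewhere in this file.
def demo_tolerance_contract():
    bounds, margin, value_at_margin = (-1.0, 1.0), 0.5, 0.1
    # Inside the bounds the reward saturates at 1.
    assert rewards.tolerance(0.0, bounds=bounds, margin=margin) == 1.0
    # Exactly `margin` beyond a bound, the reward equals `value_at_margin`.
    v = rewards.tolerance(bounds[1] + margin, bounds=bounds, margin=margin,
                          value_at_margin=value_at_margin, sigmoid='gaussian')
    assert abs(v - value_at_margin) < 1e-6
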
def reward_function(obs, actions):
    head_height = obs[:, 21]
    torso_upright = obs[:, 36]
    control = actions[:, :]
    center_of_mass_velocity = obs[:, 37]
    standing = rewards.tolerance(
        head_height,
        bounds=(_STAND_HEIGHT, float('inf')),
        margin=_STAND_HEIGHT / 10)  # TODO: 4? Now 10, which means 1.26.
    upright = rewards.tolerance(torso_upright,
                                bounds=(0.9, float('inf')), sigmoid='linear',
                                margin=0.2, value_at_margin=0)
    stand_reward = standing * upright
    small_control = rewards.tolerance(control, margin=1,
                                      value_at_margin=0,
                                      sigmoid='quadratic').mean()
    small_control = (4 + small_control) / 5
    com_velocity = center_of_mass_velocity
    move = rewards.tolerance(com_velocity,
                             bounds=(_WALK_SPEED, float('inf')),
                             margin=_WALK_SPEED, value_at_margin=0,
                             sigmoid='linear')
    move = (5 * move + 1) / 6
    return small_control * stand_reward * move

def get_reward(self, physics):
    """Returns a reward to the agent."""
    standing = rewards.tolerance(physics.head_height(),
                                 bounds=(_STAND_HEIGHT, float('inf')),
                                 margin=_STAND_HEIGHT / 4)
    upright = rewards.tolerance(physics.thorax_upright(),
                                bounds=(0.9, float('inf')), sigmoid='linear',
                                margin=1.9, value_at_margin=0)
    stand_reward = standing * upright
    small_control = rewards.tolerance(physics.control(), margin=1,
                                      value_at_margin=0,
                                      sigmoid='quadratic').mean()
    small_control = (4 + small_control) / 5
    if self._move_speed == 0:
        horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]]
        dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean()
        return small_control * stand_reward * dont_move
    else:
        com_velocity = np.linalg.norm(
            physics.center_of_mass_velocity()[[0, 1]])
        move = rewards.tolerance(com_velocity,
                                 bounds=(self._move_speed, float('inf')),
                                 margin=self._move_speed, value_at_margin=0,
                                 sigmoid='linear')
        move = (5 * move + 1) / 6
        return small_control * stand_reward * move

def get_reward(self, physics):
    """Returns a reward to the agent."""
    standing = rewards.tolerance(
        physics.torso_height(),
        bounds=(_STAND_HEIGHT, float("inf")),
        margin=_STAND_HEIGHT / 2,
    )
    upright = (1 + physics.torso_upright()) / 2
    stand_reward = (3 * standing + upright) / 4
    if self._move_speed == 0:
        return stand_reward
    else:
        move_reward = rewards.tolerance(
            physics.horizontal_velocity(),
            bounds=(self._move_speed, float("inf")),
            margin=self._move_speed / 2,
            value_at_margin=0.5,
            sigmoid="linear",
        )
        reward = stand_reward * (5 * move_reward + 1) / 6
        if self._move_type == "walk":
            if reward < 0.7:
                reward = 0
        elif self._move_type == "run":
            if reward < 0.25:
                reward = 0
        else:
            raise ValueError(self._move_type)
        return reward

def get_reward_factors(self, physics):
    """Returns the factorized reward."""
    standing = super(Fetch, self).get_reward_factors(physics)
    # Reward for bringing mouth close to ball.
    bite_radius = physics.named.model.site_size['upper_bite', 0]
    bite_margin = 2
    reach_ball = rewards.tolerance(physics.ball_to_mouth_distance(),
                                   bounds=(0, bite_radius),
                                   sigmoid='reciprocal',
                                   margin=bite_margin)
    reach_ball = (6 * reach_ball + 1) / 7
    # Reward for bringing the ball close to the target.
    target_radius = physics.named.model.geom_size['target', 0]
    bring_margin = physics.named.model.geom_size['floor', 0]
    ball_near_target = rewards.tolerance(physics.ball_to_target_distance(),
                                         bounds=(0, target_radius),
                                         sigmoid='reciprocal',
                                         margin=bring_margin)
    fetch_ball = (ball_near_target + 1) / 2
    # Let go of the ball if it's been fetched.
    if physics.ball_to_target_distance() < 2 * target_radius:
        reach_ball = 1
    return np.hstack((standing, reach_ball, fetch_ball))

def get_reward(self, physics):
    """Returns a reward to the agent."""
    upright = (1 + physics.torso_upright()) / 2
    if self._height is not None:
        jumping = rewards.tolerance(physics.torso_height(),
                                    bounds=(_JUMP_HEIGHT, float('inf')),
                                    margin=_JUMP_HEIGHT / 2)
        jumping_reward = (3 * jumping + upright) / 4  # Get rid of upright?
        return jumping_reward
    standing = rewards.tolerance(physics.torso_height(),
                                 bounds=(_STAND_HEIGHT, float('inf')),
                                 margin=_STAND_HEIGHT / 2)
    stand_reward = (3 * standing + upright) / 4
    if self._move_speed == 0:
        return stand_reward
    else:
        if self._move_speed < 0:
            bounds = (-float('inf'), self._move_speed)
        else:
            bounds = (self._move_speed, float('inf'))
        move_reward = rewards.tolerance(physics.horizontal_velocity(),
                                        bounds=bounds,
                                        margin=abs(self._move_speed / 2),
                                        value_at_margin=0.5,
                                        sigmoid='linear')
        return stand_reward * (5 * move_reward + 1) / 6

def get_reward_factors(self, physics):
    """Returns the factorized reward."""
    # Keep the torso at standing height.
    torso = rewards.tolerance(physics.torso_pelvis_height()[0],
                              bounds=(self._stand_height[0], float('inf')),
                              margin=self._stand_height[0])
    # Keep the pelvis at standing height.
    pelvis = rewards.tolerance(physics.torso_pelvis_height()[1],
                               bounds=(self._stand_height[1], float('inf')),
                               margin=self._stand_height[1])
    # Keep head, torso and pelvis upright.
    upright = rewards.tolerance(physics.upright(),
                                bounds=(_MIN_UPRIGHT_COSINE, float('inf')),
                                sigmoid='linear',
                                margin=_MIN_UPRIGHT_COSINE + 1,
                                value_at_margin=0)
    # Reward for foot touch forces up to bodyweight.
    touch = rewards.tolerance(physics.touch_sensors().sum(),
                              bounds=(self._body_weight, float('inf')),
                              margin=self._body_weight,
                              sigmoid='linear',
                              value_at_margin=0.9)
    return np.hstack((torso, pelvis, upright, touch))

def get_reward(self, physics):
    target = physics.bind(self._pedestal.target_site).xpos
    obj = physics.bind(self._prop_frame).xpos
    tcp = physics.bind(self._hand.tool_center_point).xpos

    tcp_to_obj = np.linalg.norm(obj - tcp)
    grasp = rewards.tolerance(tcp_to_obj,
                              bounds=(0, _TARGET_RADIUS),
                              margin=_TARGET_RADIUS,
                              sigmoid='long_tail')
    obj_to_target = np.linalg.norm(obj - target)
    in_place = rewards.tolerance(obj_to_target,
                                 bounds=(0, _TARGET_RADIUS),
                                 margin=_TARGET_RADIUS,
                                 sigmoid='long_tail')
    tcp_to_target = np.linalg.norm(tcp - target)
    hand_away = rewards.tolerance(tcp_to_target,
                                  bounds=(4 * _TARGET_RADIUS, np.inf),
                                  margin=3 * _TARGET_RADIUS,
                                  sigmoid='long_tail')
    in_place_weight = 10.
    grasp_or_hand_away = grasp * (1 - in_place) + hand_away * in_place
    return (grasp_or_hand_away
            + in_place_weight * in_place) / (1 + in_place_weight)

def test_tolerance_sigmoids(self, sigmoid):
    margins = [0.01, 1.0, 100, 10000]
    values_at_margin = [0.1, 0.5, 0.9]
    bounds_list = [(0, 0), (-1, 1), (-np.pi, np.pi), (-100, 100)]
    for bounds in bounds_list:
        for margin in margins:
            for value_at_margin in values_at_margin:
                upper_margin = bounds[1] + margin
                value = rewards.tolerance(x=upper_margin,
                                          bounds=bounds,
                                          margin=margin,
                                          value_at_margin=value_at_margin,
                                          sigmoid=sigmoid)
                self.assertAlmostEqual(value, value_at_margin,
                                       delta=np.sqrt(EPS))
                lower_margin = bounds[0] - margin
                value = rewards.tolerance(x=lower_margin,
                                          bounds=bounds,
                                          margin=margin,
                                          value_at_margin=value_at_margin,
                                          sigmoid=sigmoid)
                self.assertAlmostEqual(value, value_at_margin,
                                       delta=np.sqrt(EPS))

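# Sketch (an assumption, not the library source) of how the 'gaussian'
# sigmoid can satisfy the property tested above: distances are normalized
# by `margin`, and the scale is chosen so the curve passes exactly through
# `value_at_margin` at unit distance. Assumes `import numpy as np`, as in
# the surrounding tests.
def gaussian_sigmoid(x, value_at_1):
    scale = np.sqrt(-2.0 * np.log(value_at_1))
    return np.exp(-0.5 * (x * scale) ** 2)

# gaussian_sigmoid(1.0, 0.1) returns 0.1 up to floating point, matching
# the assertAlmostEqual checks in test_tolerance_sigmoids.
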
def get_reward(self, physics):
    """Returns a reward to the agent."""
    # Reward for moving close to the ball.
    arena_radius = physics.named.model.geom_size['floor', 0] * np.sqrt(2)
    workspace_radius = physics.named.model.site_size['workspace', 0]
    ball_radius = physics.named.model.geom_size['ball', 0]
    reach_reward = rewards.tolerance(
        physics.self_to_ball_distance(),
        bounds=(0, workspace_radius + ball_radius),
        sigmoid='linear',
        margin=arena_radius, value_at_margin=0)
    # Reward for bringing the ball to the target.
    target_radius = physics.named.model.site_size['target', 0]
    fetch_reward = rewards.tolerance(
        physics.ball_to_target_distance(),
        bounds=(0, target_radius),
        sigmoid='linear',
        margin=arena_radius, value_at_margin=0)
    reach_then_fetch = reach_reward * (0.5 + 0.5 * fetch_reward)
    return _upright_reward(physics) * reach_then_fetch

def test_tolerance_vectorization(self):
    bounds = (-.1, .1)
    margin = 0.2
    x_array = np.random.randn(2, 3, 4)
    value_array = rewards.tolerance(x=x_array, bounds=bounds, margin=margin)
    self.assertEqual(x_array.shape, value_array.shape)
    for i, x in enumerate(x_array.ravel()):
        value = rewards.tolerance(x=x, bounds=bounds, margin=margin)
        self.assertEqual(value, value_array.ravel()[i])

def get_reward(self, physics):
    """Returns a reward to the agent."""
    target_size = physics.named.model.geom_size['target', 0]
    near_target = rewards.tolerance(physics.mass_to_target_dist(),
                                    bounds=(0, target_size),
                                    margin=target_size)
    control_reward = rewards.tolerance(physics.control(), margin=1,
                                       value_at_margin=0,
                                       sigmoid='quadratic').mean()
    small_control = (control_reward + 4) / 5
    return near_target * small_control

def get_reward(self, physics):
    """Returns a reward to the agent."""
    box_size = physics.named.model.geom_size['target', 0]
    min_box_to_target_distance = min(
        physics.site_distance(name, 'target') for name in self._box_names)
    box_is_close = rewards.tolerance(min_box_to_target_distance,
                                     margin=2 * box_size)
    hand_to_target_distance = physics.site_distance('grasp', 'target')
    hand_is_far = rewards.tolerance(hand_to_target_distance,
                                    bounds=(.1, float('inf')),
                                    margin=_CLOSE)
    return box_is_close * hand_is_far

def _walker_get_reward(self, physics):
    walker_height = physics.bind(self._walker.root_body).xpos[2]  # xpos['z']
    stand_reward = rewards.tolerance(walker_height,
                                     bounds=(self._height, float('inf')),
                                     margin=self._height / 2)
    walker_vel = physics.bind(self._walker.root_body).subtree_linvel[0]
    move_reward = rewards.tolerance(walker_vel,
                                    bounds=(self._vel, float('inf')),
                                    margin=self._vel / 2,
                                    value_at_margin=0.5,
                                    sigmoid='linear')
    return stand_reward * (5 * move_reward + 1) / 6

def get_reward(self, physics):
    """Returns a reward to the agent."""
    standing = rewards.tolerance(physics.head_height(),
                                 bounds=(_STAND_HEIGHT, float('inf')),
                                 margin=_STAND_HEIGHT / 4)
    upright = rewards.tolerance(physics.torso_upright(),
                                bounds=(0.9, float('inf')), sigmoid='linear',
                                margin=1.9, value_at_margin=0)
    stand_reward = standing * upright
    small_control = rewards.tolerance(physics.control(), margin=1,
                                      value_at_margin=0,
                                      sigmoid='quadratic').mean()
    small_control = (4 + small_control) / 5
    if self._move_speed == 0:
        horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]]
        dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean()
        return small_control * stand_reward * dont_move
    else:
        com_velocity = np.linalg.norm(
            physics.center_of_mass_velocity()[[0, 1]])
        # Replaces the original tolerance-based term:
        # move = rewards.tolerance(com_velocity,
        #                          bounds=(self._move_speed, float('inf')),
        #                          margin=self._move_speed, value_at_margin=0,
        #                          sigmoid='linear')
        move = physics.center_of_mass_velocity()[0] * physics.torso_forward()
        # Fraction by which each joint exceeds 98% of its normalized limit,
        # clipped to [0, 1].
        joint_angles_norm = np.abs(
            physics.joint_angles(self._joint_limits)) - 0.98
        joint_angles_norm[joint_angles_norm < 0.0] = 0.0
        joint_angles_norm = joint_angles_norm / (1.0 - 0.98)
        joint_angles_norm[joint_angles_norm > 1.0] = 1.0
        joints_at_limit_cost = 0.15 * np.sum(joint_angles_norm)
        electricity_cost = 0.005 * np.sum(
            np.abs(physics.control() * physics.joint_velocities()))
        return (move + 2.0 + 0.1 * upright
                - electricity_cost - joints_at_limit_cost)

def get_reward(self, physics):
    hand_pos = physics.bind(self._hand.tool_center_point).xpos
    target_pos = physics.bind(self._target).xpos
    distance = np.linalg.norm(hand_pos - target_pos)
    prop_x_distance = abs(target_pos[0] - _TARGET_PROP_XPOS)
    hand_reward = rewards.tolerance(distance,
                                    bounds=(0, _HAND_TARGET_RADIUS),
                                    margin=_HAND_TARGET_RADIUS * 4,
                                    value_at_margin=0.2,
                                    sigmoid='long_tail')
    prop_reward = rewards.tolerance(prop_x_distance,
                                    bounds=(0, _TARGET_RADIUS),
                                    margin=_TARGET_RADIUS * 4,
                                    value_at_margin=0.2,
                                    sigmoid='long_tail')
    return hand_reward + prop_reward

def get_reward(self, physics):
    """Returns a smooth reward."""
    target_size = physics.named.model.geom_size['target', 0]
    return rewards.tolerance(physics.nose_to_target_dist(),
                             bounds=(0, target_size),
                             margin=5 * target_size,
                             sigmoid='long_tail')

def get_reward(self, physics):
    prop_height = self._get_height_of_lowest_vertex(physics)
    return rewards.tolerance(prop_height,
                             bounds=(self._target_height, np.inf),
                             margin=_DISTANCE_TO_LIFT,
                             value_at_margin=0,
                             sigmoid='linear')

def get_reward(self, physics):
    """Returns a reward to the agent."""
    return rewards.tolerance(physics.speed(),
                             bounds=(_RUN_SPEED, float('inf')),
                             margin=_RUN_SPEED,
                             value_at_margin=0,
                             sigmoid='linear')

def get_reward(self, physics):
    hand_pos = physics.bind(self._hand.tool_center_point).xpos
    target_pos = physics.bind(self._target).xpos
    distance = np.linalg.norm(hand_pos - target_pos)
    return rewards.tolerance(distance,
                             bounds=(0, _TARGET_RADIUS),
                             margin=_TARGET_RADIUS)

def get_reward(self, physics):
    """Returns a reward applicable to the performed task."""
    standing = rewards.tolerance(physics.height(), (_STAND_HEIGHT, 2))
    if self._hopping:
        hopping = rewards.tolerance(physics.speed(),
                                    bounds=(_HOP_SPEED, float('inf')),
                                    margin=_HOP_SPEED / 2,
                                    value_at_margin=0.5,
                                    sigmoid='linear')
        return standing * hopping
    else:
        small_control = rewards.tolerance(physics.control(), margin=1,
                                          value_at_margin=0,
                                          sigmoid='quadratic').mean()
        small_control = (small_control + 4) / 5
        return standing * small_control

def get_reward(self, physics):
    walker_xvel = physics.bind(self._walker.root_body).subtree_linvel[0]
    # Bounds collapse to a point: maximal reward only at exactly `self._vel`.
    xvel_term = rewards.tolerance(walker_xvel, (self._vel, self._vel),
                                  margin=self._vel,
                                  sigmoid='linear',
                                  value_at_margin=0.0)
    return xvel_term

def get_reward(self, physics):
    """Returns a reward to the agent."""
    standing = rewards.tolerance(physics.torso_height(),
                                 bounds=(_STAND_HEIGHT, float('inf')),
                                 margin=_STAND_HEIGHT / 2)
    upright = (1 + physics.torso_upright()) / 2
    stand_reward = (3 * standing + upright) / 4
    if self._move_speed == 0:
        return stand_reward
    else:
        move_reward = rewards.tolerance(physics.horizontal_velocity(),
                                        bounds=(self._move_speed, float('inf')),
                                        margin=self._move_speed / 2,
                                        value_at_margin=0.5,
                                        sigmoid='linear')
        return stand_reward * (5 * move_reward + 1) / 6

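# Hypothetical helper (not in the original code) making the recurring
# rescaling pattern explicit: expressions such as (5 * move_reward + 1) / 6,
# (4 + small_control) / 5 and (6 * reach_ball + 1) / 7 map a [0, 1] reward
# into [1 / (weight + 1), 1], so multiplying it into a product of factors
# never fully zeroes out the learning signal from the other terms.
def rescale_unit_reward(r, weight=5.0):
    """Maps r in [0, 1] to [1 / (weight + 1), 1]."""
    return (weight * r + 1.0) / (weight + 1.0)
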
def _get_reward(self, physics, sparse):
    if sparse:
        cart_in_bounds = rewards.tolerance(physics.cart_position(),
                                           self._CART_RANGE)
        angle_in_bounds = rewards.tolerance(physics.pole_angle_cosine(),
                                            self._ANGLE_COSINE_RANGE).prod()
        return cart_in_bounds * angle_in_bounds
    else:
        upright = (physics.pole_angle_cosine() + 1) / 2
        centered = rewards.tolerance(physics.cart_position(), margin=2)
        centered = (1 + centered) / 2
        small_control = rewards.tolerance(physics.control(), margin=1,
                                          value_at_margin=0,
                                          sigmoid='quadratic')[0]
        small_control = (4 + small_control) / 5
        small_velocity = rewards.tolerance(physics.angular_vel(),
                                           margin=5).min()
        small_velocity = (1 + small_velocity) / 2
        return upright.mean() * small_control * small_velocity * centered

def get_reward(self, physics):
    """Returns a smooth reward."""
    radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum()
    in_target = rewards.tolerance(np.linalg.norm(physics.mouth_to_target()),
                                  bounds=(0, radii), margin=2 * radii)
    is_upright = 0.5 * (physics.upright() + 1)
    return (7 * in_target + is_upright) / 8

def get_reward(self, physics):
    """Returns a reward to the agent."""
    box_size = physics.named.model.geom_size['target', 0]

    def target_to_box(b):
        return rewards.tolerance(
            physics.site_distance('box' + str(b), 'target'),
            margin=2 * box_size)

    box_is_close = max(target_to_box(b) for b in range(self._n_boxes))
    hand_to_target = physics.site_distance('grasp', 'target')
    hand_is_far = rewards.tolerance(hand_to_target, (.1, float('inf')),
                                    _CLOSE)
    return box_is_close * hand_is_far

def get_reward20(self, physics):
    """Returns a reward applicable to the performed task.

    This is called from two places:
      - suite > base.py > after_step: this is to visualize rewards.
      - control.py > step: this is the main step function.
    """
    cylinder = physics.named.data.xipos['long_cylinder', 'z']
    # tolerance(x, bounds=(0.0, 0.0), margin=0.0, sigmoid='gaussian',
    #           value_at_margin=0.1)
    height_cylinder = rewards.tolerance(cylinder, bounds=(0.25, np.inf),
                                        margin=0)
    reward = height_cylinder
    if self.physics_time != physics.time():
        # We care about the height of the hand when it reaches the object.
        mocap = physics.named.data.mocap_pos['mocap', 'z']
        height_mocap = rewards.tolerance(
            mocap,
            bounds=(self.initial_mocap_height + (0.25 - 0.125) + 0.02, np.inf),
            margin=0)
        if (reward > 0) or (self.n_rewards > 0) or (height_mocap > 0):
            # Start/continue counting if the cylinder/mocap height is above
            # the threshold. Also continue counting if counting already
            # started for some reason. Count N times; if the reward is > 0
            # for all of them, terminate.
            self.n_rewards += 1
            if self.n_rewards >= self.generator_args["time_staying_more"]:
                self.termination = True
    self.physics_time = physics.time()
    # Commands of the agent to the robot in the current step: physics.control()
    # TODO: With velocity-based controllers we can penalize the amount of
    # actuation sent to actuators, e.g. the sum over absolute values of
    # finger actuations.
    return reward

def get_reward(self, physics):
    """Returns a reward applicable to the performed task."""
    if self._task == "hop":
        standing = rewards.tolerance(physics.height(), (self._height, 2))
        hopping = rewards.tolerance(
            physics.speed(),
            bounds=(_HOP_SPEED, float("inf")),
            margin=_HOP_SPEED / 2,
            value_at_margin=0.5,
            sigmoid="linear",
        )
        return standing * hopping
    elif self._task == "stand":
        standing = rewards.tolerance(physics.height(), (self._height, 2))
        small_control = rewards.tolerance(physics.control(), margin=1,
                                          value_at_margin=0,
                                          sigmoid="quadratic").mean()
        small_control = (small_control + 4) / 5
        return standing * small_control
    else:
        # Avoid silently returning None for an unknown task.
        raise ValueError(self._task)

def get_reward(self, physics):
    # Sparse term taken from the dm_control suite; a shaping term is added.
    radii = physics.named.model.geom_size[['target', 'finger'], 0].sum()
    sparse_reward = rewards.tolerance(physics.finger_to_target_dist(),
                                      (0, radii))
    return sparse_reward + self.shaping_rew

def get_reward(self, physics):
    """Returns a reward to the agent."""
    area_max_distance = physics.named.model.geom_size['floor', 0] * np.sqrt(2)
    workspace_radius = physics.named.model.site_size['workspace', 0]
    target_radius = physics.named.model.site_size['target', 0]
    reach_reward = rewards.tolerance(
        physics.self_to_target_distance(),
        bounds=(0, workspace_radius + target_radius),
        sigmoid='linear',
        margin=area_max_distance, value_at_margin=0)
    return _upright_reward(physics) * reach_reward