Example #1
 def test_tolerance_unknown_sigmoid(self):
     with self.assertRaisesWithLiteralMatch(
             ValueError, "Unknown sigmoid type 'unsupported_sigmoid'."):
         rewards.tolerance(0,
                           bounds=(0, 1),
                           margin=.1,
                           sigmoid="unsupported_sigmoid")
Example #2
def reward_function(obs, actions):

    head_height = obs[:, 21]
    torso_upright = obs[:, 36]
    control = actions[:, :]
    center_of_mass_velocity = obs[:, 37]

    standing = rewards.tolerance(
        head_height,
        bounds=(_STAND_HEIGHT, float('inf')),
        margin=_STAND_HEIGHT / 10  # todo: 4? now 10, which means 1.26
    )
    upright = rewards.tolerance(torso_upright,
                                bounds=(0.9, float('inf')),
                                sigmoid='linear',
                                margin=0.2,
                                value_at_margin=0)
    stand_reward = standing * upright
    small_control = rewards.tolerance(control,
                                      margin=1,
                                      value_at_margin=0,
                                      sigmoid='quadratic').mean()
    small_control = (4 + small_control) / 5
    com_velocity = center_of_mass_velocity
    move = rewards.tolerance(com_velocity,
                             bounds=(_WALK_SPEED, float('inf')),
                             margin=_WALK_SPEED,
                             value_at_margin=0,
                             sigmoid='linear')
    move = (5 * move + 1) / 6
    return small_control * stand_reward * move
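A pattern worth noting in Example #2, and repeated in many of the snippets below: each [0, 1] reward factor is affinely rescaled away from zero before the factors are multiplied, so a single poor term cannot zero out the whole product. A tiny worked check of the two rescalings used above:

 # (4 + r) / 5 maps [0, 1] to [0.8, 1]; (5 * r + 1) / 6 maps [0, 1] to [1/6, 1].
 worst_control, worst_move = 0.0, 0.0
 assert (4 + worst_control) / 5 == 0.8
 assert (5 * worst_move + 1) / 6 == 1 / 6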
Example #3
 def get_reward(self, physics):
     """Returns a reward to the agent."""
     standing = rewards.tolerance(physics.head_height(),
                                  bounds=(_STAND_HEIGHT, float('inf')),
                                  margin=_STAND_HEIGHT / 4)
     upright = rewards.tolerance(physics.thorax_upright(),
                                 bounds=(0.9, float('inf')),
                                 sigmoid='linear',
                                 margin=1.9,
                                 value_at_margin=0)
     stand_reward = standing * upright
     small_control = rewards.tolerance(physics.control(),
                                       margin=1,
                                       value_at_margin=0,
                                       sigmoid='quadratic').mean()
     small_control = (4 + small_control) / 5
     if self._move_speed == 0:
         horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]]
         dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean()
         return small_control * stand_reward * dont_move
     else:
         com_velocity = np.linalg.norm(
             physics.center_of_mass_velocity()[[0, 1]])
         move = rewards.tolerance(com_velocity,
                                  bounds=(self._move_speed, float('inf')),
                                  margin=self._move_speed,
                                  value_at_margin=0,
                                  sigmoid='linear')
         move = (5 * move + 1) / 6
         return small_control * stand_reward * move
Example #4
 def get_reward(self, physics):
     """Returns a reward to the agent."""
     standing = rewards.tolerance(
         physics.torso_height(),
         bounds=(_STAND_HEIGHT, float("inf")),
         margin=_STAND_HEIGHT / 2,
     )
     upright = (1 + physics.torso_upright()) / 2
     stand_reward = (3 * standing + upright) / 4
     if self._move_speed == 0:
         return stand_reward
     else:
         move_reward = rewards.tolerance(
             physics.horizontal_velocity(),
             bounds=(self._move_speed, float("inf")),
             margin=self._move_speed / 2,
             value_at_margin=0.5,
             sigmoid="linear",
         )
         reward = stand_reward * (5 * move_reward + 1) / 6
         if self._move_type == "walk":
             if reward < 0.7:
                 reward = 0
         elif self._move_type == "run":
             if reward < 0.25:
                 reward = 0
         else:
             raise ValueError(self._move_type)
         return reward
Example #5
    def get_reward_factors(self, physics):
        """Returns a reward to the agent."""
        standing = super(Fetch, self).get_reward_factors(physics)

        # Reward for bringing mouth close to ball.
        bite_radius = physics.named.model.site_size['upper_bite', 0]
        bite_margin = 2
        reach_ball = rewards.tolerance(physics.ball_to_mouth_distance(),
                                       bounds=(0, bite_radius),
                                       sigmoid='reciprocal',
                                       margin=bite_margin)
        reach_ball = (6 * reach_ball + 1) / 7

        # Reward for bringing the ball close to the target.
        target_radius = physics.named.model.geom_size['target', 0]
        bring_margin = physics.named.model.geom_size['floor', 0]
        ball_near_target = rewards.tolerance(physics.ball_to_target_distance(),
                                             bounds=(0, target_radius),
                                             sigmoid='reciprocal',
                                             margin=bring_margin)
        fetch_ball = (ball_near_target + 1) / 2

        # Let go of the ball if it's been fetched.
        if physics.ball_to_target_distance() < 2 * target_radius:
            reach_ball = 1

        return np.hstack((standing, reach_ball, fetch_ball))
Example #6
 def get_reward(self, physics):
     """Returns a reward to the agent."""
     upright = (1 + physics.torso_upright()) / 2
     if self._height is not None:
         jumping = rewards.tolerance(physics.torso_height(),
                                     bounds=(_JUMP_HEIGHT, float('inf')),
                                     margin=_JUMP_HEIGHT / 2)
         jumping_reward = (3 * jumping + upright) / 4  # get rid of upright?
         return jumping_reward
     standing = rewards.tolerance(physics.torso_height(),
                                  bounds=(_STAND_HEIGHT, float('inf')),
                                  margin=_STAND_HEIGHT / 2)
     stand_reward = (3 * standing + upright) / 4
     if self._move_speed == 0:
         return stand_reward
     else:
         if self._move_speed < 0:
             bounds = (-float('inf'), self._move_speed)
         else:
             bounds = (self._move_speed, float('inf'))
         move_reward = rewards.tolerance(physics.horizontal_velocity(),
                                         bounds=bounds,
                                         margin=abs(self._move_speed / 2),
                                         value_at_margin=0.5,
                                         sigmoid='linear')
         return stand_reward * (5 * move_reward + 1) / 6
Example #7
    def get_reward_factors(self, physics):
        """Returns the factorized reward."""
        # Keep the torso at standing height.
        torso = rewards.tolerance(physics.torso_pelvis_height()[0],
                                  bounds=(self._stand_height[0], float('inf')),
                                  margin=self._stand_height[0])
        # Keep the pelvis at standing height.
        pelvis = rewards.tolerance(physics.torso_pelvis_height()[1],
                                   bounds=(self._stand_height[1],
                                           float('inf')),
                                   margin=self._stand_height[1])
        # Keep head, torso and pelvis upright.
        upright = rewards.tolerance(physics.upright(),
                                    bounds=(_MIN_UPRIGHT_COSINE, float('inf')),
                                    sigmoid='linear',
                                    margin=_MIN_UPRIGHT_COSINE + 1,
                                    value_at_margin=0)

        # Reward for foot touch forces up to bodyweight.
        touch = rewards.tolerance(physics.touch_sensors().sum(),
                                  bounds=(self._body_weight, float('inf')),
                                  margin=self._body_weight,
                                  sigmoid='linear',
                                  value_at_margin=0.9)

        return np.hstack((torso, pelvis, upright, touch))
Example #8
    def get_reward(self, physics):
        target = physics.bind(self._pedestal.target_site).xpos
        obj = physics.bind(self._prop_frame).xpos
        tcp = physics.bind(self._hand.tool_center_point).xpos

        tcp_to_obj = np.linalg.norm(obj - tcp)
        grasp = rewards.tolerance(tcp_to_obj,
                                  bounds=(0, _TARGET_RADIUS),
                                  margin=_TARGET_RADIUS,
                                  sigmoid='long_tail')

        obj_to_target = np.linalg.norm(obj - target)
        in_place = rewards.tolerance(obj_to_target,
                                     bounds=(0, _TARGET_RADIUS),
                                     margin=_TARGET_RADIUS,
                                     sigmoid='long_tail')

        tcp_to_target = np.linalg.norm(tcp - target)
        hand_away = rewards.tolerance(tcp_to_target,
                                      bounds=(4 * _TARGET_RADIUS, np.inf),
                                      margin=3 * _TARGET_RADIUS,
                                      sigmoid='long_tail')
        in_place_weight = 10.
        grasp_or_hand_away = grasp * (1 - in_place) + hand_away * in_place
        return (grasp_or_hand_away +
                in_place_weight * in_place) / (1 + in_place_weight)
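The return expression above blends the shaping terms: while in_place is near 0 the reward tracks grasp, and as in_place approaches 1 it switches to rewarding hand_away, with in_place itself carrying most of the weight. A small endpoint check of the blend (plain arithmetic, no physics required):

 def blended(grasp, in_place, hand_away, in_place_weight=10.):
     grasp_or_hand_away = grasp * (1 - in_place) + hand_away * in_place
     return (grasp_or_hand_away + in_place_weight * in_place) / (1 + in_place_weight)

 assert blended(grasp=1., in_place=0., hand_away=0.) == 1. / 11.  # grasped but not yet placed
 assert blended(grasp=0., in_place=1., hand_away=1.) == 1.0       # placed, hand retracted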
Example #9
 def test_tolerance_sigmoids(self, sigmoid):
     margins = [0.01, 1.0, 100, 10000]
     values_at_margin = [0.1, 0.5, 0.9]
     bounds_list = [(0, 0), (-1, 1), (-np.pi, np.pi), (-100, 100)]
     for bounds in bounds_list:
         for margin in margins:
             for value_at_margin in values_at_margin:
                 upper_margin = bounds[1] + margin
                 value = rewards.tolerance(x=upper_margin,
                                           bounds=bounds,
                                           margin=margin,
                                           value_at_margin=value_at_margin,
                                           sigmoid=sigmoid)
                 self.assertAlmostEqual(value,
                                        value_at_margin,
                                        delta=np.sqrt(EPS))
                 lower_margin = bounds[0] - margin
                 value = rewards.tolerance(x=lower_margin,
                                           bounds=bounds,
                                           margin=margin,
                                           value_at_margin=value_at_margin,
                                           sigmoid=sigmoid)
                 self.assertAlmostEqual(value,
                                        value_at_margin,
                                        delta=np.sqrt(EPS))
Example #10
    def get_reward(self, physics):
        """Returns a reward to the agent."""

        # Reward for moving close to the ball.
        arena_radius = physics.named.model.geom_size['floor', 0] * np.sqrt(2)
        workspace_radius = physics.named.model.site_size['workspace', 0]
        ball_radius = physics.named.model.geom_size['ball', 0]
        reach_reward = rewards.tolerance(physics.self_to_ball_distance(),
                                         bounds=(0, workspace_radius +
                                                 ball_radius),
                                         sigmoid='linear',
                                         margin=arena_radius,
                                         value_at_margin=0)

        # Reward for bringing the ball to the target.
        target_radius = physics.named.model.site_size['target', 0]
        fetch_reward = rewards.tolerance(physics.ball_to_target_distance(),
                                         bounds=(0, target_radius),
                                         sigmoid='linear',
                                         margin=arena_radius,
                                         value_at_margin=0)

        reach_then_fetch = reach_reward * (0.5 + 0.5 * fetch_reward)
        #    reach_then_fetch = fetch_reward
        return _upright_reward(physics) * reach_then_fetch
Example #11
 def test_tolerance_vectorization(self):
   bounds = (-.1, .1)
   margin = 0.2
   x_array = np.random.randn(2, 3, 4)
   value_array = rewards.tolerance(x=x_array, bounds=bounds, margin=margin)
   self.assertEqual(x_array.shape, value_array.shape)
   for i, x in enumerate(x_array.ravel()):
     value = rewards.tolerance(x=x, bounds=bounds, margin=margin)
     self.assertEqual(value, value_array.ravel()[i])
Example #12
 def get_reward(self, physics):
   """Returns a reward to the agent."""
   target_size = physics.named.model.geom_size['target', 0]
   near_target = rewards.tolerance(physics.mass_to_target_dist(),
                                   bounds=(0, target_size), margin=target_size)
   control_reward = rewards.tolerance(physics.control(), margin=1,
                                      value_at_margin=0,
                                      sigmoid='quadratic').mean()
   small_control = (control_reward + 4) / 5
   return near_target * small_control
Example #13
 def get_reward(self, physics):
     """Returns a reward to the agent."""
     box_size = physics.named.model.geom_size['target', 0]
     min_box_to_target_distance = min(
         physics.site_distance(name, 'target') for name in self._box_names)
     box_is_close = rewards.tolerance(min_box_to_target_distance,
                                      margin=2 * box_size)
     hand_to_target_distance = physics.site_distance('grasp', 'target')
     hand_is_far = rewards.tolerance(hand_to_target_distance,
                                     bounds=(.1, float('inf')),
                                     margin=_CLOSE)
     return box_is_close * hand_is_far
Example #14
def _walker_get_reward(self, physics):
    walker_height = physics.bind(self._walker.root_body).xpos[2]  # xpos['z']
    stand_reward = rewards.tolerance(walker_height,
                                     bounds=(self._height, float('inf')),
                                     margin=self._height / 2)

    walker_vel = physics.bind(self._walker.root_body).subtree_linvel[0]
    move_reward = rewards.tolerance(walker_vel,
                                    bounds=(self._vel, float('inf')),
                                    margin=self._vel / 2,
                                    value_at_margin=0.5,
                                    sigmoid='linear')
    return stand_reward * (5 * move_reward + 1) / 6
Example #15
    def get_reward(self, physics):
        """Returns a reward to the agent."""
        standing = rewards.tolerance(physics.head_height(),
                                     bounds=(_STAND_HEIGHT, float('inf')),
                                     margin=_STAND_HEIGHT / 4)
        upright = rewards.tolerance(physics.torso_upright(),
                                    bounds=(0.9, float('inf')),
                                    sigmoid='linear',
                                    margin=1.9,
                                    value_at_margin=0)
        stand_reward = standing * upright
        small_control = rewards.tolerance(physics.control(),
                                          margin=1,
                                          value_at_margin=0,
                                          sigmoid='quadratic').mean()
        small_control = (4 + small_control) / 5
        if self._move_speed == 0:
            horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]]
            dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean()
            return small_control * stand_reward * dont_move
        else:
            com_velocity = np.linalg.norm(
                physics.center_of_mass_velocity()[[0, 1]])
            '''move = rewards.tolerance(com_velocity,
                               bounds=(self._move_speed, float('inf')),
                               margin=self._move_speed, value_at_margin=0,
                               sigmoid='linear')'''

            move = physics.center_of_mass_velocity()[0] * physics.torso_forward()

            # get number joint at limits
            joint_angles_norm = np.abs(physics.joint_angles(
                self._joint_limits)) - 0.98
            joint_angles_norm[joint_angles_norm < 0.0] = 0.0

            joint_angles_norm = joint_angles_norm / (1.0 - 0.98)
            joint_angles_norm[joint_angles_norm > 1.0] = 1.0
            #print("Joint angles norm2", joint_angles_norm)

            joints_at_limit_cost = 0.15 * np.sum(joint_angles_norm)
            #print ("Joints at limits cost", joints_at_limit_cost)

            electricity_cost = 0.005 * np.sum(
                np.abs(physics.control() * physics.joint_velocities()))
            #  print ("Electricity cost", electricity_cost)
            #  print ("Joint velocities", physics.joint_velocities())
            #  move = com_velocity * physics.torso_forward()
            return move + 2.0 + 0.1 * upright - electricity_cost - joints_at_limit_cost
Example #16
 def get_reward(self, physics):
     hand_pos = physics.bind(self._hand.tool_center_point).xpos
     target_pos = physics.bind(self._target).xpos
     distance = np.linalg.norm(hand_pos - target_pos)
     prop_x_distance = abs(target_pos[0] - _TARGET_PROP_XPOS)
     hand_reward = rewards.tolerance(distance,
                                     bounds=(0, _HAND_TARGET_RADIUS),
                                     margin=_HAND_TARGET_RADIUS * 4,
                                     value_at_margin=0.2,
                                     sigmoid='long_tail')
     prop_reward = rewards.tolerance(prop_x_distance,
                                     bounds=(0, _TARGET_RADIUS),
                                     margin=_TARGET_RADIUS * 4,
                                     value_at_margin=0.2,
                                     sigmoid='long_tail')
     return hand_reward + prop_reward
Example #17
 def get_reward(self, physics):
   """Returns a smooth reward."""
   target_size = physics.named.model.geom_size['target', 0]
   return rewards.tolerance(physics.nose_to_target_dist(),
                            bounds=(0, target_size),
                            margin=5*target_size,
                            sigmoid='long_tail')
Example #18
 def get_reward(self, physics):
     prop_height = self._get_height_of_lowest_vertex(physics)
     return rewards.tolerance(prop_height,
                              bounds=(self._target_height, np.inf),
                              margin=_DISTANCE_TO_LIFT,
                              value_at_margin=0,
                              sigmoid='linear')
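With sigmoid='linear' and value_at_margin=0, as used here and in several of the locomotion rewards above, the value outside the bounds falls off as 1 - distance/margin, clipped to [0, 1]. A short numerical check of that reading, assuming the standard dm_control implementation and using hypothetical stand-ins for the task constants:

 import numpy as np
 from dm_control.utils import rewards

 target_height, lift_margin = 0.5, 0.2  # hypothetical values, not the task's own constants
 for h in (0.6, 0.5, 0.45, 0.35, 0.2):
     r = rewards.tolerance(h, bounds=(target_height, np.inf),
                           margin=lift_margin, value_at_margin=0,
                           sigmoid='linear')
     expected = np.clip(1 - max(target_height - h, 0) / lift_margin, 0, 1)
     assert np.isclose(r, expected)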
Example #19
 def get_reward(self, physics):
     """Returns a reward to the agent."""
     return rewards.tolerance(physics.speed(),
                              bounds=(_RUN_SPEED, float('inf')),
                              margin=_RUN_SPEED,
                              value_at_margin=0,
                              sigmoid='linear')
Example #20
 def get_reward(self, physics):
     hand_pos = physics.bind(self._hand.tool_center_point).xpos
     target_pos = physics.bind(self._target).xpos
     distance = np.linalg.norm(hand_pos - target_pos)
     return rewards.tolerance(distance,
                              bounds=(0, _TARGET_RADIUS),
                              margin=_TARGET_RADIUS)
Example #21
 def get_reward(self, physics):
   """Returns a reward applicable to the performed task."""
   standing = rewards.tolerance(physics.height(), (_STAND_HEIGHT, 2))
   if self._hopping:
     hopping = rewards.tolerance(physics.speed(),
                                 bounds=(_HOP_SPEED, float('inf')),
                                 margin=_HOP_SPEED/2,
                                 value_at_margin=0.5,
                                 sigmoid='linear')
     return standing * hopping
   else:
     small_control = rewards.tolerance(physics.control(),
                                       margin=1, value_at_margin=0,
                                       sigmoid='quadratic').mean()
     small_control = (small_control + 4) / 5
     return standing * small_control
Example #22
 def get_reward(self, physics):
     walker_xvel = physics.bind(self._walker.root_body).subtree_linvel[0]
     xvel_term = rewards.tolerance(walker_xvel, (self._vel, self._vel),
                                   margin=self._vel,
                                   sigmoid='linear',
                                   value_at_margin=0.0)
     return xvel_term
Example #23
 def get_reward(self, physics):
   """Returns a reward to the agent."""
   standing = rewards.tolerance(physics.torso_height(),
                                bounds=(_STAND_HEIGHT, float('inf')),
                                margin=_STAND_HEIGHT/2)
   upright = (1 + physics.torso_upright()) / 2
   stand_reward = (3*standing + upright) / 4
   if self._move_speed == 0:
     return stand_reward
   else:
     move_reward = rewards.tolerance(physics.horizontal_velocity(),
                                     bounds=(self._move_speed, float('inf')),
                                     margin=self._move_speed/2,
                                     value_at_margin=0.5,
                                     sigmoid='linear')
     return stand_reward * (5*move_reward + 1) / 6
Example #24
 def _get_reward(self, physics, sparse):
     if sparse:
         cart_in_bounds = rewards.tolerance(physics.cart_position(),
                                            self._CART_RANGE)
         angle_in_bounds = rewards.tolerance(physics.pole_angle_cosine(),
                                             self._ANGLE_COSINE_RANGE).prod()
         return cart_in_bounds * angle_in_bounds
     else:
         upright = (physics.pole_angle_cosine() + 1) / 2
         centered = rewards.tolerance(physics.cart_position(), margin=2)
         centered = (1 + centered) / 2
         small_control = rewards.tolerance(physics.control(), margin=1,
                                           value_at_margin=0,
                                           sigmoid='quadratic')[0]
         small_control = (4 + small_control) / 5
         small_velocity = rewards.tolerance(physics.angular_vel(), margin=5).min()
         small_velocity = (1 + small_velocity) / 2
         return upright.mean() * small_control * small_velocity * centered
Example #25
 def get_reward(self, physics):
     """Returns a smooth reward."""
     radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum()
      in_target = rewards.tolerance(np.linalg.norm(physics.mouth_to_target()),
                                    bounds=(0, radii),
                                    margin=2 * radii)
     is_upright = 0.5 * (physics.upright() + 1)
     return (7 * in_target + is_upright) / 8
Example #26
 def get_reward(self, physics):
   """Returns a reward to the agent."""
   box_size = physics.named.model.geom_size['target', 0]
   def target_to_box(b):
     return rewards.tolerance(physics.site_distance('box' + str(b), 'target'),
                              margin=2*box_size)
   box_is_close = max(target_to_box(b) for b in range(self._n_boxes))
   hand_to_target = physics.site_distance('grasp', 'target')
   hand_is_far = rewards.tolerance(hand_to_target, (.1, float('inf')), _CLOSE)
   return box_is_close * hand_is_far
Example #27
    def get_reward20(self, physics):
        """Returns a reward applicable to the performed task.
        This is called from two places:
          - suite > base.py > after_step: This is to visualize rewards
          - control.py > step: This is the main step function.
        """
        cylinder = physics.named.data.xipos['long_cylinder', 'z']
        
        # tolerance(x, bounds=(0.0, 0.0), margin=0.0, sigmoid='gaussian', value_at_margin=0.1):
        height_cylinder = rewards.tolerance(cylinder, bounds=(0.25, np.inf), margin=0)
        # height_cylinder = (1 + height_cylinder)/2
        reward = height_cylinder

        if self.physics_time != physics.time():
            # We care about height of hand when it reaches the object.
            mocap = physics.named.data.mocap_pos['mocap', 'z']
            height_mocap = rewards.tolerance(mocap, bounds=(self.initial_mocap_height + (0.25-0.125) + 0.02, np.inf), margin=0)
            # print("COMPARE mocap with height:", mocap, "(", height_mocap, ")" "<=>", cylinder, "(", height_cylinder, ") ------ ", self.n_rewards, "=====", self.initial_mocap_height)
            # if (reward > 0) or (self.n_rewards > 0):
            if (reward > 0) or (self.n_rewards > 0) or (height_mocap > 0):
                """Start/continue counting if cylinder/mocap height above the threshold.
                Also continue counting if counting already started for some reason.
                """
                # Count #N times. If reward is >0 for all of them then terminate.
                self.n_rewards += 1
                # print("Reward =", reward)
            
            if self.n_rewards >= self.generator_args["time_staying_more"]:
                # print("Finished @", self.generator_args["time_staying_more"])
                self.termination = True
            
            self.physics_time = physics.time()
        
        # Commands of the agent to the robot in the current step:      physics.control()
        # TODO: With velocity-based controllers we can penalize the amount of actuation sent
        #       to actuators. We can penalize the sum over absolute values of finger actuations.

        # touch_data = np.log1p(self.named.data.sensordata[['touch_toe', 'touch_heel']])
        # if reward < 0:
        #     physics._reset_next_step = True
        #     # pass
        return reward
Example #28
 def get_reward(self, physics):
     """Returns a reward applicable to the performed task."""
     if self._task == "hop":
         standing = rewards.tolerance(physics.height(), (self._height, 2))
         hopping = rewards.tolerance(
             physics.speed(),
             bounds=(_HOP_SPEED, float("inf")),
             margin=_HOP_SPEED / 2,
             value_at_margin=0.5,
             sigmoid="linear",
         )
         return standing * hopping
     elif self._task == "stand":
         standing = rewards.tolerance(physics.height(), (self._height, 2))
         small_control = rewards.tolerance(physics.control(),
                                           margin=1,
                                           value_at_margin=0,
                                           sigmoid="quadratic").mean()
         small_control = (small_control + 4) / 5
         return standing * small_control
Example #29
     def get_reward(self, physics):
        # from dmcs
        radii = physics.named.model.geom_size[['target', 'finger'], 0].sum()
        sparse_reward = rewards.tolerance(physics.finger_to_target_dist(),
                                          (0, radii))

        # print(sparse_reward)
        # print(sparse_reward + self.shaping_rew)

        # c_rew = np.square(physics.control()).sum()
        return sparse_reward + self.shaping_rew
Example #30
  def get_reward(self, physics):
    """Returns a reward to the agent."""
    area_max_distance = physics.named.model.geom_size['floor', 0] * np.sqrt(2)
    workspace_radius = physics.named.model.site_size['workspace', 0]
    target_radius = physics.named.model.site_size['target', 0]
    reach_reward = rewards.tolerance(
        physics.self_to_target_distance(),
        bounds=(0, workspace_radius + target_radius),
        sigmoid='linear',
        margin=area_max_distance, value_at_margin=0)

    return _upright_reward(physics) * reach_reward