    def compute_reward(self, action, obs):
        del action
        obj = obs[4:7]
        tcp = self.tcp_center

        tcp_to_obj = np.linalg.norm(obj - tcp)
        tcp_to_obj_init = np.linalg.norm(obj - self.init_tcp)
        obj_to_target = abs(self._target_pos[2] - obj[2])

        tcp_closed = 1 - obs[3]
        near_button = reward_utils.tolerance(
            tcp_to_obj,
            bounds=(0, 0.01),
            margin=tcp_to_obj_init,
            sigmoid='long_tail',
        )
        button_pressed = reward_utils.tolerance(
            obj_to_target,
            bounds=(0, 0.005),
            margin=self._obj_to_target_init,
            sigmoid='long_tail',
        )

        reward = 5 * reward_utils.hamacher_product(tcp_closed, near_button)
        if tcp_to_obj <= 0.03:
            reward += 5 * button_pressed

        return (reward, tcp_to_obj, obs[3], obj_to_target, near_button,
                button_pressed)
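These examples all lean on Meta-World's `reward_utils.tolerance` (maps a distance into [0, 1]: 1 inside `bounds`, decaying toward 0 over `margin` with the chosen sigmoid) and `reward_utils.hamacher_product` (a t-norm for combining two [0, 1] terms that is 0 whenever either input is 0). The stand-alone sketch below only illustrates those assumed semantics; it is not the library implementation, and its decay curve is a stand-in for the real `long_tail`/`gaussian` sigmoids.

# Hedged stand-alone sketch of the assumed helper semantics (illustrative only,
# not the reward_utils implementation).
def tolerance_sketch(x, bounds=(0.0, 0.0), margin=0.0):
    """Roughly 1 inside `bounds`, decaying toward 0 over `margin` outside."""
    lower, upper = bounds
    if lower <= x <= upper:
        return 1.0
    if margin <= 0:
        return 0.0
    dist = (lower - x) if x < lower else (x - upper)
    # stand-in decay for the real 'long_tail' / 'gaussian' sigmoids
    return 1.0 / (1.0 + (dist / margin) ** 2)

def hamacher_product(a, b):
    """Hamacher t-norm on [0, 1]: zero whenever either input is zero."""
    denom = a + b - a * b
    return (a * b) / denom if denom > 0 else 0.0

print(tolerance_sketch(0.11, bounds=(0, 0.01), margin=0.1))  # 0.5, one margin outside the bound
print(hamacher_product(0.9, 0.8))                            # ~0.73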
Example #2
    def compute_reward(self, action, obs):
        gripper = obs[:3]
        handle = obs[4:7]

        handle_error = np.linalg.norm(handle - self._target_pos)

        reward_for_opening = reward_utils.tolerance(handle_error,
                                                    bounds=(0, 0.02),
                                                    margin=self.maxDist,
                                                    sigmoid='long_tail')

        handle_pos_init = self._target_pos + np.array([.0, self.maxDist, .0])
        # Emphasize XY error so that the gripper is able to drop down and cage
        # the handle without running into it. By doing this, we are assuming
        # that the reward in the Z direction is small enough that the agent
        # will be willing to explore raising a finger above the handle, hooking
        # it, and dropping back down to re-gain Z reward
        scale = np.array([3., 3., 1.])
        gripper_error = (handle - gripper) * scale
        gripper_error_init = (handle_pos_init - self.init_tcp) * scale

        reward_for_caging = reward_utils.tolerance(
            np.linalg.norm(gripper_error),
            bounds=(0, 0.01),
            margin=np.linalg.norm(gripper_error_init),
            sigmoid='long_tail')

        reward = reward_for_caging + reward_for_opening
        reward *= 5.0

        return (reward, np.linalg.norm(handle - gripper), obs[3], handle_error,
                reward_for_caging, reward_for_opening)
Example #3
    def compute_reward(self, actions, obs):
        _TARGET_RADIUS = 0.05
        tcp = self.tcp_center
        obj = obs[4:7]
        target = self._target_pos

        tcp_to_target = np.linalg.norm(tcp - target)
        tcp_to_obj = np.linalg.norm(tcp - obj)
        obj_to_target = np.linalg.norm(obj - target)

        in_place_margin = np.linalg.norm(self.obj_init_pos - target)
        in_place = reward_utils.tolerance(obj_to_target,
                                    bounds=(0, _TARGET_RADIUS),
                                    margin=in_place_margin,
                                    sigmoid='gaussian',)

        hand_margin = np.linalg.norm(self.hand_init_pos - obj) + 0.1
        hand_in_place = reward_utils.tolerance(tcp_to_target,
                                    bounds=(0, 0.25*_TARGET_RADIUS),
                                    margin=hand_margin,
                                    sigmoid='gaussian',)

        reward = 3 * hand_in_place + 6 * in_place

        if obj_to_target < _TARGET_RADIUS:
            reward = 10

        return [reward, obj_to_target, hand_in_place]
    def compute_reward(self, action, obs):
        gripper = obs[:3]
        lever = obs[4:7]

        # De-emphasize y error so that we get Sawyer's shoulder underneath the
        # lever prior to bumping up against it
        scale = np.array([4., 1., 4.])
        # Offset so that we get the Sawyer's shoulder underneath the lever,
        # rather than its fingers
        offset = np.array([.0, .055, .07])

        shoulder_to_lever = (gripper + offset - lever) * scale
        shoulder_to_lever_init = (self.init_tcp + offset -
                                  self._lever_pos_init) * scale

        # This `ready_to_lift` reward should be a *hint* for the agent, not an
        # end in itself. Make sure to devalue it compared to the value of
        # actually lifting the lever
        ready_to_lift = reward_utils.tolerance(
            np.linalg.norm(shoulder_to_lever),
            bounds=(0, 0.02),
            margin=np.linalg.norm(shoulder_to_lever_init),
            sigmoid='long_tail',
        )

        # The skill of the agent should be measured by its ability to get the
        # lever to point straight upward. This means we'll be measuring the
        # current angle of the lever's joint, and comparing with 90deg.
        lever_angle = -self.data.get_joint_qpos('LeverAxis')
        lever_angle_desired = np.pi / 2.0

        lever_error = abs(lever_angle - lever_angle_desired)

        # We'll set the margin to 15deg from horizontal. Angles below that will
        # receive some reward to incentivize exploration, but we don't want to
        # reward accidents too much. Past 15deg is probably intentional movement
        lever_engagement = reward_utils.tolerance(lever_error,
                                                  bounds=(0, np.pi / 48.0),
                                                  margin=(np.pi / 2.0) -
                                                  (np.pi / 12.0),
                                                  sigmoid='long_tail')

        target = self._target_pos
        obj_to_target = np.linalg.norm(lever - target)
        in_place_margin = (np.linalg.norm(self._lever_pos_init - target))

        in_place = reward_utils.tolerance(
            obj_to_target,
            bounds=(0, 0.04),
            margin=in_place_margin,
            sigmoid='long_tail',
        )

        # reward = 2.0 * ready_to_lift + 8.0 * lever_engagement
        reward = 10.0 * reward_utils.hamacher_product(ready_to_lift, in_place)
        return (reward, np.linalg.norm(shoulder_to_lever), ready_to_lift,
                lever_error, lever_engagement)
    def compute_reward(self, action, obs):
        obj = obs[4:7]
        gripper = self.tcp_center

        obj_to_target = np.linalg.norm(obj - self._target_pos)
        tcp_to_obj = np.linalg.norm(obj - gripper)
        in_place_margin = np.linalg.norm(self.obj_init_pos - self._target_pos)

        threshold = 0.03
        # floor is a 3D funnel centered on the initial object pos
        radius = np.linalg.norm(gripper[:2] - self.obj_init_pos[:2])
        if radius <= threshold:
            floor = 0.0
        else:
            floor = 0.015 * np.log(radius - threshold) + 0.15
        # prevent the hand from running into cliff edge by staying above floor
        above_floor = 1.0 if gripper[2] >= floor else reward_utils.tolerance(
            max(floor - gripper[2], 0.0),
            bounds=(0.0, 0.01),
            margin=0.02,
            sigmoid='long_tail',
        )
        object_grasped = self._gripper_caging_reward(
            action,
            obj,
            object_reach_radius=0.01,
            obj_radius=0.015,
            pad_success_thresh=0.02,
            xz_thresh=0.03,
            desired_gripper_effort=0.1,
            high_density=True)
        in_place = reward_utils.tolerance(obj_to_target,
                                          bounds=(0, 0.02),
                                          margin=in_place_margin,
                                          sigmoid='long_tail')
        reward = reward_utils.hamacher_product(object_grasped, in_place)

        near_object = tcp_to_obj < 0.04
        pinched_without_obj = obs[3] < 0.33
        lifted = obj[2] - 0.02 > self.obj_init_pos[2]
        # Increase reward when the obj is properly grasped
        grasp_success = near_object and lifted and not pinched_without_obj
        if grasp_success:
            reward += 1. + 5. * reward_utils.hamacher_product(
                in_place, above_floor)
        # Maximize reward on success
        if obj_to_target < self.TARGET_RADIUS:
            reward = 10.

        return (
            reward,
            tcp_to_obj,
            grasp_success,
            obj_to_target,
            object_grasped,
            in_place,
        )
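The `floor` above is a logarithmic funnel: zero within `threshold` of the object in the XY plane and rising with radial distance, so the hand is pushed to stay higher the farther it strays from the object. A quick stand-alone check using the same constants (numpy only):

# Quick numeric check of the log-funnel "floor" used above.
import numpy as np

threshold = 0.03
for radius in (0.02, 0.05, 0.10, 0.30):
    if radius <= threshold:
        floor = 0.0
    else:
        floor = 0.015 * np.log(radius - threshold) + 0.15
    print(f"radius={radius:.2f} m -> floor={floor:.3f} m")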
    def compute_reward(self, action, obs):
        _TARGET_RADIUS = 0.05
        tcp = self.tcp_center
        obj = obs[4:7]
        tcp_opened = obs[3]
        midpoint = np.array([-0.05, 0.77, obj[2]])
        target = self._target_pos

        tcp_to_obj = np.linalg.norm(obj - tcp)

        in_place_scaling = np.array([3., 1., 1.])
        obj_to_midpoint = np.linalg.norm((obj - midpoint) * in_place_scaling)
        obj_to_midpoint_init = np.linalg.norm((self.obj_init_pos - midpoint) * in_place_scaling)

        obj_to_target = np.linalg.norm(obj - target)
        obj_to_target_init = np.linalg.norm(self.obj_init_pos - target)

        in_place_part1 = reward_utils.tolerance(obj_to_midpoint,
            bounds=(0, _TARGET_RADIUS),
            margin=obj_to_midpoint_init,
            sigmoid='long_tail',
        )

        in_place_part2 = reward_utils.tolerance(obj_to_target,
            bounds=(0, _TARGET_RADIUS),
            margin=obj_to_target_init,
            sigmoid='long_tail'
        )

        object_grasped = self._gripper_caging_reward(
            action,
            obj,
            object_reach_radius=0.01,
            obj_radius=0.015,
            pad_success_thresh=0.05,
            xz_thresh=0.005,
            high_density=True
        )
        reward = 2 * object_grasped

        if tcp_to_obj < 0.02 and tcp_opened > 0:
            reward = 2 * object_grasped + 1. + 4. * in_place_part1
            if obj[1] > 0.75:
                reward = 2 * object_grasped + 1. + 4. + 3. * in_place_part2

        if obj_to_target < _TARGET_RADIUS:
            reward = 10.

        return [
            reward,
            tcp_to_obj,
            tcp_opened,
            np.linalg.norm(obj - target),
            object_grasped,
            in_place_part2
        ]
Example #7
    def _gripper_caging_reward(self, action, obj_position):
        pad_success_margin = 0.05
        x_z_success_margin = 0.005
        obj_radius = 0.015
        tcp = self.tcp_center
        left_pad = self.get_body_com('leftpad')
        right_pad = self.get_body_com('rightpad')
        delta_object_y_left_pad = left_pad[1] - obj_position[1]
        delta_object_y_right_pad = obj_position[1] - right_pad[1]
        right_caging_margin = abs(abs(obj_position[1] - self.init_right_pad[1])
            - pad_success_margin)
        left_caging_margin = abs(abs(obj_position[1] - self.init_left_pad[1])
            - pad_success_margin)

        right_caging = reward_utils.tolerance(delta_object_y_right_pad,
                                bounds=(obj_radius, pad_success_margin),
                                margin=right_caging_margin,
                                sigmoid='long_tail',)
        left_caging = reward_utils.tolerance(delta_object_y_left_pad,
                                bounds=(obj_radius, pad_success_margin),
                                margin=left_caging_margin,
                                sigmoid='long_tail',)

        y_caging = reward_utils.hamacher_product(left_caging,
                                                 right_caging)

        # compute the tcp_obj distance in the x_z plane
        tcp_xz = tcp + np.array([0., -tcp[1], 0.])
        obj_position_x_z = np.copy(obj_position) + np.array([0., -obj_position[1], 0.])
        tcp_obj_norm_x_z = np.linalg.norm(tcp_xz - obj_position_x_z, ord=2)

        # used for computing the tcp-to-object margin in the x_z plane
        init_obj_x_z = self.obj_init_pos + np.array([0., -self.obj_init_pos[1], 0.])
        init_tcp_x_z = self.init_tcp + np.array([0., -self.init_tcp[1], 0.])
        tcp_obj_x_z_margin = np.linalg.norm(init_obj_x_z - init_tcp_x_z, ord=2) - x_z_success_margin

        x_z_caging = reward_utils.tolerance(tcp_obj_norm_x_z,
                                bounds=(0, x_z_success_margin),
                                margin=tcp_obj_x_z_margin,
                                sigmoid='long_tail',)

        gripper_closed = min(max(0, action[-1]), 1)
        caging = reward_utils.hamacher_product(y_caging, x_z_caging)

        gripping = gripper_closed if caging > 0.97 else 0.
        caging_and_gripping = reward_utils.hamacher_product(caging,
                                                            gripping)
        caging_and_gripping = (caging_and_gripping + caging) / 2
        return caging_and_gripping
Example #8
    def compute_reward(self, action, obs):
        obj = obs[4:7]
        tcp_opened = obs[3]
        tcp_to_obj = np.linalg.norm(obj - self.tcp_center)
        target_to_obj = np.linalg.norm(obj - self._target_pos)
        target_to_obj_init = np.linalg.norm(self.obj_init_pos -
                                            self._target_pos)

        in_place = reward_utils.tolerance(
            target_to_obj,
            bounds=(0, self.TARGET_RADIUS),
            margin=target_to_obj_init,
            sigmoid='long_tail',
        )

        object_grasped = self._gripper_caging_reward(action,
                                                     obj,
                                                     object_reach_radius=0.01,
                                                     obj_radius=0.015,
                                                     pad_success_thresh=0.05,
                                                     xz_thresh=0.005,
                                                     high_density=True)
        reward = 2 * object_grasped

        if tcp_to_obj < 0.02 and tcp_opened > 0:
            reward += 1. + reward + 5. * in_place
        if target_to_obj < self.TARGET_RADIUS:
            reward = 10.

        return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped,
                in_place)
Example #9
    def compute_reward(self, action, obs):
        obj = obs[4:7]
        tcp_opened = obs[3]
        x_scaling = np.array([3., 1., 1.])
        tcp_to_obj = np.linalg.norm(obj - self.tcp_center)
        target_to_obj = np.linalg.norm((obj - self._target_pos) * x_scaling)
        target_to_obj_init = np.linalg.norm(
            (obj - self.obj_init_pos) * x_scaling)

        in_place = reward_utils.tolerance(
            target_to_obj,
            bounds=(0, self.TARGET_RADIUS),
            margin=target_to_obj_init,
            sigmoid='long_tail',
        )

        goal_line = (self._target_pos[1] - 0.1)
        if obj[1] > goal_line and abs(obj[0] - self._target_pos[0]) > 0.10:
            in_place = np.clip(
                in_place - 2 * ((obj[1] - goal_line) / (1 - goal_line)), 0.,
                1.)

        object_grasped = self._gripper_caging_reward(action, obj,
                                                     self.OBJ_RADIUS)

        reward = (3 * object_grasped) + (6.5 * in_place)

        if target_to_obj < self.TARGET_RADIUS:
            reward = 10.
        return (reward, tcp_to_obj, tcp_opened,
                np.linalg.norm(obj - self._target_pos), object_grasped,
                in_place)
Example #10
    def compute_reward(self, action, obs):
        _TARGET_RADIUS = 0.05
        tcp = self.tcp_center
        obj = obs[4:7]
        tcp_opened = obs[3]
        target = self._target_pos

        obj_to_target = np.linalg.norm(obj - target)
        tcp_to_obj = np.linalg.norm(obj - tcp)
        in_place_margin = (np.linalg.norm(self.obj_init_pos - target))

        in_place = reward_utils.tolerance(obj_to_target,
                                    bounds=(0, _TARGET_RADIUS),
                                    margin=in_place_margin,
                                    sigmoid='long_tail',)

        object_grasped = self._gripper_caging_reward(action, obj)
        in_place_and_object_grasped = reward_utils.hamacher_product(object_grasped,
                                                                    in_place)
        reward = in_place_and_object_grasped

        if tcp_to_obj < 0.02 and (tcp_opened > 0) and (obj[2] - 0.01 > self.obj_init_pos[2]):
            reward += 1. + 5. * in_place
        if obj_to_target < _TARGET_RADIUS:
            reward = 10.
        return [reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped, in_place]
    def _reward_pos(wrench_center, target_pos):
        pos_error = target_pos - wrench_center

        radius = np.linalg.norm(pos_error[:2])

        aligned = radius < 0.02
        hooked = pos_error[2] > 0.0
        success = aligned and hooked

        # Target height is a 3D funnel centered on the peg.
        # Use the success flag to widen the bottleneck once the agent
        # learns to place the wrench on the peg -- no reason to encourage
        # tons of alignment accuracy if the task is already solved
        threshold = 0.02 if success else 0.01
        target_height = 0.0
        if radius > threshold:
            target_height = 0.02 * np.log(radius - threshold) + 0.2

        pos_error[2] = target_height - wrench_center[2]

        scale = np.array([1., 1., 3.])
        a = 0.1  # Relative importance of just *trying* to lift the wrench
        b = 0.9  # Relative importance of placing the wrench on the peg
        lifted = wrench_center[2] > 0.02 or radius < threshold
        in_place = a * float(lifted) + b * reward_utils.tolerance(
            np.linalg.norm(pos_error * scale),
            bounds=(0, 0.02),
            margin=0.4,
            sigmoid='long_tail',
        )

        return in_place, success
Example #12
    def compute_reward(self, action, obs):
        obj = obs[4:7]
        tcp_opened = obs[3]
        tcp_to_obj = np.linalg.norm(obj - self.tcp_center)
        target_to_obj = np.linalg.norm(obj - self._target_pos)
        target_to_obj_init = np.linalg.norm(self.obj_init_pos -
                                            self._target_pos)

        in_place = reward_utils.tolerance(
            target_to_obj,
            bounds=(0, self.TARGET_RADIUS),
            margin=target_to_obj_init,
            sigmoid='long_tail',
        )
        object_grasped = self._gripper_caging_reward(action, obj,
                                                     self.OBJ_RADIUS)

        reward = reward_utils.hamacher_product(object_grasped, in_place)

        if (tcp_to_obj < 0.01) and (0 < tcp_opened < 0.55) and \
                (target_to_obj_init - target_to_obj > 0.01):
            reward += 1. + 5. * in_place
        if target_to_obj < self.TARGET_RADIUS:
            reward = 10.
        return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped,
                in_place)
Example #13
    def compute_reward(self, action, obs):
        _TARGET_RADIUS = 0.05
        tcp = self.tcp_center
        obj = obs[4:7]
        tcp_opened = obs[3]
        target = np.array([self._target_pos[0], self._target_pos[1], obj[2]])

        obj_to_target = np.linalg.norm(obj - target)
        tcp_to_obj = np.linalg.norm(obj - tcp)
        in_place_margin = np.linalg.norm(self.obj_init_pos - target)

        in_place = reward_utils.tolerance(
            obj_to_target,
            bounds=(0, _TARGET_RADIUS),
            margin=in_place_margin,
            sigmoid='long_tail',
        )

        object_grasped = self._gripper_caging_reward(action, obj,
                                                     self.OBJ_RADIUS)
        in_place_and_object_grasped = reward_utils.hamacher_product(
            object_grasped, in_place)

        reward = (2 * object_grasped) + (6 * in_place_and_object_grasped)

        if obj_to_target < _TARGET_RADIUS:
            reward = 10.
        return [
            reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped,
            in_place
        ]
    def compute_reward(self, action, obs):
        _TARGET_RADIUS = 0.12
        tcp = self.tcp_center
        stick = obs[4:7] + np.array([.015, .0, .0])
        container = obs[11:14]
        tcp_opened = obs[3]
        target = self._target_pos

        tcp_to_stick = np.linalg.norm(stick - tcp)
        stick_to_target = np.linalg.norm(stick - target)
        stick_in_place_margin = (np.linalg.norm(self.stick_init_pos - target)) - _TARGET_RADIUS
        stick_in_place = reward_utils.tolerance(stick_to_target,
                                    bounds=(0, _TARGET_RADIUS),
                                    margin=stick_in_place_margin,
                                    sigmoid='long_tail',)

        container_to_target = np.linalg.norm(container - target)
        container_in_place_margin = np.linalg.norm(self.obj_init_pos - target) - _TARGET_RADIUS
        container_in_place = reward_utils.tolerance(container_to_target,
                                    bounds=(0, _TARGET_RADIUS),
                                    margin=container_in_place_margin,
                                    sigmoid='long_tail',)

        object_grasped = self._gripper_caging_reward(
            action=action,
            obj_pos=stick,
            obj_radius=0.04,
            pad_success_thresh=0.05,
            object_reach_radius=0.01,
            xz_thresh=0.01,
            high_density=True
        )

        reward = object_grasped

        if tcp_to_stick < 0.02 and (tcp_opened > 0) and \
                (stick[2] - 0.01 > self.stick_init_pos[2]):
            object_grasped = 1
            reward = 2. + 5. * stick_in_place + 3. * container_in_place

            if container_to_target <= _TARGET_RADIUS:
                reward = 10.

        return [reward, tcp_to_stick, tcp_opened, container_to_target, object_grasped, stick_in_place]
    def compute_reward(self, action, obs):
        del action
        gripper = obs[:3]
        lock = obs[4:7]

        # Add offset to track gripper's shoulder, rather than fingers
        offset = np.array([.0, .055, .07])

        scale = np.array([0.25, 1., 0.5])
        shoulder_to_lock = (gripper + offset - lock) * scale
        shoulder_to_lock_init = (
            self.init_tcp + offset - self.obj_init_pos
        ) * scale

        # This `ready_to_push` reward should be a *hint* for the agent, not an
        # end in itself. Make sure to devalue it compared to the value of
        # actually unlocking the lock
        ready_to_push = reward_utils.tolerance(
            np.linalg.norm(shoulder_to_lock),
            bounds=(0, 0.02),
            margin=np.linalg.norm(shoulder_to_lock_init),
            sigmoid='long_tail',
        )

        obj_to_target = abs(self._target_pos[0] - lock[0])
        pushed = reward_utils.tolerance(
            obj_to_target,
            bounds=(0, 0.005),
            margin=self._lock_length,
            sigmoid='long_tail',
        )

        reward = 2 * ready_to_push + 8 * pushed

        return (
            reward,
            np.linalg.norm(shoulder_to_lock),
            obs[3],
            obj_to_target,
            ready_to_push,
            pushed
        )
Example #16
    def compute_reward(self, action, obs):
        obj = obs[4:7]

        tcp = self.tcp_center
        target = self._target_pos.copy()

        target_to_obj = (obj - target)
        target_to_obj = np.linalg.norm(target_to_obj)
        target_to_obj_init = (self.obj_init_pos - target)
        target_to_obj_init = np.linalg.norm(target_to_obj_init)

        in_place = reward_utils.tolerance(
            target_to_obj,
            bounds=(0, self.TARGET_RADIUS),
            margin=abs(target_to_obj_init - self.TARGET_RADIUS),
            sigmoid='long_tail',
        )

        handle_reach_radius = 0.005
        tcp_to_obj = np.linalg.norm(obj - tcp)
        tcp_to_obj_init = np.linalg.norm(self.obj_init_pos - self.init_tcp)
        reach = reward_utils.tolerance(
            tcp_to_obj,
            bounds=(0, handle_reach_radius),
            margin=abs(tcp_to_obj_init - handle_reach_radius),
            sigmoid='gaussian',
        )
        gripper_closed = min(max(0, action[-1]), 1)

        reach = reward_utils.hamacher_product(reach, gripper_closed)
        tcp_opened = 0
        object_grasped = reach

        reward = reward_utils.hamacher_product(reach, in_place)
        if target_to_obj <= self.TARGET_RADIUS + 0.015:
            reward = 1.

        reward *= 10

        return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped,
                in_place)
Example #17
    def compute_reward(self, action, obs):
        obj = self._get_pos_objects()
        dial_push_position = self._get_pos_objects() + np.array([0.05, 0.02, 0.09])
        tcp = self.tcp_center
        target = self._target_pos.copy()

        target_to_obj = (obj - target)
        target_to_obj = np.linalg.norm(target_to_obj)
        target_to_obj_init = (self.dial_push_position - target)
        target_to_obj_init = np.linalg.norm(target_to_obj_init)

        in_place = reward_utils.tolerance(
            target_to_obj,
            bounds=(0, self.TARGET_RADIUS),
            margin=abs(target_to_obj_init - self.TARGET_RADIUS),
            sigmoid='long_tail',
        )

        dial_reach_radius = 0.005
        tcp_to_obj = np.linalg.norm(dial_push_position - tcp)
        tcp_to_obj_init = np.linalg.norm(self.dial_push_position - self.init_tcp)
        reach = reward_utils.tolerance(
            tcp_to_obj,
            bounds=(0, dial_reach_radius),
            margin=abs(tcp_to_obj_init-dial_reach_radius),
            sigmoid='gaussian',
        )
        gripper_closed = min(max(0, action[-1]), 1)

        reach = reward_utils.hamacher_product(reach, gripper_closed)
        tcp_opened = 0
        object_grasped = reach

        reward = 10 * reward_utils.hamacher_product(reach, in_place)

        return (reward,
               tcp_to_obj,
               tcp_opened,
               target_to_obj,
               object_grasped,
               in_place)
Example #18
    def _reward_pos(obs, target_pos):
        hand = obs[:3]
        lid = obs[4:7] + np.array([.0, .0, .02])

        threshold = 0.02
        # floor is a 3D funnel centered on the lid's handle
        radius = np.linalg.norm(hand[:2] - lid[:2])
        if radius <= threshold:
            floor = 0.0
        else:
            floor = 0.04 * np.log(radius - threshold) + 0.4
        # prevent the hand from running into the handle prematurely by keeping
        # it above the "floor"
        above_floor = 1.0 if hand[2] >= floor else reward_utils.tolerance(
            floor - hand[2],
            bounds=(0.0, 0.01),
            margin=floor / 2.0,
            sigmoid='long_tail',
        )
        # grab the lid's handle
        in_place = reward_utils.tolerance(
            np.linalg.norm(hand - lid),
            bounds=(0, 0.02),
            margin=0.5,
            sigmoid='long_tail',
        )
        ready_to_lift = reward_utils.hamacher_product(above_floor, in_place)

        # now actually put the lid on the box
        pos_error = target_pos - lid
        error_scale = np.array([1., 1., 3.])  # Emphasize Z error
        a = 0.2  # Relative importance of just *trying* to lift the lid at all
        b = 0.8  # Relative importance of placing the lid on the box
        lifted = a * float(lid[2] > 0.04) + b * reward_utils.tolerance(
            np.linalg.norm(pos_error * error_scale),
            bounds=(0, 0.05),
            margin=0.25,
            sigmoid='long_tail',
        )

        return ready_to_lift, lifted
Example #19
    def compute_reward(self, action, obs):
        _TARGET_RADIUS = 0.05
        tcp = self.tcp_center
        obj = obs[4:7]
        tcp_opened = obs[3]
        target = self._target_pos

        obj_to_target = np.linalg.norm(obj - target)
        tcp_to_obj = np.linalg.norm(obj - tcp)
        in_place_margin = np.linalg.norm(self.obj_init_pos - target)

        in_place = reward_utils.tolerance(
            obj_to_target,
            bounds=(0, _TARGET_RADIUS),
            margin=in_place_margin,
            sigmoid='long_tail',
        )

        object_grasped = self._gripper_caging_reward(action=action,
                                                     obj_pos=obj,
                                                     obj_radius=0.02,
                                                     pad_success_thresh=0.05,
                                                     object_reach_radius=0.01,
                                                     xz_thresh=0.01,
                                                     high_density=False)
        reward = reward_utils.hamacher_product(object_grasped, in_place)

        if (0.0 < obj[2] < 0.24 and \
                (target[0]-0.15 < obj[0] < target[0]+0.15) and \
                ((target[1] - 3*_TARGET_RADIUS) < obj[1] < target[1])):
            z_scaling = (0.24 - obj[2]) / 0.24
            y_scaling = (obj[1] -
                         (target[1] - 3 * _TARGET_RADIUS)) / (3 *
                                                              _TARGET_RADIUS)
            bound_loss = reward_utils.hamacher_product(y_scaling, z_scaling)
            in_place = np.clip(in_place - bound_loss, 0.0, 1.0)

        if ((0.0 < obj[2] < 0.24) and \
                (target[0]-0.15 < obj[0] < target[0]+0.15) and \
                (obj[1] > target[1])):
            in_place = 0.0

        if tcp_to_obj < 0.025 and (tcp_opened > 0) and \
                (obj[2] - 0.01 > self.obj_init_pos[2]):
            reward += 1. + 5. * in_place

        if obj_to_target < _TARGET_RADIUS:
            reward = 10.

        return [
            reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped,
            in_place
        ]
Example #20
    def _reward_pos(obs, theta):
        hand = obs[:3]
        door = obs[4:7] + np.array([-0.05, 0, 0])

        threshold = 0.12
        # floor is a 3D funnel centered on the door handle
        radius = np.linalg.norm(hand[:2] - door[:2])
        if radius <= threshold:
            floor = 0.0
        else:
            floor = 0.04 * np.log(radius - threshold) + 0.4
        # prevent the hand from running into the handle prematurely by keeping
        # it above the "floor"
        above_floor = 1.0 if hand[2] >= floor else reward_utils.tolerance(
            floor - hand[2],
            bounds=(0.0, 0.01),
            margin=floor / 2.0,
            sigmoid='long_tail',
        )
        # move the hand to a position between the handle and the main door body
        in_place = reward_utils.tolerance(
            np.linalg.norm(hand - door - np.array([0.05, 0.03, -0.01])),
            bounds=(0, threshold / 2.0),
            margin=0.5,
            sigmoid='long_tail',
        )
        ready_to_open = reward_utils.hamacher_product(above_floor, in_place)

        # now actually open the door
        door_angle = -theta
        a = 0.2  # Relative importance of just *trying* to open the door at all
        b = 0.8  # Relative importance of fully opening the door
        opened = a * float(theta < -np.pi / 90.) + b * reward_utils.tolerance(
            np.pi / 2. + np.pi / 6 - door_angle,
            bounds=(0, 0.5),
            margin=np.pi / 3.,
            sigmoid='long_tail',
        )

        return ready_to_open, opened
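For reference, the `opened` term above saturates when the error `np.pi / 2. + np.pi / 6 - door_angle` lands inside its (0, 0.5) bounds, i.e. once the door has swung to within about 0.5 rad of 2π/3 (120°). A stand-alone check of the angles involved:

# Stand-alone check of the door-angle target behind the `opened` term above.
import numpy as np

target_angle = np.pi / 2. + np.pi / 6  # ~2.094 rad (120 degrees)
for theta in (0.0, -np.pi / 6, -np.pi / 2, -2 * np.pi / 3):
    door_angle = -theta
    error = target_angle - door_angle
    saturated = 0.0 <= error <= 0.5  # inside the tolerance bounds
    print(f"theta={theta:+.3f} rad  error={error:+.3f} rad  saturated={saturated}")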
    def compute_reward(self, action, obs):
        del action
        obj = obs[4:7]
        tcp = self.tcp_center

        tcp_to_obj = np.linalg.norm(obj - tcp)
        tcp_to_obj_init = np.linalg.norm(obj - self.init_tcp)
        obj_to_target = abs(self._target_pos[1] - obj[1])

        near_button = reward_utils.tolerance(
            tcp_to_obj,
            bounds=(0, 0.01),
            margin=tcp_to_obj_init,
            sigmoid='long_tail',
        )
        button_pressed = reward_utils.tolerance(
            obj_to_target,
            bounds=(0, 0.005),
            margin=self._obj_to_target_init,
            sigmoid='long_tail',
        )

        reward = 0.0
        if tcp_to_obj > 0.07:
            tcp_status = (1 - obs[3]) / 2.0
            reward = 2 * reward_utils.hamacher_product(tcp_status, near_button)
        else:
            reward = 2
            reward += 2 * (1 + obs[3])
            reward += 4 * button_pressed ** 2

        return (
            reward,
            tcp_to_obj,
            obs[3],
            obj_to_target,
            near_button,
            button_pressed
        )
    def compute_reward(self, action, obs):
        del action
        obj = obs[4:7] + np.array([-.04, .0, .03])
        tcp = self.tcp_center
        target = self._target_pos.copy()

        target_to_obj = (obj - target)
        target_to_obj = np.linalg.norm(target_to_obj)
        target_to_obj_init = (self.obj_init_pos - target)
        target_to_obj_init = np.linalg.norm(target_to_obj_init)

        in_place = reward_utils.tolerance(
            target_to_obj,
            bounds=(0, self._target_radius),
            margin=abs(target_to_obj_init - self._target_radius),
            sigmoid='long_tail',
        )

        faucet_reach_radius = 0.01
        tcp_to_obj = np.linalg.norm(obj - tcp)
        tcp_to_obj_init = np.linalg.norm(self.obj_init_pos - self.init_tcp)
        reach = reward_utils.tolerance(
            tcp_to_obj,
            bounds=(0, faucet_reach_radius),
            margin=abs(tcp_to_obj_init - faucet_reach_radius),
            sigmoid='gaussian',
        )

        tcp_opened = 0
        object_grasped = reach

        reward = 2 * reach + 3 * in_place

        reward *= 2

        reward = 10 if target_to_obj <= self._target_radius else reward

        return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped,
                in_place)
    def compute_reward(self, actions, obs):
        _TARGET_RADIUS = 0.05
        tcp = self.tcp_center
        obj = obs[4:7]
        tcp_opened = obs[3]
        target = self._target_pos

        obj_to_target = np.linalg.norm(obj - target)
        in_place_margin = np.linalg.norm(self.obj_init_pos - target)
        in_place = reward_utils.tolerance(
            obj_to_target,
            bounds=(0, _TARGET_RADIUS),
            margin=in_place_margin - _TARGET_RADIUS,
            sigmoid='long_tail',
        )

        tcp_to_obj = np.linalg.norm(tcp - obj)
        obj_grasped_margin = np.linalg.norm(self.init_tcp - self.obj_init_pos)
        object_grasped = reward_utils.tolerance(
            tcp_to_obj,
            bounds=(0, _TARGET_RADIUS),
            margin=obj_grasped_margin - _TARGET_RADIUS,
            sigmoid='long_tail',
        )

        in_place_and_object_grasped = reward_utils.hamacher_product(
            object_grasped, in_place)
        reward = 1.5 * object_grasped

        if tcp[2] <= 0.03 and tcp_to_obj < 0.07:
            reward = 2 + (7 * in_place)

        if obj_to_target < _TARGET_RADIUS:
            reward = 10.
        return [
            reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped,
            in_place
        ]
Example #24
    def _reward_pos(hammer_head, target_pos):
        pos_error = target_pos - hammer_head

        a = 0.1  # Relative importance of just *trying* to lift the hammer
        b = 0.9  # Relative importance of hitting the nail
        lifted = hammer_head[2] > 0.02
        in_place = a * float(lifted) + b * reward_utils.tolerance(
            np.linalg.norm(pos_error),
            bounds=(0, 0.02),
            margin=0.2,
            sigmoid='long_tail',
        )

        return in_place
    def _reward_pos(wrench_center, target_pos):
        pos_error = target_pos + np.array([.0, .0, .1]) - wrench_center

        a = 0.1  # Relative importance of just *trying* to lift the wrench
        b = 0.9  # Relative importance of placing the wrench on the peg
        lifted = wrench_center[2] > 0.02
        in_place = a * float(lifted) + b * reward_utils.tolerance(
            np.linalg.norm(pos_error),
            bounds=(0, 0.02),
            margin=0.2,
            sigmoid='long_tail',
        )

        return in_place
Example #26
    def compute_reward(self, actions, obs):
        del actions

        objPos = obs[4:7]
        obj = self._get_pos_objects()
        tcp = self.tcp_center
        target = self._target_pos.copy()

        target_to_obj = (obj[2] - target[2])
        target_to_obj = np.linalg.norm(target_to_obj)
        target_to_obj_init = (self._handle_init_pos[2] - target[2])
        target_to_obj_init = np.linalg.norm(target_to_obj_init)

        in_place = reward_utils.tolerance(
            target_to_obj,
            bounds=(0, self.TARGET_RADIUS),
            margin=abs(target_to_obj_init - self.TARGET_RADIUS),
            sigmoid='long_tail',
        )

        handle_radius = 0.02
        tcp_to_obj = np.linalg.norm(obj - tcp)
        tcp_to_obj_init = np.linalg.norm(self._handle_init_pos - self.init_tcp)
        reach = reward_utils.tolerance(
            tcp_to_obj,
            bounds=(0, handle_radius),
            margin=abs(tcp_to_obj_init - handle_radius),
            sigmoid='long_tail',
        )
        tcp_opened = 0
        object_grasped = reach

        reward = reward_utils.hamacher_product(reach, in_place)
        reward = 1 if target_to_obj <= self.TARGET_RADIUS else reward
        reward *= 10
        return (reward, tcp_to_obj, tcp_opened, target_to_obj, object_grasped,
                in_place)
    def compute_reward(self, action, obs):
        del action
        obj = obs[4:7]
        tcp = self.get_body_com('leftpad')

        scale = np.array([0.25, 1., 0.5])
        tcp_to_obj = np.linalg.norm((obj - tcp) * scale)
        tcp_to_obj_init = np.linalg.norm((obj - self.init_left_pad) * scale)

        obj_to_target = abs(self._target_pos[2] - obj[2])

        tcp_opened = max(obs[3], 0.0)
        near_lock = reward_utils.tolerance(
            tcp_to_obj,
            bounds=(0, 0.01),
            margin=tcp_to_obj_init,
            sigmoid='long_tail',
        )
        lock_pressed = reward_utils.tolerance(
            obj_to_target,
            bounds=(0, 0.005),
            margin=self._lock_length,
            sigmoid='long_tail',
        )

        reward = 2 * reward_utils.hamacher_product(tcp_opened, near_lock)
        reward += 8 * lock_pressed

        return (
            reward,
            tcp_to_obj,
            obs[3],
            obj_to_target,
            near_lock,
            lock_pressed
        )
Example #28
    def compute_reward(self, actions, obs):
        _TARGET_RADIUS = 0.05
        tcp = self.tcp_center
        obj = obs[4:7]
        tcp_opened = obs[3]
        target = self._target_pos

        tcp_to_target = np.linalg.norm(tcp - target)
        obj_to_target = np.linalg.norm(obj - target)

        in_place_margin = (np.linalg.norm(self.hand_init_pos - target))
        in_place = reward_utils.tolerance(tcp_to_target,
                                    bounds=(0, _TARGET_RADIUS),
                                    margin=in_place_margin,
                                    sigmoid='long_tail',)

        return [10 * in_place, tcp_to_target, in_place]
Example #29
    def compute_reward(self, action, obs):
        obj = obs[4:7]
        target = self._target_pos.copy()

        # Emphasize X and Y errors
        scale = np.array([2., 2., 1.])
        target_to_obj = (obj - target) * scale
        target_to_obj = np.linalg.norm(target_to_obj)
        target_to_obj_init = (self.obj_init_pos - target) * scale
        target_to_obj_init = np.linalg.norm(target_to_obj_init)

        in_place = reward_utils.tolerance(
            target_to_obj,
            bounds=(0, 0.05),
            margin=target_to_obj_init,
            sigmoid='long_tail',
        )
        tcp_opened = obs[3]
        tcp_to_obj = np.linalg.norm(obj - self.tcp_center)

        object_grasped = self._gripper_caging_reward(
            action,
            obj,
            object_reach_radius=0.04,
            obj_radius=0.02,
            pad_success_thresh=0.05,
            xz_thresh=0.05,
            desired_gripper_effort=0.7,
            medium_density=True
        )

        reward = reward_utils.hamacher_product(object_grasped, in_place)

        if tcp_to_obj < 0.04 and tcp_opened > 0:
            reward += 1. + 5. * in_place
        if target_to_obj < 0.05:
            reward = 10.
        return (
            reward,
            tcp_to_obj,
            tcp_opened,
            np.linalg.norm(obj - target),  # recompute to avoid `scale` above
            object_grasped,
            in_place
        )
Example #30
    def compute_reward(self, action, obs):
        tcp = self.tcp_center
        obj = obs[4:7]
        tcp_opened = obs[3]
        target = self._target_pos
        tcp_to_obj = np.linalg.norm(obj - tcp)
        obj_to_target = np.linalg.norm(obj - target)
        pad_success_margin = 0.05
        object_reach_radius = 0.01
        x_z_margin = 0.005
        obj_radius = 0.025

        object_grasped = self._gripper_caging_reward(
            action,
            obj,
            object_reach_radius=object_reach_radius,
            obj_radius=obj_radius,
            pad_success_thresh=pad_success_margin,
            xz_thresh=x_z_margin,
            desired_gripper_effort=0.8,
            high_density=True)
        in_place_margin = np.linalg.norm(self.obj_init_pos - target)
        
        in_place = reward_utils.tolerance(
            obj_to_target,
            bounds=(0, 0.05),
            margin=in_place_margin,
            sigmoid='long_tail',
        )
        grasp_success = (tcp_opened > 0.5 and
                         (obj[0] - self.obj_init_pos[0] > 0.015))

        reward = 2 * object_grasped

        if grasp_success and tcp_to_obj < 0.035:
            reward = 1 + 2 * object_grasped + 5 * in_place

        if obj_to_target <= 0.05:
            reward = 10.

        return (reward, tcp_to_obj, tcp_opened, obj_to_target, object_grasped,
                in_place, float(grasp_success))