Exemplo n.º 1
0
    def _post_action(self, action):
        """
        Run any necessary visualization after running the action

        Args:
            action (np.array): Action being passed during this timestep

        Returns:
            3-tuple:

                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) empty dict to be filled with information by subclassed method

        """
        reward, done, info = super()._post_action(action)
        chassis_body_id = self.sim.model.body_name2id(
            self.robots[0].robot_model.robot_base)
        body_pos_z = self.sim.data.body_xpos[chassis_body_id][2]
        quat = np.array([
            self.sim.data.qpos[x]
            for x in self.robots[0]._ref_chassis_pos_indexes
        ])[3:]
        mat = quat2mat(quat)
        euler = mat2euler(mat)
        roll, pitch = euler[:2]
        if abs(roll > 0.785) or abs(pitch > 0.785) or body_pos_z < 0.20:
            done = True
            reward = -10
        return reward, done, info
Exemplo n.º 2
0
    def _reset_internal(self):
        """
        Resets simulation internal configurations.
        """
        super()._reset_internal()

        # Reset all object positions using initializer sampler if we're not directly loading from an xml
        if not self.deterministic_reset:

            # Sample from the placement initializer for all objects
            object_placements = self.placement_initializer.sample()

            # Loop through all objects and reset their positions
            for obj_pos, obj_quat, obj in object_placements.values():
                # If prehensile, set the object normally
                if self.prehensile:
                    self.sim.data.set_joint_qpos(
                        obj.joints[0],
                        np.concatenate([np.array(obj_pos),
                                        np.array(obj_quat)]))
                # Else, set the object in the hand of the robot and loop a few steps to guarantee the robot is grasping
                #   the object initially
                else:
                    eef_rot_quat = T.mat2quat(
                        T.euler2mat(
                            [np.pi - T.mat2euler(self._eef0_xmat)[2], 0, 0]))
                    obj_quat = T.quat_multiply(obj_quat, eef_rot_quat)
                    for j in range(100):
                        # Set object in hand
                        self.sim.data.set_joint_qpos(
                            obj.joints[0],
                            np.concatenate(
                                [self._eef0_xpos,
                                 np.array(obj_quat)]))
                        # Close gripper (action = 1) and prevent arm from moving
                        if self.env_configuration == 'bimanual':
                            # Execute no-op action with gravity compensation
                            torques = np.concatenate([
                                self.robots[0].controller["right"].
                                torque_compensation, self.robots[0].
                                controller["left"].torque_compensation
                            ])
                            self.sim.data.ctrl[self.robots[
                                0]._ref_joint_actuator_indexes] = torques
                            # Execute gripper action
                            self.robots[0].grip_action(
                                gripper=self.robots[0].gripper["right"],
                                gripper_action=[1])
                        else:
                            # Execute no-op action with gravity compensation
                            self.sim.data.ctrl[self.robots[0]._ref_joint_actuator_indexes] =\
                                self.robots[0].controller.torque_compensation
                            self.sim.data.ctrl[self.robots[1]._ref_joint_actuator_indexes] = \
                                self.robots[1].controller.torque_compensation
                            # Execute gripper action
                            self.robots[0].grip_action(
                                gripper=self.robots[0].gripper,
                                gripper_action=[1])
                        # Take forward step
                        self.sim.step()
    def _get_initial_qpos(self):
        """
        Calculates the initial joint position for the robot based on the initial desired pose (self.traj_pt, self.goal_quat).

        Args:

        Returns:
            (np.array): n joint positions 
        """
        pos = self._convert_robosuite_to_toolbox_xpos(self.traj_pt)
        ori_euler = mat2euler(quat2mat(self.goal_quat))

        # desired pose
        T = SE3(pos) * SE3.RPY(ori_euler)

        # find initial joint positions
        if self.robots[0].name == "UR5e":
            robot = rtb.models.DH.UR5()
            sol = robot.ikine_min(T, q0=self.robots[0].init_qpos)

            # flip last joint around (pi)
            sol.q[-1] -= np.pi
            return sol.q

        elif self.robots[0].name == "Panda":
            robot = rtb.models.DH.Panda()
            sol = robot.ikine_min(T, q0=self.robots[0].init_qpos)
            return sol.q
Exemplo n.º 4
0
def baseline_grasp(env, noise=False):
    target_pos, target_angle = env._gen_grasp_gt()  #x,y,z,a
    target_angle = T.mat2euler(target_angle)[-1]

    if noise:
        cube_length = env.object.size[0] / 2
        gripper_noise = np.random.uniform(low=0.95, high=1.05)
        lateral_noise = np.random.uniform(low=-0.01, high=0.01)
        longitude_noise = np.random.uniform(low=cube_length * -1.5,
                                            high=cube_length)
        angle_noise = np.random.uniform(low=-0.01, high=0.01)
    else:
        gripper_noise = 1
        lateral_noise = 0
        longitude_noise = 0
        angle_noise = 0

    target_pos[2] += GRIPPER_LENGTH * gripper_noise

    target_pos[0] += np.sin(target_angle) * lateral_noise
    target_pos[1] += np.cos(target_angle) * lateral_noise

    target_pos[0] += np.cos(target_angle) * longitude_noise
    target_pos[1] += np.sin(target_angle) * longitude_noise

    target_angle += angle_noise

    return target_pos, target_angle
    def move_orient(self, env, q1):

        q0 = env.observation_spec()['eef_quat']
        x_target = env.observation_spec()['eef_pos']

        fraction_size = 50
        for i in range(fraction_size):
            q_target = T.quat_slerp(q0, q1, fraction=(i + 1) / fraction_size)
            steps = 0
            lamda = 0.01

            current = env._right_hand_orn
            target_rotation = T.quat2mat(q_target)
            drotation = current.T.dot(target_rotation)

            while (np.linalg.norm(T.mat2euler(drotation)) > 0.01):

                current = env._right_hand_orn
                target_rotation = T.quat2mat(q_target)
                drotation = current.T.dot(target_rotation)
                dquat = T.mat2quat(drotation)
                x_current = env.observation_spec()['eef_pos']
                d_pos = np.clip((x_target - x_current) * lamda, -0.05, 0.05)
                d_pos = [0, 0, 0]

                action = np.concatenate((d_pos, dquat, [self.grasp]))
                env.step(action)
                env.render()
                steps += 1

                if (steps > 20):
                    break
        return
Exemplo n.º 6
0
    def reward(self, action=None):
        """
        Reward function for the task.

        Sparse un-normalized reward:

            - a discrete reward of 2.25 is provided if the cube is lifted

        Un-normalized summed components if using reward shaping:

            - Reaching: in [0, 1], to encourage the arm to reach the cube
            - Grasping: in {0, 0.25}, non-zero if arm is grasping the cube
            - Lifting: in {0, 1}, non-zero if arm has lifted the cube

        The sparse reward only consists of the lifting component.

        Note that the final reward is normalized and scaled by
        reward_scale / 2.25 as well so that the max score is equal to reward_scale

        Args:
            action (np array): [NOT USED]

        Returns:
            float: reward value
        """
        chassis_body_id = self.sim.model.body_name2id(
            self.robots[0].robot_model.robot_base)
        body_pos = self.sim.data.body_xpos[chassis_body_id]
        quat = np.array([
            self.sim.data.qpos[x]
            for x in self.robots[0]._ref_chassis_pos_indexes
        ])[3:]
        mat = quat2mat(quat)
        euler = mat2euler(mat)
        pitch = euler[1]
        ctrl_norm = np.linalg.norm(self.sim.data.ctrl[
            self.robots[0]._ref_joint_torq_actuator_indexes])
        reward = -1*((10*(body_pos[2]-0.37))**2)-0.03*(ctrl_norm**2)-3*abs(pitch) \
                 - 0.2*((10*body_pos[0])**2+(10*body_pos[1])**2)

        # Scale reward if requested
        if self.reward_scale is not None:
            reward *= self.reward_scale

        return reward
Exemplo n.º 7
0
    def get_interpolated_goal(self):
        """
        Provides the next step in interpolation given the remaining steps.

        NOTE: If this interpolator is for orientation, it is assumed to be receiving either euler angles or quaternions

        Returns:
            np.array: Next position in the interpolated trajectory
        """
        # Grab start position
        x = np.array(self.start)
        # Calculate the desired next step based on remaining interpolation steps
        if self.ori_interpolate is not None:
            # This is an orientation interpolation, so we interpolate linearly around a sphere instead
            goal = np.array(self.goal)
            if self.ori_interpolate == "euler":
                # this is assumed to be euler angles (x,y,z), so we need to first map to quat
                x = T.mat2quat(T.euler2mat(x))
                goal = T.mat2quat(T.euler2mat(self.goal))

            # Interpolate to the next sequence
            x_current = T.quat_slerp(x,
                                     goal,
                                     fraction=(self.step + 1) /
                                     self.total_steps)
            if self.ori_interpolate == "euler":
                # Map back to euler
                x_current = T.mat2euler(T.quat2mat(x_current))
        else:
            # This is a normal interpolation
            dx = (self.goal - x) / (self.total_steps - self.step)
            x_current = x + dx

        # Increment step if there's still steps remaining based on ramp ratio
        if self.step < self.total_steps - 1:
            self.step += 1

        # Return the new interpolated step
        return x_current
Exemplo n.º 8
0
    def compute_joint_velocities_for_endpoint_velocity(
            self, endpoint_velocity_in_base_frame, joint_angles_array):

        ###
        ### Compute the Jacobian inverse at the current set of joint angles
        ###
        # Create a KDL array of the joint angles
        joint_angles_kdl_array = self.convert_joint_angles_array_to_kdl_array(
            joint_angles_array)
        # Compute the Jacobian at the current joint angles
        jacobian = self.compute_jacobian(joint_angles_kdl_array)
        # Compute the pseudo-inverse
        jacobian_inverse = np.linalg.pinv(jacobian)
        ###
        ### Then, use the Jacobian inverse to compute the required joint velocities
        ###
        # Multiply the Jacobian inverse by the Cartesian velocity
        joint_velocities = jacobian_inverse * np.concatenate([
            endpoint_velocity_in_base_frame[:3, -1].reshape(3, 1),
            T.mat2euler(endpoint_velocity_in_base_frame[:3, :3],
                        axes='sxyz').reshape(3, 1)
        ])
        # Return the velocities
        return joint_velocities
Exemplo n.º 9
0
def set_goal_orientation(delta,
                         current_orientation,
                         orientation_limit=None,
                         set_ori=None):
    """
    Calculates and returns the desired goal orientation, clipping the result accordingly to @orientation_limits.
    @delta and @current_orientation must be specified if a relative goal is requested, else @set_ori must be
    an orientation matrix specified to define a global orientation

    Args:
        delta (np.array): Desired relative change in orientation, in axis-angle form [ax, ay, az]
        current_orientation (np.array): Current orientation, in rotation matrix form
        orientation_limit (None or np.array): 2d array defining the (min, max) limits of permissible orientation goal commands
        set_ori (None or np.array): If set, will ignore @delta and set the goal orientation to this value

    Returns:
        np.array: calculated goal orientation in absolute coordinates

    Raises:
        ValueError: [Invalid orientation_limit shape]
    """
    # directly set orientation
    if set_ori is not None:
        goal_orientation = set_ori

    # otherwise use delta to set goal orientation
    else:
        # convert axis-angle value to rotation matrix
        quat_error = trans.axisangle2quat(delta)
        rotation_mat_error = trans.quat2mat(quat_error)
        goal_orientation = np.dot(rotation_mat_error, current_orientation)

    # check for orientation limits
    if np.array(orientation_limit).any():
        if orientation_limit.shape != (2, 3):
            raise ValueError("Orientation limit should be shaped (2,3) "
                             "but is instead: {}".format(
                                 orientation_limit.shape))

        # Convert to euler angles for clipping
        euler = trans.mat2euler(goal_orientation)

        # Clip euler angles according to specified limits
        limited = False
        for idx in range(3):
            if orientation_limit[0][idx] < orientation_limit[1][
                    idx]:  # Normal angle sector meaning
                if orientation_limit[0][idx] < euler[idx] < orientation_limit[
                        1][idx]:
                    continue
                else:
                    limited = True
                    dist_to_lower = euler[idx] - orientation_limit[0][idx]
                    if dist_to_lower > np.pi:
                        dist_to_lower -= 2 * np.pi
                    elif dist_to_lower < -np.pi:
                        dist_to_lower += 2 * np.pi

                    dist_to_higher = euler[idx] - orientation_limit[1][idx]
                    if dist_to_lower > np.pi:
                        dist_to_higher -= 2 * np.pi
                    elif dist_to_lower < -np.pi:
                        dist_to_higher += 2 * np.pi

                    if dist_to_lower < dist_to_higher:
                        euler[idx] = orientation_limit[0][idx]
                    else:
                        euler[idx] = orientation_limit[1][idx]
            else:  # Inverted angle sector meaning
                if orientation_limit[0][idx] < euler[idx] or euler[
                        idx] < orientation_limit[1][idx]:
                    continue
                else:
                    limited = True
                    dist_to_lower = euler[idx] - orientation_limit[0][idx]
                    if dist_to_lower > np.pi:
                        dist_to_lower -= 2 * np.pi
                    elif dist_to_lower < -np.pi:
                        dist_to_lower += 2 * np.pi

                    dist_to_higher = euler[idx] - orientation_limit[1][idx]
                    if dist_to_lower > np.pi:
                        dist_to_higher -= 2 * np.pi
                    elif dist_to_lower < -np.pi:
                        dist_to_higher += 2 * np.pi

                    if dist_to_lower < dist_to_higher:
                        euler[idx] = orientation_limit[0][idx]
                    else:
                        euler[idx] = orientation_limit[1][idx]
        if limited:
            goal_orientation = trans.euler2mat(
                np.array([euler[0], euler[1], euler[2]]))
    return goal_orientation
Exemplo n.º 10
0
def test_linear_interpolator():

    for controller_name in ["IK_POSE", "OSC_POSE"]:

        for traj in ["pos", "ori"]:

            # Define counter to increment timesteps and torques for each trajectory
            timesteps = [0, 0]
            summed_abs_delta_torques = [np.zeros(7), np.zeros(7)]

            for interpolator in [None, "linear"]:
                # Define numpy seed so we guarantee consistent starting pos / ori for each trajectory
                np.random.seed(3)

                # Define controller path to load
                controller_path = os.path.join(
                    os.path.dirname(__file__), '../../robosuite',
                    'controllers/config/{}.json'.format(
                        controller_name.lower()))
                with open(controller_path) as f:
                    controller_config = json.load(f)
                    controller_config["interpolation"] = interpolator
                    controller_config["ramp_ratio"] = 1.0

                # Now, create a test env for testing the controller on
                env = suite.make(
                    "Lift",
                    robots="Sawyer",
                    has_renderer=args.
                    render,  # by default, don't use on-screen renderer for visual validation
                    has_offscreen_renderer=False,
                    use_camera_obs=False,
                    horizon=10000,
                    control_freq=20,
                    controller_configs=controller_config)

                # Reset the environment
                env.reset()

                # Hardcode the starting position for sawyer
                init_qpos = [
                    -0.5538, -0.8208, 0.4155, 1.8409, -0.4955, 0.6482, 1.9628
                ]
                env.robots[0].set_robot_joint_positions(init_qpos)
                env.robots[0].controller.update_initial_joints(init_qpos)
                env.robots[0].controller.reset_goal()

                # Notify user a new trajectory is beginning
                print(
                    "\nTesting controller {} with trajectory {} and interpolator={}..."
                    .format(controller_name, traj, interpolator))

                # If rendering, set controller to front view to get best angle for viewing robot movements
                if args.render:
                    env.viewer.set_camera(camera_id=0)

                # Keep track of state of robot eef (pos, ori (euler)) and torques
                current_torques = np.zeros(7)
                initial_state = [
                    env.robots[0]._hand_pos,
                    T.mat2euler(env.robots[0]._hand_orn)
                ]
                dstate = [
                    env.robots[0]._hand_pos - initial_state[0],
                    T.mat2euler(env.robots[0]._hand_orn) - initial_state[1]
                ]

                # Define the uniform trajectory action
                if traj == "pos":
                    pos_act = pos_action_ik if controller_name == "IK_POSE" else pos_action_osc
                    rot_act = np.zeros(3)
                else:
                    pos_act = np.zeros(3)
                    rot_act = rot_action_ik if controller_name == "IK_POSE" else rot_action_osc

                # Compose the action
                action = np.concatenate([pos_act, rot_act, [0]])

                # Determine which trajectory we're executing
                k = 0 if traj == "pos" else 1
                j = 0 if not interpolator else 1

                # Run trajectory until the threshold condition is met
                while abs(dstate[k][indexes[k]]) < abs(thresholds[k]):
                    _, summed_torques, current_torques = step(
                        env, action, current_torques)
                    if args.render:
                        env.render()

                    # Update torques, timestep count, and state
                    summed_abs_delta_torques[j] += summed_torques
                    timesteps[j] += 1
                    dstate = [
                        env.robots[0]._hand_pos - initial_state[0],
                        T.mat2euler(env.robots[0]._hand_orn) - initial_state[1]
                    ]

                # When finished, print out the timestep results
                print(
                    "Completed trajectory. Total summed absolute delta torques: {}"
                    .format(summed_abs_delta_torques[j]))

                # Shut down this env before starting the next test
                env.close()

    # Tests completed!
    print()
    print("-" * 80)
    print("All linear interpolator testing completed.\n")
def test_linear_interpolator():

    for controller_name in ["EE_POS_ORI", "EE_IK"]:

        for traj in ["pos", "ori"]:

            # Define counter to increment timesteps for each trajectory
            timesteps = [0, 0]

            for interpolator in [None, "linear"]:
                # Define numpy seed so we guarantee consistent starting pos / ori for each trajectory
                np.random.seed(3)

                # Define controller path to load
                controller_path = os.path.join(
                    os.path.dirname(__file__), '../../robosuite',
                    'controllers/config/{}.json'.format(
                        controller_name.lower()))
                with open(controller_path) as f:
                    controller_config = json.load(f)
                    controller_config["interpolation"] = interpolator
                    controller_config["ramp_ratio"] = 1.0

                # Now, create a test env for testing the controller on
                env = suite.make(
                    "Lift",
                    robots="Sawyer",
                    has_renderer=args.
                    render,  # by default, don't use on-screen renderer for visual validation
                    has_offscreen_renderer=False,
                    use_camera_obs=False,
                    horizon=10000,
                    control_freq=20,
                    controller_configs=controller_config)

                # Reset the environment
                env.reset()

                # Notify user a new trajectory is beginning
                print(
                    "\nTesting controller {} with trajectory {} and interpolator={}..."
                    .format(controller_name, traj, interpolator))

                # If rendering, set controller to front view to get best angle for viewing robot movements
                if args.render:
                    env.viewer.set_camera(camera_id=0)

                # Keep track of state of robot eef (pos, ori (euler))
                initial_state = [
                    env.robots[0]._right_hand_pos,
                    T.mat2euler(env.robots[0]._right_hand_orn)
                ]
                dstate = [
                    env.robots[0]._right_hand_pos - initial_state[0],
                    T.mat2euler(env.robots[0]._right_hand_orn) -
                    initial_state[1]
                ]

                # Define the uniform trajectory action
                if traj == "pos":
                    pos_act = pos_action_ik if controller_name.lower(
                    ) == "ee_ik" else pos_action_osc
                    rot_act = T.mat2quat(T.euler2mat(
                        np.zeros(3))) if controller_name.lower(
                        ) == "ee_ik" else np.zeros(3)
                else:
                    pos_act = np.zeros(3)
                    rot_act = rot_action_ik if controller_name.lower(
                    ) == "ee_ik" else rot_action_osc

                # Compose the action
                action = np.concatenate([pos_act, rot_act, [0]])

                # Determine which trajectory we're executing
                k = 0 if traj == "pos" else 1
                j = 0 if not interpolator else 1

                # Run trajectory until the threshold condition is met
                while abs(dstate[k][indexes[k]]) < abs(thresholds[k]):
                    env.step(action)
                    if args.render:
                        env.render()

                    # Update timestep count and state
                    timesteps[j] += 1
                    dstate = [
                        env.robots[0]._right_hand_pos - initial_state[0],
                        T.mat2euler(env.robots[0]._right_hand_orn) -
                        initial_state[1]
                    ]

                # When finished, print out the timestep results
                print("Completed trajectory. Took {} timesteps total.".format(
                    timesteps[j]))

                # Shut down this env before starting the next test
                env.close()

            # Assert that the interpolated path is slower than the non-interpolated one
            assert timesteps[1] > min_ratio * timesteps[0], "Error: Interpolated trajectory time should be longer " \
                                                            "than non-interpolated!"

    # Tests completed!
    print()
    print("-" * 80)
    print("All linear interpolator testing completed.\n")
Exemplo n.º 12
0
import numpy as np
import matplotlib.pyplot as plt
import robosuite.utils.transform_utils as T

target_pos = np.array([0.66, 0.16, 0.387])
target_quat = np.array(
    [-9.99048222e-01, 4.36193835e-02, 6.11740587e-17, 2.67091712e-18])

target_angle = T.mat2euler(T.quat2mat(target_quat))
print(target_angle)

pos_traj = np.load('pos_traj.npy')
quat_traj = np.concatenate(
    [np.load('quat_traj.npy'),
     np.load('quat_traj2.npy')], axis=0)

# plt.plot(range(len(pos_traj)), pos_traj[:,2], label='z')
# plt.plot(range(len(pos_traj)), [target_pos[2]] * len(pos_traj), label='target')
# plt.show()

angle_traj = np.array([T.mat2euler(T.quat2mat(q)) for q in quat_traj])
print(angle_traj[-1])
plt.plot(range(len(angle_traj)), angle_traj[:, 2], label='actual')
plt.plot(range(len(angle_traj)), [target_angle[2]] * len(angle_traj),
         label='target')
plt.xlabel('t')
plt.ylabel('Z-orientation (rad)')
plt.legend()
plt.show()
Exemplo n.º 13
0
                                             point=target_pos)
            target_rot_Z = T.rotation_matrix(math.pi + target_angle,
                                             np.array([0, 0, 1]),
                                             point=target_pos)
            target_rot = np.matmul(target_rot_Z,
                                   np.matmul(target_rot_Y, target_rot_X))
            target_quat = T.mat2quat(target_rot)

            dquat = T.quat_slerp(current_quat, target_quat, 0.01)
            dquat = T.quat_multiply(dquat, T.quat_inverse(current_quat))

            action = np.concatenate([dpos, dquat, [grasp]])
            obs, reward, done, info = env.step(action)

            pos_traj.append(current_pos - table_top_center)
            angle_traj.append(T.mat2euler(T.quat2mat(current_quat)))
            # env.render()

        time = 0

        done_task = False
        while not done_task:
            time += 1
            if time > 2000:
                break

            current_pos = env._right_hand_pos
            dpos = target_pos + table_top_center - current_pos

            if np.max(np.abs(dpos)) < 1e-2:
                done_task = True
Exemplo n.º 14
0
    obs = env._get_observation()

    depth = obs['depth']
    depth = cv2.flip(depth, 0)
    

    W_2, H_2 = int(depth.shape[0] / 2), int(depth.shape[0] / 2)
    crop = 64
    start_H = W_2-crop
    end_H = W_2+crop
    start_W = H_2-crop
    end_W = H_2+crop
    roi = depth[start_H:end_H, start_W:end_W]

    target_pos, target_angle = env._gen_grasp_gt() #x,y,z,a
    target_angle = T.mat2euler(target_angle)[-1]

    gripper_noise = np.random.uniform(low=0.95, high=1.05)
    target_pos[2] += GRIPPER_LENGTH * gripper_noise

    lateral_noise = np.random.uniform(low=-0.01, high=0.01)
    target_pos[0] += np.sin(target_angle) * lateral_noise
    target_pos[1] += np.cos(target_angle) * lateral_noise

    cube_length = env.object.size[0] / 2

    longitude_noise = np.random.uniform(low=cube_length*-1.5, high=cube_length)
    target_pos[0] += np.cos(target_angle) * longitude_noise
    target_pos[1] += np.sin(target_angle) * longitude_noise

    angle_noise = np.random.uniform(low=-0.01, high=0.01)