def _get_state_obs(self):
        '''
        Compute the observation at the current state.
        '''

        # Return superclass observation.
        if self.observe_joints:
            obs = super(PegInsertionEnv, self)._get_state_obs()
        else:
            obs = np.zeros(0)

        # Return superclass observation stacked with the ft observation.
        if not self.initialized:
            ft_obs = np.zeros(6)
        else:
            # Compute F/T sensor data
            ft_obs = self.sim.data.sensordata

            obs = obs / self.obs_scaling

        if self.use_ft_sensor:
            obs = np.concatenate([obs, ft_obs])

        # End effector position
        pos, rot = forwardKinSite(self.sim,
                                  ['peg_tip', 'hole_base', 'hole_top'])

        if self.use_rel_pos_err:
            pos_obs = pos[1] - pos[0]
            quat_peg_tip = mat2Quat(rot[0])
            quat_hole_base = mat2Quat(rot[1])
            rot_obs = subQuat(quat_hole_base, quat_peg_tip).copy()
            hole_top_obs = pos[2] - pos[0]
        else:
            # TODO: we probably also want the EE position in the world
            pos_obs = pos[1].copy()
            rot_obs = mat2Quat(rot[1])
            hole_top_obs = pos[2]

        # End effector velocity
        peg_tip_id = self.model.site_name2id('peg_tip')
        jacp, jacr = forwardKinJacobianSite(self.sim,
                                            peg_tip_id,
                                            recompute=False)
        peg_tip_lin_vel = jacp.dot(self.sim.data.qvel)
        peg_tip_rot_vel = jacr.dot(self.sim.data.qvel)

        # Transform into end effector frame
        if self.in_peg_frame:
            pos_obs = rot[0].T.dot(pos_obs)
            hole_top_obs = rot[0].T.dot(hole_top_obs)
            peg_tip_lin_vel = rot[0].T.dot(peg_tip_lin_vel)
            peg_tip_rot_vel = rot[0].T.dot(peg_tip_rot_vel)

        obs = np.concatenate([
            obs, pos_obs, rot_obs, peg_tip_lin_vel, peg_tip_rot_vel,
            hole_top_obs
        ])

        return obs
Пример #2
0
    def _get_reward(self, state, action):
        '''
        Compute single step reward.
        '''
        # compute position and rotation error
        pos, rot = forwardKinSite(self.sim, ['peg_tip', 'hole_base'], recompute=False)
        pos_err = pos[0] - pos[1]
        dist = np.sqrt(pos_err.dot(pos_err))
        peg_quat = mat2Quat(rot[0])
        hole_quat = mat2Quat(rot[1])
        rot_err = subQuat(peg_quat, hole_quat)

        pose_err = self.sim.data.qpos - self.good_states[0]
        
        peg_tip_id = self.model.site_name2id('peg_tip')
        jacp, jacv = forwardKinJacobianSite(self.sim, peg_tip_id, recompute=False)
        peg_tip_vel = jacp.dot(self.data.qvel[:])
        # print("reward_dist: {}".format(dist))
        
        # quadratic cost on the error and action
        # rotate the cost terms to align with the hole
        Q_pos = rotate_cost_by_matrix(self.Q_pos, rot[1].T)
        # Q_vel = rotate_cost_by_matrix(self.Q_vel,rot[1].T)
        Q_rot = self.Q_rot

        reward_info = dict()
        reward = 0.

        # reward_info['quaternion_reward'] = -rot_err.dot(Q_rot).dot(rot_err)
        if self.quadratic_rot_cost:
            reward_info['quadratic_orientation_reward'] = -rot_err.dot(Q_rot).dot(rot_err)
            reward += reward_info['quadratic_orientation_reward']

        if self.quadratic_cost:
            reward_info['quadratic_position_reward'] = -pos_err.dot(Q_pos).dot(pos_err)
            reward += reward_info['quadratic_position_reward']

        if self.linear_cost:
            reward_info['linear_position_reward'] = -np.sqrt(pos_err.dot(Q_pos).dot(pos_err))
            reward += reward_info['linear_position_reward']

        if self.logarithmic_cost:
            rew_scale = 2
            eps = 10.0**(-rew_scale)
            zero_crossing = 0.05
            reward_info['logarithmic_position_reward'] = -np.log10(np.sqrt(pos_err.dot(Q_pos).dot(pos_err))/zero_crossing*(1-eps) + eps)
            reward += reward_info['logarithmic_position_reward']

        if self.sparse_cost:
            reward_info['sparse_position_reward'] = 10.0 if np.sqrt(pos_err.dot(pos_err)) < 1e-2 else 0
            reward += reward_info['sparse_position_reward']

        if self.regularize_pose:
            reward_info['pose_regularizer_reward'] = -pose_err.dot(self.Q_pose_reg).dot(pose_err)
            reward += reward_info['pose_regularizer_reward']
        
        # reward_info['velocity_reward'] = -np.sqrt(peg_tip_vel.dot(Q_vel).dot(peg_tip_vel)) 
        # reward += reward_info['velocity_reward']

        return reward, reward_info
def test_forwardKinRotJacobian():
    # Build the model path.
    model_filename = 'full_kuka_no_collision.xml'
    model_path = os.path.join('..', '..', 'envs', 'assets', model_filename)

    # Construct the model and simulation objects.
    model = mujoco_py.load_model_from_path(model_path)
    sim = mujoco_py.MjSim(model)

    # Set an arbitrary state.
    q_nom = np.array([.1, .2, .3, .4, .5, .6, .7])
    sim.data.qpos[:] = q_nom

    # Choose an arbitrary point and a link with a dense jacobian.
    body_id = model.body_name2id('kuka_link_7')
    pos = np.array([.1, .2, .3])
    quat = np.array([1., 0., 0., 0.])

    # Compute the forward kinematics and kinematic jacobian.
    _, xrot = forwardKin(sim, pos, quat, body_id)
    xquat = mat2Quat(xrot)
    _, jacr = forwardKinJacobian(sim, pos, body_id)

    # Compute the kinematic jacobian with finite differences.
    jac_approx = np.zeros((3, 7))
    for i in range(7):
        dq = np.zeros(model.nq)
        dq[i] = 1e-6
        sim.data.qpos[:] = q_nom + dq
        _, xrot_ = forwardKin(sim, pos, quat, body_id)
        xquat_ = mat2Quat(xrot_)
        jac_approx[:, i] = subQuat(xquat_, xquat) / 1e-6

    assert np.allclose(jac_approx - jacr, np.zeros_like(jacr), atol=1e-6)
Пример #4
0
 def _get_target_obs(self):
     # Compute relative position error
     raise NotImplementedError
     if self.use_rel_pos_err:
         pos, rot = forwardKinSite(self.sim, ['hammer_tip', 'nail_top'])
         pos_obs = pos[0] - pos[1]
         quat_hammer_tip = mat2Quat(rot[0])
         quat_nail_top = mat2Quat(rot[1])
         rot_obs = subQuat(quat_hammer_tip, quat_nail_top)
         return np.concatenate([pos_obs, rot_obs])
     else:
         return np.array([self.data.qpos[self.nail_idx]])
def compute_distance_travelled(x_pos, x_rot):
    pos_dist = 0
    rot_dist = 0
    for i in range(len(x_pos) - 1):
        dx = x_pos[i + 1] - x_pos[i]
        pos_dist += np.linalg.norm(dx)

        x_quat = mat2Quat(x_rot[i])
        x_quat_next = mat2Quat(x_rot[i + 1])
        dq = subQuat(x_quat, x_quat_next)
        rot_dist += np.linalg.norm(dq)

    return pos_dist, rot_dist
    def get_torque(self):
        '''
        Update the impedance control setpoint and compute the torque.
        '''
        # Compute the pose difference.
        pos, mat = forwardKinSite(self.sim, self.site_name, recompute=False)
        quat = mat2Quat(mat)
        dx = self.pos_set - pos
        dr = subQuat(self.quat_set, quat)
        dframe = np.concatenate((dx,dr))

        # Compute generalized forces from a virtual external force.
        jpos, jrot = forwardKinJacobianSite(self.sim, self.site_name, recompute=False)
        J = np.vstack((jpos, jrot))
        external_force = J.T.dot(self.stiffness*dframe) # virtual force on the end effector

        # Cancel other dynamics and add virtual damping using inverse dynamics.
        acc_des = -self.damping*self.sim.data.qvel
        self.sim.data.qacc[:] = acc_des
        mujoco_py.functions.mj_inverse(self.model, self.sim.data)
        id_torque = self.sim.data.qfrc_inverse[:]
        
        generalized_force = id_torque + external_force
        if self.controlled_joints is None:
            torque = generalized_force
        else:
            torque = generalized_force[self.controlled_joints]
        
        return torque
    def get_torque(self):
        '''
        Update the impedance control setpoint and compute the torque.
        '''
        # Compute the pose difference.
        pos, mat = forwardKinSite(self.sim, self.site_name, recompute=False)
        quat = mat2Quat(mat)
        dx = self.pos_set - pos
        dr = subQuat(self.quat_set, quat) # Original
        dframe = np.concatenate((dx,dr))

        # Compute generalized forces from a virtual external force.
        jpos, jrot = forwardKinJacobianSite(self.sim, self.site_name, recompute=False)
        J = np.vstack((jpos[:,self.sim_qvel_idx], jrot[:,self.sim_qvel_idx]))
        cartesian_acc_des = self.stiffness*dframe - self.damping*J.dot(self.sim.data.qvel[self.sim_qvel_idx])
        impedance_acc_des = J.T.dot(np.linalg.solve(J.dot(J.T) + 1e-6*np.eye(6), cartesian_acc_des))

        # Add stiffness and damping in the null space of the the Jacobian
        projection_matrix = J.T.dot(np.linalg.solve(J.dot(J.T), J))
        projection_matrix = np.eye(projection_matrix.shape[0]) - projection_matrix
        null_space_control = -self.null_space_damping*self.sim.data.qvel[self.sim_qvel_idx]
        null_space_control += -self.null_space_stiffness*(self.sim.data.qpos[self.sim_qpos_idx] - self.nominal_qpos)
        impedance_acc_des += projection_matrix.dot(null_space_control)

        # Compute torques via inverse dynamics.
        acc_des = np.zeros(self.sim.model.nv)
        acc_des[self.sim_qvel_idx] = impedance_acc_des
        self.sim.data.qacc[:] = acc_des
        mujoco_py.functions.mj_inverse(self.model, self.sim.data)
        id_torque = self.sim.data.qfrc_inverse[self.sim_actuators_idx].copy()
        
        return id_torque
Пример #8
0
 def residuals(q):
     sim.data.qpos[qpos_idx] = q
     xpos, xrot = forwardKin(sim, body_pos, identity_quat, body_id)
     quat = mat2Quat(xrot)
     q_diff = subQuat(quat, world_quat)
     res = np.concatenate((xpos-world_pos, q_diff, reg*(q-q_nom)))
     return res
Пример #9
0
    def _get_reward(self, state, action):
        '''
        Compute single step reward.
        '''

        # Compute peg tip velocity.        
        # peg_tip_id = self.model.site_name2id('peg_tip')
        # jacp, jacv = forwardKinJacobianSite(self.sim, peg_tip_id, recompute=False)
        # peg_tip_vel = jacp.dot(self.data.qvel[:])

        reward_info = dict()
        reward = 0.

        if self.use_qpos_cost:
            qpos_err = self.sim.data.qpos - self.target_qpos
            reward_info['qpos_reward'] = -qpos_err.dot(self.Q_qpos).dot(qpos_err)
            reward += reward_info['qpos_reward']
        if self.use_qvel_cost:
            reward_info['qvel_reward'] = -self.data.qvel.dot(self.Q_qvel).dot(self.data.qvel)
            reward += reward_info['qvel_reward']
        if self.use_pos_cost or self.use_rot_cost:
            # compute position and rotation error
            pos, rot = forwardKinSite(self.sim, ['peg_tip'], recompute=False)
            peg_pos = pos[0]
            peg_quat = mat2Quat(rot[0])
        if self.use_pos_cost:
            pos_err = peg_pos - self.target_pos
            reward_info['pos_reward'] = -pos_err.dot(self.Q_pos).dot(pos_err)
            reward += reward_info['pos_reward']
        if self.use_rot_cost:
            rot_err = subQuat(peg_quat, self.target_quat)
            reward_info['rot_reward'] = -rot_err.dot(self.Q_rot).dot(rot_err)
            reward += reward_info['rot_reward']

        return reward, reward_info
Пример #10
0
    def _get_reward(self, state, action):
        '''
        Compute single step reward.
        '''
        reward_info = dict()
        reward = 0.

        if self.pos_reward:
            pos_err = self.data.qpos[
                self.block_pos_idx][:3] - self.block_target_position[:3]
            reward_info['block_pos_reward'] = -np.linalg.norm(pos_err) * 10
            reward += reward_info['block_pos_reward']
        if self.rot_reward:
            rot_err = subQuat(self.data.qpos[self.block_pos_idx][3:],
                              self.block_target_position[3:])
            reward_info['block_rot_reward'] = -np.linalg.norm(rot_err)
            reward += reward_info['block_rot_reward']
        if self.pos_vel_reward:
            raise NotImplementedError
            # TODO: this should give positive reward when moving towards the goal and negative reward when moving away from the goal
            pos_vel = self.data.qvel[self.block_vel_idx[:3]]
            reward_info['block_pos_vel_reward'] = -np.linalg.norm(pos_vel)
            reward += reward_info['block_pos_vel_reward']
        if self.rot_vel_reward:
            raise NotImplementedError
            # TODO: this should give positive reward when moving towards the goal and negative reward when moving away from the goal
            rot_vel = self.data.qvel[self.block_vel_idx[3:]]
            reward_info['block_rot_vel_reward'] = -np.linalg.norm(rot_vel)
            reward += reward_info['block_rot_vel_reward']
        if self.peg_tip_height_reward or self.peg_tip_orientation_reward:
            tip_pos, tip_rot = forwardKinSite(self.sim, ['peg_tip'])
            tip_pos = tip_pos[0]
            tip_rot = tip_rot[0]
        if self.peg_tip_height_reward:
            tip_height_err = tip_pos[2] - self.table_height
            reward_info['peg_tip_height_reward'] = -tip_height_err**2
            reward += reward_info['peg_tip_height_reward']
        if self.peg_tip_orientation_reward:
            tip_quat = mat2Quat(tip_rot)
            tip_rot_err = subQuat(tip_quat, np.array([0., 1., 0., 0.]))
            reward_info['peg_tip_orientation_reward'] = -(
                np.linalg.norm(tip_rot_err) / 10)**2
            reward += reward_info['peg_tip_orientation_reward']
        if self.contact_reward:
            contacts = self.sim.data.contact[:self.sim.data.ncon]
            contact = 0.
            for c in contacts:
                geoms = c.geom1, c.geom2
                if (self.tip_geom_id in geoms) and (self.block_geom_id
                                                    in geoms):
                    contact = 1.
                    break
            reward_info['contact_reward'] = contact * .1
            reward += reward_info['contact_reward']

        return reward * self.reward_scale, reward_info
    def set_action(self, action):
        '''
        Set the setpoint.
        '''
        action = action * self.scale

        dx = action[0:3].astype(np.float64)
        dr = action[3:6].astype(np.float64)

        pos, mat = forwardKinSite(self.sim, self.site_name, recompute=False)
        quat = mat2Quat(mat)

        self.pos_set = pos + dx
        self.quat_set = quatIntegrate(quat, dr)
Пример #12
0
def hole_insertion_samples_unrestricted(sim,
                                        nsamples=10,
                                        insertion_range=(0, 0.05),
                                        raise_on_fail=False):
    # The points to be transformed.
    pos = np.array([0., 0., 0.])
    peg_body_id = sim.model.body_name2id('peg')
    tip_site_id = sim.model.site_name2id('peg_tip')
    tip_body_pos = sim.model.site_pos[tip_site_id]

    # The desired world coordinates
    hole_id = sim.model.body_name2id('hole')

    hole_pos_delta = np.zeros((nsamples, 3))
    hole_pos_delta[:, 2] = np.linspace(insertion_range[0], insertion_range[1],
                                       nsamples)

    world_pos_desired = []
    world_quat_desired = []
    world_quat = np.array([0., 1., 0., 0.])
    for i in range(nsamples):
        pos_desired, mat_desired = forwardKin(sim, hole_pos_delta[i, :],
                                              world_quat, hole_id)
        world_pos_desired.append(pos_desired)
        world_quat_desired.append(mat2Quat(mat_desired))

    # Compute the forward kinematics
    q_nom = np.zeros(7)
    q_init = np.zeros(7)
    upper = sim.model.jnt_range[:, 1]
    lower = sim.model.jnt_range[:, 0]

    q_sol = []
    for w_pos, w_quat in zip(world_pos_desired, world_quat_desired):
        q_opt = inverseKin(sim,
                           q_init,
                           q_nom,
                           tip_body_pos,
                           w_pos,
                           w_quat,
                           peg_body_id,
                           upper=upper,
                           lower=lower,
                           raise_on_fail=raise_on_fail)
        q_sol.append(q_opt)
        q_init = q_opt.copy()  # warm start the next solution

    return q_sol
Пример #13
0
    def _get_info(self):
        # Build the info dict.
        info = dict()

        # Joint space distances.
        qpos_err = self.data.qpos - self.target_qpos
        info['qpos_dist'] = np.linalg.norm(qpos_err)
        info['qvel_dist'] = np.linalg.norm(self.data.qvel)

        # Task space distances.
        pos, rot = forwardKinSite(self.sim, ['peg_tip'], recompute=False)
        pos_err = pos[0] - self.target_pos
        info['pos_dist'] = np.linalg.norm(pos_err)
        rot_err = subQuat(mat2Quat(rot[0]), self.target_quat)
        info['rot_dist'] = np.linalg.norm(rot_err)

        # Success metric.
        info['success'] = float(info['pos_dist'] < 1e-2)
        return info
Пример #14
0
    def _get_state_obs(self):
        '''
        Compute the observation at the current state.
        '''

        # Return superclass observation.
        if self.use_joint_observation:
            obs = super(HoldPositionEnv, self)._get_state_obs()
        else:
            obs = np.zeros(0)

        # Compute the relative pose information.
        if self.use_rel_pose_observation:
            if self.initialized:
                pos, rot = forwardKinSite(self.sim, ['peg_tip'], recompute=False)
                pos_err = pos[0] - self.target_pos
                rot_err = subQuat(mat2Quat(rot[0]), self.target_quat)
            else:
                pos_err = np.zeros(3)
                rot_err = np.zeros(3)
            obs = np.concatenate([obs, pos_err, rot_err])

        return obs
Пример #15
0
quat = np.array([1, 0, 0, 0], dtype=np.float64)
body_id = model.body_name2id('kuka_link_7')

# Compute the forward kinematics
xpos, xrot = forwardKin(sim, pos, quat, body_id)
jacp, jacr = forwardKinJacobian(sim, pos, body_id)

print("Position:\n{}\nRotation:\n{}".format(xpos, xrot))
print("Position Jacobian:\n{}\nRotation Jacobian:\n{}".format(jacp.T, jacr.T))

## The peg tip position at the home position.
model_filename = 'full_peg_insertion_experiment_no_hole_no_gravity.xml'
model_path = os.path.join(kuka_asset_dir(), model_filename)

# Construct the model and simulation objects.
model = mujoco_py.load_model_from_path(model_path)
sim = mujoco_py.MjSim(model)

sim.data.qpos[:] = np.array([np.pi/2,
                            -np.pi/6,
                            -np.pi/3,
                            -np.pi/2,
                             np.pi*3/4,
                            -np.pi/4,
                             0.0])

print(sim.data.qpos[:]s)

xpos, xrot = forwardKinSite(sim, 'peg_tip', recompute=True)
xquat = mat2Quat(xrot)
print("Position:\n{}\nRotation:\n{}".format(xpos, xquat))
Пример #16
0
    def __init__(self,
                 *args,
                 init_randomness=0.00,
                 joint_force_randomness=0.00,
                 init_qpos=None,
                 target_qpos=None,
                 use_qpos_cost=False,
                 use_qvel_cost=False,
                 use_pos_cost=True,
                 use_rot_cost=True,
                 qpos_cost=1.,
                 qvel_cost=1.,
                 pos_cost=1.,
                 rot_cost=1.,
                 use_joint_observation=True,
                 use_rel_pose_observation=True,
                 **kwargs):
        
        # Store options
        self.use_qpos_cost = use_qpos_cost
        self.use_qvel_cost = use_qvel_cost
        self.use_pos_cost = use_pos_cost
        self.use_rot_cost = use_rot_cost

        self.init_randomness = np.ones(7)*init_randomness
        self.joint_force_randomness = np.ones(7)*joint_force_randomness
        self.use_rel_pose_observation = use_rel_pose_observation
        self.use_joint_observation = use_joint_observation
        
        if target_qpos is None:
            self.target_qpos = np.array([np.pi/2,
                                        -np.pi/6,
                                        -np.pi/3,
                                        -np.pi/2,
                                         np.pi*3/4,
                                        -np.pi/4,
                                         0.0])
        else:
            self.target_qpos = target_qpos.copy()

        if init_qpos is None:
            self._init_qpos = self.target_qpos.copy()
        else:
            if isinstance(init_qpos, np.ndarray):
                self._init_qpos = init_qpos.copy()
            else:
                self._init_qpos = np.array(init_qpos)
        
        # Set the default model path.
        kwargs['model_path'] = kwargs.get('model_path', 'full_peg_insertion_experiment_no_hole_no_gravity.xml')       
        super(HoldPositionEnv, self).__init__(*args, **kwargs)
        
        # Compute the target pos and quat from forwardKinSite.
        self.set_state(self.target_qpos, np.zeros(7))
        self.sim.forward()
        pos, rot = forwardKinSite(self.sim, ['peg_tip'], recompute=True)
        self.target_pos = pos[0].copy()
        self.target_quat = mat2Quat(rot[0])

        # Keep the cost terms
        if self.use_rot_cost:
            self.Q_qpos = np.eye(7)*qpos_cost
        if self.use_qvel_cost:
            self.Q_qvel = np.eye(7)*qvel_cost
        if self.use_pos_cost:
            self.Q_pos = np.eye(3)*pos_cost
        if self.use_rot_cost:
            self.Q_rot = np.eye(3)*rot_cost