# Example #1
class KBEnv(object):
    """Keyboard-driven Sawyer arm control environment.

    Loads joint configuration from pg_init.yaml and exposes helpers to
    command Sawyer's joints by position or velocity.
    """

    def __init__(self, path):
        """Start the ROS node, load the YAML config and connect to the limb.

        Args:
            path: directory containing pg_init.yaml.
        """
        rospy.init_node('sawyer_arm_cntrl')
        self.load_inits(path+'/pg_init.yaml')

        self.limb = Limb()
        self.all_jnts = copy.copy(self.limb.joint_names())
        # Conservative speed ratio for safe manual control.
        self.limb.set_joint_position_speed(0.2)

    def load_inits(self, path):
        """Read joint indices, command names and initial position from YAML.

        Uses yaml.safe_load — the bare yaml.load is deprecated and unsafe
        without an explicit Loader — and a context manager so the file is
        closed even if parsing raises.
        """
        with open(path, "r") as stream:
            config = yaml.safe_load(stream)

        # these indices and names have to be in ascending order
        self.jnt_indices = config['jnt_indices']
        self.cmd_names = config['cmd_names']
        self.init_pos = config['initial-position']

    def chng_jnt_state(self, values, mode, iterations=20):
        """Command Sawyer's joints to angles (mode == 4) or velocities.

        Args:
            values: 7 per-joint angle targets or velocities.
            mode: 4 selects position control; any other value, velocity.
            iterations: how many times the velocity command is re-sent.
        """
        # Report the cartesian goal as the midpoint between the two gripper
        # finger tips, via forward kinematics from the URDF.
        robot = URDF.from_parameter_server()
        kdl_kin_r = KDLKinematics(robot, 'base',
                                  'right_gripper_r_finger_tip')
        kdl_kin_l = KDLKinematics(robot, 'base',
                                  'right_gripper_l_finger_tip')
        q_jnt_angles = np.zeros(8)
        q_jnt_angles[:7] = copy.copy(values)
        q_jnt_angles[7] = 0.0  # gripper finger joint stays fixed
        pose_r = kdl_kin_r.forward(q_jnt_angles)
        pose_l = kdl_kin_l.forward(q_jnt_angles)
        pose = 0.5*(pose_l + pose_r)
        x, y, z = transformations.translation_from_matrix(pose)[:3]
        print("goal cartesian: ", x, y, z)

        timeout = 30.0
        if mode == 4:
            # Position mode: command every joint to its target angle.
            positions = dict()
            for i, jnt_name in enumerate(self.all_jnts):
                positions[jnt_name] = values[i]

            self.limb.move_to_joint_positions(positions, timeout)
        else:
            # Velocity mode: repeatedly publish the same velocity command.
            velocities = dict()
            for i, name in enumerate(self.cmd_names):
                velocities[name] = values[i]
            for _ in range(iterations):
                self.limb.set_joint_velocities(velocities)
                time.sleep(0.5)

    def initialize_jnts(self):
        """Move all joints (plus head pan) to fixed calibration angles."""
        print("Initializing joints...")
        positions = dict()
        limb = Limb()
        calib_angles = [0.27, -3.27, 3.04, -1.60, -0.38, -1.66, 0.004]
        all_jnts = copy.copy(limb.joint_names())
        limb.set_joint_position_speed(0.2)
        positions['head_pan'] = 0.0

        for i, jnt_name in enumerate(all_jnts):
            positions[jnt_name] = calib_angles[i]

        limb.move_to_joint_positions(positions)
        print("Done Initializing joints!!")
# Example #3
class Env(object):
    def __init__(self, path, train_mode=True):
        """Set up the ROS node, logging, configuration, and pub/sub channels.

        Args:
            path: directory holding init.yaml and the agent/env scripts.
            train_mode: when True, create a timestamped log directory under
                <path>/data and open an environment log file there.
        """
        self.train_mode = train_mode
        # Serializes access to cur_obs, which is written from ROS
        # subscriber callback threads and read from the step loop.
        self.obs_lock = threading.Lock()

        # path where the python script for agent and env reside
        self.path = path
        string = "sawyer_ros"
        rospy.init_node(string + "_environment")
        self.logp = None
        if self.train_mode:
            self.logpath = "AC" + '_' + time.strftime("%d-%m-%Y_%H-%M")
            self.logpath = os.path.join(path + '/data', self.logpath)
            if not (os.path.exists(self.logpath)):
                # we should never have to create dir, as agent already done it
                os.makedirs(self.logpath)
            logfile = os.path.join(self.logpath, "ros_env.log")
            self.logp = open(logfile, "w")

        # Goal does not move and is specified by three points (for pose reasons)
        self.goal_pos_x1 = None
        self.goal_pos_y1 = None
        self.goal_pos_z1 = None
        self.goal_pos_x2 = None
        self.goal_pos_y2 = None
        self.goal_pos_z2 = None
        self.goal_pos_x3 = None
        self.goal_pos_y3 = None
        self.goal_pos_z3 = None

        # Loads config keys (obs_dim, act_dim, cmd_mode, ...) before the
        # observation/action buffers below are sized from them.
        self._load_inits(path)
        self.cur_obs = np.zeros(self.obs_dim)
        self.cur_act = np.zeros(self.act_dim)
        self.cur_reward = None
        self.goal_cntr = 0

        self.limb = Limb()
        self.all_jnts = copy.copy(self.limb.joint_names())
        self.limb.set_joint_position_speed(0.2)

        self.rate = rospy.Rate(10)
        self.tf_buffer = tf2_ros.Buffer()
        self.tf_listener = tf2_ros.TransformListener(self.tf_buffer)
        self.jnt_st_sub = rospy.Subscriber('/robot/joint_states',
                                           JointState,
                                           self._update_jnt_state,
                                           queue_size=1)
        # NOTE(review): queue_size=None makes rospy publish synchronously —
        # confirm that blocking publishes are intended here.
        self.jnt_cm_pub = rospy.Publisher('/robot/limb/right/joint_command',
                                          JointCommand,
                                          queue_size=None)
        self.jnt_ee_sub = rospy.Subscriber('/robot/limb/right/endpoint_state',
                                           EndpointState,
                                           self.update_ee_pose,
                                           queue_size=1)
        '''
        Three points on the object in "right_gripper_base" frame.
        Obect is static in gripper frame. Needs to be calculated only once
        during init time
        '''
        self.obj_pose1 = PoseStamped()
        self.obj_pose1.header.frame_id = "right_gripper_base"
        self.obj_pose1.pose.position.x = self.obj_x1
        self.obj_pose1.pose.position.y = self.obj_y1
        self.obj_pose1.pose.position.z = self.obj_z1

        self.obj_pose2 = PoseStamped()
        self.obj_pose2.header.frame_id = "right_gripper_base"
        self.obj_pose2.pose.position.x = self.obj_x2
        self.obj_pose2.pose.position.y = self.obj_y2
        self.obj_pose2.pose.position.z = self.obj_z2

        self.obj_pose3 = PoseStamped()
        self.obj_pose3.header.frame_id = "right_gripper_base"
        self.obj_pose3.pose.position.x = self.obj_x3
        self.obj_pose3.pose.position.y = self.obj_y3
        self.obj_pose3.pose.position.z = self.obj_z3

    '''
    All members of the observation vector have to be provided
    '''

    def _set_cur_obs(self, obs):
        #self._print_env_log('Waiting for obs lock')
        self.obs_lock.acquire()
        try:
            #self._print_env_log('Acquired obs lock')
            self.cur_obs = copy.copy(obs)
        finally:
            #self._print_env_log('Released obs lock')
            self.obs_lock.release()

    def _get_cur_obs(self):
        self.obs_lock.acquire()
        try:
            #self._print_env_log('Acquired obs lock')
            obs = copy.copy(self.cur_obs)
        finally:
            #self._print_env_log('Released obs lock')
            self.obs_lock.release()
        return obs

    def close_env_log(self):
        self.logp.close()
        self.logp = None

    def _print_env_log(self, string):
        """Append a rostime-stamped message to the log (training mode only)."""
        # No-op outside training or when the log file is closed/absent.
        if not self.train_mode or self.logp is None:
            return
        now = rospy.get_rostime()
        stamp = time.strftime("%H-%M") + "-" + str(now.secs) + "-" + str(now.nsecs) + ": "
        self.logp.write("\n")
        self.logp.write(stamp + string)
        self.logp.write("\n")

    def _load_inits(self, path):
        """Load environment configuration from <path>/init.yaml.

        In training mode, first archives copies of the config and this
        script into the log directory for test/debug reproduction after
        training.

        Raises:
            OSError: if the config file cannot be read or copied.
            KeyError: if a required key is missing from the config.
        """
        if self.train_mode:
            # Store versions of the main code required for
            # test and debug after training
            copyfile(path + "/init.yaml", self.logpath + "/init.yaml")
            copyfile(path + "/ros_env.py", self.logpath + "/ros_env.py")

        # safe_load avoids arbitrary Python object construction; the bare
        # yaml.load(stream) is deprecated/unsafe without an explicit Loader.
        # The context manager guarantees the file is closed on parse errors.
        with open(path + "/init.yaml", "r") as stream:
            config = yaml.safe_load(stream)

        self.distance_thresh = config['distance_thresh']
        # limits for the uniform distribution to
        # sample from when varying initial joint states during training
        # joint position limits have to be in ascending order of joint number
        # jnt_init_limits is a ordered list of [lower_limit, upper_limit] for
        # each joint in motion
        self.jnt_init_limits = config['jnt_init_limits']
        self.cmd_mode = config['cmd_mode']
        if self.cmd_mode == 'VELOCITY_MODE':
            # limits for the joint positions
            self.jnt_pos_limits = config['jnt_pos_limits']
            self.vel_limit = config['vel_limit']
            self.vel_mode = config['vel_mode']
        else:
            self.torque_limit = config['torque_limit']
        # cmd_names: names of Sawyer's joints that will move for the shape
        # sorting cube task — any one or more of ['right_j0' .. 'right_j6']
        # in ascending order. The gripper finger joint stays fixed.
        self.debug_lvl = config['debug_lvl']
        self.cmd_names = config['cmd_names']
        self.init_pos = config['initial-position']
        self.goal_obs_dim = config['goal_obs_dim']
        # In TORQUE_MODE, jnt_obs_dim is twice the number of controlled
        # joints (2 * act_dim): one slot each for position and velocity.
        # Observation layout:
        #   j0..jN positions,
        #   j0..jN velocities (torque mode only),
        #   object points  x1,y1,z1, x2,y2,z2, x3,y3,z3,
        #   goal points    x1,y1,z1, x2,y2,z2, x3,y3,z3
        self.jnt_obs_dim = config['jnt_obs_dim']
        self.obs_dim = self.goal_obs_dim + self.jnt_obs_dim
        self.act_dim = config['act_dim']
        # jnt_indices must be ascending; list length 1..7 (values 0 to 6).
        self.jnt_indices = config['jnt_indices']
        # test time goals specified in jnt angle space (for now)
        if not self.train_mode:
            self.test_goal = config['test_goal']
        self.max_training_goals = config['max_training_goals']
        self.batch_size = config['min_timesteps_per_batch']
        self.goal_XYZs = config['goal_XYZs']
        # The following 9 object coordinates are in "right_gripper_base"
        # frame. They are relayed by the camera observer at init time and
        # can alternatively be read from the pg_init.yaml file.
        self.obj_x1 = config['obj_x1']
        self.obj_y1 = config['obj_y1']
        self.obj_z1 = config['obj_z1']
        self.obj_x2 = config['obj_x2']
        self.obj_y2 = config['obj_y2']
        self.obj_z2 = config['obj_z2']
        self.obj_x3 = config['obj_x3']
        self.obj_y3 = config['obj_y3']
        self.obj_z3 = config['obj_z3']

    # Callback invoked when EE pose update message is received
    # This function will update the pose of object in gripper
    def update_ee_pose(self, msg):
        """EE-pose callback: refresh the three object points in base frame.

        The object is rigid in the gripper frame, so its three defining
        points are transformed from "right_gripper_base" into "base" and
        stored in the observation vector right after the joint section.
        """
        try:
            tform = self.tf_buffer.lookup_transform("base",
                                                    "right_gripper_base",
                                                    rospy.Time(),
                                                    rospy.Duration(1.0))
        except (tf2_ros.LookupException, tf2_ros.ConnectivityException,
                tf2_ros.ExtrapolationException):
            self._print_env_log("TF Exception, not storing update")
            return

        obs = self._get_cur_obs()

        # Homogeneous 4x4 transform base <- right_gripper_base.
        translation = tform.transform.translation
        rotation = tform.transform.rotation
        trans = (translation.x, translation.y, translation.z)
        rot = (rotation.x, rotation.y, rotation.z, rotation.w)
        mat44 = np.dot(transformations.translation_matrix(trans),
                       transformations.quaternion_matrix(rot))

        # Transform each of the three object points; they occupy nine
        # consecutive observation slots starting at jnt_obs_dim.
        obj_poses = (self.obj_pose1, self.obj_pose2, self.obj_pose3)
        for idx, obj_pose in enumerate(obj_poses):
            pose44 = np.dot(xyz_to_mat44(obj_pose.pose.position),
                            xyzw_to_mat44(obj_pose.pose.orientation))
            txpose = np.dot(mat44, pose44)
            x, y, z = tuple(transformations.translation_from_matrix(txpose))[:3]
            base_idx = self.jnt_obs_dim + 3 * idx
            obs[base_idx] = x
            obs[base_idx + 1] = y
            obs[base_idx + 2] = z

        self._set_cur_obs(obs)

    def _update_jnt_state(self, msg):
        '''
        Only care for mesgs which have length 9
        There is a length 1 message for the gripper finger joint
        which we dont care about
        '''
        if len(msg.position) != 9:
            return

        obs = self._get_cur_obs()
        enum_iter = enumerate(self.jnt_indices, start=0)
        for i, index in enum_iter:
            '''
            Need to add a 1 to message index as it starts from head_pan
            [head_pan, j0, j1, .. torso]
            whereas joint_indices are starting from j0
            '''
            obs[i] = msg.position[index + 1]
            if self.cmd_mode == "TORQUE_MODE":
                obs[i + self.jnt_obs_dim / 2] = msg.velocity[index + 1]

        self._set_cur_obs(obs)

    '''
    This function is called from reset only and during both training and testing
    '''

    def _init_jnt_state(self):
        """Move the arm to the configured start pose, randomizing every
        controlled joint within its init limits (called from reset() in
        both training and testing).
        """
        q_jnt_angles = copy.copy(self.init_pos)

        # Randomize the controlled joints so successive episodes start
        # from slightly different configurations.
        for i, index in enumerate(self.jnt_indices):
            lo, hi = self.jnt_init_limits[i][0], self.jnt_init_limits[i][1]
            q_jnt_angles[index] = np.random.uniform(lo, hi)

        positions = dict()
        log_msg = "Initializing joint states to: "
        for i, jnt_name in enumerate(self.all_jnts):
            positions[jnt_name] = q_jnt_angles[i]
            log_msg += str(positions[jnt_name]) + " "

        self.limb.move_to_joint_positions(positions, 30)
        self._print_env_log(log_msg)
        # Give the controller time to actually reach the commanded pose.
        rospy.sleep(3)

    def _action_clip(self, action):
        """Clip velocity actions to the limit; torques pass through as-is."""
        if self.cmd_mode != "TORQUE_MODE":
            return np.clip(action, -self.vel_limit, self.vel_limit)
        # Torque clipping intentionally disabled:
        # return np.clip(action, -self.torque_limit, self.torque_limit)
        return action

    def _set_joint_velocities(self, actions):
        """Send velocity actions to the arm.

        In "raw" mode the velocities are published directly as a
        VELOCITY_MODE JointCommand; otherwise each velocity is integrated
        over a (noisy) timestep into a position target and the limb is
        moved to the resulting joint positions (blocking).
        """
        if self.vel_mode == "raw":
            velocities = dict()
            enum_iter = enumerate(self.cmd_names, start=0)
            for i, jnt in enum_iter:
                velocities[jnt] = actions[i]
            command_msg = JointCommand()
            command_msg.names = velocities.keys()
            command_msg.velocity = velocities.values()
            command_msg.mode = JointCommand.VELOCITY_MODE
            command_msg.header.stamp = rospy.Time.now()
            self.jnt_cm_pub.publish(command_msg)
        else:
            # Command Sawyer's joints to angles as calculated by velocity*dt
            positions = dict()
            q_jnt_angles = copy.copy(self.init_pos)
            obs_prev = self._get_cur_obs()
            enum_iter = enumerate(self.jnt_indices, start=0)
            for i, index in enum_iter:
                # NOTE(review): self.dt is never assigned anywhere in this
                # file — confirm it is set elsewhere before this mode is
                # used, otherwise this line raises AttributeError.
                # The Gaussian jitter deliberately randomizes step sizes.
                timestep = self.dt + np.random.normal(0, 1)
                val = obs_prev[i] + actions[i] * timestep
                # Keep integrated targets inside the configured joint limits.
                val = np.clip(val, self.jnt_pos_limits[i][0],
                              self.jnt_pos_limits[i][1])
                q_jnt_angles[index] = val

            enum_iter = enumerate(self.all_jnts, start=0)
            for i, jnt_name in enum_iter:
                positions[jnt_name] = q_jnt_angles[i]

            self.limb.move_to_joint_positions(positions)

    def _set_joint_torques(self, actions):
        """Publish a TORQUE_MODE JointCommand: zero for idle joints,
        the policy's actions for the commanded joints."""
        # Default every joint to zero torque, then overwrite the commanded
        # ones (dict insertion order is preserved on overwrite).
        torques = {jnt_name: 0.0 for jnt_name in self.all_jnts}
        for i, jnt_name in enumerate(self.cmd_names):
            torques[jnt_name] = actions[i]

        command_msg = JointCommand()
        command_msg.names = torques.keys()
        command_msg.effort = torques.values()
        command_msg.mode = JointCommand.TORQUE_MODE
        command_msg.header.stamp = rospy.Time.now()
        self.jnt_cm_pub.publish(command_msg)

    def step(self, action):
        """Execute one policy action on the robot.

        Returns:
            (observation, reward, done) after the command takes effect.
        """
        self.cur_act = copy.deepcopy(action)

        # Clip before sending so actuator limits are respected.
        safe_action = self._action_clip(action)
        if self.cmd_mode == "TORQUE_MODE":
            self._set_joint_torques(safe_action)
        else:
            self._set_joint_velocities(safe_action)

        # Observations update continuously via the joint-state and EE-pose
        # subscribers. For raw velocity / torque commands, sleep one rate
        # tick so the command has taken effect before sampling; integrated
        # position commands already block until motion completes.
        if self.cmd_mode == "TORQUE_MODE" or self.vel_mode == "raw":
            self.rate.sleep()

        obs = self._get_cur_obs()
        diff = self._get_diff(obs)
        done = self._is_done(diff)
        self.cur_reward = self._calc_reward(diff, done)
        return obs, self.cur_reward, done

    def _set_cartesian_test_goal(self):
        """Set the fixed test-time goal from the configured test_goal.

        BUG FIX: the original assigned goal point 1 three times (copy-paste
        error), leaving points 2 and 3 forever None; each of the three
        config points now lands in its own goal attributes.
        """
        self.goal_pos_x1 = self.test_goal[0][0]
        self.goal_pos_y1 = self.test_goal[0][1]
        self.goal_pos_z1 = self.test_goal[0][2]
        self.goal_pos_x2 = self.test_goal[1][0]
        self.goal_pos_y2 = self.test_goal[1][1]
        self.goal_pos_z2 = self.test_goal[1][2]
        self.goal_pos_x3 = self.test_goal[2][0]
        self.goal_pos_y3 = self.test_goal[2][1]
        self.goal_pos_z3 = self.test_goal[2][2]

    def _set_random_training_goal(self):
        """Select the current training goal and jitter every coordinate
        with uniform noise in [-0.01, 0.01]."""
        low, high = -0.01, 0.01
        goal = self.goal_XYZs[self.goal_cntr]

        # Points are numbered 1..3; coordinates ordered x, y, z. The draw
        # order (x1, y1, z1, x2, ...) matches the attribute numbering.
        for num, point in enumerate(goal, start=1):
            for axis, base in zip('xyz', point):
                noisy = base + np.random.uniform(low, high)
                setattr(self, 'goal_pos_{}{}'.format(axis, num), noisy)

    def reset(self):
        """Select a new goal, write it into the observation vector, move
        the arm to a randomized start pose, and return the observation."""
        if self.train_mode:
            # Cycle through the training goals round-robin.
            self.goal_cntr = (self.goal_cntr + 1) % self.max_training_goals
            self._set_random_training_goal()
        else:
            self._set_cartesian_test_goal()

        obs = self._get_cur_obs()
        od = self.jnt_obs_dim
        # Goal coordinates occupy slots od+9 .. od+17, right after the
        # nine object-point slots.
        goal_coords = (self.goal_pos_x1, self.goal_pos_y1, self.goal_pos_z1,
                       self.goal_pos_x2, self.goal_pos_y2, self.goal_pos_z2,
                       self.goal_pos_x3, self.goal_pos_y3, self.goal_pos_z3)
        for offset, value in enumerate(goal_coords, start=9):
            obs[od + offset] = value
        self._set_cur_obs(obs)

        # Blocks for ~3 seconds while the joints settle into the start pose.
        self._init_jnt_state()
        # send the latest observations
        return self._get_cur_obs()

    def _get_diff(self, obs):
        """Per-coordinate absolute distance between object and goal points.

        Object coordinates live at obs[od .. od+8] and the matching goal
        coordinates at obs[od+9 .. od+17].
        """
        od = self.jnt_obs_dim
        return [abs(obs[od + j] - obs[od + j + 9]) for j in range(9)]

    def _is_done(self, diff):
        """True when every coordinate difference is within the threshold."""
        # diff entries are absolute values, so only the upper bound matters.
        for d in diff:
            if not (d <= self.distance_thresh):
                return False
        self._print_env_log(" Reached the goal!!!! ")
        return True

    def _calc_reward(self, diff, done):
        l2 = np.linalg.norm(np.array(diff))
        l2sq = l2**2
        alpha = 1e-6
        w_l2 = -1e-3
        w_u = -1e-2
        w_log = -1.0
        reward = 0.0
        dist_cost = l2sq
        precision_cost = np.log(l2sq + alpha)
        cntrl_cost = np.square(self.cur_act).sum()

        reward += w_l2 * dist_cost + w_log * precision_cost + w_u * cntrl_cost

        string = "l2sq: {}, log of l2sq: {} contrl_cost: {} reward: {}".format(
            dist_cost, precision_cost, cntrl_cost, reward)
        self._print_env_log(" Current Reward: " + string)
        return reward