Example #1
import time
import copy
import numpy as np
from numpy import linalg as LA
import gym
from gym import spaces
from gym.utils import seeding
from ambf_client import Client
# NOTE: Object, World, Observation, compute_FK and euler_from_matrix are
# assumed to be provided by the surrounding project (AMBF handle types, the
# observation container, PSM forward kinematics and tf.transformations).
class AmbfEnvHERDDPG(gym.GoalEnv):
    def __init__(self, action_space_limit, joints_to_control,
                 goal_position_range, position_error_threshold,
                 goal_error_margin, joint_limits, workspace_limits,
                 enable_step_throttling):
        super(AmbfEnvHERDDPG, self).__init__()
        # Placeholder handles; replaced with the actual AMBF object and world handles in make()
        self.obj_handle = Object
        self.world_handle = World
        self.ambf_client = Client()
        self.ambf_client.connect()
        time.sleep(5)
        self.ambf_client.create_objs_from_rostopics()
        self.seed()
        self.n_skip_steps = 5
        self.enable_step_throttling = enable_step_throttling
        self.action = []
        self.obs = Observation()
        self.initial_pos = copy.deepcopy(self.obs.cur_observation()[0])
        self.cmd_joint_pos = np.zeros(7)
        self.goal_error_margin = goal_error_margin
        self.joint_limits = joint_limits
        self.workspace_limits = workspace_limits
        self.n_actions = 7
        self.action_lims_low = -action_space_limit * np.ones(self.n_actions)
        self.action_lims_high = action_space_limit * np.ones(self.n_actions)
        self.action_space = spaces.Box(low=-action_space_limit,
                                       high=action_space_limit,
                                       shape=(self.n_actions, ),
                                       dtype="float32")
        self.observation_space = spaces.Dict(
            dict(
                desired_goal=spaces.Box(
                    -np.inf,
                    np.inf,
                    shape=self.initial_pos['achieved_goal'].shape,
                    dtype='float32'),
                achieved_goal=spaces.Box(
                    -np.inf,
                    np.inf,
                    shape=self.initial_pos['achieved_goal'].shape,
                    dtype='float32'),
                observation=spaces.Box(
                    -np.inf,
                    np.inf,
                    shape=self.initial_pos['observation'].shape,
                    dtype='float32'),
            ))
        self.joints_to_control = joints_to_control
        self.goal_position_range = goal_position_range
        self.goal = np.array([0.0, 0.0, -0.1, 0.0, 0.0, 0.0])
        self.prev_sim_step = 0
        self.pos_error_threshold = position_error_threshold
        self.count_for_print = 0

    def skip_sim_steps(self, num):
        # Function to define the number of steps that can be skipped if Step Throttling is enabled
        self.n_skip_steps = num
        self.world_handle.set_num_step_skips(num)

    def set_throttling_enable(self, check):
        # Function to set the Step Throttling Boolean
        self.enable_step_throttling = check
        self.world_handle.enable_throttling(check)

    def make(self, a_name):
        # Function to create object handle for the robot and world in AMBF
        self.obj_handle = self.ambf_client.get_obj_handle(a_name)
        # self.base_handle = self.ambf_client.get_obj_handle('SampleObj')
        self.world_handle = self.ambf_client.get_world_handle()
        self.world_handle.enable_throttling(self.enable_step_throttling)
        self.world_handle.set_num_step_skips(self.n_skip_steps)
        time.sleep(2)
        if self.obj_handle is None or self.world_handle is None:
            raise Exception('Could not find the AMBF object or world handle')

    def seed(self, seed=None):
        # Random seed
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        # Function to reset the model
        # Sets the initial position of PSM
        self.set_initial_pos_func()
        # Type 1 Reset : Uses the previous reached state as the initial state for next iteration
        # action = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        # observation, _, _, _ = self.step(action)
        # Type 2 Reset : Sets the robot to a predefined initial state for each iteration
        initial_joint_pos, initial_state_vel = self.get_joint_pos_vel_func()
        end_effector_frame = compute_FK(initial_joint_pos)
        # Updates the observation to the initialized position
        observation, _, _, _ = self._update_observation(
            end_effector_frame, initial_joint_pos, initial_state_vel)
        # Samples a goal
        self.goal = self._sample_goal(observation)
        return observation

    def set_initial_pos_func(self):
        # Function to set the initial joint positions of the robot
        for joint_idx, jt_name in enumerate(self.joints_to_control):
            # The prismatic joint is set to a different value so that at least part of the robot tip extends past the cannula
            if joint_idx == 2:
                self.obj_handle.set_joint_pos(
                    jt_name, self.joint_limits['lower_limit'][2])
            else:
                self.obj_handle.set_joint_pos(jt_name, 0)
        time.sleep(0.5)

    def get_joint_pos_vel_func(self):
        # Function to compute Joint Position and Velocity of the robot
        joint_position = np.zeros(7)
        joint_velocity = np.zeros(7)
        for joint_idx, jt_name in enumerate(self.joints_to_control):
            joint_position[joint_idx] = self.obj_handle.get_joint_pos(jt_name)
            joint_velocity[joint_idx] = self.obj_handle.get_joint_vel(jt_name)

        return joint_position, joint_velocity

    def step(self, action):
        assert len(action) == 7
        # Counter for printing position, action and reward
        self.count_for_print += 1
        # Initialization of variables
        new_state_joint_pos = np.zeros(7)
        # Clipping the action value between the desired range
        action = np.clip(action, self.action_lims_low, self.action_lims_high)
        current_joint_pos, state_vel = self.get_joint_pos_vel_func()
        # Take the action from current state to reach the new state
        for joint_idx, jt_name in enumerate(self.joints_to_control):
            new_state_joint_pos[joint_idx] = np.add(
                current_joint_pos[joint_idx], action[joint_idx])
        # Ensure the new state is within valid joint positions, if invalid then stay at the joint limit position
        desired_joint_pos = self.limit_joint_pos(new_state_joint_pos)
        # Ensures that PSM joints reach the desired joint positions
        self.set_commanded_joint_pos(desired_joint_pos)
        current_end_effector_frame = compute_FK(desired_joint_pos)
        # Update state, reward, done flag and world values in the code
        updated_state, rewards, done, info = self._update_observation(
            current_end_effector_frame, desired_joint_pos, state_vel)
        self.world_handle.update()
        # Print function for viewing the output intermittently
        if self.count_for_print % 10000 == 0:
            print("count ", self.count_for_print, "Action is ", action,
                  " new pos after action ", updated_state)
            print("Reward is ", rewards)

        return updated_state, rewards, done, info

    def set_commanded_joint_pos(self, commanded_joint_pos):
        # Function to ensure the robot joints reach the commanded positions before moving on to the next iteration
        # Counter to avoid getting stuck in the while loop because of very small position errors
        count_for_joint_pos = 0
        while True:
            reached_joint_pos = np.zeros(7)
            for joint_idx, jt_name in enumerate(self.joints_to_control):
                self.obj_handle.set_joint_pos(jt_name,
                                              commanded_joint_pos[joint_idx])
                reached_joint_pos[joint_idx] = self.obj_handle.get_joint_pos(
                    jt_name)

            error_in_pos = np.around(np.subtract(commanded_joint_pos,
                                                 reached_joint_pos),
                                     decimals=3)
            # The prismatic joint limits are smaller than those of the other joints, so a tighter error bound is used for it
            error_in_pos_joint2 = np.around(np.subtract(
                commanded_joint_pos[2], reached_joint_pos[2]),
                                            decimals=4)
            count_for_joint_pos += 1
            if (np.all(np.abs(error_in_pos) <= self.pos_error_threshold) and
                np.abs(error_in_pos_joint2) <= 0.5*self.pos_error_threshold) \
                    or count_for_joint_pos > 75:
                break

    def limit_cartesian_pos(self, cart_pos):
        # Limits the robot tip X, Y, Z positions
        limit_cartesian_pos_values = np.zeros(3)
        cartesian_pos_lower_limit = self.workspace_limits['lower_limit']
        cartesian_pos_upper_limit = self.workspace_limits['upper_limit']
        for axis in range(3):
            limit_cartesian_pos_values[axis] = np.clip(
                cart_pos[axis], cartesian_pos_lower_limit[axis],
                cartesian_pos_upper_limit[axis])
        return limit_cartesian_pos_values

    def limit_joint_pos(self, joint_pos):
        # Limits the joint position values of the robot
        # dvrk_limits_low = np.array([-1.605, -0.93556, -0.002444, -3.0456, -3.0414, -3.0481, -3.0498])
        # dvrk_limits_high = np.array([1.5994, 0.94249, 0.24001, 3.0485, 3.0528, 3.0376, 3.0399])
        # Note: Joint 5 and 6, joint pos = 0, 0 is closed jaw and 0.5, 0.5 is open
        limit_joint_values = np.zeros(7)
        joint_lower_limit = self.joint_limits['lower_limit']
        joint_upper_limit = self.joint_limits['upper_limit']
        for joint_idx in range(len(joint_pos)):
            limit_joint_values[joint_idx] = np.clip(
                joint_pos[joint_idx], joint_lower_limit[joint_idx],
                joint_upper_limit[joint_idx])

        return limit_joint_values

    def render(self, mode):
        # Not being utilized since AMBF runs in parallel for visualization
        print(' I am {} Ironman'.format(mode))

    def _update_observation(self, end_effector_frame, joint_pos, state_vel):
        # Function to update all the values
        # Function implementing Step Throttling algorithm
        if self.enable_step_throttling:
            step_jump = 0
            while step_jump < self.n_skip_steps:
                step_jump = self.obj_handle.get_sim_step() - self.prev_sim_step
                time.sleep(0.00001)
            self.prev_sim_step = self.obj_handle.get_sim_step()
            if step_jump > self.n_skip_steps:
                print('WARN: Jumped {} steps, Default skip limit {} Steps'.
                      format(step_jump, self.n_skip_steps))
        else:
            cur_sim_step = self.obj_handle.get_sim_step()
            step_jump = cur_sim_step - self.prev_sim_step
            self.prev_sim_step = cur_sim_step

        # Find the tip position and rotation by computing forward kinematics (not being used currently)
        cartesian_pos = self.limit_cartesian_pos(
            end_effector_frame[0:3, 3]).reshape((3, 1))
        achieved_rot = np.array(
            euler_from_matrix(end_effector_frame[0:3, 0:3],
                              axes='szyx')).reshape((3, 1))
        # Combine tip position and rotation to a single numpy array as achieved goal
        achieved_goal = np.asarray(
            np.concatenate((cartesian_pos.copy(), achieved_rot.copy()),
                           axis=0)).reshape(-1)
        obs = np.asarray(
            np.concatenate((cartesian_pos.copy(), achieved_rot.copy(),
                            joint_pos.reshape((7, 1))),
                           axis=0)).reshape(-1)
        # Combine the current state positions and velocity into a single array as observation
        observation = np.concatenate((obs, state_vel), axis=None)
        # Update the observation dictionary
        self.obs.state.update(observation=observation.copy(),
                              achieved_goal=achieved_goal.copy(),
                              desired_goal=self.goal.copy())
        # Update info
        self.obs.info = self._update_info()
        # Compute the reward
        self.obs.reward = self.compute_reward(self.obs.state['achieved_goal'],
                                              self.goal, self.obs.info)
        self.obs.is_done = self._check_if_done()

        # Return the values to step function
        return self.obs.state, self.obs.reward, self.obs.is_done, self.obs.info

    def compute_reward(self, achieved_goal, goal, info):
        # Function to compute reward received by the agent
        # Find the distance between goal and achieved goal
        cur_dist = LA.norm(np.subtract(goal[0:3], achieved_goal[0:3]))
        # Continuous reward
        # reward = round(1 - float(abs(cur_dist)/0.3)*0.5, 5)
        # Sparse reward
        if abs(cur_dist) < 0.01:
            reward = 1
        else:
            reward = -1
        self.obs.dist = cur_dist
        return reward

    def _sample_goal(self, observation):
        # Function to sample a new goal position and ensure it lies within the workspace of the PSM
        rand_val_pos = np.around(np.add(
            observation['achieved_goal'][0:3],
            self.np_random.uniform(-self.goal_position_range,
                                   self.goal_position_range,
                                   size=3)),
                                 decimals=4)
        rand_val_pos[0] = np.around(np.clip(
            rand_val_pos[0], self.workspace_limits['lower_limit'][0],
            self.workspace_limits['upper_limit'][0]),
                                    decimals=4)
        rand_val_pos[1] = np.around(np.clip(
            rand_val_pos[1], self.workspace_limits['lower_limit'][1],
            self.workspace_limits['upper_limit'][1]),
                                    decimals=4)
        rand_val_pos[2] = np.around(np.clip(
            rand_val_pos[2], self.workspace_limits['lower_limit'][2],
            self.workspace_limits['upper_limit'][2]),
                                    decimals=4)
        # Uncomment the lines below if individual limits need to be set for generating the desired goal state
        '''
        rand_val_pos[0] = np.around(np.clip(rand_val_pos[0], -0.1388, 0.1319), decimals=4)
        rand_val_pos[1] = np.around(np.clip(rand_val_pos[1], -0.1319, 0.1388), decimals=4)
        rand_val_pos[2] = np.around(np.clip(rand_val_pos[2], -0.1935, -0.0477), decimals=4)
        rand_val_angle[0] = np.clip(rand_val_angle[0], -0.15, 0.15)
        rand_val_angle[1] = np.clip(rand_val_angle[1], -0.15, 0.15)
        rand_val_angle[2] = np.clip(rand_val_angle[2], -0.15, 0.15)
        '''
        # Provide the range for generating the desired orientation at the terminal state
        rand_val_angle = self.np_random.uniform(-1.5, 1.5, size=3)
        goal = np.concatenate((rand_val_pos, rand_val_angle), axis=None)
        return goal.copy()

    def _check_if_done(self):
        # The episode is considered done once the tip is within the goal error margin
        return abs(self.obs.dist) < self.goal_error_margin

    def _update_info(self):
        # Can be used to provide information for debugging purposes
        info = {'is_success': self._is_success()}
        return info

    def _is_success(self):
        # Function to check if the robot tip reached the desired goal within the predefined error margin
        return abs(self.obs.dist) < self.goal_error_margin
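
The following is a minimal usage sketch for Example #1, not part of the original code: it constructs the environment, binds it to an AMBF robot with make(), and rolls out a few random actions. The joint names, the robot body name 'psm/baselink', the workspace bounds, and the remaining constructor values are illustrative assumptions; only the joint limits reuse the dVRK values quoted in the comment inside limit_joint_pos.

if __name__ == '__main__':
    # Assumed dVRK PSM joint limits (taken from the comment in limit_joint_pos above)
    joint_limits = {
        'lower_limit': np.array([-1.605, -0.93556, -0.002444, -3.0456, -3.0414, -3.0481, -3.0498]),
        'upper_limit': np.array([1.5994, 0.94249, 0.24001, 3.0485, 3.0528, 3.0376, 3.0399]),
    }
    # Illustrative Cartesian workspace bounds in metres (assumption, not from the original code)
    workspace_limits = {
        'lower_limit': np.array([-0.04, -0.03, -0.20]),
        'upper_limit': np.array([0.04, 0.03, -0.09]),
    }
    # Hypothetical joint names; they must match the AMBF model that is loaded
    joints_to_control = ['baselink-yawlink', 'yawlink-pitchbacklink',
                         'pitchendlink-maininsertionlink',
                         'maininsertionlink-toolrolllink',
                         'toolrolllink-toolpitchlink',
                         'toolpitchlink-toolgripper1link',
                         'toolpitchlink-toolgripper2link']
    env = AmbfEnvHERDDPG(action_space_limit=0.05,
                         joints_to_control=joints_to_control,
                         goal_position_range=0.05,
                         position_error_threshold=0.01,
                         goal_error_margin=0.0075,
                         joint_limits=joint_limits,
                         workspace_limits=workspace_limits,
                         enable_step_throttling=False)
    env.make('psm/baselink')  # robot body name in the AMBF scene (assumed)
    observation = env.reset()
    for _ in range(10):
        observation, reward, done, info = env.step(env.action_space.sample())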
Example #2
import time
import numpy as np
from numpy import linalg as LA
import gym
from gym import spaces
from ambf_client import Client
# NOTE: Object, World and Observation are assumed to be provided by the
# surrounding project package, as in Example #1.
class AmbfPSMEnv(gym.GoalEnv):
    def __init__(self, n_actions, n_states, n_goals):
        super(AmbfPSMEnv, self).__init__()
        # Placeholder handles; replaced with the actual AMBF handles in make()
        self.obj_handle = Object
        self.world_handle = World

        self.ambf_client = Client()
        self.ambf_client.connect()
        self.ambf_client.create_objs_from_rostopics()
        self.n_skip_steps = 5
        self.enable_step_throttling = True
        self.action = []
        self.obs = Observation(n_states=n_states)
        self.action_lims_low = np.array(
            [-0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1])
        self.action_lims_high = np.array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
        # self.action_space = spaces.Box(self.action_lims_low, self.action_lims_high)
        self.observation_space = spaces.Box(-np.inf,
                                            np.inf,
                                            shape=(n_states, ))
        self.action_space = spaces.Box(-1.,
                                       1.,
                                       shape=(n_actions, ),
                                       dtype='float32')
        # For applying HER
        # self.observation_space = spaces.Dict(dict(
        #     desired_goal=spaces.Box(-np.inf, np.inf, shape=(n_goals,), dtype='float32'),
        #     achieved_goal=spaces.Box(-np.inf, np.inf, shape=(n_goals,), dtype='float32'),
        #     observation=spaces.Box(-np.inf, np.inf, shape=(n_states,), dtype='float32'),
        # ))

        # self.base_handle = self.ambf_client.get_obj_handle('PegBase')
        self.prev_sim_step = 0

    def skip_sim_steps(self, num):
        self.n_skip_steps = num
        self.world_handle.set_num_step_skips(num)

    def set_throttling_enable(self, check):
        self.enable_step_throttling = check
        self.world_handle.enable_throttling(check)

    def make(self, a_name):
        self.obj_handle = self.ambf_client.get_obj_handle(a_name)
        self.world_handle = self.ambf_client.get_world_handle()
        self.world_handle.enable_throttling(self.enable_step_throttling)
        self.world_handle.set_num_step_skips(self.n_skip_steps)
        if self.obj_handle is None or self.world_handle is None:
            raise Exception('Could not find the AMBF object or world handle')

    def reset(self):
        action = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        return self.step(action)[0]

    def step(self, action):
        assert len(action) == 7
        action = np.clip(action, self.action_lims_low, self.action_lims_high)
        self.action = action

        self.obj_handle.pose_command(action[0], action[1], action[2],
                                     action[3], action[4], action[5],
                                     action[6])
        self.world_handle.update()
        self._update_observation(action)
        return self.obs.cur_observation()

    def render(self, mode):
        print(' I am a {} POTATO'.format(mode))

    def _update_observation(self, action):
        if self.enable_step_throttling:
            step_jump = 0
            while step_jump < self.n_skip_steps:
                step_jump = self.obj_handle.get_sim_step() - self.prev_sim_step
                time.sleep(0.00001)
            self.prev_sim_step = self.obj_handle.get_sim_step()
            if step_jump > self.n_skip_steps:
                print('WARN: Jumped {} steps, Default skip limit {} Steps'.
                      format(step_jump, self.n_skip_steps))
        else:
            cur_sim_step = self.obj_handle.get_sim_step()
            step_jump = cur_sim_step - self.prev_sim_step
            self.prev_sim_step = cur_sim_step

        state = self.obj_handle.get_pose() + [step_jump]
        self.obs.state = state
        self.obs.reward = self._calculate_reward(state, action)
        self.obs.is_done = self._check_if_done()
        self.obs.info = self._update_info()

    def _calculate_reward(self, state, action):
        prev_dist = self.obs.dist
        cur_dist = LA.norm(np.subtract(state[6:9], state[0:3]))
        action_penalty = np.sum(np.square(action))

        reward = (prev_dist - cur_dist) - 4 * action_penalty
        self.obs.dist = cur_dist
        return reward

    def _check_if_done(self):
        return False

    def _update_info(self):
        return {}
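
A corresponding minimal usage sketch for Example #2, again with assumed values: the robot body name 'psm/baselink' and the state/goal dimensions are illustrative and must match the Observation container and the loaded AMBF scene. cur_observation() is assumed to return an (observation, reward, done, info) tuple, as suggested by the commented-out reset code in Example #1.

if __name__ == '__main__':
    # n_states=7 assumes the state built in _update_observation: a 6-DOF pose plus the step jump
    env = AmbfPSMEnv(n_actions=7, n_states=7, n_goals=3)
    env.make('psm/baselink')  # robot body name in the AMBF scene (assumed)
    observation = env.reset()
    for _ in range(10):
        observation, reward, done, info = env.step(env.action_space.sample())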