Exemplo n.º 1
0
class PickbotEnv(gym.Env):
    def __init__(self,
                 joint_increment_value=0.02,
                 sim_time_factor=0.001,
                 running_step=0.001,
                 random_object=False,
                 random_position=False,
                 use_object_type=False,
                 env_object_type='free_shapes',
                 load_init_pos=False):
        """
        Initialize all the relevant variables and the connections to the simulator.

        :param joint_increment_value: bound of the per-step joint increment; the action space is
            [-joint_increment_value, joint_increment_value] for each of the 6 joints.
            If None, actions are absolute joint positions within +-(pi - 0.05).
        :param sim_time_factor: forwarded unchanged to GazeboConnection
        :param running_step: stored on the instance, not used inside this constructor
            (NOTE(review): presumably the duration of one simulation step - confirm)
        :param random_object: spawn random object in the simulation (only stored here)
        :param random_position: change object position in each reset
        :param use_object_type: assign IDs to objects and use them in the observation space
            (the code that would extend the observation is currently disabled)
        :param env_object_type: object type for environment, free_shapes for boxes while others are
            related to use_case 'door_handle', 'combox', ...
        :param load_init_pos: if True, samples of a previous task are loaded from a CSV file and
            stored in self.init_samples
        """

        # Assign Parameters
        self._joint_increment_value = joint_increment_value
        self.running_step = running_step
        self._random_object = random_object
        self._random_position = random_position
        self._use_object_type = use_object_type
        self._load_init_pos = load_init_pos

        # Latest sensor values, overwritten by the subscriber callbacks.
        # The contact and collision placeholders use the same types the
        # callbacks store (list of contact states / plain bool) so that readers
        # of these attributes work before the first message has arrived.
        self.joints_state = JointState()
        self.contact_1_state = []
        self.contact_2_state = []
        self.collision = False
        self.camera_rgb_state = Image()
        self.camera_depth_state = Image()
        self.contact_1_force = Vector3()
        self.contact_2_force = Vector3()
        self.gripper_state = VacuumGripperState()

        # Layout of the observation vector: 6 joints, gripper pose (7), object pose (7)
        self._list_of_observations = [
            "elbow_joint_state",
            "shoulder_lift_joint_state",
            "shoulder_pan_joint_state",
            "wrist_1_joint_state",
            "wrist_2_joint_state",
            "wrist_3_joint_state",
            "vacuum_gripper_pos_x",
            "vacuum_gripper_pos_y",
            "vacuum_gripper_pos_z",
            "vacuum_gripper_ori_w",
            "vacuum_gripper_ori_x",
            "vacuum_gripper_ori_y",
            "vacuum_gripper_ori_z",
            "object_pos_x",
            "object_pos_y",
            "object_pos_z",
            "object_ori_w",
            "object_ori_x",
            "object_ori_y",
            "object_ori_z",
        ]

        # Establishes connection with simulator:
        # 1) Gazebo Connection
        # 2) Controller Connection
        # 3) Joint Publisher
        self.gazebo = GazeboConnection(sim_time_factor=sim_time_factor)
        self.controllers_object = ControllersConnection()
        self.pickbot_joint_publisher_object = JointPub()

        # Define Subscribers as Sensordata:
        # 1) /pickbot/joint_states
        # 2) /gripper_contactsensor_1_state
        # 3) /gripper_contactsensor_2_state
        # 4) /gz_collisions
        # 5) /pickbot/gripper/state
        # The camera topics /camera_rgb/image_raw and
        # /camera_depth/depth/image_raw are not subscribed at the moment.
        rospy.Subscriber("/pickbot/joint_states", JointState,
                         self.joints_state_callback)
        rospy.Subscriber("/gripper_contactsensor_1_state", ContactsState,
                         self.contact_1_callback)
        rospy.Subscriber("/gripper_contactsensor_2_state", ContactsState,
                         self.contact_2_callback)
        rospy.Subscriber("/gz_collisions", Bool, self.collision_callback)
        rospy.Subscriber("/pickbot/gripper/state", VacuumGripperState,
                         self.gripper_state_callback)

        # Define Action and state Space and Reward Range:
        # Action Space: Box Space with 6 values.
        # State Space: Box Space with 20 values. It is a numpy array with shape (20,)
        # Reward Range: -infinity to infinity
        if self._joint_increment_value is None:
            # Directly use joint_positions as action
            high_action = (math.pi - 0.05) * np.ones(6)
        else:
            # Use joint_increments as action
            high_action = self._joint_increment_value * np.ones(6)
        low_action = -high_action

        self.action_space = spaces.Box(low_action, high_action)

        self.obs_dim = 20
        high = np.inf * np.ones(self.obs_dim)
        low = -high

        self.observation_space = spaces.Box(low, high)

        self.reward_range = (-np.inf, np.inf)

        self._seed()
        self.done_reward = 0

        # set up everything to publish the Episode Number and Episode Reward on a rostopic
        self.episode_num = 0
        self.accumulated_episode_reward = 0
        self.episode_steps = 0
        self.reward_pub = rospy.Publisher('/openai/reward',
                                          RLExperimentInfo,
                                          queue_size=1)
        self.reward_list = []
        self.episode_list = []
        self.step_list = []
        self.csv_name = logger.get_dir() + '/result_log'
        print("CSV NAME")
        print(self.csv_name)
        self.csv_success_exp = logger.get_dir(
        ) + '/success_exp' + datetime.datetime.now().strftime(
            '%Y-%m-%d_%Hh%Mmin') + '.csv'
        self.successful_attempts = 0

        # variable to store last observation
        self.old_obs = self.get_obs()

        # object name: name of the target object
        # object type: index of the object name in the object list
        # object list: pool of the available objects, have at least one entry
        self.object_name = ''
        self.object_type_str = ''
        self.object_type = 0
        self.object_list = U.get_target_object(env_object_type)
        print("object list {}".format(self.object_list))
        self.object_initial_position = Pose(position=Point(x=-0.13,
                                                           y=0.848,
                                                           z=1.06),
                                            orientation=quaternion_from_euler(
                                                0.002567, 0.102, 1.563))

        # select the first entry of object_list as target and place it at the
        # pose given by six zeros
        self.set_target_object([0, 0, 0, 0, 0, 0])

        # get maximum distance to the object to calculate reward
        self.max_distance, _ = U.get_distance_gripper_to_object()
        # The closest distance during training
        self.min_distance = 999

        # get samples from reaching task
        if self._load_init_pos:
            import environments
            self.init_samples = U.load_samples_from_prev_task(
                os.path.dirname(environments.__file__) +
                "/contacts_sample/door_sample/success_exp2019-05-21_11h41min.csv"
            )

    # Callback Functions for Subscribers to make topic values available each time the class is initialized
    def joints_state_callback(self, msg):
        """Subscriber callback: keep the received joint state message as-is."""
        self.joints_state = msg

    def contact_1_callback(self, msg):
        """Subscriber callback: keep only the `states` field of the contact sensor 1 message."""
        self.contact_1_state = msg.states

    def contact_2_callback(self, msg):
        """Subscriber callback: keep only the `states` field of the contact sensor 2 message."""
        self.contact_2_state = msg.states

    def collision_callback(self, msg):
        """Subscriber callback: keep only the `data` field of the collision message."""
        self.collision = msg.data

    def camera_rgb_callback(self, msg):
        """Subscriber callback: keep the received rgb camera message as-is."""
        self.camera_rgb_state = msg

    def camera_depth_callback(self, msg):
        """Subscriber callback: keep the received depth camera message as-is."""
        self.camera_depth_state = msg

    def gripper_state_callback(self, msg):
        """Subscriber callback: keep the received gripper state message as-is."""
        self.gripper_state = msg

    def _seed(self, seed=None):
        """
        Create the random number generator of the environment.

        :param seed: value handed to seeding.np_random (None by default)
        :return: one-element list holding the seed reported by seeding.np_random
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def reset(self):
        """
        Reset the robot and the target object and return the initial state.

        Sequence as implemented below:

        1) Change gravity to 0 and turn the controllers off
        2) Reset the simulation, pause it, reset it again and wait 0.1 s
        3) Place the target object:
           - load_init_pos: pick a random sample from self.init_samples and use its
             last six values as target pose (the joints of the sample are only printed)
           - otherwise: set the joints via the joint publisher and move the object to a
             random door handle position (random_position) or to the initial position
        4) Unpause the simulation, turn the controllers on, restore gravity
        5) Check that all sensor topics deliver messages
        6) Write False into collision.yml (the file read by get_collisions)
        7) Get the observation, pause the simulation and convert the observation into a state
        8) Publish the reward of the finished episode and advance the episode counter
        9) Unpause the simulation so that the subscribers receive data in the next step

        :return: state computed by U.get_state from the observation after the reset
        """

        # print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Reset %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
        self.gazebo.change_gravity(0, 0, 0)
        self.controllers_object.turn_off_controllers()
        # turn off the gripper
        # U.turn_off_gripper()
        # NOTE(review): the simulation is reset both before and after pausing;
        # presumably deliberate - confirm before simplifying.
        self.gazebo.resetSim()
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        time.sleep(0.1)

        # turn on the gripper
        # U.turn_on_gripper()

        if self._load_init_pos:
            # load sample from previous training result
            sample_ep = random.choice(self.init_samples)
            print("Joints from samples: {}".format(sample_ep[0:6]))
            # self.pickbot_joint_publisher_object.set_joints(sample_ep[0:6])
            # the last six values of the sample are used as pose of the target
            self.set_target_object(sample_ep[-6:])
        else:
            self.pickbot_joint_publisher_object.set_joints()
            # vg_geo, to_geo and orientation_error are computed but not used
            # afterwards (only the disabled debug print below referred to them)
            vg_geo = U.get_link_state("vacuum_gripper_link")
            to_geo = U.get_link_state("target")
            orientation_error = quaternion_multiply(
                vg_geo[3:], quaternion_conjugate(to_geo[3:]))
            # print("Orientation error {}".format(orientation_error))
            box_pos = U.get_random_door_handle_pos(
            ) if self._random_position else self.object_initial_position
            U.change_object_position(self.object_name, box_pos)
        # Code above is hard-coded for door handle, modify later.
        # TO-DO: Modify reset wrt the object type as in the reach env

        self.gazebo.unpauseSim()
        self.controllers_object.turn_on_controllers()
        self.gazebo.change_gravity(0, 0, -9.81)
        self._check_all_systems_ready()

        # last_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
        # last_position = [0, 0, 0, 0, 0, 0]
        # with open('last_position.yml', 'w') as yaml_file:
        #     yaml.dump(last_position, yaml_file, default_flow_style=False)
        # with open('contact_1_force.yml', 'w') as yaml_file:
        #     yaml.dump(0.0, yaml_file, default_flow_style=False)
        # with open('contact_2_force.yml', 'w') as yaml_file:
        #     yaml.dump(0.0, yaml_file, default_flow_style=False)
        # clear the collision value remembered from the previous episode
        # (the path is relative to the current working directory)
        with open('collision.yml', 'w') as yaml_file:
            yaml.dump(False, yaml_file, default_flow_style=False)
        observation = self.get_obs()
        # print("current joints {}".format(observation[:6]))
        # get maximum distance to the object to calculate reward
        # self.max_distance, _ = U.get_distance_gripper_to_object()
        # self.min_distance = self.max_distance
        self.gazebo.pauseSim()
        state = U.get_state(observation)
        # publishes the reward of the finished episode and resets the counters
        self._update_episode()
        self.gazebo.unpauseSim()
        return state

    def step(self, action):
        """
        Given the action selected by the learning algorithm,
        perform the corresponding movement of the robot.

        1) Take the joint positions of the last observation as starting point
        2) Define the new joints according to the chosen action
        3) Unpause and publish the new joint positions
        4) Wait until the joints reached the positions, 2 s (ROS time) passed or a collision
           was detected. A collision ends the episode immediately.
        5) Get Observations and pause Simulation
        6) Convert Observations into State
        7) Unpause Simulation, check if its done, calculate done_reward and pause Simulation again
        8) Calculate reward based on Observation and done_reward
        9) Unpause that topics can be received in next step
        10) Return State, Reward, Done

        :param action: 6 values; joint increments (clipped to +-joint_increment_value) or,
            if joint_increment_value is None, the joint positions themselves
        :return: tuple (state, reward, done, info) where info is always an empty dict
        """

        self.old_obs = self.get_obs()

        print(
            "===================================================================="
        )

        # 1) the first six entries of the observation are the joint positions
        last_position = self.old_obs[:6]

        # 2) get the new joint positions according to chosen action
        if self._joint_increment_value is None:
            next_action_position = action
        else:
            next_action_position = self.get_action_to_position(
                np.clip(action, -self._joint_increment_value,
                        self._joint_increment_value), last_position)
        print("next action position: {}".format(
            np.around(next_action_position, decimals=3)))

        # 3) unpause, move to position
        self.gazebo.unpauseSim()
        self.pickbot_joint_publisher_object.move_joints(next_action_position)

        # 4) Busy waiting until all the joints reach the next_action_position
        start_ros_time = rospy.Time.now()
        while True:
            # Check collision:
            invalid_collision = self.get_collisions()
            if invalid_collision:
                print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<")
                observation = self.get_obs()
                print("joints after reset collision : {} ".format(
                    observation[:6]))

                # calculate reward immediately
                reward = self._compute_pose_reward(observation,
                                                   invalid_collision)
                print("Reward this step after colliding {}".format(reward))
                self.accumulated_episode_reward += reward
                # the aborted step counts like any other step of the episode
                self.episode_steps += 1
                return U.get_state(observation), reward, True, {}

            elapsed_time = rospy.Time.now() - start_ros_time
            if np.isclose(next_action_position,
                          self.joints_state.position,
                          rtol=0.0,
                          atol=0.01).all():
                break
            elif elapsed_time > rospy.Duration(2):  # time out
                print("TIME OUT, joints haven't reach positions")
                break

        # 5) Get Observations and pause Simulation
        observation = self.get_obs()
        print("Observation in the step: {}".format(
            np.around(observation[:6], decimals=3)))
        print("Joints      in the step: {}".format(
            np.around(self.joints_state.position, decimals=3)))
        self.gazebo.pauseSim()

        # 6) Convert Observations into state
        state = U.get_state(observation)

        # 7) Unpause Simulation check if its done, calculate done_reward
        self.gazebo.unpauseSim()
        done, done_reward, invalid_collision = self.is_done(
            observation, last_position)
        self.gazebo.pauseSim()

        # 8) Calculate reward based on Observation and done_reward and update the accumulated Episode Reward
        reward = self._compute_pose_reward(observation,
                                           invalid_collision) + done_reward
        print("Reward this step {}".format(reward))

        self.accumulated_episode_reward += reward

        # 9) Unpause that topics can be received in next step
        self.gazebo.unpauseSim()

        self.episode_steps += 1
        # 10) Return State, Reward, Done
        return state, reward, done, {}

    def _compute_pose_reward(self, observation, invalid_collision):
        """
        Reward of one step derived from the pose difference between gripper and object.

        :param observation: observation vector laid out as in self._list_of_observations
            (6:9 gripper position, 9:13 gripper orientation, 13:16 object position,
            16: object orientation)
        :param invalid_collision: collision flag handed through to UMath.computeReward
        :return: value returned by UMath.computeReward
        """
        distance_error = observation[6:9] - observation[13:16]
        orientation_error = quaternion_multiply(
            observation[9:13], quaternion_conjugate(observation[16:]))

        reward_dist = UMath.rmseFunc(distance_error)
        # Rounding can push the magnitude marginally above 1, for which arccos
        # returns NaN; clamp so that the reward stays finite.
        clamped = min(abs(orientation_error[0]), 1.0)
        reward_orientation = 2 * np.arccos(clamped)

        return UMath.computeReward(reward_dist, reward_orientation,
                                   invalid_collision)

    def _check_all_systems_ready(self):
        """
        Run the readiness checks of the sensor topics that are in use.

        Checked, in this order:
        1) joint states
        2) contact sensor 1
        3) contact sensor 2
        4) collisions

        Not checked so far: rgb camera, depth camera, gripper state
        """
        readiness_checks = (
            self.check_joint_states,
            self.check_contact_1,
            self.check_contact_2,
            self.check_collision,
        )
        for run_check in readiness_checks:
            run_check()
        rospy.logdebug("ALL SYSTEMS READY")

    def check_joint_states(self):
        """
        Wait for a message on /pickbot/joint_states and store it in self.joints_state.

        Every attempt waits at most 0.1 s; failed attempts are logged and repeated
        until a message arrived or ROS is shutting down.
        """
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message("/pickbot/joint_states",
                                                  JointState,
                                                  timeout=0.1)
                self.joints_state = received
                rospy.logdebug("Current joint_states READY")
            except Exception as err:
                rospy.logdebug(
                    "Current joint_states not ready yet, retrying==>" +
                    str(err))
                print("EXCEPTION: Joint States not ready yet, retrying.")

    def check_contact_1(self):
        """
        Wait for a message on /gripper_contactsensor_1_state and store its `states`
        field in self.contact_1_state.

        Every attempt waits at most 0.1 s; failed attempts are logged and repeated
        until a message arrived or ROS is shutting down.
        """
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message(
                    "/gripper_contactsensor_1_state",
                    ContactsState,
                    timeout=0.1)
                self.contact_1_state = received.states
                rospy.logdebug("Contactsensor 1 READY")
            except Exception as err:
                rospy.logdebug("Contactsensor 1 not ready yet, retrying==>" +
                               str(err))
                print("EXCEPTION: Contactsensor 1 not ready yet, retrying.")

    def check_contact_2(self):
        """
        Wait for a message on /gripper_contactsensor_2_state and store its `states`
        field in self.contact_2_state.

        Every attempt waits at most 0.1 s; failed attempts are logged and repeated
        until a message arrived or ROS is shutting down.
        """
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message(
                    "/gripper_contactsensor_2_state",
                    ContactsState,
                    timeout=0.1)
                self.contact_2_state = received.states
                rospy.logdebug("Contactsensor 2 READY")
            except Exception as err:
                rospy.logdebug("Contactsensor 2 not ready yet, retrying==>" +
                               str(err))
                print("EXCEPTION: Contactsensor 2 not ready yet, retrying.")

    def check_collision(self):
        """
        Wait for a message on /gz_collisions and store its `data` field in self.collision.

        Every attempt waits at most 0.1 s; failed attempts are logged and repeated
        until a message arrived or ROS is shutting down.
        """
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message("/gz_collisions",
                                                  Bool,
                                                  timeout=0.1)
                self.collision = received.data
                rospy.logdebug("collision READY")
            except Exception as err:
                rospy.logdebug(
                    "EXCEPTION: Collision not ready yet, retrying==>" +
                    str(err))

    def check_rgb_camera(self):
        """
        Wait for a message on /camera_rgb/image_raw and store it in self.camera_rgb_state.

        Every attempt waits at most 0.1 s; failed attempts are logged and repeated
        until a message arrived or ROS is shutting down.
        """
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message("/camera_rgb/image_raw",
                                                  Image,
                                                  timeout=0.1)
                self.camera_rgb_state = received
                rospy.logdebug("rgb_image READY")
            except Exception as err:
                rospy.logdebug(
                    "EXCEPTION: rgb_image not ready yet, retrying==>" +
                    str(err))

    def check_rgbd_camera(self):
        """
        Wait for a message on /camera_depth/depth/image_raw and store it in
        self.camera_depth_state.

        Every attempt waits at most 0.1 s; failed attempts are logged and repeated
        until a message arrived or ROS is shutting down.
        """
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message(
                    "/camera_depth/depth/image_raw", Image, timeout=0.1)
                self.camera_depth_state = received
                rospy.logdebug("rgbd_image READY")
            except Exception as err:
                rospy.logdebug(
                    "EXCEPTION: rgbd_image not ready yet, retrying==>" +
                    str(err))

    def check_gripper_state(self):
        """
        Wait for a message on /pickbot/gripper/state and store it in self.gripper_state.

        Every attempt waits at most 0.1 s; failed attempts are logged and repeated
        until a message arrived or ROS is shutting down.
        """
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message("/pickbot/gripper/state",
                                                  VacuumGripperState,
                                                  timeout=0.1)
                self.gripper_state = received
                rospy.logdebug("gripper_state READY")
            except Exception as err:
                rospy.logdebug(
                    "EXCEPTION: gripper_state not ready yet, retrying==>" +
                    str(err))

    # Set target object
    # The target is always the first entry of the object list.
    # position: six values, x, y, z followed by three Euler angles, giving the pose the object is moved to
    def set_target_object(self, position):
        """
        Make the first entry of the object list the target and move it to a pose.

        :param position: indexable with at least six values; entries 0-2 are used as
            x, y, z and entries 3-5 are handed to quaternion_from_euler
        """
        target_entry = self.object_list[0]
        self.object_name = target_entry["name"]
        self.object_type_str = target_entry["type"]
        self.object_type = 0

        target_point = Point(x=position[0], y=position[1], z=position[2])
        target_orientation = quaternion_from_euler(position[3], position[4],
                                                   position[5])
        box_pos = Pose(position=target_point, orientation=target_orientation)

        U.change_object_position(self.object_name, box_pos)
        print("Current target: ", self.object_name)

    def get_action_to_position(self, action, last_position, joint_limit=2.9):
        """
        Add the joint increments to the last joint positions and clip the result.

        :param action: increments for the joints, one value per joint (anything that can be
            added element-wise to last_position)
        :param last_position: array of 6 values, the joint positions the increments refer to
        :param joint_limit: every resulting joint position is clipped to
            [-joint_limit, joint_limit]; defaults to the previously hard-coded 2.9
        :return: list with all joint positions according to chosen action
        """
        target_position = np.asarray(last_position) + action
        # clip the positions that are going to be published to make sure to avoid
        # losing control of controllers
        clipped = np.clip(target_position, -joint_limit, joint_limit)

        return clipped.tolist()

    def get_obs(self):
        """
        Build the observation vector the algorithm learns from.

        The vector is the concatenation of
        - the joint positions of the last joint state message,
        - the link state of "vacuum_gripper_link",
        - the link state of "target",
        in the order given by self._list_of_observations.

        The process is terminated via sys.exit if any joint position lies
        outside [-pi, pi].

        :return: observation as one-dimensional numpy array
        """
        joint_positions = self.joints_state.position

        if any(abs(value) > math.pi for value in self.joints_state.position):
            print(self.joints_state.name)
            print(self.joints_state.position)
            sys.exit("Joint exceeds limit")

        gripper_link_state = U.get_link_state("vacuum_gripper_link")
        target_link_state = U.get_link_state("target")

        # Concatenate the information that defines the robot state
        parts = [
            np.reshape(joint_positions, -1),
            np.reshape(gripper_link_state, -1),
            np.reshape(target_link_state, -1),
        ]
        return np.concatenate(parts)

    def get_contact_force_1(self):
        """
        Get Contact Force of contact sensor 1.

        The magnitude of the force of the last entry in self.contact_1_state is averaged
        with the value stored by the previous call (kept in contact_1_force.yml), so the
        chances are higher that both sensors report contact at the same time despite
        sensor noise. A missing or unreadable file counts as a previous force of 0.0.

        :returns: average of the previous and the current force magnitude
        """

        # The callback stores a list of states; tolerate a message object that
        # still carries its own `states` field.
        states = getattr(self.contact_1_state, "states", self.contact_1_state)

        contact1_force = 0.0
        if states:
            # only the last contact state determines the current value
            self.contact_1_force = states[-1].total_wrench.force
            # plain float, so that the YAML file holds a plain number
            contact1_force = float(
                np.linalg.norm((self.contact_1_force.x,
                                self.contact_1_force.y,
                                self.contact_1_force.z)))

        # read last contact force 1 value out of yaml
        # NOTE: yaml.Loader can construct arbitrary objects; acceptable only
        # because this scratch file is written by this class itself.
        stored_value = None
        try:
            with open("contact_1_force.yml", 'r') as stream:
                stored_value = yaml.load(stream, Loader=yaml.Loader)
        except IOError:
            # no value has been stored yet
            pass
        except yaml.YAMLError as exc:
            print(exc)
        try:
            last_contact_1_force = float(stored_value)
        except (TypeError, ValueError):
            last_contact_1_force = 0.0

        # write new contact_1_force value in yaml
        with open('contact_1_force.yml', 'w') as yaml_file:
            yaml.dump(contact1_force, yaml_file, default_flow_style=False)

        # calculate average force
        average_contact_1_force = (last_contact_1_force + contact1_force) / 2

        return average_contact_1_force

    def get_contact_force_2(self):
        """
        Get Contact Force of contact sensor 2.

        The magnitude of the force of the last entry in self.contact_2_state is averaged
        with the value stored by the previous call (kept in contact_2_force.yml), so the
        chances are higher that both sensors report contact at the same time despite
        sensor noise. A missing or unreadable file counts as a previous force of 0.0.

        :returns: average of the previous and the current force magnitude
        """

        # The callback stores a list of states; tolerate a message object that
        # still carries its own `states` field.
        states = getattr(self.contact_2_state, "states", self.contact_2_state)

        contact2_force = 0.0
        if states:
            # only the last contact state determines the current value
            self.contact_2_force = states[-1].total_wrench.force
            # plain float, so that the YAML file holds a plain number
            contact2_force = float(
                np.linalg.norm((self.contact_2_force.x,
                                self.contact_2_force.y,
                                self.contact_2_force.z)))

        # read last contact force 2 value out of yaml
        # NOTE: yaml.Loader can construct arbitrary objects; acceptable only
        # because this scratch file is written by this class itself.
        stored_value = None
        try:
            with open("contact_2_force.yml", 'r') as stream:
                stored_value = yaml.load(stream, Loader=yaml.Loader)
        except IOError:
            # no value has been stored yet
            pass
        except yaml.YAMLError as exc:
            print(exc)
        try:
            last_contact_2_force = float(stored_value)
        except (TypeError, ValueError):
            last_contact_2_force = 0.0

        # write new contact_2_force value in yaml
        with open('contact_2_force.yml', 'w') as yaml_file:
            yaml.dump(contact2_force, yaml_file, default_flow_style=False)

        # calculate average force
        average_contact_2_force = (last_contact_2_force + contact2_force) / 2

        return average_contact_2_force

    def get_collisions(self):
        """
        Checks all the collisions by listening to rostopic /gz_collisions which is republishing the
        gazebo topic (gz topic -e /gazebo/default/physics/contacts).
        The Publisher is started in a different node out of the simulation launch file.

        The current value is stored in collision.yml; the value stored by the previous call is
        read back first. This is to make sure to see collisions despite high sensor noise and
        publish rate. A missing or unreadable file counts as "no collision".

        returns:
            False:  if neither the current nor the previous value reports a collision
            True:   if one of the two values reports a collision
        """

        # self.collision is a plain bool once a message was received; tolerate a
        # message object that still carries its own `data` field.
        current_collision = bool(getattr(self.collision, "data",
                                         self.collision))

        # read last collision value out of yaml
        # NOTE: yaml.Loader can construct arbitrary objects; acceptable only
        # because this scratch file is written by this class itself.
        last_collision = False
        try:
            with open("collision.yml", 'r') as stream:
                last_collision = yaml.load(stream, Loader=yaml.Loader)
        except IOError:
            # no value has been stored yet
            pass
        except yaml.YAMLError as exc:
            print(exc)

        # write new collision value in yaml
        with open('collision.yml', 'w') as yaml_file:
            yaml.dump(current_collision, yaml_file, default_flow_style=False)

        # If one of the two values is True return True else False
        return current_collision or last_collision is True

    def is_done(self, observations, last_position):
        """
        Check whether the episode is over for the given observation.

        Done when:
        - the goal is reached: distance between gripper and object below 0.05 and
          magnitude of the first component of the orientation error below 0.1
        - an invalid collision was detected (no additional penalty is applied here)

        :param observations: observation vector laid out as in self._list_of_observations
        :param last_position: not used
        :return: tuple (done, done_reward, invalid_collision); done_reward is 1000 when
            the goal is reached and 0 otherwise
        """
        reward_reached_goal = 1000

        # Check if there are invalid collisions
        invalid_collision = self.get_collisions()

        gripper_position = observations[6:9]
        target_position = observations[13:16]
        distance_gripper_to_target = np.linalg.norm(gripper_position -
                                                    target_position)
        orientation_error = quaternion_multiply(
            observations[9:13], quaternion_conjugate(observations[16:]))

        # NOTE(review): step() turns the same component into an angle via
        # 2 * arccos(|component|), which is small when the component is close
        # to 1, whereas this test requires it to be close to 0. Presumably one
        # of the two is not what was intended - confirm against the quaternion
        # component order used by quaternion_multiply.
        goal_reached = bool(distance_gripper_to_target < 0.05
                            and np.abs(orientation_error[0]) < 0.1)

        done_reward = 0
        if goal_reached:
            print("Success! Distance {} and orientation err {} ".format(
                distance_gripper_to_target, orientation_error[0]))
            done_reward = reward_reached_goal

        # Crashing with itself, shelf, base
        crashed = bool(invalid_collision)
        if crashed:
            print('>>>>>>>>>>>>>>>>>>>> crashing')

        done = goal_reached or crashed
        return done, done_reward, invalid_collision

    def load_position(self):
        """Placeholder without any effect; returns None."""
        return None

    def _update_episode(self):
        """
        Publishes the accumulated reward of the episode and
        increases the episode number by one.
        :return:
        """
        if self.episode_num > 0:
            self._publish_reward_topic(self.accumulated_episode_reward,
                                       self.episode_steps, self.episode_num)

        self.episode_num += 1
        self.accumulated_episode_reward = 0
        self.episode_steps = 0

    def _publish_reward_topic(self, reward, steps, episode_number=1):
        """
        Publish the reward of an episode and record it.

        The reward is published together with the episode number on the reward topic,
        appended to the in-memory histories and written as one
        "reward;episode;steps" line to the result CSV file.
        :param reward: accumulated reward of the episode
        :param steps: number of steps of the episode (recorded, not part of the message)
        :param episode_number: number of the episode
        :return:
        """
        info_msg = RLExperimentInfo()
        info_msg.episode_number = episode_number
        info_msg.episode_reward = reward
        self.reward_pub.publish(info_msg)

        histories = (
            (self.reward_list, reward),
            (self.episode_list, episode_number),
            (self.step_list, steps),
        )
        for history, value in histories:
            history.append(value)

        csv_line = ";".join(
            (str(reward), str(episode_number), str(steps))) + "\n"

        with open(self.csv_name + '.csv', 'a') as log_file:
            log_file.write(csv_line)
Exemplo n.º 2
0
class PickbotEnv(gym.Env):

    def __init__(self, joint_increment_value=0.02, running_step=0.001):
        """
        Set up parameters, message holders, simulator connections, ROS
        subscribers, the gym spaces and the episode bookkeeping.
        :param joint_increment_value: initial increment of the joints
        :param running_step: time in seconds that step() sleeps after sending a joint command
        """

        # Parameters
        self._joint_increment_value = joint_increment_value
        self.running_step = running_step

        # Holders for the latest sensor data, pre-filled with empty messages
        self.joints_state = JointState()
        self.contact_1_state = ContactsState()
        self.contact_2_state = ContactsState()
        self.collision = Bool()
        self.camera_rgb_state = Image()
        self.camera_depth_state = Image()
        self.contact_1_force = Vector3()
        self.contact_2_force = Vector3()
        self.gripper_state = VacuumGripperState()

        # Names (and order) of the values that make up one observation
        self._list_of_observations = [
            "distance_gripper_to_object",
            "elbow_joint_state",
            "shoulder_lift_joint_state",
            "shoulder_pan_joint_state",
            "wrist_1_joint_state",
            "wrist_2_joint_state",
            "wrist_3_joint_state",
            "contact_1_force",
            "contact_2_force",
            "object_pos_x",
            "object_pos_y",
            "object_pos_z",
        ]

        # Connections to the simulator:
        #   1) Gazebo connection
        #   2) Controller connection
        #   3) Joint publisher
        self.gazebo = GazeboConnection()
        self.controllers_object = ControllersConnection()
        self.pickbot_joint_publisher_object = JointPub()

        # Subscribers that provide the sensor data.
        # Available in the environment but not subscribed so far:
        #   /camera_rgb/image_raw          (Image, self.camera_rgb_callback)
        #   /camera_depth/depth/image_raw  (Image, self.camera_depth_callback)
        sensor_topics = (
            ("/pickbot/joint_states", JointState, self.joints_state_callback),
            ("/gripper_contactsensor_1_state", ContactsState, self.contact_1_callback),
            ("/gripper_contactsensor_2_state", ContactsState, self.contact_2_callback),
            ("/gz_collisions", Bool, self.collision_callback),
            ("/pickbot/gripper/state", VacuumGripperState, self.gripper_state_callback),
        )
        for topic_name, msg_type, handler in sensor_topics:
            rospy.Subscriber(topic_name, msg_type, handler)

        # Action space: Discrete with 13 actions
        #   0-11) increment/decrement one of the six joints
        #   12)   turn the vacuum gripper on/off
        # State space: Box with 12 values (one per entry of
        #   self._list_of_observations), i.e. a numpy array of shape (12,)
        # Reward range: -infinity to infinity
        self.action_space = spaces.Discrete(13)

        float32_max = np.finfo(np.float32).max
        # bounds in observation order: distance, 6 joints, 2 contact forces, object x/y/z
        upper_bounds = [1] + [math.pi] * 6 + [float32_max] * 2 + [1, 1.4, 1.5]
        lower_bounds = [0] + [-math.pi] * 6 + [0, 0] + [-1, 0, 0]
        high = np.array(upper_bounds)
        low = np.array(lower_bounds)
        self.observation_space = spaces.Box(low, high)
        self.reward_range = (-np.inf, np.inf)

        self._seed()
        self.done_reward = 0

        # Bookkeeping needed to publish the episode number and episode reward on a rostopic
        self.episode_num = 0
        self.cumulated_episode_reward = 0
        self.episode_steps = 0
        self.reward_pub = rospy.Publisher('/openai/reward', RLExperimentInfo, queue_size=1)
        self.reward_list = []
        self.episode_list = []
        self.step_list = []
        self.csv_name = logger.get_dir() + '/result_log'
        print("CSV NAME")
        print(self.csv_name)

    # Callback Functions for Subscribers to make topic values available each time the class is initialized 
    def joints_state_callback(self, msg):
        """
        Subscriber callback for /pickbot/joint_states: keep the latest message.
        :param msg: received JointState message
        """
        self.joints_state = msg

    def contact_1_callback(self, msg):
        """
        Subscriber callback for /gripper_contactsensor_1_state.
        Only the ``states`` field of the message is kept, not the message itself.
        :param msg: received ContactsState message
        """
        self.contact_1_state = msg.states

    def contact_2_callback(self, msg):
        """
        Subscriber callback for /gripper_contactsensor_2_state.
        Only the ``states`` field of the message is kept, not the message itself.
        :param msg: received ContactsState message
        """
        self.contact_2_state = msg.states

    def collision_callback(self, msg):
        """
        Subscriber callback for /gz_collisions.
        Only the ``data`` field of the Bool message is kept.
        :param msg: received Bool message
        """
        self.collision = msg.data

    def camera_rgb_callback(self, msg):
        """
        Keep the latest RGB camera message.
        The matching subscriber is currently commented out in __init__.
        :param msg: received message
        """
        self.camera_rgb_state = msg

    def camera_depth_callback(self, msg):
        """
        Keep the latest depth camera message.
        The matching subscriber is currently commented out in __init__.
        :param msg: received message
        """
        self.camera_depth_state = msg

    def gripper_state_callback(self, msg):
        """
        Subscriber callback for /pickbot/gripper/state: keep the latest message.
        :param msg: received VacuumGripperState message
        """
        self.gripper_state = msg

    def _seed(self, seed=None):
        """
        Create the random number generator of the environment.
        :param seed: optional seed forwarded to seeding.np_random
        :return: single-element list holding the seed returned by seeding.np_random
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]

    def reset(self):
        """
        Reset the robot to its initial position and restart the controllers.

        1) Change gravity to 0 so that the arm does not fall
        2) Turn controllers off
        3) Pause simulation
        4) Reset simulation
        5) Set the joints through the joint publisher
        6) Unpause simulation
        7) Turn controllers on
        8) Restore gravity
        9) Check that all sensor topics deliver data
        10) Move the object to a random position
        11) Write the initial position into last_position.yml
        12) Initialise the YAML files for the contact forces (used to average over 2 readings)
            and for the collision flag (used to remember the previous reading)
        13) Get observations, pause simulation and convert the observations into the state
        14) Publish the episode reward, reset the per-episode counters and advance the episode number
        15) Unpause simulation because the system must be running in the next step,
            otherwise no data is seen by the subscribers
        :return: state as produced by get_state
        """
        # The description above must be the first statement of the function,
        # otherwise Python does not treat it as the docstring.
        print("§§§§§ RESET §§§§§§")

        self.gazebo.change_gravity(0, 0, 0)
        self.controllers_object.turn_off_controllers()
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        self.pickbot_joint_publisher_object.set_joints()
        self.gazebo.unpauseSim()
        self.controllers_object.turn_on_controllers()
        self.gazebo.change_gravity(0, 0, -9.81)
        self._check_all_systems_ready()
        self.randomly_spawn_object()

        # Start values of the helper files that carry data from one step to the next
        initial_files = (
            ('last_position.yml', [1.5, -1.2, 1.4, -1.87, -1.57, 0]),
            ('contact_1_force.yml', 0.0),
            ('contact_2_force.yml', 0.0),
            ('collision.yml', False),
        )
        for file_name, initial_value in initial_files:
            with open(file_name, 'w') as yaml_file:
                yaml.dump(initial_value, yaml_file, default_flow_style=False)

        observation = self.get_obs()
        self.gazebo.pauseSim()
        state = self.get_state(observation)
        self._update_episode()
        self.gazebo.unpauseSim()
        return state

    def step(self, action):
        """
        Given the action selected by the learning algorithm,
        perform the corresponding movement of the robot.

        1) Read the last published joint positions from YAML
        2) Define new joint positions according to the chosen action
        3) Write the joint positions into YAML to save them for the next step
        4) Unpause, move to that position for the defined time
        5) Get observations and pause simulation
        6) Convert observations into state
        7) Unpause simulation, check if the episode is done, calculate done_reward and pause simulation again
        8) Calculate reward based on observation and done_reward
        9) Unpause so that topics can be received in the next step
        10) Return state, reward, done

        :param action: action chosen by the learning algorithm
        :return: tuple (state, reward, done, info) where info is an empty dict
        :raises yaml.YAMLError: if last_position.yml cannot be parsed
        """

        # 1) read last_position out of YAML File
        # The file is written by this environment itself (reset() and step 3 below).
        # The explicit Loader keeps the historic behaviour of yaml.load() and is
        # mandatory on PyYAML >= 6. The full loader is kept on purpose: the dumped
        # positions can be numpy scalars (the increment is derived from
        # np.linalg.norm), which the safe loader cannot read back.
        with open("last_position.yml", 'r') as stream:
            try:
                last_position = yaml.load(stream, Loader=yaml.Loader)
            except yaml.YAMLError as exc:
                print(exc)
                # without the last position no new command can be computed
                raise
        # 2) get the new joint positions according to chosen action
        next_action_position = self.get_action_to_position(action, last_position)

        # 3) write last_position into YAML File
        with open('last_position.yml', 'w') as yaml_file:
            yaml.dump(next_action_position, yaml_file, default_flow_style=False)

        # 4) unpause, move to position for certain time
        self.gazebo.unpauseSim()
        self.pickbot_joint_publisher_object.move_joints(next_action_position)
        time.sleep(self.running_step)

        # 5) Get Observations and pause Simulation
        observation = self.get_obs()
        self.gazebo.pauseSim()

        # 6) Convert Observations into state
        state = self.get_state(observation)

        # 7) Unpause Simulation check if its done, calculate done_reward
        self.gazebo.unpauseSim()
        done, done_reward, invalid_contact = self.is_done(observation, last_position)
        self.gazebo.pauseSim()

        # 8) Calculate reward based on Observation and done_reward and update the cumulated Episode Reward
        reward = self.compute_reward(observation, done_reward, invalid_contact)
        self.cumulated_episode_reward += reward

        # 9) Unpause that topics can be received in next step
        self.gazebo.unpauseSim()

        self.episode_steps += 1
        # 10) Return State, Reward, Done
        return state, reward, done, {}

    def _check_all_systems_ready(self):
        """
        Run the readiness check of every sensor topic that is in use, in this order:

        1) joint states
        2) contact sensor 1
        3) contact sensor 2
        4) collisions
        5) gripper state

        The RGB and depth camera checks exist but are not used so far.
        """
        readiness_checks = (
            self.check_joint_states,
            self.check_contact_1,
            self.check_contact_2,
            self.check_collision,
            # self.check_rgb_camera,
            # self.check_rgbd_camera,
            self.check_gripper_state,
        )
        for run_check in readiness_checks:
            run_check()
        rospy.logdebug("ALL SYSTEMS READY")

    def check_joint_states(self):
        """
        Wait until a message arrives on /pickbot/joint_states (or ROS shuts
        down) and store it in self.joints_state.
        Every wait uses a timeout of 0.1 and is retried after a failure.
        """
        topic = "/pickbot/joint_states"
        received = None
        while not rospy.is_shutdown():
            try:
                received = rospy.wait_for_message(topic, JointState, timeout=0.1)
                self.joints_state = received
                rospy.logdebug("Current joint_states READY")
            except Exception as err:
                rospy.logdebug("Current joint_states not ready yet, retrying==>" + str(err))
                print("EXCEPTION: Joint States not ready yet, retrying.")
            if received is not None:
                break

    def check_contact_1(self):
        """
        Wait until a message arrives on /gripper_contactsensor_1_state (or ROS
        shuts down) and store its ``states`` field in self.contact_1_state.
        Every wait uses a timeout of 0.1 and is retried after a failure.
        """
        topic = "/gripper_contactsensor_1_state"
        received = None
        while not rospy.is_shutdown():
            try:
                received = rospy.wait_for_message(topic, ContactsState, timeout=0.1)
                self.contact_1_state = received.states
                rospy.logdebug("Contactsensor 1 READY")
            except Exception as err:
                rospy.logdebug("Contactsensor 1 not ready yet, retrying==>" + str(err))
                print("EXCEPTION: Contactsensor 1 not ready yet, retrying.")
            if received is not None:
                break

    def check_contact_2(self):
        """
        Wait until a message arrives on /gripper_contactsensor_2_state (or ROS
        shuts down) and store its ``states`` field in self.contact_2_state.
        Every wait uses a timeout of 0.1 and is retried after a failure.
        """
        topic = "/gripper_contactsensor_2_state"
        received = None
        while not rospy.is_shutdown():
            try:
                received = rospy.wait_for_message(topic, ContactsState, timeout=0.1)
                self.contact_2_state = received.states
                rospy.logdebug("Contactsensor 2 READY")
            except Exception as err:
                rospy.logdebug("Contactsensor 2 not ready yet, retrying==>" + str(err))
                print("EXCEPTION: Contactsensor 2 not ready yet, retrying.")
            if received is not None:
                break

    def check_collision(self):
        """
        Wait until a message arrives on /gz_collisions (or ROS shuts down)
        and store its ``data`` field in self.collision.
        Every wait uses a timeout of 0.1 and is retried after a failure.
        """
        topic = "/gz_collisions"
        received = None
        while not rospy.is_shutdown():
            try:
                received = rospy.wait_for_message(topic, Bool, timeout=0.1)
                self.collision = received.data
                rospy.logdebug("collision READY")
            except Exception as err:
                rospy.logdebug("EXCEPTION: Collision not ready yet, retrying==>" + str(err))
            if received is not None:
                break

    def check_rgb_camera(self):
        """
        Wait until a message arrives on /camera_rgb/image_raw (or ROS shuts
        down) and store it in self.camera_rgb_state.
        Every wait uses a timeout of 0.1 and is retried after a failure.
        """
        topic = "/camera_rgb/image_raw"
        received = None
        while not rospy.is_shutdown():
            try:
                received = rospy.wait_for_message(topic, Image, timeout=0.1)
                self.camera_rgb_state = received
                rospy.logdebug("rgb_image READY")
            except Exception as err:
                rospy.logdebug("EXCEPTION: rgb_image not ready yet, retrying==>" + str(err))
            if received is not None:
                break

    def check_rgbd_camera(self):
        """
        Wait until a message arrives on /camera_depth/depth/image_raw (or ROS
        shuts down) and store it in self.camera_depth_state.
        Every wait uses a timeout of 0.1 and is retried after a failure.
        """
        topic = "/camera_depth/depth/image_raw"
        received = None
        while not rospy.is_shutdown():
            try:
                received = rospy.wait_for_message(topic, Image, timeout=0.1)
                self.camera_depth_state = received
                rospy.logdebug("rgbd_image READY")
            except Exception as err:
                rospy.logdebug("EXCEPTION: rgbd_image not ready yet, retrying==>" + str(err))
            if received is not None:
                break

    def check_gripper_state(self):
        """
        Wait until a message arrives on /pickbot/gripper/state (or ROS shuts
        down) and store it in self.gripper_state.
        Every wait uses a timeout of 0.1 and is retried after a failure.
        """
        topic = "/pickbot/gripper/state"
        received = None
        while not rospy.is_shutdown():
            try:
                received = rospy.wait_for_message(topic, VacuumGripperState, timeout=0.1)
                self.gripper_state = received
                rospy.logdebug("gripper_state READY")
            except Exception as err:
                rospy.logdebug("EXCEPTION: gripper_state not ready yet, retrying==>" + str(err))
            if received is not None:
                break

    def randomly_spawn_object(self):
        """
        Move the model unit_box_0 to a random position through the
        /gazebo/set_model_state service.

        x is drawn uniformly between -0.35 and 0.3, y between 0.7 and 0.9,
        z is fixed at 1.05. A failing service call is only logged.
        """
        try:
            set_model_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState)
            target_state = ModelState()
            target_state.model_name = "unit_box_0"
            position = target_state.pose.position
            position.x = np.random.uniform(low=-0.35, high=0.3)
            position.y = np.random.uniform(low=0.7, high=0.9)
            position.z = 1.05
            set_model_state(target_state)
        except rospy.ServiceException as err:
            rospy.loginfo("Set Model State service call failed:  {0}".format(err))

    def get_distance_gripper_to_object(self):
        """
        Get the position of the end effector and of the object via the services
        /gazebo/get_model_state and /gazebo/get_link_state and calculate the
        distance between them.

        In this case
            Object:     model unit_box_0, relative entity "link"
            Gripper:    link vacuum_gripper_link, reference frame "ground_plane"

        :return: tuple (distance, object_position); object_position is a numpy
            array holding the x, y, z position of the object
        :raises rospy.ServiceException: if one of the two service calls fails
        """

        try:
            get_model_state = rospy.ServiceProxy('/gazebo/get_model_state', GetModelState)
            object_resp = get_model_state("unit_box_0", "link")
            object_position = np.array((object_resp.pose.position.x,
                                        object_resp.pose.position.y,
                                        object_resp.pose.position.z))
        except rospy.ServiceException as e:
            rospy.loginfo("Get Model State service call failed:  {0}".format(e))
            print("Exception get model state")
            # Without the object position no distance can be computed; re-raise
            # the real error instead of failing later on an unbound variable.
            raise

        try:
            get_link_state = rospy.ServiceProxy('/gazebo/get_link_state', GetLinkState)
            gripper_resp = get_link_state("vacuum_gripper_link", "ground_plane")
            gripper_position = np.array((gripper_resp.link_state.pose.position.x,
                                         gripper_resp.link_state.pose.position.y,
                                         gripper_resp.link_state.pose.position.z))
        except rospy.ServiceException as e:
            rospy.loginfo("Get Link State service call failed:  {0}".format(e))
            print("Exception get Gripper position")
            # same reasoning as above
            raise

        distance = np.linalg.norm(object_position - gripper_position)

        return distance, object_position

    def turn_on_gripper(self):
        """
        Enable the vacuum gripper through the /pickbot/gripper/control service.
        A failing service call is only logged.
        """
        try:
            gripper_control = rospy.ServiceProxy('/pickbot/gripper/control', VacuumGripperControl)
            gripper_control(True)
        except rospy.ServiceException as err:
            rospy.loginfo("Turn on Gripper service call failed:  {0}".format(err))

    def turn_off_gripper(self):
        """
        Disable the vacuum gripper through the /pickbot/gripper/control service.
        A failing service call is only logged.
        """
        try:
            gripper_control = rospy.ServiceProxy('/pickbot/gripper/control', VacuumGripperControl)
            gripper_control(False)
        except rospy.ServiceException as err:
            rospy.loginfo("Turn off Gripper service call failed:  {0}".format(err))

    def get_action_to_position(self, action, last_position):
        """
        Take the last published joint positions and increment/decrement one joint
        according to the chosen action, or toggle the vacuum gripper.

        Actions 0-11 address the joints pairwise (even: increment, odd: decrement):
            0/1   index 0 (elbow joint)          -> half an increment
            2/3   index 1 (shoulder_lift_joint)  -> half an increment
            4/5   index 2 (shoulder_pan_joint)   -> half an increment
            6/7   index 3 (wrist_1_joint)        -> full increment
            8/9   index 4 (wrist_2_joint)        -> full increment
            10/11 index 5 (wrist_3_joint)        -> full increment
        Action 12 turns the vacuum gripper on/off and leaves all joints where they are.
        Any other value leaves the joints unchanged as well.

        Side effect: self._joint_increment_value is recomputed from the current
        gripper-object distance (0.18 * distance + 0.01) on every call.

        :param action: Integer that goes from 0 to 12, because we have 13 actions.
        :param last_position: list with the last published joint positions
        :return: new list with all joint positions according to chosen action
        """

        distance = self.get_distance_gripper_to_object()
        self._joint_increment_value = 0.18 * distance[0] + 0.01

        rospy.logdebug("get_action_to_position>>>" + str(last_position))

        # Start from a copy of the last commanded pose. Joints that the action
        # does not address keep their value; in particular the gripper toggle
        # (action 12) must not command the arm to the all-zero pose.
        action_position = list(last_position)

        if 0 <= action <= 11:
            joint_index, is_decrement = divmod(int(action), 2)
            # the first three joints move by half an increment, the wrist joints by a full one
            if joint_index < 3:
                delta = self._joint_increment_value / 2
            else:
                delta = self._joint_increment_value
            if is_decrement:
                delta = -delta
            action_position[joint_index] = last_position[joint_index] + delta
        elif action == 12:  # turn on/off vacuum gripper
            if self.gripper_state.enabled:
                self.turn_off_gripper()
            else:
                self.turn_on_gripper()

        return action_position

    def get_obs(self):
        """
        Collect the values the algorithm learns from.
        The result is a list (later converted to a numpy array) ordered like
        self._list_of_observations. Available entries:

            distance_gripper_to_object   distance between gripper and object
            elbow_joint_state            joints_state.position[0]
            shoulder_lift_joint_state    joints_state.position[1]
            shoulder_pan_joint_state     joints_state.position[2]
            wrist_1_joint_state          joints_state.position[3]
            wrist_2_joint_state          joints_state.position[4]
            wrist_3_joint_state          joints_state.position[5]
            contact_1_force              averaged force of contact sensor 1
            contact_2_force              averaged force of contact sensor 2
            object_pos_x / _y / _z       position of the object

        MISSING
            RGBD image

        :return: observation
        :raises NameError: if self._list_of_observations contains an unknown name
        """

        # Distance and object position come from a service call. It has to be done again here
        # because the values after the step execution are needed.
        gripper_distance, object_xyz = self.get_distance_gripper_to_object()
        obj_x, obj_y, obj_z = object_xyz[0], object_xyz[1], object_xyz[2]

        # Joint data as last received by the subscriber
        joint_positions = self.joints_state.position
        elbow = joint_positions[0]
        shoulder_lift = joint_positions[1]
        shoulder_pan = joint_positions[2]
        wrist_1 = joint_positions[3]
        wrist_2 = joint_positions[4]
        wrist_3 = joint_positions[5]

        # The contact forces are averaged over some iterations by the helper functions,
        # otherwise chances are high that not both sensors show contact at the same time
        force_1 = self.get_contact_force_1()
        force_2 = self.get_contact_force_2()

        readings = {
            "distance_gripper_to_object": gripper_distance,
            "elbow_joint_state": elbow,
            "shoulder_lift_joint_state": shoulder_lift,
            "shoulder_pan_joint_state": shoulder_pan,
            "wrist_1_joint_state": wrist_1,
            "wrist_2_joint_state": wrist_2,
            "wrist_3_joint_state": wrist_3,
            "contact_1_force": force_1,
            "contact_2_force": force_2,
            "object_pos_x": obj_x,
            "object_pos_y": obj_y,
            "object_pos_z": obj_z,
        }

        # Stack the requested values in the configured order
        observation = []
        for obs_name in self._list_of_observations:
            if obs_name not in readings:
                raise NameError('Observation Asked does not exist==' + str(obs_name))
            observation.append(readings[obs_name])

        return observation

    def get_state(self, observation):
        """
        Convert the observation list into a numpy array.
        :param observation: list of observation values
        :return: result of np.asarray(observation)
        """
        return np.asarray(observation)

    def get_contact_force_1(self):
        """
        Get the contact force of contact sensor 1.
        The current reading is averaged with the previous one (kept in
        contact_1_force.yml), so the chances are higher that both sensors report
        contact at the same time despite sensor noise.
        :returns: average of the previous and the current force magnitude
        """

        # Force magnitude out of contact_1_state; 0.0 when no contact is reported.
        # Every entry overwrites the previous one, so the last reported contact wins.
        contact1_force = 0.0
        for state in self.contact_1_state:
            self.contact_1_force = state.total_wrench.force
            contact1_force_np = np.array((self.contact_1_force.x, self.contact_1_force.y, self.contact_1_force.z))
            contact1_force = np.linalg.norm(contact1_force_np)

        # Read the last contact force 1 value out of yaml.
        # The file is written by this environment itself. The explicit Loader keeps
        # the historic behaviour of yaml.load() and is mandatory on PyYAML >= 6;
        # the full loader is needed because a dumped numpy scalar cannot be read
        # back by the safe loader.
        last_contact_1_force = None
        with open("contact_1_force.yml", 'r') as stream:
            try:
                last_contact_1_force = yaml.load(stream, Loader=yaml.Loader)
            except yaml.YAMLError as exc:
                print(exc)
        if last_contact_1_force is None:
            # unreadable or empty file: fall back to the current reading
            last_contact_1_force = contact1_force

        # write new contact_1_force value in yaml
        with open('contact_1_force.yml', 'w') as yaml_file:
            yaml.dump(contact1_force, yaml_file, default_flow_style=False)

        # calculate average force
        average_contact_1_force = (last_contact_1_force + contact1_force) / 2

        return average_contact_1_force

    def get_contact_force_2(self):
        """
        Get the contact force of contact sensor 2.
        The current reading is averaged with the previous one (kept in
        contact_2_force.yml), so the chances are higher that both sensors report
        contact at the same time despite sensor noise.
        :returns: average of the previous and the current force magnitude
        """

        # Force magnitude out of contact_2_state; 0.0 when no contact is reported.
        # Every entry overwrites the previous one, so the last reported contact wins.
        contact2_force = 0.0
        for state in self.contact_2_state:
            self.contact_2_force = state.total_wrench.force
            contact2_force_np = np.array((self.contact_2_force.x, self.contact_2_force.y, self.contact_2_force.z))
            contact2_force = np.linalg.norm(contact2_force_np)

        # Read the last contact force 2 value out of yaml.
        # The file is written by this environment itself. The explicit Loader keeps
        # the historic behaviour of yaml.load() and is mandatory on PyYAML >= 6;
        # the full loader is needed because a dumped numpy scalar cannot be read
        # back by the safe loader.
        last_contact_2_force = None
        with open("contact_2_force.yml", 'r') as stream:
            try:
                last_contact_2_force = yaml.load(stream, Loader=yaml.Loader)
            except yaml.YAMLError as exc:
                print(exc)
        if last_contact_2_force is None:
            # unreadable or empty file: fall back to the current reading
            last_contact_2_force = contact2_force

        # write new contact_2_force value in yaml
        with open('contact_2_force.yml', 'w') as yaml_file:
            yaml.dump(contact2_force, yaml_file, default_flow_style=False)

        # calculate average force
        average_contact_2_force = (last_contact_2_force + contact2_force) / 2

        return average_contact_2_force

    def get_collisions(self):
        """
        Checks all the collisions by using the value received on the rostopic /gz_collisions, which republishes
        the gazebo topic (gz topic -e /gazebo/default/physics/contacts).
        The publisher is started in a different node out of the simulation launch file.
        The last value is stored in a yaml file; if one of the two values (previous or current) shows an invalid
        collision, an invalid collision is returned.
        This is to make sure collisions are seen despite high sensor noise and publish rate.

        returns:
            False:  if no contacts or just valid ones -> Box/Shelf, Wrist3/Box, VacuumGripper/Box
            True:   if any other contact occurs which is invalid
        """

        # Read the last collision value out of yaml.
        # The file is written by this environment itself; the explicit Loader keeps
        # the historic behaviour of yaml.load() and is mandatory on PyYAML >= 6.
        last_collision = False
        with open("collision.yml", 'r') as stream:
            try:
                last_collision = yaml.load(stream, Loader=yaml.Loader)
            except yaml.YAMLError as exc:
                # unreadable file: rely on the current reading only
                print(exc)

        # write the current collision value in yaml
        with open('collision.yml', 'w') as yaml_file:
            yaml.dump(self.collision, yaml_file, default_flow_style=False)

        # True if the current or the previous reading reports a collision
        return bool(self.collision or last_collision)

    def is_gripper_attached(self):
        """
        Wait for the next message on /pickbot/gripper/state and report its ``attached`` field.

        The topic is polled with a 0.1 s timeout until a message arrives or ROS shuts down.

        :returns: ``attached`` of the received VacuumGripperState, or False when ROS shut
            down before any message was received
        """
        gripper_state = None
        while gripper_state is None and not rospy.is_shutdown():
            try:
                gripper_state = rospy.wait_for_message("/pickbot/gripper/state", VacuumGripperState, timeout=0.1)
            except Exception as e:
                rospy.logdebug("Current gripper_state not ready yet, retrying==>" + str(e))
        if gripper_state is None:
            # The loop ended because of a shutdown; there is no message to inspect.
            return False
        return gripper_state.attached

    def is_done(self, observations, last_position):
        """Decide whether the episode is over and which terminal reward applies.

        The episode ends when:
        - the goal is reached: observations[7] and observations[8] (the two contact
          readings) are both non-zero and there is no invalid collision
        - an invalid collision is reported by get_collisions()
        - any of the six entries of last_position leaves its allowed range

        The checks run in that order and a later one overrides the reward of an earlier one.

        :param observations: observation sequence; only indices 7 and 8 are read
        :param last_position: sequence of six joint positions
        :returns: tuple (done, done_reward, invalid_collision)
        """
        goal_reward = 500
        crash_reward = -200
        joint_range_reward = -150
        attached_reward = 2000
        # (lower, upper) bound for last_position[0] .. last_position[5]
        joint_limits = (
            (1, 2),
            (-1.3, -0.7),
            (0.9, 1.8),
            (-3.0, 0),
            (-3.1, 0),
            (-3, 3),
        )

        done, done_reward = False, 0
        invalid_collision = self.get_collisions()

        # Goal: both contact sensors fire without an invalid contact
        both_sensors_touch = observations[7] != 0 and observations[8] != 0
        if both_sensors_touch and not invalid_collision:
            # presumably gives the vacuum gripper time to attach -- TODO confirm
            rospy.sleep(3)
            print(self.gripper_state.attached)
            if self.gripper_state.attached:
                print("GRIPPER IS ATTACHED")
                done_reward = attached_reward
            else:
                done_reward = goal_reward
            done = True

        # Crash with itself, shelf or base
        if invalid_collision:
            done, done_reward = True, crash_reward

        # First joint outside of its range ends the episode
        for joint_idx, (lower, upper) in enumerate(joint_limits):
            joint_value = last_position[joint_idx]
            if joint_value < lower or joint_value > upper:
                done, done_reward = True, joint_range_reward
                break

        return done, done_reward, invalid_collision

    def compute_reward(self, observation, done_reward, invalid_contact):
        """
        Compute the reward of a single step as the sum of three parts.

        Distance:    -0.7 * log10(observation[0]); the closer to the object, the higher
        Contact:     20 when exactly one of the contact readings observation[7] and
                     observation[8] is non-zero and invalid_contact is false, otherwise 0.
                     Contact on both sensors is not rewarded here; that case is covered by
                     the done reward
        Done Reward: terminal reward handed in by the caller, added unchanged

        :param observation: observation sequence; indices 0, 7 and 8 are read
        :param done_reward: terminal reward of this step (0 when the episode goes on)
        :param invalid_contact: True when an invalid collision was detected
        :returns: total reward of the step
        """
        # Logarithmic curve: about 1.4 at distance 0.01 and 0.18 at distance 0.55
        distance = observation[0]
        reward_distance = -math.log10(distance) * 0.7

        sensor_1_touches = observation[7] != 0
        sensor_2_touches = observation[8] != 0
        exactly_one_touches = sensor_1_touches != sensor_2_touches
        reward_contact = 20 if exactly_one_touches and not invalid_contact else 0

        return reward_distance + reward_contact + done_reward

    def _update_episode(self):
        """
        Publishes the cumulated reward of the episode and 
        increases the episode number by one.
        :return:
        """
        if self.episode_num > 0:
            self._publish_reward_topic(
                self.cumulated_episode_reward,
                self.episode_steps,
                self.episode_num
            )

        self.episode_num += 1
        self.cumulated_episode_reward = 0
        self.episode_steps = 0

    def _publish_reward_topic(self, reward, steps, episode_number=1):
        """
        Publish the reward of an episode on the reward topic and record it.

        Besides publishing an RLExperimentInfo message, the values are appended to the
        in-memory lists reward_list, episode_list and step_list and written as one
        "reward;episode;steps" line to the file <csv_name>.csv.
        :param reward: reward of the episode
        :param steps: number of steps of the episode
        :param episode_number: number of the episode
        :return:
        """
        info = RLExperimentInfo()
        info.episode_number = episode_number
        info.episode_reward = reward
        self.reward_pub.publish(info)

        # in-memory history
        self.reward_list.append(reward)
        self.episode_list.append(episode_number)
        self.step_list.append(steps)

        # one semicolon separated line per episode
        csv_row = ";".join(str(field) for field in (reward, episode_number, steps)) + "\n"
        log_path = self.csv_name + '.csv'
        with open(log_path, 'a') as log_file:
            log_file.write(csv_row)
Exemplo n.º 3
0
class PickbotEnv(gym.Env):
    def __init__(self,
                 joint_increment=None,
                 sim_time_factor=0.001,
                 running_step=0.001,
                 random_object=False,
                 random_position=False,
                 use_object_type=False,
                 populate_object=False,
                 env_object_type='free_shapes'):
        """
        Store the parameters, connect to the simulator and define the gym spaces.

        :param joint_increment: bound of the action space per joint; when None the action
            space spans +-(pi - 0.05) for every joint instead
        :param sim_time_factor: handed to GazeboConnection
        :param running_step: stored as ``running_step``
        :param random_object: spawn random object in the simulation
        :param random_position: change object position in each reset
        :param use_object_type: assign IDs to objects and use them in the observation space
        :param populate_object: to populate object(s) in the simulation using sdf file
        :param env_object_type: object type for environment, free_shapes for boxes while others are
            related to use_case 'door_handle', 'combox', ...
        """

        # ---- parameters ----
        self._joint_increment = joint_increment
        self.running_step = running_step
        self._random_object = random_object
        self._random_position = random_position
        self._use_object_type = use_object_type
        self._populate_object = populate_object

        # ---- placeholders for the most recent messages ----
        self.joints_state = JointState()
        self.contact_1_state = ContactsState()
        self.contact_2_state = ContactsState()
        self.collisions = Bool()
        self.camera_rgb_state = Image()
        self.camera_depth_state = Image()
        self.contact_1_force = Vector3()
        self.contact_2_force = Vector3()
        self.gripper_state = VacuumGripperState()

        # ---- names of the observation entries, in observation order ----
        self._list_of_observations = [
            "distance_gripper_to_object",
            "elbow_joint_state",
            "shoulder_lift_joint_state",
            "shoulder_pan_joint_state",
            "wrist_1_joint_state",
            "wrist_2_joint_state",
            "wrist_3_joint_state",
            "contact_1_force",
            "contact_2_force",
            "object_pos_x",
            "object_pos_y",
            "object_pos_z",
            "min_distance_gripper_to_object",
        ]
        if self._use_object_type:
            self._list_of_observations.append("object_type")

        # ---- connection with the simulator ----
        # 1) Gazebo connection  2) controller connection  3) joint publisher
        self.gazebo = GazeboConnection(sim_time_factor=sim_time_factor)
        self.controllers_object = ControllersConnection()
        self.pickbot_joint_pubisher_object = JointPub()

        # ---- subscribers delivering the sensor data ----
        # Available in the environment but not subscribed so far:
        #   /pickbot/gripper/state        (VacuumGripperState, gripper_state_callback)
        #   /camera_rgb/image_raw         (Image, camera_rgb_callback)
        #   /camera_depth/depth/image_raw (Image, camera_depth_callback)
        sensor_topics = (
            ("/pickbot/joint_states", JointState, self.joints_state_callback),
            ("/gripper_contactsensor_1_state", ContactsState, self.contact_1_callback),
            ("/gripper_contactsensor_2_state", ContactsState, self.contact_2_callback),
            ("/gz_collisions", Bool, self.collision_callback),
        )
        for topic, msg_type, handler in sensor_topics:
            rospy.Subscriber(topic, msg_type, handler)

        # ---- action space: Box with 6 values ----
        if self._joint_increment is None:
            # actions are joint positions
            action_bound = math.pi - 0.05
        else:
            # actions are joint increments
            action_bound = self._joint_increment
        low_action = np.array([-action_bound] * 6)
        high_action = np.array([action_bound] * 6)
        self.action_space = spaces.Box(low_action, high_action)

        # ---- state space: Box with 13 values (14 with the object type) ----
        float32_max = np.finfo(np.float32).max
        high = np.array(
            [999]                    # distance_gripper_to_object
            + [math.pi] * 6          # elbow, shoulder_lift, shoulder_pan, wrist_1..3
            + [float32_max] * 2      # contact_1_force, contact_2_force
            + [1, 1.4, 1.5]          # object_pos_x, object_pos_y, object_pos_z
            + [999])                 # min_distance_gripper_to_object
        low = np.array(
            [0]                      # distance_gripper_to_object
            + [-math.pi] * 6         # elbow, shoulder_lift, shoulder_pan, wrist_1..3
            + [0] * 2                # contact_1_force, contact_2_force
            + [-1, 0, 0]             # object_pos_x, object_pos_y, object_pos_z
            + [0])                   # min_distance_gripper_to_object

        if self._use_object_type:
            high = np.append(high, 9)
            low = np.append(low, 0)

        self.observation_space = spaces.Box(low, high)
        # reward range: -infinity to infinity
        self.reward_range = (-np.inf, np.inf)

        self._seed()
        self.done_reward = 0

        # ---- publishing / logging of episode number and episode reward ----
        self.episode_num = 0
        self.accumulated_episode_reward = 0
        self.episode_steps = 0
        self.reward_pub = rospy.Publisher('/openai/reward', RLExperimentInfo, queue_size=1)
        self.reward_list = []
        self.episode_list = []
        self.step_list = []
        self.csv_name = logger.get_dir() + '/result_log'
        print("CSV NAME")
        print(self.csv_name)
        success_dir = logger.get_dir()
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%Hh%Mmin')
        self.csv_success_exp = success_dir + "/success_exp" + timestamp + ".csv"
        self.success_2_contact = 0
        self.success_1_contact = 0

        # ---- target object ----
        # object name: name of the target object
        # object type: index of the object name in the object list
        # object list: pool of the available objects, has at least one entry
        self.object_name = ''
        self.object_type_str = ''
        self.object_type = 0
        self.object_list = U.get_target_object(env_object_type)
        print("object list {}".format(self.object_list))
        self.object_initial_position = Pose(
            position=Point(x=-0.13, y=0.848, z=1.06),
            orientation=quaternion_from_euler(0.002567, 0.102, 1.563))
        self.pickbot_initial_position = Pose(
            position=Point(x=0.0, y=0.0, z=1.12),
            orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0))

        if self._populate_object:
            # populate objects from object list
            self.populate_objects()

        # Select the target: a random entry of object_list when random_object is set,
        # otherwise its first entry
        self.set_target_object(random_object=self._random_object,
                               random_position=self._random_position)

        # distance between gripper and object while the arm is in its initial pose
        self.max_distance, _ = U.get_distance_gripper_to_object()
        # minimum distance between gripper and object during training
        self.min_distance = 999

    # Subscriber callbacks: each one stores the content of the latest message of its topic on the instance
    def joints_state_callback(self, msg):
        """Store the received joint state message as ``joints_state``."""
        latest_joint_state = msg
        self.joints_state = latest_joint_state

    def contact_1_callback(self, msg):
        """Store the ``states`` field of the received message as ``contact_1_state``."""
        latest_states = msg.states
        self.contact_1_state = latest_states

    def contact_2_callback(self, msg):
        """Store the ``states`` field of the received message as ``contact_2_state``."""
        latest_states = msg.states
        self.contact_2_state = latest_states

    def collision_callback(self, msg):
        """Store the ``data`` field of the received message as ``collisions``."""
        collision_flag = msg.data
        self.collisions = collision_flag

    def camera_rgb_callback(self, msg):
        """Store the received image message as ``camera_rgb_state``."""
        latest_image = msg
        self.camera_rgb_state = latest_image

    def camera_depth_callback(self, msg):
        """Store the received image message as ``camera_depth_state``."""
        latest_image = msg
        self.camera_depth_state = latest_image

    def gripper_state_callback(self, msg):
        """Store the received gripper state message as ``gripper_state``."""
        latest_gripper_state = msg
        self.gripper_state = latest_gripper_state

    def _seed(self, seed=None):
        """
        Create the random number generator of the environment.

        :param seed: seed handed to seeding.np_random
        :return: one-element list holding the seed returned by seeding.np_random
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def reset(self):
        """
        Reset the robot and the simulation for a new episode and return the initial state.

        Sequence as executed below:
        1) Read and print the joint observation before the reset (debug output)
        2) Change gravity to 0 -> the arm does not fall
        3) Turn controllers off
        4) Reset simulation
        5) Pause simulation
        6) Set the joints and place the target object
        7) Unpause simulation
        8) Turn controllers on
        9) Restore gravity
        10) Check that all systems work
        11) Write the initial position into last_position.yml
        12) Write 0.0 into the YAML files of both contact forces (used to average over 2 contacts)
        13) Write False into collision.yml (used to see a collision despite high noise in the topic)
        14) Get observations and print the joint observation after the reset (debug output)
        15) Measure the distance gripper/object and use it as max_distance and min_distance
        16) Pause simulation, convert the observation into the state, update the episode bookkeeping
        17) Unpause simulation, because in the next step the system must be running,
            otherwise no data is seen by the subscribers
        18) Return state

        :return: state derived from the observation by U.get_state
        """

        ###### TEST
        obs = self.get_obs()
        print("Before RESET Joint: {}".format(np.around(obs[1:7], decimals=3)))
        ###### END of TEST

        self.gazebo.change_gravity(0, 0, 0)
        self.controllers_object.turn_off_controllers()
        self.gazebo.resetSim()
        self.gazebo.pauseSim()

        ##### TEST #########
        # idx = 0
        # sys_exit = False
        # correction_ids = []
        # reset_target_pos = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        # current_joint_pos = obs[1:7]
        #
        # for joint_pos in obs[1:7]:
        #     if np.abs(joint_pos - math.pi) < 0.1:
        #         sys_exit = True
        #         correction_ids.append(idx)
        #     idx += 1
        #
        # if sys_exit:
        #     for i in correction_ids:
        #         print("i:{}".format(i))
        #         reset_target_pos[i] = 2.0 if current_joint_pos[i] > 0 else -2.0
        #
        # self.pickbot_joint_pubisher_object.set_joints(reset_target_pos)
        # self.pickbot_joint_pubisher_object.set_joints([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
        ####  END of TEST #####

        # NOTE(review): called without arguments -- presumably applies the publisher's
        # default joint positions; confirm in JointPub.set_joints
        self.pickbot_joint_pubisher_object.set_joints()
        # Choose and place the target object for the new episode
        self.set_target_object(random_object=self._random_object,
                               random_position=self._random_position)
        self.gazebo.unpauseSim()
        self.controllers_object.turn_on_controllers()
        self.gazebo.change_gravity(0, 0, -9.81)
        self._check_all_systems_ready()

        ######  TEST  #########
        # init_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
        # self.pickbot_joint_pubisher_object.move_joints(init_position)
        #
        # start_ros_time = rospy.Time.now()
        # while True:
        #     elapsed_time = rospy.Time.now() - start_ros_time
        #     if np.isclose(init_position, self.joints_state.position, rtol=0.0, atol=0.01).all():
        #         break
        #     elif elapsed_time > rospy.Duration(2):  # time out
        #         break
        ###### END of TEST ########

        # Initialise the helper files with their start-of-episode values
        last_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
        with open('last_position.yml', 'w') as yaml_file:
            yaml.dump(last_position, yaml_file, default_flow_style=False)
        with open('contact_1_force.yml', 'w') as yaml_file:
            yaml.dump(0.0, yaml_file, default_flow_style=False)
        with open('contact_2_force.yml', 'w') as yaml_file:
            yaml.dump(0.0, yaml_file, default_flow_style=False)
        with open('collision.yml', 'w') as yaml_file:
            yaml.dump(False, yaml_file, default_flow_style=False)
        observation = self.get_obs()
        print("After  RESET Joint: {}".format(
            np.around(observation[1:7], decimals=3)))
        # if sys_exit:
        #     print("##################################################")
        #     print("############# Joint near Pi ######################")
        #     print("Reset_target_pos:   {}".format(reset_target_pos))
        #     print("##################################################")

        # get maximum distance to the object to calculate reward;
        # the minimum distance of the new episode starts at the same value
        self.max_distance, _ = U.get_distance_gripper_to_object()
        self.min_distance = self.max_distance
        self.gazebo.pauseSim()
        state = U.get_state(observation)
        # Publish the finished episode and reset the episode counters
        self._update_episode()
        # Leave the simulation running so the subscribers receive data in the next step
        self.gazebo.unpauseSim()
        return state

    def step(self, action):
        """
        Given the action selected by the learning algorithm,
        perform the corresponding movement of the robot.

        1) Read the current joint positions from the observation (entries 1..6)
        2) Define the new joint positions according to the chosen action: the action itself
           when no joint increment is configured, otherwise the clipped increment applied
           to the current positions
        3) Write the new joint positions into last_position.yml
        4) Unpause and move to that position; wait until the joints are within 0.01 of the
           target, 2 seconds of ROS time have passed, or an invalid collision is reported.
           A collision ends the episode immediately with a done reward of -200
        5) Get observations, update the minimum distance and pause the simulation
        6) Convert observations into the state
        7) Unpause the simulation, check if the episode is done, pause again
        8) Calculate the reward from the observation and the done reward
        9) Unpause so that topics can be received in the next step
        10) Return state, reward, done

        :param action: six joint positions or joint increments, see 2)
        :return: tuple (state, reward, done, info) with an empty info dict
        """

        # 1) current joint positions. The positions stored in last_position.yml are not
        #    read back here: the observation is the source of truth and any value loaded
        #    from the file was overwritten before its first use.
        old_observation = self.get_obs()
        last_position = old_observation[1:7]

        # 2) get the new joint positions according to chosen action
        if self._joint_increment is None:
            next_action_position = action
        else:
            next_action_position = self.get_action_to_position(
                np.clip(action, -self._joint_increment, self._joint_increment),
                last_position)

        # 3) write the new joint positions into the YAML file
        with open('last_position.yml', 'w') as yaml_file:
            yaml.dump(next_action_position,
                      yaml_file,
                      default_flow_style=False)

        # 4) unpause, move to position for certain time
        self.gazebo.unpauseSim()
        self.pickbot_joint_pubisher_object.move_joints(next_action_position)

        # Busy waiting until all the joints reach the next_action_position
        start_ros_time = rospy.Time.now()
        while True:
            # An invalid collision ends the episode right away
            invalid_collision = self.get_collisions()
            if invalid_collision:
                print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<")
                observation = self.get_obs()
                reward = UMath.compute_reward(observation, -200, True)
                # the returned state is built from a fresh observation
                observation = self.get_obs()
                print("Test Joint: {}".format(
                    np.around(observation[1:7], decimals=3)))
                return U.get_state(observation), reward, True, {}

            elapsed_time = rospy.Time.now() - start_ros_time
            if np.isclose(next_action_position,
                          self.joints_state.position,
                          rtol=0.0,
                          atol=0.01).all():
                break
            elif elapsed_time > rospy.Duration(2):  # time out
                print("TIME OUT, have not reached destination")
                break

        # 5) Get Observations, update the minimum distance, and pause Simulation
        observation = self.get_obs()
        if observation[0] < self.min_distance:
            self.min_distance = observation[0]
        self.gazebo.pauseSim()

        # 6) Convert Observations into state
        state = U.get_state(observation)

        # 7) Unpause Simulation check if its done, calculate done_reward
        self.gazebo.unpauseSim()
        done, done_reward, invalid_contact = self.is_done(observation)
        self.gazebo.pauseSim()

        # 8) Calculate reward based on Observation and done_reward and update the accumulated Episode Reward
        reward = UMath.compute_reward(observation, done_reward,
                                      invalid_contact)
        self.accumulated_episode_reward += reward

        # 9) Unpause that topics can be received in next step
        self.gazebo.unpauseSim()

        self.episode_steps += 1
        # 10) Return State, Reward, Done
        return state, reward, done, {}

    def _check_all_systems_ready(self):
        """
        Run the readiness check of every sensor topic in use, one after the other.

        Checked:
        1) /pickbot/joint_states
        2) /gripper_contactsensor_1_state
        3) /gripper_contactsensor_2_state
        4) collisions

        Not checked so far: rgb camera, depth camera, gripper state
        (check_rgb_camera, check_rgbd_camera, check_gripper_state).
        """
        readiness_checks = (
            self.check_joint_states,
            self.check_contact_1,
            self.check_contact_2,
            self.check_collision,
        )
        for run_check in readiness_checks:
            run_check()
        rospy.logdebug("ALL SYSTEMS READY")

    def check_joint_states(self):
        """
        Poll /pickbot/joint_states (0.1 s timeout per attempt) until a message arrives
        or ROS shuts down, and store the message as ``joints_state``.
        """
        topic = "/pickbot/joint_states"
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message(topic, JointState, timeout=0.1)
                self.joints_state = received
                rospy.logdebug("Current joint_states READY")
            except Exception as err:
                rospy.logdebug("Current joint_states not ready yet, retrying==>" + str(err))
                print("EXCEPTION: Joint States not ready yet, retrying.")

    def check_contact_1(self):
        """
        Poll /gripper_contactsensor_1_state (0.1 s timeout per attempt) until a message
        arrives or ROS shuts down, and store its ``states`` as ``contact_1_state``.
        """
        topic = "/gripper_contactsensor_1_state"
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message(topic, ContactsState, timeout=0.1)
                self.contact_1_state = received.states
                rospy.logdebug("Contactsensor 1 READY")
            except Exception as err:
                rospy.logdebug("Contactsensor 1 not ready yet, retrying==>" + str(err))
                print("EXCEPTION: Contactsensor 1 not ready yet, retrying.")

    def check_contact_2(self):
        """
        Poll /gripper_contactsensor_2_state (0.1 s timeout per attempt) until a message
        arrives or ROS shuts down, and store its ``states`` as ``contact_2_state``.
        """
        topic = "/gripper_contactsensor_2_state"
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message(topic, ContactsState, timeout=0.1)
                self.contact_2_state = received.states
                rospy.logdebug("Contactsensor 2 READY")
            except Exception as err:
                rospy.logdebug("Contactsensor 2 not ready yet, retrying==>" + str(err))
                print("EXCEPTION: Contactsensor 2 not ready yet, retrying.")

    def check_collision(self):
        """
        Poll /gz_collisions (0.1 s timeout per attempt) until a message arrives
        or ROS shuts down, and store its ``data`` as ``collisions``.
        """
        topic = "/gz_collisions"
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message(topic, Bool, timeout=0.1)
                self.collisions = received.data
                rospy.logdebug("collision READY")
            except Exception as err:
                rospy.logdebug("EXCEPTION: Collision not ready yet, retrying==>" + str(err))

    def check_rgb_camera(self):
        """
        Poll /camera_rgb/image_raw (0.1 s timeout per attempt) until a message arrives
        or ROS shuts down, and store the message as ``camera_rgb_state``.
        """
        topic = "/camera_rgb/image_raw"
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message(topic, Image, timeout=0.1)
                self.camera_rgb_state = received
                rospy.logdebug("rgb_image READY")
            except Exception as err:
                rospy.logdebug("EXCEPTION: rgb_image not ready yet, retrying==>" + str(err))

    def check_rgbd_camera(self):
        """
        Poll /camera_depth/depth/image_raw (0.1 s timeout per attempt) until a message
        arrives or ROS shuts down, and store the message as ``camera_depth_state``.
        """
        topic = "/camera_depth/depth/image_raw"
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message(topic, Image, timeout=0.1)
                self.camera_depth_state = received
                rospy.logdebug("rgbd_image READY")
            except Exception as err:
                rospy.logdebug("EXCEPTION: rgbd_image not ready yet, retrying==>" + str(err))

    def check_gripper_state(self):
        """
        Poll /pickbot/gripper/state (0.1 s timeout per attempt) until a message arrives
        or ROS shuts down, and store the message as ``gripper_state``.
        """
        topic = "/pickbot/gripper/state"
        received = None
        while received is None:
            if rospy.is_shutdown():
                break
            try:
                received = rospy.wait_for_message(topic, VacuumGripperState, timeout=0.1)
                self.gripper_state = received
                rospy.logdebug("gripper_state READY")
            except Exception as err:
                rospy.logdebug("EXCEPTION: gripper_state not ready yet, retrying==>" + str(err))

    # Set the target object
    # random_object: pick the object randomly from the object pool. If False, the object is the first entry of the object list
    # random_position: place the object at a random position instead of its initial position
    def set_target_object(self, random_object=False, random_position=False):
        """
        Select the target object and move it to the pose used for the coming episode.

        Sets object_name, object_type_str, object_type and object_initial_position from
        the chosen entry of object_list, then hands the object name and pose to
        U.change_object_position.

        :param random_object: choose a random entry of object_list; otherwise the first entry
        :param random_position: use a random pose (door handles get theirs from
            U.get_random_door_handle_pos); otherwise the initial pose of the object
        """
        if random_object:
            target = random.choice(self.object_list)
            target_index = self.object_list.index(target)
        else:
            target = self.object_list[0]
            target_index = 0

        self.object_name = target["name"]
        self.object_type_str = target["type"]
        self.object_type = target_index
        # init_pos holds three position values followed by three euler angles
        init_pos = target["init_pos"]
        self.object_initial_position = Pose(
            position=Point(x=init_pos[0], y=init_pos[1], z=init_pos[2]),
            orientation=quaternion_from_euler(init_pos[3], init_pos[4], init_pos[5]))

        if not random_position:
            box_pos = self.object_initial_position
        elif self.object_type_str == "door_handle":
            box_pos = U.get_random_door_handle_pos()
        else:
            random_x = np.random.uniform(low=-0.3, high=0.3, size=None)
            random_y = np.random.uniform(low=0.9, high=1.1, size=None)
            box_pos = Pose(position=Point(x=random_x, y=random_y, z=1.05),
                           orientation=quaternion_from_euler(0, 0, 0))

        U.change_object_position(self.object_name, box_pos)
        print("Current target: ", self.object_name)
        print("Current target: ", self.object_name)

    def randomly_spawn_object(self):
        """
        Move the current target object (self.object_name) to a random position.

        The new position is sent to the '/gazebo/set_model_state' service:
        x is drawn uniformly from [-0.35, 0.3), y from [0.7, 0.9) and z is
        fixed at 1.05. A failing service call is logged and otherwise ignored.
        """
        try:
            set_model_state = rospy.ServiceProxy('/gazebo/set_model_state',
                                                 SetModelState)
            target_state = ModelState()
            target_state.model_name = self.object_name

            # x is drawn before y, so the random stream is consumed in a fixed order.
            position = target_state.pose.position
            position.x = np.random.uniform(-0.35, 0.3)
            position.y = np.random.uniform(0.7, 0.9)
            position.z = 1.05

            set_model_state(target_state)
        except rospy.ServiceException as e:
            rospy.loginfo(
                "Set Model State service call failed:  {0}".format(e))

    def populate_objects(self):
        """
        Spawn the objects of self.object_list in the simulation.

        When self._random_object is set, every object in the list is spawned
        at a random position (x in [-0.35, 0.35), y in [2.2, 2.45), z = 1.05).
        Otherwise only the first object is spawned, at
        self.object_initial_position.

        :return: -
        """
        if self._random_object:
            count = len(self.object_list)
            # All x values are drawn first, then all y values.
            xs = np.random.uniform(low=-0.35, high=0.35, size=(count, ))
            ys = np.random.uniform(low=2.2, high=2.45, size=(count, ))
            for obj, pos_x, pos_y in zip(self.object_list, xs, ys):
                U.spawn_object(
                    obj, Pose(position=Point(x=pos_x, y=pos_y, z=1.05)))
        else:
            # Only the first object of the pool is needed.
            U.spawn_object(self.object_list[0], self.object_initial_position)

    def get_action_to_position(self, action, last_position):
        """
        Add the per-joint increments to the last joint positions.

        :param action: increment for each joint
        :param last_position: last position of each joint
        :return: list with the new joint positions, each limited to [-pi, pi]
        """
        lower_bound, upper_bound = -math.pi, math.pi
        target = np.asarray(last_position) + action
        # Limit what gets published to [-pi, pi] so the controllers are never
        # commanded outside that range.
        return np.clip(target, lower_bound, upper_bound).tolist()

    def get_obs(self):
        """
        Assemble the observation handed to the learning algorithm.

        The result contains one value per entry of self._list_of_observations,
        in that order. Recognised entry names are:

            "distance_gripper_to_object"
            "elbow_joint_state"            (joint_states.position[0])
            "shoulder_lift_joint_state"    (joint_states.position[1])
            "shoulder_pan_joint_state"     (joint_states.position[2])
            "wrist_1_joint_state"          (joint_states.position[3])
            "wrist_2_joint_state"          (joint_states.position[4])
            "wrist_3_joint_state"          (joint_states.position[5])
            "contact_1_force"
            "contact_2_force"
            "object_pos_x", "object_pos_y", "object_pos_z"
            "object_type"                  (self.object_type)
            "min_distance_gripper_to_object" (self.min_distance)

        The process is terminated through sys.exit if any joint position lies
        outside [-pi, pi].

        :return: observation as a list
        :raises NameError: if self._list_of_observations holds an unknown name
        """
        # Distance and object position are queried here so that they reflect
        # the situation after the step has been executed.
        distance_gripper_to_object, position_xyz_object = (
            U.get_distance_gripper_to_object())
        values = {
            "distance_gripper_to_object": distance_gripper_to_object,
            "object_pos_x": position_xyz_object[0],
            "object_pos_y": position_xyz_object[1],
            "object_pos_z": position_xyz_object[2],
        }

        # Joint data as last received by the subscriber.
        joint_states = self.joints_state
        joint_names = (
            "elbow_joint_state",
            "shoulder_lift_joint_state",
            "shoulder_pan_joint_state",
            "wrist_1_joint_state",
            "wrist_2_joint_state",
            "wrist_3_joint_state",
        )
        for index, joint_name in enumerate(joint_names):
            values[joint_name] = joint_states.position[index]

        # Abort the whole process when a joint left [-pi, pi]. (A recovery
        # path that respawned the robot instead is currently disabled.)
        for joint_position in joint_states.position:
            if joint_position < -math.pi or joint_position > math.pi:
                print(joint_states.name)
                print(np.around(joint_states.position, decimals=3))
                sys.exit("Joint exceeds limit")

        # The contact forces come from helpers that average over two readings,
        # otherwise chances are high that the two sensors do not report
        # contact at the same time.
        values["contact_1_force"] = self.get_contact_force_1()
        values["contact_2_force"] = self.get_contact_force_2()

        # Emit the values in the configured order. The two instance attributes
        # are only read when their name is actually requested.
        observation = []
        for obs_name in self._list_of_observations:
            if obs_name in values:
                observation.append(values[obs_name])
            elif obs_name == "object_type":
                observation.append(self.object_type)
            elif obs_name == "min_distance_gripper_to_object":
                observation.append(self.min_distance)
            else:
                raise NameError('Observation Asked does not exist==' +
                                str(obs_name))

        return observation

    def get_contact_force_1(self):
        """
        Get the contact force of contact sensor 1, averaged over two readings.

        The magnitude of the current contact force is averaged with the value
        the previous call stored in "contact_1_force.yml"; the current value is
        then written to that file for the next call. Averaging over two
        readings raises the chance that both sensors report contact at the
        same time despite sensor noise.

        If no previous value can be read (file missing, empty or not valid
        YAML) the current reading is used in its place, so the call returns
        the current reading instead of failing.

        :returns: mean of the previous and the current force magnitude
        """
        # NOTE(review): the attribute is initialised as a ContactsState message
        # in __init__; accept either such a message (contacts in `.states`) or
        # a plain list of contact states - confirm against the subscriber.
        contact_states = getattr(self.contact_1_state, "states",
                                 self.contact_1_state)

        # Magnitude of the last reported contact; stays 0.0 without contact.
        contact1_force = 0.0
        for state in contact_states:
            self.contact_1_force = state.total_wrench.force
            contact1_force = np.linalg.norm(
                np.array((self.contact_1_force.x, self.contact_1_force.y,
                          self.contact_1_force.z)))

        # Read the previous value. yaml.Loader can construct arbitrary Python
        # objects; that is tolerable here only because the file is written by
        # this method itself - never point it at untrusted data.
        last_contact_1_force = None
        try:
            with open("contact_1_force.yml", 'r') as stream:
                last_contact_1_force = yaml.load(stream, Loader=yaml.Loader)
        except (IOError, yaml.YAMLError) as exc:
            print(exc)
        if last_contact_1_force is None:
            last_contact_1_force = contact1_force

        # Store the current value for the next call.
        with open('contact_1_force.yml', 'w') as yaml_file:
            yaml.dump(contact1_force, yaml_file, default_flow_style=False)

        return (last_contact_1_force + contact1_force) / 2

    def get_contact_force_2(self):
        """
        Get the contact force of contact sensor 2, averaged over two readings.

        The magnitude of the current contact force is averaged with the value
        the previous call stored in "contact_2_force.yml"; the current value is
        then written to that file for the next call. Averaging over two
        readings raises the chance that both sensors report contact at the
        same time despite sensor noise.

        If no previous value can be read (file missing, empty or not valid
        YAML) the current reading is used in its place, so the call returns
        the current reading instead of failing.

        :returns: mean of the previous and the current force magnitude
        """
        # NOTE(review): the attribute is initialised as a ContactsState message
        # in __init__; accept either such a message (contacts in `.states`) or
        # a plain list of contact states - confirm against the subscriber.
        contact_states = getattr(self.contact_2_state, "states",
                                 self.contact_2_state)

        # Magnitude of the last reported contact; stays 0.0 without contact.
        contact2_force = 0.0
        for state in contact_states:
            self.contact_2_force = state.total_wrench.force
            contact2_force = np.linalg.norm(
                np.array((self.contact_2_force.x, self.contact_2_force.y,
                          self.contact_2_force.z)))

        # Read the previous value. yaml.Loader can construct arbitrary Python
        # objects; that is tolerable here only because the file is written by
        # this method itself - never point it at untrusted data.
        last_contact_2_force = None
        try:
            with open("contact_2_force.yml", 'r') as stream:
                last_contact_2_force = yaml.load(stream, Loader=yaml.Loader)
        except (IOError, yaml.YAMLError) as exc:
            print(exc)
        if last_contact_2_force is None:
            last_contact_2_force = contact2_force

        # Store the current value for the next call.
        with open('contact_2_force.yml', 'w') as yaml_file:
            yaml.dump(contact2_force, yaml_file, default_flow_style=False)

        return (last_contact_2_force + contact2_force) / 2

    def get_collisions(self):
        """
        Report whether an invalid collision was seen now or in the previous check.

        The current flag is taken from self.collisions (per the original notes
        on this method it mirrors the /gz_collisions topic, which republishes
        the gazebo contacts topic from a separate node). The flag of the
        previous call is kept in "collision.yml"; combining both makes sure a
        collision is not missed because of sensor noise and publish rate.

        If the previous flag cannot be read (file missing or not valid YAML)
        it is treated as "no collision" instead of failing.

        returns:
            False:  if neither the current nor the previous flag is True
            True:   if at least one of the two flags is True
        """
        # Read the flag stored by the previous call. yaml.Loader can construct
        # arbitrary Python objects; tolerable only because this method writes
        # the file itself.
        last_collision = False
        try:
            with open("collision.yml", 'r') as stream:
                last_collision = yaml.load(stream, Loader=yaml.Loader)
        except (IOError, yaml.YAMLError) as exc:
            print(exc)

        # Store the current flag for the next call.
        with open('collision.yml', 'w') as yaml_file:
            yaml.dump(self.collisions, yaml_file, default_flow_style=False)

        # Explicit comparison with True is kept on purpose: only a value equal
        # to True counts, not any truthy object.
        return bool(self.collisions == True or last_collision == True)

    def is_done(self, observations):
        """Checks if episode is done based on observations given.

        Done when:
        - Successfully reached goal: both contact readings (observations[7]
          and observations[8]) are non-zero and there is no invalid collision
        - At least one contact reading is non-zero and there is no invalid
          collision (episode ends, reward stays 0 unless both are in contact)
        - An invalid collision occurred (crashing with itself, shelf, base)
        - One of the first six joint positions is outside [-2.9, 2.9]

        When several conditions hold, the reward of the last one in the list
        above wins.

        :param observations: observation list; indices 7 and 8 are read as the
            two contact force readings
        :return: tuple (done, done_reward, invalid_collision)
        """
        done = False
        done_reward = 0
        reward_reached_goal = 20000
        reward_crashing = -200
        reward_join_range = -150
        joint_limit = 2.9

        # Check if there are invalid collisions
        invalid_collision = self.get_collisions()

        contact_1 = observations[7] != 0
        contact_2 = observations[8] != 0

        # Contacts only count as success while there is no invalid collision.
        # (Previously `a or b and not c` let a contact on sensor 1 be logged
        # as a success even during an invalid collision.)
        if not invalid_collision:
            # Successfully reached goal: contact with both contact sensors
            if contact_1 and contact_2:
                done = True
                done_reward = reward_reached_goal
                # save state in csv file
                U.append_to_csv(self.csv_success_exp, observations)
                self.success_2_contact += 1
                print("Successful 2 contacts so far: {} attempts".format(
                    self.success_2_contact))

            # NOTE(review): this also fires when both sensors are in contact,
            # so such a state is counted and written to the csv twice - kept
            # as in the original, confirm whether that is intended.
            if contact_1 or contact_2:
                U.append_to_csv(self.csv_success_exp, observations)
                done = True
                self.success_1_contact += 1
                print("Successful 1 contacts so far: {} attempts".format(
                    self.success_1_contact))

        # Crashing with itself, shelf, base
        if invalid_collision:
            done = True
            print('>>>>>>>>>>>>>>>>>>>> crashing')
            done_reward = reward_crashing

        # Joints are going into limits set. The labels follow the order of
        # the positions in joints_state (index 0 is reported as joint 3, ...);
        # only the first offending joint is reported.
        joint_labels = ("joint 3", "joint 2", "joint 1", "joint 4", "joint 5",
                        "joint 6")
        for index, label in enumerate(joint_labels):
            position = self.joints_state.position[index]
            if position < -joint_limit or position > joint_limit:
                done = True
                done_reward = reward_join_range
                print('>>>>>>>>>>>>>>>>>>>> {} exceeds limit'.format(label))
                break

        return done, done_reward, invalid_collision

    def _update_episode(self):
        """
        Close the current episode and start the next one.

        Publishes the accumulated reward and step count of the episode that
        just ended (skipped while the episode number is still 0), increases
        the episode number by one and resets the per-episode counters.
        :return:
        """
        finished_reward = self.accumulated_episode_reward
        finished_steps = self.episode_steps

        # Nothing to publish before the first episode has been run.
        if self.episode_num > 0:
            self._publish_reward_topic(finished_reward, finished_steps,
                                       self.episode_num)

        self.episode_num += 1
        self.accumulated_episode_reward, self.episode_steps = 0, 0

    def _publish_reward_topic(self, reward, steps, episode_number=1):
        """
        Publish the given reward on the reward topic for easy access from the
        ROS infrastructure and record the episode.

        Besides publishing, the reward, episode number and step count are
        appended to self.reward_list, self.episode_list and self.step_list and
        written as one "reward;episode_number;steps" line to the file
        self.csv_name + '.csv'.

        :param reward: reward to publish
        :param steps: number of steps recorded with the reward
        :param episode_number: number of the episode the reward belongs to
        :return:
        """
        info = RLExperimentInfo()
        info.episode_number = episode_number
        info.episode_reward = reward
        self.reward_pub.publish(info)

        # Keep the in-memory history in step with what was published.
        self.reward_list.append(reward)
        self.episode_list.append(episode_number)
        self.step_list.append(steps)

        # One semicolon-separated line per episode, appended to the csv file.
        csv_line = ";".join(
            (str(reward), str(episode_number), str(steps))) + "\n"
        with open(self.csv_name + '.csv', 'a') as csv_file:
            csv_file.write(csv_line)