Example #1
# Assumed imports for this snippet (its original header was not included);
# the module paths follow the gym and PyRep APIs used below, and SCENE_FILE
# is a placeholder that must point at the actual CoppeliaSim scene.
import logging
import math
import time

import matplotlib.pyplot as plt
import numpy as np
from gym import Env
from gym.spaces import Box
from pyrep import PyRep
from pyrep.backend import vrep  # legacy backend binding (assumed)
from pyrep.objects.dummy import Dummy
from pyrep.objects.joint import Joint
from pyrep.objects.shape import Shape
from pyrep.objects.vision_sensor import VisionSensor
from pyrep.robots.arms.ur10 import UR10

SCENE_FILE = 'scene_pollos.ttt'  # placeholder path (assumption)
class EnvPollos(Env):
    def __init__(self, ep_length=100):
        """
        Pollos environment for testing purposes
        :param dim: (int) the size of the dimensions you want to learn
        :param ep_length: (int) the length of each episodes in timesteps
        """
        self.vrep = vrep
        logging.basicConfig(level=logging.DEBUG)
        # They have to be symmetric: actions are interpreted as angular increments.
        #self.action_space = Box(low=np.array([-0.1, -1.7, -2.5, -1.5, 0.0, 0.0]),
        #                        high=np.array([0.1, 1.7, 2.5, 1.5, 0.0, 0.0]))
        inc = 0.1
        self.action_space = Box(low=np.array([-inc, -inc, -inc, -inc, 0,
                                              -inc]),
                                high=np.array([inc, inc, inc, inc, 0, inc]))

        self.observation_space = Box(low=np.array([-0.5, 0.0, -0.2]),
                                     high=np.array([0.5, 1.0, 1.0]))

        #
        self.pr = PyRep()
        self.pr.launch(SCENE_FILE, headless=False)
        self.pr.start()
        self.agent = UR10()
        self.agent.max_velocity = 1
        self.agent.set_control_loop_enabled(True)
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.joints = [
            Joint('UR10_joint1'),
            Joint('UR10_joint2'),
            Joint('UR10_joint3'),
            Joint('UR10_joint4'),
            Joint('UR10_joint5'),
            Joint('UR10_joint6')
        ]
        #self.joints_limits = [[j.get_joint_interval()[1][0],j.get_joint_interval()[1][0]+j.get_joint_interval()[1][1]]
        #                      for j in self.joints]
        self.high_joints_limits = [0.1, 1.7, 2.7, 0.0, 0.02, 0.3]
        self.low_joints_limits = [-0.1, -0.2, 0.0, -1.5, -0.02, -0.5]
        self.agent_hand = Shape('hand')  # used for collision checks in step()

        self.initial_agent_tip_position = self.agent.get_tip().get_position()
        self.initial_agent_tip_quaternion = self.agent.get_tip(
        ).get_quaternion()
        self.initial_agent_tip_euler = self.agent.get_tip().get_orientation()
        self.target = Dummy('UR10_target')
        self.initial_target_orientation = self.target.get_orientation()
        self.initial_joint_positions = self.agent.get_joint_positions()
        self.pollo_target = Dummy('pollo_target')
        self.pollo = Shape('pollo')
        #self.table_target = Dummy('table_target')
        self.initial_pollo_position = self.pollo.get_position()
        self.initial_pollo_orientation = self.pollo.get_quaternion()

        #self.succion = Shape('suctionPad')
        self.waypoints = [Dummy('dummy_gancho%d' % i) for i in range(4)]

        # objects to check collisions against in step()
        self.scene_objects = [
            Shape('table0'),
            Shape('Plane'),
            Shape('Plane0'),
            Shape('ConcretBlock')
        ]
        #self.agent_tip = self.agent.get_tip()
        self.initial_distance = np.linalg.norm(
            np.array(self.initial_pollo_position) -
            np.array(self.initial_agent_tip_position))

        # camera
        self.camera = VisionSensor('kinect_depth')
        self.camera_matrix_extrinsics = vrep.simGetObjectMatrix(
            self.camera.get_handle(), -1)
        self.np_camera_matrix_extrinsics = np.delete(
            np.linalg.inv(self.vrepToNP(self.camera_matrix_extrinsics)), 3, 0)
        width = 640.0
        height = 480.0
        angle = math.radians(57.0)
        focalx_px = (width / 2.0) / math.tan(angle / 2.0)
        # square pixels are assumed, so the same focal length serves both image axes
        self.np_camera_matrix_intrinsics = np.array([[-focalx_px, 0.0, 320.0],
                                                     [0.0, -focalx_px, 240.0],
                                                     [0.0, 0.0, 1.0]])

        self.reset()

    def reset(self):
        pos = list(
            np.random.uniform([-0.1, -0.1, 0.0], [0.1, 0.1, 0.1]) +
            self.initial_pollo_position)
        self.pollo.set_position(pos)
        self.pollo.set_quaternion(self.initial_pollo_orientation)
        self.agent.set_joint_positions(self.initial_joint_positions)
        self.initial_epoch_time = time.time()
        while True:  # wait for arm to stop
            self.pr.step()  # Step the physics simulation
            a = self.agent.get_joint_velocities()
            if np.all(np.fabs(a) < 0.1):
                break
        print("ENV RESET DONE")
        return self._get_state()

    def step(self, action):
        if action is None:
            self.pr.step()
            return self._get_state(), 0.0, False, {}

        # check for nan
        if np.any(np.isnan(action)):
            print(action)
            return self._get_state(), -1.0, False, {}

        # action[1] = np.interp(action[1], [-1.7,1.7], [-0.2,1.7])
        # action[2] = np.interp(action[2], [-2.5,2.5], [0,2.5])
        # action[3] = np.interp(action[3], [-1.5,1.5], [-1.5,0])

        # add actions as increments to current joints value
        new_joints = np.array(
            self.agent.get_joint_positions()) + np.array(action)

        # check limits
        if np.any(np.greater(new_joints, self.high_joints_limits)) or np.any(
                np.less(new_joints, self.low_joints_limits)):
            logging.debug("New joints value out of limits r=-10")
            return self._get_state(), -10.0, True, {}

        # move the robot and wait to stop
        self.agent.set_joint_target_positions(new_joints)
        reloj = time.time()
        while True:  # wait for arm to stop
            self.pr.step()  # Step the physics simulation
            a = self.agent.get_joint_velocities()
            if np.all(np.fabs(a) < 0.1) or (time.time() - reloj) > 3:
                break

        # compute relevant magnitudes
        np_pollo_target = np.array(self.pollo_target.get_position())
        np_robot_tip_position = np.array(self.agent.get_tip().get_position())
        dist = np.linalg.norm(np_pollo_target - np_robot_tip_position)
        altura = np.clip((np_pollo_target[2] - self.initial_pollo_position[2]),
                         0, 0.5)

        for obj in self.scene_objects:  # collision with the table
            if self.agent_hand.check_collision(obj):
                logging.debug("Collision with objects r=-10")
                return self._get_state(), -10.0, True, {}
        if altura > 0.3:  # chicken in hand
            logging.debug("Height reached !!! r=30")
            return self._get_state(), 30.0, True, {}
        elif dist > self.initial_distance:  # hand moving away
            logging.debug("Hand moving away from chicken r=-10")
            return self._get_state(), -10.0, True, {}
        if time.time() - self.initial_epoch_time > 5:  # too much time
            logging.debug("Time out. End of epoch r=-10")
            return self._get_state(), -10.0, True, {}

        # Reward
        #pollo_height = np.exp(-altura*20)  # for 0.3 m pollo_height = 0.002, for 0 m pollo_height = 1
        reward = altura - dist
        logging.debug("Step reward %f" % reward)
        return self._get_state(), reward, False, {}

    def close(self):
        self.pr.stop()
        self.pr.shutdown()

    def render(self):
        print("RENDER")
        np_pollo_en_camara = self.getPolloEnCamara()

        # capture depth image
        depth = self.camera.capture_rgb()
        circ = plt.Circle(
            (int(np_pollo_en_camara[0]), int(np_pollo_en_camara[1])), 10)
        plt.clf()
        fig = plt.gcf()
        ax = fig.gca()
        ax.add_artist(circ)
        ax.imshow(depth, cmap='hot')
        plt.pause(0.000001)

    # Aux

    # transform env.pollo_target.get_position() to camera coordinates and project it to image coordinates
    def getPolloEnCamara(self):
        np_pollo_target = np.array(self.pollo_target.get_position())
        np_pollo_target_cam = self.np_camera_matrix_extrinsics.dot(
            np.append([np_pollo_target], 1.0))
        np_pollo_en_camara = self.np_camera_matrix_intrinsics.dot(
            np_pollo_target_cam)
        np_pollo_en_camara = np_pollo_en_camara / np_pollo_en_camara[2]
        np_pollo_en_camara = np.delete(np_pollo_en_camara, 2)
        return np_pollo_en_camara

    def getPolloEnCamaraEx(self):
        np_pollo_target = np.array(self.pollo_target.get_position())
        np_pollo_en_camara = self.np_camera_matrix_extrinsics.dot(
            np.append([np_pollo_target], 1.0))
        return np_pollo_en_camara

    def _get_state(self):
        # Return state containing arm joint angles/velocities & target position
        # return (self.agent.get_joint_positions() +
        #         self.agent.get_joint_velocities() +
        #         self.pollo_target.get_position())
        p = self.getPolloEnCamaraEx()
        j = self.agent.get_joint_positions()
        #r = np.array([p[0],p[1],p[2],j[0],j[1],j[2],j[3],j[4],j[5]])
        r = np.array([p[0], p[1], p[2]])
        return r

    def vrepToNP(self, c):
        # repack V-REP's 12-value object matrix into a 4x4 homogeneous matrix
        return np.array([[c[0], c[4], c[8], c[3]], [c[1], c[5], c[9], c[7]],
                         [c[2], c[6], c[10], c[11]], [0, 0, 0, 1]])
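
# Usage sketch for the class above: it follows the gym Env protocol, so a plain
# reset/step loop drives it. Assumes CoppeliaSim is installed and SCENE_FILE
# points at a scene containing the UR10, 'pollo' and 'kinect_depth' objects.
if __name__ == '__main__':
    env = EnvPollos()
    obs = env.reset()
    for _ in range(20):
        action = env.action_space.sample()         # random angular increments
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()
    env.close()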
Example #2
# Assumed imports for this snippet (header not included). Pioneer, get_distance,
# world_to_robot_frame, get_logger and main are project-local helpers, not PyRep APIs.
import numpy as np
from os.path import join, dirname, abspath, exists
from PIL import Image
from pyrep import PyRep
from pyrep.backend import sim
from pyrep.objects.dummy import Dummy
from pyrep.objects.shape import Shape
from pyrep.objects.vision_sensor import VisionSensor
class PioneerEnv(object):
    def __init__(self,
                 escene_name='proximity_sensor.ttt',
                 start=[100, 100],
                 goal=[180, 500],
                 rand_area=[100, 450],
                 path_resolution=5.0,
                 margin=0.,
                 margin_to_goal=0.5,
                 action_max=[.5, 1.],
                 action_min=[0., 0.],
                 _load_path=True,
                 path_name="PathNodes",
                 type_of_planning="PID",
                 type_replay_buffer="random",
                 max_laser_range=1.0,
                 headless=False):

        SCENE_FILE = join(dirname(abspath(__file__)), escene_name)

        self.pr = PyRep()
        self.pr.launch(SCENE_FILE, headless=headless)
        self.pr.start()
        self.agent = Pioneer("Pioneer_p3dx",
                             type_of_planning=type_of_planning,
                             type_replay_buffer=type_replay_buffer)
        self.agent.set_control_loop_enabled(False)
        self.initial_joint_positions = self.agent.get_joint_positions()
        self.initial_position = self.agent.get_position()
        print(f"Agent initial position: {self.initial_position}")

        self.vision_map = VisionSensor("VisionMap")
        self.vision_map.handle_explicitly()
        self.vision_map_handle = self.vision_map.get_handle()

        self.floor = Shape("Floor_respondable")

        self.perspective_angle = self.vision_map.get_perspective_angle()  # degrees
        self.resolution = self.vision_map.get_resolution()  # list [resx, resy]
        self.margin = margin
        self.margin_to_goal = margin_to_goal
        self.rand_area = rand_area
        self.start = start
        self.goal = goal
        self.type_of_planning = type_of_planning
        self.max_laser_range = max_laser_range
        self.max_angle_orientation = np.pi
        scene_image = self.vision_map.capture_rgb() * 255
        scene_image = np.flipud(scene_image)

        self.rew_weights = [1, 10000, 5000]

        self.start_position = Dummy("Start").get_position()
        self.goal_position = Dummy("Goal").get_position()  # [x, y, z]

        self.local_goal_aux = Dummy("Local_goal_aux")

        self.local_goal_aux_pos_list = [[-3.125, 2.175, 0], [2.9, 3.575, 0],
                                        self.goal_position, self.goal_position]
        self.ind_local_goal_aux = 0

        self.max_distance = get_distance([-7.5, -4.1, 0.], self.goal_position)
        self.distance_to_goal_m1 = get_distance(self.start_position,
                                                self.goal_position)

        self.c_lin_vel = self.c_ang_vel = self.b_lin_vel = self.b_ang_vel = 0.

        self.action_max = action_max
        self.action_min = action_min

        if type_of_planning == 'PID':
            self.planning_info_logger = get_logger("./loggers",
                                                   "Planning_Info.log")
            self.image_path = None
            if exists("./paths/" + path_name + ".npy") and _load_path:
                print("Load Path..")
                self.image_path = np.load("./paths/" + path_name + ".npy",
                                          allow_pickle=True)
            else:
                print("Planning...")
                self.image_path = self.Planning(
                    scene_image,
                    self.start,
                    self.goal,
                    self.rand_area,
                    path_resolution=path_resolution,
                    logger=self.planning_info_logger)

            assert self.image_path is not None, "path should not be None"

            self.real_path = self.path_image2real(self.image_path,
                                                  self.start_position)
            # project in coppelia sim
            sim_drawing_points = 0
            point_size = 10  #[pix]
            duplicate_tolerance = 0
            parent_obj_handle = self.floor.get_handle()
            max_iter_count = 999999
            ambient_diffuse = (255, 0, 0)
            blue_ambient_diffuse = (0, 0, 255)
            point_container = sim.simAddDrawingObject(
                sim_drawing_points,
                point_size,
                duplicate_tolerance,
                parent_obj_handle,
                max_iter_count,
                ambient_diffuse=ambient_diffuse)

            local_point_container = sim.simAddDrawingObject(
                sim_drawing_points,
                point_size,
                duplicate_tolerance,
                parent_obj_handle,
                max_iter_count,
                ambient_diffuse=blue_ambient_diffuse)

            # debug
            for point in self.real_path:
                point_data = (point[0], point[1], 0)
                sim.simAddDrawingObjectItem(point_container, point_data)
            # the drawn points must already be in real-world coordinates

            assert local_point_container is not None, "point container shouldn't be empty"
            self.agent.load_point_container(local_point_container)
            self.agent.load_path(self.real_path)

    def reset(self):
        self.pr.stop()
        if self.type_of_planning == 'PID':
            self.agent.local_goal_reset()
        self.pr.start()
        self.pr.step()
        sensor_state, distance_to_goal, orientation_to_goal = self._get_state()
        self.distance_to_goal_m1 = distance_to_goal
        self.orientation_to_goal_m1 = orientation_to_goal
        sensor_state_n, distance_to_goal, orientation_to_goal = self.normalize_states(
            sensor_state, distance_to_goal, orientation_to_goal)
        # pos_x, pos_y = self.agent.get_position()[:-1]
        return np.hstack((sensor_state_n, distance_to_goal,
                          orientation_to_goal)), sensor_state

    def step(self, action, pf_f=0):
        self.agent.set_joint_target_velocities(action)
        self.pr.step()  # Step the physics simulation
        scene_image = self.vision_map.capture_rgb() * 255  # numpy -> [h, w, 3]
        sensor_state, distance_to_goal, orientation_to_goal = self._get_state()
        self.b_lin_vel, self.b_ang_vel = self.c_lin_vel, self.c_ang_vel
        self.c_lin_vel, self.c_ang_vel = self.agent.get_velocity(
        )  # 3d coordinates
        self.c_lin_vel = np.linalg.norm(self.c_lin_vel)
        self.c_ang_vel = self.c_ang_vel[-1]  # angular velocity about the z axis
        reward, done = self._get_reward(sensor_state, distance_to_goal,
                                        orientation_to_goal, pf_f)
        sensor_state_n, distance_to_goal, orientation_to_goal = self.normalize_states(
            sensor_state, distance_to_goal, orientation_to_goal)
        # pos_x, pos_y = self.agent.get_position()[:-1]
        state = np.hstack(
            (sensor_state_n, distance_to_goal, orientation_to_goal))
        return state, reward, scene_image, done, sensor_state

    def _get_state(self):
        sensor_state = np.array([
            proxy_sensor.read()
            for proxy_sensor in self.agent.proximity_sensors
        ])  # list of distances; -1 if nothing is detected
        goal_transformed = world_to_robot_frame(
            self.agent.get_position(), self.goal_position,
            self.agent.get_orientation()[-1])
        # distance_to_goal = get_distance(goal_transformed[:-1], np.array([0,0])) # robot frame
        distance_to_goal = get_distance(self.agent.get_position()[:-1],
                                        self.goal_position[:-1])
        orientation_to_goal = np.arctan2(goal_transformed[1],
                                         goal_transformed[0])
        return sensor_state, distance_to_goal, orientation_to_goal

    def _get_reward(self,
                    sensor_state,
                    distance_to_goal,
                    orientation_to_goal,
                    pf_f=0):
        done = False

        r_local_goal = self.update_local_goal_aux()

        r_target = -(distance_to_goal - self.distance_to_goal_m1)
        r_vel = self.c_lin_vel / self.action_max[0]
        # r_orientation = - (orientation_to_goal - self.orientation_to_goal_m1)
        reward = self.rew_weights[0] * (r_local_goal + r_vel)  # - pf_f/20)
        # distance_to_goal = get_distance(agent_position[:-1], goal[:-1])
        self.distance_to_goal_m1 = distance_to_goal
        self.orientation_to_goal_m1 = orientation_to_goal
        # collision check
        if self.collision_check(sensor_state, self.margin):
            reward = -1 * self.rew_weights[1]
            done = True
        # goal achievement
        if distance_to_goal < self.margin_to_goal:
            reward = self.rew_weights[2]
            done = True
        return reward, done

    def shutdown(self):
        self.pr.stop()
        self.pr.shutdown()

    def model_update(self, method="DDPG", pretraining_loop=False):

        if method == "DDPG": self.agent.trainer.update()
        elif method == "IL":
            loss = self.agent.trainer.IL_update()
            return loss
        elif method == "CoL":
            loss = self.agent.trainer.CoL_update(pretraining_loop)
        else:
            raise NotImplementedError()

    def normalize_states(self, sensor_state, distance_to_goal,
                         orientation_to_goal):
        sensor_state = self.normalize_laser(sensor_state, self.norm_func)
        distance_to_goal = self.norm_func(distance_to_goal, self.max_distance)
        orientation_to_goal = self.norm_func(orientation_to_goal,
                                             self.max_angle_orientation)
        return sensor_state, distance_to_goal, orientation_to_goal

    def normalize_laser(self, obs, norm_func):
        return np.array([
            norm_func(o, self.max_laser_range) if o >= 0 else -1 for o in obs
        ])

    def norm_func(self, x, max_value):
        return 2 * (1 - min(x, max_value) / max_value) - 1

    def set_margin(self, margin):
        self.margin = margin

    # def set_start_goal_position(self, start_pos, goal_pos):

    def update_local_goal_aux(self):
        pos = self.agent.get_position()[:-1]
        distance = get_distance(pos, self.local_goal_aux.get_position()[:-1])
        # advance to the next auxiliary goal, guarding against running past the list
        if distance < 0.5 and self.ind_local_goal_aux < len(self.local_goal_aux_pos_list):
            self.local_goal_aux.set_position(
                self.local_goal_aux_pos_list[self.ind_local_goal_aux])
            self.ind_local_goal_aux += 1
        return -1 * distance**2

    @staticmethod
    def collision_check(observations, margin):
        # all sensors reading -1 means nothing is detected at all
        if observations.sum() == -1 * observations.shape[0]:
            return False
        elif observations[observations >= 0].min() < margin:
            return True
        else:
            return False

    @staticmethod
    def Planning(Map,
                 start,
                 goal,
                 rand_area,
                 path_resolution=5.0,
                 logger=None):
        """
        :parameter Map(ndarray): Image that planning over with
        """
        Map = Image.fromarray(Map.astype(np.uint8)).convert('L')
        path, n_paths = main(Map,
                             start,
                             goal,
                             rand_area,
                             path_resolution=path_resolution,
                             logger=logger,
                             show_animation=False)

        if path is not None:
            np.save("./paths/PathNodes_" + str(n_paths) + ".npy", path)
            return path
        else:
            logger.info("Path not found")
            return None

    @staticmethod
    def path_image2real(image_path, start):
        """
        image_path: np.array[pix] points of the image path
        start_array: np.array
        """
        scale = 13.0 / 512  # [m/pix]

        x_init = start[0]
        y_init = start[1]
        deltas = [(image_path[i + 1][0] - image_path[i][0],
                   image_path[i + 1][1] - image_path[i][1])
                  for i in range(image_path.shape[0] - 1)]

        path = np.zeros((image_path.shape[0], 3))
        path[0, :] = np.array([x_init, y_init, 0])
        for i in range(1, image_path.shape[0]):
            path[i, :] = np.array([
                path[i - 1, 0] + deltas[i - 1][0] * scale,
                path[i - 1, 1] - deltas[i - 1][1] * scale, 0
            ])

        rot_mat = np.diagflat([-1, -1, 1])
        tras_mat = np.zeros_like(path)
        tras_mat[:, 0] = 0.3   # translation offsets in [m]
        tras_mat[:, 1] = 4.65
        path = path @ rot_mat + tras_mat
        return np.flip(path, axis=0)
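
# A standalone check of the normalization used above (no simulator needed):
# norm_func maps x in [0, max_value] linearly onto [1, -1], clipping at
# max_value, which is how laser ranges and the goal distance are scaled.
def _norm_func(x, max_value):  # same formula as PioneerEnv.norm_func
    return 2 * (1 - min(x, max_value) / max_value) - 1

assert _norm_func(0.0, 1.0) == 1.0    # closest reading -> 1
assert _norm_func(0.5, 1.0) == 0.0    # mid range -> 0
assert _norm_func(1.0, 1.0) == -1.0   # at max range -> -1
assert _norm_func(5.0, 1.0) == -1.0   # beyond max range is clipped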
Example #3
# Assumed imports: the same header as Example #1 (logging, math, time, numpy,
# matplotlib, gym's Env, the PyRep classes used below, and SCENE_FILE).
class EnvPollos(Env):
    def __init__(self, ep_length=100):
        """
        Pollos environment for testing purposes
        :param dim: (int) the size of the dimensions you want to learn
        :param ep_length: (int) the length of each episodes in timesteps
        """
        logging.basicConfig(level=logging.DEBUG)

        #
        self.pr = PyRep()
        self.pr.launch(SCENE_FILE, headless=False)
        self.pr.start()
        self.agent = UR10()
        self.agent.max_velocity = 0.8
        self.agent.set_control_loop_enabled(True)
        #self.agent.set_motor_locked_at_zero_velocity(True)

        self.MAX_INC = 0.2
        self.joints = [
            Joint('UR10_joint1'),
            Joint('UR10_joint2'),
            Joint('UR10_joint3'),
            Joint('UR10_joint4'),
            Joint('UR10_joint5'),
            Joint('UR10_joint6')
        ]
        #self.joints_limits = [[j.get_joint_interval()[1][0],j.get_joint_interval()[1][0]+j.get_joint_interval()[1][1]]
        #                      for j in self.joints]
        self.high_joints_limits = [0.1, 1.7, 2.7, 0.0, 0.02, 0.3]
        self.low_joints_limits = [-0.1, -0.2, 0.0, -1.5, -0.02, -0.5]
        self.initial_joint_positions = self.agent.get_joint_positions()

        self.agent_hand = Shape('hand')
        self.initial_agent_tip_position = self.agent.get_tip().get_position()
        self.initial_agent_tip_quaternion = self.agent.get_tip(
        ).get_quaternion()

        self.target = Dummy('UR10_target')

        self.pollo_target = Dummy('pollo_target')
        self.pollo = Shape('pollo')
        self.initial_pollo_position = self.pollo.get_position()
        self.initial_pollo_orientation = self.pollo.get_quaternion()

        self.table_target = Dummy('table_target')

        # objects to check collisions
        self.scene_objects = [
            Shape('table0'),
            Shape('Plane'),
            Shape('Plane0'),
            Shape('ConcretBlock')
        ]

        self.initial_distance = np.linalg.norm(
            np.array(self.initial_pollo_position) -
            np.array(self.initial_agent_tip_position))

        # camera
        self.camera = VisionSensor('kinect_depth')
        self.camera_matrix_extrinsics = vrep.simGetObjectMatrix(
            self.camera.get_handle(), -1)
        self.np_camera_matrix_extrinsics = np.delete(
            np.linalg.inv(self.vrepToNP(self.camera_matrix_extrinsics)), 3, 0)
        width = 640.0
        height = 480.0
        angle = math.radians(57.0)
        focalx_px = (width / 2.0) / math.tan(angle / 2.0)
        # square pixels are assumed, so the same focal length serves both image axes
        self.np_camera_matrix_intrinsics = np.array([[-focalx_px, 0.0, 320.0],
                                                     [0.0, -focalx_px, 240.0],
                                                     [0.0, 0.0, 1.0]])

        self.reset()

    def reset(self):
        pos = list(
            np.random.uniform([-0.1, -0.1, 0.0], [0.1, 0.1, 0.1]) +
            self.initial_pollo_position)
        self.pollo.set_position(pos)
        self.pollo.set_quaternion(self.initial_pollo_orientation)
        self.agent.set_joint_positions(self.initial_joint_positions)
        self.initial_epoch_time = time.time()
        while True:  # wait for arm to stop
            self.pr.step()  # Step the physics simulation
            a = self.agent.get_joint_velocities()
            if np.all(np.fabs(a) < 0.1):
                break

        return self._get_state()

    def step(self, action):
        if action is None:
            return self._get_state(), -10.0, True, {}

        # check for nan
        if np.any(np.isnan(action)):
            print("NAN values ", action)
            self.NANS_COMING = True
            return self._get_state(), -10.0, True, {}

        # check for strange values
        # if np.any(np.greater(action, self.MAX_INC)) or np.any(np.less(action, -self.MAX_INC)):
        #     print("Strange values ", action)
        #     self.NANS_COMING = True
        #     return self._get_state(), -10.0, True, {}

        # check for NAN in VREP get_position()
        if np.any(np.isnan(self.agent.get_tip().get_position())):
            print("NAN values in get_position()", action,
                  self.agent.get_tip().get_position())
            return self._get_state(), -10.0, True, {}

        self.pr.step()
        # this stripped-down variant ends the episode after every step
        return self._get_state(), 0.0, True, {}

    def close(self):
        self.pr.stop()
        self.pr.shutdown()

    def render(self):
        print("RENDER")
        np_pollo_en_camara = self.getPolloEnCamara()

        # capture depth image
        depth = self.camera.capture_rgb()
        circ = plt.Circle(
            (int(np_pollo_en_camara[0]), int(np_pollo_en_camara[1])), 10)
        plt.clf()
        fig = plt.gcf()
        ax = fig.gca()
        ax.add_artist(circ)
        ax.imshow(depth, cmap='hot')
        plt.pause(0.000001)

    # Aux

    # transform env.pollo_target.get_position() to camera coordinates and project it to image coordinates
    def getPolloEnCamara(self):
        np_pollo_target = np.array(self.pollo_target.get_position())
        np_pollo_target_cam = self.np_camera_matrix_extrinsics.dot(
            np.append([np_pollo_target], 1.0))
        np_pollo_en_camara = self.np_camera_matrix_intrinsics.dot(
            np_pollo_target_cam)
        np_pollo_en_camara = np_pollo_en_camara / np_pollo_en_camara[2]
        np_pollo_en_camara = np.delete(np_pollo_en_camara, 2)
        return np_pollo_en_camara

    def getPolloEnCamaraEx(self):
        np_pollo_target = np.array(self.pollo_target.get_position())
        np_pollo_en_camara = self.np_camera_matrix_extrinsics.dot(
            np.append([np_pollo_target], 1.0))
        return np_pollo_en_camara

    def _get_state(self):
        # Return state containing arm joint angles/velocities & target position
        # return (self.agent.get_joint_positions() +
        #         self.agent.get_joint_velocities() +
        #         self.pollo_target.get_position())
        p = self.getPolloEnCamaraEx()
        j = self.agent.get_joint_positions()
        #r = np.array([p[0],p[1],p[2],j[0],j[1],j[2],j[3],j[4],j[5]])
        r = np.array([p[0], p[1], p[2]])
        return r

    def vrepToNP(self, c):
        return np.array([[c[0], c[4], c[8], c[3]], [c[1], c[5], c[9], c[7]],
                         [c[2], c[6], c[10], c[11]], [0, 0, 0, 1]])
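
# Standalone sketch of the pinhole projection that getPolloEnCamara performs
# above: p_cam = [R|t] @ p_world_h, p_img = K @ p_cam, then divide by depth.
# The extrinsics here are a made-up identity pose (assumption); the intrinsics
# mirror the ones built in __init__.
import math
import numpy as np

width, angle = 640.0, math.radians(57.0)
fx = (width / 2.0) / math.tan(angle / 2.0)
K = np.array([[-fx, 0.0, 320.0],
              [0.0, -fx, 240.0],
              [0.0, 0.0, 1.0]])
extrinsics = np.hstack([np.eye(3), np.zeros((3, 1))])  # camera at the world origin

p_world = np.array([0.1, -0.05, 1.0, 1.0])  # homogeneous world point
p_img = K @ (extrinsics @ p_world)
u, v = p_img[:2] / p_img[2]                 # pixel coordinates, as in render()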
Example #4
# Assumed imports: the same header as Example #1, plus
#   from pyrep.objects.cartesian_path import CartesianPath
#   from pyrep.errors import IKError
class EnvPollos(Env):
    def __init__(self, ep_length=100):
        """
        Pollos environment for testing purposes
        :param dim: (int) the size of the dimensions you want to learn
        :param ep_length: (int) the length of each episodes in timesteps
        """
        # 5 points in 3D space forming the trajectory
        self.action_space = Box(low=-0.3, high=0.3, shape=(5,3), dtype=np.float32)
        self.observation_space = Box(low=np.array([-0.1, -0.2, 0, -1.5, 0.0, 0.0, -0.5, -0.2, -0.2]), 
                                     high=np.array([0.1, 1.7, 2.5, 0, 0.0, 0.0, 0.5, 0.2, 1.0]))
        
        #
        self.pr = PyRep()
        self.pr.launch(SCENE_FILE, headless=False)
        self.pr.start()
        self.agent = UR10()
        self.agent.max_velocity = 1.2
        self.joints = [
            Joint('UR10_joint1'),
            Joint('UR10_joint2'),
            Joint('UR10_joint3'),
            Joint('UR10_joint4'),
            Joint('UR10_joint5'),
            Joint('UR10_joint6')
        ]
        self.joints_limits = [[j.get_joint_interval()[1][0],j.get_joint_interval()[1][0]+j.get_joint_interval()[1][1]] 
                              for j in self.joints]
        print(self.joints_limits)
        self.agent_hand = Shape('hand')
        self.agent.set_control_loop_enabled(True)
        self.agent.set_motor_locked_at_zero_velocity(True)
        self.initial_agent_tip_position = self.agent.get_tip().get_position()
        self.initial_agent_tip_quaternion = self.agent.get_tip().get_quaternion()
        self.target = Dummy('UR10_target')
        self.initial_joint_positions = self.agent.get_joint_positions()
        self.pollo_target = Dummy('pollo_target')
        self.pollo = Shape('pollo')
        self.table_target = Dummy('table_target')
        self.initial_pollo_position = self.pollo.get_position()
        self.initial_pollo_orientation = self.pollo.get_quaternion()
        self.table_target = Dummy('table_target')
        # objects
        self.scene_objects = [Shape('table0'), Shape('Plane'), Shape('Plane0'), Shape('ConcretBlock')]
        #self.agent_tip = self.agent.get_tip()
        self.initial_distance = np.linalg.norm(np.array(self.initial_pollo_position)-np.array(self.initial_agent_tip_position))
        
        # camera 
        self.camera = VisionSensor('kinect_depth')
        self.camera_matrix_extrinsics = vrep.simGetObjectMatrix(self.camera.get_handle(),-1)
        self.np_camera_matrix_extrinsics = np.delete(np.linalg.inv(self.vrepToNP(self.camera_matrix_extrinsics)), 3, 0)
        width = 640.0
        height = 480.0
        angle = math.radians(57.0)
        focalx_px = (width/2.0) / math.tan(angle/2.0)
        # square pixels are assumed, so the same focal length serves both image axes
        self.np_camera_matrix_intrinsics = np.array([[-focalx_px, 0.0, 320.0],
                                                     [0.0, -focalx_px, 240.0],
                                                     [0.0, 0.0, 1.0]])
                                        
        self.reset()

    def reset(self):
        pos = list(np.random.uniform( [-0.1, -0.1, 0.0],  [0.1, 0.1, 0.1]) + self.initial_pollo_position)
        self.pollo.set_position(pos)
        self.pollo.set_quaternion(self.initial_pollo_orientation)
        self.agent.set_joint_positions(self.initial_joint_positions)
        self.initial_epoch_time = time.time()
        while True:         # wait for arm to stop
            self.pr.step()  # Step the physics simulation
            a = self.agent.get_joint_velocities()
            if np.all(np.fabs(a) < 0.1):
                break
        return self._get_state()

    def step(self, action):
        if action is None:
            self.pr.step()
            return self._get_state(), 0.0, False, {}

        # the None check must come first: action.shape would raise on None
        print(action.shape, action)
        if np.any(np.isnan(action)):
            print(action)
            return self._get_state(), -1.0, False, {}

        # create a path with the tip and the pollo as endpoints and adjustable points in between
        np_pollo_target = np.array(self.pollo_target.get_position())
        np_robot_tip_position = np.array(self.agent.get_tip().get_position())
        np_robot_tip_orientation = np.array(self.agent.get_tip().get_orientation())
        c_path = CartesianPath.create()
        # each control point is a 6-value pose, so the 3D action points are
        # padded with the current tip orientation (action[3] is left unused)
        c_path.insert_control_points([list(action[4]) + list(np_robot_tip_orientation)])  # point after the pollo to secure the grasp
        c_path.insert_control_points([list(np_pollo_target) + list(np_robot_tip_orientation)])  # pollo
        c_path.insert_control_points([list(p) + list(np_robot_tip_orientation) for p in action[0:3]])
        c_path.insert_control_points([list(np_robot_tip_position) + list(np_robot_tip_orientation)])  # robot hand
        try:
            self.path = self.agent.get_path_from_cartesian_path(c_path)
        except IKError as e:
            print('Agent::grasp    Could not find joint values')  
            return self._get_state(), -1, True, {}   
    
        # go through path
        reloj = time.time()
        while self.path.step():
            self.pr.step()                                   # Step the physics simulation
            if (time.time()-reloj) > 4:              
                return self._get_state(), -10.0, True, {}    # Too much time
            if any((self.agent_hand.check_collision(obj) for obj in self.scene_objects)):   # collision with the table
                return self._get_state(), -10.0, True, {}   
        
        # path ok, compute reward
        np_pollo_target = np.array(self.pollo_target.get_position())
        np_robot_tip_position = np.array(self.agent.get_tip().get_position())
        dist = np.linalg.norm(np_pollo_target-np_robot_tip_position)
        altura = np.clip((np_pollo_target[2] - self.initial_pollo_position[2]), 0, 0.5)
        
        if altura > 0.3:                                                     # chicken in hand
            return self._get_state(), altura, True, {}
        elif dist > self.initial_distance:                                   # hand moving away
            return self._get_state(), -10.0, True, {}
        if time.time() - self.initial_epoch_time > 5:                        # too much time
            return self._get_state(), -10.0, True, {}
        
        # Reward
        pollo_height = np.exp(-altura*10)  # altura is clipped to 0.5, so this ranges from 1 (0 m) down to ~0.007 (0.5 m)
        reward = - pollo_height - dist
        return self._get_state(), reward, False, {}

    def close(self):
        self.pr.stop()
        self.pr.shutdown()
    
    def render(self):
        print("RENDER")
        np_pollo_en_camara = self.getPolloEnCamara()
         
        # capture depth image
        depth = self.camera.capture_rgb()
        circ = plt.Circle((int(np_pollo_en_camara[0]), int(np_pollo_en_camara[1])),10)
        plt.clf()
        fig = plt.gcf()
        ax = fig.gca()
        ax.add_artist(circ)
        ax.imshow(depth, cmap = 'hot')
        plt.pause(0.000001)

    # Aux

    # transform env.pollo_target.get_position() to camera coordinates and project it to image coordinates
    def getPolloEnCamara(self):
        np_pollo_target = np.array(self.pollo_target.get_position())
        np_pollo_target_cam = self.np_camera_matrix_extrinsics.dot(np.append([np_pollo_target],1.0))
        np_pollo_en_camara = self.np_camera_matrix_intrinsics.dot(np_pollo_target_cam)
        np_pollo_en_camara = np_pollo_en_camara / np_pollo_en_camara[2]
        np_pollo_en_camara = np.delete(np_pollo_en_camara,2)
        return np_pollo_en_camara

    def getPolloEnCamaraEx(self):
        np_pollo_target = np.array(self.pollo_target.get_position())
        np_pollo_en_camara = self.np_camera_matrix_extrinsics.dot(np.append([np_pollo_target],1.0))
        return np_pollo_en_camara


    def _get_state(self):
        # Return state containing arm joint angles/velocities & target position
        # return (self.agent.get_joint_positions() + 
        #         self.agent.get_joint_velocities() +
        #         self.pollo_target.get_position())
        p = self.getPolloEnCamaraEx()
        j = self.agent.get_joint_positions()
        r = np.array([j[0],j[1],j[2],j[3],j[4],j[5],p[0],p[1],p[2]])
        return r
    
    def vrepToNP(self, c):
        return np.array([[c[0],c[4],c[8],c[3]],
                         [c[1],c[5],c[9],c[7]],
                         [c[2],c[6],c[10],c[11]],
                         [0,   0,   0,    1]])
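
# Standalone sketch of the control-point padding done in step() above:
# insert_control_points receives 6-value poses (as the pollo and tip calls
# show), while the (5, 3) action only carries positions, so each point is
# padded with the current tip orientation. Numbers here are made up.
import numpy as np

action = np.random.uniform(-0.3, 0.3, size=(5, 3))   # matches the action_space
tip_orientation = [0.0, 0.0, 0.0]                    # hypothetical euler angles
poses = [list(p) + tip_orientation for p in action]  # five 6-value poses
assert all(len(pose) == 6 for pose in poses)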