Example #1
from typing import List, Tuple

import numpy as np
from pyrep.objects.dummy import Dummy
from pyrep.objects.proximity_sensor import ProximitySensor
from pyrep.objects.shape import Shape
from rlbench.backend.conditions import (ConditionSet, DetectedCondition,
                                        NothingGrasped)
from rlbench.backend.spawn_boundary import SpawnBoundary
from rlbench.backend.task import Task
from rlbench.const import colors


class PutPlateInColoredDishRack(Task):
    def init_task(self) -> None:
        plate = Shape('plate')
        self.dish_rack = Shape('dish_rack')
        self.plate_stand = Shape('plate_stand')
        self.success_sensor = ProximitySensor('success')
        self.success_poses = [Dummy('success_pos%d' % i) for i in range(3)]
        self.pillars = [Shape('dish_rack_pillar%d' % i) for i in range(6)]

        self.boundary = SpawnBoundary([Shape('boundary')])
        self.register_graspable_objects([plate])
        cond_set = ConditionSet([
            DetectedCondition(plate, self.success_sensor),
            NothingGrasped(self.robot.gripper)
        ], order_matters=True)
        self.register_success_conditions([cond_set])

    def init_episode(self, index: int) -> List[str]:
        color_name, _ = colors[index]

        shuffled_pillar_indexes = list(range(3))
        np.random.shuffle(shuffled_pillar_indexes)

        # Color the target pillar pair and two distractor pairs
        color_choices = [index] + list(
            np.random.choice(
                list(range(index)) + list(range(index + 1, len(colors))),
                size=2,
                replace=False))
        for pillar_i, color_i in zip(shuffled_pillar_indexes, color_choices):
            _, rgb = colors[color_i]
            self.pillars[pillar_i * 2].set_color(rgb)
            self.pillars[1 + pillar_i * 2].set_color(rgb)

        # Move the success detector to the correct spot
        success_pos = self.success_poses[shuffled_pillar_indexes[0]]
        self.success_sensor.set_position(success_pos.get_position())
        self.success_sensor.set_orientation(success_pos.get_orientation())

        self.boundary.clear()
        self.boundary.sample(self.plate_stand,
                             min_rotation=(0, 0, 0.5),
                             max_rotation=(0, 0, 0.5))
        self.boundary.sample(self.dish_rack, min_distance=0.2)

        return [
            'put the plate between the %s pillars of the dish rack'
            % color_name,
            'place the plate in the %s section of the drying rack' %
            color_name,
            'pick up the plate and leave it between the %s spokes on the '
            'dish rack' % color_name
        ]

    def variation_count(self) -> int:
        return len(colors)

    def base_rotation_bounds(self) -> Tuple[List[float], List[float]]:
        return [0, 0, 0], [0, 0, 0]
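A recurring idiom in these examples is the distractor-color draw: sample from every color index except the target's, so no distractor can duplicate the color named in the instruction. A minimal standalone sketch of that idiom (the toy `colors` list here stands in for `rlbench.const.colors`):

import numpy as np

# Toy stand-in for rlbench.const.colors: a list of (name, rgb) pairs.
colors = [('red', (1.0, 0.0, 0.0)), ('green', (0.0, 1.0, 0.0)),
          ('blue', (0.0, 0.0, 1.0)), ('yellow', (1.0, 1.0, 0.0))]


def sample_distractor_indices(target_index: int, n: int) -> list:
    # Every index except the target's, so a distractor never repeats the
    # instruction color.
    candidates = (list(range(target_index)) +
                  list(range(target_index + 1, len(colors))))
    return list(np.random.choice(candidates, size=n, replace=False))


print(sample_distractor_indices(2, 2))  # e.g. [0, 3], never 2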
Example #2
    def init_episode(self, index: int) -> List[str]:
        b = SpawnBoundary([self.boundary])
        for obj in self.jars:
            b.sample(obj, min_distance=0.01)
        success = ProximitySensor('success')
        success.set_position([0.0, 0.0, 0.05],
                             relative_to=self.jars[index % 2],
                             reset_dynamics=False)
        w3 = Dummy('waypoint3')
        w3.set_orientation([-np.pi, 0, -np.pi], reset_dynamics=False)
        w3.set_position([0.0, 0.0, 0.125],
                        relative_to=self.jars[index % 2],
                        reset_dynamics=False)
        target_color_name, target_color_rgb = colors[index]
        color_choice = np.random.choice(
            list(range(index)) + list(range(index + 1, len(colors))),
            size=1, replace=False)[0]
        _, distractor_color_rgb = colors[color_choice]
        self.jars[index % 2].set_color(target_color_rgb)
        other_index = {0: 1, 1: 0}
        self.jars[other_index[index % 2]].set_color(distractor_color_rgb)
        self.conditions += [DetectedCondition(self.lid, success)]
        self.register_success_conditions(self.conditions)
        return [
            'close the %s jar' % target_color_name,
            'screw on the %s jar lid' % target_color_name,
            'grasping the lid, lift it from the table and use it to seal '
            'the %s jar' % target_color_name,
            'pick up the lid from the table and put it on the %s jar' %
            target_color_name
        ]
Example #3
class HannoiSquare(Task):

    def init_task(self) -> None:
        self.square_ring = Shape('hannoi_square_ring')
        self.success_detector = ProximitySensor(
            'hannoi_square_success_detector')
        self.register_graspable_objects([self.square_ring])
        success_condition1 = DetectedCondition(self.square_ring,
                                               self.success_detector)
        self.register_success_conditions([success_condition1])

    def init_episode(self, index: int) -> List[str]:
        pillar0 = Shape('hannoi_square_pillar0')
        pillar1 = Shape('hannoi_square_pillar1')
        pillar2 = Shape('hannoi_square_pillar2')
        spokes = [pillar0, pillar1, pillar2]

        color_name, color_rgb = colors[index]
        chosen_pillar = np.random.choice(spokes)
        chosen_pillar.set_color(color_rgb)
        _, _, z = self.success_detector.get_position()
        x, y, _ = chosen_pillar.get_position()
        self.success_detector.set_position([x, y, z])

        color_choices = np.random.choice(
            list(range(index)) + list(range(index + 1, len(colors))),
            size=2, replace=False)
        spokes.remove(chosen_pillar)
        for spoke, i in zip(spokes, color_choices):
            _, rgb = colors[i]
            spoke.set_color(rgb)

        b = SpawnBoundary([Shape('hannoi_square_boundary0')])
        b.sample(self.square_ring)

        return ['put the ring on the %s spoke' % color_name,
                'slide the ring onto the %s colored spoke' % color_name,
                'place the ring onto the %s spoke' % color_name]

    def variation_count(self) -> int:
        return len(colors)
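One detail in Example #3 worth isolating: the success detector keeps its own height and only inherits the chosen spoke's planar position, so the detection volume stays at ring-drop height no matter which spoke is selected. A minimal sketch of the pattern using PyRep's `get_position`/`set_position` (the function name is illustrative):

def move_detector_over(detector, anchor) -> None:
    # Keep the detector's calibrated z; follow the anchor in x/y only.
    _, _, z = detector.get_position()
    x, y, _ = anchor.get_position()
    detector.set_position([x, y, z])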
Example #4
# NOTE: this example targets a custom snake-robot fork of RLBench.
# OutOfBoundCondition and the robot_body.get_snake_* accessors are not part
# of stock RLBench; their import paths are not shown here.
import time
from collections import deque
from typing import List

import numpy as np
from pyrep import PyRep
from pyrep.objects.proximity_sensor import ProximitySensor
from pyrep.objects.shape import Shape
from rlbench.backend.conditions import DetectedCondition
from rlbench.backend.observation import Observation
from rlbench.backend.robot import Robot
from rlbench.backend.task import Task
from rlbench.const import colors


class ReachTarget(Task):
    def __init__(self, pyrep: PyRep, robot: Robot):
        super().__init__(pyrep, robot)
        self.target = None
        self.success_sensor = None
        self._epi_len = 1500
        self._tar_pos = None
        self._last_rob_pos_queue = None
        self.init_tar_rob_dis = None
        self._max_dis = 2
        self.dis_del_reward_max = None
        self.dis_del_reward_min = None
        self.angle_reward_min = None
        self.angle_reward_max = None

        # goal information supplement
        # planar coordinate (x,y) of the target
        self.endgoal_dim = 2
        self.subgoal_dim = 2
        # robot's coordinate (x,y) and positions of all joints
        # self.subgoal_dim = self.robot.robot_body.get_joint_count() + 2
        # self.subgoal_bounds = np.concatenate((np.array([-10, 10]), np.array([-10, 10]),
        #                                       [-np.pi/2, np.pi/2] * self.robot.robot_body.get_joint_count()))
        self.subgoal_bounds = np.concatenate(
            (np.array([0, 3]), np.array([-2, 2])))
        self.subgoal_bounds = np.reshape(self.subgoal_bounds, (-1, 2))
        self.subgoal_bounds_symmetric = [
            (self.subgoal_bounds[i][1] - self.subgoal_bounds[i][0]) / 2
            for i in range(self.subgoal_bounds.shape[0])
        ]
        self.subgoal_offset = [
            self.subgoal_bounds[i][1] - self.subgoal_bounds_symmetric[i]
            for i in range(self.subgoal_bounds.shape[0])
        ]
        self.endgoal_thresholds = np.array([0.1, 0.1])
        # self.subgoal_thresholds = np.concatenate((np.array([0.3, 0.3]),
        #                                           np.array([np.deg2rad(10)]*self.robot.robot_body.get_joint_count())))
        self.subgoal_thresholds = np.array([0.1, 0.1])

    def init_task(self) -> None:
        self.target = Shape('target')
        self.success_sensor = ProximitySensor('success')
        self.register_success_conditions([
            DetectedCondition(self.robot.robot_body.get_snake_head(),
                              self.success_sensor)
        ])
        self.register_fail_conditions([
            OutOfBoundCondition(self.robot.robot_body.get_snake_head(),
                                self.target, self._max_dis)
        ])

    def init_episode(self, index: int) -> List[str]:
        # set color of the target
        color_name, color_rgb = colors[index]
        self.target.set_color(color_rgb)

        # set random position of the target
        # rand_x = np.random.rand()
        # rand_y = np.random.rand()
        # Re-seed from the wall clock so the sampled target moves roughly
        # every 10 seconds across runs.
        ltime = time.localtime()
        np.random.seed(ltime.tm_hour * 3600 + ltime.tm_min * 60 +
                       ltime.tm_sec // 10)
        rand_x = np.random.uniform(0.0, 1.0)
        arc_angle = 70
        radius = 1.6
        # Sample the target inside a circular sector whose apex sits at
        # (radius, 0): left of the chord at x = radius - radius_cos the
        # y-range follows the circle, otherwise the sector's straight edges.
        radius_cos = radius * np.cos(np.deg2rad(arc_angle / 2))
        if rand_x < radius - radius_cos:
            ymax = np.sqrt(radius**2 - (rand_x - radius)**2)
        else:
            ymax = (radius - rand_x) * np.tan(np.deg2rad(arc_angle / 2))
        rand_y = np.random.uniform(-ymax, ymax)

        self.target.set_position([rand_x, rand_y, 0.04])
        self.success_sensor.set_position([rand_x, rand_y, 0.04])
        # self.target.set_position([0.4, -1.0, 0.04])
        # self.success_sensor.set_position([0.4, -1.0, 0.04])
        # print("(x,y) = ", (rand_x, rand_y))
        self._tar_pos = np.array(self.target.get_position())
        self._last_rob_pos_queue = deque(maxlen=50)
        self._last_rob_pos_queue.append(
            np.array(self.robot.robot_body.get_snake_head_pos()))
        cur_head_pos = np.array(self.robot.robot_body.get_snake_head_pos())
        self.init_tar_rob_dis = np.sqrt(
            np.sum((self._tar_pos[:2] - cur_head_pos[:2])**2))

        self.robot.robot_body.init_state()

        return ['reach the %s sphere target' % color_name]

    def variation_count(self) -> int:
        return len(colors)

    def get_reward(self) -> float:
        # Unused: shaping is provided by get_short_term_reward and
        # get_long_term_reward below.
        pass

    def get_goal(self) -> list:
        return self.get_target_pos()[:2]

    def get_short_term_reward(self) -> float:
        cur_head_pos = np.array(self.robot.robot_body.get_snake_head_pos())
        cur_tail_pos = np.array(self.robot.robot_body.get_snake_tail_pos())
        cur_dis = np.sqrt(np.sum((self._tar_pos[:2] - cur_head_pos[:2])**2))
        body_len = np.sqrt(np.sum((cur_tail_pos[:2] - cur_head_pos[:2])**2))
        dis_reward = -cur_dis / self.init_tar_rob_dis
        dis_reward = np.clip(dis_reward, -1, 0)

        if len(self._last_rob_pos_queue) == 50:
            last_rob_pos = self._last_rob_pos_queue[0]
            last_dis = np.sqrt(
                np.sum((self._tar_pos[:2] - last_rob_pos[:2])**2))
            dis_del_reward = last_dis - cur_dis
        else:
            dis_del_reward = 0
        self._last_rob_pos_queue.append(cur_head_pos)
        if self.dis_del_reward_min is None:
            self.dis_del_reward_min = dis_del_reward
            self.dis_del_reward_max = dis_del_reward
        elif self.dis_del_reward_min > dis_del_reward:
            self.dis_del_reward_min = dis_del_reward
        elif self.dis_del_reward_max < dis_del_reward:
            self.dis_del_reward_max = dis_del_reward
        if self.dis_del_reward_min != self.dis_del_reward_max:
            dis_del_reward = (dis_del_reward - self.dis_del_reward_min) / (
                self.dis_del_reward_max - self.dis_del_reward_min) - 1

        k1 = (cur_head_pos[1] - self._tar_pos[1]) / (cur_head_pos[0] -
                                                     self._tar_pos[0] + 1e-9)
        angle_reward = -np.clip(np.abs(np.arctan(k1)), 0, 1)

        k2 = (cur_tail_pos[1] - cur_head_pos[1]) / (cur_tail_pos[0] -
                                                    cur_head_pos[0] + 1e-9)
        posture_reward = -np.clip(np.abs(np.arctan(k2)), 0, 1)

        # Cosine of the angle at the head between the head-to-target and
        # head-to-tail directions (clipped to keep arccos in its domain).
        cos_angle = ((self._tar_pos[0] - cur_head_pos[0]) *
                     (cur_tail_pos[0] - cur_head_pos[0]) +
                     (self._tar_pos[1] - cur_head_pos[1]) *
                     (cur_tail_pos[1] - cur_head_pos[1])
                     ) / (cur_dis * body_len + 1e-9)
        angle_reward2 = np.abs(np.arccos(np.clip(cos_angle, -1.0, 1.0))) - 1
        # if self.angle_reward_min is None:
        #     self.angle_reward_min = angle_reward
        #     self.angle_reward_max = angle_reward
        # elif self.angle_reward_min > angle_reward:
        #     self.angle_reward_min = angle_reward
        # elif self.angle_reward_max < angle_reward:
        #     self.angle_reward_max = angle_reward
        # if self.angle_reward_min != self.angle_reward_max:
        #     angle_reward = (angle_reward - self.angle_reward_min) / (self.angle_reward_max - self.angle_reward_min)
        # print(dis_del_reward, dis_reward, angle_reward)
        # Experimental self-normalizing weights, currently unused (and unsafe:
        # the denominator is zero when all three terms are zero):
        # w1 = 1 - dis_reward / (dis_reward + angle_reward + posture_reward)
        # w2 = 1 - angle_reward / (dis_reward + angle_reward + posture_reward)
        # w3 = 1 - posture_reward / (dis_reward + angle_reward + posture_reward)
        # return dis_reward * w1 + angle_reward * w2 + posture_reward * w3
        return dis_reward + 0.1 * angle_reward + 0.1 * posture_reward
        # return dis_reward + 0.1 * angle_reward2

    def get_long_term_reward(self, timeout) -> float:
        success, _ = self.success()
        fail, _ = self.failure()
        if success:
            return 200
        elif fail:
            return -2
        elif timeout:
            cur_head_pos = np.array(self.robot.robot_body.get_snake_head_pos())
            cur_dis = np.sqrt(np.sum(
                (self._tar_pos[:2] - cur_head_pos[:2])**2))
            timeout_reward = 2 * (1 - cur_dis / self.init_tar_rob_dis) - 1
            timeout_reward = np.clip(timeout_reward, -1, 1)
            return timeout_reward * 100
        else:
            return 0

    def project_state_to_subgoal(self):
        robot_position = np.array(
            self.robot.robot_body.get_snake_head_pos())[:2]
        # joint_position = np.array(self.robot.robot_body.get_joint_positions())
        # subgoal = np.concatenate((robot_position, joint_position))
        subgoal = robot_position
        return subgoal

    def project_state_to_endgoal(self):
        robot_position = np.array(
            self.robot.robot_body.get_snake_head_pos())[:2]
        endgoal = robot_position
        return endgoal

    def get_target_pos(self) -> List[float]:
        return self.target.get_position()

    def get_target_angle(self) -> List[float]:
        robot_pos = np.array(self.robot.robot_body.get_snake_head_pos())[:2]
        target_pos = self._tar_pos[:2]
        k = (robot_pos[1] - target_pos[1]) / (robot_pos[0] - target_pos[0] +
                                              1e-8)
        angle = np.arctan(k)
        # print("angle = ", np.rad2deg(angle))
        return [angle]

    def get_robot_angle(self) -> List[float]:
        robot_angle = self.robot.robot_body.get_snake_angle()
        return [robot_angle]

    def compute_reward(self, achieved_goal, desired_goal) -> np.ndarray:
        goal_dis = np.sqrt(
            np.sum((achieved_goal[:, :2] - desired_goal[:, :2])**2, axis=1))
        reward = -goal_dis / self.init_tar_rob_dis
        reward = np.clip(reward, -1, 0)
        return reward

    @property
    def episode_len(self) -> int:
        return self._epi_len

    @staticmethod
    def decorate_observation(observation: Observation) -> Observation:
        """Can be used for tasks that want to modify the observations.

        Usually not used.

        :param observation: The Observation for this time step.
        :return: The modified Observation.
        """

        return observation
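The `subgoal_bounds_symmetric` / `subgoal_offset` arithmetic in `__init__` is the usual half-range/midpoint decomposition used by hierarchical agents (e.g. HAC-style implementations) to rescale a policy's tanh output onto a goal box. How the higher-level policy consumes these fields is not shown in the snippet, so the usage below is an assumption:

import numpy as np

# Bounds as in ReachTarget.__init__: x in [0, 3], y in [-2, 2].
subgoal_bounds = np.array([[0.0, 3.0], [-2.0, 2.0]])
symmetric = (subgoal_bounds[:, 1] - subgoal_bounds[:, 0]) / 2  # half-ranges
offset = subgoal_bounds[:, 1] - symmetric                      # midpoints


def scale_action_to_subgoal(action: np.ndarray) -> np.ndarray:
    # Map a policy output in [-1, 1]^2 onto the subgoal box.
    return action * symmetric + offset


print(scale_action_to_subgoal(np.array([-1.0, 0.5])))  # -> [0. 1.]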