Example #3
class ErgoReacherLiveEnv(gym.Env):
    def __init__(self, multi=False, multi_no=3, tracking=False):
        self.multigoal = multi
        self.n_goals = multi_no
        self.tracking = tracking
        self.rand = Randomizer()

        self.goal = None

        # self.step_in_episode = 0

        self.metadata = {
            'render.modes': ['human', 'rgb_array']
        }

        self.rhis = RandomPointInHalfSphere(0.0, 0.0369, 0.0437,
                                            radius=0.2022, height=0.2610,
                                            min_dist=0.1, halfsphere=True)

        # observation = 4 joints + 4 velocities + 2 coordinates for target
        joints_no = 4
        if self.tracking:
            joints_no = 3

        self.observation_space = spaces.Box(low=-1, high=1, shape=(joints_no + joints_no + 2,), dtype=np.float32)

        # action = 4 joint angles
        self.action_space = spaces.Box(low=-1, high=1, shape=(joints_no,), dtype=np.float32)

        self.diffs = [JOINT_LIMITS[i][1] - JOINT_LIMITS[i][0] for i in range(6)]

        self.cam = Camera(color=True)
        self.tracker = Tracker(TRACKING_GREEN["maxime_lower"], TRACKING_GREEN["maxime_upper"])
        # self.tracker = Tracker(TRACKING_GREEN["duct_lower"], TRACKING_GREEN["duct_upper"])
        if self.tracking:
            self.tracker_goal = Tracker(TRACKING_YELLOW["duckie_lower"], TRACKING_YELLOW["duckie_upper"])

        self.calibration = np.load(os.path.join(config_dir(), "calib-ergoreacher-adr.npz"))["calibration"].astype(
            np.int16)

        self.last_step_time = time.time()
        self.pts_tip = deque(maxlen=32)
        self.pts_goal = deque(maxlen=32)

        self.last_frame = np.ones((480, 640, 3), dtype=np.uint8)

        self.pause_counter = 0

        self.last_speed = 100

        self.goals_done = 0

        # self.goal_states=[]

        # for _ in range(10000):
        #     goal = self.rhis.sampleSimplePoint()
        #     self.goal_states.append(self._goal2pixel(goal))

        self._init_robot()

        super().__init__()

    def _init_robot(self):
        self.controller = ZMQController(host="flogo3.local")
        self._setSpeedCompliance()

    def _setSpeedCompliance(self):
        self.controller.compliant(False)
        self.controller.set_max_speed(500)  # default: 100

    def setSpeed(self, speed):
        assert 0 < speed < 1000
        self.controller.set_max_speed(speed)
        self.last_speed = speed

    def seed(self, seed=None):
        np.random.seed(seed)
        return [seed]

    def reset(self):
        self.setSpeed(100)  # do resetting at a normal speed

        if self.multigoal:
            # sample N goals up front and store the distance between consecutive goals;
            # these per-leg distances are subtracted from the total during reward calculation
            self.goal_distances = []
            self.goal_positions = []
            for goal_idx in range(self.n_goals):
                point = self.rhis.sampleSimplePoint()
                self.goal_positions.append(point)
            for goal_idx in range(self.n_goals - 1):
                dist = np.linalg.norm(self.goal_positions[goal_idx] -
                                      self.goal_positions[goal_idx + 1])
                self.goal_distances.append(dist)

        self.gripper_closed = False
        self.gripper_closed_frames = 0
        self.closest_distance = np.inf
        self.ready_to_close = 0  # counts consecutive frames where the tip got closer to the goal
        self.tracking_frames = 0

        if not self.tracking:
            self.goal = self.rhis.sampleSimplePoint()

        # goals = []

        # for _ in range(100000):
        #     goals.append(self.rhis.sampleSimplePoint())
        #
        # goals = np.array(goals)
        # print ("x min/max",goals[:,1].min(),goals[:,1].max())
        # print ("y min/max",goals[:,2].min(),goals[:,2].max())

        qpos = np.random.uniform(low=-0.2, high=0.2, size=6)
        qpos[[0, 3]] = 0

        self.pts_tip.clear()

        add_text(self.last_frame, "=== RESETTING ===")

        if self.pause_counter > ITERATIONS_MAX:
            self.pause_counter = 0
            self.controller.compliant(True)
            input("\n\n=== MAINTENANCE: PLEASE CHECK THE ROBOT AND THEN PRESS ENTER TO CONTINUE ===")
            self.controller.compliant(False)
            time.sleep(.5)  # wait briefly to make sure all joints are non-compliant / powered

        # this counts as rest position
        self.controller.goto_normalized(qpos)

        cv2.imshow("Frame", self.last_frame)
        cv2.waitKey(1000)

        if self.multigoal:

            while True:
                frame = Camera.to_numpy(self.cam.get_color())[:, :, ::-1]  # RGB to BGR for cv2
                hsv = self.tracker.blur_img(frame)
                mask_tip = self.tracker.prep_image(hsv)

                # center of mass, radius of enclosing circle, x/y of enclosing circle
                center_tip, radius_tip, x_tip, y_tip = self.tracker.track(mask_tip)

                # grab more frames until the green blob is big enough / visible
                if center_tip is not None and radius_tip > DETECTION_RADIUS:
                    break

            pos_tip = self._pixel2goal(center_tip)

            distance = np.linalg.norm(np.array(self.goal[1:]) - np.array(pos_tip))

            self.goal_distances.append(distance)

        self.setSpeed(self.last_speed)

        self.last_step_time = time.time()

        return self._get_obs()

    def _get_obs(self):
        if self.goal is None:
            return np.zeros(8)

        pv = self.controller.get_posvel()
        if not self.tracking:
            self.observation = self._normalize(pv)[[1, 2, 4, 5, 7, 8, 10, 11]]  # leave out joint0/3
        else:
            self.observation = self._normalize(pv)[[1, 2, 4, 7, 8, 10]]  # leave out joint0/3/5
        self.observation = np.hstack((self.observation, self.rhis.normalize(self.goal)[1:]))
        return self.observation

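    # _normalize maps raw readings linearly to [-1, 1]; the constants assume
    # positions arrive in degrees (about [-90, 90]) and velocities in deg/s
    # (about [-300, 300]).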
    def _normalize(self, pos):
        pos = np.array(pos).astype('float32')
        pos[:6] = ((pos[:6] + 90) / 180) * 2 - 1  # positions
        pos[6:] = ((pos[6:] + 300) / 600) * 2 - 1  # velocities
        return pos

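    # The two helpers below convert between camera pixels and robot-frame
    # coordinates via linear interpolation against reference points stored in
    # self.calibration (pixel positions of known robot locations).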
    def _pixel2goal(self, camcenter):
        pos = np.array(camcenter).astype(np.int16)
        x_n = (pos[0] - self.calibration[0, 0]) / (self.calibration[1, 0] - self.calibration[0, 0])
        x_d = .224 - (x_n * (.224 + .148))
        y_n = (pos[1] - self.calibration[2, 1]) / (self.calibration[1, 1] - self.calibration[2, 1])
        y_d = .25 - (y_n * (.23 - .046))  # tuned from the .016 used in _goal2pixel below
        return np.array([x_d, y_d])

    def _goal2pixel(self, goal):
        # print (self.goal[1:], self.calibration)

        # x_n = (float(goal[1]) + .0979) / (.2390 + .0979)
        # x_d = x_n * (self.calibration[0, 0] - self.calibration[1, 0]) + self.calibration[1, 0]
        # y_n = (float(goal[2]) - .0437) / (.2458 - .0437)
        # y_d = self.calibration[0, 1] - (y_n * (self.calibration[0, 1] - self.calibration[2, 1]))

        x_n = (float(goal[1]) + .148) / (.224 + .148)
        x_d = x_n * (self.calibration[0, 0] - self.calibration[1, 0]) + self.calibration[1, 0]
        y_n = (float(goal[2]) - .016) / (.25 - .016)
        y_d = self.calibration[0, 1] - (y_n * (self.calibration[0, 1] - self.calibration[2, 1]))
        # print ([x_d, y_d])
        return np.array([int(round(x_d)), int(round(y_d))])

    def _render_img(self, frame, center, radius, x, y, pts, color_a=(0, 255, 255), color_b=(0, 0, 255)):
        g = self._goal2pixel(self.goal)
        cv2.circle(frame, (g[0], g[1]), 5, (255, 0, 255), 3)

        # circle center
        cv2.circle(frame, (int(x), int(y)), int(radius), color_a, 2)

        # center of mass
        cv2.circle(frame, center, 5, color_b, -1)

        # update the points queue
        pts.appendleft(center)

        # loop over the set of tracked points
        for i in range(1, len(pts)):
            # skip if either of the tracked points is None
            if pts[i - 1] is None or pts[i] is None:
                continue

            # otherwise, compute the thickness of the line and
            # draw the connecting lines
            thickness = int(np.sqrt(32 / float(i + 1)) * 2.5)
            cv2.line(frame, pts[i - 1], pts[i], color_b, thickness)

        self.last_frame = frame.copy()

        return frame

    def _get_reward(self):
        done = False
        center_goal = None

        while True:
            frame = Camera.to_numpy(self.cam.get_color())[:, :, ::-1]  # RGB to BGR for cv2
            hsv = self.tracker.blur_img(frame)
            mask_tip = self.tracker.prep_image(hsv)

            # center of mass, radius of enclosing circle, x/y of enclosing circle
            center_tip, radius_tip, x_tip, y_tip = self.tracker.track(mask_tip)

            if self.tracking:
                mask_goal = self.tracker_goal.prep_image(hsv)
                center_goal, radius_goal, x_goal, y_goal = self.tracker_goal.track(mask_goal)

            # grab more frames until the green blob is big enough / visible
            if center_tip is not None and radius_tip > DETECTION_RADIUS:
                break

        frame2 = np.ascontiguousarray(frame, dtype=np.uint8)

        if self.tracking and center_goal is not None:
            self.goal = np.zeros(3, dtype=np.float32)
            self.goal[1:] = self._pixel2goal(center_goal)

            frame2 = self._render_img(frame2, center_goal, radius_goal, x_goal, y_goal, pts=self.pts_goal)

        if self.goal is not None:
            frame2 = self._render_img(frame2, center_tip, radius_tip, x_tip, y_tip, pts=self.pts_tip,
                                      color_a=(255, 255, 0), color_b=(255, 0, 0))

        if self.tracking and center_goal is None:
            self.goal = None
            return 0, False, np.inf, frame2.copy()

        pos_tip = self._pixel2goal(center_tip)

        reward = np.linalg.norm(np.array(self.goal[1:]) - np.array(pos_tip))
        distance = reward.copy()
        reward *= -1  # the reward is the negative distance

        if not self.multigoal:
            if reward > MIN_DIST:  # MIN_DIST is negative, so this fires within |MIN_DIST| of the goal
                done = True
                reward = 1

        else:
            reward *= -1  # back to a positive distance for the bookkeeping below
            # self.goal = self.rhis.sampleSimplePoint()

            dirty = False  # in case we _just_ hit the goal
            if -reward > MIN_DIST:
                self.goals_done += 1
                if self.goals_done == self.n_goals:
                    done = True
                else:
                    # TODO: MOVE BALL
                    dirty = True

            if done or self.goals_done == self.n_goals:
                reward = 1
            else:
                # reward: 1 minus (distance to the current target plus the
                # still-open leg distances) over the total of all leg distances
                if dirty:
                    # take it off before the reward calc
                    self.goals_done -= 1

                remaining = sum(self.goal_distances[:-(self.goals_done + 1)])
                reward = 1 - (reward + remaining) / sum(self.goal_distances)
                if dirty:
                    # add it back after the reward calc
                    self.goals_done += 1

        return reward, done, distance, frame2.copy()

    def step(self, action):
        action_ = np.zeros(6, np.float32)
        if not self.tracking:
            action_[[1, 2, 4, 5]] = action
        else:
            action_[[1, 2, 4]] = action
            action_[5] = 50 / 90  # joint 5 drives the gripper: hold it open by default

            if self.gripper_closed:
                self.gripper_closed_frames += 1
                action_[5] = -10 / 90  # close the gripper

                if self.gripper_closed_frames >= GRIPPER_CLOSED_MAX_FRAMES:
                    self.gripper_closed = False
                    self.gripper_closed_frames = 0

        action = np.clip(action_, -1, 1)

        if self.goal is not None:
            self.controller.goto_normalized(action)

        reward, done, distance, frame = self._get_reward()
        cv2.imshow("Frame", frame)
        cv2.waitKey(1)

        if self.tracking:
            done = False

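        # Gripper auto-close heuristic: while the tip keeps getting closer to
        # the goal, count approach frames; once the distance stops improving
        # after more than CLOSING_FRAMES approach frames, close the gripper on
        # the next step. If progress stalls for too long, reset the baseline.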
        if not self.gripper_closed:
            if distance < self.closest_distance:
                self.closest_distance = distance
                self.ready_to_close += 1
            else:
                if self.ready_to_close > CLOSING_FRAMES:
                    self.ready_to_close = 0
                    self.closest_distance = np.inf
                    self.gripper_closed = True  # only takes effect on next step
                else:
                    self.tracking_frames += 1
                    if self.tracking_frames > 50:
                        self.closest_distance = np.inf

        dt = (time.time() - self.last_step_time) * 1000
        if dt < MAX_REFRESHRATE:
            time.sleep((MAX_REFRESHRATE - dt) / 1000)

        self.last_step_time = time.time()

        self.pause_counter += 1

        return self._get_obs(), reward, done, {"distance": distance, "img": frame}

    def render(self, mode='human', close=False):
        pass
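
# A minimal usage sketch (an assumption, not part of the original file): drive
# the live environment with random actions. This requires the physical robot,
# camera, and color trackers configured as in the class above.
if __name__ == "__main__":
    env = ErgoReacherLiveEnv(multi=False, tracking=False)
    obs = env.reset()
    for _ in range(100):
        action = env.action_space.sample()  # 4 normalized joint targets
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()
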
class ErgoReacherEnv(gym.Env):
    def __init__(self,
                 headless=False,
                 simple=False,
                 backlash=False,
                 max_force=1,
                 max_vel=18,
                 goal_halfsphere=False,
                 multi_goal=False,
                 goals=3,
                 gripper=False):
        self.simple = simple
        self.backlash = backlash
        self.max_force = max_force
        self.max_vel = max_vel
        self.multigoal = multi_goal
        self.n_goals = goals
        self.gripper = gripper

        self.goals_done = 0
        self.is_initialized = False
        self.robot = SingleRobot(debug=not headless, backlash=backlash)
        self.ball = Ball()
        self.rhis = RandomPointInHalfSphere(0.0,
                                            0.0369,
                                            0.0437,
                                            radius=RADIUS,
                                            height=0.2610,
                                            min_dist=0.1,
                                            halfsphere=goal_halfsphere)
        self.goal = None
        self.dist = DistanceBetweenObjects(bodyA=self.robot.id,
                                           bodyB=self.ball.id,
                                           linkA=13,
                                           linkB=1)
        self.episodes = 0  # used for resetting the sim every so often
        self.restart_every_n_episodes = 1000

        self.metadata = {'render.modes': ['human']}

        if not simple and not gripper:  # default
            # observation = 6 joints + 6 velocities + 3 coordinates for target
            self.observation_space = spaces.Box(low=-1,
                                                high=1,
                                                shape=(6 + 6 + 3, ),
                                                dtype=np.float32)
            # action = 6 joint angles
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(6, ),
                                           dtype=np.float32)

        elif not gripper:  # simple
            # observation = 4 joints + 4 velocities + 2 coordinates for target
            self.observation_space = spaces.Box(low=-1,
                                                high=1,
                                                shape=(4 + 4 + 2, ),
                                                dtype=np.float32)
            # action = 4 joint angles
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(4, ),
                                           dtype=np.float32)
        else:  # gripper
            # observation = 3 joints + 3 velocities + 2 coordinates for target
            self.observation_space = spaces.Box(low=-1,
                                                high=1,
                                                shape=(3 + 3 + 2, ),
                                                dtype=np.float32)
            # action = 3 joint angles, [-,1,2,-,4,-]
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(3, ),
                                           dtype=np.float32)

        super().__init__()

    def seed(self, seed=None):
        np.random.seed(seed)
        return [seed]

    def step(self, action):
        if self.simple or self.gripper:
            action_ = np.zeros(6, np.float32)
            if self.simple:
                action_[[1, 2, 4, 5]] = action
            if self.gripper:
                action_[[1, 2, 4]] = action
            action = action_

        self.robot.act2(action, max_force=self.max_force, max_vel=self.max_vel)
        self.robot.step()

        reward, done, dist = self._getReward()

        obs = self._get_obs()
        return obs, reward, done, {"distance": dist}

    def _getReward(self):
        done = False

        reward = self.dist.query()
        distance = reward.copy()
        if not self.multigoal:  # this is the normal mode
            reward *= -1  # the reward is the negative distance
            if reward > GOAL_REACHED_DISTANCE:  # GOAL_REACHED_DISTANCE is negative; a bit arbitrary, but works well
                self.goals_done += 1
                done = True
                reward = 1
        else:
            if -reward > GOAL_REACHED_DISTANCE:
                self.goals_done += 1
                if self.goals_done == self.n_goals:
                    done = True
                else:
                    robot_state = self._get_obs()[:8]
                    self.move_ball()
                    self._set_state(robot_state)  # move robot back after ball has moved
                    self.robot.step()
                    reward = self.dist.query()

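            # progress-style reward, roughly in [0, 1]: each goal already
            # reached contributes DIA (assumed to bound the tip-to-goal
            # distance), plus how far the tip has closed on the current goal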
            reward = (self.goals_done * DIA +
                      (DIA - reward)) / (self.n_goals * DIA)

            if done:
                reward = 1

        if self.gripper:
            reward *= 10
            if self.goals_done == RESET_EVERY:
                self.goals_done = 0
                self.reset(True)
            done = False

            # normalize - [-1,1] range:
            # reward = reward * 2 - 1

        return reward, done, distance

    def _setDist(self):
        self.dist.bodyA = self.robot.id
        self.dist.bodyB = self.ball.id

    def move_ball(self):
        if self.simple or self.gripper:
            self.goal = self.rhis.sampleSimplePoint()
        else:
            self.goal = self.rhis.samplePoint()

        self.dist.goal = self.goal
        self.ball.changePos(self.goal, 4)

        for _ in range(20):
            self.robot.step()  # we need this to move the ball

    def reset(self, forced=False):
        self.goals_done = 0

        self.episodes += 1
        if self.episodes >= self.restart_every_n_episodes:
            self.robot.hard_reset()  # this always has to go first
            self.ball.hard_reset()
            self._setDist()
            self.episodes = 0

        if self.is_initialized:
            robot_state = self._get_state()

        self.move_ball()

        if self.gripper and self.is_initialized:
            self._set_state(robot_state[:6])  # move robot back after ball has moved
            self.robot.step()

        if forced or not self.gripper:  # full reset unless we're in gripper mode (or when forced)
            qpos = np.random.uniform(low=-0.2, high=0.2, size=6)

            if self.simple:
                qpos[[0, 3]] = 0

            self.robot.reset()
            self.robot.set(np.hstack((qpos, [0] * 6)))
            self.robot.act2(qpos)
            self.robot.step()

        self.is_initialized = True

        return self._get_obs()

    def _get_obs(self):
        obs = np.hstack([self.robot.observe(), self.rhis.normalize(self.goal)])
        if self.simple:
            obs = obs[[1, 2, 4, 5, 7, 8, 10, 11, 13, 14]]
        if self.gripper:
            obs = obs[[1, 2, 4, 7, 8, 10, 13, 14]]
        return obs

    def render(self, mode='human', close=False):
        pass

    def close(self):
        self.robot.close()

    def _get_state(self):
        return self.robot.observe()

    def _set_state(self, posvel):
        if self.simple or self.gripper:
            new_state = np.zeros(12, dtype=np.float32)
            if self.simple:
                new_state[[1, 2, 4, 5, 7, 8, 10, 11]] = posvel
            if self.gripper:
                new_state[[1, 2, 4, 7, 8, 10]] = posvel
        else:
            new_state = np.array(posvel)
        self.robot.set(new_state)
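
# A minimal usage sketch (an assumption, not part of the original file): run
# the simulated reacher headless, in the 4-joint "simple" mode, with random
# actions.
if __name__ == "__main__":
    env = ErgoReacherEnv(headless=True, simple=True)
    obs = env.reset()
    for _ in range(300):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    env.close()
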
Example #6
import math
import time

import pybullet as p

# This fragment assumes earlier setup (not shown here) that connected to
# pybullet and defined `robot`, `ball`, `motors`, and `frequency`, and that
# `RandomPointInHalfSphere` and `DistanceBetweenObjects` are imported from the
# surrounding project.

debugParams = []

for i in range(len(motors)):
    motor = p.addUserDebugParameter("motor{}".format(i + 1), -1, 1, 0)
    debugParams.append(motor)

read_pos = p.addUserDebugParameter("read pos - slide right", 0, 1, 0)
read_pos_once = True

start = time.time()

rhis = RandomPointInHalfSphere(0.0,
                               0.0369,
                               0.0437,
                               radius=0.2022,
                               height=0.2610,
                               min_dist=0.0477)

dist = DistanceBetweenObjects(bodyA=robot, bodyB=ball.id, linkA=13, linkB=-1)

for i in range(frequency * 30):
    motorPos = []
    for m in range(len(motors)):
        pos = (math.pi / 2) * p.readUserDebugParameter(debugParams[m])
        motorPos.append(pos)
        p.setJointMotorControl2(robot,
                                motors[m],
                                p.POSITION_CONTROL,
                                targetPosition=pos)

class ErgoReacherHeavyEnv(gym.Env):

  def __init__(self,
               headless=False,
               simple=False,
               max_force=1000,
               max_vel=100,
               goal_halfsphere=False,
               backlash=.1,
               double_goal=False):
    self.simple = simple
    self.max_force = max_force
    self.max_vel = max_vel
    self.double_goal = double_goal

    self.robot = SingleRobot(
        debug=not headless, heavy=True, new_backlash=backlash, silent=True)
    self.ball = Ball(1)
    self.rhis = RandomPointInHalfSphere(
        0.0,
        3.69,
        4.37,
        radius=RADIUS,
        height=26.10,
        min_dist=10.,
        halfsphere=goal_halfsphere)
    self.goal = None
    self.goals_done = 0
    self.goal_dirty = False
    self.dist = DistanceBetweenObjects(
        bodyA=self.robot.id, bodyB=self.ball.id, linkA=19, linkB=1)
    self.episodes = 0  # used for resetting the sim every so often
    self.restart_every_n_episodes = 1000

    self.force_urdf_reload = False

    self.metadata = {'render.modes': ['human']}

    if not simple:
      # observation = 6 joints + 6 velocities + 3 coordinates for target
      self.observation_space = spaces.Box(
          low=-1, high=1, shape=(6 + 6 + 3,), dtype=np.float32)
      # action = 6 joint angles
      self.action_space = spaces.Box(
          low=-1, high=1, shape=(6,), dtype=np.float32)

    else:
      # observation = 4 joints + 4 velocities + 2 coordinates for target
      self.observation_space = spaces.Box(
          low=-1, high=1, shape=(4 + 4 + 2,), dtype=np.float32)
      # action = 4 joint angles
      self.action_space = spaces.Box(
          low=-1, high=1, shape=(4,), dtype=np.float32)

    super().__init__()

  def seed(self, seed=None):
    np.random.seed(seed)
    return [seed]

  def step(self, action):
    if self.simple:
      action_ = np.zeros(6, np.float32)
      action_[[1, 2, 4, 5]] = action
      action = action_

    self.robot.act2(action, max_force=self.max_force, max_vel=self.max_vel)
    # advance the simulation three substeps per environment step
    self.robot.step()
    self.robot.step()
    self.robot.step()

    reward, done = self._getReward()

    obs = self._get_obs()
    return obs, reward, done, {}

  def _getReward(self):
    done = False

    reward = self.dist.query()
    reward *= -1  # the reward is the negative distance

    if not self.double_goal:  # this is the normal mode
      if reward > -1.6:  # this is a bit arbitrary, but works well
        done = True
        reward = 1
    else:
      if reward > -1.6:
        self.goals_done += 1
        if self.goals_done == MAX_GOALS:
          done = True
        else:
          self.move_ball()
          self.goal_dirty = True

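      # goals still outstanding after the current one; each outstanding goal
      # keeps the reward one goal-to-goal span (RADIUS * 2) lower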
      max_multiplier = (MAX_GOALS - self.goals_done - 1)
      if self.goal_dirty:
        max_multiplier += 1
        self.goal_dirty = False

      # unnormalized: one goal-to-goal span of penalty per outstanding goal
      reward = reward - (RADIUS * 2 * max_multiplier)

      # normalize to the [0, 1] range:
      reward = (reward + (RADIUS * 2 * (MAX_GOALS))) / (
          RADIUS * 2 * (MAX_GOALS))
      if done:
        reward = 1

      # rescale to the [-1, 1] range:
      reward = reward * 2 - 1

    return reward, done

  def _setDist(self):
    self.dist.bodyA = self.robot.id
    self.dist.bodyB = self.ball.id

  def update_backlash(self, new_val):
    self.robot.new_backlash = new_val
    self.force_urdf_reload = True
    # and now on the next self.reset() the new modified URDF will be loaded

  def move_ball(self):
    if self.simple:
      self.goal = self.rhis.sampleSimplePoint()
    else:
      self.goal = self.rhis.samplePoint()

    self.dist.goal = self.goal
    self.ball.changePos(self.goal, 4)

    for _ in range(20):
      self.robot.step()  # we need this to move the ball

  def reset(self):
    self.goals_done = 0
    self.goal_dirty = False

    self.episodes += 1
    if self.force_urdf_reload or self.episodes >= self.restart_every_n_episodes:
      self.robot.hard_reset()  # this always has to go first
      self.ball.hard_reset()
      self._setDist()
      self.episodes = 0
      self.force_urdf_reload = False

    self.move_ball()

    qpos = np.random.uniform(low=-0.2, high=0.2, size=6)

    if self.simple:
      qpos[[0, 3]] = 0

    self.robot.reset()
    self.robot.set(np.hstack((qpos, [0] * 6)))
    self.robot.act2(qpos)
    self.robot.step()

    return self._get_obs()

  def _get_obs(self):
    obs = np.hstack([self.robot.observe(), self.rhis.normalize(self.goal)])
    if self.simple:
      obs = obs[[1, 2, 4, 5, 7, 8, 10, 11, 13, 14]]
    return obs

  def render(self, mode='human', close=False):
    pass

  def close(self):
    self.robot.close()

  def _get_state(self):
    return self.robot.observe()

  def _set_state(self, posvel):
    if self.simple:
      new_state = np.zeros(12, dtype=np.float32)
      new_state[[1, 2, 4, 5, 7, 8, 10, 11]] = posvel
    else:
      new_state = np.array(posvel)
    self.robot.set(new_state)
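
# A minimal usage sketch (an assumption, not part of the original file):
# exercise the heavy variant in double-goal mode with random actions.
if __name__ == "__main__":
  env = ErgoReacherHeavyEnv(headless=True, simple=True, double_goal=True)
  obs = env.reset()
  for _ in range(300):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
      obs = env.reset()
  env.close()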