Exemplo n.º 1
0
    def reset(self):
        self._step_counter = 0
        self.terminated = False
        self.n_contacts = 0
        if not self._first_reset_flag:
            # print('first reset,loading urdf...')
            p.resetSimulation()
            self._first_reset_flag = True
            p.setPhysicsEngineParameter(numSolverIterations=150)
            p.loadURDF(os.path.join(pybullet_data.getDataPath(), "plane.urdf"),
                       [0, 0, 0])
            p.setGravity(0, 0, GRAVITY)

            self._inmoov = inmoov.Inmoov(urdf_path=self.urdf_path,
                                         positional_control=True)
            self._inmoov_id = self._inmoov.inmoov_id

            self._tomato_id = p.loadURDF(
                os.path.join(self.urdf_path, "tomato_plant.urdf"),
                [0.4, 0.4, 0.5])
        print('fast reset,resetting robot joints')
        self._inmoov.reset_joints()
        # p.resetSimulation()
        # p.setPhysicsEngineParameter(numSolverIterations=150)
        # p.setGravity(0., 0., GRAVITY)
        return self.get_observation()
    def reset(self):
        self.terminated = False
        self.n_contacts = 0
        self.n_steps_outside = 0
        p.resetSimulation()
        p.setPhysicsEngineParameter(numSolverIterations=150)
        p.setTimeStep(self._timestep)
        self._inmoov = inmoov.Inmoov(urdf_path=self._urdf_sjtu)
        p.loadURDF(os.path.join(self._urdf_root, "plane.urdf"))
        self.table_uid = p.loadURDF(os.path.join(self._urdf_root, "table/table.urdf"), 0.0000000, 1.00000, 0.50000,
                                    0.000000, 0.000000, 0.0, 1.0)
        # Initialize button position
        x_pos = 0
        y_pos = 1
        if self._random_target:
            x_pos += 0.15 * self.np_random.uniform(-1, 1)
            y_pos += 0.3 * self.np_random.uniform(-1, 1)

        self.button_uid = p.loadURDF("/urdf/simple_button.urdf", [x_pos, y_pos, Z_TABLE])
        self.button_pos = np.array([x_pos, y_pos, Z_TABLE])
        p.setGravity(0, 0, -10)

        self._env_step_counter = 0
        # Close the gripper and wait for the arm to be in rest position
        for _ in range(5):
            self._inmoov.apply_action_pos([0, 0, 0])
            p.stepSimulation()

        # Randomize init arm pos: take 5 random actions
        for _ in range(N_RANDOM_ACTIONS_AT_INIT):
            action = np.zeros(3)
            rand_direction = self.np_random.normal((3,))
            # L2 normalize, so that the random direction is not too high or too low
            rand_direction /= np.linalg.norm(rand_direction, 2)
            action[:3] += DELTA_V_CONTINUOUS * rand_direction
            self._inmoov.apply_action_pos(list(action))
            p.stepSimulation()

        self._observation = self.getExtendedObservation()
        # TODO: why
        self.button_pos = np.array(p.getLinkState(self.button_uid, BUTTON_LINK_IDX)[0])
        self.button_pos[2] += BUTTON_DISTANCE_HEIGHT  # Set the target position on the top of the button
        if self.saver is not None:
            self.saver.reset(self._observation, self.getTargetPos(), self.getGroundTruth())

        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation)

        return np.array(self._observation)
    def __init__(self,
                 urdf_root=pybullet_data.getDataPath(),
                 renders=False,
                 is_discrete=True,
                 multi_view=False,
                 name="inmoov_onearm_tomato_gym",
                 max_distance=0.8,
                 action_repeat=1,
                 shape_reward=False,
                 action_joints=False,
                 record_data=False,
                 random_target=False,
                 force_down=True,
                 state_dim=-1,
                 learn_states=False,
                 verbose=False,
                 save_path='srl_zoo/data/',
                 env_rank=0,
                 srl_pipe=None,
                 srl_model="raw_pixels",
                 **_):
        super(InmoovOneArmTomatoGymEnv,
              self).__init__(srl_model=srl_model,
                             relative_pos=RELATIVE_POS,
                             env_rank=env_rank,
                             srl_pipe=srl_pipe)
        self._timestep = 1. / 240.
        # URDF PATH
        self._urdf_root = urdf_root
        self._urdf_sjtu = "../../urdf_robot/"

        self._action_repeat = action_repeat
        self._observation = []
        self._env_step_counter = 0
        self._renders = renders
        self._width = RENDER_WIDTH
        self._height = RENDER_HEIGHT
        ### TODO: check
        self._cam_dist = 1.1
        self._cam_yaw = 145
        self._cam_pitch = -36
        self._cam_roll = 0
        self._max_distance = max_distance
        ### TODO: delete
        self._shape_reward = shape_reward
        self._random_target = random_target
        self._force_down = force_down
        ### TODO: check
        self.camera_target_pos = (0.0, 0.0, 1.0)

        self._is_discrete = is_discrete
        self.terminated = False
        self.renderer = p.ER_TINY_RENDERER
        self.debug = True
        self.n_contacts = 0
        self.state_dim = state_dim
        self.action_joints = action_joints
        self.relative_pos = RELATIVE_POS
        self.cuda = th.cuda.is_available()
        self.saver = None
        self.multi_view = multi_view
        self.verbose = verbose
        self.max_steps = MAX_STEPS
        self.n_steps_outside = 0

        self.tomato_uid = None
        self.tomato_pos = None
        self._inmoov = inmoov.Inmoov(urdf_path=self._urdf_sjtu)
        ### TODO:delete
        self.table_uid = None
        self.button_pos = None
        self.button_uid = None
        self._kuka = None

        self.action = None
        self.srl_model = srl_model

        if record_data:
            self.saver = EpisodeSaver(name,
                                      max_distance,
                                      state_dim,
                                      globals_=getGlobals(),
                                      relative_pos=RELATIVE_POS,
                                      learn_states=learn_states,
                                      path=save_path)

        if self._renders:
            client_id = p.connect(p.SHARED_MEMORY)
            if client_id < 0:
                p.connect(p.GUI)
            ### TODO:check
            p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])

            self.debug = True
            # Debug sliders for moving the camera
            self.x_slider = p.addUserDebugParameter("x_slider", -10, 10,
                                                    self.camera_target_pos[0])
            self.y_slider = p.addUserDebugParameter("y_slider", -10, 10,
                                                    self.camera_target_pos[1])
            self.z_slider = p.addUserDebugParameter("z_slider", -10, 10,
                                                    self.camera_target_pos[2])
            self.dist_slider = p.addUserDebugParameter("cam_dist", 0, 10,
                                                       self._cam_dist)
            self.yaw_slider = p.addUserDebugParameter("cam_yaw", -180, 180,
                                                      self._cam_yaw)
            self.pitch_slider = p.addUserDebugParameter(
                "cam_pitch", -180, 180, self._cam_pitch)

        else:
            p.connect(p.DIRECT)

        global CONNECTED_TO_SIMULATOR
        CONNECTED_TO_SIMULATOR = True

        if self._is_discrete:
            self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)

        else:
            if self.action_joints:
                # 7 angles for the arm rotation, from -1 to 1
                #TODO:action_dim should be changed
                action_dim = 7
                self._action_bound = 1
            else:
                # 3 direction for the arm movement, from -1 to 1
                action_dim = 3
                self._action_bound = 1
            action_high = np.array([self._action_bound] * action_dim)
            self.action_space = spaces.Box(-action_high,
                                           action_high,
                                           dtype=np.float32)

        if self.srl_model == "ground_truth":
            self.state_dim = self.getGroundTruthDim()
        elif self.srl_model == "joints":
            self.state_dim = self.getJointsDim()
        elif self.srl_model == "joints_position":
            self.state_dim = self.getGroundTruthDim() + self.getJointsDim()

        if self.srl_model == "raw_pixels":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self._height,
                                                       self._width, 3),
                                                dtype=np.uint8)
        else:
            self.observation_space = spaces.Box(low=-np.inf,
                                                high=np.inf,
                                                shape=(self.state_dim, ),
                                                dtype=np.float32)
    def reset(self):
        self.terminated = False
        self.n_contacts = 0
        self.n_steps_outside = 0
        p.resetSimulation()
        p.setPhysicsEngineParameter(numSolverIterations=150)
        p.setTimeStep(self._timestep)
        p.loadURDF(os.path.join(self._urdf_root, "plane.urdf"), [0, 0, -1])

        #self.table_uid = p.loadURDF(os.path.join(self._urdf_root, "table/table.urdf"), 0.5000000, 0.00000, -.820000,
        #                            0.000000, 0.000000, 0.0, 1.0)
        tomato2Id = p.loadURDF(os.path.join(self._urdf_sjtu,
                                            "tomato_plant.urdf"),
                               [0.4, 0.4, 0.5],
                               baseOrientation=[0, 0, 0, 1])

        # Initialize tomato position

        ### TODO:change
        x_pos = 0.5
        y_pos = 0
        if self._random_target:
            x_pos += 0.15 * self.np_random.uniform(-1, 1)
            y_pos += 0.3 * self.np_random.uniform(-1, 1)

        self.button_uid = p.loadURDF("/urdf/simple_button.urdf",
                                     [x_pos, y_pos, Z_TABLE])
        self.button_pos = np.array([x_pos, y_pos, Z_TABLE])

        p.setGravity(0, 0, -9.8)
        self._inmoov = inmoov.Inmoov(urdf_path=self._urdf_sjtu)
        self._env_step_counter = 0
        # Close the gripper and wait for the arm to be in rest position
        for _ in range(500):
            if self.action_joints:
                ### TODO:change
                self._inmoov.applyAction(
                    list(np.array(self._kuka.joint_positions)[:7]) + [0, 0])
            else:
                self._inmoov.applyAction([0, 0, 0, 0, 0])
            p.stepSimulation()

        # Randomize init arm pos: take 5 random actions
        for _ in range(N_RANDOM_ACTIONS_AT_INIT):
            if self._is_discrete:
                action = [0, 0, 0, 0, 0]
                sign = 1 if self.np_random.rand() > 0.5 else -1
                action_idx = self.np_random.randint(3)  # dx, dy or dz
                action[action_idx] += sign * DELTA_V
            else:
                if self.action_joints:
                    joints = np.array(self._kuka.joint_positions)[:7]
                    joints += DELTA_THETA * self.np_random.normal(joints.shape)
                    action = list(joints) + [0, 0]
                else:
                    action = np.zeros(5)
                    rand_direction = self.np_random.normal((3, ))
                    # L2 normalize, so that the random direction is not too high or too low
                    rand_direction /= np.linalg.norm(rand_direction, 2)
                    action[:3] += DELTA_V_CONTINUOUS * rand_direction

            self._kuka.applyAction(list(action))
            p.stepSimulation()

        self._observation = self.getExtendedObservation()

        self.button_pos = np.array(
            p.getLinkState(self.button_uid, BUTTON_LINK_IDX)[0])
        self.button_pos[
            2] += BUTTON_DISTANCE_HEIGHT  # Set the target position on the top of the button
        if self.saver is not None:
            self.saver.reset(self._observation, self.getTargetPos(),
                             self.getGroundTruth())

        if self.srl_model != "raw_pixels":
            return self.getSRLState(self._observation)

        return np.array(self._observation)