Example #1
class RLBenchEnv():
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(self, task_name, state_type_list=['left_shoulder_rgb']):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(
            action_mode, obs_config=obs_config, headless=False)
        self.env.launch()
        try:
            self.task = self.env.get_task(getattr(sys.modules[__name__], task_name))
        except AttributeError:
            raise NotImplementedError('Task %s is not defined in this module' % task_name)

        _, obs = self.task.reset()
        if len(state_type_list)>0:
            self.observation_space=[]
            for state_type in state_type_list:
                state=getattr(obs, state_type)
                self.observation_space.append(spaces.Box(low=-np.inf, high=np.inf, shape=state.shape))
        else:
            raise ValueError('No State Type!')
        self.action_space =  spaces.Box(low=-1.0, high=1.0, shape=(action_mode.action_size,), dtype=np.float32)
        
        self.state_type_list = state_type_list
        self.spec = Spec(task_name)

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass 

    def render(self):
        # render the scene
        pass

    def _get_state(self, obs):
        if len(self.state_type_list)>0:
            state=[]
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    image=np.moveaxis(image, 2, 0)  # change (H, W, C) to (C, H, W) for torch
                    state.append(image)
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State type does not exist!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        obs_, reward, terminate = self.task.step(action)  # reward in original rlbench is binary for success or not
        return self._get_state(obs_), reward, terminate, None


    def close(self):
        self.env.shutdown()
def collect_demos(env: Environment,
                  task: Task,
                  taskname,
                  n_iter_per_var=50,
                  n_demos_per_iter=1000):
    env.launch()
    task_env = env.get_task(task)

    for variation_index in range(1):  #task_env.variation_count()):
        # set variation
        task_env.set_variation(variation_index)
        description, _ = task_env.reset()

        # collect and save demos
        start = time.time()
        for i in range(n_iter_per_var):
            np.random.seed(6)
            demos = task_env.get_demos(n_demos_per_iter,
                                       live_demos=True,
                                       max_attempts=1)
            demos = np.array(demos).flatten()
            print(demos.shape)
            save_to_hdf5(demos, taskname, description, variation_index)
        print("TIME: ", time.time() - start)

    # env.shutdown()
    return demos
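A hedged sketch of how collect_demos might be invoked; CloseMicrowave stands in for any task class, and save_to_hdf5 is assumed to be defined elsewhere in the original module:

# Hypothetical driver for collect_demos (small counts keep the run short).
action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
env = Environment(action_mode, obs_config=ObservationConfig(), headless=True)
demos = collect_demos(env, CloseMicrowave, 'close_microwave',
                      n_iter_per_var=1, n_demos_per_iter=5)
env.shutdown()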
Example #3
def sub_proc(child):
    action_mode = ActionMode()
    env = Environment(action_mode, headless=child)
    env.launch()
    task = env.get_task(CloseMicrowave)

    descriptions, obs = task.reset()

    for i in range(1000):
        obs, reward, terminate = task.step(np.random.normal(np.zeros(action_mode.action_size)))
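Since sub_proc takes the headless flag through its child argument, a sketch of running it in a separate process (assuming only the standard multiprocessing module) could look like this:

# Run the random-action loop above in a headless child process.
import multiprocessing as mp

if __name__ == '__main__':
    p = mp.Process(target=sub_proc, args=(True,))
    p.start()
    p.join()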
Example #4
def act(graph,model,sess):
    obs_config = ObservationConfig()
    # obs_config.set_all(True)

    # These parameters are used to turn off the cameras
    obs_config.right_shoulder_camera.set_all(False)
    obs_config.wrist_camera.set_all(False)
    obs_config.left_shoulder_camera.set_all(False)

    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    env = Environment(
        action_mode, obs_config=obs_config, headless=False)
    env.launch()

    task = env.get_task(ReachTarget)

    training_steps = 150
    episode_length = 60
    obs = None
    gripper = [1.0]  # keep the gripper open

    for i in range(training_steps):
        # Initialize the task once per episode, including the initial observation data
        if i % episode_length == 0:
            print('Reset Episode')
            descriptions, obs = task.reset()
            print(descriptions)
        
        # Network inputs
        vision = obs.front_rgb
        vision = vision[np.newaxis,:]
        state = obs.joint_positions
        state = state[np.newaxis,:]
        action = None
        feed_dict = {
            model.vision:vision,
            model.state:state,
            model.action:action
        }
        run_ops = [model.predict_out]

        with graph.as_default():
            res = sess.run(run_ops,feed_dict=feed_dict)

        # print(res[0][0].shape)
        # print(gripper.shape)
        # input('check shape')
        action = np.concatenate([res[0][0],gripper],axis=-1)
        obs, reward, terminate = task.step(action)
        if reward == 1:
            print("successful try!")

    print('Done')
    env.shutdown()
Example #5
 def _update_info_dict(self):
     # update info dict
     self._info["action mode"] = self._action_mode
     self._info["observation mode"] = self._observation_mode
     # TODO: action dim should be related to the robot, not the action mode; it is fixed here temporarily
     self._info["action dim"] = (self._action_config.action_size, )
     self._info["action low"] = -np.ones(self._info["action dim"],
                                         dtype=np.float32)
     self._info["action high"] = np.ones(self._info["action dim"],
                                         dtype=np.float32)
     if self._observation_mode == "state" or self._observation_mode == "all":
         # TODO: the observation shape should be determined without initializing the entire environment
         with suppress_stdout():
             env = RLEnvironment(action_mode=self._action_config,
                                 obs_config=self._observation_config,
                                 headless=True,
                                 robot_configuration=self._robot_name)
             env.launch()
             task = env.get_task(get_named_class(self._task_name, tasks))
             _, obs = task.reset()
             env.shutdown()
             del task
             del env
         self._info["time step"] = _DT
         self._info["state dim"] = tuple(obs.get_low_dim_data().shape)
         self._info["state low"] = np.ones(self._info["state dim"],
                                           dtype=np.float32) * -np.inf
         self._info["state high"] = np.ones(self._info["state dim"],
                                            dtype=np.float32) * np.inf
     if self._observation_mode == "vision" or self._observation_mode == "all":
         self._info["left shoulder rgb dim"] = tuple(
             self._observation_config.left_shoulder_camera.image_size) + (
                 3, )
         self._info["left shoulder rgb low"] = np.zeros(
             self._info["left shoulder rgb dim"], dtype=np.float32)
         self._info["left shoulder rgb high"] = np.ones(
             self._info["left shoulder rgb dim"], dtype=np.float32)
         self._info["right shoulder rgb  dim"] = tuple(
             self._observation_config.right_shoulder_camera.image_size) + (
                 3, )
         self._info["right shoulder rgb  low"] = np.zeros(
             self._info["right shoulder rgb  dim"], dtype=np.float32)
         self._info["right shoulder rgb  high"] = np.ones(
             self._info["right shoulder rgb  dim"], dtype=np.float32)
         self._info["wrist rgb dim"] = tuple(
             self._observation_config.wrist_camera.image_size) + (3, )
         self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"],
                                                dtype=np.float32)
         self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"],
                                                dtype=np.float32)
     self._info["reward low"] = -np.inf
     self._info["reward high"] = np.inf
Example #6
class RLBenchEnv():
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(self, task_name, state_type='left_shoulder_rgb'):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=False)
        self.env.launch()
        try:
            self.task = self.env.get_task(
                getattr(sys.modules[__name__], task_name))
        except AttributeError:
            raise NotImplementedError('Task %s is not defined in this module' % task_name)

        _, obs = self.task.reset()
        state = getattr(obs, state_type)
        self.state_type = state_type
        self.spec = Spec(task_name)

        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=state.shape)

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self):
        # render the scene
        pass

    def reset(self):
        descriptions, obs = self.task.reset()
        return getattr(obs, self.state_type)

    def step(self, action):
        obs_, reward, terminate = self.task.step(action)
        return getattr(obs_, self.state_type), reward, terminate, None

    def close(self):
        self.env.shutdown()
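A minimal rollout sketch for the wrapper above, assuming the RLBench task classes (e.g. ReachTarget) are imported into this module so the getattr lookup can resolve the task name:

# Hypothetical usage; 'ReachTarget' must name a task class visible in this module.
env = RLBenchEnv('ReachTarget', state_type='left_shoulder_rgb')
state = env.reset()                      # image observation selected by state_type
for _ in range(100):
    state, reward, done, _ = env.step(env.action_space.sample())
    if done:
        state = env.reset()
env.close()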
Example #7
def job_worker_validation(worker_id, action_mode, obs_config, task_class,
                          command_q: mp.Queue, result_q: mp.Queue, obs_scaling,
                          redundancy_resolution_setup, seed):

    np.random.seed(worker_id + seed)
    # setup the environment
    env = Environment(action_mode=action_mode,
                      obs_config=obs_config,
                      headless=True)
    env.launch()
    task = env.get_task(task_class)
    task.reset()
    while True:
        command = command_q.get()
        command_type = command[0]
        command_args = command[1]
        if command_type == "reset":
            task.set_variation(task.sample_variation())
            descriptions, observation = task.reset()
            if obs_scaling is not None:
                observation = observation.get_low_dim_data() / obs_scaling
            else:
                observation = observation.get_low_dim_data()
            result_q.put((descriptions, observation))
        elif command_type == "step":
            actions = command_args[0]
            # check if we need to resolve redundancy
            if redundancy_resolution_setup["use_redundancy_resolution"]:
                actions[0:7], L = task.resolve_redundancy_joint_velocities(
                    actions=actions[0:7], setup=redundancy_resolution_setup)
            else:
                # here we only care about the loss (for logging)
                _, L = task.resolve_redundancy_joint_velocities(
                    actions=actions[0:7], setup=redundancy_resolution_setup)

            next_observation, reward, done = task.step(actions)
            if obs_scaling is not None:
                next_observation = next_observation.get_low_dim_data(
                ) / obs_scaling
            else:
                next_observation = next_observation.get_low_dim_data()
            result_q.put((next_observation, reward, done, L))
        elif command_type == "kill":
            print("Killing worker %d" % worker_id)
            env.shutdown()
            break
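A sketch of the parent-side protocol implied by the command branches above ('reset', 'step', 'kill'); action_mode, obs_config, ReachTarget and the 8-dimensional action are assumptions carried over from the other examples:

# Hypothetical parent process driving a single validation worker.
import multiprocessing as mp

command_q, result_q = mp.Queue(), mp.Queue()
setup = {"use_redundancy_resolution": False}
worker = mp.Process(target=job_worker_validation,
                    args=(0, action_mode, obs_config, ReachTarget,
                          command_q, result_q, None, setup, 1234))
worker.start()
command_q.put(("reset", None))
descriptions, observation = result_q.get()
command_q.put(("step", (np.zeros(8),)))    # assumes 7 joint values + 1 gripper value
next_observation, reward, done, L = result_q.get()
command_q.put(("kill", None))
worker.join()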
Example #8
class SimulationEnvironment():
    """
    This can serve as a parent class from which multiple child classes are derived,
    each specialising in a different task and adding deeper, task-specific functionality.
    """

    def __init__(self,
                 task_name,
                 state_type_list=['left_shoulder_rgb'],
                 dataset_root='',
                 observation_mode='state',
                 headless=True):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self._observation_mode = observation_mode
        self.env = Environment(
            action_mode, dataset_root, obs_config=obs_config, headless=headless)
        # Don't strictly need to call launch, as get_task can launch the env.
        self.env.launch()
        self.task = self.env.get_task(task_name)
        _, obs = self.task.reset()
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
#         self.logger = logger.create_logger(__class__.__name__)
#         self.logger.propagate = 0
        if len(state_type_list) > 0:
            self.observation_space = []
            # for state_type in state_type_list:
            #     state = getattr(obs, state_type)
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)      
        else:
            raise ValueError('No State Type!')

        self.state_type_list = state_type_list

    def _get_state(self, obs):
        if len(self.state_type_list) > 0:
            if self._observation_mode == 'state':
                return self.get_low_dim_data(obs)
            elif self._observation_mode == 'vision':
                return None

    def get_low_dim_data(self, obs) -> np.ndarray:
        """Gets a 1D array of all the low-dimensional obseervations.
        :return: 1D array of observations.
        """
        low_dim_data = [] if obs.gripper_open is None else [[obs.gripper_open]]
        for data in [obs.joint_velocities, obs.joint_positions,
                     obs.joint_forces,
                     obs.gripper_pose, obs.gripper_joint_positions,
                     obs.gripper_touch_forces, obs.task_low_dim_state]:
            if data is not None:
                low_dim_data.append(data)
        return np.concatenate(low_dim_data)


    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        # reward in original rlbench is binary for success or not
        obs_, reward, terminate = self.task.step(action)
        return self._get_state(obs_), reward, terminate, None

    def shutdown(self):
        #         self.logger.info("Environment Shutdown! Create New Instance If u want to start again")
        #         self.logger.handlers.pop()
        self.env.shutdown()
Example #9
def run(i, lock, task_index, variation_count, results, file_lock, tasks):
    """Each thread will choose one task and variation, and then gather
    all the episodes_per_task for that variation."""

    # Initialise each thread with random seed
    np.random.seed(None)
    num_tasks = len(tasks)

    img_size = list(map(int, FLAGS.image_size))

    obs_config = ObservationConfig()
    obs_config.set_all(True)
    obs_config.right_shoulder_camera.image_size = img_size
    obs_config.left_shoulder_camera.image_size = img_size
    obs_config.wrist_camera.image_size = img_size
    obs_config.front_camera.image_size = img_size
    # We want to save the masks as rgb encodings.
    obs_config.left_shoulder_camera.masks_as_one_channel = False
    obs_config.right_shoulder_camera.masks_as_one_channel = False
    obs_config.wrist_camera.masks_as_one_channel = False
    obs_config.front_camera.masks_as_one_channel = False

    if FLAGS.renderer == 'opengl':
        obs_config.right_shoulder_camera.render_mode = RenderMode.OPENGL
        obs_config.left_shoulder_camera.render_mode = RenderMode.OPENGL
        obs_config.wrist_camera.render_mode = RenderMode.OPENGL
        obs_config.front_camera.render_mode = RenderMode.OPENGL

    rlbench_env = Environment(action_mode=ActionMode(),
                              obs_config=obs_config,
                              headless=True)
    rlbench_env.launch()

    task_env = None

    tasks_with_problems = results[i] = ''

    while True:
        # Figure out what task/variation this thread is going to do
        with lock:

            if task_index.value >= num_tasks:
                print('Process', i, 'finished')
                break

            my_variation_count = variation_count.value
            t = tasks[task_index.value]
            task_env = rlbench_env.get_task(t)
            var_target = task_env.variation_count()
            if FLAGS.variations >= 0:
                var_target = np.minimum(FLAGS.variations, var_target)
            if my_variation_count >= var_target:
                # If we have reached the required number of variations for this
                # task, then move on to the next task.
                variation_count.value = my_variation_count = 0
                task_index.value += 1

            variation_count.value += 1
            if task_index.value >= num_tasks:
                print('Process', i, 'finished')
                break
            t = tasks[task_index.value]

            print('Process', i, 'collecting task:', task_env.get_name(),
                  '// variation:', my_variation_count)

        task_env = rlbench_env.get_task(t)
        task_env.set_variation(my_variation_count)
        descriptions, obs = task_env.reset()

        variation_path = os.path.join(FLAGS.save_path, task_env.get_name(),
                                      VARIATIONS_FOLDER % my_variation_count)

        check_and_make(variation_path)

        with open(os.path.join(variation_path, VARIATION_DESCRIPTIONS),
                  'wb') as f:
            pickle.dump(descriptions, f)

        episodes_path = os.path.join(variation_path, EPISODES_FOLDER)
        check_and_make(episodes_path)

        abort_variation = False
        for ex_idx in range(FLAGS.episodes_per_task):
            attempts = 10
            while attempts > 0:
                try:
                    # TODO: for now we do the explicit looping.
                    demo, = task_env.get_demos(amount=1, live_demos=True)
                except Exception as e:
                    attempts -= 1
                    if attempts > 0:
                        continue
                    problem = (
                        'Process %d failed collecting task %s (variation: %d, '
                        'example: %d). Skipping this task/variation.\n%s\n' %
                        (i, task_env.get_name(), my_variation_count, ex_idx,
                         str(e)))
                    print(problem)
                    tasks_with_problems += problem
                    abort_variation = True
                    break
                episode_path = os.path.join(episodes_path,
                                            EPISODE_FOLDER % ex_idx)
                with file_lock:
                    save_demo(demo, episode_path)
                break
            if abort_variation:
                break

    results[i] = tasks_with_problems
    rlbench_env.shutdown()
Example #10
class RLBenchDataEnvGroup(DataEnvGroup):
    ''' DataEnvGroup for RLBench environment. 
        
        + The observation space can be modified through `global_config.env_args`
        + Observation space:
            - 'state': proprioceptive feature : [37] + task_specific
                > robot joint - velocities, positions, forces
                > gripper - pose, joint-position, touch_forces
                > task_low_dim_state
            - RGB-D/RGB image : (128 x 128 by default)
                > 'left_shoulder_rgb', 'right_shoulder_rgb'
                > 'front_rgb', 'wrist_rgb'
            - NOTE : Better to use only one of the RGB observations rather than all of them; this saves a lot of time during env creation.

            - Refer: https://github.com/stepjam/RLBench/blob/20988254b773aae433146fff3624d8bcb9ed7330/rlbench/observation_config.py

        + The action spaces by default are joint velocities [7] and gripper actuations [1].
            - Dimension : [8]
    '''
    def __init__(self, get_episode_type=None):
        super(RLBenchDataEnvGroup, self).__init__(get_episode_type)
        assert self.env_name == 'RLBENCH'
        self.env_obj = None
        self.use_gym = self.config.env_args['use_gym']

        self.observation_mode = self.config.env_args['observation_mode'].lower(
        )
        self.left_obv_key = 'left_shoulder_rgb'
        self.right_obv_key = 'right_shoulder_rgb'
        self.wrist_obv_key = 'wrist_rgb'
        self.front_obv_key = 'front_rgb'

        if self.observation_mode not in ['vision', 'state'
                                         ] and not self.use_gym:
            self.config.env_args['vis_obv_key'] = self.observation_mode

        self.vis_obv_key = self.config.env_args['vis_obv_key']
        self.dof_obv_key = 'state'
        self.env_action_key = 'joint_velocities'
        self.env_gripper_key = 'gripper_open'

        self.obs_space = {
            self.dof_obv_key: (37),
            self.left_obv_key: (128, 128, 3),
            self.right_obv_key: (128, 128, 3),
            self.wrist_obv_key: (128, 128, 3),
            self.front_obv_key: (128, 128, 3)
        }

        self.action_space, self.gripper_space = (7), (1)
        if self.config.env_args['combine_action_space']:
            self.action_space += self.gripper_space

    def get_env(self, task=None):
        task = task if task else self.config.env_type
        if self.use_gym:
            assert type(
                task
            ) == str  # NOTE : When using gym, the task has to be represented as a string.
            assert self.observation_mode in ['vision', 'state']

            env = gym.make(
                task,
                observation_mode=self.config.env_args['observation_mode'],
                render_mode=self.config.env_args['render_mode'])
            self.env_obj = env
        else:
            obs_config = ObservationConfig()
            if self.observation_mode == 'vision':
                obs_config.set_all(True)
            elif self.observation_mode == 'state':
                obs_config.set_all_high_dim(False)
                obs_config.set_all_low_dim(True)
            else:
                obs_config_dict = {
                    self.left_obv_key: obs_config.left_shoulder_camera,
                    self.right_obv_key: obs_config.right_shoulder_camera,
                    self.wrist_obv_key: obs_config.wrist_camera,
                    self.front_obv_key: obs_config.front_camera
                }

                assert self.observation_mode in obs_config_dict.keys()

                obs_config.set_all_high_dim(False)
                obs_config_dict[self.observation_mode].set_all(True)
                obs_config.set_all_low_dim(True)

            # TODO : Write code to change it from env_args
            action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
            self.env_obj = Environment(action_mode,
                                       obs_config=obs_config,
                                       headless=True)

            task = task if task else ReachTarget
            if type(task) == str:
                task = task.split('-')[0]
                task = self.env_obj._string_to_task(task)

            self.env_obj.launch()
            env = self.env_obj.get_task(
                task)  # NOTE : `env` is referred to as `task` in the RLBench docs.
        return env

    def _get_obs(self, obs, key):
        assert obs is not None and key is not None
        if type(obs) == tuple:
            obs = obs[1]
        if type(obs) == dict:
            return obs[key]
        elif type(obs) == Observation:
            if key == 'state':
                return obs.get_low_dim_data()
            return getattr(obs, key)

    def shutdown_env(self):
        if self.env_obj is None:
            print("Environment not created, call `.get_env()`")
        elif self.use_gym:
            self.env_obj.close()
        else:
            self.env_obj.shutdown()
        self.env_obj = None

    def teleoperate(self, demons_config, task=None):
        if self.config.env_args['keyboard_teleop']:
            raise NotImplementedError
        else:
            if self.env_obj is None or task is None:
                env = self.get_env(task)
            else:
                if type(task) == str:
                    task = self.env_obj._string_to_task(task.split('-')[0])
                env = self.env_obj.get_task(task)

            if self.use_gym:
                demos = env.task.get_demos(demons_config.n_runs,
                                           live_demos=True)
            else:
                demos = env.get_demos(demons_config.n_runs, live_demos=True)
            demos = np.array(demos).flatten()

            for i in range(demons_config.n_runs):
                sample = demos[i]
                if self.observation_mode != 'state':
                    tr_vobvs = np.array([
                        self._get_obs(obs, self.vis_obv_key) for obs in sample
                    ])
                tr_dof = np.array([
                    self._get_obs(obs, self.dof_obv_key).flatten()
                    for obs in sample
                ])
                tr_actions = np.array([
                    self._get_obs(obs, self.env_action_key).flatten()
                    for obs in sample
                ])
                tr_gripper = np.array(
                    [[self._get_obs(obs, self.env_gripper_key)]
                     for obs in sample])

                if self.config.env_args['combine_action_space']:
                    tr_actions = np.concatenate((tr_actions, tr_gripper),
                                                axis=-1)

                print("Storing Trajectory")
                trajectory = {self.dof_obv_key: tr_dof, 'action': tr_actions}
                if self.observation_mode != 'state':
                    trajectory.update({self.vis_obv_key: tr_vobvs})
                store_trajectoy(trajectory, episode_type='teleop', task=task)

            self.shutdown_env()

    def random_trajectory(self, demons_config):
        env = self.get_env()
        obs = env.reset()

        tr_vobvs, tr_dof, tr_actions = [], [], []
        for step in range(demons_config.flush_freq):
            if self.observation_mode != 'state':
                tr_vobvs.append(np.array(self._get_obs(obs, self.vis_obv_key)))
            tr_dof.append(
                np.array(self._get_obs(obs, self.dof_obv_key).flatten()))

            action = np.random.normal(size=self.action_space)
            obs, reward, done, info = env.step(action)

            tr_actions.append(action)

        print("Storing Trajectory")
        trajectory = {
            self.dof_obv_key: np.array(tr_dof),
            'action': np.array(tr_actions)
        }
        if self.observation_mode != 'state':
            trajectory.update({self.vis_obv_key: np.array(tr_vobvs)})
        store_trajectoy(trajectory, episode_type='random')
        self.shutdown_env()
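The class docstring above describes a config-driven observation and action layout; below is a rough sketch of pulling a single low-dimensional observation through the wrapper. DataEnvGroup, global_config and store_trajectoy belong to the surrounding project and are only assumed here:

# Hypothetical use of RLBenchDataEnvGroup outside its trajectory-collection methods;
# the project config is assumed to select the non-gym path and 'state' observations.
deg = RLBenchDataEnvGroup()
env = deg.get_env(ReachTarget)
obs = env.reset()                             # RLBench reset -> (descriptions, Observation)
state = deg._get_obs(obs, deg.dof_obv_key)    # 1D proprioceptive vector
print(state.shape)
deg.shutdown_env()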
Example #11
class PPCAImitation:
    def __init__(self,
                 task_class,
                 state_dim,
                 n_features,
                 n_cluster,
                 n_latent,
                 parameters=None,
                 headless=False,
                 cov_reg=1E-8,
                 n_samples=50,
                 dense_reward=False,
                 imitation_noise=0.03):

        obs_config = ObservationConfig()
        obs_config.set_all_low_dim(True)
        obs_config.set_all_high_dim(False)
        self._obs_config = obs_config

        self._state_dim = state_dim
        self._headless = headless

        action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
        self._task_class = task_class
        self._action_mode = action_mode
        self.env = Environment(action_mode, "", obs_config, headless=headless)
        self.env.launch()

        self.task = self.env.get_task(task_class)
        if parameters is None:
            self.parameters = np.load(
                "parameters/%s_%d.npy" %
                (self.task.get_name(), n_features))[:n_samples]
        # parameters = np.concatenate([parameters for _ in range(20)])
        self.imitation_noise = imitation_noise
        self.parameters[:, :3] += imitation_noise * np.random.normal(
            size=self.parameters[:, :3].shape)
        self.mppca = MPPCA(n_cluster,
                           n_latent,
                           n_iterations=30,
                           cov_reg=cov_reg,
                           n_init=500)
        self.mppca.fit(self.parameters)

        self.rlmppca = None
        self.dense_reward = dense_reward

        print(np.exp(self.mppca.log_pi))

        group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"])
        self.space = ClassicSpace(group, n_features=n_features)

        print("mpcca learned")

    def stop(self, joint_gripper, previous_reward):
        if previous_reward == 0.:
            success = 0.
            for _ in range(50):
                obs, reward, terminate = self.task.step(joint_gripper)
                if reward == 1.0:
                    success = 1.
                    break
            return self.task._task.get_dense_reward(), success
        return self.task._task.get_dense_reward(), 1.

    def collect_samples(self,
                        n_episodes=50,
                        noise=True,
                        isomorphic_noise=False):

        success_list = []
        reward_list = []
        latent = []
        cluster = []
        observations = []
        parameters = []
        ob = self.task.reset()
        for i in range(n_episodes):
            context = ob[1].task_low_dim_state
            observations.append(context)
            w, z, k = self.rlmppca.generate_full(
                np.expand_dims(context, 0),
                noise=noise,
                isomorphic_noise=isomorphic_noise)
            # w = self.parameters[i, 3:]
            parameters.append(w)
            latent.append(z)
            cluster.append(k)
            mp = MovementPrimitive(
                self.space,
                MovementPrimitive.get_params_from_block(self.space, w[1:]))
            duration = 1 if w[0] < 0 else w[0]
            print(duration)
            if self._headless:
                trajectory = mp.get_full_trajectory(duration=duration,
                                                    frequency=200)
            else:
                trajectory = mp.get_full_trajectory(duration=5 * duration,
                                                    frequency=200)
            tot_reward = 0.
            success = 0
            for a1 in trajectory.values:  # , a2 in zip(trajectory.values[:-1, :], trajectory.values[1:, :]):
                joint = a1  # (a2-a1)*20.
                joint_gripper = joint
                obs, reward, terminate = self.task.step(joint_gripper)
                if reward == 1. or terminate == 1.:
                    if reward == 1.:
                        success = 1.
                    break
            tot_reward, success = self.stop(joint_gripper, success)
            # tot_reward =  -np.mean(np.abs(context - ob[1].gripper_pose[:3]))
            # tot_reward = -(w[0]-0.2)**2
            print(tot_reward)
            # print("my reward", -np.mean(np.abs(context - ob[1].gripper_pose[:3])))
            # print("parameters", parameters[-1])
            success_list.append(success)

            if self.dense_reward:
                reward_list.append(tot_reward)
            else:
                reward_list.append(success)

            ob = self.task.reset()

        print("-" * 50)
        print("Total reward", np.mean(reward_list))
        print("-" * 50)
        return np.array(success_list), np.array(reward_list), np.array(
            parameters), np.array(latent), np.array(cluster), np.array(
                observations)

    def run_episode(self):
        self.env = Environment(self._action_mode,
                               "",
                               self._obs_config,
                               headless=self._headless)
        self.env.launch()

        self.task = self.env.get_task(self._task_class)

    def shutdown(self):
        self.env.shutdown()
Example #12
class FakeRLBenchEnv(Environment):

    ROBOT_NAME = SUPPORTED_ROBOTS.keys()
    OBSERVATION_MODE = ("state", "vision", "all")
    ACTION_MODE = {
        "joint velocity": ArmActionMode.ABS_JOINT_VELOCITY,
        "delta joint velocity": ArmActionMode.DELTA_JOINT_VELOCITY,
        "joint position": ArmActionMode.ABS_JOINT_POSITION,
        "delta joint position": ArmActionMode.DELTA_JOINT_POSITION,
        "joint torque": ArmActionMode.ABS_JOINT_TORQUE,
        "delta joint torque": ArmActionMode.DELTA_JOINT_TORQUE,
        "effector velocity": ArmActionMode.ABS_EE_VELOCITY,
        "delta effector velocity": ArmActionMode.DELTA_EE_VELOCITY,
        "effector position": ArmActionMode.ABS_EE_POSE,
        "delta effector position": ArmActionMode.DELTA_EE_POSE
    }

    def __init__(self,
                 task_name: str,
                 observation_mode: str = "state",
                 action_mode: str = "delta joint position",
                 robot_name: str = "panda"):
        super(FakeRLBenchEnv, self).__init__(task_name)
        if task_name not in all_class_names(tasks):
            raise KeyError(f"Error: unknown task name {task_name}")
        if observation_mode not in FakeRLBenchEnv.OBSERVATION_MODE:
            raise KeyError(
                f"Error: unknown observation mode {observation_mode}, available: {FakeRLBenchEnv.OBSERVATION_MODE}"
            )
        if action_mode not in FakeRLBenchEnv.ACTION_MODE:
            raise KeyError(
                f"Error: unknown action mode {action_mode}, available: {FakeRLBenchEnv.ACTION_MODE.keys()}"
            )
        if robot_name not in FakeRLBenchEnv.ROBOT_NAME:
            raise KeyError(
                f"Error: unknown robot name {robot_name}, available: {FakeRLBenchEnv.ROBOT_NAME}"
            )

        # TODO: modify the task/robot/arm/gripper to support early instantiation before V-REP is launched
        self._observation_mode = observation_mode
        self._action_mode = action_mode
        self._task_name = task_name
        self._robot_name = robot_name

        self._observation_config = ObservationConfig()
        if self._observation_mode == "state":
            self._observation_config.set_all_low_dim(True)
            self._observation_config.set_all_high_dim(False)
        elif self._observation_mode == "vision":
            self._observation_config.set_all_low_dim(False)
            self._observation_config.set_all_high_dim(True)
        elif self._observation_mode == "all":
            self._observation_config.set_all(True)

        self._action_config = ActionMode(
            FakeRLBenchEnv.ACTION_MODE[self._action_mode])

        self.env = None
        self.task = None

        self._update_info_dict()

    def init(self, display=False):
        with suppress_stdout():
            self.env = RLEnvironment(action_mode=self._action_config,
                                     obs_config=self._observation_config,
                                     headless=not display,
                                     robot_configuration=self._robot_name)
            self.env.launch()
            self.task = self.env.get_task(
                get_named_class(self._task_name, tasks))

    def reset(self, random: bool = True) -> StepDict:
        if not random:
            np.random.seed(0)
        self.task._static_positions = not random
        descriptions, obs = self.task.reset()
        # Returns a list of descriptions and the first observation
        next_step = {"opt": descriptions}

        if self._observation_mode == "state" or self._observation_mode == "all":
            next_step['s'] = obs.get_low_dim_data()
        if self._observation_mode == "vision" or self._observation_mode == "all":
            next_step["left shoulder rgb"] = obs.left_shoulder_rgb
            next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb
            next_step["wrist_rgb"] = obs.wrist_rgb
        return next_step

    def step(self, last_step: StepDict) -> (StepDict, StepDict, bool):
        assert 'a' in last_step, "Key 'a' for action not in last_step; maybe you passed the wrong dict?"

        obs, reward, terminate = self.task.step(last_step['a'])
        last_step['r'] = reward
        last_step["info"] = {}
        next_step = {"opt": None}

        if self._observation_mode == "state" or self._observation_mode == "all":
            next_step['s'] = obs.get_low_dim_data()
        if self._observation_mode == "vision" or self._observation_mode == "all":
            next_step["left shoulder rgb"] = obs.left_shoulder_rgb
            next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb
            next_step["wrist_rgb"] = obs.wrist_rgb
        return last_step, next_step, terminate

    def finalize(self) -> bool:
        with suppress_stdout():
            self.env.shutdown()
        self.task = None
        self.env = None
        return True

    def name(self) -> str:
        return self._task_name

    # ------------- private methods ------------- #

    def _update_info_dict(self):
        # update info dict
        self._info["action mode"] = self._action_mode
        self._info["observation mode"] = self._observation_mode
        # TODO: action dim should be related to the robot, not the action mode; it is fixed here temporarily
        self._info["action dim"] = (self._action_config.action_size, )
        self._info["action low"] = -np.ones(self._info["action dim"],
                                            dtype=np.float32)
        self._info["action high"] = np.ones(self._info["action dim"],
                                            dtype=np.float32)
        if self._observation_mode == "state" or self._observation_mode == "all":
            # TODO: the observation shape should be determined without initializing the entire environment
            with suppress_stdout():
                env = RLEnvironment(action_mode=self._action_config,
                                    obs_config=self._observation_config,
                                    headless=True,
                                    robot_configuration=self._robot_name)
                env.launch()
                task = env.get_task(get_named_class(self._task_name, tasks))
                _, obs = task.reset()
                env.shutdown()
                del task
                del env
            self._info["time step"] = _DT
            self._info["state dim"] = tuple(obs.get_low_dim_data().shape)
            self._info["state low"] = np.ones(self._info["state dim"],
                                              dtype=np.float32) * -np.inf
            self._info["state high"] = np.ones(self._info["state dim"],
                                               dtype=np.float32) * np.inf
        if self._observation_mode == "vision" or self._observation_mode == "all":
            self._info["left shoulder rgb dim"] = tuple(
                self._observation_config.left_shoulder_camera.image_size) + (
                    3, )
            self._info["left shoulder rgb low"] = np.zeros(
                self._info["left shoulder rgb dim"], dtype=np.float32)
            self._info["left shoulder rgb high"] = np.ones(
                self._info["left shoulder rgb dim"], dtype=np.float32)
            self._info["right shoulder rgb  dim"] = tuple(
                self._observation_config.right_shoulder_camera.image_size) + (
                    3, )
            self._info["right shoulder rgb  low"] = np.zeros(
                self._info["right shoulder rgb  dim"], dtype=np.float32)
            self._info["right shoulder rgb  high"] = np.ones(
                self._info["right shoulder rgb  dim"], dtype=np.float32)
            self._info["wrist rgb dim"] = tuple(
                self._observation_config.wrist_camera.image_size) + (3, )
            self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"],
                                                   dtype=np.float32)
            self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"],
                                                   dtype=np.float32)
        self._info["reward low"] = -np.inf
        self._info["reward high"] = np.inf

    def live_demo(self, amount: int, random: bool = True) -> SampleBatch:
        """
        :param amount: number of demonstration trajectories to be generated
        :param random: whether the starting position is random
        :return: observation list : [amount x [(steps-1) x [s, a] + [s_term, None]]],
                 WARNING: the actions here are reconstructed from the observations, so executing them may introduce some inaccuracy
        """
        seeds = [rnd.randint(0, 4096) if random else 0 for _ in range(amount)]
        self.task._static_positions = not random

        demo_pack = []
        for seed in seeds:
            np.random.seed(seed)
            pack = self.task.get_demos(1, True)[0]

            demo_traj = []
            np.random.seed(seed)
            desc, obs = self.task.reset()
            v_tar = 0.
            for o_tar in pack[1:]:
                action = []
                if self._action_config.arm == ArmActionMode.ABS_JOINT_VELOCITY:
                    action.extend(
                        (o_tar.joint_positions - obs.joint_positions) / _DT)
                elif self._action_config.arm == ArmActionMode.ABS_JOINT_POSITION:
                    action.extend(o_tar.joint_positions)
                elif self._action_config.arm == ArmActionMode.ABS_JOINT_TORQUE:
                    action.extend(o_tar.joint_forces)
                    raise TypeError(
                        "Warning, abs_joint_torque is not currently supported")
                elif self._action_config.arm == ArmActionMode.ABS_EE_POSE:
                    action.extend(o_tar.gripper_pose)
                elif self._action_config.arm == ArmActionMode.ABS_EE_VELOCITY:
                    # WARNING: this calculation is not exact, since rotations cannot simply be 'added' together;
                    #          we follow it only because the original RLBench does the same
                    action.extend(
                        (o_tar.gripper_pose - obs.gripper_pose) / _DT)
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_VELOCITY:
                    v_tar = (o_tar.joint_positions - obs.joint_positions) / _DT
                    action.extend(v_tar - obs.joint_velocities)
                    raise TypeError(
                        "Warning, delta_joint_velocity is not currently supported"
                    )
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_POSITION:
                    action.extend(o_tar.joint_positions - obs.joint_positions)
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_TORQUE:
                    action.extend(o_tar.joint_forces - obs.joint_forces)
                    raise TypeError(
                        "Warning, delta_joint_torque is not currently supported"
                    )
                elif self._action_config.arm == ArmActionMode.DELTA_EE_POSE:
                    action.extend(o_tar.gripper_pose[:3] -
                                  obs.gripper_pose[:3])
                    q = Quaternion(o_tar.gripper_pose[3:7]) * Quaternion(
                        obs.gripper_pose[3:7]).conjugate
                    action.extend(list(q))
                elif self._action_config.arm == ArmActionMode.DELTA_EE_VELOCITY:
                    # WARNING: this calculation is not exact, since rotations cannot simply be 'added' together;
                    #          we follow it only because the original RLBench does the same
                    v_tar_new = (o_tar.gripper_pose - obs.gripper_pose) / _DT
                    action.extend(v_tar_new - v_tar)
                    v_tar = v_tar_new
                    raise TypeError(
                        "Warning, delta_ee_velocity is not currently supported"
                    )

                action.append(1.0 if o_tar.gripper_open > 0.9 else 0.0)
                action = np.asarray(action, dtype=np.float32)
                demo_traj.append({
                    'observation': obs,
                    'a': action,
                    's': obs.get_low_dim_data()
                })
                obs, reward, done = self.task.step(action)
                demo_traj[-1]['r'] = reward

            demo_pack.append(demo_traj)
        return {
            "trajectory": demo_pack,
            "config": "default",
            "policy": "hand-coding",
            "env class": self.__class__.__name__,
            "env name": self._task_name,
            "env config": "default",
            "observation config": self._observation_mode,
            "robot config": self._robot_name,
            "action config": self._action_mode
        }
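A hedged sketch of the StepDict-style loop this class implies; suppress_stdout, get_named_class, _DT, StepDict and the Environment base class come from the surrounding project, and "ReachTarget" is an assumed task-name string:

# Hypothetical rollout through the FakeRLBenchEnv step-dict interface.
env = FakeRLBenchEnv("ReachTarget", observation_mode="state",
                     action_mode="delta joint position")
env.init(display=False)
step = env.reset(random=True)
for _ in range(20):
    step['a'] = np.random.uniform(env._info["action low"],
                                  env._info["action high"])
    _, next_step, terminate = env.step(step)
    step = next_step
    if terminate:
        step = env.reset()
env.finalize()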
Example #13
class RLBenchEnv(gym.Env):
    """An gym wrapper for RLBench."""

    metadata = {'render.modes': ['human']}

    def __init__(self, task_class, observation_mode='state'):
        self._observation_mode = observation_mode
        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision':
            obs_config.set_all(True)
        else:
            raise ValueError(
                'Unrecognised observation_mode: %s.' % observation_mode)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(
            action_mode, obs_config=obs_config, headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)

        _, obs = self.task.reset()

        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,),
            dtype=np.float32)

        if observation_mode == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif observation_mode == 'vision':
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=obs.get_low_dim_data().shape),
                "left_shoulder_rgb": spaces.Box(
                    low=0, high=1, shape=obs.left_shoulder_rgb.shape),
                "right_shoulder_rgb": spaces.Box(
                    low=0, high=1, shape=obs.right_shoulder_rgb.shape),
                "wrist_rgb": spaces.Box(
                    low=0, high=1, shape=obs.wrist_rgb.shape),
                })

        self._gym_cam = None

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'vision':
            return {
                "state": obs.get_low_dim_data(),
                "left_shoulder_rgb": obs.left_shoulder_rgb,
                "right_shoulder_rgb": obs.right_shoulder_rgb,
                "wrist_rgb": obs.wrist_rgb,
            }

    def render(self, mode='human'):
        if self._gym_cam is None:
            # Add the camera to the scene
            cam_placeholder = Dummy('cam_cinematic_placeholder')
            self._gym_cam = VisionSensor.create([640, 360])
            self._gym_cam.set_pose(cam_placeholder.get_pose())
            self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)

    def reset(self):
        descriptions, obs = self.task.reset()
        del descriptions  # Not used.
        return self._extract_obs(obs)

    def step(self, action):
        obs, reward, terminate = self.task.step(action)
        return self._extract_obs(obs), reward, terminate, None

    def close(self):
        self.env.shutdown()
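Because this wrapper subclasses gym.Env, a standard gym rollout works directly; the sketch below assumes the 'vision' mode so the Dict observation space defined above is exercised, with ReachTarget as an example task class:

# Hypothetical gym-style rollout in vision mode.
env = RLBenchEnv(ReachTarget, observation_mode='vision')
obs = env.reset()
print(obs['left_shoulder_rgb'].shape, obs['state'].shape)
for _ in range(40):
    obs, reward, terminate, _ = env.step(env.action_space.sample())
    if terminate:
        obs = env.reset()
env.close()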
Example #14
class RLBenchEnv(gym.Env):
    """An gym wrapper for RLBench."""

    metadata = {'render.modes': ['human']}

    def __init__(self,
                 task_class,
                 observation_mode='state',
                 action_mode='joint',
                 multi_action_space=False,
                 discrete_action_space=False):
        self.num_skip_control = 20
        self.epi_obs = []
        self.obs_record = True
        self.obs_record_id = None

        self._observation_mode = observation_mode
        self.obs_config = ObservationConfig()
        if observation_mode == 'state':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
        elif observation_mode == 'state-goal':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
            self.obs_config.set_goal_info(True)
        elif observation_mode == 'vision':
            self.obs_config.set_all(False)
            self.obs_config.set_camera_rgb(True)
        elif observation_mode == 'both':
            self.obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)

        self._action_mode = action_mode
        self.ac_config = None
        if action_mode == 'joint':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.ABS_JOINT_POSITION)
        elif action_mode == 'trigon':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.TRIGON_MODEL_PARAM,
                is_discrete=discrete_action_space)
        else:
            raise ValueError('Unrecognised action_mode: %s.' % action_mode)

        self.env = Environment(action_config=self.ac_config,
                               obs_config=self.obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)
        self.max_episode_steps = self.task.episode_len
        _, obs = self.task.reset()

        if action_mode == 'joint':
            self.action_space = spaces.Box(
                low=-1.7,
                high=1.7,
                shape=(self.ac_config.action_size, ),
                dtype=np.float32)
        elif action_mode == 'trigon':
            if multi_action_space:
                # action_space1 = spaces.MultiBinary(n=1)
                low1 = np.array([-0.8, -0.8])
                high1 = np.array([0.8, 0.8])
                action_space1 = spaces.Box(low=low1,
                                           high=high1,
                                           dtype=np.float32)
                # low = np.array([-0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low2 = np.array([1.0])
                high2 = np.array([3.0])
                action_space2 = spaces.Box(low=low2,
                                           high=high2,
                                           dtype=np.float32)
                self.action_space = spaces.Tuple(
                    (action_space1, action_space2))
            elif discrete_action_space:
                self.action_space = spaces.MultiDiscrete([4, 4, 4])
            else:
                # low = np.array([0.0, -0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([1.0, 0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low = np.array([-1, -1, -1])
                high = np.array([1, 1, 1])
                self.action_space = spaces.Box(low=low,
                                               high=high,
                                               dtype=np.float32)

        if observation_mode == 'state' or observation_mode == 'state-goal':
            self.observation_space = spaces.Box(
                low=-np.inf,
                high=np.inf,
                shape=(obs.get_low_dim_data().shape[0] *
                       self.num_skip_control, ))
            if observation_mode == 'state-goal':
                self.goal_dim = obs.get_goal_dim()
        elif observation_mode == 'vision':
            self.observation_space = spaces.Box(
                low=0, high=1, shape=obs.head_camera_rgb.shape)
        elif observation_mode == 'both':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "rattler_eye_rgb":
                spaces.Box(low=0, high=1, shape=obs.head_camera_rgb.shape),
                "rattler_eye_depth":
                spaces.Box(low=0, high=1, shape=obs.head_camera_depth.shape),
            })

        self._gym_cam = None

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'state-goal':
            obs_goal = {'observation': obs.get_low_dim_data()}
            obs_goal.update(obs.get_goal_data())
            return obs_goal
        elif self._observation_mode == 'vision':
            return obs.head_camera_rgb
        elif self._observation_mode == 'both':
            return {
                "state": obs.get_low_dim_data(),
                "rattler_eye_rgb": obs.head_camera_rgb,
                "rattle_eye_depth": obs.head_camera_depth,
            }

    def render(self, mode='human'):
        if self._gym_cam is None:
            # # Add the camera to the scene
            self._gym_cam = VisionSensor('monitor')
            self._gym_cam.set_resolution([640, 640])
            self._gym_cam.set_render_mode(RenderMode.EXTERNAL_WINDOWED)

    def reset(self):
        obs_data_group = []
        obs_data_dict = {
            'observation': [],
            'desired_goal': None,
            'achieved_goal': None
        }
        descriptions, obs = self.task.reset()
        self.epi_obs.append(obs)
        obs_data = self._extract_obs(obs)
        for _ in range(self.num_skip_control):
            # obs_data_group.extend(obs_data)
            if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray):
                obs_data_group.extend(obs_data)
            elif isinstance(obs_data, dict):
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        del descriptions  # Not used
        return ret_obs

    def step(self, action):
        obs_data_group = []
        obs_data_dict = {
            'observation': [],
            'desired_goal': None,
            'achieved_goal': None
        }
        reward_group = []
        terminate = False
        for _ in range(self.num_skip_control):
            obs, reward, step_terminate, success = self.task.step(action)
            self.epi_obs.append(obs)
            obs_data = self._extract_obs(obs)
            if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray):
                obs_data_group.extend(obs_data)
            elif isinstance(
                    obs_data,
                    dict):  # used for hierarchical reinforcement algorithm
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
            reward_group.append(reward)
            terminate |= step_terminate
            if terminate:
                if self.obs_record and success:  # record a successful experience
                    self.record_obs("RobotPos")
                self.epi_obs = []
                break
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        return ret_obs, np.mean(reward_group), terminate, {}

    def close(self):
        self.env.shutdown()

    # def load_env_param(self):
    #     self.env.load_env_param()

    def compute_reward(self, achieved_goal=None, desired_goal=None, info=None):
        assert achieved_goal is not None
        assert desired_goal is not None
        return self.task.compute_reward(achieved_goal, desired_goal)

    def record_obs(self, obs_part):
        if self.obs_record_id is None:
            record_filenames = glob.glob("./obs_record/obs_record_*.txt")
            record_filenames.sort(key=lambda filename: int(
                filename.split('_')[-1].split('.')[0]))
            if len(record_filenames) == 0:
                self.obs_record_id = 1
            else:
                last_id = int(
                    record_filenames[-1].split('_')[-1].split('.')[0])
                self.obs_record_id = last_id + 1
        else:
            self.obs_record_id += 1
        filename = './obs_record/obs_record_' + str(
            self.obs_record_id) + '.txt'
        obs_record_file = open(filename, 'w')

        if obs_part == 'All':
            pass
        if obs_part == 'RobotPos':
            robot_pos_arr = []
            for obs in self.epi_obs:
                robot_pos = obs.get_2d_robot_pos()
                robot_pos_arr.append(robot_pos)
            target_pos = self.task.get_goal()
            robot_pos_arr.append(
                target_pos)  # The last line records the target position
            robot_pos_arr = np.array(robot_pos_arr)
            np.savetxt(obs_record_file, robot_pos_arr, fmt="%f")
        obs_record_file.close()
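
A minimal driving loop for the goal-style wrapper above is sketched below. The constructor is not part of this snippet, so make_env() is a placeholder for however the class is instantiated, and the 8-dimensional random action assumes the usual joint-velocity-plus-gripper action mode.

import numpy as np

env = make_env()                          # placeholder: construct the wrapper shown above
obs = env.reset()                         # stacked obs list, or a goal dict (observation/desired_goal/achieved_goal)
for _ in range(100):
    action = np.random.uniform(-1.0, 1.0, size=8)
    obs, reward, terminate, info = env.step(action)   # reward is averaged over num_skip_control sub-steps
    if terminate:
        break
env.close()
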
Example #15
class Agent(object):

    def __init__(self, action_size):
        self.action_size = action_size

    def act(self, obs):
        return (np.random.normal(0.0, 0.1, size=(self.action_size, ))).tolist()


obs_config = ObservationConfig()
obs_config.set_all(False)
obs_config.left_shoulder_camera.rgb = True
obs_config.right_shoulder_camera.rgb = True
obs_config.wrist_camera.rgb = True

action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
env = Environment(action_mode,
                  obs_config=obs_config,
                  headless=False,
                  robot_configuration='jaco')
env.launch()

task = env.get_task(ReachTarget)

agent = Agent(action_size=7)  # Jaco is 6DoF + 1 for gripper action

training_steps = 120
episode_length = 40
obs = None
for i in range(training_steps):
    if i % episode_length == 0:
        print('Reset Episode')
        descriptions, obs = task.reset()
        print(descriptions)
    action = agent.act(obs)
    print(action)
    obs, reward, terminate = task.step(action)

print('Done')
env.shutdown()
Example #16
class RLBench(BaseSimulator):
    def __init__(self, h):

        #64x64 camera outputs
        cam = CameraConfig(image_size=(64, 64))
        obs_config = ObservationConfig(left_shoulder_camera=cam,
                                       right_shoulder_camera=cam,
                                       wrist_camera=cam,
                                       front_camera=cam)
        obs_config.set_all(True)

        # delta EE control with motion planning
        action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_WORLD_FRAME)

        #Inits
        self.env = Environment(action_mode, obs_config=obs_config, headless=h)
        self.env.launch()
        self.task = self.env.get_task(ReachTarget)

    def reset(self):

        d, o = self.task.reset()

        return o

    def step(self, a, prev_state):

        # delta orientation
        d_quat = np.array([0, 0, 0, 1])

        # gripper state
        gripper_open = 1.0

        # delta position
        d_pos = np.zeros(3)

        # For positive magnitude
        if (a % 2 == 0):
            a = int(a / 2)
            d_pos[a] = 0.03

        # For negative magnitude
        else:
            a = int((a - 1) / 2)
            d_pos[a] = -0.03

        # Forming action as expected by the environment
        action = np.concatenate([d_pos, d_quat, [gripper_open]])

        try:
            s, r, t = self.task.step(action)
            r *= 1000

        # Handling failure in planning
        except ConfigurationPathError:
            s = prev_state
            r = -0.1
            t = False

        # Handling wrong action for inverse Jacobian
        except InvalidActionError:
            s = prev_state
            r = -0.01
            t = False

        # Get bounding box centroids
        x, y, z = self.task._scene._workspace.get_position()

        # Set bounding box limits
        minx = x - 0.25
        maxx = x + 0.25
        miny = y - 0.35
        maxy = y + 0.35
        minz = z
        maxz = z + 0.5

        bounding_box = [minx, maxx, miny, maxy, minz, maxz]

        # Get gripper position
        gripper_pose = s.gripper_pose

        # Reward for being in the bounding box
        if (self.bb_check(bounding_box, gripper_pose)):
            r += 0.1

        return s, r, t

    # Check if gripper in the bounding box
    def bb_check(self, bounding_box, gripper_pose):

        out = True

        if (gripper_pose[0] < bounding_box[0]
                or gripper_pose[0] > bounding_box[1]):
            out = False

        if (gripper_pose[1] < bounding_box[2]
                or gripper_pose[1] > bounding_box[3]):
            out = False

        if (gripper_pose[2] < bounding_box[4]
                or gripper_pose[2] > bounding_box[5]):
            out = False

        return out

    @staticmethod
    def n_actions():
        return 6

    def shutdown(self):
        print("Shutdown")
        self.env.shutdown()

    def __del__(self):
        print("Shutdown")
        self.env.shutdown()
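
A usage sketch for the simulator wrapper above: the discrete action index selects an axis and a direction for a 3 cm end-effector displacement, and prev_state lets step() fall back to the previous observation when planning fails. BaseSimulator and the reward scaling come from the surrounding project, so treat this loop as illustrative.

import numpy as np

sim = RLBench(h=True)                                # headless
obs = sim.reset()
for _ in range(20):
    a = np.random.randint(RLBench.n_actions())       # 0..5 -> +/- x, y, z
    obs, reward, done = sim.step(a, prev_state=obs)
    if done:
        obs = sim.reset()
sim.shutdown()
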
Example #17
class RLBenchEnv(gym.Env):
    """An gym wrapper for RLBench."""

    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 task_class,
                 observation_mode='state',
                 render_mode: Union[None, str] = None):
        self._observation_mode = observation_mode
        self._render_mode = render_mode
        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision':
            obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)

        _, obs = self.task.reset()

        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(self.env.action_size, ))

        if observation_mode == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif observation_mode == 'vision':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "left_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.left_shoulder_rgb.shape),
                "right_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.right_shoulder_rgb.shape),
                "wrist_rgb":
                spaces.Box(low=0, high=1, shape=obs.wrist_rgb.shape),
                "front_rgb":
                spaces.Box(low=0, high=1, shape=obs.front_rgb.shape),
            })

        if render_mode is not None:
            # Add the camera to the scene
            cam_placeholder = Dummy('cam_cinematic_placeholder')
            self._gym_cam = VisionSensor.create([640, 360])
            self._gym_cam.set_pose(cam_placeholder.get_pose())
            if render_mode == 'human':
                self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
            else:
                self._gym_cam.set_render_mode(RenderMode.OPENGL3)

    def _extract_obs(self, obs) -> Dict[str, np.ndarray]:
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'vision':
            return {
                "state": obs.get_low_dim_data(),
                "left_shoulder_rgb": obs.left_shoulder_rgb,
                "right_shoulder_rgb": obs.right_shoulder_rgb,
                "wrist_rgb": obs.wrist_rgb,
                "front_rgb": obs.front_rgb,
            }

    def render(self, mode='human') -> Union[None, np.ndarray]:
        if mode != self._render_mode:
            raise ValueError(
                'The render mode must match the render mode selected in the '
                'constructor. \nI.e. if you want "human" render mode, then '
                'create the env by calling: '
                'gym.make("reach_target-state-v0", render_mode="human").\n'
                'You passed in mode %s, but expected %s.' %
                (mode, self._render_mode))
        if mode == 'rgb_array':
            return self._gym_cam.capture_rgb()

    def reset(self) -> Dict[str, np.ndarray]:
        descriptions, obs = self.task.reset()
        del descriptions  # Not used.
        return self._extract_obs(obs)

    def step(self, action) -> Tuple[Dict[str, np.ndarray], float, bool, dict]:
        obs, reward, terminate = self.task.step(action)
        return self._extract_obs(obs), reward, terminate, {}

    def close(self) -> None:
        self.env.shutdown()
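
Since the wrapper above follows the standard gym API, it can be driven like any other gym environment. The snippet below is a sketch that assumes ReachTarget is imported from rlbench.tasks, as in the other examples here.

env = RLBenchEnv(ReachTarget, observation_mode='state')
obs = env.reset()
for _ in range(50):
    obs, reward, terminate, _ = env.step(env.action_space.sample())
    if terminate:
        obs = env.reset()
env.close()
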
Example #18
class GraspController:
    def __init__(self, action_mode, static_positions=True):
        # Initialize environment with Action mode and observations
        # Resize the wrist camera to fit the GQCNN
        wrist_camera = CameraConfig(image_size=(1032, 772))
        self.env = Environment(action_mode, '', ObservationConfig(wrist_camera=wrist_camera), False, static_positions=static_positions)
        self.env.launch()
        # Load specified task into the environment
        self.task = self.env.get_task(EmptyContainer)

    def reset(self):
        descriptions, obs = self.task.reset()
        return descriptions, obs

    def get_objects(self, add_noise=False):
        objs = self.env._scene._active_task.get_base().get_objects_in_tree(exclude_base=True, first_generation_only=False)
        objs_dict = {}

        for obj in objs:
            name = obj.get_name()
            pose = obj.get_pose()
            if add_noise:
                pose = noisy_object(pose)
            objs_dict[name] = [obj, pose]

        return objs_dict

    def get_path(self, pose, set_orientation=False):
        # TODO deal with situations when path not found
        if set_orientation:
            path = self.env._robot.arm.get_path(pose[:3], quaternion=pose[3:],
                                                ignore_collisions=True, algorithm=Algos.RRTConnect, trials=1000)
        else:
            path = self.env._robot.arm.get_path(pose[:3], quaternion=np.array([0, 1, 0, 0]),
                                                ignore_collisions=True, algorithm=Algos.RRTConnect, trials=1000)
        return path

    def grasp(self):
        # TODO: get feedback to check whether the grasp was successful
        done_grab_action = False
        # Repeat until the object is successfully grabbed
        while not done_grab_action:
            # gradually close the gripper
            done_grab_action = self.env._robot.gripper.actuate(0, velocity=0.2)  # 0 is close
            self.env._pyrep.step()
            # self.task._task.step()
            # self.env._scene.step()

        grasped_objects = {}
        obj_list = ['Shape', 'Shape1', 'Shape3']
        objs = self.env._scene._active_task.get_base().get_objects_in_tree(exclude_base=True, first_generation_only=False)
        for obj in objs:
            if obj.get_name() in obj_list:
                grasped_objects[obj.get_name()] = self.env._robot.gripper.grasp(obj)
        return grasped_objects
        # return self.env._robot.gripper.get_grasped_objects()

    def release(self):
        done = False
        while not done:
            done = self.env._robot.gripper.actuate(1, velocity=0.2)  # 1 is release
            self.env._pyrep.step()
            # self.task._task.step()
            # self.env._scene.step()
        self.env._robot.gripper.release()

    def execute_path(self, path, open_gripper=True):
        path = path._path_points.reshape(-1, path._num_joints)
        for i in range(len(path)):
            action = list(path[i]) + [int(open_gripper)]
            obs, reward, terminate = self.task.step(action)
        return obs, reward, terminate
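
A rough usage sketch for the controller above, showing the intended call order: reset, read (noisy) object poses, plan, execute, grasp, release. The action mode is an assumption (execute_path feeds joint positions from the planned path into task.step, so ABS_JOINT_POSITION is the natural choice), and 'Shape' matches the object names hard-coded in grasp().

action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)   # assumed action mode
controller = GraspController(action_mode, static_positions=True)
descriptions, obs = controller.reset()

objs = controller.get_objects(add_noise=True)                # {name: [handle, noisy_pose]}
_, grasp_pose = objs['Shape']
path = controller.get_path(grasp_pose)                       # top-down approach, quaternion [0, 1, 0, 0]
obs, reward, terminate = controller.execute_path(path, open_gripper=True)
grasped = controller.grasp()
controller.release()
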
Example #19
class TestEnvironment(unittest.TestCase):

    def tearDown(self):
        self.env.shutdown()

    def get_task(self, task_class, arm_action_mode):
        obs_config = ObservationConfig()
        obs_config.set_all(False)
        obs_config.set_all_low_dim(True)
        obs_config.right_shoulder_camera.rgb = True
        action_mode = ActionMode(arm_action_mode)
        self.env = Environment(
            action_mode, ASSET_DIR, obs_config, headless=True)
        self.env.launch()
        return self.env.get_task(task_class)

    def test_get_task(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        self.assertIsInstance(task, TaskEnvironment)
        self.assertEqual(task.get_name(), 'reach_target')

    def test_reset(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        desc, obs = task.reset()
        self.assertIsNotNone(obs.right_shoulder_rgb)
        self.assertIsNone(obs.left_shoulder_rgb)
        self.assertIsInstance(desc, list)

    def test_step(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        task.reset()
        obs, reward, term = task.step(np.random.uniform(size=8))
        self.assertIsNotNone(obs.right_shoulder_rgb)
        self.assertIsNone(obs.left_shoulder_rgb)
        self.assertEqual(reward, 0)
        self.assertFalse(term)

    def test_get_invalid_number_of_demos(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        with self.assertRaises(RuntimeError):
            task.get_demos(10, live_demos=False, image_paths=True)

    def test_get_stored_demos_paths(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        demos = task.get_demos(2, live_demos=False, image_paths=True)
        self.assertEqual(len(demos), 2)
        self.assertGreater(len(demos[0]), 0)
        self.assertIsInstance(demos[0][0].right_shoulder_rgb, str)
        self.assertIsNone(demos[0][0].left_shoulder_rgb)

    def test_get_stored_demos_images(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        demos = task.get_demos(2, live_demos=False, image_paths=False)
        self.assertEqual(len(demos), 2)
        self.assertGreater(len(demos[0]), 0)
        self.assertIsInstance(demos[0][0].right_shoulder_rgb, np.ndarray)
        self.assertIsNone(demos[0][0].left_shoulder_rgb)

    def test_get_live_demos(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        demos = task.get_demos(2, live_demos=True)
        self.assertEqual(len(demos), 2)
        self.assertGreater(len(demos[0]), 0)
        self.assertIsInstance(demos[0][0].right_shoulder_rgb, np.ndarray)

    def test_action_mode_abs_joint_velocity(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        task.reset()
        action = [0.1] * 8
        obs, reward, term = task.step(action)
        [self.assertAlmostEqual(0.1, a, delta=0.05)
         for a in obs.joint_velocities]

    def test_action_mode_delta_joint_velocity(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.DELTA_JOINT_VELOCITY)
        task.reset()
        action = [-0.1] * 8
        [task.step(action) for _ in range(2)]
        obs, reward, term = task.step(action)
        [self.assertAlmostEqual(-0.3, a, delta=0.1)
         for a in obs.joint_velocities]

    def test_action_mode_abs_joint_position(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_POSITION)
        _, obs = task.reset()
        init_angles = np.append(obs.joint_positions, 0.)  # for gripper
        target_angles = np.array(init_angles) + 0.05
        [task.step(target_angles) for _ in range(5)]
        obs, reward, term = task.step(target_angles)
        [self.assertAlmostEqual(a, actual, delta=0.05)
         for a, actual in zip(target_angles, obs.joint_positions)]

    def test_action_mode_delta_joint_position(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.DELTA_JOINT_POSITION)
        _, obs = task.reset()
        init_angles = obs.joint_positions
        target_angles = np.array(init_angles) + 0.06
        [task.step([0.01] * 8) for _ in range(5)]
        obs, reward, term = task.step([0.01] * 8)
        [self.assertAlmostEqual(a, actual, delta=0.05)
         for a, actual in zip(target_angles, obs.joint_positions)]

    def test_action_mode_abs_ee_position(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_EE_POSE)
        _, obs = task.reset()
        init_pose = obs.gripper_pose
        new_pose = np.append(init_pose, 0.0)  # for gripper
        new_pose[2] -= 0.1  # 10cm down
        obs, reward, term = task.step(new_pose)
        [self.assertAlmostEqual(a, p, delta=0.001)
         for a, p in zip(new_pose, obs.gripper_pose)]

    def test_action_mode_delta_ee_position(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.DELTA_EE_POSE)
        _, obs = task.reset()
        init_pose = obs.gripper_pose
        new_pose = [0, 0, -0.1, 0, 0, 0, 1.0, 0.0]  # 10cm down
        expected_pose = list(init_pose)
        expected_pose[2] -= 0.1
        obs, reward, term = task.step(new_pose)
        [self.assertAlmostEqual(a, p, delta=0.001)
         for a, p in zip(expected_pose, obs.gripper_pose)]

    def test_action_mode_abs_ee_velocity(self):
        VEL = -0.2  # move down with velocity 0.2
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_EE_VELOCITY)
        _, obs = task.reset()
        expected_pose = obs.gripper_pose
        expected_pose[2] += (VEL * 0.05)
        vels = np.zeros((8,))
        vels[2] += VEL
        obs, reward, term = task.step(vels)
        [self.assertAlmostEqual(a, p, delta=0.001)
         for a, p in zip(expected_pose, obs.gripper_pose)]

    def test_action_mode_delta_velocity(self):
        VEL = -0.2  # move down with velocity 0.2
        task = self.get_task(
            ReachTarget, ArmActionMode.DELTA_EE_VELOCITY)
        _, obs = task.reset()
        expected_pose = obs.gripper_pose
        expected_pose[2] += (VEL * 0.05)
        expected_pose[2] += ((VEL * 2) * 0.05)
        expected_pose[2] += ((VEL * 3) * 0.05)
        vels = np.zeros((8,))
        vels[2] += VEL
        [task.step(vels) for _ in range(2)]
        obs, reward, term = task.step(vels)
        [self.assertAlmostEqual(a, p, delta=0.01)
         for a, p in zip(expected_pose, obs.gripper_pose)]

    def test_action_mode_abs_joint_torque(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_TORQUE)
        task.reset()
        action = [0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, 0.0]
        obs, reward, term = task.step(action)
        # Difficult to test given gravity, so just check for exceptions.

    def test_action_mode_delta_joint_torque(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.DELTA_JOINT_TORQUE)
        _, obs = task.reset()
        init_forces = np.array(obs.joint_forces)
        action = [0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, 0]
        expected = np.array([0.3, -0.3, 0.3, -0.3, 0.3, -0.3, 0.3])
        expected += init_forces
        [task.step(action) for _ in range(2)]
        obs, reward, term = task.step(action)
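
These tests use the stock unittest pattern, so the conventional entry point below applies when the module is run directly (the imports and ASSET_DIR are defined elsewhere in the test file).

if __name__ == '__main__':
    unittest.main()
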
Example #20
class RLBenchBox(TaskInterface):

    def __init__(self, task_class, state_dim, n_features, headless=True):

        super().__init__(n_features)
        self._group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"])
        self._space = ClassicSpace(self._group, n_features)

        obs_config = ObservationConfig()
        obs_config.set_all_low_dim(True)
        obs_config.set_all_high_dim(False)
        self._obs_config = obs_config

        self._state_dim = state_dim
        self._headless = headless

        action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
        self._task_class = task_class
        self._action_mode = action_mode
        self.env = Environment(
            action_mode, "", obs_config, headless=headless)
        self.env.launch()

        self.task = self.env.get_task(task_class)
        self._obs = None

    def get_context_dim(self):
        return self._state_dim

    def read_context(self):
        return self._obs[1].task_low_dim_state

    def get_demonstrations(self):
        file = "parameters/%s_%d.npy" % (self.task.get_name(), self._space.n_features)
        try:
            return np.load(file)
        except FileNotFoundError:
            raise Exception("File %s not found. Please consider running 'dataset_generator.py'" % file)

    def _stop(self, joint_gripper, previous_reward):
        if previous_reward == 0.:
            success = 0.
            for _ in range(50):
                obs, reward, terminate = self.task.step(joint_gripper)
                if reward == 1.0:
                    success = 1.
                    break
            return self.task._task.get_dense_reward(), success
        return self.task._task.get_dense_reward(), 1.

    def send_movement(self, weights, duration):
        mp = MovementPrimitive(self._space, MovementPrimitive.get_params_from_block(self._space, weights))
        duration = 1 if duration < 0 else duration
        if self._headless:
            trajectory = mp.get_full_trajectory(duration=min(duration, 1), frequency=200)
        else:
            trajectory = mp.get_full_trajectory(duration=5 * duration, frequency=200)
        tot_reward = 0.
        success = 0
        for a1 in trajectory.values:  # , a2 in zip(trajectory.values[:-1, :], trajectory.values[1:, :]):
            joint = a1  # (a2-a1)*20.
            joint_gripper = joint
            obs, reward, terminate = self.task.step(joint_gripper)
            if reward == 1. or terminate == 1.:
                if reward == 1.:
                    success = 1.
                break
        tot_reward, success = self._stop(joint_gripper, success)
        return success, tot_reward

    def reset(self):
        self._obs = self.task.reset()
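
A sketch of how the task interface above might be exercised: contexts come from the low-dimensional task state, and movements are parameterised by a block of movement-primitive weights. The weight layout below ((7 joints + gripper) x n_features) and state_dim=3 are assumptions for illustration only.

import numpy as np

box = RLBenchBox(ReachTarget, state_dim=3, n_features=10, headless=True)
box.reset()
context = box.read_context()                 # task_low_dim_state
weights = 0.01 * np.random.randn(8 * 10)     # assumed layout: (7 joints + gripper) x n_features
success, dense_reward = box.send_movement(weights, duration=1.0)
box.env.shutdown()
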
Example #21
class StateMachine(object):
    def __init__(self):
        self.env = None
        self.home = None
        self.task = None
        self.sensor = None
        self.objs_dict = None

    def initialize(self, headless=False):
        DATASET = ''
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        obs_config.left_shoulder_camera.rgb = True
        obs_config.right_shoulder_camera.rgb = True
        action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN)
        self.env = Environment(action_mode,
                               DATASET,
                               obs_config,
                               headless=headless)
        self.sensor = NoisyObjectPoseSensor(self.env)
        self.env.launch()
        self.task = self.env.get_task(EmptyContainer)
        self.task.reset()
        self.home = self.get_objects(False)['large_container'].get_pose()
        self.home[2] += 0.3
        # demos = task.get_demos(3, live_demos=live_demos)

        self.objs = self.get_objects()
        self.movable_objects = ['Shape', 'Shape1', 'Shape3']
        self.target_bins = ['small_container0']
        self.start_bins = ['large_container']
        self.max_retry = 5
        self.env._robot.gripper.set_joint_forces([50, 50])

    def get_objects(self, graspable=False):
        if graspable:
            objs = self.task._task.get_graspable_objects()
            objs_dict = {}
            for item in objs:
                name = item.get_name()
                objs_dict[name] = item
            return objs_dict
        else:
            objs = self.env._scene._active_task.get_base().\
                    get_objects_in_tree(exclude_base=True, first_generation_only=False)
            objs_dict = {}
            for obj in objs:
                name = obj.get_name()
                objs_dict[name] = obj
            return objs_dict

    # Move above object
    def move_to(self,
                pose,
                pad=0.05,
                ignore_collisions=True,
                quat=np.array([0, 1, 0, 0])):
        target_pose = np.copy(pose)
        obs = self.env._scene.get_observation()
        init_pose = obs.gripper_pose
        target_pose[2] += pad
        path = self.env._robot.arm.get_path(np.array(target_pose[0:3]),
                                            quaternion=quat,
                                            trials=1000,
                                            ignore_collisions=ignore_collisions,
                                            algorithm=Algos.RRTConnect)
        # TODO catch errors and deal with situations when path not found
        return path

    def grasp(self, obj):
        # open the grippers
        cond = False
        while not cond:
            cond = self.env._robot.gripper.actuate(1, 0.1)
            self.env._scene.step()
        cond = False
        # now close
        while not cond:
            cond = self.env._robot.gripper.actuate(0, 0.1)
            self.env._scene.step()
        cond = self.env._robot.gripper.grasp(obj)
        return cond

    def release(self, obj):
        cond = False
        while not cond:
            cond = self.env._robot.gripper.actuate(1, 0.1)
            self.env._scene.step()
        self.env._robot.gripper.release()

    def execute(self, path):
        done = False
        path.set_to_start()
        while not done:
            done = path.step()
            # a = path.visualize()
            self.env._scene.step()
        return done

    def go_to(self,
              pose,
              pad=0.05,
              quat=np.array([0, 1, 0, 0]),
              gripper_close=False):
        pose_cp = copy.copy(pose)
        pose_cp[2] += pad
        pose_cp[3:] = quat
        wp = pose_cp.tolist() + [1]
        if gripper_close:
            wp = pose_cp.tolist() + [0]
        try:
            self.task.step(wp)
        except:
            print("Retrying with normal path planner")
            path = self.move_to(pose, pad, True, quat)
            self.execute(path)
        return

    def reset(self):
        self.task.reset()

    def is_within(self, obj1, obj2):
        # whether obj2 is within obj1
        obj1_bbox = obj1.get_bounding_box()
        obj1_mat = np.array(obj1.get_matrix()).reshape(3, 4)
        obj2_bbox = obj2.get_bounding_box()
        obj2_mat = np.array(obj2.get_matrix()).reshape(3, 4)
        obj1_rect = get_edge_points(obj1_bbox, obj1_mat)
        obj2_rect = get_edge_points(obj2_bbox, obj2_mat)
        return contains(obj1_rect, obj2_rect)

    def picking_bin_empty(self):
        '''
         Returns whether the picking bin is empty
        '''
        self.objs_dict = self.get_objects()
        bin_obj = self.objs_dict['large_container']  #?Which one
        for obj_name in self.objs_dict.keys():
            if (not ('container' in obj_name)):
                if self.is_within(bin_obj, self.objs_dict[obj_name]):
                    return False
        return True

    def placing_bin_full(self):
        '''
         Returns whether the placing bin is full
        '''
        self.objs_dict = self.get_objects()
        bin_obj = self.objs_dict['small_container1']  #?Which one
        for obj_name in self.objs_dict.keys():
            if (not ('container' in obj_name)):
                if not (self.is_within(bin_obj, self.objs_dict[obj_name])):
                    return False
        return True

    def get_all_shapes(self):
        objects = []

        for name in self.objs:
            if "Shape" in name:
                objects.append(name)
        print(objects)
        return objects

    def get_grasp_pose(self, theta):
        q = quaternion(0, 1, 0, 0)
        cos = np.cos(theta / 2)
        sin = np.sin(theta / 2)
        p = quaternion(cos, 0, 0, sin)
        rot_qt = p * q
        print("Theta", rot_qt, cos, sin, theta / 2)
        rot_arr = qn.as_float_array(rot_qt)
        rot_qt = [rot_arr[1], rot_arr[2], rot_arr[3], rot_arr[0]]
        return rot_qt

    def get_full_grasp_pose(self, thetas):
        thetax = thetas[0]
        cos = np.cos(thetax / 2)
        sin = np.sin(thetax / 2)
        qx = quaternion(cos, sin, 0, 0)
        thetay = thetas[1]
        cos = np.cos(thetay / 2)
        sin = np.sin(thetay / 2)
        qy = quaternion(cos, 0, sin, 0)
        thetaz = thetas[2]
        cos = np.cos(thetaz / 2)
        sin = np.sin(thetaz / 2)
        qz = quaternion(cos, 0, 0, sin)
        q_init = quaternion(0, 1, 0, 0)
        r = q_init * qz * qx * qy
        r_arr = qn.as_float_array(r)
        quat_vec = np.array([r_arr[1], r_arr[2], r_arr[3], r_arr[0]])
        return quat_vec

    def move_objects_to_target(self, target_bins, start_bins):

        target_bin = target_bins[0]
        start_bin = start_bins[0]

        start_bin_pose = self.objs[start_bin].get_pose()
        start_bin_pose[2] += 0.3

        target_bin_pose = self.objs[target_bin].get_pose()
        target_bin_pose[2] += 0.3
        '''
        move_objs = []
        for obj in machine.movable_objects:
            if self.is_within(target_bin, obj):
                move_objs.append(obj)
        print(move_objs)
        '''
        move_objs = self.movable_objects

        for i, shape in enumerate(move_objs):

            cond = False
            retry_count = 0
            while not cond and retry_count < self.max_retry:
                theta = 2 * np.pi * retry_count / self.max_retry
                quat = self.get_grasp_pose(theta)
                # go back to home position
                self.go_to(start_bin_pose, 0, gripper_close=False)
                # move above object
                objs_poses = self.sensor.get_poses()
                # pose=objs[shape].get_pose()
                pose = objs_poses[shape]
                self.go_to(pose, 0, quat=quat, gripper_close=False)
                # grasp the object
                cond = self.grasp(self.objs[shape])
                if not cond:
                    retry_count += 1
                    print("retry count: ", retry_count)
            # move to home position
            self.go_to(start_bin_pose, 0, gripper_close=True)
            print("Gripper joint forces",
                  self.env._robot.gripper.get_joint_forces())
            # move above small container
            objs_poses = self.sensor.get_poses()
            pose = objs_poses[target_bin]
            pose[0] += (i * 0.04 - 0.04)
            self.go_to(pose, 0.05, gripper_close=True)
            # release the object
            self.release(self.objs[shape])
        self.go_to(self.home, 0, gripper_close=False)
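
A driving script sketch for the state machine above: initialize the scene, then shuttle objects from the large container to the small one until the picking bin is empty.

machine = StateMachine()
machine.initialize(headless=True)
while not machine.picking_bin_empty():
    machine.move_objects_to_target(machine.target_bins, machine.start_bins)
machine.env.shutdown()
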
Example #22
    def __init__(self, task_class, n_features, load, n_movements):
        """
        Learn the Movement from demonstration.

        :param task_class: task that we aim to learn
        :param n_features: number of radial basis features (RBFs) in the movement space
        :param load: load demonstrations from stored data instead of recording live demos
        :param n_movements: how many demonstrated movements to learn from
        """

        frequency = 200

        # To use 'saved' demos, set the path below, and set live_demos=False
        live_demos = not load
        DATASET = '' if live_demos else 'datasets'

        obs_config = ObservationConfig()
        obs_config.set_all_low_dim(True)
        obs_config.set_all_high_dim(False)
        self._task_class = task_class
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)

        group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"])

        env = Environment(action_mode, DATASET, obs_config, headless=True)
        env.launch()

        task = env.get_task(task_class)

        trajectories = []
        states = []

        lengths = []

        print("Start Demo")
        demos = task.get_demos(n_movements, live_demos=live_demos)
        print("End Demo")

        init = True
        for demo in demos:
            trajectory = NamedTrajectory(*group.refs)
            t = 0
            for ob in demo:
                if t == 0:
                    if init:
                        print("State dim: %d" % ob.task_low_dim_state.shape[0])
                        init = False
                    states.append(ob.task_low_dim_state)
                kwargs = {
                    "d%d" % i: ob.joint_positions[i]
                    for i in range(ob.joint_positions.shape[0])
                }
                kwargs["gripper"] = ob.gripper_open
                trajectory.notify(duration=1 / frequency, **kwargs)
                t += 1
            lengths.append(t / frequency)
            trajectories.append(trajectory)

        space = ClassicSpace(group,
                             n_features=n_features,
                             regularization=1E-15)
        z = np.linspace(-0.2, 1.2, 1000)
        Phi = space.get_phi(z)
        for i in range(n_features):
            plt.plot(z, Phi[:, i])
        plt.show()
        parameters = np.array([
            np.concatenate([
                s,
                np.array([l]),
                LearnTrajectory(space, traj).get_block_params()
            ]) for s, l, traj in zip(states, lengths, trajectories)
        ])
        np.save("parameters/%s_%d.npy" % (task.get_name(), n_features),
                parameters)
        env.shutdown()
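
The constructor above ends by saving the learned parameters to parameters/<task_name>_<n_features>.npy, which is the same file that Example #20's get_demonstrations later loads. A minimal sketch of reading it back follows; the column layout mirrors the np.concatenate call above (context state, then duration, then the movement-primitive block), and the file name and state_dim are placeholders.

import numpy as np

params = np.load("parameters/reach_target_10.npy")   # placeholder task name / n_features
state_dim = 3                                        # assumed context dimension
contexts = params[:, :state_dim]
durations = params[:, state_dim]
mp_weights = params[:, state_dim + 1:]
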
Example #23
class RLBenchEnv(gym.GoalEnv):
    """An gym wrapper for RLBench."""

    metadata = {'render.modes': ['human']}
    def __init__(self, task_class, observation_mode='state',
                 randomization_mode="none", rand_config=None, img_size=256,
                 special_start=[], fixed_grip=-1, force_randomly_place=False,
                 force_change_position=False, sparse=False, not_special_p=0,
                 ground_p=0, special_is_grip=False, altview=False,
                 procedural_ind=0, procedural_mode='same', procedural_set=[],
                 is_mesh_obs=False, blank=False, COLOR_TUPLE=[1, 0, 0]):
        # blank: replace image observations with zeros.
        self.blank = blank
        # altview: whether to add a second camera angle (True/False), or "both" to concatenate the observations.
        self.altview = altview
        self.img_size = img_size
        self.sparse = sparse
        self.task_class = task_class
        self._observation_mode = observation_mode
        self._randomization_mode = randomization_mode
        # special_start is a list of actions to take at the beginning of each episode.
        self.special_start = special_start
        # fixed_grip: temporary hack for keeping the gripper a certain way.
        # 0 for fixed closed, 0.1 for fixed open, -1 for not fixed.
        self.fixed_grip = fixed_grip
        # Force the task to be randomly placed.
        self.force_randomly_place = force_randomly_place
        # Force the task to change position in addition to rotation.
        self.force_change_position = force_change_position

        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision' or observation_mode=="visiondepth" or observation_mode=="visiondepthmask":
            # obs_config.set_all(True)
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        else:
            raise ValueError(
                'Unrecognised observation_mode: %s.' % observation_mode)

        action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_NOQ)
        print("using delta pose pan")

        if randomization_mode == "random":
            objs = ['target', 'boundary', 'Floor', 'Roof', 'Wall1', 'Wall2', 'Wall3', 'Wall4', 'diningTable_visible']
            if rand_config is None:
                assert False
            self.env = DomainRandomizationEnvironment(
                action_mode, obs_config=obs_config, headless=True,
                randomize_every=RandomizeEvery.EPISODE, frequency=1,
                visual_randomization_config=rand_config
            )
        else:
            self.env = Environment(
                action_mode, obs_config=obs_config, headless=True
            )
        self.env.launch()
        self.task = self.env.get_task(task_class)

        # Probability. Currently used for probability that pick and lift task will start off gripper at a certain location (should probs be called non_special p)
        self.task._task.not_special_p = not_special_p
        # Probability that ground goal.
        self.task._task.ground_p = ground_p 
        # For the "special" case, whether to grip the object or just hover above it. 
        self.task._task.special_is_grip = special_is_grip
        # for procedural env
        self.task._task.procedural_ind = procedural_ind
        # procedural mode: same, increase, or random. 
        self.task._task.procedural_mode = procedural_mode
        # ideally a list-like object, dictates the indices to sample from each episode. 
        self.task._task.procedural_set = procedural_set
        # if state obs is mesh obs
        self.task._task.is_mesh_obs = is_mesh_obs

        self.task._task.sparse = sparse
        self.task._task.COLOR_TUPLE = COLOR_TUPLE
        
        _, obs = self.task.reset()

        cam_placeholder = Dummy('cam_cinematic_placeholder')
        cam_pose = cam_placeholder.get_pose().copy()
        #custom pose
        cam_pose = [ 1.59999931,  0. ,         2.27999949 , 0.65328157, -0.65328145, -0.27059814, 0.27059814]
        cam_pose[0] = 1
        cam_pose[2] = 1.5
        self.frontcam = VisionSensor.create([img_size, img_size])
        self.frontcam.set_pose(cam_pose)

        self.frontcam.set_render_mode(RenderMode.OPENGL)
        self.frontcam.set_perspective_angle(60)
        self.frontcam.set_explicit_handling(1)

        self.frontcam_mask = VisionSensor.create([img_size, img_size])
        self.frontcam_mask.set_pose(cam_pose)
        self.frontcam_mask.set_render_mode(RenderMode.OPENGL_COLOR_CODED)
        self.frontcam_mask.set_perspective_angle(60)
        self.frontcam_mask.set_explicit_handling(1)

        if altview:
            alt_pose = [0.25    , -0.65    ,  1.5,   0, 0.93879825 ,0.34169483 , 0]
            self.altcam = VisionSensor.create([img_size, img_size])
            self.altcam.set_pose(alt_pose)
            self.altcam.set_render_mode(RenderMode.OPENGL)
            self.altcam.set_perspective_angle(60)
            self.altcam.set_explicit_handling(1)
            
            self.altcam_mask = VisionSensor.create([img_size, img_size])
            self.altcam_mask.set_pose(alt_pose)
            self.altcam_mask.set_render_mode(RenderMode.OPENGL_COLOR_CODED)
            self.altcam_mask.set_perspective_angle(60)
            self.altcam_mask.set_explicit_handling(1)


        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,))

        if observation_mode == 'state':
            self.observation_space = spaces.Dict({
                "observation": spaces.Box(
                low=-np.inf, high=np.inf, shape=self.task._task.get_state_obs().shape),
                "achieved_goal": spaces.Box(
                low=-np.inf, high=np.inf, shape=self.task._task.get_achieved_goal().shape),
                'desired_goal': spaces.Box(
                low=-np.inf, high=np.inf, shape=self.task._task.get_desired_goal().shape)
            })
        # Use the frontvision cam
        elif observation_mode == 'vision':
            self.frontcam.handle_explicitly()
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=obs.get_low_dim_data().shape),
                "observation": spaces.Box(
                    low=0, high=1, shape=self.frontcam.capture_rgb().transpose(2,0,1).flatten().shape,
                    )
                })
            if altview == "both":
                example = self.frontcam.capture_rgb().transpose(2,0,1).flatten()
                self.observation_space = spaces.Dict({
                    "state": spaces.Box(
                        low=-np.inf, high=np.inf,
                        shape=obs.get_low_dim_data().shape),
                    "observation": spaces.Box(
                        low=0, high=1, shape=np.array([example,example]).shape,
                        )
                    })
        elif observation_mode == 'visiondepth':

            self.frontcam.handle_explicitly()
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=obs.get_low_dim_data().shape),
                "observation": spaces.Box(
                    #thinking about not flattening the shape, because primarily used for dataset and not for training
                    low=0, high=1, shape=np.array([self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...]]).shape,
                    )
                })
        elif observation_mode == 'visiondepthmask':

            self.frontcam.handle_explicitly()
            self.frontcam_mask.handle_explicitly()
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=obs.get_low_dim_data().shape),
                "observation": spaces.Box(
                    #thinking about not flattening the shape, because primarily used for dataset and not for training
                    low=0, high=1, shape=np.array([self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...], rgb_handles_to_mask(self.frontcam_mask.capture_rgb())]).shape,
                    )
                })
            if altview == "both":
                self.observation_space = spaces.Dict({
                    "state": spaces.Box(
                        low=-np.inf, high=np.inf,
                        shape=obs.get_low_dim_data().shape),
                    "observation": spaces.Box(
                        #thinking about not flattening the shape, because primarily used for dataset and not for training
                        low=0, high=1, shape=np.array([self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...], rgb_handles_to_mask(self.frontcam_mask.capture_rgb())]).shape,
                        )
                    })

        self._gym_cam = None


    # GoalEnv 
    def compute_reward(self, achieved_goal, desired_goal, info):
        reward = self.task._task.compute_reward(achieved_goal, desired_goal, info)
        return reward

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return {
                'observation': self.task._task.get_state_obs(),
                'achieved_goal': self.task._task.get_achieved_goal(),
                'desired_goal':self.task._task.get_desired_goal(),
            }
        elif self._observation_mode == 'vision':
            self.frontcam.handle_explicitly()
            if self.altview:
                self.altcam.handle_explicitly()
                second_view = self.altcam.capture_rgb().transpose(2,0,1).flatten()
                if self.altview == "both":
                    return {
                        'achieved_goal': self.task._task.get_achieved_goal(),
                        'desired_goal':self.task._task.get_desired_goal(),
                        'save_state': self.task._task.get_save_state(),
                        "observation": np.array([self.frontcam.capture_rgb().transpose(2,0,1).flatten(), second_view])
                    } 
                return {
                    'achieved_goal': self.task._task.get_achieved_goal(),
                    'desired_goal':self.task._task.get_desired_goal(),
                    'save_state': self.task._task.get_save_state(),
                    "observation": self.altcam.capture_rgb().transpose(2,0,1).flatten()
                }
            if self.blank:
                return {
                    'achieved_goal': self.task._task.get_achieved_goal(),
                    'desired_goal':self.task._task.get_desired_goal(),
                    'save_state': self.task._task.get_save_state(),
                    "observation": np.zeros(self.observation_space['observation'].shape)
                }                
            return {
                'achieved_goal': self.task._task.get_achieved_goal(),
                'desired_goal':self.task._task.get_desired_goal(),
                'save_state': self.task._task.get_save_state(),
                "observation": self.frontcam.capture_rgb().transpose(2,0,1).flatten(),
            }
        elif self._observation_mode == 'visiondepth':
            self.frontcam.handle_explicitly()
            return {
                "state": obs.get_low_dim_data(),
                "observation": [self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...]]
            }
        elif self._observation_mode == 'visiondepthmask':
            self.frontcam.handle_explicitly()
            self.frontcam_mask.handle_explicitly()

            mask = np.array(self.frontcam_mask.capture_rgb())

            if self.altview:
                self.altcam.handle_explicitly()
                self.altcam_mask.handle_explicitly()
                altmask = np.array(self.altcam_mask.capture_rgb())
                second_view = self.altcam.capture_rgb().transpose(2,0,1)

                if self.altview == "both":
                    assert False
                    # no use case for this
                    return {
                        "state": obs.get_low_dim_data(),
                        "observation": [self.frontcam.capture_rgb().transpose(2,0,1), self.altcam.capture_rgb().transpose(2,0,1).flatten()]
                    } 
                return {
                    "state": obs.get_low_dim_data(),
                    "observation": [self.altcam.capture_rgb().transpose(2,0,1), self.altcam.capture_depth()[None,...], altmask]
                }


            return {
                "state": obs.get_low_dim_data(),
                "observation": [self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...], mask]
            }

    def render(self, mode='human'):
        self.frontcam.handle_explicitly()
        return self.frontcam.capture_rgb()

    def reset(self):
        _, obs = self.task.reset(force_randomly_place=self.force_randomly_place,
                                 force_change_position=self.force_change_position)
        if len(self.special_start) != 0:
            for a in self.special_start:
                self.step(a)
        # Doesn't actually use obs. 
        return self._extract_obs(obs)

    def step(self, action):
        if self.fixed_grip >= 0:
            action[3] = self.fixed_grip
        obs, reward, terminate = self.task.step(action)
        reward, done = self.task._task.get_reward_and_done(sparse=self.sparse)
        return self._extract_obs(obs), reward, done, {"is_success": int(done)}

    def close(self):
        self.env.shutdown()

    def rerender(self, state):
        self.task._task.restore_full_state(state)
        return self.render().transpose(2,0,1).flatten()


    def resample_step(self, state, action, sampled_goal):
        self.task._task.restore_save_state(state)
        Shape('target').set_position(sampled_goal)

        if self.altview == "both":
            self.frontcam.handle_explicitly()
            self.altcam.handle_explicitly()
            o_before = np.array([self.frontcam.capture_rgb().transpose(2,0,1).flatten(), self.altcam.capture_rgb().transpose(2,0,1).flatten()])
        elif self.altview:
            self.altcam.handle_explicitly()
            o_before =  self.altcam.capture_rgb().transpose(2,0,1).flatten()
        else:
            if self.blank:
                o_before = np.zeros(self.observation_space['observation'].shape)
            else:
                o_before = self.render().transpose(2,0,1).flatten()
        o, r, d, _ = self.step(action)
        o_after = o['observation']
        return o_before, r, d, o_after
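
Because the wrapper above implements gym.GoalEnv (dict observations with achieved_goal/desired_goal and a compute_reward hook), it can be paired with hindsight-style goal relabelling. The sketch below only shows the call pattern; SomeTask stands in for any RLBench task class, and the relabelling itself would normally happen inside a replay buffer.

env = RLBenchEnv(SomeTask, observation_mode='state')   # SomeTask: placeholder task class
obs = env.reset()
action = env.action_space.sample()
next_obs, reward, done, info = env.step(action)

# Hindsight relabel: pretend the achieved goal was the desired one.
relabelled_reward = env.compute_reward(
    next_obs['achieved_goal'], next_obs['achieved_goal'], info)
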
Example #24
class RLBenchEnv():
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(
        self,
        task_name: str,
        state_type: list = 'state',
    ):
        # render_mode=None):
        """
        Create an RLBench environment.
        :param task_name: task name; valid names can be found in rlbench.tasks
        :param state_type: 'state', 'vision', or a list of observation field names such as ['left_shoulder_rgb']
        """
        if state_type == 'state' or state_type == 'vision' or isinstance(
                state_type, list):
            self._state_type = state_type
        else:
            raise ValueError(
                'State type value error, your value is {}'.format(state_type))
        # self._render_mode = render_mode
        self._render_mode = None
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=True)
        self.env.launch()
        try:
            self.task = self.env.get_task(
                getattr(sys.modules[__name__], task_name))
        except:
            raise NotImplementedError

        _, obs = self.task.reset()
        self.spec = Spec(task_name)

        if self._state_type == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif self._state_type == 'vision':
            space_dict = OrderedDict()
            space_dict["state"] = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
            for i in [
                    "left_shoulder_rgb", "right_shoulder_rgb", "wrist_rgb",
                    "front_rgb"
            ]:
                space_dict[i] = spaces.Box(low=0,
                                           high=1,
                                           shape=getattr(obs, i).shape)
            self.observation_space = spaces.Dict(space_dict)
        else:
            space_dict = OrderedDict()
            for name in self._state_type:
                if name.split('_')[-1] in ('rgb',):
                    space_dict[name] = spaces.Box(low=0,
                                                  high=1,
                                                  shape=getattr(obs,
                                                                name).shape)
                elif name.split('_')[-1] in ('depth', 'mask'):
                    space_dict[name] = spaces.Box(low=0,
                                                  high=1,
                                                  shape=(128, 128, 3))
                    print(space_dict[name].shape)
                else:
                    space_dict[name] = spaces.Box(low=-np.inf,
                                                  high=np.inf,
                                                  shape=getattr(obs,
                                                                name).shape)
            self.observation_space = spaces.Dict(space_dict)
        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(self.env.action_size, ),
                                       dtype=np.float32)

        # if render_mode is not None:
        #     # Add the camera to the scene
        #     cam_placeholder = Dummy('cam_cinematic_placeholder')
        #     self._gym_cam = VisionSensor.create([640, 360])
        #     self._gym_cam.set_pose(cam_placeholder.get_pose())
        #     if render_mode == 'human':
        #         self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
        #     else:
        #         self._gym_cam.set_render_mode(RenderMode.OPENGL3)

    def _extract_obs(self, obs):
        if self._state_type == 'state':
            return np.array(obs.get_low_dim_data(), np.float32)
        elif self._state_type == 'vision':
            return np.array([
                np.array(obs.get_low_dim_data(), np.float32),
                np.array(obs.left_shoulder_rgb, np.float32),
                np.array(obs.right_shoulder_rgb, np.float32),
                np.array(obs.wrist_rgb, np.float32),
                np.array(obs.front_rgb, np.float32),
            ])
        else:
            result = []
            for name in self._state_type:
                if name.split('_')[-1] in ('mask', 'depth'):
                    a = np.expand_dims(np.array(getattr(obs, name),
                                                np.float32),
                                       axis=-1)
                    a = np.concatenate(
                        (a, np.zeros((128, 128, 2), dtype=a.dtype)), axis=2)
                    result.append(a)
                else:
                    result.append(np.array(getattr(obs, name), np.float32))
            return np.array(result)

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self, mode='human'):
        # TODO: make render available at any time
        if self._render_mode is None:
            self._render_mode = mode
            # Add the camera to the scene
            cam_placeholder = Dummy('cam_cinematic_placeholder')
            self._gym_cam = VisionSensor.create([640, 360])
            self._gym_cam.set_pose(cam_placeholder.get_pose())
            if mode == 'human':
                self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
            else:
                self._gym_cam.set_render_mode(RenderMode.OPENGL3)

        if mode != self._render_mode:
            raise ValueError(
                'The render mode must match the render mode selected in the '
                'constructor. \nI.e. if you want "human" render mode, then '
                'create the env by calling: '
                'gym.make("reach_target-state-v0", render_mode="human").\n'
                'You passed in mode %s, but expected %s.' %
                (mode, self._render_mode))
        if mode == 'rgb_array':
            return self._gym_cam.capture_rgb()

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._extract_obs(obs)

    def step(self, action):
        obs, reward, terminate = self.task.step(action)
        """
        img = Image.fromarray(np.uint8(np.array(obs.left_shoulder_rgb, np.float32)*255), 'RGB')
        img.show()
        a = np.array(obs.left_shoulder_mask, np.float32)
        img = Image.fromarray(np.uint8(a*255), 'L')
        img.show()
        a = np.array(obs.left_shoulder_depth, np.float32)
        img = Image.fromarray(np.uint8(a*255), 'L')
        img.show()
        input('> ')
        """
        return self._extract_obs(obs), reward, terminate, None

    def close(self):
        self.env.shutdown()
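
Usage sketch for the wrapper above; it mirrors the usual gym loop, with the task passed as a string that must name a class in rlbench.tasks ('ReachTarget' here is just an example).

env = RLBenchEnv('ReachTarget', state_type='state')
obs = env.reset()
for _ in range(100):
    obs, reward, terminate, _ = env.step(env.action_space.sample())
    if terminate:
        obs = env.reset()
env.close()
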
Example #25
class RLBenchEnv(object):
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(self, task_name, state_type_list=None, headless=False):
        if state_type_list is None:
            state_type_list = ['left_shoulder_rgb']
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=headless)
        self.env.launch()
        try:
            self.task = self.env.get_task(ReachTarget)
        except:
            raise NotImplementedError

        _, obs = self.task.reset()

        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                if isinstance(state, (int, float)):
                    shape = (1, )
                else:
                    shape = state.shape
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=shape))
        else:
            raise ValueError('No State Type!')
        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)

        self.state_type_list = state_type_list

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self):
        # render the scene
        pass

    def _get_state(self, obs):
        if len(self.state_type_list) > 0:
            state = []
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    image = np.moveaxis(
                        image, 2, 0)  # change (H, W, C) to (C, H, W) for torch
                    state.append(image)
                elif state_type in primitive_types:
                    state.append(np.array([getattr(obs, state_type)]))
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State Type Not Exists!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        obs, reward, terminate = self.task.step(
            action)  # the original RLBench reward is binary (success / failure)

        # distance between the current gripper position and the target position
        # (the arm-tip pose is an alternative reference point):
        # x, y, z, qx, qy, qz, qw = self.task._robot.arm.get_tip().get_pose()
        x, y, z, qx, qy, qz, qw = self.task._robot.gripper.get_pose()
        tar_x, tar_y, tar_z, _, _, _, _ = self.task._task.target.get_pose()
        distance = (np.abs(x - tar_x) + np.abs(y - tar_y) + np.abs(z - tar_z))
        reward -= 1. * distance

        # speed regulation
        joint_velocities = self.task._robot.arm.get_joint_velocities()
        velocities = np.inner(joint_velocities, joint_velocities)
        reward -= 0.05 / len(joint_velocities) * velocities

        # orientation
        # orientation: angle between the gripper's rotation axis and the
        # gripper-to-target vector, folded into [0, pi/2]
        (gripper_x, gripper_y, gripper_z,
         gripper_qx, gripper_qy, gripper_qz,
         gripper_qw) = self.task._robot.gripper.get_pose()
        v1 = np.array(
            (tar_x - gripper_x, tar_y - gripper_y, tar_z - gripper_z))
        # sin(arccos(qw)) approaches zero for a near-identity rotation,
        # so guard the division against a zero denominator
        temp = max(np.sin(np.arccos(np.clip(gripper_qw, -1.0, 1.0))), 1e-8)
        v2 = np.array(
            (gripper_qx / temp, gripper_qy / temp, gripper_qz / temp))
        orientation_angle = np.abs(
            np.arccos(v1.dot(v2) / np.sqrt(v1.dot(v1)) /
                      np.sqrt(v2.dot(v2))))  # [0, pi]
        if orientation_angle > np.pi / 2.:
            orientation_angle = np.pi - orientation_angle
        reward -= 0.5 / np.pi * orientation_angle  # if orientation_angle > np.pi / 8. else 0.
        return self._get_state(obs), reward, terminate, None

    def close(self):
        self.env.shutdown()
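The step() method of Example #25 layers three shaping terms on top of RLBench's sparse success reward: an L1 distance penalty to the target, a quadratic joint-velocity penalty, and a penalty on the angle between the gripper's rotation axis and the gripper-to-target vector. The helper below is a hypothetical, self-contained sketch of that computation (names and weights mirror the code above; eps guards the divisions that can blow up for a near-identity gripper rotation):

import numpy as np

def shaped_reward(base_reward, gripper_pose, target_pos, joint_velocities,
                  dist_w=1.0, vel_w=0.05, ori_w=0.5, eps=1e-8):
    x, y, z, qx, qy, qz, qw = gripper_pose
    tar_x, tar_y, tar_z = target_pos

    # L1 distance between gripper and target
    distance = abs(x - tar_x) + abs(y - tar_y) + abs(z - tar_z)
    reward = base_reward - dist_w * distance

    # quadratic penalty on joint speeds, normalised by the number of joints
    jv = np.asarray(joint_velocities)
    reward -= vel_w / len(jv) * jv.dot(jv)

    # angle between the gripper-to-target vector and the quaternion's
    # rotation axis, folded into [0, pi/2]
    v1 = np.array((tar_x - x, tar_y - y, tar_z - z))
    s = max(np.sin(np.arccos(np.clip(qw, -1.0, 1.0))), eps)
    v2 = np.array((qx, qy, qz)) / s
    cos_angle = v1.dot(v2) / (np.linalg.norm(v1) * np.linalg.norm(v2) + eps)
    angle = np.abs(np.arccos(np.clip(cos_angle, -1.0, 1.0)))
    if angle > np.pi / 2.0:
        angle = np.pi - angle
    reward -= ori_w / np.pi * angle
    return reward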
Example #26
class RLBenchEnv(object):
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(self, task_name, state_type_list=None, headless=False):
        if state_type_list is None:
            state_type_list = ['left_shoulder_rgb']
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=headless)
        self.env.launch()
        try:
            # task_name is ignored here as well; see the note in Example #25
            self.task = self.env.get_task(ReachTarget)
        except Exception:
            raise NotImplementedError

        _, obs = self.task.reset()

        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=state.shape))
        else:
            raise ValueError('No State Type!')
        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)

        self.state_type_list = state_type_list

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self):
        # render the scene
        pass

    def _get_state(self, obs):
        # state = np.hstack((obs.joint_positions, obs.joint_velocities, obs.joint_forces))
        if len(self.state_type_list) > 0:
            state = []
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    image = np.moveaxis(
                        image, 2, 0)  # change (H, W, C) to (C, H, W) for torch
                    state.append(image)
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State Type Not Exists!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        # self.task._task.target.set_pose(np.array([0.354685515165329, -0.025212552398443222, 1.0284103155136108, 0.0, 0.0, 0.0, 1.0]))
        return self._get_state(obs)

    def step(self, action):
        obs, reward, terminate = self.task.step(
            action)  # the original RLBench reward is binary (success / failure)

        # distance between current pos (tip) and target pos
        x, y, z, qx, qy, qz, qw = self.task._robot.arm.get_tip().get_pose()
        tar_x, tar_y, tar_z, _, _, _, _ = self.task._task.target.get_pose()
        distance = (np.abs(x - tar_x) + np.abs(y - tar_y) + np.abs(z - tar_z))
        reward -= 0.5 * distance

        # speed regulation
        joint_velocities = self.task._robot.arm.get_joint_velocities()
        velocities = np.inner(joint_velocities, joint_velocities)
        reward -= 0.1 / len(joint_velocities) * velocities
        return self._get_state(obs), reward, terminate, None

    def close(self):
        self.env.shutdown()
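Example #26 keeps the same gym-like interface but with a lighter shaped reward (distance and velocity terms only). A minimal random-rollout sketch; headless operation and the 'left_shoulder_rgb' observation are assumptions of this snippet, mirroring the constructor defaults above:

import numpy as np

env = RLBenchEnv('ReachTarget', state_type_list=['left_shoulder_rgb'], headless=True)
state = env.reset()
for _ in range(50):
    # sample a joint-velocity action within the Box bounds defined in __init__
    action = np.random.uniform(env.action_space.low, env.action_space.high)
    state, reward, done, _ = env.step(action)
    if done:
        state = env.reset()
env.close()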