Example #1
class RLBenchEnv():
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(self, task_name, state_type_list=['left_shoulder_rgb']):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(
            action_mode, obs_config=obs_config, headless=False)
        self.env.launch()
        try:
            self.task = self.env.get_task(getattr(sys.modules[__name__], task_name))
        except AttributeError:
            raise NotImplementedError('Task %s is not defined in this module' % task_name)

        _, obs = self.task.reset()
        if len(state_type_list)>0:
            self.observation_space=[]
            for state_type in state_type_list:
                state=getattr(obs, state_type)
                self.observation_space.append(spaces.Box(low=-np.inf, high=np.inf, shape=state.shape))
        else:
            raise ValueError('No State Type!')
        self.action_space =  spaces.Box(low=-1.0, high=1.0, shape=(action_mode.action_size,), dtype=np.float32)
        
        self.state_type_list = state_type_list
        self.spec = Spec(task_name)

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass 

    def render(self):
        # render the scene
        pass

    def _get_state(self, obs):
        if len(self.state_type_list)>0:
            state=[]
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    image=np.moveaxis(image, 2, 0)  # change (H, W, C) to (C, H, W) for torch
                    state.append(image)
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State type does not exist!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        obs_, reward, terminate = self.task.step(action)  # reward in original rlbench is binary for success or not
        return self._get_state(obs_), reward, terminate, None


    def close(self):
        self.env.shutdown()
def collect_demos(env: Environment,
                  task: Task,
                  taskname,
                  n_iter_per_var=50,
                  n_demos_per_iter=1000):
    env.launch()
    task_env = env.get_task(task)

    for variation_index in range(1):  #task_env.variation_count()):
        # set variation
        task_env.set_variation(variation_index)
        description, _ = task_env.reset()

        # collect and save demos
        start = time.time()
        for i in range(n_iter_per_var):
            np.random.seed(6)
            demos = task_env.get_demos(n_demos_per_iter,
                                       live_demos=True,
                                       max_attempts=1)
            demos = np.array(demos).flatten()
            print(demos.shape)
            save_to_hdf5(demos, taskname, description, variation_index)
        print("TIME: ", time.time() - start)

    # env.shutdown()
    return demos
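A hedged sketch of how collect_demos might be invoked; CloseMicrowave stands in for any task class, and save_to_hdf5 is assumed to be defined elsewhere in the original module:

# Hypothetical driver for collect_demos (small counts keep the run short).
action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
env = Environment(action_mode, obs_config=ObservationConfig(), headless=True)
demos = collect_demos(env, CloseMicrowave, 'close_microwave',
                      n_iter_per_var=1, n_demos_per_iter=5)
env.shutdown()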
Example #3
def sub_proc(child):
    action_mode = ActionMode()
    env = Environment(action_mode, headless=child)
    env.launch()
    task = env.get_task(CloseMicrowave)

    descriptions, obs = task.reset()

    for i in range(1000):
        obs, reward, terminate = task.step(np.random.normal(np.zeros(action_mode.action_size)))
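Since sub_proc takes the headless flag through its child argument, a sketch of running it in a separate process (assuming only the standard multiprocessing module) could look like this:

# Run the random-action loop above in a headless child process.
import multiprocessing as mp

if __name__ == '__main__':
    p = mp.Process(target=sub_proc, args=(True,))
    p.start()
    p.join()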
Example #4
def act(graph,model,sess):
    obs_config = ObservationConfig()
    # obs_config.set_all(True)

    # These parameters are used to turn off the cameras
    obs_config.right_shoulder_camera.set_all(False)
    obs_config.wrist_camera.set_all(False)
    obs_config.left_shoulder_camera.set_all(False)

    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    env = Environment(
        action_mode, obs_config=obs_config, headless=False)
    env.launch()

    task = env.get_task(ReachTarget)

    training_steps = 150
    episode_length = 60
    obs = None
    gripper = [1.0]  # keep the gripper open

    for i in range(training_steps):
        # Initialize the task once per episode, including the initial observation data
        if i % episode_length == 0:
            print('Reset Episode')
            descriptions, obs = task.reset()
            print(descriptions)
        
        # Network inputs
        vision = obs.front_rgb
        vision = vision[np.newaxis,:]
        state = obs.joint_positions
        state = state[np.newaxis,:]
        action = None
        feed_dict = {
            model.vision:vision,
            model.state:state,
            model.action:action
        }
        run_ops = [model.predict_out]

        with graph.as_default():
            res = sess.run(run_ops,feed_dict=feed_dict)

        # print(res[0][0].shape)
        # print(gripper.shape)
        # input('check shape')
        action = np.concatenate([res[0][0],gripper],axis=-1)
        obs, reward, terminate = task.step(action)
        if reward == 1:
            print("successful try!")

    print('Done')
    env.shutdown()
Example #5
 def _update_info_dict(self):
     # update info dict
     self._info["action mode"] = self._action_mode
     self._info["observation mode"] = self._observation_mode
     # TODO: action dim should be related to the robot, not the action mode; it is fixed here temporarily
     self._info["action dim"] = (self._action_config.action_size, )
     self._info["action low"] = -np.ones(self._info["action dim"],
                                         dtype=np.float32)
     self._info["action high"] = np.ones(self._info["action dim"],
                                         dtype=np.float32)
     if self._observation_mode == "state" or self._observation_mode == "all":
         # TODO: the observation shape should be determined without initializing the entire environment
         with suppress_stdout():
             env = RLEnvironment(action_mode=self._action_config,
                                 obs_config=self._observation_config,
                                 headless=True,
                                 robot_configuration=self._robot_name)
             env.launch()
             task = env.get_task(get_named_class(self._task_name, tasks))
             _, obs = task.reset()
             env.shutdown()
             del task
             del env
         self._info["time step"] = _DT
         self._info["state dim"] = tuple(obs.get_low_dim_data().shape)
         self._info["state low"] = np.ones(self._info["state dim"],
                                           dtype=np.float32) * -np.inf
         self._info["state high"] = np.ones(self._info["state dim"],
                                            dtype=np.float32) * np.inf
     if self._observation_mode == "vision" or self._observation_mode == "all":
         self._info["left shoulder rgb dim"] = tuple(
             self._observation_config.left_shoulder_camera.image_size) + (
                 3, )
         self._info["left shoulder rgb low"] = np.zeros(
             self._info["left shoulder rgb dim"], dtype=np.float32)
         self._info["left shoulder rgb high"] = np.ones(
             self._info["left shoulder rgb dim"], dtype=np.float32)
         self._info["right shoulder rgb  dim"] = tuple(
             self._observation_config.right_shoulder_camera.image_size) + (
                 3, )
         self._info["right shoulder rgb  low"] = np.zeros(
             self._info["right shoulder rgb  dim"], dtype=np.float32)
         self._info["right shoulder rgb  high"] = np.ones(
             self._info["right shoulder rgb  dim"], dtype=np.float32)
         self._info["wrist rgb dim"] = tuple(
             self._observation_config.wrist_camera.image_size) + (3, )
         self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"],
                                                dtype=np.float32)
         self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"],
                                                dtype=np.float32)
     self._info["reward low"] = -np.inf
     self._info["reward high"] = np.inf
Example #6
class RLBenchEnv():
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(self, task_name, state_type='left_shoulder_rgb'):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=False)
        self.env.launch()
        try:
            self.task = self.env.get_task(
                getattr(sys.modules[__name__], task_name))
        except AttributeError:
            raise NotImplementedError('Task %s is not defined in this module' % task_name)

        _, obs = self.task.reset()
        state = getattr(obs, state_type)
        self.state_type = state_type
        self.spec = Spec(task_name)

        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=state.shape)

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self):
        # render the scene
        pass

    def reset(self):
        descriptions, obs = self.task.reset()
        return getattr(obs, self.state_type)

    def step(self, action):
        obs_, reward, terminate = self.task.step(action)
        return getattr(obs_, self.state_type), reward, terminate, None

    def close(self):
        self.env.shutdown()
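A minimal rollout sketch for the wrapper above, assuming the RLBench task classes (e.g. ReachTarget) are imported into this module so the getattr lookup can resolve the task name:

# Hypothetical usage; 'ReachTarget' must name a task class visible in this module.
env = RLBenchEnv('ReachTarget', state_type='left_shoulder_rgb')
state = env.reset()                      # image observation selected by state_type
for _ in range(100):
    state, reward, done, _ = env.step(env.action_space.sample())
    if done:
        state = env.reset()
env.close()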
Example #7
def job_worker_validation(worker_id, action_mode, obs_config, task_class,
                          command_q: mp.Queue, result_q: mp.Queue, obs_scaling,
                          redundancy_resolution_setup, seed):

    np.random.seed(worker_id + seed)
    # setup the environment
    env = Environment(action_mode=action_mode,
                      obs_config=obs_config,
                      headless=True)
    env.launch()
    task = env.get_task(task_class)
    task.reset()
    while True:
        command = command_q.get()
        command_type = command[0]
        command_args = command[1]
        if command_type == "reset":
            task.set_variation(task.sample_variation())
            descriptions, observation = task.reset()
            if obs_scaling is not None:
                observation = observation.get_low_dim_data() / obs_scaling
            else:
                observation = observation.get_low_dim_data()
            result_q.put((descriptions, observation))
        elif command_type == "step":
            actions = command_args[0]
            # check if we need to resolve redundancy
            if redundancy_resolution_setup["use_redundancy_resolution"]:
                actions[0:7], L = task.resolve_redundancy_joint_velocities(
                    actions=actions[0:7], setup=redundancy_resolution_setup)
            else:
                # here we only care about the loss (for logging)
                _, L = task.resolve_redundancy_joint_velocities(
                    actions=actions[0:7], setup=redundancy_resolution_setup)

            next_observation, reward, done = task.step(actions)
            if obs_scaling is not None:
                next_observation = next_observation.get_low_dim_data(
                ) / obs_scaling
            else:
                next_observation = next_observation.get_low_dim_data()
            result_q.put((next_observation, reward, done, L))
        elif command_type == "kill":
            print("Killing worker %d" % worker_id)
            env.shutdown()
            break
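A sketch of the parent-side protocol implied by the command branches above ('reset', 'step', 'kill'); action_mode, obs_config, ReachTarget and the 8-dimensional action are assumptions carried over from the other examples:

# Hypothetical parent process driving a single validation worker.
import multiprocessing as mp

command_q, result_q = mp.Queue(), mp.Queue()
setup = {"use_redundancy_resolution": False}
worker = mp.Process(target=job_worker_validation,
                    args=(0, action_mode, obs_config, ReachTarget,
                          command_q, result_q, None, setup, 1234))
worker.start()
command_q.put(("reset", None))
descriptions, observation = result_q.get()
command_q.put(("step", (np.zeros(8),)))    # assumes 7 joint values + 1 gripper value
next_observation, reward, done, L = result_q.get()
command_q.put(("kill", None))
worker.join()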
Example #8
class SimulationEnvironment():
    """
    This can serve as a parent class from which multiple child classes are derived,
    each specialising in a different task and adding deeper, task-specific functionality.
    """

    def __init__(self,
                 task_name,
                 state_type_list=['left_shoulder_rgb'],
                 dataset_root='',
                 observation_mode='state',
                 headless=True):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self._observation_mode = observation_mode
        self.env = Environment(
            action_mode, dataset_root, obs_config=obs_config, headless=headless)
        # Don't strictly need to call launch, as get_task can launch the env.
        self.env.launch()
        self.task = self.env.get_task(task_name)
        _, obs = self.task.reset()
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
#         self.logger = logger.create_logger(__class__.__name__)
#         self.logger.propagate = 0
        if len(state_type_list) > 0:
            self.observation_space = []
            # for state_type in state_type_list:
            #     state = getattr(obs, state_type)
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)      
        else:
            raise ValueError('No State Type!')

        self.state_type_list = state_type_list

    def _get_state(self, obs):
        if len(self.state_type_list) > 0:
            if self._observation_mode == 'state':
                return self.get_low_dim_data(obs)
            elif self._observation_mode == 'vision':
                return None

    def get_low_dim_data(self, obs) -> np.ndarray:
        """Gets a 1D array of all the low-dimensional obseervations.
        :return: 1D array of observations.
        """
        low_dim_data = [] if obs.gripper_open is None else [[obs.gripper_open]]
        for data in [obs.joint_velocities, obs.joint_positions,
                     obs.joint_forces,
                     obs.gripper_pose, obs.gripper_joint_positions,
                     obs.gripper_touch_forces, obs.task_low_dim_state]:
            if data is not None:
                low_dim_data.append(data)
        return np.concatenate(low_dim_data)


    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        # reward in original rlbench is binary for success or not
        obs_, reward, terminate = self.task.step(action)
        return self._get_state(obs_), reward, terminate, None

    def shutdown(self):
        #         self.logger.info("Environment Shutdown! Create New Instance If u want to start again")
        #         self.logger.handlers.pop()
        self.env.shutdown()
Example #9
def run(i, lock, task_index, variation_count, results, file_lock, tasks):
    """Each thread will choose one task and variation, and then gather
    all the episodes_per_task for that variation."""

    # Initialise each thread with random seed
    np.random.seed(None)
    num_tasks = len(tasks)

    img_size = list(map(int, FLAGS.image_size))

    obs_config = ObservationConfig()
    obs_config.set_all(True)
    obs_config.right_shoulder_camera.image_size = img_size
    obs_config.left_shoulder_camera.image_size = img_size
    obs_config.wrist_camera.image_size = img_size
    obs_config.front_camera.image_size = img_size
    # We want to save the masks as rgb encodings.
    obs_config.left_shoulder_camera.masks_as_one_channel = False
    obs_config.right_shoulder_camera.masks_as_one_channel = False
    obs_config.wrist_camera.masks_as_one_channel = False
    obs_config.front_camera.masks_as_one_channel = False

    if FLAGS.renderer == 'opengl':
        obs_config.right_shoulder_camera.render_mode = RenderMode.OPENGL
        obs_config.left_shoulder_camera.render_mode = RenderMode.OPENGL
        obs_config.wrist_camera.render_mode = RenderMode.OPENGL
        obs_config.front_camera.render_mode = RenderMode.OPENGL

    rlbench_env = Environment(action_mode=ActionMode(),
                              obs_config=obs_config,
                              headless=True)
    rlbench_env.launch()

    task_env = None

    tasks_with_problems = results[i] = ''

    while True:
        # Figure out what task/variation this thread is going to do
        with lock:

            if task_index.value >= num_tasks:
                print('Process', i, 'finished')
                break

            my_variation_count = variation_count.value
            t = tasks[task_index.value]
            task_env = rlbench_env.get_task(t)
            var_target = task_env.variation_count()
            if FLAGS.variations >= 0:
                var_target = np.minimum(FLAGS.variations, var_target)
            if my_variation_count >= var_target:
                # If we have reached the required number of variations for this
                # task, then move on to the next task.
                variation_count.value = my_variation_count = 0
                task_index.value += 1

            variation_count.value += 1
            if task_index.value >= num_tasks:
                print('Process', i, 'finished')
                break
            t = tasks[task_index.value]

            print('Process', i, 'collecting task:', task_env.get_name(),
                  '// variation:', my_variation_count)

        task_env = rlbench_env.get_task(t)
        task_env.set_variation(my_variation_count)
        descriptions, obs = task_env.reset()

        variation_path = os.path.join(FLAGS.save_path, task_env.get_name(),
                                      VARIATIONS_FOLDER % my_variation_count)

        check_and_make(variation_path)

        with open(os.path.join(variation_path, VARIATION_DESCRIPTIONS),
                  'wb') as f:
            pickle.dump(descriptions, f)

        episodes_path = os.path.join(variation_path, EPISODES_FOLDER)
        check_and_make(episodes_path)

        abort_variation = False
        for ex_idx in range(FLAGS.episodes_per_task):
            attempts = 10
            while attempts > 0:
                try:
                    # TODO: for now we do the explicit looping.
                    demo, = task_env.get_demos(amount=1, live_demos=True)
                except Exception as e:
                    attempts -= 1
                    if attempts > 0:
                        continue
                    problem = (
                        'Process %d failed collecting task %s (variation: %d, '
                        'example: %d). Skipping this task/variation.\n%s\n' %
                        (i, task_env.get_name(), my_variation_count, ex_idx,
                         str(e)))
                    print(problem)
                    tasks_with_problems += problem
                    abort_variation = True
                    break
                episode_path = os.path.join(episodes_path,
                                            EPISODE_FOLDER % ex_idx)
                with file_lock:
                    save_demo(demo, episode_path)
                break
            if abort_variation:
                break

    results[i] = tasks_with_problems
    rlbench_env.shutdown()
Example #10
class RLBenchDataEnvGroup(DataEnvGroup):
    ''' DataEnvGroup for RLBench environment. 
        
        + The observation space can be modified through `global_config.env_args`
        + Observation space:
            - 'state': proprioceptive feature : [37] + task_specific
                > robot joint - velocities, positions, forces
                > gripper - pose, joint-position, touch_forces
                > task_low_dim_state
            - RGB-D/RGB image : (128 x 128 by default)
                > 'left_shoulder_rgb', 'right_shoulder_rgb'
                > 'front_rgb', 'wrist_rgb'
            - NOTE : Better to use only one of the RGB observations rather than all of them; this saves a lot of time during env creation.

            - Refer: https://github.com/stepjam/RLBench/blob/20988254b773aae433146fff3624d8bcb9ed7330/rlbench/observation_config.py

        + The action spaces by default are joint velocities [7] and gripper actuations [1].
            - Dimension : [8]
    '''
    def __init__(self, get_episode_type=None):
        super(RLBenchDataEnvGroup, self).__init__(get_episode_type)
        assert self.env_name == 'RLBENCH'
        self.env_obj = None
        self.use_gym = self.config.env_args['use_gym']

        self.observation_mode = self.config.env_args['observation_mode'].lower(
        )
        self.left_obv_key = 'left_shoulder_rgb'
        self.right_obv_key = 'right_shoulder_rgb'
        self.wrist_obv_key = 'wrist_rgb'
        self.front_obv_key = 'front_rgb'

        if self.observation_mode not in ['vision', 'state'
                                         ] and not self.use_gym:
            self.config.env_args['vis_obv_key'] = self.observation_mode

        self.vis_obv_key = self.config.env_args['vis_obv_key']
        self.dof_obv_key = 'state'
        self.env_action_key = 'joint_velocities'
        self.env_gripper_key = 'gripper_open'

        self.obs_space = {
            self.dof_obv_key: (37),
            self.left_obv_key: (128, 128, 3),
            self.right_obv_key: (128, 128, 3),
            self.wrist_obv_key: (128, 128, 3),
            self.front_obv_key: (128, 128, 3)
        }

        self.action_space, self.gripper_space = (7), (1)
        if self.config.env_args['combine_action_space']:
            self.action_space += self.gripper_space

    def get_env(self, task=None):
        task = task if task else self.config.env_type
        if self.use_gym:
            assert type(
                task
            ) == str  # NOTE : When using gym, the task has to be represented as a string.
            assert self.observation_mode in ['vision', 'state']

            env = gym.make(
                task,
                observation_mode=self.config.env_args['observation_mode'],
                render_mode=self.config.env_args['render_mode'])
            self.env_obj = env
        else:
            obs_config = ObservationConfig()
            if self.observation_mode == 'vision':
                obs_config.set_all(True)
            elif self.observation_mode == 'state':
                obs_config.set_all_high_dim(False)
                obs_config.set_all_low_dim(True)
            else:
                obs_config_dict = {
                    self.left_obv_key: obs_config.left_shoulder_camera,
                    self.right_obv_key: obs_config.right_shoulder_camera,
                    self.wrist_obv_key: obs_config.wrist_camera,
                    self.front_obv_key: obs_config.front_camera
                }

                assert self.observation_mode in obs_config_dict.keys()

                obs_config.set_all_high_dim(False)
                obs_config_dict[self.observation_mode].set_all(True)
                obs_config.set_all_low_dim(True)

            # TODO : Write code to change it from env_args
            action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
            self.env_obj = Environment(action_mode,
                                       obs_config=obs_config,
                                       headless=True)

            task = task if task else ReachTarget
            if type(task) == str:
                task = task.split('-')[0]
                task = self.env_obj._string_to_task(task)

            self.env_obj.launch()
            env = self.env_obj.get_task(
                task)  # NOTE : `env` is referred to as `task` in the RLBench docs.
        return env

    def _get_obs(self, obs, key):
        assert obs is not None and key is not None
        if type(obs) == tuple:
            obs = obs[1]
        if type(obs) == dict:
            return obs[key]
        elif type(obs) == Observation:
            if key == 'state':
                return obs.get_low_dim_data()
            return getattr(obs, key)

    def shutdown_env(self):
        if self.env_obj is None:
            print("Environment not created, call `.get_env()`")
        elif self.use_gym:
            self.env_obj.close()
        else:
            self.env_obj.shutdown()
        self.env_obj = None

    def teleoperate(self, demons_config, task=None):
        if self.config.env_args['keyboard_teleop']:
            raise NotImplementedError
        else:
            if self.env_obj is None or task is None:
                env = self.get_env(task)
            else:
                if type(task) == str:
                    task = self.env_obj._string_to_task(task.split('-')[0])
                env = self.env_obj.get_task(task)

            if self.use_gym:
                demos = env.task.get_demos(demons_config.n_runs,
                                           live_demos=True)
            else:
                demos = env.get_demos(demons_config.n_runs, live_demos=True)
            demos = np.array(demos).flatten()

            for i in range(demons_config.n_runs):
                sample = demos[i]
                if self.observation_mode != 'state':
                    tr_vobvs = np.array([
                        self._get_obs(obs, self.vis_obv_key) for obs in sample
                    ])
                tr_dof = np.array([
                    self._get_obs(obs, self.dof_obv_key).flatten()
                    for obs in sample
                ])
                tr_actions = np.array([
                    self._get_obs(obs, self.env_action_key).flatten()
                    for obs in sample
                ])
                tr_gripper = np.array(
                    [[self._get_obs(obs, self.env_gripper_key)]
                     for obs in sample])

                if self.config.env_args['combine_action_space']:
                    tr_actions = np.concatenate((tr_actions, tr_gripper),
                                                axis=-1)

                print("Storing Trajectory")
                trajectory = {self.dof_obv_key: tr_dof, 'action': tr_actions}
                if self.observation_mode != 'state':
                    trajectory.update({self.vis_obv_key: tr_vobvs})
                store_trajectoy(trajectory, episode_type='teleop', task=task)

            self.shutdown_env()

    def random_trajectory(self, demons_config):
        env = self.get_env()
        obs = env.reset()

        tr_vobvs, tr_dof, tr_actions = [], [], []
        for step in range(demons_config.flush_freq):
            if self.observation_mode != 'state':
                tr_vobvs.append(np.array(self._get_obs(obs, self.vis_obv_key)))
            tr_dof.append(
                np.array(self._get_obs(obs, self.dof_obv_key).flatten()))

            action = np.random.normal(size=self.action_space)
            obs, reward, done, info = env.step(action)

            tr_actions.append(action)

        print("Storing Trajectory")
        trajectory = {
            self.dof_obv_key: np.array(tr_dof),
            'action': np.array(tr_actions)
        }
        if self.observation_mode != 'state':
            trajectory.update({self.vis_obv_key: np.array(tr_vobvs)})
        store_trajectoy(trajectory, episode_type='random')
        self.shutdown_env()
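The class docstring above describes a config-driven observation and action layout; below is a rough sketch of pulling a single low-dimensional observation through the wrapper. DataEnvGroup, global_config and store_trajectoy belong to the surrounding project and are only assumed here:

# Hypothetical use of RLBenchDataEnvGroup outside its trajectory-collection methods;
# the project config is assumed to select the non-gym path and 'state' observations.
deg = RLBenchDataEnvGroup()
env = deg.get_env(ReachTarget)
obs = env.reset()                             # RLBench reset -> (descriptions, Observation)
state = deg._get_obs(obs, deg.dof_obv_key)    # 1D proprioceptive vector
print(state.shape)
deg.shutdown_env()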
Example #11
class PPCAImitation:
    def __init__(self,
                 task_class,
                 state_dim,
                 n_features,
                 n_cluster,
                 n_latent,
                 parameters=None,
                 headless=False,
                 cov_reg=1E-8,
                 n_samples=50,
                 dense_reward=False,
                 imitation_noise=0.03):

        obs_config = ObservationConfig()
        obs_config.set_all_low_dim(True)
        obs_config.set_all_high_dim(False)
        self._obs_config = obs_config

        self._state_dim = state_dim
        self._headless = headless

        action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
        self._task_class = task_class
        self._action_mode = action_mode
        self.env = Environment(action_mode, "", obs_config, headless=headless)
        self.env.launch()

        self.task = self.env.get_task(task_class)
        if parameters is None:
            self.parameters = np.load(
                "parameters/%s_%d.npy" %
                (self.task.get_name(), n_features))[:n_samples]
        # parameters = np.concatenate([parameters for _ in range(20)])
        self.imitation_noise = imitation_noise
        self.parameters[:, :3] += imitation_noise * np.random.normal(
            size=self.parameters[:, :3].shape)
        self.mppca = MPPCA(n_cluster,
                           n_latent,
                           n_iterations=30,
                           cov_reg=cov_reg,
                           n_init=500)
        self.mppca.fit(self.parameters)

        self.rlmppca = None
        self.dense_reward = dense_reward

        print(np.exp(self.mppca.log_pi))

        group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"])
        self.space = ClassicSpace(group, n_features=n_features)

        print("mpcca learned")

    def stop(self, joint_gripper, previous_reward):
        if previous_reward == 0.:
            success = 0.
            for _ in range(50):
                obs, reward, terminate = self.task.step(joint_gripper)
                if reward == 1.0:
                    success = 1.
                    break
            return self.task._task.get_dense_reward(), success
        return self.task._task.get_dense_reward(), 1.

    def collect_samples(self,
                        n_episodes=50,
                        noise=True,
                        isomorphic_noise=False):

        success_list = []
        reward_list = []
        latent = []
        cluster = []
        observations = []
        parameters = []
        ob = self.task.reset()
        for i in range(n_episodes):
            context = ob[1].task_low_dim_state
            observations.append(context)
            w, z, k = self.rlmppca.generate_full(
                np.expand_dims(context, 0),
                noise=noise,
                isomorphic_noise=isomorphic_noise)
            # w = self.parameters[i, 3:]
            parameters.append(w)
            latent.append(z)
            cluster.append(k)
            mp = MovementPrimitive(
                self.space,
                MovementPrimitive.get_params_from_block(self.space, w[1:]))
            duration = 1 if w[0] < 0 else w[0]
            print(duration)
            if self._headless:
                trajectory = mp.get_full_trajectory(duration=duration,
                                                    frequency=200)
            else:
                trajectory = mp.get_full_trajectory(duration=5 * duration,
                                                    frequency=200)
            tot_reward = 0.
            success = 0
            for a1 in trajectory.values:  # , a2 in zip(trajectory.values[:-1, :], trajectory.values[1:, :]):
                joint = a1  # (a2-a1)*20.
                joint_gripper = joint
                obs, reward, terminate = self.task.step(joint_gripper)
                if reward == 1. or terminate == 1.:
                    if reward == 1.:
                        success = 1.
                    break
            tot_reward, success = self.stop(joint_gripper, success)
            # tot_reward =  -np.mean(np.abs(context - ob[1].gripper_pose[:3]))
            # tot_reward = -(w[0]-0.2)**2
            print(tot_reward)
            # print("my reward", -np.mean(np.abs(context - ob[1].gripper_pose[:3])))
            # print("parameters", parameters[-1])
            success_list.append(success)

            if self.dense_reward:
                reward_list.append(tot_reward)
            else:
                reward_list.append(success)

            ob = self.task.reset()

        print("-" * 50)
        print("Total reward", np.mean(reward_list))
        print("-" * 50)
        return np.array(success_list), np.array(reward_list), np.array(
            parameters), np.array(latent), np.array(cluster), np.array(
                observations)

    def run_episode(self):
        self.env = Environment(self._action_mode,
                               "",
                               self._obs_config,
                               headless=self._headless)
        self.env.launch()

        self.task = self.env.get_task(self._task_class)

    def shutdown(self):
        self.env.shutdown()
Example #12
class FakeRLBenchEnv(Environment):

    ROBOT_NAME = SUPPORTED_ROBOTS.keys()
    OBSERVATION_MODE = ("state", "vision", "all")
    ACTION_MODE = {
        "joint velocity": ArmActionMode.ABS_JOINT_VELOCITY,
        "delta joint velocity": ArmActionMode.DELTA_JOINT_VELOCITY,
        "joint position": ArmActionMode.ABS_JOINT_POSITION,
        "delta joint position": ArmActionMode.DELTA_JOINT_POSITION,
        "joint torque": ArmActionMode.ABS_JOINT_TORQUE,
        "delta joint torque": ArmActionMode.DELTA_JOINT_TORQUE,
        "effector velocity": ArmActionMode.ABS_EE_VELOCITY,
        "delta effector velocity": ArmActionMode.DELTA_EE_VELOCITY,
        "effector position": ArmActionMode.ABS_EE_POSE,
        "delta effector position": ArmActionMode.DELTA_EE_POSE
    }

    def __init__(self,
                 task_name: str,
                 observation_mode: str = "state",
                 action_mode: str = "delta joint position",
                 robot_name: str = "panda"):
        super(FakeRLBenchEnv, self).__init__(task_name)
        if task_name not in all_class_names(tasks):
            raise KeyError(f"Error: unknown task name {task_name}")
        if observation_mode not in FakeRLBenchEnv.OBSERVATION_MODE:
            raise KeyError(
                f"Error: unknown observation mode {observation_mode}, available: {FakeRLBenchEnv.OBSERVATION_MODE}"
            )
        if action_mode not in FakeRLBenchEnv.ACTION_MODE:
            raise KeyError(
                f"Error: unknown action mode {action_mode}, available: {FakeRLBenchEnv.ACTION_MODE.keys()}"
            )
        if robot_name not in FakeRLBenchEnv.ROBOT_NAME:
            raise KeyError(
                f"Error: unknown robot name {robot_name}, available: {FakeRLBenchEnv.ROBOT_NAME}"
            )

        # TODO: modify the task/robot/arm/gripper to support early instantiation before V-REP is launched
        self._observation_mode = observation_mode
        self._action_mode = action_mode
        self._task_name = task_name
        self._robot_name = robot_name

        self._observation_config = ObservationConfig()
        if self._observation_mode == "state":
            self._observation_config.set_all_low_dim(True)
            self._observation_config.set_all_high_dim(False)
        elif self._observation_mode == "vision":
            self._observation_config.set_all_low_dim(False)
            self._observation_config.set_all_high_dim(True)
        elif self._observation_mode == "all":
            self._observation_config.set_all(True)

        self._action_config = ActionMode(
            FakeRLBenchEnv.ACTION_MODE[self._action_mode])

        self.env = None
        self.task = None

        self._update_info_dict()

    def init(self, display=False):
        with suppress_stdout():
            self.env = RLEnvironment(action_mode=self._action_config,
                                     obs_config=self._observation_config,
                                     headless=not display,
                                     robot_configuration=self._robot_name)
            self.env.launch()
            self.task = self.env.get_task(
                get_named_class(self._task_name, tasks))

    def reset(self, random: bool = True) -> StepDict:
        if not random:
            np.random.seed(0)
        self.task._static_positions = not random
        descriptions, obs = self.task.reset()
        # Returns a list of descriptions and the first observation
        next_step = {"opt": descriptions}

        if self._observation_mode == "state" or self._observation_mode == "all":
            next_step['s'] = obs.get_low_dim_data()
        if self._observation_mode == "vision" or self._observation_mode == "all":
            next_step["left shoulder rgb"] = obs.left_shoulder_rgb
            next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb
            next_step["wrist_rgb"] = obs.wrist_rgb
        return next_step

    def step(self, last_step: StepDict) -> (StepDict, StepDict, bool):
        assert 'a' in last_step, "Key 'a' for action not in last_step; maybe you passed the wrong dict?"

        obs, reward, terminate = self.task.step(last_step['a'])
        last_step['r'] = reward
        last_step["info"] = {}
        next_step = {"opt": None}

        if self._observation_mode == "state" or self._observation_mode == "all":
            next_step['s'] = obs.get_low_dim_data()
        if self._observation_mode == "vision" or self._observation_mode == "all":
            next_step["left shoulder rgb"] = obs.left_shoulder_rgb
            next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb
            next_step["wrist_rgb"] = obs.wrist_rgb
        return last_step, next_step, terminate

    def finalize(self) -> bool:
        with suppress_stdout():
            self.env.shutdown()
        self.task = None
        self.env = None
        return True

    def name(self) -> str:
        return self._task_name

    # ------------- private methods ------------- #

    def _update_info_dict(self):
        # update info dict
        self._info["action mode"] = self._action_mode
        self._info["observation mode"] = self._observation_mode
        # TODO: action dim should be related to the robot, not the action mode; it is fixed here temporarily
        self._info["action dim"] = (self._action_config.action_size, )
        self._info["action low"] = -np.ones(self._info["action dim"],
                                            dtype=np.float32)
        self._info["action high"] = np.ones(self._info["action dim"],
                                            dtype=np.float32)
        if self._observation_mode == "state" or self._observation_mode == "all":
            # TODO: the observation shape should be determined without initializing the entire environment
            with suppress_stdout():
                env = RLEnvironment(action_mode=self._action_config,
                                    obs_config=self._observation_config,
                                    headless=True,
                                    robot_configuration=self._robot_name)
                env.launch()
                task = env.get_task(get_named_class(self._task_name, tasks))
                _, obs = task.reset()
                env.shutdown()
                del task
                del env
            self._info["time step"] = _DT
            self._info["state dim"] = tuple(obs.get_low_dim_data().shape)
            self._info["state low"] = np.ones(self._info["state dim"],
                                              dtype=np.float32) * -np.inf
            self._info["state high"] = np.ones(self._info["state dim"],
                                               dtype=np.float32) * np.inf
        if self._observation_mode == "vision" or self._observation_mode == "all":
            self._info["left shoulder rgb dim"] = tuple(
                self._observation_config.left_shoulder_camera.image_size) + (
                    3, )
            self._info["left shoulder rgb low"] = np.zeros(
                self._info["left shoulder rgb dim"], dtype=np.float32)
            self._info["left shoulder rgb high"] = np.ones(
                self._info["left shoulder rgb dim"], dtype=np.float32)
            self._info["right shoulder rgb  dim"] = tuple(
                self._observation_config.right_shoulder_camera.image_size) + (
                    3, )
            self._info["right shoulder rgb  low"] = np.zeros(
                self._info["right shoulder rgb  dim"], dtype=np.float32)
            self._info["right shoulder rgb  high"] = np.ones(
                self._info["right shoulder rgb  dim"], dtype=np.float32)
            self._info["wrist rgb dim"] = tuple(
                self._observation_config.wrist_camera.image_size) + (3, )
            self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"],
                                                   dtype=np.float32)
            self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"],
                                                   dtype=np.float32)
        self._info["reward low"] = -np.inf
        self._info["reward high"] = np.inf

    def live_demo(self, amount: int, random: bool = True) -> SampleBatch:
        """
        :param amount: number of demonstration trajectories to be generated
        :param random: whether the starting position is random
        :return: observation list : [amount x [(steps-1) x [s, a] + [s_term, None]]],
                 WARNING: the actions here are reconstructed from the observations, so executing them may introduce some inaccuracy
        """
        seeds = [rnd.randint(0, 4096) if random else 0 for _ in range(amount)]
        self.task._static_positions = not random

        demo_pack = []
        for seed in seeds:
            np.random.seed(seed)
            pack = self.task.get_demos(1, True)[0]

            demo_traj = []
            np.random.seed(seed)
            desc, obs = self.task.reset()
            v_tar = 0.
            for o_tar in pack[1:]:
                action = []
                if self._action_config.arm == ArmActionMode.ABS_JOINT_VELOCITY:
                    action.extend(
                        (o_tar.joint_positions - obs.joint_positions) / _DT)
                elif self._action_config.arm == ArmActionMode.ABS_JOINT_POSITION:
                    action.extend(o_tar.joint_positions)
                elif self._action_config.arm == ArmActionMode.ABS_JOINT_TORQUE:
                    action.extend(o_tar.joint_forces)
                    raise TypeError(
                        "Warning, abs_joint_torque is not currently supported")
                elif self._action_config.arm == ArmActionMode.ABS_EE_POSE:
                    action.extend(o_tar.gripper_pose)
                elif self._action_config.arm == ArmActionMode.ABS_EE_VELOCITY:
                    # WARNING: this calculation is not exact, since rotations cannot simply be 'added' together;
                    #          we follow it only because the original RLBench does the same
                    action.extend(
                        (o_tar.gripper_pose - obs.gripper_pose) / _DT)
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_VELOCITY:
                    v_tar = (o_tar.joint_positions - obs.joint_positions) / _DT
                    action.extend(v_tar - obs.joint_velocities)
                    raise TypeError(
                        "Warning, delta_joint_velocity is not currently supported"
                    )
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_POSITION:
                    action.extend(o_tar.joint_positions - obs.joint_positions)
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_TORQUE:
                    action.extend(o_tar.joint_forces - obs.joint_forces)
                    raise TypeError(
                        "Warning, delta_joint_torque is not currently supported"
                    )
                elif self._action_config.arm == ArmActionMode.DELTA_EE_POSE:
                    action.extend(o_tar.gripper_pose[:3] -
                                  obs.gripper_pose[:3])
                    q = Quaternion(o_tar.gripper_pose[3:7]) * Quaternion(
                        obs.gripper_pose[3:7]).conjugate
                    action.extend(list(q))
                elif self._action_config.arm == ArmActionMode.DELTA_EE_VELOCITY:
                    # WARNING: this calculation is not exact, since rotations cannot simply be 'added' together;
                    #          we follow it only because the original RLBench does the same
                    v_tar_new = (o_tar.gripper_pose - obs.gripper_pose) / _DT
                    action.extend(v_tar_new - v_tar)
                    v_tar = v_tar_new
                    raise TypeError(
                        "Warning, delta_ee_velocity is not currently supported"
                    )

                action.append(1.0 if o_tar.gripper_open > 0.9 else 0.0)
                action = np.asarray(action, dtype=np.float32)
                demo_traj.append({
                    'observation': obs,
                    'a': action,
                    's': obs.get_low_dim_data()
                })
                obs, reward, done = self.task.step(action)
                demo_traj[-1]['r'] = reward

            demo_pack.append(demo_traj)
        return {
            "trajectory": demo_pack,
            "config": "default",
            "policy": "hand-coding",
            "env class": self.__class__.__name__,
            "env name": self._task_name,
            "env config": "default",
            "observation config": self._observation_mode,
            "robot config": self._robot_name,
            "action config": self._action_mode
        }
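A hedged sketch of the StepDict-style loop this class implies; suppress_stdout, get_named_class, _DT, StepDict and the Environment base class come from the surrounding project, and "ReachTarget" is an assumed task-name string:

# Hypothetical rollout through the FakeRLBenchEnv step-dict interface.
env = FakeRLBenchEnv("ReachTarget", observation_mode="state",
                     action_mode="delta joint position")
env.init(display=False)
step = env.reset(random=True)
for _ in range(20):
    step['a'] = np.random.uniform(env._info["action low"],
                                  env._info["action high"])
    _, next_step, terminate = env.step(step)
    step = next_step
    if terminate:
        step = env.reset()
env.finalize()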
Example #13
class RLBenchEnv(gym.Env):
    """An gym wrapper for RLBench."""

    metadata = {'render.modes': ['human']}

    def __init__(self, task_class, observation_mode='state'):
        self._observation_mode = observation_mode
        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision':
            obs_config.set_all(True)
        else:
            raise ValueError(
                'Unrecognised observation_mode: %s.' % observation_mode)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(
            action_mode, obs_config=obs_config, headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)

        _, obs = self.task.reset()

        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,),
            dtype=np.float32)

        if observation_mode == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif observation_mode == 'vision':
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=obs.get_low_dim_data().shape),
                "left_shoulder_rgb": spaces.Box(
                    low=0, high=1, shape=obs.left_shoulder_rgb.shape),
                "right_shoulder_rgb": spaces.Box(
                    low=0, high=1, shape=obs.right_shoulder_rgb.shape),
                "wrist_rgb": spaces.Box(
                    low=0, high=1, shape=obs.wrist_rgb.shape),
                })

        self._gym_cam = None

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'vision':
            return {
                "state": obs.get_low_dim_data(),
                "left_shoulder_rgb": obs.left_shoulder_rgb,
                "right_shoulder_rgb": obs.right_shoulder_rgb,
                "wrist_rgb": obs.wrist_rgb,
            }

    def render(self, mode='human'):
        if self._gym_cam is None:
            # Add the camera to the scene
            cam_placeholder = Dummy('cam_cinematic_placeholder')
            self._gym_cam = VisionSensor.create([640, 360])
            self._gym_cam.set_pose(cam_placeholder.get_pose())
            self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)

    def reset(self):
        descriptions, obs = self.task.reset()
        del descriptions  # Not used.
        return self._extract_obs(obs)

    def step(self, action):
        obs, reward, terminate = self.task.step(action)
        return self._extract_obs(obs), reward, terminate, None

    def close(self):
        self.env.shutdown()
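Because this wrapper subclasses gym.Env, a standard gym rollout works directly; the sketch below assumes the 'vision' mode so the Dict observation space defined above is exercised, with ReachTarget as an example task class:

# Hypothetical gym-style rollout in vision mode.
env = RLBenchEnv(ReachTarget, observation_mode='vision')
obs = env.reset()
print(obs['left_shoulder_rgb'].shape, obs['state'].shape)
for _ in range(40):
    obs, reward, terminate, _ = env.step(env.action_space.sample())
    if terminate:
        obs = env.reset()
env.close()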
Example #14
class RLBenchEnv(gym.Env):
    """An gym wrapper for RLBench."""

    metadata = {'render.modes': ['human']}

    def __init__(self,
                 task_class,
                 observation_mode='state',
                 action_mode='joint',
                 multi_action_space=False,
                 discrete_action_space=False):
        self.num_skip_control = 20
        self.epi_obs = []
        self.obs_record = True
        self.obs_record_id = None

        self._observation_mode = observation_mode
        self.obs_config = ObservationConfig()
        if observation_mode == 'state':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
        elif observation_mode == 'state-goal':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
            self.obs_config.set_goal_info(True)
        elif observation_mode == 'vision':
            self.obs_config.set_all(False)
            self.obs_config.set_camera_rgb(True)
        elif observation_mode == 'both':
            self.obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)

        self._action_mode = action_mode
        self.ac_config = None
        if action_mode == 'joint':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.ABS_JOINT_POSITION)
        elif action_mode == 'trigon':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.TRIGON_MODEL_PARAM,
                is_discrete=discrete_action_space)
        else:
            raise ValueError('Unrecognised action_mode: %s.' % action_mode)

        self.env = Environment(action_config=self.ac_config,
                               obs_config=self.obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)
        self.max_episode_steps = self.task.episode_len
        _, obs = self.task.reset()

        if action_mode == 'joint':
            self.action_space = spaces.Box(
                low=-1.7,
                high=1.7,
                shape=(self.ac_config.action_size, ),
                dtype=np.float32)
        elif action_mode == 'trigon':
            if multi_action_space:
                # action_space1 = spaces.MultiBinary(n=1)
                low1 = np.array([-0.8, -0.8])
                high1 = np.array([0.8, 0.8])
                action_space1 = spaces.Box(low=low1,
                                           high=high1,
                                           dtype=np.float32)
                # low = np.array([-0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low2 = np.array([1.0])
                high2 = np.array([3.0])
                action_space2 = spaces.Box(low=low2,
                                           high=high2,
                                           dtype=np.float32)
                self.action_space = spaces.Tuple(
                    (action_space1, action_space2))
            elif discrete_action_space:
                self.action_space = spaces.MultiDiscrete([4, 4, 4])
            else:
                # low = np.array([0.0, -0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([1.0, 0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low = np.array([-1, -1, -1])
                high = np.array([1, 1, 1])
                self.action_space = spaces.Box(low=low,
                                               high=high,
                                               dtype=np.float32)

        if observation_mode == 'state' or observation_mode == 'state-goal':
            self.observation_space = spaces.Box(
                low=-np.inf,
                high=np.inf,
                shape=(obs.get_low_dim_data().shape[0] *
                       self.num_skip_control, ))
            if observation_mode == 'state-goal':
                self.goal_dim = obs.get_goal_dim()
        elif observation_mode == 'vision':
            self.observation_space = spaces.Box(
                low=0, high=1, shape=obs.head_camera_rgb.shape)
        elif observation_mode == 'both':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "rattler_eye_rgb":
                spaces.Box(low=0, high=1, shape=obs.head_camera_rgb.shape),
                "rattler_eye_depth":
                spaces.Box(low=0, high=1, shape=obs.head_camera_depth.shape),
            })

        self._gym_cam = None

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'state-goal':
            obs_goal = {'observation': obs.get_low_dim_data()}
            obs_goal.update(obs.get_goal_data())
            return obs_goal
        elif self._observation_mode == 'vision':
            return obs.head_camera_rgb
        elif self._observation_mode == 'both':
            return {
                "state": obs.get_low_dim_data(),
                "rattler_eye_rgb": obs.head_camera_rgb,
                "rattle_eye_depth": obs.head_camera_depth,
            }

    def render(self, mode='human'):
        if self._gym_cam is None:
            # # Add the camera to the scene
            self._gym_cam = VisionSensor('monitor')
            self._gym_cam.set_resolution([640, 640])
            self._gym_cam.set_render_mode(RenderMode.EXTERNAL_WINDOWED)

    def reset(self):
        obs_data_group = []
        obs_data_dict = {
            'observation': [],
            'desired_goal': None,
            'achieved_goal': None
        }
        descriptions, obs = self.task.reset()
        self.epi_obs.append(obs)
        obs_data = self._extract_obs(obs)
        for _ in range(self.num_skip_control):
            # obs_data_group.extend(obs_data)
            if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray):
                obs_data_group.extend(obs_data)
            elif isinstance(obs_data, dict):
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        del descriptions  # Not used
        return ret_obs

    def step(self, action):
        obs_data_group = []
        obs_data_dict = {
            'observation': [],
            'desired_goal': None,
            'achieved_goal': None
        }
        reward_group = []
        terminate = False
        for _ in range(self.num_skip_control):
            obs, reward, step_terminate, success = self.task.step(action)
            self.epi_obs.append(obs)
            obs_data = self._extract_obs(obs)
            if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray):
                obs_data_group.extend(obs_data)
            elif isinstance(
                    obs_data,
                    dict):  # used for hierarchical reinforcement algorithm
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
            reward_group.append(reward)
            terminate |= step_terminate
            if terminate:
                if self.obs_record and success:  # record a successful experience
                    self.record_obs("RobotPos")
                self.epi_obs = []
                break
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        return ret_obs, np.mean(reward_group), terminate, {}

    def close(self):
        self.env.shutdown()

    # def load_env_param(self):
    #     self.env.load_env_param()

    def compute_reward(self, achieved_goal=None, desired_goal=None, info=None):
        assert achieved_goal is not None
        assert desired_goal is not None
        return self.task.compute_reward(achieved_goal, desired_goal)

    def record_obs(self, obs_part):
        if self.obs_record_id is None:
            record_filenames = glob.glob("./obs_record/obs_record_*.txt")
            record_filenames.sort(key=lambda filename: int(
                filename.split('_')[-1].split('.')[0]))
            if len(record_filenames) == 0:
                self.obs_record_id = 1
            else:
                last_id = int(
                    record_filenames[-1].split('_')[-1].split('.')[0])
                self.obs_record_id = last_id + 1
        else:
            self.obs_record_id += 1
        filename = './obs_record/obs_record_' + str(
            self.obs_record_id) + '.txt'
        obs_record_file = open(filename, 'w')

        if obs_part == 'All':
            pass
        if obs_part == 'RobotPos':
            robot_pos_arr = []
            for obs in self.epi_obs:
                robot_pos = obs.get_2d_robot_pos()
                robot_pos_arr.append(robot_pos)
            target_pos = self.task.get_goal()
            robot_pos_arr.append(
                target_pos)  # The last line records the target position
            robot_pos_arr = np.array(robot_pos_arr)
            np.savetxt(obs_record_file, robot_pos_arr, fmt="%f")
        obs_record_file.close()
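
A minimal driving loop for the goal-style wrapper above is sketched below. The constructor is not part of this snippet, so make_env() is a placeholder for however the class is instantiated, and the 8-dimensional random action assumes the usual joint-velocity-plus-gripper action mode.

import numpy as np

env = make_env()                          # placeholder: construct the wrapper shown above
obs = env.reset()                         # stacked obs list, or a goal dict (observation/desired_goal/achieved_goal)
for _ in range(100):
    action = np.random.uniform(-1.0, 1.0, size=8)
    obs, reward, terminate, info = env.step(action)   # reward is averaged over num_skip_control sub-steps
    if terminate:
        break
env.close()
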
Example #15
class Agent(object):

    def __init__(self, action_size):
        self.action_size = action_size

    def act(self, obs):
        return (np.random.normal(0.0, 0.1, size=(self.action_size, ))).tolist()


obs_config = ObservationConfig()
obs_config.set_all(False)
obs_config.left_shoulder_camera.rgb = True
obs_config.right_shoulder_camera.rgb = True
obs_config.wrist_camera.rgb = True

action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
env = Environment(action_mode,
                  obs_config=obs_config,
                  headless=False,
                  robot_configuration='jaco')
env.launch()

task = env.get_task(ReachTarget)

agent = Agent(action_size=7)  # Jaco is 6DoF + 1 for gripper action

training_steps = 120
episode_length = 40
obs = None
for i in range(training_steps):
    if i % episode_length == 0:
        print('Reset Episode')
        descriptions, obs = task.reset()
        print(descriptions)
    action = agent.act(obs)
    print(action)
    obs, reward, terminate = task.step(action)

print('Done')
env.shutdown()
Example #16
class RLBench(BaseSimulator):
    def __init__(self, h):

        #64x64 camera outputs
        cam = CameraConfig(image_size=(64, 64))
        obs_config = ObservationConfig(left_shoulder_camera=cam,
                                       right_shoulder_camera=cam,
                                       wrist_camera=cam,
                                       front_camera=cam)
        obs_config.set_all(True)

        # delta EE control with motion planning
        action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_WORLD_FRAME)

        #Inits
        self.env = Environment(action_mode, obs_config=obs_config, headless=h)
        self.env.launch()
        self.task = self.env.get_task(ReachTarget)

    def reset(self):

        d, o = self.task.reset()

        return o

    def step(self, a, prev_state):

        # delta orientation
        d_quat = np.array([0, 0, 0, 1])

        # gripper state
        gripper_open = 1.0

        # delta position
        d_pos = np.zeros(3)

        # For positive magnitude
        if (a % 2 == 0):
            a = int(a / 2)
            d_pos[a] = 0.03

        # For negative magnitude
        else:
            a = int((a - 1) / 2)
            d_pos[a] = -0.03

        # Forming action as expected by the environment
        action = np.concatenate([d_pos, d_quat, [gripper_open]])

        try:
            s, r, t = self.task.step(action)
            r *= 1000

        # Handling failure in planning
        except ConfigurationPathError:
            s = prev_state
            r = -0.1
            t = False

        # Handling wrong action for inverse Jacobian
        except InvalidActionError:
            s = prev_state
            r = -0.01
            t = False

        # Get bounding box centroids
        x, y, z = self.task._scene._workspace.get_position()

        # Set bounding box limits
        minx = x - 0.25
        maxx = x + 0.25
        miny = y - 0.35
        maxy = y + 0.35
        minz = z
        maxz = z + 0.5

        bounding_box = [minx, maxx, miny, maxy, minz, maxz]

        # Get gripper position
        gripper_pose = s.gripper_pose

        # Reward for being in the bounding box
        if (self.bb_check(bounding_box, gripper_pose)):
            r += 0.1

        return s, r, t

    # Check if gripper in the bounding box
    def bb_check(self, bounding_box, gripper_pose):

        out = True

        if (gripper_pose[0] < bounding_box[0]
                or gripper_pose[0] > bounding_box[1]):
            out = False

        if (gripper_pose[1] < bounding_box[2]
                or gripper_pose[1] > bounding_box[3]):
            out = False

        if (gripper_pose[2] < bounding_box[4]
                or gripper_pose[2] > bounding_box[5]):
            out = False

        return out

    @staticmethod
    def n_actions():
        return 6

    def shutdown(self):
        print("Shutdown")
        self.env.shutdown()

    def __del__(self):
        print("Shutdown")
        self.env.shutdown()
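
A usage sketch for the simulator wrapper above: the discrete action index selects an axis and a direction for a 3 cm end-effector displacement, and prev_state lets step() fall back to the previous observation when planning fails. BaseSimulator and the reward scaling come from the surrounding project, so treat this loop as illustrative.

import numpy as np

sim = RLBench(h=True)                                # headless
obs = sim.reset()
for _ in range(20):
    a = np.random.randint(RLBench.n_actions())       # 0..5 -> +/- x, y, z
    obs, reward, done = sim.step(a, prev_state=obs)
    if done:
        obs = sim.reset()
sim.shutdown()
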
Example #17
class RLBenchEnv(gym.Env):
    """An gym wrapper for RLBench."""

    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 task_class,
                 observation_mode='state',
                 render_mode: Union[None, str] = None):
        self._observation_mode = observation_mode
        self._render_mode = render_mode
        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision':
            obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)

        _, obs = self.task.reset()

        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(self.env.action_size, ))

        if observation_mode == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif observation_mode == 'vision':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "left_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.left_shoulder_rgb.shape),
                "right_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.right_shoulder_rgb.shape),
                "wrist_rgb":
                spaces.Box(low=0, high=1, shape=obs.wrist_rgb.shape),
                "front_rgb":
                spaces.Box(low=0, high=1, shape=obs.front_rgb.shape),
            })

        if render_mode is not None:
            # Add the camera to the scene
            cam_placeholder = Dummy('cam_cinematic_placeholder')
            self._gym_cam = VisionSensor.create([640, 360])
            self._gym_cam.set_pose(cam_placeholder.get_pose())
            if render_mode == 'human':
                self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
            else:
                self._gym_cam.set_render_mode(RenderMode.OPENGL3)

    def _extract_obs(self, obs) -> Dict[str, np.ndarray]:
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'vision':
            return {
                "state": obs.get_low_dim_data(),
                "left_shoulder_rgb": obs.left_shoulder_rgb,
                "right_shoulder_rgb": obs.right_shoulder_rgb,
                "wrist_rgb": obs.wrist_rgb,
                "front_rgb": obs.front_rgb,
            }

    def render(self, mode='human') -> Union[None, np.ndarray]:
        if mode != self._render_mode:
            raise ValueError(
                'The render mode must match the render mode selected in the '
                'constructor. \nI.e. if you want "human" render mode, then '
                'create the env by calling: '
                'gym.make("reach_target-state-v0", render_mode="human").\n'
                'You passed in mode %s, but expected %s.' %
                (mode, self._render_mode))
        if mode == 'rgb_array':
            return self._gym_cam.capture_rgb()

    def reset(self) -> Dict[str, np.ndarray]:
        descriptions, obs = self.task.reset()
        del descriptions  # Not used.
        return self._extract_obs(obs)

    def step(self, action) -> Tuple[Dict[str, np.ndarray], float, bool, dict]:
        obs, reward, terminate = self.task.step(action)
        return self._extract_obs(obs), reward, terminate, {}

    def close(self) -> None:
        self.env.shutdown()
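
Since the wrapper above follows the standard gym API, it can be driven like any other gym environment. The snippet below is a sketch that assumes ReachTarget is imported from rlbench.tasks, as in the other examples here.

env = RLBenchEnv(ReachTarget, observation_mode='state')
obs = env.reset()
for _ in range(50):
    obs, reward, terminate, _ = env.step(env.action_space.sample())
    if terminate:
        obs = env.reset()
env.close()
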
Example #18
class GraspController:
    def __init__(self, action_mode, static_positions=True):
        # Initialize environment with Action mode and observations
        # Resize the wrist camera to fit the GQCNN
        wrist_camera = CameraConfig(image_size=(1032, 772))
        self.env = Environment(action_mode, '', ObservationConfig(wrist_camera=wrist_camera), False, static_positions=static_positions)
        self.env.launch()
        # Load specified task into the environment
        self.task = self.env.get_task(EmptyContainer)

    def reset(self):
        descriptions, obs = self.task.reset()
        return descriptions, obs

    def get_objects(self, add_noise=False):
        objs = self.env._scene._active_task.get_base().get_objects_in_tree(exclude_base=True, first_generation_only=False)
        objs_dict = {}

        for obj in objs:
            name = obj.get_name()
            pose = obj.get_pose()
            if add_noise:
                pose = noisy_object(pose)
            objs_dict[name] = [obj, pose]

        return objs_dict

    def get_path(self, pose, set_orientation=False):
        # TODO deal with situations when path not found
        if set_orientation:
            path = self.env._robot.arm.get_path(pose[:3], quaternion=pose[3:],
                                                ignore_collisions=True, algorithm=Algos.RRTConnect, trials=1000)
        else:
            path = self.env._robot.arm.get_path(pose[:3], quaternion=np.array([0, 1, 0, 0]),
                                                ignore_collisions=True, algorithm=Algos.RRTConnect, trials=1000)
        return path

    def grasp(self):
        # TODO: get feedback to check whether the grasp was successful
        done_grab_action = False
        # Repeat until the object is successfully grabbed
        while not done_grab_action:
            # gradually close the gripper
            done_grab_action = self.env._robot.gripper.actuate(0, velocity=0.2)  # 0 is close
            self.env._pyrep.step()
            # self.task._task.step()
            # self.env._scene.step()

        grasped_objects = {}
        obj_list = ['Shape', 'Shape1', 'Shape3']
        objs = self.env._scene._active_task.get_base().get_objects_in_tree(exclude_base=True, first_generation_only=False)
        for obj in objs:
            if obj.get_name() in obj_list:
                grasped_objects[obj.get_name()] = self.env._robot.gripper.grasp(obj)
        return grasped_objects
        # return self.env._robot.gripper.get_grasped_objects()

    def release(self):
        done = False
        while not done:
            done = self.env._robot.gripper.actuate(1, velocity=0.2)  # 1 is release
            self.env._pyrep.step()
            # self.task._task.step()
            # self.env._scene.step()
        self.env._robot.gripper.release()

    def execute_path(self, path, open_gripper=True):
        path = path._path_points.reshape(-1, path._num_joints)
        for i in range(len(path)):
            action = list(path[i]) + [int(open_gripper)]
            obs, reward, terminate = self.task.step(action)
        return obs, reward, terminate
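
A rough usage sketch for the controller above, showing the intended call order: reset, read (noisy) object poses, plan, execute, grasp, release. The action mode is an assumption (execute_path feeds joint positions from the planned path into task.step, so ABS_JOINT_POSITION is the natural choice), and 'Shape' matches the object names hard-coded in grasp().

action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)   # assumed action mode
controller = GraspController(action_mode, static_positions=True)
descriptions, obs = controller.reset()

objs = controller.get_objects(add_noise=True)                # {name: [handle, noisy_pose]}
_, grasp_pose = objs['Shape']
path = controller.get_path(grasp_pose)                       # top-down approach, quaternion [0, 1, 0, 0]
obs, reward, terminate = controller.execute_path(path, open_gripper=True)
grasped = controller.grasp()
controller.release()
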
Example #19
class TestEnvironment(unittest.TestCase):

    def tearDown(self):
        self.env.shutdown()

    def get_task(self, task_class, arm_action_mode):
        obs_config = ObservationConfig()
        obs_config.set_all(False)
        obs_config.set_all_low_dim(True)
        obs_config.right_shoulder_camera.rgb = True
        action_mode = ActionMode(arm_action_mode)
        self.env = Environment(
            action_mode, ASSET_DIR, obs_config, headless=True)
        self.env.launch()
        return self.env.get_task(task_class)

    def test_get_task(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        self.assertIsInstance(task, TaskEnvironment)
        self.assertEqual(task.get_name(), 'reach_target')

    def test_reset(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        desc, obs = task.reset()
        self.assertIsNotNone(obs.right_shoulder_rgb)
        self.assertIsNone(obs.left_shoulder_rgb)
        self.assertIsInstance(desc, list)

    def test_step(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        task.reset()
        obs, reward, term = task.step(np.random.uniform(size=8))
        self.assertIsNotNone(obs.right_shoulder_rgb)
        self.assertIsNone(obs.left_shoulder_rgb)
        self.assertEqual(reward, 0)
        self.assertFalse(term)

    def test_get_invalid_number_of_demos(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        with self.assertRaises(RuntimeError):
            task.get_demos(10, live_demos=False, image_paths=True)

    def test_get_stored_demos_paths(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        demos = task.get_demos(2, live_demos=False, image_paths=True)
        self.assertEqual(len(demos), 2)
        self.assertGreater(len(demos[0]), 0)
        self.assertIsInstance(demos[0][0].right_shoulder_rgb, str)
        self.assertIsNone(demos[0][0].left_shoulder_rgb)

    def test_get_stored_demos_images(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        demos = task.get_demos(2, live_demos=False, image_paths=False)
        self.assertEqual(len(demos), 2)
        self.assertGreater(len(demos[0]), 0)
        self.assertIsInstance(demos[0][0].right_shoulder_rgb, np.ndarray)
        self.assertIsNone(demos[0][0].left_shoulder_rgb)

    def test_get_live_demos(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        demos = task.get_demos(2, live_demos=True)
        self.assertEqual(len(demos), 2)
        self.assertGreater(len(demos[0]), 0)
        self.assertIsInstance(demos[0][0].right_shoulder_rgb, np.ndarray)

    def test_action_mode_abs_joint_velocity(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY)
        task.reset()
        action = [0.1] * 8
        obs, reward, term = task.step(action)
        [self.assertAlmostEqual(0.1, a, delta=0.05)
         for a in obs.joint_velocities]

    def test_action_mode_delta_joint_velocity(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.DELTA_JOINT_VELOCITY)
        task.reset()
        action = [-0.1] * 8
        [task.step(action) for _ in range(2)]
        obs, reward, term = task.step(action)
        [self.assertAlmostEqual(-0.3, a, delta=0.1)
         for a in obs.joint_velocities]

    def test_action_mode_abs_joint_position(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_POSITION)
        _, obs = task.reset()
        init_angles = np.append(obs.joint_positions, 0.)  # for gripper
        target_angles = np.array(init_angles) + 0.05
        [task.step(target_angles) for _ in range(5)]
        obs, reward, term = task.step(target_angles)
        [self.assertAlmostEqual(a, actual, delta=0.05)
         for a, actual in zip(target_angles, obs.joint_positions)]

    def test_action_mode_delta_joint_position(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.DELTA_JOINT_POSITION)
        _, obs = task.reset()
        init_angles = obs.joint_positions
        target_angles = np.array(init_angles) + 0.06
        [task.step([0.01] * 8) for _ in range(5)]
        obs, reward, term = task.step([0.01] * 8)
        [self.assertAlmostEqual(a, actual, delta=0.05)
         for a, actual in zip(target_angles, obs.joint_positions)]

    def test_action_mode_abs_ee_position(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_EE_POSE)
        _, obs = task.reset()
        init_pose = obs.gripper_pose
        new_pose = np.append(init_pose, 0.0)  # for gripper
        new_pose[2] -= 0.1  # 10cm down
        obs, reward, term = task.step(new_pose)
        [self.assertAlmostEqual(a, p, delta=0.001)
         for a, p in zip(new_pose, obs.gripper_pose)]

    def test_action_mode_delta_ee_position(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.DELTA_EE_POSE)
        _, obs = task.reset()
        init_pose = obs.gripper_pose
        new_pose = [0, 0, -0.1, 0, 0, 0, 1.0, 0.0]  # 10cm down
        expected_pose = list(init_pose)
        expected_pose[2] -= 0.1
        obs, reward, term = task.step(new_pose)
        [self.assertAlmostEqual(a, p, delta=0.001)
         for a, p in zip(expected_pose, obs.gripper_pose)]

    def test_action_mode_abs_ee_velocity(self):
        VEL = -0.2  # move down with velocity 0.2
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_EE_VELOCITY)
        _, obs = task.reset()
        expected_pose = obs.gripper_pose
        expected_pose[2] += (VEL * 0.05)
        vels = np.zeros((8,))
        vels[2] += VEL
        obs, reward, term = task.step(vels)
        [self.assertAlmostEqual(a, p, delta=0.001)
         for a, p in zip(expected_pose, obs.gripper_pose)]

    def test_action_mode_delta_velocity(self):
        VEL = -0.2  # move down with velocity 0.2
        task = self.get_task(
            ReachTarget, ArmActionMode.DELTA_EE_VELOCITY)
        _, obs = task.reset()
        expected_pose = obs.gripper_pose
        expected_pose[2] += (VEL * 0.05)
        expected_pose[2] += ((VEL * 2) * 0.05)
        expected_pose[2] += ((VEL * 3) * 0.05)
        vels = np.zeros((8,))
        vels[2] += VEL
        [task.step(vels) for _ in range(2)]
        obs, reward, term = task.step(vels)
        [self.assertAlmostEqual(a, p, delta=0.01)
         for a, p in zip(expected_pose, obs.gripper_pose)]

    def test_action_mode_abs_joint_torque(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.ABS_JOINT_TORQUE)
        task.reset()
        action = [0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, 0.0]
        obs, reward, term = task.step(action)
        # Difficult to test given gravity, so just check for exceptions.

    def test_action_mode_delta_joint_torque(self):
        task = self.get_task(
            ReachTarget, ArmActionMode.DELTA_JOINT_TORQUE)
        _, obs = task.reset()
        init_forces = np.array(obs.joint_forces)
        action = [0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, 0]
        expected = np.array([0.3, -0.3, 0.3, -0.3, 0.3, -0.3, 0.3])
        expected += init_forces
        [task.step(action) for _ in range(2)]
        obs, reward, term = task.step(action)
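
These tests use the stock unittest pattern, so the conventional entry point below applies when the module is run directly (the imports and ASSET_DIR are defined elsewhere in the test file).

if __name__ == '__main__':
    unittest.main()
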
Example #20
class RLBenchBox(TaskInterface):

    def __init__(self, task_class, state_dim, n_features, headless=True):

        super().__init__(n_features)
        self._group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"])
        self._space = ClassicSpace(self._group, n_features)

        obs_config = ObservationConfig()
        obs_config.set_all_low_dim(True)
        obs_config.set_all_high_dim(False)
        self._obs_config = obs_config

        self._state_dim = state_dim
        self._headless = headless

        action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
        self._task_class = task_class
        self._action_mode = action_mode
        self.env = Environment(
            action_mode, "", obs_config, headless=headless)
        self.env.launch()

        self.task = self.env.get_task(task_class)
        self._obs = None

    def get_context_dim(self):
        return self._state_dim

    def read_context(self):
        return self._obs[1].task_low_dim_state

    def get_demonstrations(self):
        file = "parameters/%s_%d.npy" % (self.task.get_name(), self._space.n_features)
        try:
            return np.load(file)
        except FileNotFoundError:
            raise Exception("File %s not found. Please consider running 'dataset_generator.py'" % file)

    def _stop(self, joint_gripper, previous_reward):
        if previous_reward == 0.:
            success = 0.
            for _ in range(50):
                obs, reward, terminate = self.task.step(joint_gripper)
                if reward == 1.0:
                    success = 1.
                    break
            return self.task._task.get_dense_reward(), success
        return self.task._task.get_dense_reward(), 1.

    def send_movement(self, weights, duration):
        mp = MovementPrimitive(self._space, MovementPrimitive.get_params_from_block(self._space, weights))
        duration = 1 if duration < 0 else duration
        if self._headless:
            trajectory = mp.get_full_trajectory(duration=min(duration, 1), frequency=200)
        else:
            trajectory = mp.get_full_trajectory(duration=5 * duration, frequency=200)
        tot_reward = 0.
        success = 0
        for a1 in trajectory.values:  # , a2 in zip(trajectory.values[:-1, :], trajectory.values[1:, :]):
            joint = a1  # (a2-a1)*20.
            joint_gripper = joint
            obs, reward, terminate = self.task.step(joint_gripper)
            if reward == 1. or terminate == 1.:
                if reward == 1.:
                    success = 1.
                break
        tot_reward, success = self._stop(joint_gripper, success)
        return success, tot_reward

    def reset(self):
        self._obs = self.task.reset()
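
A sketch of how the task interface above might be exercised: contexts come from the low-dimensional task state, and movements are parameterised by a block of movement-primitive weights. The weight layout below ((7 joints + gripper) x n_features) and state_dim=3 are assumptions for illustration only.

import numpy as np

box = RLBenchBox(ReachTarget, state_dim=3, n_features=10, headless=True)
box.reset()
context = box.read_context()                 # task_low_dim_state
weights = 0.01 * np.random.randn(8 * 10)     # assumed layout: (7 joints + gripper) x n_features
success, dense_reward = box.send_movement(weights, duration=1.0)
box.env.shutdown()
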
Example #21
class StateMachine(object):
    def __init__(self):
        self.env = None
        self.home = None
        self.task = None
        self.sensor = None
        self.objs_dict = None

    def initialize(self, headless=False):
        DATASET = ''
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        obs_config.left_shoulder_camera.rgb = True
        obs_config.right_shoulder_camera.rgb = True
        action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN)
        self.env = Environment(action_mode,
                               DATASET,
                               obs_config,
                               headless=headless)
        self.sensor = NoisyObjectPoseSensor(self.env)
        self.env.launch()
        self.task = self.env.get_task(EmptyContainer)
        self.task.reset()
        self.home = self.get_objects(False)['large_container'].get_pose()
        self.home[2] += 0.3
        # demos = task.get_demos(3, live_demos=live_demos)

        self.objs = self.get_objects()
        self.movable_objects = ['Shape', 'Shape1', 'Shape3']
        self.target_bins = ['small_container0']
        self.start_bins = ['large_container']
        self.max_retry = 5
        self.env._robot.gripper.set_joint_forces([50, 50])

    def get_objects(self, graspable=False):
        if graspable:
            objs = self.task._task.get_graspable_objects()
            objs_dict = {}
            for item in objs:
                name = item.get_name()
                objs_dict[name] = item
            return objs_dict
        else:
            objs = self.env._scene._active_task.get_base().\
                    get_objects_in_tree(exclude_base=True, first_generation_only=False)
            objs_dict = {}
            for obj in objs:
                name = obj.get_name()
                objs_dict[name] = obj
            return objs_dict

    # Move above object
    def move_to(self,
                pose,
                pad=0.05,
                ignore_collisions=True,
                quat=np.array([0, 1, 0, 0])):
        target_pose = np.copy(pose)
        obs = self.env._scene.get_observation()
        init_pose = obs.gripper_pose
        target_pose[2] += pad
        path = self.env._robot.arm.get_path(np.array(target_pose[0:3]),
                                            quaternion=quat,
                                            trials=1000,
                                            ignore_collisions=ignore_collisions,
                                            algorithm=Algos.RRTConnect)
        # TODO catch errors and deal with situations when path not found
        return path

    def grasp(self, obj):
        # open the grippers
        cond = False
        while not cond:
            cond = self.env._robot.gripper.actuate(1, 0.1)
            self.env._scene.step()
        cond = False
        # now close
        while not cond:
            cond = self.env._robot.gripper.actuate(0, 0.1)
            self.env._scene.step()
        cond = self.env._robot.gripper.grasp(obj)
        return cond

    def release(self, obj):
        cond = False
        while not cond:
            cond = self.env._robot.gripper.actuate(1, 0.1)
            self.env._scene.step()
        self.env._robot.gripper.release()

    def execute(self, path):
        done = False
        path.set_to_start()
        while not done:
            done = path.step()
            # a = path.visualize()
            self.env._scene.step()
        return done

    def go_to(self,
              pose,
              pad=0.05,
              quat=np.array([0, 1, 0, 0]),
              gripper_close=False):
        pose_cp = copy.copy(pose)
        pose_cp[2] += pad
        pose_cp[3:] = quat
        wp = pose_cp.tolist() + [1]
        if gripper_close:
            wp = pose_cp.tolist() + [0]
        try:
            self.task.step(wp)
        except:
            print("Retrying with normal path planner")
            path = self.move_to(pose, pad, True, quat)
            self.execute(path)
        return

    def reset(self):
        self.task.reset()

    def is_within(self, obj1, obj2):
        # whether obj2 is within obj1
        obj1_bbox = obj1.get_bounding_box()
        obj1_mat = np.array(obj1.get_matrix()).reshape(3, 4)
        obj2_bbox = obj2.get_bounding_box()
        obj2_mat = np.array(obj2.get_matrix()).reshape(3, 4)
        obj1_rect = get_edge_points(obj1_bbox, obj1_mat)
        obj2_rect = get_edge_points(obj2_bbox, obj2_mat)
        return contains(obj1_rect, obj2_rect)

    def picking_bin_empty(self):
        '''
         Returns whether the picking bin is empty
        '''
        self.objs_dict = self.get_objects()
        bin_obj = self.objs_dict['large_container']  #?Which one
        for obj_name in self.objs_dict.keys():
            if (not ('container' in obj_name)):
                if self.is_within(bin_obj, self.objs_dict[obj_name]):
                    return False
        return True

    def placing_bin_full(self):
        '''
         Returns whether the placing bin is full
        '''
        self.objs_dict = self.get_objects()
        bin_obj = self.objs_dict['small_container1']  #?Which one
        for obj_name in self.objs_dict.keys():
            if (not ('container' in obj_name)):
                if not (self.is_within(bin_obj, self.objs_dict[obj_name])):
                    return False
        return True

    def get_all_shapes(self):
        objects = []

        for name in self.objs:
            if "Shape" in name:
                objects.append(name)
        print(objects)
        return objects

    def get_grasp_pose(self, theta):
        q = quaternion(0, 1, 0, 0)
        cos = np.cos(theta / 2)
        sin = np.sin(theta / 2)
        p = quaternion(cos, 0, 0, sin)
        rot_qt = p * q
        print("Theta", rot_qt, cos, sin, theta / 2)
        rot_arr = qn.as_float_array(rot_qt)
        rot_qt = [rot_arr[1], rot_arr[2], rot_arr[3], rot_arr[0]]
        return rot_qt

    def get_full_grasp_pose(self, thetas):
        thetax = thetas[0]
        cos = np.cos(thetax / 2)
        sin = np.sin(thetax / 2)
        qx = quaternion(cos, sin, 0, 0)
        thetay = thetas[1]
        cos = np.cos(thetay / 2)
        sin = np.sin(thetay / 2)
        qy = quaternion(cos, 0, sin, 0)
        thetaz = thetas[2]
        cos = np.cos(thetaz / 2)
        sin = np.sin(thetaz / 2)
        qz = quaternion(cos, 0, 0, sin)
        q_init = quaternion(0, 1, 0, 0)
        r = q_init * qz * qx * qy
        r_arr = qn.as_float_array(r)
        quat_vec = np.array([r_arr[1], r_arr[2], r_arr[3], r_arr[0]])
        return quat_vec

    def move_objects_to_target(self, target_bins, start_bins):

        target_bin = target_bins[0]
        start_bin = start_bins[0]

        start_bin_pose = self.objs[start_bin].get_pose()
        start_bin_pose[2] += 0.3

        target_bin_pose = self.objs[target_bin].get_pose()
        target_bin_pose[2] += 0.3
        '''
        move_objs = []
        for obj in machine.movable_objects:
            if self.is_within(target_bin, obj):
                move_objs.append(obj)
        print(move_objs)
        '''
        move_objs = self.movable_objects

        for i, shape in enumerate(move_objs):

            cond = False
            retry_count = 0
            while not cond and retry_count < self.max_retry:
                theta = 2 * np.pi * retry_count / self.max_retry
                quat = self.get_grasp_pose(theta)
                # go back to home position
                self.go_to(start_bin_pose, 0, gripper_close=False)
                # move above object
                objs_poses = self.sensor.get_poses()
                # pose=objs[shape].get_pose()
                pose = objs_poses[shape]
                self.go_to(pose, 0, quat=quat, gripper_close=False)
                # grasp the object
                cond = self.grasp(self.objs[shape])
                if not cond:
                    retry_count += 1
                    print("retry count: ", retry_count)
            # move to home position
            self.go_to(start_bin_pose, 0, gripper_close=True)
            print("Gripper joint forces",
                  self.env._robot.gripper.get_joint_forces())
            # move above small container
            objs_poses = self.sensor.get_poses()
            pose = objs_poses[target_bin]
            pose[0] += (i * 0.04 - 0.04)
            self.go_to(pose, 0.05, gripper_close=True)
            # release the object
            self.release(self.objs[shape])
        self.go_to(self.home, 0, gripper_close=False)
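
A driving script sketch for the state machine above: initialize the scene, then shuttle objects from the large container to the small one until the picking bin is empty.

machine = StateMachine()
machine.initialize(headless=True)
while not machine.picking_bin_empty():
    machine.move_objects_to_target(machine.target_bins, machine.start_bins)
machine.env.shutdown()
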
Example #22
    def __init__(self, task_class, n_features, load, n_movements):
        """
        Learn the Movement from demonstration.

        :param task_class: task that we aim to learn
        :param n_features: number of radial basis features (RBFs) in the movement space
        :param load: load demonstrations from stored data instead of recording live demos
        :param n_movements: how many demonstrated movements to learn from
        """

        frequency = 200

        # To use 'saved' demos, set the path below, and set live_demos=False
        live_demos = not load
        DATASET = '' if live_demos else 'datasets'

        obs_config = ObservationConfig()
        obs_config.set_all_low_dim(True)
        obs_config.set_all_high_dim(False)
        self._task_class = task_class
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)

        group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"])

        env = Environment(action_mode, DATASET, obs_config, headless=True)
        env.launch()

        task = env.get_task(task_class)

        trajectories = []
        states = []

        lengths = []

        print("Start Demo")
        demos = task.get_demos(n_movements, live_demos=live_demos)
        print("End Demo")

        init = True
        for demo in demos:
            trajectory = NamedTrajectory(*group.refs)
            t = 0
            for ob in demo:
                if t == 0:
                    if init:
                        print("State dim: %d" % ob.task_low_dim_state.shape[0])
                        init = False
                    states.append(ob.task_low_dim_state)
                kwargs = {
                    "d%d" % i: ob.joint_positions[i]
                    for i in range(ob.joint_positions.shape[0])
                }
                kwargs["gripper"] = ob.gripper_open
                trajectory.notify(duration=1 / frequency, **kwargs)
                t += 1
            lengths.append(t / frequency)
            trajectories.append(trajectory)

        space = ClassicSpace(group,
                             n_features=n_features,
                             regularization=1E-15)
        z = np.linspace(-0.2, 1.2, 1000)
        Phi = space.get_phi(z)
        for i in range(n_features):
            plt.plot(z, Phi[:, i])
        plt.show()
        parameters = np.array([
            np.concatenate([
                s,
                np.array([l]),
                LearnTrajectory(space, traj).get_block_params()
            ]) for s, l, traj in zip(states, lengths, trajectories)
        ])
        np.save("parameters/%s_%d.npy" % (task.get_name(), n_features),
                parameters)
        env.shutdown()
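
The constructor above ends by saving the learned parameters to parameters/<task_name>_<n_features>.npy, which is the same file that Example #20's get_demonstrations later loads. A minimal sketch of reading it back follows; the column layout mirrors the np.concatenate call above (context state, then duration, then the movement-primitive block), and the file name and state_dim are placeholders.

import numpy as np

params = np.load("parameters/reach_target_10.npy")   # placeholder task name / n_features
state_dim = 3                                        # assumed context dimension
contexts = params[:, :state_dim]
durations = params[:, state_dim]
mp_weights = params[:, state_dim + 1:]
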
Example #23
class RLBenchEnv(gym.GoalEnv):
    """An gym wrapper for RLBench."""

    metadata = {'render.modes': ['human']}
    def __init__(self, task_class, observation_mode='state',
                 randomization_mode="none", rand_config=None, img_size=256,
                 special_start=[], fixed_grip=-1, force_randomly_place=False,
                 force_change_position=False, sparse=False, not_special_p=0,
                 ground_p=0, special_is_grip=False, altview=False,
                 procedural_ind=0, procedural_mode='same', procedural_set=[],
                 is_mesh_obs=False, blank=False, COLOR_TUPLE=[1, 0, 0]):
        # blank: replace image observations with zeros.
        self.blank = blank
        # altview: whether to add a second camera angle (True/False), or "both" to concatenate the observations.
        self.altview = altview
        self.img_size = img_size
        self.sparse = sparse
        self.task_class = task_class
        self._observation_mode = observation_mode
        self._randomization_mode = randomization_mode
        # special_start is a list of actions to take at the beginning of each episode.
        self.special_start = special_start
        # fixed_grip: temporary hack for keeping the gripper a certain way.
        # 0 for fixed closed, 0.1 for fixed open, -1 for not fixed.
        self.fixed_grip = fixed_grip
        # Force the task to be randomly placed.
        self.force_randomly_place = force_randomly_place
        # Force the task to change position in addition to rotation.
        self.force_change_position = force_change_position

        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision' or observation_mode=="visiondepth" or observation_mode=="visiondepthmask":
            # obs_config.set_all(True)
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        else:
            raise ValueError(
                'Unrecognised observation_mode: %s.' % observation_mode)

        action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_NOQ)
        print("using delta pose pan")

        if randomization_mode == "random":
            objs = ['target', 'boundary', 'Floor', 'Roof', 'Wall1', 'Wall2', 'Wall3', 'Wall4', 'diningTable_visible']
            if rand_config is None:
                assert False
            self.env = DomainRandomizationEnvironment(
                action_mode, obs_config=obs_config, headless=True,
                randomize_every=RandomizeEvery.EPISODE, frequency=1,
                visual_randomization_config=rand_config
            )
        else:
            self.env = Environment(
                action_mode, obs_config=obs_config, headless=True
            )
        self.env.launch()
        self.task = self.env.get_task(task_class)

        # Probability. Currently used for probability that pick and lift task will start off gripper at a certain location (should probs be called non_special p)
        self.task._task.not_special_p = not_special_p
        # Probability that ground goal.
        self.task._task.ground_p = ground_p 
        # For the "special" case, whether to grip the object or just hover above it. 
        self.task._task.special_is_grip = special_is_grip
        # for procedural env
        self.task._task.procedural_ind = procedural_ind
        # procedural mode: same, increase, or random. 
        self.task._task.procedural_mode = procedural_mode
        # ideally a list-like object, dictates the indices to sample from each episode. 
        self.task._task.procedural_set = procedural_set
        # if state obs is mesh obs
        self.task._task.is_mesh_obs = is_mesh_obs

        self.task._task.sparse = sparse
        self.task._task.COLOR_TUPLE = COLOR_TUPLE
        
        _, obs = self.task.reset()

        cam_placeholder = Dummy('cam_cinematic_placeholder')
        cam_pose = cam_placeholder.get_pose().copy()
        #custom pose
        cam_pose = [ 1.59999931,  0. ,         2.27999949 , 0.65328157, -0.65328145, -0.27059814, 0.27059814]
        cam_pose[0] = 1
        cam_pose[2] = 1.5
        self.frontcam = VisionSensor.create([img_size, img_size])
        self.frontcam.set_pose(cam_pose)

        self.frontcam.set_render_mode(RenderMode.OPENGL)
        self.frontcam.set_perspective_angle(60)
        self.frontcam.set_explicit_handling(1)

        self.frontcam_mask = VisionSensor.create([img_size, img_size])
        self.frontcam_mask.set_pose(cam_pose)
        self.frontcam_mask.set_render_mode(RenderMode.OPENGL_COLOR_CODED)
        self.frontcam_mask.set_perspective_angle(60)
        self.frontcam_mask.set_explicit_handling(1)

        if altview:
            alt_pose = [0.25    , -0.65    ,  1.5,   0, 0.93879825 ,0.34169483 , 0]
            self.altcam = VisionSensor.create([img_size, img_size])
            self.altcam.set_pose(alt_pose)
            self.altcam.set_render_mode(RenderMode.OPENGL)
            self.altcam.set_perspective_angle(60)
            self.altcam.set_explicit_handling(1)
            
            self.altcam_mask = VisionSensor.create([img_size, img_size])
            self.altcam_mask.set_pose(alt_pose)
            self.altcam_mask.set_render_mode(RenderMode.OPENGL_COLOR_CODED)
            self.altcam_mask.set_perspective_angle(60)
            self.altcam_mask.set_explicit_handling(1)


        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,))

        if observation_mode == 'state':
            self.observation_space = spaces.Dict({
                "observation": spaces.Box(
                low=-np.inf, high=np.inf, shape=self.task._task.get_state_obs().shape),
                "achieved_goal": spaces.Box(
                low=-np.inf, high=np.inf, shape=self.task._task.get_achieved_goal().shape),
                'desired_goal': spaces.Box(
                low=-np.inf, high=np.inf, shape=self.task._task.get_desired_goal().shape)
            })
        # Use the frontvision cam
        elif observation_mode == 'vision':
            self.frontcam.handle_explicitly()
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=obs.get_low_dim_data().shape),
                "observation": spaces.Box(
                    low=0, high=1, shape=self.frontcam.capture_rgb().transpose(2,0,1).flatten().shape,
                    )
                })
            if altview == "both":
                example = self.frontcam.capture_rgb().transpose(2,0,1).flatten()
                self.observation_space = spaces.Dict({
                    "state": spaces.Box(
                        low=-np.inf, high=np.inf,
                        shape=obs.get_low_dim_data().shape),
                    "observation": spaces.Box(
                        low=0, high=1, shape=np.array([example,example]).shape,
                        )
                    })
        elif observation_mode == 'visiondepth':

            self.frontcam.handle_explicitly()
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=obs.get_low_dim_data().shape),
                "observation": spaces.Box(
                    #thinking about not flattening the shape, because primarily used for dataset and not for training
                    low=0, high=1, shape=np.array([self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...]]).shape,
                    )
                })
        elif observation_mode == 'visiondepthmask':

            self.frontcam.handle_explicitly()
            self.frontcam_mask.handle_explicitly()
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=obs.get_low_dim_data().shape),
                "observation": spaces.Box(
                    #thinking about not flattening the shape, because primarily used for dataset and not for training
                    low=0, high=1, shape=np.array([self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...], rgb_handles_to_mask(self.frontcam_mask.capture_rgb())]).shape,
                    )
                })
            if altview == "both":
                self.observation_space = spaces.Dict({
                    "state": spaces.Box(
                        low=-np.inf, high=np.inf,
                        shape=obs.get_low_dim_data().shape),
                    "observation": spaces.Box(
                        #thinking about not flattening the shape, because primarily used for dataset and not for training
                        low=0, high=1, shape=np.array([self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...], rgb_handles_to_mask(self.frontcam_mask.capture_rgb())]).shape,
                        )
                    })

        self._gym_cam = None


    # GoalEnv 
    def compute_reward(self, achieved_goal, desired_goal, info):
        reward = self.task._task.compute_reward(achieved_goal, desired_goal, info)
        return reward

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return {
                'observation': self.task._task.get_state_obs(),
                'achieved_goal': self.task._task.get_achieved_goal(),
                'desired_goal':self.task._task.get_desired_goal(),
            }
        elif self._observation_mode == 'vision':
            self.frontcam.handle_explicitly()
            if self.altview:
                self.altcam.handle_explicitly()
                second_view = self.altcam.capture_rgb().transpose(2,0,1).flatten()
                if self.altview == "both":
                    return {
                        'achieved_goal': self.task._task.get_achieved_goal(),
                        'desired_goal':self.task._task.get_desired_goal(),
                        'save_state': self.task._task.get_save_state(),
                        "observation": np.array([self.frontcam.capture_rgb().transpose(2,0,1).flatten(), second_view])
                    } 
                return {
                    'achieved_goal': self.task._task.get_achieved_goal(),
                    'desired_goal':self.task._task.get_desired_goal(),
                    'save_state': self.task._task.get_save_state(),
                    "observation": self.altcam.capture_rgb().transpose(2,0,1).flatten()
                }
            if self.blank:
                return {
                    'achieved_goal': self.task._task.get_achieved_goal(),
                    'desired_goal':self.task._task.get_desired_goal(),
                    'save_state': self.task._task.get_save_state(),
                    "observation": np.zeros(self.observation_space['observation'].shape)
                }                
            return {
                'achieved_goal': self.task._task.get_achieved_goal(),
                'desired_goal':self.task._task.get_desired_goal(),
                'save_state': self.task._task.get_save_state(),
                "observation": self.frontcam.capture_rgb().transpose(2,0,1).flatten(),
            }
        elif self._observation_mode == 'visiondepth':
            self.frontcam.handle_explicitly()
            return {
                "state": obs.get_low_dim_data(),
                "observation": [self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...]]
            }
        elif self._observation_mode == 'visiondepthmask':
            self.frontcam.handle_explicitly()
            self.frontcam_mask.handle_explicitly()

            mask = np.array(self.frontcam_mask.capture_rgb())

            if self.altview:
                self.altcam.handle_explicitly()
                self.altcam_mask.handle_explicitly()
                altmask = np.array(self.altcam_mask.capture_rgb())
                second_view = self.altcam.capture_rgb().transpose(2,0,1)

                if self.altview == "both":
                    assert False
                    # no use case for this
                    return {
                        "state": obs.get_low_dim_data(),
                        "observation": [self.frontcam.capture_rgb().transpose(2,0,1), self.altcam.capture_rgb().transpose(2,0,1).flatten()]
                    } 
                return {
                    "state": obs.get_low_dim_data(),
                    "observation": [self.altcam.capture_rgb().transpose(2,0,1), self.altcam.capture_depth()[None,...], altmask]
                }


            return {
                "state": obs.get_low_dim_data(),
                "observation": [self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...], mask]
            }

    def render(self, mode='human'):
        self.frontcam.handle_explicitly()
        return self.frontcam.capture_rgb()

    def reset(self):
        _, obs = self.task.reset(force_randomly_place=self.force_randomly_place,
                                 force_change_position=self.force_change_position)
        if len(self.special_start) != 0:
            for a in self.special_start:
                self.step(a)
        # Doesn't actually use obs. 
        return self._extract_obs(obs)

    def step(self, action):
        if self.fixed_grip >= 0:
            action[3] = self.fixed_grip
        obs, reward, terminate = self.task.step(action)
        reward, done = self.task._task.get_reward_and_done(sparse=self.sparse)
        return self._extract_obs(obs), reward, done, {"is_success": int(done)}

    def close(self):
        self.env.shutdown()

    def rerender(self, state):
        self.task._task.restore_full_state(state)
        return self.render().transpose(2,0,1).flatten()


    def resample_step(self, state, action, sampled_goal):
        self.task._task.restore_save_state(state)
        Shape('target').set_position(sampled_goal)

        if self.altview == "both":
            self.frontcam.handle_explicitly()
            self.altcam.handle_explicitly()
            o_before = np.array([self.frontcam.capture_rgb().transpose(2,0,1).flatten(), self.altcam.capture_rgb().transpose(2,0,1).flatten()])
        elif self.altview:
            self.altcam.handle_explicitly()
            o_before =  self.altcam.capture_rgb().transpose(2,0,1).flatten()
        else:
            if self.blank:
                o_before = np.zeros(self.observation_space['observation'].shape)
            else:
                o_before = self.render().transpose(2,0,1).flatten()
        o, r, d, _ = self.step(action)
        o_after = o['observation']
        return o_before, r, d, o_after
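
Because the wrapper above implements gym.GoalEnv (dict observations with achieved_goal/desired_goal and a compute_reward hook), it can be paired with hindsight-style goal relabelling. The sketch below only shows the call pattern; SomeTask stands in for any RLBench task class, and the relabelling itself would normally happen inside a replay buffer.

env = RLBenchEnv(SomeTask, observation_mode='state')   # SomeTask: placeholder task class
obs = env.reset()
action = env.action_space.sample()
next_obs, reward, done, info = env.step(action)

# Hindsight relabel: pretend the achieved goal was the desired one.
relabelled_reward = env.compute_reward(
    next_obs['achieved_goal'], next_obs['achieved_goal'], info)
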
Example #24
class RLBenchEnv():
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(
        self,
        task_name: str,
        state_type: list = 'state',
    ):
        # render_mode=None):
        """
        Create an RLBench environment.
        :param task_name: task name; valid names can be found in rlbench.tasks
        :param state_type: 'state', 'vision', or a list of observation field names such as ['left_shoulder_rgb']
        """
        if state_type == 'state' or state_type == 'vision' or isinstance(
                state_type, list):
            self._state_type = state_type
        else:
            raise ValueError(
                'State type value error, your value is {}'.format(state_type))
        # self._render_mode = render_mode
        self._render_mode = None
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=True)
        self.env.launch()
        try:
            self.task = self.env.get_task(
                getattr(sys.modules[__name__], task_name))
        except:
            raise NotImplementedError

        _, obs = self.task.reset()
        self.spec = Spec(task_name)

        if self._state_type == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif self._state_type == 'vision':
            space_dict = OrderedDict()
            space_dict["state"] = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
            for i in [
                    "left_shoulder_rgb", "right_shoulder_rgb", "wrist_rgb",
                    "front_rgb"
            ]:
                space_dict[i] = spaces.Box(low=0,
                                           high=1,
                                           shape=getattr(obs, i).shape)
            self.observation_space = spaces.Dict(space_dict)
        else:
            space_dict = OrderedDict()
            for name in self._state_type:
                if name.split('_')[-1] in ('rgb',):
                    space_dict[name] = spaces.Box(low=0,
                                                  high=1,
                                                  shape=getattr(obs,
                                                                name).shape)
                elif name.split('_')[-1] in ('depth', 'mask'):
                    space_dict[name] = spaces.Box(low=0,
                                                  high=1,
                                                  shape=(128, 128, 3))
                    print(space_dict[name].shape)
                else:
                    space_dict[name] = spaces.Box(low=-np.inf,
                                                  high=np.inf,
                                                  shape=getattr(obs,
                                                                name).shape)
            self.observation_space = spaces.Dict(space_dict)
        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(self.env.action_size, ),
                                       dtype=np.float32)

        # if render_mode is not None:
        #     # Add the camera to the scene
        #     cam_placeholder = Dummy('cam_cinematic_placeholder')
        #     self._gym_cam = VisionSensor.create([640, 360])
        #     self._gym_cam.set_pose(cam_placeholder.get_pose())
        #     if render_mode == 'human':
        #         self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
        #     else:
        #         self._gym_cam.set_render_mode(RenderMode.OPENGL3)

    def _extract_obs(self, obs):
        if self._state_type == 'state':
            return np.array(obs.get_low_dim_data(), np.float32)
        elif self._state_type == 'vision':
            return np.array([
                np.array(obs.get_low_dim_data(), np.float32),
                np.array(obs.left_shoulder_rgb, np.float32),
                np.array(obs.right_shoulder_rgb, np.float32),
                np.array(obs.wrist_rgb, np.float32),
                np.array(obs.front_rgb, np.float32),
            ])
        else:
            result = []
            for name in self._state_type:
                if name.split('_')[-1] in ('mask', 'depth'):
                    a = np.expand_dims(np.array(getattr(obs, name),
                                                np.float32),
                                       axis=-1)
                    a = np.concatenate(
                        (a, np.zeros((128, 128, 2), dtype=a.dtype)), axis=2)
                    result.append(a)
                else:
                    result.append(np.array(getattr(obs, name), np.float32))
            return np.array(result)

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self, mode='human'):
        # TODO: make render available at any time
        if self._render_mode is None:
            self._render_mode = mode
            # Add the camera to the scene
            cam_placeholder = Dummy('cam_cinematic_placeholder')
            self._gym_cam = VisionSensor.create([640, 360])
            self._gym_cam.set_pose(cam_placeholder.get_pose())
            if mode == 'human':
                self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
            else:
                self._gym_cam.set_render_mode(RenderMode.OPENGL3)

        if mode != self._render_mode:
            raise ValueError(
                'The render mode must match the render mode selected in the '
                'constructor. \nI.e. if you want "human" render mode, then '
                'create the env by calling: '
                'gym.make("reach_target-state-v0", render_mode="human").\n'
                'You passed in mode %s, but expected %s.' %
                (mode, self._render_mode))
        if mode == 'rgb_array':
            return self._gym_cam.capture_rgb()

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._extract_obs(obs)

    def step(self, action):
        obs, reward, terminate = self.task.step(action)
        """
        img = Image.fromarray(np.uint8(np.array(obs.left_shoulder_rgb, np.float32)*255), 'RGB')
        img.show()
        a = np.array(obs.left_shoulder_mask, np.float32)
        img = Image.fromarray(np.uint8(a*255), 'L')
        img.show()
        a = np.array(obs.left_shoulder_depth, np.float32)
        img = Image.fromarray(np.uint8(a*255), 'L')
        img.show()
        input('> ')
        """
        return self._extract_obs(obs), reward, terminate, None

    def close(self):
        self.env.shutdown()
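
Usage sketch for the wrapper above; it mirrors the usual gym loop, with the task passed as a string that must name a class in rlbench.tasks ('ReachTarget' here is just an example).

env = RLBenchEnv('ReachTarget', state_type='state')
obs = env.reset()
for _ in range(100):
    obs, reward, terminate, _ = env.step(env.action_space.sample())
    if terminate:
        obs = env.reset()
env.close()
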
Example #25
class RLBenchEnv(object):
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(self, task_name, state_type_list=None, headless=False):
        if state_type_list is None:
            state_type_list = ['left_shoulder_rgb']
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=headless)
        self.env.launch()
        try:
            self.task = self.env.get_task(ReachTarget)
        except:
            raise NotImplementedError

        _, obs = self.task.reset()

        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                if isinstance(state, (int, float)):
                    shape = (1, )
                else:
                    shape = state.shape
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=shape))
        else:
            raise ValueError('No State Type!')
        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)

        self.state_type_list = state_type_list

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self):
        # render the scene
        pass

    def _get_state(self, obs):
        if len(self.state_type_list) > 0:
            state = []
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    image = np.moveaxis(
                        image, 2, 0)  # change (H, W, C) to (C, H, W) for torch
                    state.append(image)
                elif state_type in primitive_types:
                    state.append(np.array([getattr(obs, state_type)]))
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State Type Not Exists!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        obs, reward, terminate = self.task.step(
            action)  # the original RLBench reward is binary (success / failure)

        # distance between the current gripper position and the target position
        # (the arm-tip pose is an alternative reference point):
        # x, y, z, qx, qy, qz, qw = self.task._robot.arm.get_tip().get_pose()
        x, y, z, qx, qy, qz, qw = self.task._robot.gripper.get_pose()
        tar_x, tar_y, tar_z, _, _, _, _ = self.task._task.target.get_pose()
        distance = (np.abs(x - tar_x) + np.abs(y - tar_y) + np.abs(z - tar_z))
        reward -= 1. * distance

        # speed regulation
        joint_velocities = self.task._robot.arm.get_joint_velocities()
        velocities = np.inner(joint_velocities, joint_velocities)
        reward -= 0.05 / len(joint_velocities) * velocities

        # orientation
        # orientation: angle between the gripper's rotation axis and the
        # gripper-to-target vector, folded into [0, pi/2]
        (gripper_x, gripper_y, gripper_z,
         gripper_qx, gripper_qy, gripper_qz,
         gripper_qw) = self.task._robot.gripper.get_pose()
        v1 = np.array(
            (tar_x - gripper_x, tar_y - gripper_y, tar_z - gripper_z))
        # sin(arccos(qw)) approaches zero for a near-identity rotation,
        # so guard the division against a zero denominator
        temp = max(np.sin(np.arccos(np.clip(gripper_qw, -1.0, 1.0))), 1e-8)
        v2 = np.array(
            (gripper_qx / temp, gripper_qy / temp, gripper_qz / temp))
        orientation_angle = np.abs(
            np.arccos(v1.dot(v2) / np.sqrt(v1.dot(v1)) /
                      np.sqrt(v2.dot(v2))))  # [0, pi]
        if orientation_angle > np.pi / 2.:
            orientation_angle = np.pi - orientation_angle
        reward -= 0.5 / np.pi * orientation_angle  # if orientation_angle > np.pi / 8. else 0.
        return self._get_state(obs), reward, terminate, None

    def close(self):
        self.env.shutdown()
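The step() method of Example #25 layers three shaping terms on top of RLBench's sparse success reward: an L1 distance penalty to the target, a quadratic joint-velocity penalty, and a penalty on the angle between the gripper's rotation axis and the gripper-to-target vector. The helper below is a hypothetical, self-contained sketch of that computation (names and weights mirror the code above; eps guards the divisions that can blow up for a near-identity gripper rotation):

import numpy as np

def shaped_reward(base_reward, gripper_pose, target_pos, joint_velocities,
                  dist_w=1.0, vel_w=0.05, ori_w=0.5, eps=1e-8):
    x, y, z, qx, qy, qz, qw = gripper_pose
    tar_x, tar_y, tar_z = target_pos

    # L1 distance between gripper and target
    distance = abs(x - tar_x) + abs(y - tar_y) + abs(z - tar_z)
    reward = base_reward - dist_w * distance

    # quadratic penalty on joint speeds, normalised by the number of joints
    jv = np.asarray(joint_velocities)
    reward -= vel_w / len(jv) * jv.dot(jv)

    # angle between the gripper-to-target vector and the quaternion's
    # rotation axis, folded into [0, pi/2]
    v1 = np.array((tar_x - x, tar_y - y, tar_z - z))
    s = max(np.sin(np.arccos(np.clip(qw, -1.0, 1.0))), eps)
    v2 = np.array((qx, qy, qz)) / s
    cos_angle = v1.dot(v2) / (np.linalg.norm(v1) * np.linalg.norm(v2) + eps)
    angle = np.abs(np.arccos(np.clip(cos_angle, -1.0, 1.0)))
    if angle > np.pi / 2.0:
        angle = np.pi - angle
    reward -= ori_w / np.pi * angle
    return reward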
Example #26
class RLBenchEnv(object):
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(self, task_name, state_type_list=None, headless=False):
        if state_type_list is None:
            state_type_list = ['left_shoulder_rgb']
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=headless)
        self.env.launch()
        try:
            # task_name is ignored here as well; see the note in Example #25
            self.task = self.env.get_task(ReachTarget)
        except Exception:
            raise NotImplementedError

        _, obs = self.task.reset()

        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=state.shape))
        else:
            raise ValueError('No State Type!')
        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)

        self.state_type_list = state_type_list

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self):
        # render the scene
        pass

    def _get_state(self, obs):
        # state = np.hstack((obs.joint_positions, obs.joint_velocities, obs.joint_forces))
        if len(self.state_type_list) > 0:
            state = []
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    image = np.moveaxis(
                        image, 2, 0)  # change (H, W, C) to (C, H, W) for torch
                    state.append(image)
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State Type Not Exists!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        # self.task._task.target.set_pose(np.array([0.354685515165329, -0.025212552398443222, 1.0284103155136108, 0.0, 0.0, 0.0, 1.0]))
        return self._get_state(obs)

    def step(self, action):
        obs, reward, terminate = self.task.step(
            action)  # the original RLBench reward is binary (success / failure)

        # distance between current pos (tip) and target pos
        x, y, z, qx, qy, qz, qw = self.task._robot.arm.get_tip().get_pose()
        tar_x, tar_y, tar_z, _, _, _, _ = self.task._task.target.get_pose()
        distance = (np.abs(x - tar_x) + np.abs(y - tar_y) + np.abs(z - tar_z))
        reward -= 0.5 * distance

        # speed regulation
        joint_velocities = self.task._robot.arm.get_joint_velocities()
        velocities = np.inner(joint_velocities, joint_velocities)
        reward -= 0.1 / len(joint_velocities) * velocities
        return self._get_state(obs), reward, terminate, None

    def close(self):
        self.env.shutdown()
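Example #26 keeps the same gym-like interface but with a lighter shaped reward (distance and velocity terms only). A minimal random-rollout sketch; headless operation and the 'left_shoulder_rgb' observation are assumptions of this snippet, mirroring the constructor defaults above:

import numpy as np

env = RLBenchEnv('ReachTarget', state_type_list=['left_shoulder_rgb'], headless=True)
state = env.reset()
for _ in range(50):
    # sample a joint-velocity action within the Box bounds defined in __init__
    action = np.random.uniform(env.action_space.low, env.action_space.high)
    state, reward, done, _ = env.step(action)
    if done:
        state = env.reset()
env.close()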