Example #1
    def __init__(self,
                 task_name,
                 state_type_list=['left_shoulder_rgb'],
                 dataset_root='',
                 observation_mode='state',
                 headless=True):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self._observation_mode = observation_mode
        self.env = Environment(
            action_mode, dataset_root, obs_config=obs_config, headless=headless)
        # env.get_task could launch the env itself, but we launch it explicitly here.
        self.env.launch()
        self.task = self.env.get_task(task_name)
        _, obs = self.task.reset()
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
#         self.logger = logger.create_logger(__class__.__name__)
#         self.logger.propagate = 0
        if len(state_type_list) > 0:
            self.observation_space = []
            # for state_type in state_type_list:
            #     state = getattr(obs, state_type)
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)      
        else:
            raise ValueError('No State Type!')

        self.state_type_list = state_type_list
Example #2
class RLBenchEnv():
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(self, task_name, state_type_list=['left_shoulder_rgb']):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(
            action_mode, obs_config=obs_config, headless=False)
        self.env.launch()
        try: 
            self.task = self.env.get_task(getattr(sys.modules[__name__], task_name))
        except:
            raise NotImplementedError

        _, obs = self.task.reset()
        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=state.shape))
        else:
            raise ValueError('No State Type!')
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,), dtype=np.float32)
        
        self.state_type_list = state_type_list
        self.spec = Spec(task_name)

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass 

    def render(self):
        # render the scene
        pass

    def _get_state(self, obs):
        if len(self.state_type_list) > 0:
            state = []
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    image = np.moveaxis(image, 2, 0)  # change (H, W, C) to (C, H, W) for torch
                    state.append(image)
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State type does not exist!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        obs_, reward, terminate = self.task.step(action)  # reward in original rlbench is binary for success or not
        return self._get_state(obs_), reward, terminate, None


    def close(self):
        self.env.shutdown()
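A minimal gym-style usage sketch for the wrapper above. The task name ReachTarget and the extra 'joint_positions' entry are illustrative assumptions; any RLBench task class imported in the wrapper's module can be passed by name.

# Hypothetical usage sketch; ReachTarget is assumed to be importable here.
env = RLBenchEnv('ReachTarget',
                 state_type_list=['left_shoulder_rgb', 'joint_positions'])
state = env.reset()
for _ in range(100):
    action = env.action_space.sample()           # random exploration
    state, reward, done, _ = env.step(action)    # gym-style 4-tuple
    if done:
        state = env.reset()
env.close()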
Example #3
    def initialize(self, headless=False):
        DATASET = ''
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        obs_config.left_shoulder_camera.rgb = True
        obs_config.right_shoulder_camera.rgb = True
        action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN)
        self.env = Environment(action_mode,
                               DATASET,
                               obs_config,
                               headless=headless)
        self.sensor = NoisyObjectPoseSensor(self.env)
        self.env.launch()
        self.task = self.env.get_task(EmptyContainer)
        self.task.reset()
        self.home = self.get_objects(False)['large_container'].get_pose()
        self.home[2] += 0.3
        # demos = task.get_demos(3, live_demos=live_demos)

        self.objs = self.get_objects()
        self.movable_objects = ['Shape', 'Shape1', 'Shape3']
        self.target_bins = ['small_container0']
        self.start_bins = ['large_container']
        self.max_retry = 5
        self.env._robot.gripper.set_joint_forces([50, 50])
Example #4
    def __init__(self, task_name, state_type='left_shoulder_rgb'):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=False)
        self.env.launch()
        try:
            self.task = self.env.get_task(
                getattr(sys.modules[__name__], task_name))
        except:
            raise NotImplementedError

        _, obs = self.task.reset()
        state = getattr(obs, state_type)
        self.state_type = state_type
        self.spec = Spec(task_name)

        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=state.shape)
Example #5
def collect_demos(env: Environment,
                  task: Task,
                  taskname,
                  n_iter_per_var=50,
                  n_demos_per_iter=1000):
    env.launch()
    task_env = env.get_task(task)

    for variation_index in range(1):  #task_env.variation_count()):
        # set variation
        task_env.set_variation(variation_index)
        description, _ = task_env.reset()

        # collect and save demos
        start = time.time()
        for i in range(n_iter_per_var):
            np.random.seed(6)
            demos = task_env.get_demos(n_demos_per_iter,
                                       live_demos=True,
                                       max_attempts=1)
            demos = np.array(demos).flatten()
            print(demos.shape)
            save_to_hdf5(demos, taskname, description, variation_index)
        print("TIME: ", time.time() - start)

    # env.shutdown()
    return demos
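A sketch of how collect_demos could be driven, assuming the environment is built the same way as in the surrounding examples; ReachTarget, the task name string, and the demo counts are illustrative (collect_demos also relies on a save_to_hdf5 helper not shown here).

# Hypothetical driver for collect_demos.
action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
obs_config = ObservationConfig()
obs_config.set_all(True)
env = Environment(action_mode, obs_config=obs_config, headless=True)
demos = collect_demos(env, ReachTarget, 'reach_target',
                      n_iter_per_var=1, n_demos_per_iter=10)
env.shutdown()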
Example #6
    def __init__(self, task_name, state_type_list=None, headless=False):
        if state_type_list is None:
            state_type_list = ['left_shoulder_rgb']
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=headless)
        self.env.launch()
        try:
            self.task = self.env.get_task(ReachTarget)
        except:
            raise NotImplementedError

        _, obs = self.task.reset()

        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                if isinstance(state, (int, float)):
                    shape = (1, )
                else:
                    shape = state.shape
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=shape))
        else:
            raise ValueError('No State Type!')
        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)

        self.state_type_list = state_type_list
Example #7
 def __init__(self, action_mode, static_positions=True):
     # Initialize environment with Action mode and observations
     # Resize the wrist camera to fit the GQCNN
     wrist_camera = CameraConfig(image_size=(1032, 772))
     self.env = Environment(action_mode, '', ObservationConfig(wrist_camera=wrist_camera), False, static_positions=static_positions)
     self.env.launch()
     # Load specified task into the environment
     self.task = self.env.get_task(EmptyContainer)
Example #8
    def run_episode(self):
        self.env = Environment(self._action_mode,
                               "",
                               self._obs_config,
                               headless=self._headless)
        self.env.launch()

        self.task = self.env.get_task(self._task_class)
Example #9
class SimulationEnvionment():
    """
    This can serve as a parent class; child classes can specialise it for
    different tasks and for deeper task-specific functionality.
    """
    def __init__(self,
                 action_mode=DEFAULT_ACTION_MODE,
                 task=DEFAULT_TASK,
                 dataset_root='',
                 headless=True):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        self.env = Environment(action_mode,
                               dataset_root,
                               obs_config=obs_config,
                               headless=headless)
        # Don't need to call launch, as env.get_task will launch the env.
        self.task = self.env.get_task(task)
        _, obs = self.task.reset()
        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(self.env.action_size, ),
                                       dtype=np.float32)
        self.logger = logger.create_logger(__class__.__name__)
        self.logger.propagate = 0

    def _get_state(self, obs: Observation):
        """
        Hook over the environment for extracting the state.
        Ensures that any changes to attributes of `Observation` can be made in
        a modular way.
        """
        raise NotImplementedError()

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        obs_, reward, terminate = self.task.step(
            action)  # reward in original rlbench is binary for success or not
        return self._get_state(obs_), reward, terminate

    def shutdown(self):
        self.logger.info(
            "Environment shut down! Create a new instance if you want to start again."
        )
        self.logger.handlers.pop()
        self.env.shutdown()

    def get_demos(self, num_demos):
        """
       Should be implemented by child classes because other extra properties needed
       before training can be accessed via that. 
       """
        raise NotImplementedError()
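A minimal child-class sketch, assuming the parent above is used unchanged; the class name and the choice of low-dimensional state are illustrative only.

# Hypothetical subclass of the parent environment above.
class ReachTargetSimulation(SimulationEnvionment):
    def _get_state(self, obs: Observation):
        # use the flattened low-dimensional observation as the state
        return obs.get_low_dim_data()

    def get_demos(self, num_demos):
        # delegate to the underlying RLBench task (live demos)
        return self.task.get_demos(num_demos, live_demos=True)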
Example #10
    def __init__(self,
                 task_class,
                 observation_mode='state',
                 render_mode: Union[None, str] = None):
        self._observation_mode = observation_mode
        self._render_mode = render_mode
        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision':
            obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)

        _, obs = self.task.reset()

        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(self.env.action_size, ))

        if observation_mode == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif observation_mode == 'vision':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "left_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.left_shoulder_rgb.shape),
                "right_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.right_shoulder_rgb.shape),
                "wrist_rgb":
                spaces.Box(low=0, high=1, shape=obs.wrist_rgb.shape),
                "front_rgb":
                spaces.Box(low=0, high=1, shape=obs.front_rgb.shape),
            })

        if render_mode is not None:
            # Add the camera to the scene
            cam_placeholder = Dummy('cam_cinematic_placeholder')
            self._gym_cam = VisionSensor.create([640, 360])
            self._gym_cam.set_pose(cam_placeholder.get_pose())
            if render_mode == 'human':
                self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
            else:
                self._gym_cam.set_render_mode(RenderMode.OPENGL3)
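The spaces above imply an observation-extraction step along these lines; the helper below is a sketch of what the rest of the class presumably does with an incoming Observation, not part of the snippet itself.

    # Sketch of an extractor consistent with the observation spaces above.
    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'vision':
            return {
                "state": obs.get_low_dim_data(),
                "left_shoulder_rgb": obs.left_shoulder_rgb,
                "right_shoulder_rgb": obs.right_shoulder_rgb,
                "wrist_rgb": obs.wrist_rgb,
                "front_rgb": obs.front_rgb,
            }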
Example #11
def sub_proc(child):
    action_mode = ActionMode()
    env = Environment(action_mode, headless=child)
    env.launch()
    task = env.get_task(CloseMicrowave)

    descriptions, obs = task.reset()

    for i in range(1000):
        obs, reward, terminate = task.step(np.random.normal(np.zeros(action_mode.action_size)))
Example #12
 def get_task(self, task_class, arm_action_mode):
     obs_config = ObservationConfig()
     obs_config.set_all(False)
     obs_config.set_all_low_dim(True)
     obs_config.right_shoulder_camera.rgb = True
     action_mode = ActionMode(arm_action_mode)
     self.env = Environment(
         action_mode, ASSET_DIR, obs_config, headless=True)
     self.env.launch()
     return self.env.get_task(task_class)
Example #13
    def __init__(self,
                 task_class,
                 state_dim,
                 n_features,
                 n_cluster,
                 n_latent,
                 parameters=None,
                 headless=False,
                 cov_reg=1E-8,
                 n_samples=50,
                 dense_reward=False,
                 imitation_noise=0.03):

        obs_config = ObservationConfig()
        obs_config.set_all_low_dim(True)
        obs_config.set_all_high_dim(False)
        self._obs_config = obs_config

        self._state_dim = state_dim
        self._headless = headless

        action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
        self._task_class = task_class
        self._action_mode = action_mode
        self.env = Environment(action_mode, "", obs_config, headless=headless)
        self.env.launch()

        self.task = self.env.get_task(task_class)
        if parameters is None:
            self.parameters = np.load(
                "parameters/%s_%d.npy" %
                (self.task.get_name(), n_features))[:n_samples]
        else:
            # without this branch self.parameters would be undefined below
            self.parameters = parameters
        # parameters = np.concatenate([parameters for _ in range(20)])
        self.imitation_noise = imitation_noise
        self.parameters[:, :3] += imitation_noise * np.random.normal(
            size=self.parameters[:, :3].shape)
        self.mppca = MPPCA(n_cluster,
                           n_latent,
                           n_iterations=30,
                           cov_reg=cov_reg,
                           n_init=500)
        self.mppca.fit(self.parameters)

        self.rlmppca = None
        self.dense_reward = dense_reward

        print(np.exp(self.mppca.log_pi))

        group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"])
        self.space = ClassicSpace(group, n_features=n_features)

        print("mpcca learned")
Example #14
    def __init__(self, task_name: str, state_type='state'):
        # render_mode=None):
        """
        create RL Bench environment
        :param task_name: task names can be found in rlbench.tasks
        :param state_type: 'state', 'vision', or a sub-list of state types such as ['left_shoulder_rgb']
        """
        if state_type == 'state' or state_type == 'vision' or isinstance(state_type, list):
            self._state_type = state_type
        else:
            raise ValueError('State type value error, your value is {}'.format(state_type))
        # self._render_mode = render_mode
        self._render_mode = None
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(
            action_mode, obs_config=obs_config, headless=True)
        self.env.launch()
        try:
            self.task = self.env.get_task(getattr(sys.modules[__name__], task_name))
        except:
            raise NotImplementedError

        _, obs = self.task.reset()
        self.spec = Spec(task_name)

        if self._state_type == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif self._state_type == 'vision':
            space_dict = OrderedDict()
            space_dict["state"] = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
            for i in ["left_shoulder_rgb", "right_shoulder_rgb", "wrist_rgb", "front_rgb"]:
                space_dict[i] = spaces.Box(
                    low=0, high=1, shape=getattr(obs, i).shape)
            self.observation_space = spaces.Dict(space_dict)
        else:
            space_dict = OrderedDict()
            for name in self._state_type:
                if name.split('_')[-1] in ('rgb', 'depth', 'mask'):
                    space_dict[name] = spaces.Box(
                        low=0, high=1, shape=getattr(obs, name).shape)
                else:
                    space_dict[name] = spaces.Box(
                        low=-np.inf, high=np.inf,
                        shape=getattr(obs, name).shape)
            self.observation_space = spaces.Dict(space_dict)
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
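A brief sketch of the three accepted state_type values, assuming the enclosing class is the RLBenchEnv wrapper shown in the neighbouring examples; ReachTarget is illustrative and must be importable in that module. The three calls are alternatives (each launches its own simulator), not meant to run together.

# Hypothetical instantiations illustrating the three state_type modes.
env_state = RLBenchEnv('ReachTarget', state_type='state')     # low-dim Box
env_vision = RLBenchEnv('ReachTarget', state_type='vision')   # Dict: state + all cameras
env_custom = RLBenchEnv('ReachTarget',
                        state_type=['left_shoulder_rgb', 'joint_positions'])  # Dict subset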
Example #15
    def get_env(self, task=None):
        task = task if task else self.config.env_type
        if self.use_gym:
            assert type(
                task
            ) == str  # NOTE : When using gym, the task has to be represented as a string.
            assert self.observation_mode in ['vision', 'state']

            env = gym.make(
                task,
                observation_mode=self.config.env_args['observation_mode'],
                render_mode=self.config.env_args['render_mode'])
            self.env_obj = env
        else:
            obs_config = ObservationConfig()
            if self.observation_mode == 'vision':
                obs_config.set_all(True)
            elif self.observation_mode == 'state':
                obs_config.set_all_high_dim(False)
                obs_config.set_all_low_dim(True)
            else:
                obs_config_dict = {
                    self.left_obv_key: obs_config.left_shoulder_camera,
                    self.right_obv_key: obs_config.right_shoulder_camera,
                    self.wrist_obv_key: obs_config.wrist_camera,
                    self.front_obv_key: obs_config.front_camera
                }

                assert self.observation_mode in obs_config_dict.keys()

                obs_config.set_all_high_dim(False)
                obs_config_dict[self.observation_mode].set_all(True)
                obs_config.set_all_low_dim(True)

            # TODO : Write code to change it from env_args
            action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
            self.env_obj = Environment(action_mode,
                                       obs_config=obs_config,
                                       headless=True)

            task = task if task else ReachTarget
            if type(task) == str:
                task = task.split('-')[0]
                task = self.env_obj._string_to_task(task)

            self.env_obj.launch()
            env = self.env_obj.get_task(
                task)  # NOTE : `env` is referred to as `task` in the RLBench docs.
        return env
Example #16
 def __init__(self,
              action_mode=DEFAULT_ACTION_MODE,
              task=DEFAULT_TASK,
              dataset_root='',
              headless=True):
     obs_config = ObservationConfig()
     obs_config.set_all(True)
     self.env = Environment(
         action_mode, dataset_root, obs_config=obs_config, headless=headless)
     # Don't need to call launch, as env.get_task will launch the env.
     self.task = self.env.get_task(task)
     _, obs = self.task.reset()
     self.action_space =  spaces.Box(low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
     self.logger = logger.create_logger(__class__.__name__)
     self.logger.propagate = 0
Example #17
class RLBenchEnv():
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(self, task_name, state_type='left_shoulder_rgb'):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=False)
        self.env.launch()
        try:
            self.task = self.env.get_task(
                getattr(sys.modules[__name__], task_name))
        except:
            raise NotImplementedError

        _, obs = self.task.reset()
        state = getattr(obs, state_type)
        self.state_type = state_type
        self.spec = Spec(task_name)

        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=state.shape)

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self):
        # render the scene
        pass

    def reset(self):
        descriptions, obs = self.task.reset()
        return getattr(obs, self.state_type)

    def step(self, action):
        obs_, reward, terminate = self.task.step(action)
        return getattr(obs_, self.state_type), reward, terminate, None

    def close(self):
        self.env.shutdown()
Example #18
    def __init__(self, h):

        #64x64 camera outputs
        cam = CameraConfig(image_size=(64, 64))
        obs_config = ObservationConfig(left_shoulder_camera=cam,
                                       right_shoulder_camera=cam,
                                       wrist_camera=cam,
                                       front_camera=cam)
        obs_config.set_all(True)

        # delta EE control with motion planning
        action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_WORLD_FRAME)

        #Inits
        self.env = Environment(action_mode, obs_config=obs_config, headless=h)
        self.env.launch()
        self.task = self.env.get_task(ReachTarget)
Example #19
 def init(self, display=False):
     with suppress_stdout():
         self.env = RLEnvironment(action_mode=self._action_config,
                                  obs_config=self._observation_config,
                                  headless=not display,
                                  robot_configuration=self._robot_name)
         self.env.launch()
         self.task = self.env.get_task(
             get_named_class(self._task_name, tasks))
Example #20
    def __init__(self, task_class, observation_mode='state'):
        self._observation_mode = observation_mode
        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision':
            obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)

        _, obs = self.task.reset()

        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ))

        if observation_mode == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif observation_mode == 'vision':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "left_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.left_shoulder_rgb.shape),
                "right_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.right_shoulder_rgb.shape),
                "wrist_rgb":
                spaces.Box(low=0, high=1, shape=obs.wrist_rgb.shape),
            })

        self._gym_cam = None
Example #21
def act(graph, model, sess):
    obs_config = ObservationConfig()
    # obs_config.set_all(True)

    # These settings switch the cameras off
    obs_config.right_shoulder_camera.set_all(False)
    obs_config.wrist_camera.set_all(False)
    obs_config.left_shoulder_camera.set_all(False)

    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    env = Environment(
        action_mode, obs_config=obs_config, headless=False)
    env.launch()

    task = env.get_task(ReachTarget)

    training_steps = 150
    episode_length = 60
    obs = None
    gripper = [1.0]  # keep the gripper open

    for i in range(training_steps):
        # Reset the task once per episode, including the initial obs data
        if i % episode_length == 0:
            print('Reset Episode')
            descriptions, obs = task.reset()
            print(descriptions)
        
        # Inputs to the network
        vision = obs.front_rgb
        vision = vision[np.newaxis,:]
        state = obs.joint_positions
        state = state[np.newaxis,:]
        action = None
        feed_dict = {
            model.vision:vision,
            model.state:state,
            model.action:action
        }
        run_ops = [model.predict_out]

        with graph.as_default():
            res = sess.run(run_ops,feed_dict=feed_dict)

        # print(res[0][0].shape)
        # print(gripper.shape)
        # input('check shape')
        action = np.concatenate([res[0][0],gripper],axis=-1)
        obs, reward, terminate = task.step(action)
        if reward == 1:
            print("successful try!")

    print('Done')
    env.shutdown()
Example #22
    def __init__(self, task_class, state_dim, n_features, headless=True):

        super().__init__(n_features)
        self._group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"])
        self._space = ClassicSpace(self._group, n_features)

        obs_config = ObservationConfig()
        obs_config.set_all_low_dim(True)
        obs_config.set_all_high_dim(False)
        self._obs_config = obs_config

        self._state_dim = state_dim
        self._headless = headless

        action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
        self._task_class = task_class
        self._action_mode = action_mode
        self.env = Environment(
            action_mode, "", obs_config, headless=headless)
        self.env.launch()

        self.task = self.env.get_task(task_class)
        self._obs = None
Example #23
def simulation(q):
    # See rlbench/action_modes.py for other action modes
    withLearning = False
    action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN)
    action_mode_v = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    if withLearning:
        env = Environment(action_mode_v, '', ObservationConfig(), False)
    else:
        env = Environment(action_mode, '', ObservationConfig(), False)
    task = env.get_task(EmptyContainer)
    obj_pose_sensor = NoisyObjectPoseSensor(env)
    descriptions, obs = task.reset()
    allPoses = obj_pose_sensor.get_poses()
    agent = RandomAgent(allPoses["large_container"],
                        allPoses["small_container0"], env)
    print(descriptions)
    obj_poses = obj_pose_sensor.get_poses()
    training_steps = 10000
    episode_length = 100
    i = 0
    rewardFunc = Reward(env, ['Shape0', 'Shape2', 'Shape4'],
                        "small_container0")
    while i < training_steps:
        # Getting noisy object poses
        obj_poses = obj_pose_sensor.get_poses()
        mask = env._scene._cam_wrist_mask.capture_rgb()
        data = (obs.wrist_depth, obs.wrist_rgb, mask)
        if withLearning:
            if i > 0 and i % episode_length == 0:
                print(i, 'Reset Episode')
                agent.updateV()
                i += 1
                continue
            oldState, action = agent.act(obs)
            obs, _, terminate = task.step(action)
            reward = rewardFunc.evaluate(obs)
            print(reward)
            nextState = np.zeros(8)
            nextState[:7] = obs.joint_positions
            nextState[7] = obs.gripper_open > 0.5
            agent.updateQ(oldState, action, nextState, reward)
            i += 1
        else:
            q.put(data)
            nextState = agent.resetAct(obs, obj_poses)
            obs, reward, terminate = task.step(nextState)
        # if terminate:
        #     break
    env.shutdown()
Example #24
 def _update_info_dict(self):
     # update info dict
     self._info["action mode"] = self._action_mode
     self._info["observation mode"] = self._observation_mode
     # TODO: action dim should be related to the robot, not the action mode; fixed temporarily here
     self._info["action dim"] = (self._action_config.action_size, )
     self._info["action low"] = -np.ones(self._info["action dim"],
                                         dtype=np.float32)
     self._info["action high"] = np.ones(self._info["action dim"],
                                         dtype=np.float32)
     if self._observation_mode == "state" or self._observation_mode == "all":
         # TODO: observation should be determined without initialising the entire environment
         with suppress_stdout():
             env = RLEnvironment(action_mode=self._action_config,
                                 obs_config=self._observation_config,
                                 headless=True,
                                 robot_configuration=self._robot_name)
             env.launch()
             task = env.get_task(get_named_class(self._task_name, tasks))
             _, obs = task.reset()
             env.shutdown()
             del task
             del env
         self._info["time step"] = _DT
         self._info["state dim"] = tuple(obs.get_low_dim_data().shape)
         self._info["state low"] = np.ones(self._info["state dim"],
                                           dtype=np.float32) * -np.inf
         self._info["state high"] = np.ones(self._info["state dim"],
                                            dtype=np.float32) * np.inf
     if self._observation_mode == "vision" or self._observation_mode == "all":
         self._info["left shoulder rgb dim"] = tuple(
             self._observation_config.left_shoulder_camera.image_size) + (
                 3, )
         self._info["left shoulder rgb low"] = np.zeros(
             self._info["left shoulder rgb dim"], dtype=np.float32)
         self._info["left shoulder rgb high"] = np.ones(
             self._info["left shoulder rgb dim"], dtype=np.float32)
         self._info["right shoulder rgb  dim"] = tuple(
             self._observation_config.right_shoulder_camera.image_size) + (
                 3, )
         self._info["right shoulder rgb  low"] = np.zeros(
             self._info["right shoulder rgb  dim"], dtype=np.float32)
         self._info["right shoulder rgb  high"] = np.ones(
             self._info["right shoulder rgb  dim"], dtype=np.float32)
         self._info["wrist rgb dim"] = tuple(
             self._observation_config.wrist_camera.image_size) + (3, )
         self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"],
                                                dtype=np.float32)
         self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"],
                                                dtype=np.float32)
     self._info["reward low"] = -np.inf
     self._info["reward high"] = np.inf
Example #25
def job_worker_validation(worker_id, action_mode, obs_config, task_class,
                          command_q: mp.Queue, result_q: mp.Queue, obs_scaling,
                          redundancy_resolution_setup, seed):

    np.random.seed(worker_id + seed)
    # setup the environment
    env = Environment(action_mode=action_mode,
                      obs_config=obs_config,
                      headless=True)
    env.launch()
    task = env.get_task(task_class)
    task.reset()
    while True:
        command = command_q.get()
        command_type = command[0]
        command_args = command[1]
        if command_type == "reset":
            task.set_variation(task.sample_variation())
            descriptions, observation = task.reset()
            if obs_scaling is not None:
                observation = observation.get_low_dim_data() / obs_scaling
            else:
                observation = observation.get_low_dim_data()
            result_q.put((descriptions, observation))
        elif command_type == "step":
            actions = command_args[0]
            # check if we need to resolve redundancy
            if redundancy_resolution_setup["use_redundancy_resolution"]:
                actions[0:7], L = task.resolve_redundancy_joint_velocities(
                    actions=actions[0:7], setup=redundancy_resolution_setup)
            else:
                # here we only care about the loss (for logging)
                _, L = task.resolve_redundancy_joint_velocities(
                    actions=actions[0:7], setup=redundancy_resolution_setup)

            next_observation, reward, done = task.step(actions)
            if obs_scaling is not None:
                next_observation = next_observation.get_low_dim_data(
                ) / obs_scaling
            else:
                next_observation = next_observation.get_low_dim_data()
            result_q.put((next_observation, reward, done, L))
        elif command_type == "kill":
            print("Killing worker %d" % worker_id)
            env.shutdown()
            break
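A sketch of the parent-process side of this command protocol, showing how the two queues could be driven; the task class, the contents of the redundancy setup dict, and the single step are illustrative assumptions.

# Hypothetical parent-side driver for job_worker_validation.
import multiprocessing as mp
import numpy as np

action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
obs_config = ObservationConfig()
obs_config.set_all_low_dim(True)
obs_config.set_all_high_dim(False)
redundancy_setup = {"use_redundancy_resolution": False}  # plus whatever the task expects

command_q, result_q = mp.Queue(), mp.Queue()
worker = mp.Process(target=job_worker_validation,
                    args=(0, action_mode, obs_config, ReachTarget,
                          command_q, result_q, None, redundancy_setup, 42))
worker.start()

command_q.put(("reset", None))
descriptions, observation = result_q.get()

actions = np.zeros(action_mode.action_size)  # stand-in action
command_q.put(("step", (actions,)))
observation, reward, done, loss = result_q.get()

command_q.put(("kill", None))
worker.join()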
Example #26
class RLBenchEnv(object):
    """ make RLBench env to have same interfaces as openai.gym """
    def __init__(self, task_name, state_type_list=None, headless=False):
        if state_type_list is None:
            state_type_list = ['left_shoulder_rgb']
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=headless)
        self.env.launch()
        try:
            self.task = self.env.get_task(ReachTarget)
        except:
            raise NotImplementedError

        _, obs = self.task.reset()

        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                if isinstance(state, (int, float)):
                    shape = (1, )
                else:
                    shape = state.shape
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=shape))
        else:
            raise ValueError('No State Type!')
        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)

        self.state_type_list = state_type_list

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self):
        # render the scene
        pass

    def _get_state(self, obs):
        if len(self.state_type_list) > 0:
            state = []
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    image = np.moveaxis(
                        image, 2, 0)  # change (H, W, C) to (C, H, W) for torch
                    state.append(image)
                elif state_type in primitive_types:
                    state.append(np.array([getattr(obs, state_type)]))
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State type does not exist!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        obs, reward, terminate = self.task.step(
            action)  # reward in original rlbench is binary for success or not

        # distance between current pos (tip) and target pos
        # x, y, z, qx, qy, qz, qw = self.task._robot.arm.get_tip().get_pose()
        x, y, z, qx, qy, qz, qw = self.task._robot.gripper.get_pose()
        tar_x, tar_y, tar_z, _, _, _, _ = self.task._task.target.get_pose()
        distance = (np.abs(x - tar_x) + np.abs(y - tar_y) + np.abs(z - tar_z))
        reward -= 1. * distance

        # speed regulation
        joint_velocities = self.task._robot.arm.get_joint_velocities()
        velocities = np.inner(joint_velocities, joint_velocities)
        reward -= 0.05 / len(joint_velocities) * velocities

        # orientation
        gripper_x, gripper_y, gripper_z, gripper_qx, gripper_qy, gripper_qz, gripper_qw = self.task._robot.gripper.get_pose(
        )
        v1 = np.array(
            (tar_x - gripper_x, tar_y - gripper_y, tar_z - gripper_z))
        temp = np.sin(np.arccos(gripper_qw))
        v2 = np.array(
            (gripper_qx / temp, gripper_qy / temp, gripper_qz / temp))
        orientation_angle = np.abs(
            np.arccos(v1.dot(v2) / np.sqrt(v1.dot(v1)) /
                      np.sqrt(v2.dot(v2))))  # [0, pi]
        orientation_angle = orientation_angle if orientation_angle < np.pi / 2. else (
            np.pi - orientation_angle)
        reward -= 0.5 / np.pi * orientation_angle  # if orientation_angle > np.pi / 8. else 0.
        return self._get_state(obs), reward, terminate, None

    def close(self):
        self.env.shutdown()
Example #27
            pose[:3] += pos
            pose[3:] = [
                perturbed_quat_wxyz.x, perturbed_quat_wxyz.y,
                perturbed_quat_wxyz.z, perturbed_quat_wxyz.w
            ]

            obj_poses[name] = pose

        return obj_poses


if __name__ == "__main__":
    # action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE) # See rlbench/action_modes.py for other action modes
    # action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN)
    action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN)
    env = Environment(action_mode, '', ObservationConfig(), False, frequency=5)
    # task = env.get_task(StackBlocks) # available tasks: EmptyContainer, PlayJenga, PutGroceriesInCupboard, SetTheTable
    task = env.get_task(
        PlayJenga
    )  # available tasks: EmptyContainer, PlayJenga, PutGroceriesInCupboard, SetTheTable

    obj_pose_sensor = NoisyObjectPoseSensor(env)
    descriptions, obs = task.reset()
    agent = Agent(obs, obj_pose_sensor.get_poses())

    while True:
        # Getting noisy object poses
        obj_poses = obj_pose_sensor.get_poses()

        # Getting various fields from obs
        current_joints = obs.joint_positions
Example #28
    def run_validation(self, model):
        """ Runs validation for presentation purposes
        :param model: Tensorflow model for action prediction
        """
        if self.rand_env:
            env = DomainRandomizationEnvironment(
                action_mode=self.action_mode,
                obs_config=self.obs_config,
                headless=self.headless,
                randomize_every=self.randomize_every,
                visual_randomization_config=self.visual_rand_config)
        else:
            env = Environment(action_mode=self.action_mode,
                              obs_config=self.obs_config,
                              headless=self.headless)
        env.launch()
        task = env.get_task(self.task_class)
        task.reset()
        print("Initialized Environment for validation")
        observation = None
        episode = 0
        step = 0
        done = False
        logger = CmdLineLogger(self.logging_interval, self.training_episodes,
                               1)
        evaluator = SuccessEvaluator()

        # containers for validation
        reward_per_episode = 0
        dones = 0
        rewards = 0
        episode_lengths = 0

        # we can save all enabled camera inputs to the root log dir if we want
        if self.save_camera_input:
            camcorder = Camcorder(self.root_log_dir, 0)
        while episode < self.training_episodes:
            # reset episode if maximal length is reached or all workers are done
            if step % self.episode_length == 0 or done:

                descriptions, observation = task.reset()

                if self.save_camera_input:
                    camcorder.save(observation, task.get_robot_visuals(),
                                   task.get_all_graspable_objects())
                if self.obs_scaling_vector is not None:
                    observation = observation.get_low_dim_data(
                    ) / self.obs_scaling_vector
                else:
                    observation = observation.get_low_dim_data()
                logger(episode, evaluator.successful_episodes,
                       evaluator.successful_episodes_perc)

                if step != 0:
                    rewards += reward_per_episode
                    episode_lengths += step

                step = 0
                reward_per_episode = 0
                episode += 1

            # predict action
            actions = np.squeeze(model.predict(tf.constant([observation])))
            # check if we need to resolve redundancy
            if self.use_redundancy_resolution:
                actions[0:7], _ = task.resolve_redundancy_joint_velocities(
                    actions=actions[0:7],
                    setup=self.redundancy_resolution_setup)

            # check if we need to force the gripper action to stay open
            if self.keep_gripper_open:
                actions[7] = 1

            # increment simulation
            next_observation, reward, done = task.step(
                actions, camcorder if self.save_camera_input else None)
            if self.obs_scaling_vector is not None:
                next_observation = next_observation.get_low_dim_data(
                ) / self.obs_scaling_vector
            else:
                next_observation = next_observation.get_low_dim_data()
            evaluator.add(episode, done)
            observation = next_observation
            step += 1
            reward_per_episode += reward
            dones += done

        env.shutdown()
        print("\nDone.\n")
        return rewards / self.episode_length, dones / self.training_episodes, episode_lengths / self.training_episodes
Example #29
    def __init__(self,
                 task_class,
                 observation_mode='state',
                 action_mode='joint',
                 multi_action_space=False,
                 discrete_action_space=False):
        self.num_skip_control = 20
        self.epi_obs = []
        self.obs_record = True
        self.obs_record_id = None

        self._observation_mode = observation_mode
        self.obs_config = ObservationConfig()
        if observation_mode == 'state':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
        elif observation_mode == 'state-goal':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
            self.obs_config.set_goal_info(True)
        elif observation_mode == 'vision':
            self.obs_config.set_all(False)
            self.obs_config.set_camera_rgb(True)
        elif observation_mode == 'both':
            self.obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)

        self._action_mode = action_mode
        self.ac_config = None
        if action_mode == 'joint':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.ABS_JOINT_POSITION)
        elif action_mode == 'trigon':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.TRIGON_MODEL_PARAM,
                is_discrete=discrete_action_space)
        else:
            raise ValueError('Unrecognised action_mode: %s.' % action_mode)

        self.env = Environment(action_config=self.ac_config,
                               obs_config=self.obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)
        self.max_episode_steps = self.task.episode_len
        _, obs = self.task.reset()

        if action_mode == 'joint':
            self.action_space = spaces.Box(
                low=-1.7,
                high=1.7,
                shape=(self.ac_config.action_size, ),
                dtype=np.float32)
        elif action_mode == 'trigon':
            if multi_action_space:
                # action_space1 = spaces.MultiBinary(n=1)
                low1 = np.array([-0.8, -0.8])
                high1 = np.array([0.8, 0.8])
                action_space1 = spaces.Box(low=low1,
                                           high=high1,
                                           dtype=np.float32)
                # low = np.array([-0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low2 = np.array([1.0])
                high2 = np.array([3.0])
                action_space2 = spaces.Box(low=low2,
                                           high=high2,
                                           dtype=np.float32)
                self.action_space = spaces.Tuple(
                    (action_space1, action_space2))
            elif discrete_action_space:
                self.action_space = spaces.MultiDiscrete([4, 4, 4])
            else:
                # low = np.array([0.0, -0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([1.0, 0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low = np.array([-1, -1, -1])
                high = np.array([1, 1, 1])
                self.action_space = spaces.Box(low=low,
                                               high=high,
                                               dtype=np.float32)

        if observation_mode == 'state' or observation_mode == 'state-goal':
            self.observation_space = spaces.Box(
                low=-np.inf,
                high=np.inf,
                shape=(obs.get_low_dim_data().shape[0] *
                       self.num_skip_control, ))
            if observation_mode == 'state-goal':
                self.goal_dim = obs.get_goal_dim()
        elif observation_mode == 'vision':
            self.observation_space = spaces.Box(
                low=0, high=1, shape=obs.head_camera_rgb.shape)
        elif observation_mode == 'both':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "rattler_eye_rgb":
                spaces.Box(low=0, high=1, shape=obs.head_camera_rgb.shape),
                "rattler_eye_depth":
                spaces.Box(low=0, high=1, shape=obs.head_camera_depth.shape),
            })

        self._gym_cam = None
Example #30
class RLBenchEnv(gym.Env):
    """An gym wrapper for RLBench."""

    metadata = {'render.modes': ['human']}

    def __init__(self,
                 task_class,
                 observation_mode='state',
                 action_mode='joint',
                 multi_action_space=False,
                 discrete_action_space=False):
        self.num_skip_control = 20
        self.epi_obs = []
        self.obs_record = True
        self.obs_record_id = None

        self._observation_mode = observation_mode
        self.obs_config = ObservationConfig()
        if observation_mode == 'state':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
        elif observation_mode == 'state-goal':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
            self.obs_config.set_goal_info(True)
        elif observation_mode == 'vision':
            self.obs_config.set_all(False)
            self.obs_config.set_camera_rgb(True)
        elif observation_mode == 'both':
            self.obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)

        self._action_mode = action_mode
        self.ac_config = None
        if action_mode == 'joint':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.ABS_JOINT_POSITION)
        elif action_mode == 'trigon':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.TRIGON_MODEL_PARAM,
                is_discrete=discrete_action_space)
        else:
            raise ValueError('Unrecognised action_mode: %s.' % action_mode)

        self.env = Environment(action_config=self.ac_config,
                               obs_config=self.obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)
        self.max_episode_steps = self.task.episode_len
        _, obs = self.task.reset()

        if action_mode == 'joint':
            self.action_space = spaces.Box(
                low=-1.7,
                high=1.7,
                shape=(self.ac_config.action_size, ),
                dtype=np.float32)
        elif action_mode == 'trigon':
            if multi_action_space:
                # action_space1 = spaces.MultiBinary(n=1)
                low1 = np.array([-0.8, -0.8])
                high1 = np.array([0.8, 0.8])
                action_space1 = spaces.Box(low=low1,
                                           high=high1,
                                           dtype=np.float32)
                # low = np.array([-0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low2 = np.array([1.0])
                high2 = np.array([3.0])
                action_space2 = spaces.Box(low=low2,
                                           high=high2,
                                           dtype=np.float32)
                self.action_space = spaces.Tuple(
                    (action_space1, action_space2))
            elif discrete_action_space:
                self.action_space = spaces.MultiDiscrete([4, 4, 4])
            else:
                # low = np.array([0.0, -0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([1.0, 0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low = np.array([-1, -1, -1])
                high = np.array([1, 1, 1])
                self.action_space = spaces.Box(low=low,
                                               high=high,
                                               dtype=np.float32)

        if observation_mode == 'state' or observation_mode == 'state-goal':
            self.observation_space = spaces.Box(
                low=-np.inf,
                high=np.inf,
                shape=(obs.get_low_dim_data().shape[0] *
                       self.num_skip_control, ))
            if observation_mode == 'state-goal':
                self.goal_dim = obs.get_goal_dim()
        elif observation_mode == 'vision':
            self.observation_space = spaces.Box(
                low=0, high=1, shape=obs.head_camera_rgb.shape)
        elif observation_mode == 'both':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "rattler_eye_rgb":
                spaces.Box(low=0, high=1, shape=obs.head_camera_rgb.shape),
                "rattler_eye_depth":
                spaces.Box(low=0, high=1, shape=obs.head_camera_depth.shape),
            })

        self._gym_cam = None

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'state-goal':
            obs_goal = {'observation': obs.get_low_dim_data()}
            obs_goal.update(obs.get_goal_data())
            return obs_goal
        elif self._observation_mode == 'vision':
            return obs.head_camera_rgb
        elif self._observation_mode == 'both':
            return {
                "state": obs.get_low_dim_data(),
                "rattler_eye_rgb": obs.head_camera_rgb,
                "rattle_eye_depth": obs.head_camera_depth,
            }

    def render(self, mode='human'):
        if self._gym_cam is None:
            # # Add the camera to the scene
            self._gym_cam = VisionSensor('monitor')
            self._gym_cam.set_resolution([640, 640])
            self._gym_cam.set_render_mode(RenderMode.EXTERNAL_WINDOWED)

    def reset(self):
        obs_data_group = []
        obs_data_dict = {
            'observation': [],
            'desired_goal': None,
            'achieved_goal': None
        }
        descriptions, obs = self.task.reset()
        self.epi_obs.append(obs)
        obs_data = self._extract_obs(obs)
        for _ in range(self.num_skip_control):
            # obs_data_group.extend(obs_data)
            if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray):
                obs_data_group.extend(obs_data)
            elif isinstance(obs_data, dict):
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        del descriptions  # Not used
        return ret_obs

    def step(self, action):
        obs_data_group = []
        obs_data_dict = {
            'observation': [],
            'desired_goal': None,
            'achieved_goal': None
        }
        reward_group = []
        terminate = False
        for _ in range(self.num_skip_control):
            obs, reward, step_terminate, success = self.task.step(action)
            self.epi_obs.append(obs)
            obs_data = self._extract_obs(obs)
            if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray):
                obs_data_group.extend(obs_data)
            elif isinstance(
                    obs_data,
                    dict):  # used for hierarchical reinforcement algorithm
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
            reward_group.append(reward)
            terminate |= step_terminate
            if terminate:
                if self.obs_record and success:  # record a successful experience
                    self.record_obs("RobotPos")
                self.epi_obs = []
                break
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        return ret_obs, np.mean(reward_group), terminate, {}

    def close(self):
        self.env.shutdown()

    # def load_env_param(self):
    #     self.env.load_env_param()

    def compute_reward(self, achieved_goal=None, desired_goal=None, info=None):
        assert achieved_goal is not None
        assert desired_goal is not None
        return self.task.compute_reward(achieved_goal, desired_goal)

    def record_obs(self, obs_part):
        if self.obs_record_id is None:
            record_filenames = glob.glob("./obs_record/obs_record_*.txt")
            record_filenames.sort(key=lambda filename: int(
                filename.split('_')[-1].split('.')[0]))
            if len(record_filenames) == 0:
                self.obs_record_id = 1
            else:
                last_id = int(
                    record_filenames[-1].split('_')[-1].split('.')[0])
                self.obs_record_id = last_id + 1
        else:
            self.obs_record_id += 1
        filename = './obs_record/obs_record_' + str(
            self.obs_record_id) + '.txt'
        obs_record_file = open(filename, 'w')

        if obs_part == 'All':
            pass
        if obs_part == 'RobotPos':
            robot_pos_arr = []
            for obs in self.epi_obs:
                robot_pos = obs.get_2d_robot_pos()
                robot_pos_arr.append(robot_pos)
            target_pos = self.task.get_goal()
            robot_pos_arr.append(
                target_pos)  # The last line records the target position
            robot_pos_arr = np.array(robot_pos_arr)
            np.savetxt(obs_record_file, robot_pos_arr, fmt="%f")
        obs_record_file.close()