Пример #1
0
    def __init__(self,
                 domain_name,
                 task_name,
                 horizon,
                 gamma,
                 task_kwargs=None,
                 dt=.01,
                 width_screen=480,
                 height_screen=480,
                 camera_id=0):
        """
        Constructor.

        Args:
             domain_name (str): name of the environment;
             task_name (str): name of the task of the environment;
             horizon (int): the horizon;
             gamma (float): the discount factor;
             task_kwargs (dict, None): parameters of the task. The dictionary
                is copied, so the object passed by the caller is left
                untouched. Any 'time_limit' entry is overridden with
                ``np.inf``;
             dt (float, .01): duration of a control step;
             width_screen (int, 480): width of the screen;
             height_screen (int, 480): height of the screen;
             camera_id (int, 0): position of camera to render the environment;

        """
        # MDP creation
        # Work on a private copy: writing 'time_limit' straight into the
        # caller's dictionary would leak the hack into any later reuse of it.
        task_kwargs = dict() if task_kwargs is None else dict(task_kwargs)
        # Hack to ignore dm_control time limit.
        task_kwargs['time_limit'] = np.inf

        self.env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)

        # MDP properties
        action_space = self._convert_action_space(self.env.action_spec())
        observation_space = self._convert_observation_space(
            self.env.observation_spec())
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # The viewer size is given as (width, height).
        self._viewer = ImageViewer((width_screen, height_screen), dt)
        self._camera_id = camera_id

        super().__init__(mdp_info)
Пример #2
0
    def __init__(self,
                 domain_name,
                 task_name,
                 horizon=None,
                 gamma=0.99,
                 task_kwargs=None,
                 dt=.01,
                 width_screen=480,
                 height_screen=480,
                 camera_id=0,
                 use_pixels=False,
                 pixels_width=64,
                 pixels_height=64):
        """
        Constructor.

        Args:
             domain_name (str): name of the environment;
             task_name (str): name of the task of the environment;
             horizon (int, None): the horizon. If None, the step limit stored
                in the loaded dm_control environment is used;
             gamma (float, 0.99): the discount factor;
             task_kwargs (dict, None): parameters of the task;
             dt (float, .01): duration of a control step;
             width_screen (int, 480): width of the screen;
             height_screen (int, 480): height of the screen;
             camera_id (int, 0): position of camera to render the environment;
             use_pixels (bool, False): if True, pixel observations are used
                rather than the state vector;
             pixels_width (int, 64): width of the pixel observation;
             pixels_height (int, 64): height of the pixel observation;

        """
        # MDP creation
        env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)

        # get the default horizon
        if horizon is None:
            horizon = env._step_limit

        # Hack to ignore dm_control time limit.
        # This is done on the base environment, before any wrapping: assigning
        # the attribute on a wrapper object would only create it on the
        # wrapper, unless the wrapper forwards attribute assignment, leaving
        # the limit of the wrapped environment in place.
        env._step_limit = np.inf

        if use_pixels:
            env = pixels.Wrapper(env,
                                 render_kwargs={
                                     'width': pixels_width,
                                     'height': pixels_height
                                 })
            self._convert_observation_space = self._convert_observation_space_pixels
            self._convert_observation = self._convert_observation_pixels
        else:
            self._convert_observation_space = self._convert_observation_space_vector
            self._convert_observation = self._convert_observation_vector

        self.env = env

        # MDP properties
        action_space = self._convert_action_space(self.env.action_spec())
        observation_space = self._convert_observation_space(
            self.env.observation_spec())
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # The viewer size is given as (width, height).
        self._viewer = ImageViewer((width_screen, height_screen), dt)
        self._camera_id = camera_id

        super().__init__(mdp_info)

        # Last converted observation.
        self._state = None
Пример #3
0
class DMControl(Environment):
    """
    Interface for dm_control suite Mujoco environments. It makes it possible to
    use every dm_control suite Mujoco environment just providing the necessary
    information.

    """
    def __init__(self,
                 domain_name,
                 task_name,
                 horizon=None,
                 gamma=0.99,
                 task_kwargs=None,
                 dt=.01,
                 width_screen=480,
                 height_screen=480,
                 camera_id=0,
                 use_pixels=False,
                 pixels_width=64,
                 pixels_height=64):
        """
        Constructor.

        Args:
             domain_name (str): name of the environment;
             task_name (str): name of the task of the environment;
             horizon (int, None): the horizon. If None, the step limit stored
                in the loaded dm_control environment is used;
             gamma (float, 0.99): the discount factor;
             task_kwargs (dict, None): parameters of the task;
             dt (float, .01): duration of a control step;
             width_screen (int, 480): width of the screen;
             height_screen (int, 480): height of the screen;
             camera_id (int, 0): position of camera to render the environment;
             use_pixels (bool, False): if True, pixel observations are used
                rather than the state vector;
             pixels_width (int, 64): width of the pixel observation;
             pixels_height (int, 64): height of the pixel observation;

        """
        # MDP creation
        env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)

        # get the default horizon
        if horizon is None:
            horizon = env._step_limit

        # Hack to ignore dm_control time limit.
        # This is done on the base environment, before any wrapping: assigning
        # the attribute on a wrapper object would only create it on the
        # wrapper, unless the wrapper forwards attribute assignment, leaving
        # the limit of the wrapped environment in place.
        env._step_limit = np.inf

        if use_pixels:
            env = pixels.Wrapper(env,
                                 render_kwargs={
                                     'width': pixels_width,
                                     'height': pixels_height
                                 })
            self._convert_observation_space = self._convert_observation_space_pixels
            self._convert_observation = self._convert_observation_pixels
        else:
            self._convert_observation_space = self._convert_observation_space_vector
            self._convert_observation = self._convert_observation_vector

        self.env = env

        # MDP properties
        action_space = self._convert_action_space(self.env.action_spec())
        observation_space = self._convert_observation_space(
            self.env.observation_spec())
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # The viewer size is given as (width, height).
        self._viewer = ImageViewer((width_screen, height_screen), dt)
        self._camera_id = camera_id

        super().__init__(mdp_info)

        # Last converted observation.
        self._state = None

    def reset(self, state=None):
        """
        Reset the environment and return the converted first observation.

        Args:
             state (None): must be None, setting a specific state is not
                supported;

        Returns:
             The converted initial observation.

        Raises:
             NotImplementedError: if ``state`` is not None.

        """
        if state is not None:
            raise NotImplementedError

        first_step = self.env.reset()
        self._state = self._convert_observation(first_step.observation)

        return self._state

    def step(self, action):
        """
        Apply an action to the dm_control environment.

        Args:
             action: the action forwarded unchanged to the environment;

        Returns:
             The converted observation, the reward, the absorbing flag (True
             when the returned time step is the last one) and an empty info
             dictionary.

        """
        time_step = self.env.step(action)
        self._state = self._convert_observation(time_step.observation)

        return self._state, time_step.reward, time_step.last(), {}

    def render(self):
        """
        Render the current physics state and show it in the viewer.

        """
        # The viewer size is (width, height); it is handed to the renderer in
        # swapped order, presumably because the renderer expects
        # (height, width).
        width, height = self._viewer.size[0], self._viewer.size[1]
        img = self.env.physics.render(height, width, self._camera_id)
        self._viewer.display(img)

    def stop(self):
        """
        Close the viewer.

        """
        self._viewer.close()

    @staticmethod
    def _convert_observation_space_vector(observation_space):
        """
        Build the unbounded flat observation space whose size is the total
        number of scalar elements over all the entries of the spec.

        """
        n_elements = 0
        for key in observation_space:
            # The product of an empty shape is 1, i.e. a scalar counts as one.
            n_elements += int(np.prod(observation_space[key].shape))

        return Box(low=-np.inf, high=np.inf, shape=(n_elements, ))

    @staticmethod
    def _convert_observation_space_pixels(observation_space):
        """
        Build the channel-first image observation space from the 'pixels'
        entry of the spec, using three channels.

        """
        img_size = observation_space['pixels'].shape
        return Box(low=0., high=255., shape=(3, img_size[0], img_size[1]))

    @staticmethod
    def _convert_action_space(action_space):
        """
        Build the action space from the bounds of the action spec.

        """
        return Box(low=np.array(action_space.minimum),
                   high=np.array(action_space.maximum))

    @staticmethod
    def _convert_observation_vector(observation):
        """
        Flatten every entry of the observation and concatenate them, in
        iteration order, into a single 1-D array.

        """
        parts = [np.atleast_1d(observation[key]).flatten()
                 for key in observation]

        return np.concatenate(parts)

    @staticmethod
    def _convert_observation_pixels(observation):
        """
        Move the last axis of the 'pixels' entry to the front.

        """
        return observation['pixels'].transpose((2, 0, 1))
Пример #4
0
    def __init__(self,
                 config_file,
                 horizon=None,
                 gamma=0.99,
                 is_discrete=False,
                 width=None,
                 height=None,
                 debug_gui=False,
                 verbose=False):
        """
        Constructor.

        Args:
             config_file (str): path to the YAML file specifying the task
                (see igibson/examples/configs/ and igibson/test/);
             horizon (int, None): the horizon. If None, the 'max_step' value
                of the config file is used;
             gamma (float, 0.99): the discount factor;
             is_discrete (bool, False): if True, actions are automatically
                discretized by iGibson's `set_up_discrete_action_space`.
                Please note that not all robots support discrete actions.
             width (int, None): width of the pixel observation. If None, the
                value specified in the config file is used;
             height (int, None): height of the pixel observation. If None, the
                value specified in the config file is used;
             debug_gui (bool, False): if True, activate the iGibson in GUI mode,
                showing the pybullet rendering and the robot camera.
             verbose (bool, False): if False, it disables iGibson default
                messages. Note that this is done through `logging.disable`,
                which acts on the whole `logging` module.

        """

        if not verbose:
            logging.disable(logging.CRITICAL +
                            1)  # Disable iGibson log messages

        # MDP creation
        # NOTE(review): presumably flags read by the Gym base class, whose
        # constructor is bypassed below -- confirm.
        self._not_pybullet = False
        self._first = True

        config = parse_config(config_file)
        config['is_discrete'] = is_discrete

        if horizon is None:
            horizon = config['max_step']  # Get the default horizon
        # Hack to ignore gym time limit. It is applied whether the horizon
        # comes from the caller or from the config file.
        config['max_step'] = horizon + 1

        if width is not None:
            config['image_width'] = width
        if height is not None:
            config['image_height'] = height

        env = iGibsonEnv(config_file=config,
                         mode='gui' if debug_gui else 'headless')
        env = iGibsonWrapper(env)

        self.env = env

        # First two axes of the raw, channel-last observation.
        self._img_size = env.observation_space.shape[0:2]

        # MDP properties
        action_space = self.env.action_space
        # Observations are returned after a (2, 0, 1) transpose of the raw
        # image (see `_convert_observation`), hence the two leading raw axes
        # follow the channel axis in their original order.
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(3, self._img_size[0],
                                       self._img_size[1]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Discrete actions are unpacked from their one-element container.
        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        # The viewer takes the two raw axes in swapped order.
        self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                                   1 / 60)
        self._image = None

        Environment.__init__(self, mdp_info)
Пример #5
0
class iGibson(Gym):
    """
    Interface for iGibson https://github.com/StanfordVL/iGibson

    There are both navigation and interaction tasks.
    Observations are pixel images of what the agent sees in front of itself.
    Image resolution is specified in the config file.
    By default, actions are continuous, but can be discretized automatically
    using a flag. Note that not all robots support discrete actions.

    Scene and task details are defined in the YAML config file.

    """
    def __init__(self,
                 config_file,
                 horizon=None,
                 gamma=0.99,
                 is_discrete=False,
                 width=None,
                 height=None,
                 debug_gui=False,
                 verbose=False):
        """
        Constructor.

        Args:
             config_file (str): path to the YAML file specifying the task
                (see igibson/examples/configs/ and igibson/test/);
             horizon (int, None): the horizon. If None, the 'max_step' value
                of the config file is used;
             gamma (float, 0.99): the discount factor;
             is_discrete (bool, False): if True, actions are automatically
                discretized by iGibson's `set_up_discrete_action_space`.
                Please note that not all robots support discrete actions.
             width (int, None): width of the pixel observation. If None, the
                value specified in the config file is used;
             height (int, None): height of the pixel observation. If None, the
                value specified in the config file is used;
             debug_gui (bool, False): if True, activate the iGibson in GUI mode,
                showing the pybullet rendering and the robot camera.
             verbose (bool, False): if False, it disables iGibson default
                messages. Note that this is done through `logging.disable`,
                which acts on the whole `logging` module.

        """

        if not verbose:
            logging.disable(logging.CRITICAL +
                            1)  # Disable iGibson log messages

        # MDP creation
        # NOTE(review): presumably flags read by the Gym base class, whose
        # constructor is bypassed below -- confirm.
        self._not_pybullet = False
        self._first = True

        config = parse_config(config_file)
        config['is_discrete'] = is_discrete

        if horizon is None:
            horizon = config['max_step']  # Get the default horizon
        # Hack to ignore gym time limit. It is applied whether the horizon
        # comes from the caller or from the config file.
        config['max_step'] = horizon + 1

        if width is not None:
            config['image_width'] = width
        if height is not None:
            config['image_height'] = height

        env = iGibsonEnv(config_file=config,
                         mode='gui' if debug_gui else 'headless')
        env = iGibsonWrapper(env)

        self.env = env

        # First two axes of the raw, channel-last observation.
        self._img_size = env.observation_space.shape[0:2]

        # MDP properties
        action_space = self.env.action_space
        # Observations are returned after a (2, 0, 1) transpose of the raw
        # image (see `_convert_observation`), hence the two leading raw axes
        # follow the channel axis in their original order.
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(3, self._img_size[0],
                                       self._img_size[1]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Discrete actions are unpacked from their one-element container.
        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        # The viewer takes the two raw axes in swapped order.
        self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                                   1 / 60)
        self._image = None

        Environment.__init__(self, mdp_info)

    def reset(self, state=None):
        """
        Reset the environment and return the converted first observation.

        Args:
             state (None): must be None, the iGibson state cannot be set;

        Returns:
             The first observation, with the last axis moved to the front.

        """
        assert state is None, 'Cannot set iGibson state'
        obs = np.atleast_1d(self.env.reset())
        # Keep the first frame, so that `render` has an image to show even
        # when it is called before any `step`.
        self._image = obs.copy()
        return self._convert_observation(obs)

    def step(self, action):
        """
        Apply an action to the wrapped environment.

        Args:
             action: the action, converted with the function selected in the
                constructor before being forwarded;

        Returns:
             The converted observation, the reward, the absorbing flag and the
             info dictionary returned by the wrapped environment.

        """
        action = self._convert_action(action)
        obs, reward, absorbing, info = self.env.step(action)
        # Untransposed copy of the frame, kept for `render`.
        self._image = obs.copy()
        return self._convert_observation(
            np.atleast_1d(obs)), reward, absorbing, info

    def close(self):
        """
        Close the wrapped environment.

        """
        self.env.close()

    def stop(self):
        """
        Close the viewer.

        """
        self._viewer.close()

    def render(self, mode='human'):
        """
        Show the last stored frame in the viewer.

        Args:
             mode (str, 'human'): unused.

        """
        self._viewer.display(self._image)

    @staticmethod
    def _convert_observation(observation):
        """
        Move the last axis of the observation to the front.

        """
        return observation.transpose((2, 0, 1))

    @staticmethod
    def root_path():
        """
        Returns:
             The `root_path` attribute of the `igibson` module.

        """
        return igibson.root_path
Пример #6
0
    def __init__(self,
                 wrapper,
                 config_file,
                 base_config_file=None,
                 horizon=None,
                 gamma=0.99,
                 width=None,
                 height=None):
        """
        Constructor. For more details on how to pass YAML configuration files,
        please see <MUSHROOM_RL PATH>/examples/habitat/README.md

        Args:
             wrapper (str): name of the wrapper class, looked up among the
                globals of this module, used for converting observations and
                actions (e.g., HabitatRearrangeWrapper);
             config_file (str): path to the YAML file specifying the RL task
                configuration (see <HABITAT_LAB PATH>/habitat_baselines/configs/);
             base_config_file (str, None): path to an optional YAML file, used
                as 'BASE_TASK_CONFIG_PATH' in the first YAML
                (see <HABITAT_LAB PATH>/configs/). If None, `config_file` is
                used;
             horizon (int, None): the horizon. If None, the value of
                MAX_EPISODE_STEPS in the configuration is used;
             gamma (float, 0.99): the discount factor;
             width (int, None): width of the pixel observation. If None, the
                value specified in the config file is used.
             height (int, None): height of the pixel observation. If None, the
                value specified in the config file is used.

        """
        # MDP creation
        # NOTE(review): presumably flags read by the Gym base class, whose
        # constructor is bypassed below -- confirm.
        self._not_pybullet = False
        self._first = True

        if base_config_file is None:
            base_config_file = config_file

        config = get_config(config_paths=config_file,
                            opts=['BASE_TASK_CONFIG_PATH', base_config_file])

        config.defrost()

        if horizon is None:
            horizon = config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS  # Get the default horizon
        config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = horizon + 1  # Hack to ignore gym time limit

        # Overwrite all RGB width / height found in the SIMULATOR section of
        # TASK_CONFIG (not in a top-level SIMULATOR section)
        for k in config['TASK_CONFIG']['SIMULATOR']:
            if 'rgb' in k.lower():
                if height is not None:
                    config['TASK_CONFIG']['SIMULATOR'][k]['HEIGHT'] = height
                if width is not None:
                    config['TASK_CONFIG']['SIMULATOR'][k]['WIDTH'] = width

        config.freeze()

        env_class = get_env_class(config.ENV_NAME)
        env = make_env_fn(env_class=env_class, config=config)
        # A wrapper name that is not a global of this module raises KeyError.
        env = globals()[wrapper](env)
        self.env = env

        # First two axes of the raw, channel-last observation.
        self._img_size = env.observation_space.shape[0:2]

        # MDP properties
        action_space = self.env.action_space
        # Observations are returned after a (2, 0, 1) transpose of the raw
        # image (see `_convert_observation`), hence the two leading raw axes
        # follow the channel axis in their original order.
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(3, self._img_size[0],
                                       self._img_size[1]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Discrete actions are unpacked from their one-element container.
        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        # The viewer takes the two raw axes in swapped order.
        self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                                   1 / 10)

        Environment.__init__(self, mdp_info)
Пример #7
0
class Habitat(Gym):
    """
    Interface for Habitat RL environments.
    This class is very generic and can be used for many Habitat tasks.
    Depending on the robot / task, you have to use different wrappers, since
    observation and action spaces may vary.

    See <MUSHROOM_RL PATH>/examples/habitat/ for more details.

    """
    def __init__(self,
                 wrapper,
                 config_file,
                 base_config_file=None,
                 horizon=None,
                 gamma=0.99,
                 width=None,
                 height=None):
        """
        Constructor. For more details on how to pass YAML configuration files,
        please see <MUSHROOM_RL PATH>/examples/habitat/README.md

        Args:
             wrapper (str): name of the wrapper class, looked up among the
                globals of this module, used for converting observations and
                actions (e.g., HabitatRearrangeWrapper);
             config_file (str): path to the YAML file specifying the RL task
                configuration (see <HABITAT_LAB PATH>/habitat_baselines/configs/);
             base_config_file (str, None): path to an optional YAML file, used
                as 'BASE_TASK_CONFIG_PATH' in the first YAML
                (see <HABITAT_LAB PATH>/configs/). If None, `config_file` is
                used;
             horizon (int, None): the horizon. If None, the value of
                MAX_EPISODE_STEPS in the configuration is used;
             gamma (float, 0.99): the discount factor;
             width (int, None): width of the pixel observation. If None, the
                value specified in the config file is used.
             height (int, None): height of the pixel observation. If None, the
                value specified in the config file is used.

        """
        # MDP creation
        # NOTE(review): presumably flags read by the Gym base class, whose
        # constructor is bypassed below -- confirm.
        self._not_pybullet = False
        self._first = True

        if base_config_file is None:
            base_config_file = config_file

        config = get_config(config_paths=config_file,
                            opts=['BASE_TASK_CONFIG_PATH', base_config_file])

        config.defrost()

        if horizon is None:
            horizon = config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS  # Get the default horizon
        config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = horizon + 1  # Hack to ignore gym time limit

        # Overwrite all RGB width / height found in the SIMULATOR section of
        # TASK_CONFIG (not in a top-level SIMULATOR section)
        for k in config['TASK_CONFIG']['SIMULATOR']:
            if 'rgb' in k.lower():
                if height is not None:
                    config['TASK_CONFIG']['SIMULATOR'][k]['HEIGHT'] = height
                if width is not None:
                    config['TASK_CONFIG']['SIMULATOR'][k]['WIDTH'] = width

        config.freeze()

        env_class = get_env_class(config.ENV_NAME)
        env = make_env_fn(env_class=env_class, config=config)
        # A wrapper name that is not a global of this module raises KeyError.
        env = globals()[wrapper](env)
        self.env = env

        # First two axes of the raw, channel-last observation.
        self._img_size = env.observation_space.shape[0:2]

        # MDP properties
        action_space = self.env.action_space
        # Observations are returned after a (2, 0, 1) transpose of the raw
        # image (see `_convert_observation`), hence the two leading raw axes
        # follow the channel axis in their original order.
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(3, self._img_size[0],
                                       self._img_size[1]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Discrete actions are unpacked from their one-element container.
        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        # The viewer takes the two raw axes in swapped order.
        self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                                   1 / 10)

        Environment.__init__(self, mdp_info)

    def reset(self, state=None):
        """
        Reset the environment and return the converted first observation.

        Args:
             state (None): must be None, the Habitat state cannot be set;

        Returns:
             The first observation, with the last axis moved to the front.

        """
        assert state is None, 'Cannot set Habitat state'
        obs = self._convert_observation(np.atleast_1d(self.env.reset()))
        return obs

    def step(self, action):
        """
        Apply an action to the wrapped environment.

        Args:
             action: the action, converted with the function selected in the
                constructor before being forwarded;

        Returns:
             The converted observation, the reward, the absorbing flag and the
             info dictionary returned by the wrapped environment.

        """
        action = self._convert_action(action)
        obs, reward, absorbing, info = self.env.step(action)
        return self._convert_observation(
            np.atleast_1d(obs)), reward, absorbing, info

    def stop(self):
        """
        Close the viewer.

        """
        self._viewer.close()

    def render(self, mode='rgb_array'):
        """
        Build a frame from the last full observation stored by the wrapper and
        from the current metrics, and show it in the viewer.

        Args:
             mode (str, 'rgb_array'): the render mode, only 'rgb_array' is
                supported.

        Raises:
             ValueError: if ``mode`` is not 'rgb_array'.

        """
        if mode != "rgb_array":
            raise ValueError(f"Render mode {mode} not currently supported.")

        frame = observations_to_image(
            self.env._last_full_obs, self.env.unwrapped._env.get_metrics())

        self._viewer.display(frame)

    @staticmethod
    def _convert_observation(observation):
        """
        Move the last axis of the observation to the front.

        """
        return observation.transpose((2, 0, 1))

    @staticmethod
    def root_path():
        """
        Returns:
             The directory two levels above the file of the `habitat` module.

        """
        return os.path.dirname(os.path.dirname(habitat.__file__))
Пример #8
0
class DMControl(Environment):
    """
    Interface for dm_control suite Mujoco environments. It makes it possible to
    use every dm_control suite Mujoco environment just providing the necessary
    information.

    """
    def __init__(self,
                 domain_name,
                 task_name,
                 horizon,
                 gamma,
                 task_kwargs=None,
                 dt=.01,
                 width_screen=480,
                 height_screen=480,
                 camera_id=0):
        """
        Constructor.

        Args:
             domain_name (str): name of the environment;
             task_name (str): name of the task of the environment;
             horizon (int): the horizon;
             gamma (float): the discount factor;
             task_kwargs (dict, None): parameters of the task. The dictionary
                is copied, so the object passed by the caller is left
                untouched. Any 'time_limit' entry is overridden with
                ``np.inf``;
             dt (float, .01): duration of a control step;
             width_screen (int, 480): width of the screen;
             height_screen (int, 480): height of the screen;
             camera_id (int, 0): position of camera to render the environment;

        """
        # MDP creation
        # Work on a private copy: writing 'time_limit' straight into the
        # caller's dictionary would leak the hack into any later reuse of it.
        task_kwargs = dict() if task_kwargs is None else dict(task_kwargs)
        # Hack to ignore dm_control time limit.
        task_kwargs['time_limit'] = np.inf

        self.env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)

        # MDP properties
        action_space = self._convert_action_space(self.env.action_spec())
        observation_space = self._convert_observation_space(
            self.env.observation_spec())
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # The viewer size is given as (width, height).
        self._viewer = ImageViewer((width_screen, height_screen), dt)
        self._camera_id = camera_id

        super().__init__(mdp_info)

        # Last converted observation; defined here so that the attribute
        # exists before the first call to `reset`.
        self._state = None

    def reset(self, state=None):
        """
        Reset the environment and return the converted first observation.

        Args:
             state (None): must be None, setting a specific state is not
                supported;

        Returns:
             The converted initial observation.

        Raises:
             NotImplementedError: if ``state`` is not None.

        """
        if state is not None:
            raise NotImplementedError

        first_step = self.env.reset()
        self._state = self._convert_observation(first_step.observation)

        return self._state

    def step(self, action):
        """
        Apply an action to the dm_control environment.

        Args:
             action: the action forwarded unchanged to the environment;

        Returns:
             The converted observation, the reward, the absorbing flag (True
             when the returned time step is the last one) and an empty info
             dictionary.

        """
        time_step = self.env.step(action)
        self._state = self._convert_observation(time_step.observation)

        return self._state, time_step.reward, time_step.last(), {}

    def render(self):
        """
        Render the current physics state and show it in the viewer.

        """
        # The viewer size is (width, height); it is handed to the renderer in
        # swapped order, presumably because the renderer expects
        # (height, width).
        width, height = self._viewer.size[0], self._viewer.size[1]
        img = self.env.physics.render(height, width, self._camera_id)
        self._viewer.display(img)

    def stop(self):
        """
        Do nothing: the viewer is left open.

        """
        pass

    @staticmethod
    def _convert_observation_space(observation_space):
        """
        Build the unbounded flat observation space whose size is the total
        number of scalar elements over all the entries of the spec, so that it
        also accounts for entries with more than one dimension.

        """
        n_elements = 0
        for key in observation_space:
            # The product of an empty shape is 1, i.e. a scalar counts as one.
            n_elements += int(np.prod(observation_space[key].shape))

        return Box(low=-np.inf, high=np.inf, shape=(n_elements, ))

    @staticmethod
    def _convert_action_space(action_space):
        """
        Build the action space from the bounds of the action spec.

        """
        return Box(low=np.array(action_space.minimum),
                   high=np.array(action_space.maximum))

    @staticmethod
    def _convert_observation(observation):
        """
        Flatten every entry of the observation and concatenate them, in
        iteration order, into a single 1-D array. Flattening is needed for
        entries with more than one dimension, which cannot be concatenated
        with 1-D entries otherwise.

        """
        parts = [np.atleast_1d(observation[key]).flatten()
                 for key in observation]

        return np.concatenate(parts)