示例#1
0
def test_box():
    """Exercise BoxActionSpace sampling and clipping with scalar, per-dimension,
    and mixed low/high bounds, and verify that invalid bounds are rejected."""
    # scalar low/high bounds
    space = BoxActionSpace(4, -5, 5, ["a", "b", "c", "d"])
    for _ in range(100):
        s = space.sample()
        assert np.all(-5 <= s)
        assert np.all(s <= 5)
        assert s.shape == (4, )
        assert s.dtype == float

    # clipping against scalar bounds
    clipped = space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped == np.array([-5, 5, 2, 5]))

    # per-dimension low and high bounds
    lows = np.array([-5, -1, -0.5, 0])
    highs = np.array([1, 2, 4, 5])
    space = BoxActionSpace(4, lows, highs, ["a", "b", "c", "d"])
    for _ in range(100):
        s = space.sample()
        assert np.all(lows <= s)
        assert np.all(s <= highs)
        assert s.shape == (4, )
        assert s.dtype == float

    # clipping against per-dimension bounds
    clipped = space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped == np.array([-5, 2, 2, 5]))

    # mixed bounds: array low, scalar high
    space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), 5,
                           ["a", "b", "c", "d"])
    for _ in range(100):
        s = space.sample()
        assert np.all(np.array([-5, -1, -0.5, 0]) <= s)
        assert np.all(s <= 5)
        assert s.shape == (4, )
        assert s.dtype == float

    # clipping against mixed bounds
    clipped = space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped == np.array([-5, 5, 2, 5]))

    # low > high must be rejected at construction time
    with pytest.raises(ValueError):
        BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), -1,
                       ["a", "b", "c", "d"])
示例#2
0
class ControlSuiteEnvironment(Environment):
    """Coach Environment wrapper around a DeepMind Control Suite task.

    Loads a suite task from a ``"domain:task"`` level id, optionally wraps it
    with a pixels wrapper, and exposes measurement and/or image observations
    together with a continuous BoxActionSpace.
    """

    def __init__(
            self,
            level: LevelSelection,
            frame_skip: int,
            visualization_parameters: VisualizationParameters,
            target_success_rate: float = 1.0,
            seed: Union[None, int] = None,
            human_control: bool = False,
            observation_type: ObservationType = ObservationType.Measurements,
            custom_reward_threshold: Union[int, float] = None,
            **kwargs):
        """
        :param level: (str)
            A string representing the control suite level to run. This can also be a LevelSelection object.
            For example, cartpole:swingup.

        :param frame_skip: (int)
            The number of frames to skip between any two actions given by the agent. The action will be repeated
            for all the skipped frames.

        :param visualization_parameters: (VisualizationParameters)
            The parameters used for visualizing the environment, such as the render flag, storing videos etc.

        :param target_success_rate: (float)
            Stop experiment if given target success rate was achieved.

        :param seed: (int)
            A seed to use for the random number generator when running the environment.

        :param human_control: (bool)
            A flag that allows controlling the environment using the keyboard keys.

        :param observation_type: (ObservationType)
            An enum which defines which observation to use. The current options are to use:
            * Measurements only - a vector of joint torques and similar measurements
            * Image only - an image of the environment as seen by a camera attached to the simulator
            * Measurements & Image - both type of observations will be returned in the state using the keys
            'measurements' and 'pixels' respectively.

        :param custom_reward_threshold: (float)
            Allows defining a custom reward that will be used to decide when the agent succeeded in passing the environment.

        """
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters,
                         target_success_rate)

        self.observation_type = observation_type

        # load and initialize environment
        # level id is expected as "domain:task", e.g. "cartpole:swingup"
        domain_name, task_name = self.env_id.split(":")
        self.env = suite.load(domain_name=domain_name,
                              task_name=task_name,
                              task_kwargs={'random': seed})

        # if image observations are requested, add a pixels wrapper;
        # when images are the ONLY observation, drop the raw measurements
        if observation_type != ObservationType.Measurements:
            self.env = pixels.Wrapper(
                self.env,
                pixels_only=observation_type == ObservationType.Image)

        # seed
        # NOTE(review): the suite env was already seeded via task_kwargs above;
        # this additionally seeds the global numpy/random generators
        if self.seed is not None:
            np.random.seed(self.seed)
            random.seed(self.seed)

        self.state_space = StateSpace({})

        # image observations
        if observation_type != ObservationType.Measurements:
            self.state_space['pixels'] = ImageObservationSpace(
                shape=self.env.observation_spec()['pixels'].shape, high=255)

        # measurements observations
        # flatten all scalar (rank-0) and vector (rank-1) observation entries
        # into one measurements vector; higher-rank entries are skipped
        if observation_type != ObservationType.Image:
            measurements_space_size = 0
            measurements_names = []
            for observation_space_name, observation_space in self.env.observation_spec(
            ).items():
                if len(observation_space.shape) == 0:
                    measurements_space_size += 1
                    measurements_names.append(observation_space_name)
                elif len(observation_space.shape) == 1:
                    measurements_space_size += observation_space.shape[0]
                    measurements_names.extend([
                        "{}_{}".format(observation_space_name, i)
                        for i in range(observation_space.shape[0])
                    ])
            self.state_space['measurements'] = VectorObservationSpace(
                shape=measurements_space_size,
                measurements_names=measurements_names)

        # actions: continuous box bounded by the simulator's action spec
        self.action_space = BoxActionSpace(
            shape=self.env.action_spec().shape[0],
            low=self.env.action_spec().minimum,
            high=self.env.action_spec().maximum)

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            scale = 1
            if self.human_control:
                # double the window size to ease keyboard control
                scale = 2
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1] * scale,
                                            image.shape[0] * scale)

        self.target_success_rate = target_success_rate

    def _update_state(self):
        """Rebuild ``self.state`` (and reward/done) from the last dm_control TimeStep."""
        self.state = {}

        if self.observation_type != ObservationType.Measurements:
            self.pixels = self.last_result.observation['pixels']
            self.state['pixels'] = self.pixels

        if self.observation_type != ObservationType.Image:
            # concatenate every observation entry into one flat vector;
            # scalars are wrapped into 1-element arrays first
            self.measurements = np.array([])
            for sub_observation in self.last_result.observation.values():
                if isinstance(sub_observation, np.ndarray) and len(
                        sub_observation.shape) == 1:
                    self.measurements = np.concatenate(
                        (self.measurements, sub_observation))
                else:
                    self.measurements = np.concatenate(
                        (self.measurements, np.array([sub_observation])))
            self.state['measurements'] = self.measurements

        # the first TimeStep of an episode carries reward=None -> treat as 0
        self.reward = self.last_result.reward if self.last_result.reward is not None else 0

        self.done = self.last_result.last()

    def _take_action(self, action):
        """Clip the action to the box bounds (when applicable) and step the env."""
        if type(self.action_space) == BoxActionSpace:
            action = self.action_space.clip_action_to_space(action)

        self.last_result = self.env.step(action)

    def _restart_environment_episode(self, force_environment_reset=False):
        """Reset the underlying suite environment and keep its first TimeStep."""
        self.last_result = self.env.reset()

    def _render(self):
        # no-op: native rendering is not implemented for this environment
        pass

    def get_rendered_image(self):
        """Return an RGB frame rendered by the simulator's camera 0."""
        return self.env.physics.render(camera_id=0)

    def get_target_success_rate(self) -> float:
        """Return the success-rate threshold used to stop the experiment."""
        return self.target_success_rate
class ControlSuiteEnvironment(Environment):
    """Coach Environment wrapper around a DeepMind Control Suite task.

    NOTE(review): this is a second definition of ``ControlSuiteEnvironment``
    (without the ``target_success_rate`` parameter) that would shadow the
    earlier one if both live in the same module — presumably an older
    revision; confirm which definition is intended to survive.
    """

    def __init__(
            self,
            level: LevelSelection,
            frame_skip: int,
            visualization_parameters: VisualizationParameters,
            seed: Union[None, int] = None,
            human_control: bool = False,
            observation_type: ObservationType = ObservationType.Measurements,
            custom_reward_threshold: Union[int, float] = None,
            **kwargs):
        """
        :param level: the control suite level to run, as "domain:task"
            (e.g. cartpole:swingup) or a LevelSelection object
        :param frame_skip: number of frames to repeat each agent action for
        :param visualization_parameters: rendering/video-storing parameters
        :param seed: seed for the environment's random number generator
        :param human_control: allow controlling the environment via keyboard
        :param observation_type: which observations to expose — measurements
            only, image only, or both (keys 'measurements' and 'pixels')
        :param custom_reward_threshold: custom reward used to decide when the
            agent succeeded in passing the environment
        """
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters)

        self.observation_type = observation_type

        # load and initialize environment
        # level id is expected as "domain:task", e.g. "cartpole:swingup"
        domain_name, task_name = self.env_id.split(":")
        self.env = suite.load(domain_name=domain_name,
                              task_name=task_name,
                              task_kwargs={'random': seed})

        # if image observations are requested, add a pixels wrapper;
        # when images are the ONLY observation, drop the raw measurements
        if observation_type != ObservationType.Measurements:
            self.env = pixels.Wrapper(
                self.env,
                pixels_only=observation_type == ObservationType.Image)

        # seed the global numpy/random generators as well
        if self.seed is not None:
            np.random.seed(self.seed)
            random.seed(self.seed)

        self.state_space = StateSpace({})

        # image observations
        if observation_type != ObservationType.Measurements:
            self.state_space['pixels'] = ImageObservationSpace(
                shape=self.env.observation_spec()['pixels'].shape, high=255)

        # measurements observations
        # flatten all scalar (rank-0) and vector (rank-1) observation entries
        # into one measurements vector; higher-rank entries are skipped
        if observation_type != ObservationType.Image:
            measurements_space_size = 0
            measurements_names = []
            for observation_space_name, observation_space in self.env.observation_spec(
            ).items():
                if len(observation_space.shape) == 0:
                    measurements_space_size += 1
                    measurements_names.append(observation_space_name)
                elif len(observation_space.shape) == 1:
                    measurements_space_size += observation_space.shape[0]
                    measurements_names.extend([
                        "{}_{}".format(observation_space_name, i)
                        for i in range(observation_space.shape[0])
                    ])
            self.state_space['measurements'] = VectorObservationSpace(
                shape=measurements_space_size,
                measurements_names=measurements_names)

        # actions: continuous box bounded by the simulator's action spec
        self.action_space = BoxActionSpace(
            shape=self.env.action_spec().shape[0],
            low=self.env.action_spec().minimum,
            high=self.env.action_spec().maximum)

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            scale = 1
            if self.human_control:
                # double the window size to ease keyboard control
                scale = 2
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1] * scale,
                                            image.shape[0] * scale)

    def _update_state(self):
        """Rebuild ``self.state`` (and reward/done) from the last dm_control TimeStep."""
        self.state = {}

        if self.observation_type != ObservationType.Measurements:
            self.pixels = self.last_result.observation['pixels']
            self.state['pixels'] = self.pixels

        if self.observation_type != ObservationType.Image:
            # concatenate every observation entry into one flat vector;
            # scalars are wrapped into 1-element arrays first
            self.measurements = np.array([])
            for sub_observation in self.last_result.observation.values():
                if isinstance(sub_observation, np.ndarray) and len(
                        sub_observation.shape) == 1:
                    self.measurements = np.concatenate(
                        (self.measurements, sub_observation))
                else:
                    self.measurements = np.concatenate(
                        (self.measurements, np.array([sub_observation])))
            self.state['measurements'] = self.measurements

        # the first TimeStep of an episode carries reward=None -> treat as 0
        self.reward = self.last_result.reward if self.last_result.reward is not None else 0

        self.done = self.last_result.last()

    def _take_action(self, action):
        """Clip the action to the box bounds (when applicable) and step the env."""
        if type(self.action_space) == BoxActionSpace:
            action = self.action_space.clip_action_to_space(action)

        self.last_result = self.env.step(action)

    def _restart_environment_episode(self, force_environment_reset=False):
        """Reset the underlying suite environment and keep its first TimeStep."""
        self.last_result = self.env.reset()

    def _render(self):
        # no-op: native rendering is not implemented for this environment
        pass

    def get_rendered_image(self):
        """Return an RGB frame rendered by the simulator's camera 0."""
        return self.env.physics.render(camera_id=0)
示例#4
0
class RobosuiteEnvironment(Environment):
    """Coach Environment wrapper around a Robosuite manipulation task.

    Builds the robosuite environment from base + extra parameter dicts,
    optionally applies domain randomization, and exposes measurement and
    (optionally) camera observations plus a continuous BoxActionSpace.
    """

    def __init__(self, level: LevelSelection,
                 seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float, None],
                 visualization_parameters: VisualizationParameters,
                 base_parameters: RobosuiteBaseParameters,
                 extra_parameters: Dict[str, Any],
                 robot: str, controller: str,
                 target_success_rate: float = 1.0, apply_dr: bool = False,
                 dr_every_n_steps_min: int = 10, dr_every_n_steps_max: int = 20, use_joint_vel_obs=False,
                 custom_controller_config_fpath=None, **kwargs):
        """
        :param level: the robosuite environment to run (or a LevelSelection)
        :param seed: seed for the random number generators
        :param frame_skip: number of times each action is repeated (clamped to >= 1)
        :param human_control: allow controlling the environment via keyboard
        :param custom_reward_threshold: custom reward used to decide success
        :param visualization_parameters: rendering/video-storing parameters
        :param base_parameters: base robosuite environment parameters
        :param extra_parameters: extra kwargs merged over the base parameters
        :param robot: robot name; must be one of robosuite_robots
        :param controller: controller name (one of robosuite_controllers) or None
        :param target_success_rate: stop experiment once this success rate is achieved
        :param apply_dr: wrap the env with domain randomization
        :param dr_every_n_steps_min: min steps between randomizations
        :param dr_every_n_steps_max: max steps between randomizations
        :param use_joint_vel_obs: keep the robot0_joint_vel observable active
        :param custom_controller_config_fpath: path to a custom controller
            config, used only when ``controller`` is None
        """
        super(RobosuiteEnvironment, self).__init__(level, seed, frame_skip, human_control, custom_reward_threshold,
                                                   visualization_parameters, target_success_rate)

        # Validate arguments

        # frame_skip doubles as the action-repeat count below; never allow 0
        self.frame_skip = max(1, self.frame_skip)

        def validate_input(input, supported, name):
            # raise a uniform error listing the supported values
            if input not in supported:
                raise ValueError("Unknown Robosuite {0} passed: '{1}' ; Supported {0}s are: {2}".format(
                    name, input, ' | '.join(supported)
                ))

        validate_input(self.env_id, robosuite_environments, 'environment')
        validate_input(robot, robosuite_robots, 'robot')
        self.robot = robot
        if controller is not None:
            validate_input(controller, robosuite_controllers, 'controller')
        self.controller = controller

        self.base_parameters = base_parameters
        # on-screen renderer only for native rendering; off-screen renderer is
        # needed for camera observations or non-native on-screen display
        self.base_parameters.has_renderer = self.is_rendered and self.native_rendering
        self.base_parameters.has_offscreen_renderer = self.base_parameters.use_camera_obs or (self.is_rendered and not
                                                                                              self.native_rendering)

        # Seed
        if self.seed is not None:
            np.random.seed(self.seed)
            random.seed(self.seed)

        # Load and initialize environment
        env_args = self.base_parameters.env_kwargs_dict()
        env_args.update(extra_parameters)

        # apply a known per-task reward scale unless the caller provided one
        if 'reward_scale' not in env_args and self.env_id in DEFAULT_REWARD_SCALES:
            env_args['reward_scale'] = DEFAULT_REWARD_SCALES[self.env_id]

        env_args['robots'] = self.robot
        # named controller takes precedence over a custom config file path
        controller_cfg = None
        if self.controller is not None:
            controller_cfg = robosuite.controllers.load_controller_config(default_controller=self.controller)
        elif custom_controller_config_fpath is not None:
            controller_cfg = robosuite.controllers.load_controller_config(custom_fpath=custom_controller_config_fpath)

        env_args['controller_configs'] = controller_cfg

        self.env = robosuite.make(self.env_id, **env_args)

        # TODO: Generalize this to filter any observation by name
        if not use_joint_vel_obs:
            self.env.modify_observable('robot0_joint_vel', 'active', False)

        # Wrap with a dummy wrapper so we get a consistent API (there are subtle changes between
        # wrappers and actual environments in Robosuite, for example action_spec as property vs. function)
        self.env = Wrapper(self.env)
        if apply_dr:
            self.env = DomainRandomizationWrapper(self.env, seed=self.seed, randomize_every_n_steps_min=dr_every_n_steps_min,
                                                  randomize_every_n_steps_max=dr_every_n_steps_max)

        # State space
        self.state_space = self._setup_state_space()

        # Action space
        low, high = self.env.unwrapped.action_spec
        self.action_space = BoxActionSpace(low.shape, low=low, high=high)

        self.reset_internal_state()

        if self.is_rendered:
            image = self.get_rendered_image()
            self.renderer.create_screen(image.shape[1], image.shape[0])
        # TODO: Other environments call rendering here, why? reset_internal_state does it

    def _setup_state_space(self):
        """Build the StateSpace from a processed dummy observation."""
        state_space = StateSpace({})
        dummy_obs = self._process_observation(self.env.observation_spec())

        state_space['measurements'] = VectorObservationSpace(dummy_obs['measurements'].shape[0])

        if self.base_parameters.use_camera_obs:
            state_space['camera'] = PlanarMapsObservationSpace(dummy_obs['camera'].shape, 0, 255)

        return state_space

    def _process_observation(self, raw_obs):
        """Convert a raw robosuite observation dict into {'measurements', 'camera'}.

        Depth (if present) is appended to the camera image as an extra channel;
        object-state (if present) is concatenated onto the proprioceptive vector.
        """
        new_obs = {}

        # TODO: Support multiple cameras, this assumes a single camera
        camera_name = self.base_parameters.camera_names

        camera_obs = raw_obs.get(camera_name + '_image', None)
        if camera_obs is not None:
            depth_obs = raw_obs.get(camera_name + '_depth', None)
            if depth_obs is not None:
                depth_obs = np.expand_dims(depth_obs, axis=2)
                camera_obs = np.concatenate([camera_obs, depth_obs], axis=2)
            new_obs['camera'] = camera_obs

        measurements = raw_obs['robot0_proprio-state']
        object_obs = raw_obs.get('object-state', None)
        if object_obs is not None:
            measurements = np.concatenate([measurements, object_obs])
        new_obs['measurements'] = measurements

        return new_obs

    def _take_action(self, action):
        """Clip the action to the box bounds and repeat it ``frame_skip`` times."""
        action = self.action_space.clip_action_to_space(action)

        # We mimic the "action_repeat" mechanism of RobosuiteWrapper in Surreal.
        # Same concept as frame_skip, only returning the average reward across repeated actions instead
        # of the total reward.
        rewards = []
        for _ in range(self.frame_skip):
            obs, reward, done, info = self.env.step(action)
            rewards.append(reward)
            if done:
                break
        reward = np.mean(rewards)
        self.last_result = RobosuiteStepResult(obs, reward, done, info)

    def _update_state(self):
        """Rebuild ``self.state`` (and reward/done/info) from the last step result."""
        obs = self._process_observation(self.last_result.observation)
        self.state = {k: obs[k] for k in self.state_space.sub_spaces}
        # `or 0` maps a None reward to 0 (a 0.0 reward is unchanged in value)
        self.reward = self.last_result.reward or 0
        self.done = self.last_result.done
        self.info = self.last_result.info

    def _restart_environment_episode(self, force_environment_reset=False):
        """Reset the env and store a synthetic zero-reward, not-done step result."""
        reset_obs = self.env.reset()
        self.last_result = RobosuiteStepResult(reset_obs, 0.0, False, {})

    def _render(self):
        self.env.render()

    def get_rendered_image(self):
        """Render a 512x512 RGB frame from the configured render camera."""
        img: np.ndarray = self.env.sim.render(camera_name=self.base_parameters.render_camera,
                                              height=512, width=512, depth=False)
        # flip vertically — presumably the simulator renders with a bottom-left
        # origin; confirm against MuJoCo rendering docs
        return np.flip(img, 0)

    def close(self):
        """Release the underlying robosuite environment's resources."""
        self.env.close()