Example #1
    def __init__(self, level: LevelSelection, seed: int, frame_skip: int,
                 human_control: bool, custom_reward_threshold: Union[int, float],
                 visualization_parameters: VisualizationParameters, **kwargs):
        """
        :param level: The environment level. Each environment can have multiple levels
        :param seed: a seed for the random number generator of the environment
        :param frame_skip: number of frames to skip (while repeating the same action) between each two agent directives
        :param human_control: human should control the environment
        :param visualization_parameters: a blob of parameters used for visualization of the environment
        :param **kwargs: as the class is instantiated by EnvironmentParameters, this is used to support having
                         additional arguments which will be ignored by this class, but might be used by others
        """
        super().__init__()

        # env initialization

        self.game = []

        self.state = {}
        self.observation = None
        self.goal = None
        self.reward = 0
        self.done = False
        self.info = {}
        self._last_env_response = None
        self.last_action = 0
        self.episode_idx = 0
        self.total_steps_counter = 0
        self.current_episode_steps_counter = 0
        self.last_episode_time = time.time()
        self.key_to_action = {}
        self.last_episode_images = []

        # rewards
        self.total_reward_in_current_episode = 0
        self.max_reward_achieved = -np.inf
        self.reward_success_threshold = custom_reward_threshold

        # spaces
        self.state_space = self._state_space = None
        self.goal_space = self._goal_space = None
        self.action_space = self._action_space = None
        self.reward_space = RewardSpace(
            1, reward_success_threshold=self.reward_success_threshold
        )  # TODO: add a getter and setter

        self.env_id = str(level)
        self.seed = seed
        self.frame_skip = frame_skip

        # human interaction and visualization
        self.human_control = human_control
        self.wait_for_explicit_human_action = False
        self.is_rendered = visualization_parameters.render or self.human_control
        self.native_rendering = visualization_parameters.native_rendering and not self.human_control
        self.visualization_parameters = visualization_parameters
        if not self.native_rendering:
            self.renderer = Renderer()
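
The `**kwargs` parameter in the constructor above exists so that one shared parameter blob can be splatted into different environment classes: keyword arguments a given class does not declare are simply collected and ignored. A minimal sketch of that pattern in plain Python (the names below are illustrative only and are not part of Coach):

class ToyEnv:
    def __init__(self, seed: int, **kwargs):
        # arguments intended for other classes land in kwargs and are ignored here
        self.seed = seed

# a parameter blob that also carries arguments meant for other environment classes
params = {"seed": 7, "frame_skip": 4, "human_control": False}
env = ToyEnv(**params)  # no TypeError: frame_skip and human_control are absorbed by **kwargs
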
Example #2
class Environment(EnvironmentInterface):
    def __init__(self,
                 level: LevelSelection,
                 seed: int,
                 frame_skip: int,
                 human_control: bool,
                 custom_reward_threshold: Union[int, float],
                 visualization_parameters: VisualizationParameters,
                 target_success_rate: float = 1.0,
                 **kwargs):
        """
        :param level: The environment level. Each environment can have multiple levels
        :param seed: a seed for the random number generator of the environment
        :param frame_skip: number of frames to skip (while repeating the same action) between each two agent directives
        :param human_control: human should control the environment
        :param visualization_parameters: a blob of parameters used for visualization of the environment
        :param **kwargs: as the class is instantiated by EnvironmentParameters, this is used to support having
                         additional arguments which will be ignored by this class, but might be used by others
        """
        super().__init__()

        # env initialization

        self.game = []

        self.state = {}
        self.observation = None
        self.goal = None
        self.reward = 0
        self.done = False
        self.info = {}
        self._last_env_response = None
        self.last_action = 0
        self.episode_idx = 0
        self.total_steps_counter = 0
        self.current_episode_steps_counter = 0
        self.last_episode_time = time.time()
        self.key_to_action = {}
        self.last_episode_images = []

        # rewards
        self.total_reward_in_current_episode = 0
        self.max_reward_achieved = -np.inf
        self.reward_success_threshold = custom_reward_threshold

        # spaces
        self.state_space = self._state_space = None
        self.goal_space = self._goal_space = None
        self.action_space = self._action_space = None
        self.reward_space = RewardSpace(
            1, reward_success_threshold=self.reward_success_threshold
        )  # TODO: add a getter and setter

        self.env_id = str(level)
        self.seed = seed
        self.frame_skip = frame_skip

        # human interaction and visualization
        self.human_control = human_control
        self.wait_for_explicit_human_action = False
        self.is_rendered = visualization_parameters.render or self.human_control
        self.native_rendering = visualization_parameters.native_rendering and not self.human_control
        self.visualization_parameters = visualization_parameters
        if not self.native_rendering:
            self.renderer = Renderer()

        # Set target reward and target_success if present
        self.target_success_rate = target_success_rate

    @property
    def action_space(self) -> Union[List[ActionSpace], ActionSpace]:
        """
        Get the action space of the environment

        :return: the action space
        """
        return self._action_space

    @action_space.setter
    def action_space(self, val: Union[List[ActionSpace], ActionSpace]):
        """
        Set the action space of the environment

        :return: None
        """
        self._action_space = val

    @property
    def state_space(self) -> Union[List[StateSpace], StateSpace]:
        """
        Get the state space of the environment

        :return: the state space
        """
        return self._state_space

    @state_space.setter
    def state_space(self, val: Union[List[StateSpace], StateSpace]):
        """
        Set the state space of the environment

        :return: None
        """
        self._state_space = val

    @property
    def goal_space(self) -> Union[List[ObservationSpace], ObservationSpace]:
        """
        Get the goal space of the environment

        :return: the goal space
        """
        return self._goal_space

    @goal_space.setter
    def goal_space(self, val: Union[List[ObservationSpace], ObservationSpace]):
        """
        Set the goal space of the environment

        :return: None
        """
        self._goal_space = val

    def get_action_from_user(self) -> ActionType:
        """
        Get an action from the user keyboard

        :return: action index
        """
        if self.wait_for_explicit_human_action:
            while len(self.renderer.pressed_keys) == 0:
                self.renderer.get_events()

        if self.key_to_action == {}:
            # the keys are the numbers on the keyboard corresponding to the action index
            if len(self.renderer.pressed_keys) > 0:
                action_idx = self.renderer.pressed_keys[0] - ord("1")
                if 0 <= action_idx < self.action_space.shape[0]:
                    return action_idx
        else:
            # the keys are mapped through the environment to more intuitive keyboard keys
            # key = tuple(self.renderer.pressed_keys)
            # for key in self.renderer.pressed_keys:
            for env_keys in self.key_to_action.keys():
                if set(env_keys) == set(self.renderer.pressed_keys):
                    return self.action_space.actions[
                        self.key_to_action[env_keys]]

        # return the default action 0 so that the environment will continue running
        return self.action_space.default_action

    @property
    def last_env_response(self) -> Union[List[EnvResponse], EnvResponse]:
        """
        Get the last environment response

        :return: a dictionary that contains the state, reward, etc.
        """
        return squeeze_list(self._last_env_response)

    @last_env_response.setter
    def last_env_response(self, val: Union[List[EnvResponse], EnvResponse]):
        """
        Set the last environment response

        :param val: the last environment response
        """
        self._last_env_response = force_list(val)

    def step(self, action: ActionType) -> EnvResponse:
        """
        Make a single step in the environment using the given action

        :param action: an action to use for stepping the environment. Should follow the definition of the action space.
        :return: the environment response as returned in get_last_env_response
        """
        action = self.action_space.clip_action_to_space(action)
        if self.action_space and not self.action_space.val_matches_space_definition(
                action):
            raise ValueError(
                "The given action does not match the action space definition. "
                "Action = {}, action space definition = {}".format(
                    action, self.action_space))

        # store the last agent action done and allow passing None actions to repeat the previously done action
        if action is None:
            action = self.last_action
        self.last_action = action
        if self.visualization_parameters.add_rendered_image_to_env_response:
            current_rendered_image = self.get_rendered_image()

        self.current_episode_steps_counter += 1
        if self.phase != RunPhase.UNDEFINED:
            self.total_steps_counter += 1

        # act
        self._take_action(action)

        # observe
        self._update_state()

        if self.is_rendered:
            self.render()

        self.total_reward_in_current_episode += self.reward

        if self.visualization_parameters.add_rendered_image_to_env_response:
            self.info['image'] = current_rendered_image

        self.last_env_response = \
            EnvResponse(
                reward=self.reward,
                next_state=self.state,
                goal=self.goal,
                game_over=self.done,
                info=self.info
            )

        # store observations for video / gif dumping
        if self.should_dump_video_of_the_current_episode(episode_terminated=False) and \
            (self.visualization_parameters.dump_mp4 or self.visualization_parameters.dump_gifs):
            self.last_episode_images.append(self.get_rendered_image())

        return self.last_env_response

    def render(self) -> None:
        """
        Call the environment function for rendering to the screen

        :return: None
        """
        if self.native_rendering:
            self._render()
        else:
            self.renderer.render_image(self.get_rendered_image())

    def handle_episode_ended(self) -> None:
        """
        End an episode

        :return: None
        """
        self.dump_video_of_last_episode_if_needed()

    def reset_internal_state(self,
                             force_environment_reset=False) -> EnvResponse:
        """
        Reset the environment and all the variables of the wrapper

        :param force_environment_reset: forces environment reset even when the game did not end
        :return: A dictionary containing the observation, reward, done flag, action and measurements
        """

        self._restart_environment_episode(force_environment_reset)
        self.last_episode_time = time.time()

        if self.current_episode_steps_counter > 0 and self.phase != RunPhase.UNDEFINED:
            self.episode_idx += 1

        self.done = False
        self.total_reward_in_current_episode = self.reward = 0.0
        self.last_action = 0
        self.current_episode_steps_counter = 0
        self.last_episode_images = []
        self._update_state()

        # render before the preprocessing of the observation, so that the image will be in its original quality
        if self.is_rendered:
            self.render()

        self.last_env_response = \
            EnvResponse(
                reward=self.reward,
                next_state=self.state,
                goal=self.goal,
                game_over=self.done,
                info=self.info
            )

        return self.last_env_response

    def get_random_action(self) -> ActionType:
        """
        Returns an action picked uniformly from the available actions

        :return: a numpy array with a random action
        """
        return self.action_space.sample()

    def get_available_keys(self) -> List[Tuple[str, ActionType]]:
        """
        Return a list of tuples mapping between action names and the keyboard key that triggers them

        :return: a list of tuples mapping between action names and the keyboard key that triggers them
        """
        available_keys = []
        if self.key_to_action != {}:
            for key, idx in sorted(self.key_to_action.items(),
                                   key=operator.itemgetter(1)):
                if key != ():
                    key_names = [
                        self.renderer.get_key_names([k])[0] for k in key
                    ]
                    available_keys.append((self.action_space.descriptions[idx],
                                           ' + '.join(key_names)))
        elif type(self.action_space) == DiscreteActionSpace:
            for action in range(self.action_space.shape):
                available_keys.append(
                    ("Action {}".format(action + 1), action + 1))
        return available_keys

    def get_goal(self) -> GoalType:
        """
        Get the current goal that the agent needs to achieve in the environment

        :return: The goal
        """
        return self.goal

    def set_goal(self, goal: GoalType) -> None:
        """
        Set the current goal that the agent needs to achieve in the environment

        :param goal: the goal that needs to be achieved
        :return: None
        """
        self.goal = goal

    def should_dump_video_of_the_current_episode(self, episode_terminated=False):
        """
        Check whether a video of the current episode should be dumped, based on the configured video dump filters

        :param episode_terminated: whether the episode has already terminated
        :return: True if the episode should be dumped, False otherwise
        """
        if self.visualization_parameters.video_dump_filters:
            for video_dump_filter in force_list(self.visualization_parameters.video_dump_filters):
                if not video_dump_filter.should_dump(episode_terminated, **self.__dict__):
                    return False
        return True

    def dump_video_of_last_episode_if_needed(self):
        if self.last_episode_images != [] and self.should_dump_video_of_the_current_episode(
                episode_terminated=True):
            self.dump_video_of_last_episode()

    def dump_video_of_last_episode(self):
        frame_skipping = max(1, int(5 / self.frame_skip))
        file_name = 'episode-{}_score-{}'.format(
            self.episode_idx, self.total_reward_in_current_episode)
        fps = 10
        if self.visualization_parameters.dump_gifs:
            logger.create_gif(self.last_episode_images[::frame_skipping],
                              name=file_name,
                              fps=fps)
        if self.visualization_parameters.dump_mp4:
            logger.create_mp4(self.last_episode_images[::frame_skipping],
                              name=file_name,
                              fps=fps)

    # The following functions define the interaction with the environment.
    # Any new environment that inherits the Environment class should use these signatures.
    # Some of these functions are optional - please read their description for more details.

    def _take_action(self, action_idx: ActionType) -> None:
        """
        An environment dependent function that sends an action to the simulator.

        :param action_idx: the action to perform on the environment
        :return: None
        """
        raise NotImplementedError("")

    def _update_state(self) -> None:
        """
        Updates the state from the environment.
        Should update self.observation, self.reward, self.done, self.measurements and self.info

        :return: None
        """
        raise NotImplementedError("")

    def _restart_environment_episode(self,
                                     force_environment_reset=False) -> None:
        """
        Restarts the simulator episode

        :param force_environment_reset: Force the environment to reset even if the episode is not done yet.
        :return: None
        """
        raise NotImplementedError("")

    def _render(self) -> None:
        """
        Renders the environment using the native simulator renderer

        :return: None
        """
        pass

    def get_rendered_image(self) -> np.ndarray:
        """
        Return a numpy array containing the image that will be rendered to the screen.
        This can be different from the observation. For example, mujoco's observation is a measurements vector.

        :return: numpy array containing the image that will be rendered to the screen
        """
        return np.transpose(self.state['observation'], [1, 2, 0])

    def get_target_success_rate(self) -> float:
        """
        Get the target success rate for the environment task

        :return: the target success rate
        """
        return self.target_success_rate

    def close(self) -> None:
        """
        Clean up steps.

        :return: None
        """
        pass
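
Any new environment plugs into this base class by overriding the private hooks declared near the end of Example #2: `_take_action`, `_update_state`, `_restart_environment_episode`, and optionally `_render` / `get_rendered_image`. Below is a minimal, hypothetical sketch of such a subclass. The backend simulator (`_ToySimulator`) and the subclass name are invented for illustration, and the `DiscreteActionSpace(num_actions)` constructor call is an assumption about rl_coach.spaces rather than a verified signature.

class _ToySimulator:
    """Stand-in backend simulator used only for this sketch."""
    def __init__(self, level_name: str, seed: int):
        self.rng = np.random.RandomState(seed)
        self.steps = 0

    def apply(self, action_idx):
        self.steps += 1

    def observe(self):
        # CHW image, so that Environment.get_rendered_image can transpose it to HWC
        image = self.rng.randint(0, 255, (3, 84, 84), dtype=np.uint8)
        reward = 1.0
        done = self.steps >= 100
        return image, reward, done

    def reset(self):
        self.steps = 0


class ToyEnvironment(Environment):
    """Hypothetical subclass showing which hooks the Environment base class expects."""
    def __init__(self, level, seed, frame_skip, human_control,
                 custom_reward_threshold, visualization_parameters, **kwargs):
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters, **kwargs)
        self.game = _ToySimulator(str(level), seed)
        # assumption: DiscreteActionSpace takes the number of actions as its first argument
        self.action_space = DiscreteActionSpace(4)

    def _take_action(self, action_idx: ActionType) -> None:
        # send the chosen action to the backend, repeating it frame_skip times
        for _ in range(self.frame_skip):
            self.game.apply(action_idx)

    def _update_state(self) -> None:
        # refresh self.state, self.reward and self.done from the backend
        observation, self.reward, self.done = self.game.observe()
        self.state = {'observation': observation}

    def _restart_environment_episode(self, force_environment_reset=False) -> None:
        self.game.reset()

Once such a subclass exists, an agent drives it purely through the interface shown above: reset_internal_state() to start an episode and step(action) to advance it, with each call returning an EnvResponse.
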