def test_filter(clip_filter):
    transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=100, game_over=False)
    result = clip_filter.filter(transition)[0]
    unfiltered_reward = transition.reward
    filtered_reward = result.reward

    # validate that the reward was clipped correctly
    assert filtered_reward == 10

    # make sure the original reward is unchanged
    assert unfiltered_reward == 100

    # reward in bounds
    transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=5, game_over=False)
    result = clip_filter.filter(transition)[0]
    assert result.reward == 5

    # reward below bounds
    transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=-5, game_over=False)
    result = clip_filter.filter(transition)[0]
    assert result.reward == 2
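# The clip_filter fixture used above is not shown in this snippet. Below is a minimal sketch of one
# plausible definition, assuming a clipping range of [2, 10] inferred from the assertions (100 -> 10,
# 5 -> 5, -5 -> 2) and the InputFilter/RewardClippingFilter construction style used elsewhere in these
# tests; the actual fixture in the test module may differ.
@pytest.fixture
def clip_filter():
    # reward clipping bounds are an assumption derived from the expected values in test_filter
    return InputFilter(reward_filters=OrderedDict([
        ('clip', RewardClippingFilter(clipping_low=2, clipping_high=10))]))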
def test_filter():
    # squeeze out the singleton axes of an observation
    squeeze_filter = InputFilter()
    squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter())

    squeeze_filter_with_axis = InputFilter()
    squeeze_filter_with_axis.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter(2))

    observation = np.random.rand(20, 30, 1, 3)
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)

    result = squeeze_filter.filter(env_response)[0]
    result_with_axis = squeeze_filter_with_axis.filter(env_response)[0]

    unfiltered_observation_shape = env_response.next_state['observation'].shape
    filtered_observation_shape = result.next_state['observation'].shape
    filtered_observation_with_axis_shape = result_with_axis.next_state['observation'].shape

    # make sure the original observation is unchanged
    assert unfiltered_observation_shape == observation.shape

    # make sure the filtering is done correctly
    assert filtered_observation_shape == (20, 30, 3)
    assert filtered_observation_with_axis_shape == (20, 30, 3)

    observation = np.random.rand(1, 30, 1, 3)
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
    result = squeeze_filter.filter(env_response)[0]
    assert result.next_state['observation'].shape == (30, 3)
def test_filter():
    # make an RGB observation smaller
    env_response = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleSizeByFactorFilter(0.5))
    result = rescale_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (10, 15, 3)

    # make a grayscale observation bigger
    env_response = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleSizeByFactorFilter(2))
    result = rescale_filter.filter(env_response)[0]
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (40, 60)
    assert np.all(filtered_observation == np.ones([40, 60]))
def test_filter():
    # make an RGB observation smaller
    transition = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleToSizeFilter(
                                              ImageObservationSpace(np.array([10, 20, 3]), high=255)))
    result = rescale_filter.filter(transition)[0]
    unfiltered_observation = transition.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (10, 20, 3)
    assert np.all(filtered_observation == np.ones([10, 20, 3]))

    # make a grayscale observation bigger
    transition = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleToSizeFilter(
                                              ImageObservationSpace(np.array([40, 60]), high=255)))
    result = rescale_filter.filter(transition)[0]
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (40, 60)
    assert np.all(filtered_observation == np.ones([40, 60]))

    # rescale channels -> error
    # with pytest.raises(ValueError):
    #     InputFilter(observation_filters=OrderedDict([
    #         ('rescale', ObservationRescaleToSizeFilter(
    #             ImageObservationSpace(np.array([10, 20, 1]), high=255)))]))

    # TODO: validate input to filter
    # different number of axes -> error
    # env_response = EnvResponse(state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    # rescale_filter = ObservationRescaleToSizeFilter(ObservationSpace(np.array([10, 20])))
    # with pytest.raises(ValueError):
    #     result = rescale_filter.filter(transition)

    # channels first -> error
    with pytest.raises(ValueError):
        ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([3, 10, 20]), high=255))
def reset_internal_state(self, force_environment_reset=False) -> List[EnvResponse]:
    """
    Reset the environment and all the variables of the wrapper
    :param force_environment_reset: forces environment reset even when the game did not end
    :return: a list with an EnvResponse per agent, containing the initial observation, reward, done flag,
             goal and info
    """
    self._restart_environment_episode(force_environment_reset)
    self.last_episode_time = time.time()

    if self.current_episode_steps_counter > 0 and self.phase != RunPhase.UNDEFINED:
        self.episode_idx += 1

    self.done = [False] * self.num_agents
    self.total_reward_in_current_episode = self.reward = [0.0] * self.num_agents
    self.last_action = [0] * self.num_agents
    self.current_episode_steps_counter = 0

    self.last_env_response = \
        [EnvResponse(
            next_state=state,
            reward=reward,
            game_over=done,
            goal=self.goal,
            info=self.info
        ) for state, reward, done in zip(self.state, self.reward, self.done)]

    return self.last_env_response
def run_pre_network_filter_for_inference(self, state: StateType, update_internal_state: bool = False) -> StateType:
    """
    Run the filters which were defined to be applied right before using the state for inference.
    :param state: the state to run the filters on
    :param update_internal_state: whether the filters should update their internal state while filtering
    :return: the filtered state
    """
    dummy_env_response = EnvResponse(next_state=state, reward=0, game_over=False)
    return self.pre_network_filter.filter(dummy_env_response,
                                          update_internal_state=update_internal_state)[0].next_state
def reset_internal_state(self, force_environment_reset=False) -> EnvResponse:
    """
    Reset the environment and all the variables of the wrapper
    :param force_environment_reset: forces environment reset even when the game did not end
    :return: an EnvResponse containing the initial observation, reward, done flag, goal and info
    """
    self.dump_video_of_last_episode_if_needed()
    self._restart_environment_episode(force_environment_reset)
    self.last_episode_time = time.time()

    if self.current_episode_steps_counter > 0 and self.phase != RunPhase.UNDEFINED:
        self.episode_idx += 1

    self.done = False
    self.total_reward_in_current_episode = self.reward = 0.0
    self.last_action = 0
    self.current_episode_steps_counter = 0
    self.last_episode_images = []
    self._update_state()

    # render before the preprocessing of the observation, so that the image will be in its original quality
    if self.is_rendered:
        self.render()

    self.last_env_response = \
        EnvResponse(
            reward=self.reward,
            next_state=self.state,
            goal=self.goal,
            game_over=self.done,
            info=self.info
        )

    return self.last_env_response
def test_filter():
    rescale_filter = InputFilter(reward_filters=OrderedDict([('rescale', RewardRescaleFilter(1/10.))]))
    env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=100, game_over=False)
    result = rescale_filter.filter(env_response)[0]
    unfiltered_reward = env_response.reward
    filtered_reward = result.reward

    # validate that the reward was rescaled correctly
    assert filtered_reward == 10

    # make sure the original reward is unchanged
    assert unfiltered_reward == 100

    # negative reward
    env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=-50, game_over=False)
    result = rescale_filter.filter(env_response)[0]
    assert result.reward == -5
def run_pre_network_filter_for_inference(self, state: StateType) -> StateType:
    """
    Run the filters which were defined to be applied right before using the state for inference.
    :param state: the state to run the filters on
    :return: the filtered state
    """
    dummy_env_response = EnvResponse(next_state=state, reward=0, game_over=False)
    return self.pre_network_filter.filter(dummy_env_response)[0].next_state
def step(self, action: ActionType) -> EnvResponse:
    """
    Make a single step in the environment using the given action
    :param action: an action to use for stepping the environment. Should follow the definition of the action space.
    :return: the environment response as returned in get_last_env_response
    """
    action = self.action_space.clip_action_to_space(action)
    if self.action_space and not self.action_space.val_matches_space_definition(action):
        raise ValueError("The given action does not match the action space definition. "
                         "Action = {}, action space definition = {}".format(action, self.action_space))

    # store the last agent action done and allow passing None actions to repeat the previously done action
    if action is None:
        action = self.last_action
    self.last_action = action
    if self.visualization_parameters.add_rendered_image_to_env_response:
        current_rendered_image = self.get_rendered_image()

    self.current_episode_steps_counter += 1
    if self.phase != RunPhase.UNDEFINED:
        self.total_steps_counter += 1

    # act
    self._take_action(action)

    # observe
    self._update_state()

    if self.is_rendered:
        self.render()

    self.total_reward_in_current_episode += self.reward

    if self.visualization_parameters.add_rendered_image_to_env_response:
        self.info['image'] = current_rendered_image

    self.last_env_response = \
        EnvResponse(
            reward=self.reward,
            next_state=self.state,
            goal=self.goal,
            game_over=self.done,
            info=self.info
        )

    # store observations for video / gif dumping
    if self.should_dump_video_of_the_current_episode(episode_terminated=False) and \
            (self.visualization_parameters.dump_mp4 or self.visualization_parameters.dump_gifs):
        self.last_episode_images.append(self.get_rendered_image())

    return self.last_env_response
def step(self, action: Union[List[ActionType], ActionType]) -> List[EnvResponse]:
    """
    Make a single step in the environment using the given action
    :param action: an action to use for stepping the environment. Should follow the definition of the action space.
    :return: the environment response as returned in get_last_env_response
    """
    clipped_and_scaled_action = list()
    for agent_action, action_space in zip(force_list(action), force_list(self.action_space)):
        agent_action = action_space.clip_action_to_space(agent_action)
        if action_space and not action_space.contains(agent_action):
            raise ValueError("The given action does not match the action space definition. "
                             "Action = {}, action space definition = {}".format(agent_action, action_space))
        if hasattr(action_space, 'scale_action_space') and action_space.scale_action_space:
            agent_action = action_space.scale_action_values(agent_action)
        clipped_and_scaled_action.append(agent_action)
    action = clipped_and_scaled_action

    # store the last agent action done and allow passing None actions to repeat the previously done action
    if action is None:
        action = self.last_action
    self.last_action = action

    self.current_episode_steps_counter += 1
    if self.phase != RunPhase.UNDEFINED:
        self.total_steps_counter += 1

    # act
    self._take_action(action)

    # observe
    self._update_state()

    self.total_reward_in_current_episode = [
        total_reward_in_current_episode + reward
        for total_reward_in_current_episode, reward in zip(self.total_reward_in_current_episode, self.reward)
    ]

    self.last_env_response = \
        [EnvResponse(
            next_state=state,
            reward=reward,
            game_over=done,
            goal=self.goal,
            info=self.info
        ) for state, reward, done in zip(self.state, self.reward, self.done)]

    return self.last_env_response
def test_get_filtered_observation_space():
    # Keep
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))
    filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([1]))
    assert filtered_observation_space.measurements_names == ['a']

    # Discard
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard))
    filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([2]))
    assert filtered_observation_space.measurements_names == ['b', 'c']
def test_filter_stacking():
    # test that filter stacking works fine by taking as input a transition with:
    # - an observation of shape 210x160,
    # - a reward of 100
    # filtering it by:
    # - rescaling the observation to 110x84
    # - cropping the observation to 84x84
    # - clipping the reward to 1
    # - stacking 4 observations to get 84x84x4
    env_response = EnvResponse(next_state={'observation': np.ones([210, 160])}, reward=100, game_over=False)

    filter1 = ObservationRescaleToSizeFilter(
        output_observation_space=ImageObservationSpace(np.array([110, 84]), high=255),
    )

    filter2 = ObservationCropFilter(
        crop_low=np.array([16, 0]),
        crop_high=np.array([100, 84])
    )

    filter3 = RewardClippingFilter(
        clipping_low=-1,
        clipping_high=1
    )

    output_filter = ObservationStackingFilter(
        stack_size=4,
        stacking_axis=-1
    )

    input_filter = InputFilter(
        observation_filters={
            "observation": OrderedDict([
                ("filter1", filter1),
                ("filter2", filter2),
                ("output_filter", output_filter)
            ])},
        reward_filters=OrderedDict([
            ("filter3", filter3)
        ])
    )

    result = input_filter.filter(env_response)[0]
    observation = np.array(result.next_state['observation'])
    assert observation.shape == (84, 84, 4)
    assert np.all(observation == np.ones([84, 84, 4]))
    assert result.reward == 1
def test_filter(rgb_to_y_filter):
    # convert an RGB observation to grayscale
    observation = np.random.rand(20, 30, 3) * 255.0
    transition = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
    result = rgb_to_y_filter.filter(transition)[0]
    unfiltered_observation = transition.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # make sure the filtering is done correctly
    assert filtered_observation.shape == (20, 30)
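# The rgb_to_y_filter fixture is not included in this snippet. A minimal sketch of one plausible
# definition, following the InputFilter pattern used in the other tests here; the filter class name
# ObservationRGBToYFilter and the fixture body are assumptions, not the confirmed implementation.
@pytest.fixture
def rgb_to_y_filter():
    # converts an HxWx3 RGB observation to a single HxW luminance (Y) channel
    rgb_to_y_filter = InputFilter()
    rgb_to_y_filter.add_observation_filter('observation', 'rgb_to_y', ObservationRGBToYFilter())
    return rgb_to_y_filter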
def test_filter():
    # Keep
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))
    reduction_filter.get_filtered_observation_space('observation', observation_space)
    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3,)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (1,)

    # Discard
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard))
    reduction_filter.get_filtered_observation_space('observation', observation_space)
    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3,)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (2,)
def test_filter():
    # convert a float observation to uint8
    uint8_filter = InputFilter()
    uint8_filter.add_observation_filter('observation', 'to_uint8',
                                        ObservationToUInt8Filter(input_low=0, input_high=255))
    observation = np.random.rand(20, 30, 3) * 255.0
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
    result = uint8_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.dtype == 'float64'

    # make sure the filtering is done correctly
    assert filtered_observation.dtype == 'uint8'
    assert np.all(filtered_observation == observation.astype('uint8'))
def test_reset(stack_filter, env_response):
    # stack observation on empty stack
    result = stack_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']
    assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)

    # reset and make sure the outputs are correct
    stack_filter.reset()
    unfiltered_observation = np.random.rand(20, 30, 1)
    new_env_response = EnvResponse(next_state={'observation': unfiltered_observation}, reward=0, game_over=False)
    result = stack_filter.filter(new_env_response)[0]
    filtered_observation = result.next_state['observation']
    assert np.all(np.array(filtered_observation)[:, :, :, 0] == unfiltered_observation)
    assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)
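# The stack_filter fixture is not part of this snippet. A minimal sketch of one plausible definition,
# assuming an ObservationStackingFilter with stack_size=4 and stacking_axis=-1 (consistent with the
# constructor used in test_filter_stacking and the 4-dimensional indexing in the assertions above);
# the actual fixture may be defined differently.
@pytest.fixture
def stack_filter():
    stack_filter = InputFilter()
    stack_filter.add_observation_filter('observation', 'stack',
                                        ObservationStackingFilter(stack_size=4, stacking_axis=-1))
    return stack_filter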
@pytest.fixture
def env_response():
    observation = np.random.rand(20, 30, 1)
    return EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)