def test_filter(clip_filter):
    transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=100, game_over=False)
    result = clip_filter.filter(transition)[0]
    unfiltered_reward = transition.reward
    filtered_reward = result.reward

    # validate that the reward was clipped correctly
    assert filtered_reward == 10

    # make sure the original reward is unchanged
    assert unfiltered_reward == 100

    # reward in bounds
    transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=5, game_over=False)
    result = clip_filter.filter(transition)[0]
    assert result.reward == 5

    # reward below bounds
    transition = EnvResponse(next_state={'observation': np.zeros(10)}, reward=-5, game_over=False)
    result = clip_filter.filter(transition)[0]
    assert result.reward == 2
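# The clip_filter fixture used above is not shown in this snippet. Below is a minimal sketch of one
# plausible definition, assuming a clipping range of [2, 10] inferred from the assertions (100 -> 10,
# 5 -> 5, -5 -> 2) and the InputFilter/RewardClippingFilter construction style used elsewhere in these
# tests; the actual fixture in the test module may differ.
@pytest.fixture
def clip_filter():
    # reward clipping bounds are an assumption derived from the expected values in test_filter
    return InputFilter(reward_filters=OrderedDict([
        ('clip', RewardClippingFilter(clipping_low=2, clipping_high=10))]))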
def test_filter():
    # squeeze out the singleton axes of an observation
    squeeze_filter = InputFilter()
    squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter())

    squeeze_filter_with_axis = InputFilter()
    squeeze_filter_with_axis.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter(2))

    observation = np.random.rand(20, 30, 1, 3)
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)

    result = squeeze_filter.filter(env_response)[0]
    result_with_axis = squeeze_filter_with_axis.filter(env_response)[0]

    unfiltered_observation_shape = env_response.next_state['observation'].shape
    filtered_observation_shape = result.next_state['observation'].shape
    filtered_observation_with_axis_shape = result_with_axis.next_state['observation'].shape

    # make sure the original observation is unchanged
    assert unfiltered_observation_shape == observation.shape

    # make sure the filtering is done correctly
    assert filtered_observation_shape == (20, 30, 3)
    assert filtered_observation_with_axis_shape == (20, 30, 3)

    observation = np.random.rand(1, 30, 1, 3)
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
    result = squeeze_filter.filter(env_response)[0]
    assert result.next_state['observation'].shape == (30, 3)
def test_filter():
    # make an RGB observation smaller
    env_response = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleSizeByFactorFilter(0.5))
    result = rescale_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (10, 15, 3)

    # make a grayscale observation bigger
    env_response = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleSizeByFactorFilter(2))
    result = rescale_filter.filter(env_response)[0]
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (40, 60)
    assert np.all(filtered_observation == np.ones([40, 60]))
def test_filter():
    # make an RGB observation smaller
    transition = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleToSizeFilter(
                                              ImageObservationSpace(np.array([10, 20, 3]), high=255)))
    result = rescale_filter.filter(transition)[0]
    unfiltered_observation = transition.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (10, 20, 3)
    assert np.all(filtered_observation == np.ones([10, 20, 3]))

    # make a grayscale observation bigger
    transition = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleToSizeFilter(
                                              ImageObservationSpace(np.array([40, 60]), high=255)))
    result = rescale_filter.filter(transition)[0]
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (40, 60)
    assert np.all(filtered_observation == np.ones([40, 60]))

    # rescale channels -> error
    # with pytest.raises(ValueError):
    #     InputFilter(observation_filters=OrderedDict([
    #         ('rescale', ObservationRescaleToSizeFilter(
    #             ImageObservationSpace(np.array([10, 20, 1]), high=255)))]))

    # TODO: validate input to filter
    # different number of axes -> error
    # env_response = EnvResponse(state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    # rescale_filter = ObservationRescaleToSizeFilter(ObservationSpace(np.array([10, 20])))
    # with pytest.raises(ValueError):
    #     result = rescale_filter.filter(transition)

    # channels first -> error
    with pytest.raises(ValueError):
        ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([3, 10, 20]), high=255))
def reset_internal_state(self, force_environment_reset=False) -> List[EnvResponse]:
    """
    Reset the environment and all the variables of the wrapper
    :param force_environment_reset: forces environment reset even when the game did not end
    :return: a list with an EnvResponse per agent, containing the initial observation, reward, done flag,
             goal and info
    """
    self._restart_environment_episode(force_environment_reset)
    self.last_episode_time = time.time()

    if self.current_episode_steps_counter > 0 and self.phase != RunPhase.UNDEFINED:
        self.episode_idx += 1

    self.done = [False] * self.num_agents
    self.total_reward_in_current_episode = self.reward = [0.0] * self.num_agents
    self.last_action = [0] * self.num_agents
    self.current_episode_steps_counter = 0

    self.last_env_response = \
        [EnvResponse(
            next_state=state,
            reward=reward,
            game_over=done,
            goal=self.goal,
            info=self.info
        ) for state, reward, done in zip(self.state, self.reward, self.done)]

    return self.last_env_response
def run_pre_network_filter_for_inference(self, state: StateType, update_internal_state: bool = False) -> StateType:
    """
    Run the filters which were defined to be applied right before using the state for inference.
    :param state: the state to run the filters on
    :param update_internal_state: whether the filters should update their internal state while filtering
    :return: the filtered state
    """
    dummy_env_response = EnvResponse(next_state=state, reward=0, game_over=False)
    return self.pre_network_filter.filter(dummy_env_response,
                                          update_internal_state=update_internal_state)[0].next_state
def reset_internal_state(self, force_environment_reset=False) -> EnvResponse:
    """
    Reset the environment and all the variables of the wrapper
    :param force_environment_reset: forces environment reset even when the game did not end
    :return: an EnvResponse containing the initial observation, reward, done flag, goal and info
    """
    self.dump_video_of_last_episode_if_needed()
    self._restart_environment_episode(force_environment_reset)
    self.last_episode_time = time.time()

    if self.current_episode_steps_counter > 0 and self.phase != RunPhase.UNDEFINED:
        self.episode_idx += 1

    self.done = False
    self.total_reward_in_current_episode = self.reward = 0.0
    self.last_action = 0
    self.current_episode_steps_counter = 0
    self.last_episode_images = []
    self._update_state()

    # render before the preprocessing of the observation, so that the image will be in its original quality
    if self.is_rendered:
        self.render()

    self.last_env_response = \
        EnvResponse(
            reward=self.reward,
            next_state=self.state,
            goal=self.goal,
            game_over=self.done,
            info=self.info
        )

    return self.last_env_response
def test_filter():
    rescale_filter = InputFilter(reward_filters=OrderedDict([('rescale', RewardRescaleFilter(1/10.))]))
    env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=100, game_over=False)
    result = rescale_filter.filter(env_response)[0]
    unfiltered_reward = env_response.reward
    filtered_reward = result.reward

    # validate that the reward was rescaled correctly
    assert filtered_reward == 10

    # make sure the original reward is unchanged
    assert unfiltered_reward == 100

    # negative reward
    env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=-50, game_over=False)
    result = rescale_filter.filter(env_response)[0]
    assert result.reward == -5
def run_pre_network_filter_for_inference(self, state: StateType) -> StateType:
    """
    Run the filters which were defined to be applied right before using the state for inference.
    :param state: the state to run the filters on
    :return: the filtered state
    """
    dummy_env_response = EnvResponse(next_state=state, reward=0, game_over=False)
    return self.pre_network_filter.filter(dummy_env_response)[0].next_state
def step(self, action: ActionType) -> EnvResponse:
    """
    Make a single step in the environment using the given action
    :param action: an action to use for stepping the environment. Should follow the definition of the action space.
    :return: the environment response as returned in get_last_env_response
    """
    action = self.action_space.clip_action_to_space(action)
    if self.action_space and not self.action_space.val_matches_space_definition(action):
        raise ValueError("The given action does not match the action space definition. "
                         "Action = {}, action space definition = {}".format(action, self.action_space))

    # store the last agent action done and allow passing None actions to repeat the previously done action
    if action is None:
        action = self.last_action
    self.last_action = action
    if self.visualization_parameters.add_rendered_image_to_env_response:
        current_rendered_image = self.get_rendered_image()

    self.current_episode_steps_counter += 1
    if self.phase != RunPhase.UNDEFINED:
        self.total_steps_counter += 1

    # act
    self._take_action(action)

    # observe
    self._update_state()

    if self.is_rendered:
        self.render()

    self.total_reward_in_current_episode += self.reward

    if self.visualization_parameters.add_rendered_image_to_env_response:
        self.info['image'] = current_rendered_image

    self.last_env_response = \
        EnvResponse(
            reward=self.reward,
            next_state=self.state,
            goal=self.goal,
            game_over=self.done,
            info=self.info
        )

    # store observations for video / gif dumping
    if self.should_dump_video_of_the_current_episode(episode_terminated=False) and \
            (self.visualization_parameters.dump_mp4 or self.visualization_parameters.dump_gifs):
        self.last_episode_images.append(self.get_rendered_image())

    return self.last_env_response
def step(self, action: Union[List[ActionType], ActionType]) -> List[EnvResponse]:
    """
    Make a single step in the environment using the given action
    :param action: an action to use for stepping the environment. Should follow the definition of the action space.
    :return: the environment response as returned in get_last_env_response
    """
    clipped_and_scaled_action = list()
    for agent_action, action_space in zip(force_list(action), force_list(self.action_space)):
        agent_action = action_space.clip_action_to_space(agent_action)
        if action_space and not action_space.contains(agent_action):
            raise ValueError("The given action does not match the action space definition. "
                             "Action = {}, action space definition = {}".format(agent_action, action_space))
        if hasattr(action_space, 'scale_action_space') and action_space.scale_action_space:
            agent_action = action_space.scale_action_values(agent_action)
        clipped_and_scaled_action.append(agent_action)
    action = clipped_and_scaled_action

    # store the last agent action done and allow passing None actions to repeat the previously done action
    if action is None:
        action = self.last_action
    self.last_action = action

    self.current_episode_steps_counter += 1
    if self.phase != RunPhase.UNDEFINED:
        self.total_steps_counter += 1

    # act
    self._take_action(action)

    # observe
    self._update_state()

    self.total_reward_in_current_episode = [
        total_reward_in_current_episode + reward
        for total_reward_in_current_episode, reward in zip(self.total_reward_in_current_episode, self.reward)
    ]

    self.last_env_response = \
        [EnvResponse(
            next_state=state,
            reward=reward,
            game_over=done,
            goal=self.goal,
            info=self.info
        ) for state, reward, done in zip(self.state, self.reward, self.done)]

    return self.last_env_response
def test_get_filtered_observation_space():
    # Keep
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))
    filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([1]))
    assert filtered_observation_space.measurements_names == ['a']

    # Discard
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard))
    filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([2]))
    assert filtered_observation_space.measurements_names == ['b', 'c']
def test_filter_stacking():
    # test that filter stacking works fine by taking as input a transition with:
    # - an observation of shape 210x160,
    # - a reward of 100
    # filtering it by:
    # - rescaling the observation to 110x84
    # - cropping the observation to 84x84
    # - clipping the reward to 1
    # - stacking 4 observations to get 84x84x4
    env_response = EnvResponse(next_state={'observation': np.ones([210, 160])}, reward=100, game_over=False)

    filter1 = ObservationRescaleToSizeFilter(
        output_observation_space=ImageObservationSpace(np.array([110, 84]), high=255),
    )

    filter2 = ObservationCropFilter(
        crop_low=np.array([16, 0]),
        crop_high=np.array([100, 84])
    )

    filter3 = RewardClippingFilter(
        clipping_low=-1,
        clipping_high=1
    )

    output_filter = ObservationStackingFilter(
        stack_size=4,
        stacking_axis=-1
    )

    input_filter = InputFilter(
        observation_filters={
            "observation": OrderedDict([
                ("filter1", filter1),
                ("filter2", filter2),
                ("output_filter", output_filter)
            ])},
        reward_filters=OrderedDict([
            ("filter3", filter3)
        ])
    )

    result = input_filter.filter(env_response)[0]
    observation = np.array(result.next_state['observation'])
    assert observation.shape == (84, 84, 4)
    assert np.all(observation == np.ones([84, 84, 4]))
    assert result.reward == 1
def test_filter(rgb_to_y_filter):
    # convert an RGB observation to grayscale
    observation = np.random.rand(20, 30, 3) * 255.0
    transition = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
    result = rgb_to_y_filter.filter(transition)[0]
    unfiltered_observation = transition.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # make sure the filtering is done correctly
    assert filtered_observation.shape == (20, 30)
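# The rgb_to_y_filter fixture is not included in this snippet. A minimal sketch of one plausible
# definition, following the InputFilter pattern used in the other tests here; the filter class name
# ObservationRGBToYFilter and the fixture body are assumptions, not the confirmed implementation.
@pytest.fixture
def rgb_to_y_filter():
    # converts an HxWx3 RGB observation to a single HxW luminance (Y) channel
    rgb_to_y_filter = InputFilter()
    rgb_to_y_filter.add_observation_filter('observation', 'rgb_to_y', ObservationRGBToYFilter())
    return rgb_to_y_filter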
def test_filter():
    # Keep
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))
    reduction_filter.get_filtered_observation_space('observation', observation_space)
    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3,)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (1,)

    # Discard
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard))
    reduction_filter.get_filtered_observation_space('observation', observation_space)
    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3,)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (2,)
def test_filter():
    # convert a float observation to uint8
    uint8_filter = InputFilter()
    uint8_filter.add_observation_filter('observation', 'to_uint8',
                                        ObservationToUInt8Filter(input_low=0, input_high=255))
    observation = np.random.rand(20, 30, 3) * 255.0
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
    result = uint8_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.dtype == 'float64'

    # make sure the filtering is done correctly
    assert filtered_observation.dtype == 'uint8'
    assert np.all(filtered_observation == observation.astype('uint8'))
def test_reset(stack_filter, env_response):
    # stack observation on empty stack
    result = stack_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']
    assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)

    # reset and make sure the outputs are correct
    stack_filter.reset()
    unfiltered_observation = np.random.rand(20, 30, 1)
    new_env_response = EnvResponse(next_state={'observation': unfiltered_observation}, reward=0, game_over=False)
    result = stack_filter.filter(new_env_response)[0]
    filtered_observation = result.next_state['observation']
    assert np.all(np.array(filtered_observation)[:, :, :, 0] == unfiltered_observation)
    assert np.all(np.array(filtered_observation)[:, :, :, -1] == unfiltered_observation)
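# The stack_filter fixture is not part of this snippet. A minimal sketch of one plausible definition,
# assuming an ObservationStackingFilter with stack_size=4 and stacking_axis=-1 (consistent with the
# constructor used in test_filter_stacking and the 4-dimensional indexing in the assertions above);
# the actual fixture may be defined differently.
@pytest.fixture
def stack_filter():
    stack_filter = InputFilter()
    stack_filter.add_observation_filter('observation', 'stack',
                                        ObservationStackingFilter(stack_size=4, stacking_axis=-1))
    return stack_filter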
@pytest.fixture
def env_response():
    observation = np.random.rand(20, 30, 1)
    return EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)