def test_filter(env_response):
    crop_low = np.array([0, 5, 10])
    crop_high = np.array([5, 10, 20])
    crop_filter = InputFilter()
    crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))
    result = crop_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (5, 5, 10)

    # validate the content of the filtered observation
    assert np.all(filtered_observation == unfiltered_observation[0:5, 5:10, 10:20])

    # crop with -1 on some axes
    crop_low = np.array([0, 0, 0])
    crop_high = np.array([5, -1, -1])
    crop_filter = InputFilter()
    crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))
    result = crop_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (5, 20, 30)

    # validate the content of the filtered observation
    assert np.all(filtered_observation == unfiltered_observation[0:5, :, :])
def test_filter():
    # make an RGB observation smaller
    env_response = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleSizeByFactorFilter(0.5))
    result = rescale_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (10, 15, 3)

    # make a grayscale observation bigger
    env_response = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleSizeByFactorFilter(2))
    result = rescale_filter.filter(env_response)[0]
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (40, 60)
    assert np.all(filtered_observation == np.ones([40, 60]))
def test_filter():
    # make an RGB observation smaller
    transition = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleToSizeFilter(
                                              ImageObservationSpace(np.array([10, 20, 3]), high=255)))
    result = rescale_filter.filter(transition)[0]
    unfiltered_observation = transition.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (10, 20, 3)
    assert np.all(filtered_observation == np.ones([10, 20, 3]))

    # make a grayscale observation bigger
    transition = EnvResponse(next_state={'observation': np.ones([20, 30])}, reward=0, game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleToSizeFilter(
                                              ImageObservationSpace(np.array([40, 60]), high=255)))
    result = rescale_filter.filter(transition)[0]
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (40, 60)
    assert np.all(filtered_observation == np.ones([40, 60]))

    # rescale channels -> error
    # with pytest.raises(ValueError):
    #     InputFilter(observation_filters=OrderedDict([
    #         ('rescale', ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([10, 20, 1]), high=255)))]))

    # TODO: validate input to filter
    # different number of axes -> error
    # env_response = EnvResponse(state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    # rescale_filter = ObservationRescaleToSizeFilter(ObservationSpace(np.array([10, 20])))
    # with pytest.raises(ValueError):
    #     result = rescale_filter.filter(transition)

    # channels first -> error
    with pytest.raises(ValueError):
        ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([3, 10, 20]), high=255))
def load_csv(self, csv_dataset: CsvDataset, input_filter: InputFilter) -> None:
    """
    Restore the replay buffer contents from a csv file.
    The csv file is assumed to include a list of transitions.
    :param csv_dataset: A construct which holds the dataset parameters
    :param input_filter: A filter used to filter the CSV data before feeding it to the memory.
    """
    self.assert_not_frozen()

    df = pd.read_csv(csv_dataset.filepath)
    if len(df) > self.max_size[1]:
        screen.warning("Warning! The number of transitions to load into the replay buffer ({}) is "
                       "bigger than the max size of the replay buffer ({}). The excessive transitions will "
                       "not be stored.".format(len(df), self.max_size[1]))

    episode_ids = df['episode_id'].unique()
    progress_bar = ProgressBar(len(episode_ids))
    state_columns = [col for col in df.columns if col.startswith('state_feature')]

    for e_id in episode_ids:
        progress_bar.update(e_id)
        df_episode_transitions = df[df['episode_id'] == e_id]
        input_filter.reset()

        if len(df_episode_transitions) < 2:
            # we have to have at least 2 rows in each episode for creating a transition
            continue

        episode = Episode()
        transitions = []
        for (_, current_transition), (_, next_transition) in zip(df_episode_transitions[:-1].iterrows(),
                                                                 df_episode_transitions[1:].iterrows()):
            state = np.array([current_transition[col] for col in state_columns])
            next_state = np.array([next_transition[col] for col in state_columns])
            transitions.append(
                Transition(state={'observation': state},
                           action=int(current_transition['action']),
                           reward=current_transition['reward'],
                           next_state={'observation': next_state},
                           game_over=False,
                           info={'all_action_probabilities':
                                 ast.literal_eval(current_transition['all_action_probabilities'])}))

        transitions = input_filter.filter(transitions, deep_copy=False)
        for t in transitions:
            episode.insert(t)

        # Set the last transition to end the episode
        if csv_dataset.is_episodic:
            episode.get_last_transition().game_over = True

        self.store_episode(episode)

    # close the progress bar
    progress_bar.update(len(episode_ids))
    progress_bar.close()
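# A minimal usage sketch for the loader above (not taken from this file): the memory
# class, the module paths, and the CsvDataset(filepath, is_episodic) signature are
# assumptions for illustration and may differ in the actual codebase.
from rl_coach.core_types import CsvDataset
from rl_coach.filters.filter import InputFilter
from rl_coach.memories.episodic.episodic_experience_replay import EpisodicExperienceReplay

memory = EpisodicExperienceReplay()
# an empty InputFilter applies no transformation, so transitions are stored as logged
memory.load_csv(CsvDataset('/path/to/transitions.csv', is_episodic=True), InputFilter())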
def test_filter_stacking():
    # test that filter stacking works fine by taking as input a transition with:
    # - an observation of shape 210x160,
    # - a reward of 100
    # filtering it by:
    # - rescaling the observation to 110x84
    # - cropping the observation to 84x84
    # - clipping the reward to 1
    # - stacking 4 observations to get 84x84x4
    env_response = EnvResponse({'observation': np.ones([210, 160])}, reward=100, game_over=False)

    filter1 = ObservationRescaleToSizeFilter(
        output_observation_space=ImageObservationSpace(np.array([110, 84]), high=255),
    )

    filter2 = ObservationCropFilter(
        crop_low=np.array([16, 0]),
        crop_high=np.array([100, 84])
    )

    filter3 = RewardClippingFilter(
        clipping_low=-1,
        clipping_high=1
    )

    output_filter = ObservationStackingFilter(
        stack_size=4,
        stacking_axis=-1
    )

    input_filter = InputFilter(
        observation_filters={
            "observation": OrderedDict([
                ("filter1", filter1),
                ("filter2", filter2),
                ("output_filter", output_filter)
            ])},
        reward_filters=OrderedDict([
            ("filter3", filter3)
        ])
    )

    result = input_filter.filter(env_response)[0]
    observation = np.array(result.next_state['observation'])
    assert observation.shape == (84, 84, 4)
    assert np.all(observation == np.ones([84, 84, 4]))
    assert result.reward == 1
def test_get_filtered_observation_space():
    # error on wrong number of channels
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter('observation', 'rescale',
                                          ObservationRescaleSizeByFactorFilter(
                                              0.5, RescaleInterpolationType.BILINEAR))
    observation_space = ObservationSpace(np.array([10, 20, 5]))
    with pytest.raises(ValueError):
        filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)

    # error on wrong number of dimensions
    observation_space = ObservationSpace(np.array([10, 20, 10, 3]))
    with pytest.raises(ValueError):
        filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)

    # make sure the new observation space shape is calculated correctly
    observation_space = ObservationSpace(np.array([10, 20, 3]))
    filtered_observation_space = rescale_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([5, 10, 3]))

    # make sure the original observation space is unchanged
    assert np.all(observation_space.shape == np.array([10, 20, 3]))
def test_filter():
    rescale_filter = InputFilter(reward_filters=OrderedDict([('rescale', RewardRescaleFilter(1/10.))]))
    env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=100, game_over=False)
    print(rescale_filter.observation_filters)
    result = rescale_filter.filter(env_response)[0]
    unfiltered_reward = env_response.reward
    filtered_reward = result.reward

    # validate that the reward was rescaled correctly
    assert filtered_reward == 10

    # make sure the original reward is unchanged
    assert unfiltered_reward == 100

    # negative reward
    env_response = EnvResponse(next_state={'observation': np.zeros(10)}, reward=-50, game_over=False)
    result = rescale_filter.filter(env_response)[0]
    assert result.reward == -5
def test_get_filtered_reward_space():
    rescale_filter = InputFilter(reward_filters=OrderedDict([('rescale', RewardRescaleFilter(1/10.))]))

    # bounded rewards are rescaled
    reward_space = RewardSpace(1, -100, 100)
    filtered_reward_space = rescale_filter.get_filtered_reward_space(reward_space)

    # make sure the new reward space bounds are calculated correctly
    assert filtered_reward_space.shape == 1
    assert filtered_reward_space.low == -10
    assert filtered_reward_space.high == 10

    # unbounded rewards
    reward_space = RewardSpace(1, -np.inf, np.inf)
    filtered_reward_space = rescale_filter.get_filtered_reward_space(reward_space)

    # make sure the new reward space bounds are calculated correctly
    assert filtered_reward_space.shape == 1
    assert filtered_reward_space.low == -np.inf
    assert filtered_reward_space.high == np.inf
def test_filter():
    # convert a float RGB observation to uint8
    uint8_filter = InputFilter()
    uint8_filter.add_observation_filter('observation', 'to_uint8',
                                        ObservationToUInt8Filter(input_low=0, input_high=255))

    observation = np.random.rand(20, 30, 3) * 255.0
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
    result = uint8_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.dtype == 'float64'

    # make sure the filtering is done correctly
    assert filtered_observation.dtype == 'uint8'
    assert np.all(filtered_observation == observation.astype('uint8'))
def test_get_filtered_observation_space():
    # Keep
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))

    filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([1]))
    assert filtered_observation_space.measurements_names == ['a']

    # Discard
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard))

    filtered_observation_space = reduction_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([2]))
    assert filtered_observation_space.measurements_names == ['b', 'c']
def test_get_filtered_observation_space():
    # error on observation space with values not matching the filter configuration
    uint8_filter = InputFilter()
    uint8_filter.add_observation_filter('observation', 'to_uint8',
                                        ObservationToUInt8Filter(input_low=0, input_high=200))

    observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 100)
    with pytest.raises(ValueError):
        uint8_filter.get_filtered_observation_space('observation', observation_space)

    # verify output observation space is correct
    observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 200)
    result = uint8_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(result.high == 255)
    assert np.all(result.low == 0)
    assert np.all(result.shape == observation_space.shape)
e: "{}".format(lower_under_to_upper(e) + '-v2') for e in gym_mujoco_envs } mujoco_v2['walker2d'] = 'Walker2d-v2' # Fetch gym_fetch_envs = ['reach', 'slide', 'push', 'pick_and_place'] fetch_v1 = { e: "{}".format('Fetch' + lower_under_to_upper(e) + '-v1') for e in gym_fetch_envs } """ Atari Environment Components """ AtariInputFilter = InputFilter(is_a_reference_filter=True) AtariInputFilter.add_reward_filter('clipping', RewardClippingFilter(-1.0, 1.0)) AtariInputFilter.add_observation_filter( 'observation', 'rescaling', ObservationRescaleToSizeFilter( ImageObservationSpace( np.array([84, 84, 3]), #np.array([224, 224, 3]), high=255))) #AtariInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter()) #AtariInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255)) #AtariInputFilter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(4)) AtariOutputFilter = NoOutputFilter() class Atari(GymEnvironmentParameters):
def get_graph_manager(**hp_dict):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type", "huber")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()
    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, params["e_greedy_value"],
                                                                   params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                                  ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                        env_params=env_params,
                                        schedule_params=schedule_params,
                                        vis_params=vis_params,
                                        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
    'SELECT_WEAPON3': ord("3"),
    'SELECT_WEAPON4': ord("4"),
    'SELECT_WEAPON5': ord("5"),
    'SELECT_WEAPON6': ord("6"),
    'SELECT_WEAPON7': ord("7"),
    'SELECT_WEAPON8': ord("8"),
    'SELECT_WEAPON9': ord("9"),
    'SPEED': 304,  # shift
    'STRAFE': 9,  # tab
    'TURN180': ord("u"),
    'TURN_LEFT': ord("a"),  # left arrow
    'TURN_RIGHT': ord("d"),  # right arrow
    'USE': ord("f"),
}

DoomInputFilter = InputFilter(is_a_reference_filter=True)
DoomInputFilter.add_observation_filter(
    'observation', 'rescaling',
    ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([60, 76, 3]), high=255)))
DoomInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
DoomInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
DoomInputFilter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(3))

DoomOutputFilter = OutputFilter(is_a_reference_filter=True)
DoomOutputFilter.add_action_filter('to_discrete', FullDiscreteActionSpaceMap())
# Starcraft Constants
_NOOP = actions.FUNCTIONS.no_op.id
_MOVE_SCREEN = actions.FUNCTIONS.Move_screen.id
_SELECT_ARMY = actions.FUNCTIONS.select_army.id
_PLAYER_RELATIVE = features.SCREEN_FEATURES.player_relative.index
_NOT_QUEUED = [0]
_SELECT_ALL = [0]


class StarcraftObservationType(Enum):
    Features = 0
    RGB = 1


StarcraftInputFilter = InputFilter(is_a_reference_filter=True)
StarcraftInputFilter.add_observation_filter('screen', 'move_axis', ObservationMoveAxisFilter(0, -1))
StarcraftInputFilter.add_observation_filter(
    'screen', 'rescaling',
    ObservationRescaleToSizeFilter(
        PlanarMapsObservationSpace(np.array([84, 84, 1]), low=0, high=255, channels_axis=-1)))
StarcraftInputFilter.add_observation_filter('screen', 'to_uint8', ObservationToUInt8Filter(0, 255))

StarcraftInputFilter.add_observation_filter('minimap', 'move_axis', ObservationMoveAxisFilter(0, -1))
StarcraftInputFilter.add_observation_filter(
def stack_filter():
    stack_filter = InputFilter()
    stack_filter.add_observation_filter('observation', 'stack', ObservationStackingFilter(4, stacking_axis=-1))
    return stack_filter
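# A minimal sketch of a test that could consume the helper above (assuming it is
# registered as a pytest fixture, as the surrounding tests suggest), mirroring the
# stacking behavior exercised in test_filter_stacking; the observation shape is
# chosen for illustration only.
def test_stack_filter_shape(stack_filter):
    env_response = EnvResponse(next_state={'observation': np.ones([84, 84])}, reward=0, game_over=False)
    result = stack_filter.filter(env_response)[0]
    # a single pass fills the stack, so the observation gains a trailing axis of size 4
    observation = np.array(result.next_state['observation'])
    assert observation.shape == (84, 84, 4)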
    agent_params.network_wrappers['actor'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['critic'].input_embedders_parameters.pop('observation')

agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'].scheme = [Dense(300)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(200)]
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'].scheme = [Dense(400)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter("rescale", RewardRescaleFilter(1 / 10.))

###############
# Environment #
###############
env_params = ControlSuiteEnvironmentParameters(level=SingleLevelSelection(control_suite_envs))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.trace_test_levels = ['cartpole:swingup', 'hopper:hop']
    1.0, 0, 1000000)
agent_params.algorithm.beta_entropy = 0.01  # also try 0.001
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.999
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)

agent_params.exploration = CategoricalParameters()

agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
MeiroRunnerFilter = InputFilter(is_a_reference_filter=True)

env_params = GymVectorEnvironment()
env_params.level = 'RoboMaker-MeiroRunner-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000
agent_params.algorithm.optimization_epochs = 5
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)

# huber loss
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

agent_params.exploration = CategoricalParameters()
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

###############
# Environment #
###############
SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
# SilverstoneInputFilter.add_observation_filter('left_camera', 'to_grayscale', ObservationRGBToYFilter())
# SilverstoneInputFilter.add_observation_filter('left_camera', 'to_uint8', ObservationToUInt8Filter(0, 255))
# SilverstoneInputFilter.add_observation_filter('left_camera', 'stacking', ObservationStackingFilter(1))
SilverstoneInputFilter.add_observation_filter('STEREO_CAMERAS', 'to_uint8', ObservationToUInt8Filter(0, 255))
SilverstoneInputFilter.add_observation_filter('LIDAR', 'clipping', ObservationClippingFilter(0.1, 0.5))

env_params = GymVectorEnvironment()
env_params.default_input_filter = SilverstoneInputFilter
env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False
def rgb_to_y_filter():
    rgb_to_y_filter = InputFilter()
    rgb_to_y_filter.add_observation_filter('observation', 'rgb_to_y', ObservationRGBToYFilter())
    return rgb_to_y_filter
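# A minimal sketch of a test that could consume the helper above (again assuming a
# pytest fixture); the input shape is illustrative, and the expectation that the Y
# output drops the channel axis follows the Atari/Doom pipelines defined elsewhere here.
def test_rgb_to_y_shape(rgb_to_y_filter):
    env_response = EnvResponse(next_state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    result = rgb_to_y_filter.filter(env_response)[0]
    # the RGB observation is converted to a single luminance (Y) channel
    assert result.next_state['observation'].shape == (20, 30)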
agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
agent_params.algorithm.beta_entropy = 0.01  # also try 0.001
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.999
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)

agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
turtlebot3_input_filter = InputFilter(is_a_reference_filter=True)
turtlebot3_input_filter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
turtlebot3_input_filter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
turtlebot3_input_filter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = turtlebot3_input_filter
env_params.level = 'SageMaker-TurtleBot3-Discrete-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
TOWN1 = {"map_name": "Town01", "map_path": "/Game/Maps/Town01"}
TOWN2 = {"map_name": "Town02", "map_path": "/Game/Maps/Town02"}

key_map = {
    'BRAKE': (274,),  # down arrow
    'GAS': (273,),  # up arrow
    'TURN_LEFT': (276,),  # left arrow
    'TURN_RIGHT': (275,),  # right arrow
    'GAS_AND_TURN_LEFT': (273, 276),
    'GAS_AND_TURN_RIGHT': (273, 275),
    'BRAKE_AND_TURN_LEFT': (274, 276),
    'BRAKE_AND_TURN_RIGHT': (274, 275),
}

CarlaInputFilter = InputFilter(is_a_reference_filter=True)
CarlaInputFilter.add_observation_filter(
    'forward_camera', 'rescaling',
    ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([128, 180, 3]), high=255)))
CarlaInputFilter.add_observation_filter('forward_camera', 'to_grayscale', ObservationRGBToYFilter())
CarlaInputFilter.add_observation_filter('forward_camera', 'to_uint8', ObservationToUInt8Filter(0, 255))
CarlaInputFilter.add_observation_filter('forward_camera', 'stacking', ObservationStackingFilter(4))

CarlaOutputFilter = NoOutputFilter()


class CameraTypes(Enum):
def get_graph_manager(**hp_dict):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["lr_decay_rate"] = float(hp_dict.get("lr_decay_rate", 0))
    params["lr_decay_steps"] = float(hp_dict.get("lr_decay_steps", 0))
    params["exploration_type"] = (hp_dict.get("exploration_type", "categorical")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))
    params["tensorboard"] = hp_dict.get("tensorboard", False)
    params["dump_mp4"] = hp_dict.get("dump_mp4", False)
    params["dump_gifs"] = hp_dict.get("dump_gifs", False)

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()
    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].learning_rate_decay_rate = params["lr_decay_rate"]
    agent_params.network_wrappers['main'].learning_rate_decay_steps = params["lr_decay_steps"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    # Replace the default CNN with single layer Conv2d(32, 3, 1)
    # agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = EmbedderScheme.Shallow
    # agent_params.network_wrappers['main'].input_embedders_parameters['observation'].dropout_rate = 0.3
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
    # agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Shallow
    # agent_params.network_wrappers['main'].middleware_parameters.dropout_rate = 0.3
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
    # agent_params.network_wrappers['main'].l2_regularization = 2e-5
    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, params["e_greedy_value"],
                                                                   params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
    # Add an observation image perturbation for many aspects
    # DeepRacerInputFilter.add_observation_filter('observation', 'perturb_color', ObservationColorPerturbation(0.2))
    # Rescale to much smaller input when using shallow networks to avoid OOM
    # DeepRacerInputFilter.add_observation_filter('observation', 'rescaling',
    #                                             ObservationRescaleToSizeFilter(
    #                                                 ImageObservationSpace(np.array([84, 84, 3]), high=255)))
    DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                                ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = DeepRacerInputFilter
    env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

    vis_params = VisualizationParameters()
    vis_params.tensorboard = params["tensorboard"]
    vis_params.dump_mp4 = params["dump_mp4"]
    vis_params.dump_gifs = params["dump_gifs"]
    # AlwaysDumpFilter, MaxDumpFilter, EveryNEpisodesDumpFilter, SelectedPhaseOnlyDumpFilter
    vis_params.video_dump_filters = [AlwaysDumpFilter()]

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                        env_params=env_params,
                                        schedule_params=schedule_params,
                                        vis_params=vis_params,
                                        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
BulletOutputFilter = NoOutputFilter()


class Bullet(GymEnvironmentParameters):
    def __init__(self):
        super().__init__()
        self.frame_skip = 1
        self.default_input_filter = BulletInputFilter
        self.default_output_filter = BulletOutputFilter


"""
Atari Environment Components
"""

AtariInputFilter = InputFilter(is_a_reference_filter=True)
AtariInputFilter.add_reward_filter('clipping', RewardClippingFilter(-1.0, 1.0))
AtariInputFilter.add_observation_filter(
    'observation', 'rescaling',
    ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([84, 84, 3]), high=255)))
AtariInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
AtariInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
AtariInputFilter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(4))
AtariOutputFilter = NoOutputFilter()


class Atari(GymEnvironmentParameters):
agent_params.network_wrappers['main'].clip_gradients = 1000
agent_params.network_wrappers['main'].gradients_clipping_method = GradientClippingMethod.ClipByValue

###############
# Environment #
###############
import jsbsim
import gym_jsbsim

from rl_coach.filters.filter import NoInputFilter, NoOutputFilter
from rl_coach.filters.filter import InputFilter
from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter

vis_params = VisualizationParameters(native_rendering=True)

input = InputFilter(is_a_reference_filter=True)
input.add_observation_filter('observation', 'stacking', ObservationStackingFilter(10))


class MyGymVectorEnvironment(GymVectorEnvironment):
    def __init__(self, level=None):
        super().__init__(level=level)
        self.frame_skip = 1
        self.default_input_filter = NoInputFilter()  # hrmm.. my custom input filter errored out
        self.default_output_filter = NoOutputFilter()


env_params = MyGymVectorEnvironment(level='JSBSim-HeadingControlTask-Cessna172P-Shaping.EXTRA-FG-v0')
def get_graph_manager(hp_dict, agent_list, run_phase_subject):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type", "categorical")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    trainable_agents_list = list()
    non_trainable_agents_list = list()

    for agent in agent_list:
        agent_params = DeepRacerAgentParams()
        if agent.network_settings:
            agent_params.env_agent = agent
            agent_params.network_wrappers['main'].learning_rate = params["lr"]

            agent_params.network_wrappers['main'].input_embedders_parameters = \
                create_input_embedder(agent.network_settings['input_embedders'],
                                      agent.network_settings['embedder_type'],
                                      agent.network_settings['activation_function'])
            agent_params.network_wrappers['main'].middleware_parameters = \
                create_middle_embedder(agent.network_settings['middleware_embedders'],
                                       agent.network_settings['embedder_type'],
                                       agent.network_settings['activation_function'])

            input_filter = InputFilter(is_a_reference_filter=True)
            for observation in agent.network_settings['input_embedders'].keys():
                if observation == Input.LEFT_CAMERA.value or observation == Input.CAMERA.value or \
                        observation == Input.OBSERVATION.value:
                    input_filter.add_observation_filter(observation, 'to_grayscale', ObservationRGBToYFilter())
                    input_filter.add_observation_filter(observation, 'to_uint8', ObservationToUInt8Filter(0, 255))
                    input_filter.add_observation_filter(observation, 'stacking', ObservationStackingFilter(1))
                if observation == Input.STEREO.value:
                    input_filter.add_observation_filter(observation, 'to_uint8', ObservationToUInt8Filter(0, 255))
                if observation == Input.LIDAR.value:
                    input_filter.add_observation_filter(observation, 'clipping', ObservationClippingFilter(0.15, 1.0))
                if observation == Input.SECTOR_LIDAR.value:
                    input_filter.add_observation_filter(observation, 'binary', ObservationBinarySectorFilter())
            agent_params.input_filter = input_filter()

            agent_params.network_wrappers['main'].batch_size = params["batch_size"]
            agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
            agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
            if params["loss_type"] == "huber":
                agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

            agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
            agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
            agent_params.algorithm.beta_entropy = params["beta_entropy"]
            agent_params.algorithm.gae_lambda = 0.95
            agent_params.algorithm.discount = params["discount_factor"]
            agent_params.algorithm.optimization_epochs = params["num_epochs"]
            agent_params.algorithm.estimate_state_value_using_gae = True
            agent_params.algorithm.num_steps_between_copying_online_weights_to_target = \
                EnvironmentEpisodes(params["num_episodes_between_training"])
            agent_params.algorithm.num_consecutive_playing_steps = \
                EnvironmentEpisodes(params["num_episodes_between_training"])
            agent_params.algorithm.distributed_coach_synchronization_type = \
                DistributedCoachSynchronizationType.SYNC

            if params["exploration_type"] == "categorical":
                agent_params.exploration = CategoricalParameters()
            else:
                agent_params.exploration = EGreedyParameters()
                agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, params["e_greedy_value"],
                                                                           params["epsilon_steps"])

            trainable_agents_list.append(agent_params)
        else:
            non_trainable_agents_list.append(agent)

    ###############
    # Environment #
    ###############
    env_params = DeepRacerRacetrackEnvParameters()
    env_params.agents_params = trainable_agents_list
    env_params.non_trainable_agents = non_trainable_agents_list
    env_params.level = 'DeepRacerRacetrackEnv-v0'
    env_params.run_phase_subject = run_phase_subject

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = MultiAgentGraphManager(agents_params=trainable_agents_list,
                                           env_params=env_params,
                                           schedule_params=schedule_params,
                                           vis_params=vis_params,
                                           preset_validation_params=preset_validation_params)
    return graph_manager, params_json
def test_filter():
    # squeeze out singleton axes from an observation
    squeeze_filter = InputFilter()
    squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter())

    squeeze_filter_with_axis = InputFilter()
    squeeze_filter_with_axis.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter(2))

    observation = np.random.rand(20, 30, 1, 3)
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
    result = squeeze_filter.filter(env_response)[0]
    result_with_axis = squeeze_filter_with_axis.filter(env_response)[0]

    unfiltered_observation_shape = env_response.next_state['observation'].shape
    filtered_observation_shape = result.next_state['observation'].shape
    filtered_observation_with_axis_shape = result_with_axis.next_state['observation'].shape

    # make sure the original observation is unchanged
    assert unfiltered_observation_shape == observation.shape

    # make sure the filtering is done correctly
    assert filtered_observation_shape == (20, 30, 3)
    assert filtered_observation_with_axis_shape == (20, 30, 3)

    observation = np.random.rand(1, 30, 1, 3)
    env_response = EnvResponse(next_state={'observation': observation}, reward=0, game_over=False)
    result = squeeze_filter.filter(env_response)[0]
    assert result.next_state['observation'].shape == (30, 3)
agent_params.memory.goals_space = GoalsSpace(goal_name='achieved_goal',
                                             reward_type=ReachingGoal(distance_from_goal_threshold=0.05,
                                                                      goal_reaching_reward=0,
                                                                      default_reward=-1),
                                             distance_metric=GoalsSpace.DistanceMetric.Euclidean)
agent_params.memory.shared_memory = True

# exploration parameters
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.3)
agent_params.exploration.evaluation_epsilon = 0
# they actually take the noise_schedule to be 0.2 * max_abs_range which is 0.1 * total_range
agent_params.exploration.continuous_exploration_policy_parameters.noise_schedule = ConstantSchedule(0.1)
agent_params.exploration.continuous_exploration_policy_parameters.evaluation_noise = 0

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('observation', 'clipping', ObservationClippingFilter(-200, 200))

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation',
                                                        ObservationNormalizationFilter(name='normalize_observation'))
agent_params.pre_network_filter.add_observation_filter('achieved_goal', 'normalize_achieved_goal',
                                                        ObservationNormalizationFilter(name='normalize_achieved_goal'))
agent_params.pre_network_filter.add_observation_filter('desired_goal', 'normalize_desired_goal',
                                                        ObservationNormalizationFilter(name='normalize_desired_goal'))

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(fetch_v1))
env_params.custom_reward_threshold = -49
def test_get_filtered_observation_space():
    # error on observation space with shape not matching the filter squeeze axis configuration
    squeeze_filter = InputFilter()
    squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter(axis=3))

    observation_space = ObservationSpace(np.array([20, 1, 30, 3]), 0, 100)
    small_observation_space = ObservationSpace(np.array([20, 1, 30]), 0, 100)
    with pytest.raises(ValueError):
        squeeze_filter.get_filtered_observation_space('observation', observation_space)
        squeeze_filter.get_filtered_observation_space('observation', small_observation_space)

    # verify output observation space is correct
    observation_space = ObservationSpace(np.array([1, 2, 3, 1]), 0, 200)
    result = squeeze_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(result.shape == np.array([1, 2, 3]))

    squeeze_filter = InputFilter()
    squeeze_filter.add_observation_filter('observation', 'squeeze', ObservationSqueezeFilter())
    result = squeeze_filter.get_filtered_observation_space('observation', observation_space)
    assert np.all(result.shape == np.array([2, 3]))
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.999
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)

agent_params.exploration = CategoricalParameters()
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

###############
# Environment #
###############
SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
SilverstoneInputFilter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = SilverstoneInputFilter
env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False
def test_filter():
    # Keep
    observation_space = VectorObservationSpace(3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])}, reward=0, game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))
    reduction_filter.get_filtered_observation_space('observation', observation_space)

    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3,)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (1,)

    # Discard
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"], ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard))
    reduction_filter.get_filtered_observation_space('observation', observation_space)

    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3,)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (2,)