def create_scheme(info_dict):
    """Create a custom scheme whose first layers are convolutional and last
    layers are dense.

    Args:
        info_dict: dictionary containing the following entries:
            conv_info_list - list of lists where each embedded list holds the
                number of filters, kernel size, and stride. An embedded list
                with fewer than 3 items produces an exception. The length of
                the outer list is the desired number of convolutional layers.
            dense_layer__hidden_unit_list - list whose length is the number of
                dense layers appended after the convolutional layers and whose
                values are the hidden-unit counts.

    Returns:
        list: the Conv2d layers followed by the Dense layers.

    Raises:
        Exception: if a required key is missing, an info list cannot be
            unpacked, or layer construction fails.
    """
    try:
        scheme = list()
        # Add the convolutional layers first
        for conv_info in info_dict[SchemeInfo.CONV_INFO_LIST.value]:
            # Raises ValueError when conv_info has fewer/more than 3 items.
            num_filters, kernel_size, strides = tuple(conv_info)
            scheme.append(Conv2d(num_filters, kernel_size, strides))
        # Then append the dense layers
        for hidden_units in info_dict[SchemeInfo.DENSE_LAYER_INFO_LIST.value]:
            scheme.append(Dense(hidden_units))
        return scheme
    except KeyError as err:
        # Chain the original exception so the root cause stays visible.
        raise Exception("Info, key {} not found".format(err.args[0])) from err
    except ValueError as err:
        raise Exception("Error while unpacking info: {}".format(err)) from err
    except Exception as err:
        raise Exception("Error while creating scheme: {}".format(err)) from err
# Preset fragment: algorithm/exploration flags plus an actor network that
# embeds a camera observation through a small conv stack.
agent_params.algorithm.use_non_zero_discount_for_terminal_states = True
# Noise decays linearly from 1.5 to 0.5 over 300k steps.
agent_params.exploration.noise_schedule = LinearSchedule(1.5, 0.5, 300000)
agent_params.input_filter = NoInputFilter()
agent_params.output_filter = NoOutputFilter()

# Camera observation pre-processing network scheme
camera_obs_scheme = [
    Conv2d(32, 8, 4),
    BatchnormActivationDropout(activation_function='relu'),
    Conv2d(64, 4, 2),
    BatchnormActivationDropout(activation_function='relu'),
    Conv2d(64, 3, 1),
    BatchnormActivationDropout(activation_function='relu'),
    Flatten(),
    Dense(256),
    BatchnormActivationDropout(activation_function='relu')
]

# Actor
actor_network = agent_params.network_wrappers['actor']
actor_network.input_embedders_parameters = {
    # Measurements pass through unembedded.
    'measurements': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    # The camera key is whatever the algorithm exposes as its obs key;
    # activations are supplied by the BatchnormActivationDropout layers above.
    agent_params.algorithm.agent_obs_key: InputEmbedderParameters(
        scheme=camera_obs_scheme, activation_function='none')
}
actor_network.middleware_parameters.scheme = [Dense(300), Dense(200)]
actor_network.learning_rate = 1e-4
#################### # Graph Scheduling # #################### schedule_params = ScheduleParameters() schedule_params.improve_steps = EnvironmentSteps(1000000) schedule_params.steps_between_evaluation_periods = EnvironmentSteps(5000) schedule_params.evaluation_steps = EnvironmentEpisodes(10) schedule_params.heatup_steps = EnvironmentSteps(10000) ######### # Agent # ######### agent_params = TD3AgentParameters() agent_params.network_wrappers['actor'].input_embedders_parameters[ 'observation'].scheme = [Dense(400)] agent_params.network_wrappers['actor'].middleware_parameters.scheme = [ Dense(300) ] agent_params.network_wrappers['critic'].input_embedders_parameters[ 'observation'].scheme = EmbedderScheme.Empty agent_params.network_wrappers['critic'].input_embedders_parameters[ 'action'].scheme = EmbedderScheme.Empty agent_params.network_wrappers['critic'].middleware_parameters.scheme = [ Dense(400), Dense(300) ] ############### # Environment # ###############
# Preset fragment: Clipped PPO with stereo-camera and lidar input embedders.
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].input_embedders_parameters = {
    # 'left_camera': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3),
    # Stereo cameras use the library's default embedder scheme.
    'STEREO_CAMERAS': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3),
    # 'stereo': InputEmbedderParameters(
    #     scheme=[
    #         #Conv2d(32, 8, 4),
    #         Conv2d(32, 4, 2),
    #         Conv2d(64, 4, 2),
    #         #Conv2d(64, 3, 1),
    #         Conv2d(64, 2, 1)
    #     ],
    #     activation_function='relu', dropout_rate=0.3)
    'LIDAR': InputEmbedderParameters(
        scheme=[
            Dense(64)
        ],
        activation_function='relu', dropout_rate=0.3)
}
agent_params.network_wrappers['main'].middleware_parameters = \
    FCMiddlewareParameters(
        scheme=[
            Dense(512)
        ],
        activation_function='relu', dropout_rate=0.3
    )
agent_params.network_wrappers['main'].learning_rate = 0.0003
#agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
agent_params.network_wrappers['main'].batch_size = 64
# Preset fragment: actor-critic with an LSTM middleware on Mujoco.
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense(200)])
# Recurrent middleware: 128 LSTM cells, no extra dense layers before it.
agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(
    scheme=MiddlewareScheme.Empty, number_of_lstm_cells=128)
agent_params.input_filter = InputFilter()
# Rewards are rescaled by 1/20 and observations normalized online.
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
# Algorithm settings for the autoscaling-webservice preset.
# NOTE(review): the original assigned apply_gradients_every_x_episodes twice
# (both = 1) and beta_entropy twice (0.01 then 0.05). Only the final values
# take effect, so the dead earlier assignments were removed.
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.99
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.05
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(2048)

# Network: single observation embedder, tanh activations throughout.
agent_params.network_wrappers["main"].learning_rate = 0.0003
agent_params.network_wrappers["main"].input_embedders_parameters[
    "observation"
].activation_function = "tanh"
agent_params.network_wrappers["main"].input_embedders_parameters["observation"].scheme = [Dense(64)]
agent_params.network_wrappers["main"].middleware_parameters.scheme = [Dense(64)]
agent_params.network_wrappers["main"].middleware_parameters.activation_function = "tanh"
agent_params.network_wrappers["main"].batch_size = 64
agent_params.network_wrappers["main"].optimizer_epsilon = 1e-5
agent_params.network_wrappers["main"].clip_gradients = 40.0

# Exploration: e-greedy annealed 1.0 -> 0.01 over 10k steps.
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

###############
# Environment #
###############
env_params = GymVectorEnvironment(level="autoscalesim:SimpleScalableWebserviceSim")
########
schedule_params.heatup_steps = EnvironmentSteps(0) ######### # Agent # ######### agent_params = ClippedPPOAgentParameters() agent_params.network_wrappers['main'].input_embedders_parameters = { 'STEREO_CAMERAS': InputEmbedderParameters( scheme=[Conv2d(32, 8, 4), Conv2d(32, 4, 2), Conv2d(64, 4, 2)], activation_function='relu', dropout_rate=0.3), 'LIDAR': InputEmbedderParameters(scheme=[Dense(64), Dense(32)], activation_function='relu', dropout_rate=0.3) } agent_params.network_wrappers['main'].middleware_parameters = \ FCMiddlewareParameters( scheme=[ Dense(256) ], activation_function='relu', dropout_rate=0.3 ) agent_params.network_wrappers['main'].learning_rate = 0.0003 #agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu' agent_params.network_wrappers['main'].batch_size = 64
# Preset fragment: DDPG on the DeepMind Control Suite.
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = DDPGAgentParameters()
# Rename the default 'observation' embedder to 'measurements' on both
# networks (pop returns the existing embedder parameters object).
agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['actor'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'] = \
    agent_params.network_wrappers['critic'].input_embedders_parameters.pop('observation')
agent_params.network_wrappers['actor'].input_embedders_parameters['measurements'].scheme = [Dense(300)]
agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(200)]
agent_params.network_wrappers['critic'].input_embedders_parameters['measurements'].scheme = [Dense(400)]
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)]
agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty
agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter("rescale", RewardRescaleFilter(1/10.))

###############
# Environment #
###############
env_params = ControlSuiteEnvironmentParameters(level=SingleLevelSelection(control_suite_envs))

########
# Test #
########
# Preset fragment: Soft Actor-Critic with a camera observation renamed to
# 'forward_camera' on the value and critic networks.
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(1000)

#########
# Agent #
#########
agent_params = SoftActorCriticAgentParameters()
# override default parameters:
# value (v) networks parameters
agent_params.network_wrappers['v'].batch_size = 32
agent_params.network_wrappers['v'].learning_rate = 0.0003
agent_params.network_wrappers['v'].middleware_parameters.scheme = [Dense(32)]
agent_params.network_wrappers['v'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['v'].adam_optimizer_beta2 = 0.999
agent_params.network_wrappers['v'].input_embedders_parameters['forward_camera'] = \
    agent_params.network_wrappers['v'].input_embedders_parameters.pop('observation')

# critic (q) network parameters
agent_params.network_wrappers['q'].heads_parameters[0].network_layers_sizes = (
    32, 32)
agent_params.network_wrappers['q'].batch_size = 32
agent_params.network_wrappers['q'].learning_rate = 0.0003
agent_params.network_wrappers['q'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['q'].adam_optimizer_beta2 = 0.999
agent_params.network_wrappers['q'].input_embedders_parameters['forward_camera'] = \
    agent_params.network_wrappers['q'].input_embedders_parameters.pop('observation')
schedule_params.heatup_steps = EnvironmentSteps(0) ######### # Agent # ######### agent_params = DQNAgentParameters() # DQN params agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(100) agent_params.algorithm.discount = 0.99 agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(10) # was 1 # NN configuration agent_params.network_wrappers['main'].learning_rate = 0.001 # was 0.00025 agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Conv2d(32, 2, 1), Conv2d(32, 2, 2), Dense(64)] agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu' agent_params.network_wrappers['main'].input_embedders_parameters['observation'].input_rescaling = {'image': 1.0, 'vector': 1.0, 'tensor': 1.0} agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Empty # ER size agent_params.memory.max_size = (MemoryGranularity.Transitions, 40000) # E-Greedy schedule agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, schedule_params.improve_steps.num_steps) # was 1.0, 0.01, 10000 graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params, schedule_params=schedule_params, vis_params=VisualizationParameters(), preset_validation_params=preset_validation_params)
# Preset fragment: N-step Q learning on Atari.
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)
schedule_params.evaluation_steps = EnvironmentEpisodes(3)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = NStepQAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [Conv2d(16, 8, 4), Conv2d(32, 4, 2)]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense(256)
]

###############
# Environment #
###############
env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.trace_test_levels = [
    'breakout', 'pong', 'space_invaders'
]
# Preset fragment: actor-critic with GAE policy-gradient rescaling.
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE
agent_params.algorithm.discount = 0.99
agent_params.algorithm.apply_gradients_every_x_episodes = 10
agent_params.algorithm.num_steps_between_gradient_updates = 10
agent_params.algorithm.gae_lambda = 1
agent_params.algorithm.beta_entropy = 0.01
agent_params.network_wrappers['main'].optimizer_type = 'Adam'
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [Conv2d(32, 2, 1), Conv2d(32, 2, 2), Dense(64)]
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].activation_function = 'relu'
# Disable rescaling for every input type.
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].input_rescaling = {
        'image': 1.0,
        'vector': 1.0,
        'tensor': 1.0
    }
agent_params.network_wrappers[
    'main'].middleware_parameters.scheme = MiddlewareScheme.Empty

########
# Test #
########
# NOTE(review): preset_validation_params is assumed to be created earlier
# in this file — confirm against the full preset.
preset_validation_params.num_workers = 8
# Conv2d(64, 3, 1), # BatchnormActivationDropout(batchnorm=True, activation_function='relu'), # Dense(512), # BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5), # Dense(512), # BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5) # ], # activation_function='none'), 'lidar': InputEmbedderParameters( scheme=[ # Dense(256), # BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5), # Dense(256), # BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5) Dense(32), BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5), Dense(32), BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5) ], activation_function='none' ) # we define the activation function for each layer explicitly } # agent_params.network_wrappers['main'].middleware_parameters = \ # FCMiddlewareParameters( # scheme=[ # Dense(256), # BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5), # Dense(128),
# Preset fragment: Clipped PPO with tanh activations and a 32-unit network.
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(100)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentEpisodes(10)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers["main"].learning_rate = 0.001
agent_params.network_wrappers["main"].input_embedders_parameters[
    "observation"].activation_function = "tanh"
agent_params.network_wrappers["main"].input_embedders_parameters[
    "observation"].scheme = [Dense(32)]
agent_params.network_wrappers["main"].middleware_parameters.scheme = [
    Dense(32)
]
agent_params.network_wrappers[
    "main"].middleware_parameters.activation_function = "tanh"
agent_params.network_wrappers["main"].batch_size = 256
agent_params.network_wrappers["main"].optimizer_epsilon = 1e-5
agent_params.network_wrappers["main"].adam_optimizer_beta2 = 0.999
agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.3
# Clipping epsilon decays from 0.5 to 0.1 over 500k steps.
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
    0.5, 0.1, 10000 * 50)
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.999
# Preset fragment: Clipped PPO with a deeper stereo-camera conv stack.
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'STEREO_CAMERAS': InputEmbedderParameters(
        scheme=[
            Conv2d(32, 8, 4),
            Conv2d(32, 4, 2),
            Conv2d(64, 4, 2),
            Conv2d(64, 3, 1),
            Conv2d(64, 2, 1),
            Dense(256)
        ],
        activation_function='relu', dropout_rate=0.3),
    'LIDAR': InputEmbedderParameters(
        scheme=[
            Dense(64),
            Dense(32)
        ],
        activation_function='relu', dropout_rate=0.3)
}
# NOTE(review): this call is truncated at the end of this chunk; the closing
# arguments/paren continue outside the visible region.
agent_params.network_wrappers['main'].middleware_parameters = \
    FCMiddlewareParameters(
        scheme=[
            Dense(256)
        ],
#################### # Graph Scheduling # #################### schedule_params = ScheduleParameters() schedule_params.improve_steps = EnvironmentSteps(2000000) schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20) schedule_params.evaluation_steps = EnvironmentEpisodes(1) schedule_params.heatup_steps = EnvironmentSteps(10000) ######### # Agent # ######### agent_params = DDPGAgentParameters() agent_params.network_wrappers['actor'].input_embedders_parameters['observation'].scheme = [Dense(400)] agent_params.network_wrappers['actor'].middleware_parameters.scheme = [Dense(300)] agent_params.network_wrappers['critic'].input_embedders_parameters['observation'].scheme = [Dense(400)] agent_params.network_wrappers['critic'].middleware_parameters.scheme = [Dense(300)] agent_params.network_wrappers['critic'].input_embedders_parameters['action'].scheme = EmbedderScheme.Empty ############### # Environment # ############### env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2)) ######## # Test # ######## preset_validation_params = PresetValidationParameters() preset_validation_params.test = True
############ # DQN Agent ############ agent_params = DDQNAgentParameters() # DQN params agent_params.algorithm.discount = 0.99 agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1) agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1000) # NN configuration agent_params.network_wrappers['main'].batch_size = 32 agent_params.network_wrappers['main'].learning_rate = 0.0001 agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [Dense(512)] agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False agent_params.network_wrappers['main'].heads_parameters = [DuelingQHeadParameters()] agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(512)] # ER size agent_params.memory.max_size = (MemoryGranularity.Transitions, 10000) # E-Greedy schedule agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 40000) ############# # Environment ############# env_params = GymVectorEnvironment(level='trading_env:TradingEnv')
# Preset fragment: Clipped PPO, tanh activations, 32-unit network
# (single-quote variant of the preset above).
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(100)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(10)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentEpisodes(10)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.001
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].activation_function = 'tanh'
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [Dense(32)]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense(32)
]
agent_params.network_wrappers[
    'main'].middleware_parameters.activation_function = 'tanh'
agent_params.network_wrappers['main'].batch_size = 256
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.3
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
    0.5, 0.1, 10000 * 50)
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.999
# Preset fragment: Clipped PPO with a 64-unit tanh network.
schedule_params.improve_steps = TrainingSteps(10000)
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(204)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers["main"].learning_rate = 0.0003
agent_params.network_wrappers["main"].input_embedders_parameters[
    "observation"].activation_function = "tanh"
agent_params.network_wrappers["main"].input_embedders_parameters[
    "observation"].scheme = [Dense(64)]
agent_params.network_wrappers["main"].middleware_parameters.scheme = [
    Dense(64)
]
agent_params.network_wrappers[
    "main"].middleware_parameters.activation_function = "tanh"
agent_params.network_wrappers["main"].batch_size = 64
agent_params.network_wrappers["main"].optimizer_epsilon = 1e-5
agent_params.network_wrappers["main"].adam_optimizer_beta2 = 0.999
agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
# Clipping epsilon decays to zero over 1M steps.
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
    1.0, 0, 1000000)
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.99
            # NOTE(review): fragment — this conv/dense scheme list opens and
            # closes outside this chunk; code tokens left untouched.
            Conv2d(64, 3, 1),
            BatchnormActivationDropout(batchnorm=True, activation_function='tanh'),
            Conv2d(128, 3, 2),
            BatchnormActivationDropout(batchnorm=True, activation_function='tanh'),
            Conv2d(128, 3, 1),
            BatchnormActivationDropout(batchnorm=True, activation_function='tanh'),
            Conv2d(256, 3, 1),
            BatchnormActivationDropout(batchnorm=True, activation_function='tanh'),
            Conv2d(256, 3, 1),
            BatchnormActivationDropout(batchnorm=True, activation_function='tanh'),
            Dense(512),
            BatchnormActivationDropout(activation_function='tanh', dropout_rate=0.3),
            Dense(512),
            BatchnormActivationDropout(activation_function='tanh', dropout_rate=0.3)
        ],
        activation_function= 'none'  # we define the activation function for each layer explicitly
    ),
    'measurements': InputEmbedderParameters(
        scheme=[
            Dense(128),
            BatchnormActivationDropout(activation_function='tanh', dropout_rate=0.5),
# Agent # ######### agent_params = ClippedPPOAgentParameters() # added agent_params.network_wrappers['main'].input_embedders_parameters[ 'observation'].scheme = [ Conv2d(32, 8, 4), BatchnormActivationDropout(batchnorm=True, activation_function='relu'), Conv2d(32, 4, 2), BatchnormActivationDropout(batchnorm=True, activation_function='relu'), Conv2d(64, 4, 2), BatchnormActivationDropout(batchnorm=True, activation_function='relu'), Conv2d(64, 3, 1), BatchnormActivationDropout(batchnorm=True, activation_function='relu'), Dense(512), BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5), Dense(512), BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5) ] # agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Deep agent_params.network_wrappers['main'].middleware_parameters.scheme = [ Dense(128), BatchnormActivationDropout(activation_function='relu', dropout_rate=0.4), Dense(128), BatchnormActivationDropout(activation_function='relu', dropout_rate=0.4), Dense(128), BatchnormActivationDropout(activation_function='relu', dropout_rate=0.4), ]
####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(16 * 50 * 200)  # 200 epochs
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(16 * 50)  # 50 cycles
schedule_params.evaluation_steps = EnvironmentEpisodes(10)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = DQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.001
agent_params.network_wrappers['main'].batch_size = 128
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(256)]
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'state': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05

# BUG FIX: install the hindsight-experience-replay memory BEFORE configuring
# memory attributes. The original set max_size on the default memory object
# and then replaced agent_params.memory entirely, silently discarding the
# configured size limit.
agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Final
agent_params.memory.hindsight_transitions_per_regular_transition = 1

# Constant exploration with greedy evaluation.
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.2)
agent_params.exploration.evaluation_epsilon = 0
# Preset fragment: Clipped PPO with a 64-unit tanh network
# (single-quote variant with a longer schedule).
schedule_params.improve_steps = TrainingSteps(100000)
schedule_params.steps_between_evaluation_periods = EnvironmentSteps(2000)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.0003
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].activation_function = 'tanh'
agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].scheme = [Dense(64)]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense(64)
]
agent_params.network_wrappers[
    'main'].middleware_parameters.activation_function = 'tanh'
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
    1.0, 0, 1000000)
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.99
def create_batchnorm_scheme(info_dict):
    """Create a scheme where every other layer is a batchnorm layer, with
    convolutional layers first and dense layers last.

    Args:
        info_dict: dictionary containing the following entries:
            conv_info_list - list of lists where each embedded list holds the
                number of filters, kernel size, and stride. An embedded list
                with fewer than 3 items produces an exception. The length of
                the outer list is the desired number of convolutional layers.
            dense_layer__hidden_unit_list - list whose length is the number of
                dense layers used after the convolutional layers and whose
                values are the hidden-unit counts.
            bn_info_conv - list of [use_batchnorm (bool), activation function
                name (str), dropout rate] for the convolutional part of the
                architecture; fewer than 3 items causes an exception.
            bn_info_dense - same triple for the dense part of the
                architecture; fewer than 3 items causes an exception.
            is_first_layer_bn - True if the first layer of the scheme should
                be a batchnorm layer.

    Returns:
        list: alternating Conv2d/Dense and BatchnormActivationDropout layers.

    Raises:
        Exception: if a required key is missing, an info list cannot be
            unpacked, the activation function is unknown, or layer
            construction fails.
    """
    try:
        batchnorm, activation_function, dropout_rate = tuple(
            info_dict[SchemeInfo.BN_INFO_CONV.value]
        )
        if not ActivationFunctions.has_activation_function(activation_function):
            raise Exception("Invalid activation function for batchnorm scheme")

        def _bn_layer():
            # Build a batchnorm layer from the currently-unpacked settings;
            # reads the enclosing variables late, so rebinding them below
            # switches to the dense-part configuration automatically.
            return BatchnormActivationDropout(
                batchnorm=batchnorm,
                activation_function=activation_function,
                dropout_rate=dropout_rate,
            )

        scheme = list()
        if info_dict[SchemeInfo.IS_FIRST_LAYER_BN.value]:
            scheme.append(_bn_layer())
        # Add the convolutional layers first, each followed by a batchnorm.
        for conv_info in info_dict[SchemeInfo.CONV_INFO_LIST.value]:
            num_filters, kernel_size, strides = tuple(conv_info)
            scheme.append(Conv2d(num_filters, kernel_size, strides))
            scheme.append(_bn_layer())
        # Switch to the dense-part batchnorm configuration.
        batchnorm, activation_function, dropout_rate = tuple(
            info_dict[SchemeInfo.BN_INFO_DENSE.value]
        )
        if not ActivationFunctions.has_activation_function(activation_function):
            raise Exception("Invalid activation function for batchnorm scheme")
        for hidden_units in info_dict[SchemeInfo.DENSE_LAYER_INFO_LIST.value]:
            scheme.append(Dense(hidden_units))
            scheme.append(_bn_layer())
        return scheme
    except KeyError as err:
        # Chain the original exception so the root cause stays visible.
        raise Exception("Info, key {} not found".format(err.args[0])) from err
    except ValueError as err:
        raise Exception("Error while unpacking info: {}".format(err)) from err
    except Exception as err:
        raise Exception("Error while creating scheme: {}".format(err)) from err
# Agent Params # ################ agent_params = DDPGAgentParameters() # actor actor_network = agent_params.network_wrappers['actor'] actor_network.learning_rate = 0.001 actor_network.batch_size = 256 actor_network.optimizer_epsilon = 1e-08 actor_network.adam_optimizer_beta1 = 0.9 actor_network.adam_optimizer_beta2 = 0.999 actor_network.input_embedders_parameters = { 'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty), 'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty) } actor_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)]) actor_network.heads_parameters[0].batchnorm = False # critic critic_network = agent_params.network_wrappers['critic'] critic_network.learning_rate = 0.001 critic_network.batch_size = 256 critic_network.optimizer_epsilon = 1e-08 critic_network.adam_optimizer_beta1 = 0.9 critic_network.adam_optimizer_beta2 = 0.999 critic_network.input_embedders_parameters = { 'action': InputEmbedderParameters(scheme=EmbedderScheme.Empty), 'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty), 'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty) } critic_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)])
######### agent_params = ClippedPPOAgentParameters() agent_params.network_wrappers['main'].input_embedders_parameters = { # 'left_camera': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3), 'STEREO_CAMERAS': InputEmbedderParameters(scheme=[ Conv2d(32, 8, 4), Conv2d(32, 4, 2), Conv2d(64, 4, 2), Conv2d(64, 3, 1), Conv2d(64, 2, 1) ], activation_function='relu', dropout_rate=0.3), 'LIDAR': InputEmbedderParameters(scheme=[Dense(64)], activation_function='relu', dropout_rate=0.3) } agent_params.network_wrappers['main'].middleware_parameters = \ FCMiddlewareParameters( scheme=[ Dense(512) ], activation_function='relu', dropout_rate=0.3 ) agent_params.network_wrappers['main'].learning_rate = 0.0003 #agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu' agent_params.network_wrappers['main'].batch_size = 64