def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {
        'observation': InputEmbedderParameters(batchnorm=True),
        'action': InputEmbedderParameters(scheme=EmbedderScheme.Shallow)
    }
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [VHeadParameters()]
    self.optimizer_type = 'Adam'
    self.batch_size = 64
    self.async_training = False
    self.learning_rate = 0.001
    self.create_target_network = True
    self.shared_optimizer = True
    self.scale_down_gradients_by_number_of_workers_for_sync_training = False
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [DNDQHeadParameters()]
    self.optimizer_type = 'Adam'
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [PolicyHeadParameters()]
    self.async_training = True
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [DNDQHeadParameters()]
    self.loss_weights = [1.0]
    self.rescale_gradient_from_head_by_factor = [1]
    self.optimizer_type = 'Adam'
def __init__(self, scheme=MiddlewareScheme.Medium, dense_layer=Dense):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters(scheme=scheme, dense_layer=dense_layer)
    self.heads_parameters = [DNDQHeadParameters()]
    self.optimizer_type = 'Adam'
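# Hypothetical usage sketch (not from the repo): because the constructor above
# exposes `scheme` and `dense_layer`, a preset can customize the middleware
# topology without subclassing. The class name `NECNetworkParameters` is an
# assumption for illustration; substitute the actual enclosing class:
#
#     network_params = NECNetworkParameters(scheme=MiddlewareScheme.Deep)
#     network_params.learning_rate = 0.00025
#     agent_params.network_wrappers['main'] = network_params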
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.input_embedders_parameters['observation'].scheme = EmbedderScheme.Medium
    self.middleware_parameters = FCMiddlewareParameters()
    self.optimizer_type = 'Adam'
    self.batch_size = 32
    self.replace_mse_with_huber_loss = False
    self.create_target_network = False
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Medium)
    self.heads_parameters = [QHeadParameters()]
    self.loss_weights = [1.0]
    self.optimizer_type = 'Adam'
    self.batch_size = 32
    self.replace_mse_with_huber_loss = True
    self.create_target_network = True
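# `replace_mse_with_huber_loss = True` swaps the Q-head's MSE for the Huber loss,
# which is quadratic for small TD errors and linear for large ones, limiting the
# effect of outlier targets (the standard DQN trick). A minimal NumPy sketch of
# the loss itself (illustrative, not Coach's implementation):
import numpy as np

def huber_loss(td_error, delta=1.0):
    quadratic = np.minimum(np.abs(td_error), delta)
    linear = np.abs(td_error) - quadratic
    return 0.5 * quadratic ** 2 + delta * linear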
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [NAFHeadParameters()]
    self.optimizer_type = 'Adam'
    self.learning_rate = 0.001
    self.async_training = True
    self.create_target_network = True
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='tanh')}
    self.middleware_parameters = FCMiddlewareParameters(activation_function='tanh')
    self.heads_parameters = [VHeadParameters()]
    self.async_training = True
    self.l2_regularization = 0
    self.create_target_network = True
    self.batch_size = 128
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [QHeadParameters()]
    self.loss_weights = [1.0]
    self.optimizer_type = 'Adam'
    self.async_training = True
    self.shared_optimizer = True
    self.create_target_network = True
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [VHeadParameters(), PolicyHeadParameters()]
    self.loss_weights = [0.5, 1.0]
    self.rescale_gradient_from_head_by_factor = [1, 1]
    self.optimizer_type = 'Adam'
    self.clip_gradients = 40.0
    self.async_training = True
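# A minimal sketch (assumed semantics, not Coach's internal code) of how the
# per-head `loss_weights` above combine into the single optimized scalar:
# 0.5 * value (critic) loss + 1.0 * policy (actor) loss.
def weighted_total_loss(value_loss, policy_loss, loss_weights=(0.5, 1.0)):
    return loss_weights[0] * value_loss + loss_weights[1] * policy_loss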
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {
        'observation': InputEmbedderParameters(activation_function='leaky_relu'),
        'measurements': InputEmbedderParameters(activation_function='leaky_relu'),
        'goal': InputEmbedderParameters(activation_function='leaky_relu')
    }
    self.input_embedders_parameters['observation'].scheme = [
        Conv2d([32, 8, 4]),
        Conv2d([64, 4, 2]),
        Conv2d([64, 3, 1]),
        Dense([512]),
    ]
    self.input_embedders_parameters['measurements'].scheme = [
        Dense([128]),
        Dense([128]),
        Dense([128]),
    ]
    self.input_embedders_parameters['goal'].scheme = [
        Dense([128]),
        Dense([128]),
        Dense([128]),
    ]
    self.middleware_parameters = FCMiddlewareParameters(activation_function='leaky_relu',
                                                        scheme=MiddlewareScheme.Empty)
    self.heads_parameters = [MeasurementsPredictionHeadParameters(activation_function='leaky_relu')]
    self.loss_weights = [1.0]
    self.async_training = False
    self.batch_size = 64
    self.adam_optimizer_beta1 = 0.95
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [
        VHeadParameters(loss_weight=0.5),
        PolicyHeadParameters(loss_weight=1.0)
    ]
    self.optimizer_type = 'Adam'
    self.clip_gradients = 40.0
    self.async_training = True
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters(batchnorm=True)}
    self.middleware_parameters = FCMiddlewareParameters(batchnorm=True)
    self.heads_parameters = [DDPGActorHeadParameters()]
    self.loss_weights = [1.0]
    self.rescale_gradient_from_head_by_factor = [1]
    self.optimizer_type = 'Adam'
    self.batch_size = 64
    self.async_training = False
    self.learning_rate = 0.0001
    self.create_target_network = True
    self.shared_optimizer = True
    self.scale_down_gradients_by_number_of_workers_for_sync_training = False
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
    self.middleware_parameters = FCMiddlewareParameters()
    self.heads_parameters = [VHeadParameters(), PolicyHeadParameters()]
    self.loss_weights = [0.5, 1.0]
    self.sil_loss_weights = [0.5 * 0.01, 1.0]  # the 0.01 factor is called beta^SIL in the paper
    self.rescale_gradient_from_head_by_factor = [1, 1]
    self.optimizer_type = 'Adam'
    self.clip_gradients = 40.0
    self.batch_size = 32  # = 512 / 16 workers (since training is synchronous)
    self.async_training = False  # A2C
    self.shared_optimizer = True
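# The SIL weights above rescale the A2C value-head weight by beta^SIL = 0.01 from
# the Self-Imitation Learning paper; a sketch of the arithmetic (assumed to apply
# during the SIL training phase only):
beta_sil = 0.01
a2c_loss_weights = [0.5, 1.0]  # [value head, policy head]
sil_loss_weights = [a2c_loss_weights[0] * beta_sil, a2c_loss_weights[1]]  # [0.005, 1.0]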
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='tanh')}
    self.middleware_parameters = FCMiddlewareParameters(activation_function='tanh')
    self.heads_parameters = [VHeadParameters(), PPOHeadParameters()]
    self.loss_weights = [1.0, 1.0]
    self.rescale_gradient_from_head_by_factor = [1, 1]
    self.batch_size = 64
    self.optimizer_type = 'Adam'
    self.clip_gradients = None
    self.use_separate_networks_per_head = True
    self.async_training = False
    self.l2_regularization = 0
    self.create_target_network = True
    self.shared_optimizer = True
    self.scale_down_gradients_by_number_of_workers_for_sync_training = True
def __init__(self):
    super().__init__()
    self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='tanh')}
    self.middleware_parameters = FCMiddlewareParameters(activation_function='tanh')
    self.heads_parameters = [VHeadParameters(), PPOHeadParameters()]
    self.batch_size = 64
    self.optimizer_type = 'Adam'
    self.clip_gradients = None
    self.use_separate_networks_per_head = True
    self.async_training = False
    self.l2_regularization = 0
    # The target network is used in order to freeze the old policy, while making
    # updates to the new one in train_network()
    self.create_target_network = True
    self.shared_optimizer = True
    self.scale_down_gradients_by_number_of_workers_for_sync_training = True
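# Why `create_target_network = True` for PPO: the frozen target copy provides the
# old policy's log-probabilities for the clipped surrogate objective's ratio
# pi_new(a|s) / pi_old(a|s). A minimal NumPy sketch of that objective
# (illustrative only; the clip threshold of 0.2 is the PPO paper's default, not
# a value taken from this file):
import numpy as np

def clipped_surrogate(new_log_probs, old_log_probs, advantages, clip_eps=0.2):
    ratio = np.exp(new_log_probs - old_log_probs)  # old values come from the frozen network
    clipped = np.clip(ratio, 1.0 - clip_eps, 1.0 + clip_eps)
    return np.minimum(ratio * advantages, clipped * advantages).mean()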
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

################
# Agent Params #
################
agent_params = CILAgentParameters()

# forward camera and measurements input
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'CameraRGB': InputEmbedderParameters(scheme=[Conv2d([32, 5, 2]),
                                                 Conv2d([32, 3, 1]),
                                                 Conv2d([64, 3, 2]),
                                                 Conv2d([64, 3, 1]),
                                                 Conv2d([128, 3, 2]),
                                                 Conv2d([128, 3, 1]),
                                                 Conv2d([256, 3, 1]),
                                                 Conv2d([256, 3, 1]),
                                                 Dense([512]),
                                                 Dense([512])],
                                         dropout=True,
                                         batchnorm=True),
    'measurements': InputEmbedderParameters(scheme=[Dense([128]), Dense([128])])
}
# TODO: batch norm is currently applied to the fc layers, which is not desired
# TODO: dropout should be configured differently per layer: [1.0] * 8 + [0.7] * 2 + [0.5] * 2 + [0.5] * 1 + [0.5, 1.] * 5

# simple fc middleware
agent_params.network_wrappers['main'].middleware_parameters = FCMiddlewareParameters(scheme=[Dense([512])])
#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.gae_lambda = 0.96
agent_params.algorithm.beta_entropy = 0
agent_params.network_wrappers['main'].clip_gradients = 10.0
agent_params.network_wrappers['main'].learning_rate = 0.00001
# agent_params.network_wrappers['main'].batch_size = 20
agent_params.network_wrappers['main'].input_embedders_parameters = {
    "screen": InputEmbedderParameters(input_rescaling={'image': 3.0})
}
agent_params.exploration = AdditiveNoiseParameters()
agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0.05)
# agent_params.exploration.noise_percentage_schedule = LinearSchedule(0.4, 0.05, 100000)
agent_params.exploration.evaluation_noise_percentage = 0.05
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

###############
# Environment #
###############
InputEmbedderParameters(
    scheme=[
        Conv2d(32, 5, 2),
        BatchnormActivationDropout(batchnorm=True, activation_function=tf.tanh),
        Conv2d(32, 3, 1),
        BatchnormActivationDropout(batchnorm=True, activation_function=tf.tanh),
        Conv2d(64, 3, 2),
        BatchnormActivationDropout(batchnorm=True, activation_function=tf.tanh),
        Conv2d(64, 3, 1),
        BatchnormActivationDropout(batchnorm=True, activation_function=tf.tanh),
        Conv2d(128, 3, 2),
        BatchnormActivationDropout(batchnorm=True, activation_function=tf.tanh),
        Conv2d(128, 3, 1),
        BatchnormActivationDropout(batchnorm=True, activation_function=tf.tanh),
        Conv2d(256, 3, 1),
        BatchnormActivationDropout(batchnorm=True, activation_function=tf.tanh),
        Conv2d(256, 3, 1),
        BatchnormActivationDropout(batchnorm=True, activation_function=tf.tanh),
        Dense(512),
        BatchnormActivationDropout(activation_function=tf.tanh, dropout_rate=0.3),
        Dense(512),
        BatchnormActivationDropout(activation_function=tf.tanh, dropout_rate=0.3)
    ],
    activation_function='none'  # we define the activation function for each layer explicitly
),
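# The scheme above alternates each Conv2d/Dense with a BatchnormActivationDropout
# entry, so normalization, activation, and dropout are controlled per layer while
# the embedder-level activation is disabled ('none'). A sketch of building the
# same pattern programmatically (`conv_bn_tanh` is a hypothetical helper, not
# part of the repo):
def conv_bn_tanh(filters, kernel_size, stride):
    return [Conv2d(filters, kernel_size, stride),
            BatchnormActivationDropout(batchnorm=True, activation_function=tf.tanh)]

# first two convolutional blocks of the scheme above
scheme_prefix = conv_bn_tanh(32, 5, 2) + conv_bn_tanh(32, 3, 1)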
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(16 * 50)  # 50 cycles
schedule_params.evaluation_steps = EnvironmentEpisodes(10)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = DQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.001
agent_params.network_wrappers['main'].batch_size = 128
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(256)]
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'state': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.2)
agent_params.exploration.evaluation_epsilon = 0

###############
# Environment #
###############
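# `rate_for_copying_weights_to_target = 0.05` makes the periodic copy a soft
# (Polyak-style) update rather than a hard overwrite; a sketch of the assumed
# semantics, applied once every 40 training steps per the schedule above:
def soft_update(target_weights, online_weights, rate=0.05):
    # new_target = (1 - rate) * target + rate * online, applied per tensor
    return [(1.0 - rate) * t + rate * o for t, o in zip(target_weights, online_weights)]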
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense(200)])
agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(
    scheme=MiddlewareScheme.Empty, number_of_lstm_cells=128)
agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1 / 20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))
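# A preset like this one typically ends by wiring the parameter objects defined
# above into a graph manager; a sketch assuming the standard Coach pattern
# (default visualization parameters are an assumption here):
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters())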
schedule_params.heatup_steps = EnvironmentSteps(0)

################
# Agent Params #
################
agent_params = DDPGAgentParameters()

# actor
actor_network = agent_params.network_wrappers['actor']
actor_network.learning_rate = 0.001
actor_network.batch_size = 256
actor_network.optimizer_epsilon = 1e-08
actor_network.adam_optimizer_beta1 = 0.9
actor_network.adam_optimizer_beta2 = 0.999
actor_network.input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
actor_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)])
actor_network.heads_parameters[0].batchnorm = False

# critic
critic_network = agent_params.network_wrappers['critic']
critic_network.learning_rate = 0.001
critic_network.batch_size = 256
critic_network.optimizer_epsilon = 1e-08
critic_network.adam_optimizer_beta1 = 0.9
critic_network.adam_optimizer_beta2 = 0.999
critic_network.input_embedders_parameters = {
    'action': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense([200])])
agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(
    scheme=MiddlewareScheme.Empty, number_of_lstm_cells=128)
agent_params.input_filter = MujocoInputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1 / 20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())
agent_params.exploration = ContinuousEntropyParameters()

###############
# Environment #
###############