Example No. 1
    def __init__(self):
        super().__init__()
        self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='leaky_relu'),
                                            'measurements': InputEmbedderParameters(activation_function='leaky_relu'),
                                            'goal': InputEmbedderParameters(activation_function='leaky_relu')}

        self.input_embedders_parameters['observation'].scheme = [
            Conv2d(32, 8, 4),
            Conv2d(64, 4, 2),
            Conv2d(64, 3, 1),
            Dense(512),
        ]

        self.input_embedders_parameters['measurements'].scheme = [
            Dense(128),
            Dense(128),
            Dense(128),
        ]

        self.input_embedders_parameters['goal'].scheme = [
            Dense(128),
            Dense(128),
            Dense(128),
        ]

        self.middleware_parameters = FCMiddlewareParameters(activation_function='leaky_relu',
                                                            scheme=MiddlewareScheme.Empty)
        self.heads_parameters = [MeasurementsPredictionHeadParameters(activation_function='leaky_relu')]
        self.async_training = False
        self.batch_size = 64
        self.adam_optimizer_beta1 = 0.95
Example No. 2
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters(batchnorm=True),
         'action': InputEmbedderParameters(scheme=EmbedderScheme.Shallow)
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [VHeadParameters()]
     self.optimizer_type = 'Adam'
     self.batch_size = 64
     self.async_training = False
     self.learning_rate = 0.001
     self.create_target_network = True
     self.shared_optimizer = True
     self.scale_down_gradients_by_number_of_workers_for_sync_training = False
Example No. 3
 def __init__(self, num_q_networks):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(),
                                         'action': InputEmbedderParameters(scheme=EmbedderScheme.Shallow)}
     self.middleware_parameters = FCMiddlewareParameters(num_streams=num_q_networks)
     self.heads_parameters = [TD3VHeadParameters()]
     self.optimizer_type = 'Adam'
     self.adam_optimizer_beta2 = 0.999
     self.optimizer_epsilon = 1e-8
     self.batch_size = 100
     self.async_training = False
     self.learning_rate = 0.001
     self.create_target_network = True
     self.shared_optimizer = True
     self.scale_down_gradients_by_number_of_workers_for_sync_training = False
Example No. 4
def create_input_embedder(scheme_dict, embedder_type, activation_function):
    """Creates an rl coach input embedder
    scheme_dict - Dictionary where the key is the observation and the value is
                  a dictionary containing all the information required by
                  the scheme creation methods defined above.
    embedder_type - String indicating desired embedder type, available
                     types are defined in SCHEME_TYPE
    activation_function - Desired activation function for the embedder
    """
    try:
        if not ActivationFunctions.has_activation_function(activation_function):
            raise Exception("Invalid activation function for input embedder")

        embedder_types_parameters = dict()

        for observation, info in scheme_dict.items():
            scheme = SCHEME_TYPE[embedder_type](info)
            embedder_types_parameters[observation] = InputEmbedderParameters(
                scheme=scheme, activation_function=activation_function
            )

        return embedder_types_parameters
    except KeyError as err:
        raise Exception("Input embedder, key {} not found".format(err.args[0]))
    except Exception as err:
        raise Exception("Error while creating input emmbedder: {}".format(err))
Example No. 5
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [DNDQHeadParameters()]
     self.optimizer_type = 'Adam'
     self.should_get_softmax_probabilities = False
Example No. 6
    def get_input_embedder(self, input_name: str, embedder_params: InputEmbedderParameters):
        """
        Given an input embedder parameters class, creates the input embedder and returns it
        :param input_name: the name of the input to the embedder (used for retrieving the shape). The input should
                           be a value within the state or the action.
        :param embedder_params: the parameters object describing the embedder
        :return: the embedder instance
        """
        allowed_inputs = copy.copy(self.spaces.state.sub_spaces)
        allowed_inputs["action"] = copy.copy(self.spaces.action)
        allowed_inputs["goal"] = copy.copy(self.spaces.goal)

        if input_name not in allowed_inputs.keys():
            raise ValueError("The key for the input embedder ({}) must match one of the following keys: {}"
                             .format(input_name, allowed_inputs.keys()))

        emb_type = "vector"
        if isinstance(allowed_inputs[input_name], TensorObservationSpace):
            emb_type = "tensor"
        elif isinstance(allowed_inputs[input_name], PlanarMapsObservationSpace):
            emb_type = "image"

        embedder_path = embedder_params.path(emb_type)
        embedder_params_copy = copy.copy(embedder_params)
        embedder_params_copy.activation_function = utils.get_activation_function(embedder_params.activation_function)
        embedder_params_copy.input_rescaling = embedder_params_copy.input_rescaling[emb_type]
        embedder_params_copy.input_offset = embedder_params_copy.input_offset[emb_type]
        embedder_params_copy.name = input_name
        module = dynamic_import_and_instantiate_module_from_params(embedder_params_copy,
                                                                   path=embedder_path,
                                                                   positional_args=[allowed_inputs[input_name].shape])
        return module
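A short illustration (an assumption, not taken from the source): the keys used when configuring input_embedders_parameters must match a state sub-space name, 'action', or 'goal', otherwise the ValueError above is raised; the type of the matched observation space then decides whether an image, tensor, or vector embedder is imported and instantiated. A typical configuration that satisfies this check looks like the following, where 'network_params' is a stand-in name.

# Illustrative only: 'observation' is assumed to be a state sub-space and
# 'network_params' is a hypothetical network parameters object.
network_params.input_embedders_parameters = {
    'observation': InputEmbedderParameters(),                          # image/vector embedder chosen by space type
    'action': InputEmbedderParameters(scheme=EmbedderScheme.Shallow),  # action space -> vector embedder
}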
Example No. 7
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [PolicyHeadParameters()]
     self.async_training = True
Example No. 8
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [VHeadParameters(loss_weight=0.5), PolicyHeadParameters(loss_weight=1.0)]
     self.optimizer_type = 'Adam'
     self.clip_gradients = 40.0
     self.async_training = True
Example No. 9
def test_image_embedder():
    params = InputEmbedderParameters(scheme=EmbedderScheme.Medium)
    emb = ImageEmbedder(params=params)
    emb.initialize()
    input_data = mx.nd.random.uniform(low=0, high=1, shape=(10, 3, 244, 244))
    output = emb(input_data)
    assert len(output.shape) == 2  # since last block was flatten
    assert output.shape[0] == 10  # since batch_size is 10
Example No. 10
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [DNDQHeadParameters()]
     self.optimizer_type = 'Adam'
Example No. 11
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [QHeadParameters()]
     self.optimizer_type = 'Adam'
     self.async_training = True
     self.shared_optimizer = True
     self.create_target_network = True
Example No. 12
def test_vector_embedder():
    params = InputEmbedderParameters(scheme=EmbedderScheme.Medium)
    emb = VectorEmbedder(params=params)
    emb.initialize()
    input_data = mx.nd.random.uniform(low=0, high=255, shape=(10, 100))
    output = emb(input_data)
    assert len(output.shape) == 2  # since last block was flatten
    assert output.shape[0] == 10  # since batch_size is 10
    assert output.shape[1] == 256  # since last dense layer has 256 units
Example No. 13
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Medium)
     self.heads_parameters = [RegressionHeadParameters()]
     self.optimizer_type = 'Adam'
     self.batch_size = 32
     self.replace_mse_with_huber_loss = False
     self.create_target_network = False
Example No. 14
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters(
         activation_function='none')
     self.heads_parameters = [RNDHeadParameters()]
     self.optimizer_type = 'Adam'
     self.clip_gradients = None
     self.create_target_network = False
Example No. 15
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty)}
     self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Empty)
     self.heads_parameters = [SACQHeadParameters()]      # SACQHeadParameters includes the topology of the head
     self.rescale_gradient_from_head_by_factor = [1]
     self.optimizer_type = 'Adam'
     self.batch_size = 256
     self.async_training = False
     self.learning_rate = 0.0003
     self.create_target_network = False
Example No. 16
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='leaky_relu',
                                                                               input_rescaling={'image': 1.0})}
     self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Empty)
     self.heads_parameters = [RNDHeadParameters()]
     self.create_target_network = False
     self.optimizer_type = 'Adam'
     self.batch_size = 100
     self.learning_rate = 0.0001
     self.should_get_softmax_probabilities = False
Example No. 17
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters(activation_function='tanh')
     }
     self.middleware_parameters = FCMiddlewareParameters(
         activation_function='tanh')
     self.heads_parameters = [VHeadParameters()]
     self.async_training = True
     self.l2_regularization = 0
     self.create_target_network = True
     self.batch_size = 128
Example No. 18
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='relu')}
     self.middleware_parameters = FCMiddlewareParameters(activation_function='relu')
     self.heads_parameters = [SACPolicyHeadParameters()]
     self.rescale_gradient_from_head_by_factor = [1]
     self.optimizer_type = 'Adam'
     self.batch_size = 256
     self.async_training = False
     self.learning_rate = 0.0003
     self.create_target_network = False
     self.l2_regularization = 0      # weight decay regularization. not used in the original paper
Example No. 19
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='relu')}
     self.middleware_parameters = FCMiddlewareParameters(activation_function='relu')
     self.heads_parameters = [VHeadParameters(initializer='xavier')]
     self.rescale_gradient_from_head_by_factor = [1]
     self.optimizer_type = 'Adam'
     self.batch_size = 256
     self.async_training = False
     self.learning_rate = 0.0003     # 3e-4 see appendix D in the paper
     # tau is set in SoftActorCriticAlgorithmParameters.rate_for_copying_weights_to_target
     self.create_target_network = True
Example No. 20
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [DDPGActorHeadParameters(batchnorm=False)]
     self.optimizer_type = 'Adam'
     self.adam_optimizer_beta2 = 0.999
     self.optimizer_epsilon = 1e-8
     self.batch_size = 100
     self.async_training = False
     self.learning_rate = 0.001
     self.create_target_network = True
     self.shared_optimizer = True
     self.scale_down_gradients_by_number_of_workers_for_sync_training = False
Example No. 21
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
     }
     self.middleware_parameters = VGG16MiddlewareParameters(
         scheme=MiddlewareScheme.Medium
     )  #FCMiddlewareParameters(scheme=MiddlewareScheme.Medium)#
     self.heads_parameters = [QHeadParameters()]
     self.optimizer_type = 'Adam'
     self.batch_size = 32
     self.replace_mse_with_huber_loss = True
     self.create_target_network = True
     self.should_get_softmax_probabilities = False
Example No. 22
    def __init__(self):
        super().__init__()
        self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='tanh')}
        self.middleware_parameters = FCMiddlewareParameters(activation_function='tanh')
        self.heads_parameters = [VHeadParameters(), PPOHeadParameters()]
        self.batch_size = 64
        self.optimizer_type = 'Adam'
        self.clip_gradients = None
        self.use_separate_networks_per_head = True
        self.async_training = False
        self.l2_regularization = 0

        # The target network is used in order to freeze the old policy, while making updates to the new one
        # in train_network()
        self.create_target_network = True
        self.shared_optimizer = True
        self.scale_down_gradients_by_number_of_workers_for_sync_training = True
Example No. 23
schedule_params.heatup_steps = EnvironmentSteps(0)

################
# Agent Params #
################
agent_params = DDPGAgentParameters()

# actor
actor_network = agent_params.network_wrappers['actor']
actor_network.learning_rate = 0.001
actor_network.batch_size = 256
actor_network.optimizer_epsilon = 1e-08
actor_network.adam_optimizer_beta1 = 0.9
actor_network.adam_optimizer_beta2 = 0.999
actor_network.input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
actor_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)])
actor_network.heads_parameters[0].batchnorm = False

# critic
critic_network = agent_params.network_wrappers['critic']
critic_network.learning_rate = 0.001
critic_network.batch_size = 256
critic_network.optimizer_epsilon = 1e-08
critic_network.adam_optimizer_beta1 = 0.9
critic_network.adam_optimizer_beta2 = 0.999
critic_network.input_embedders_parameters = {
    'action': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
Example No. 24
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(1)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.005
agent_params.network_wrappers['main'].learning_rate = 0.00002
agent_params.network_wrappers['main'].input_embedders_parameters['observation'] = \
    InputEmbedderParameters(scheme=[Dense(200)])
agent_params.network_wrappers['main'].middleware_parameters = LSTMMiddlewareParameters(scheme=MiddlewareScheme.Empty,
                                                                                       number_of_lstm_cells=128)

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_reward_filter('rescale', RewardRescaleFilter(1/20.))
agent_params.input_filter.add_observation_filter('observation', 'normalize', ObservationNormalizationFilter())

###############
# Environment #
###############
env_params = GymVectorEnvironment(level=SingleLevelSelection(mujoco_v2))

########
# Test #
########
Example No. 25
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'STEREO_CAMERAS':
    InputEmbedderParameters(
        scheme=[Conv2d(32, 8, 4),
                Conv2d(32, 4, 2),
                Conv2d(64, 4, 2)],
        activation_function='relu',
        dropout_rate=0.3),
    'LIDAR':
    InputEmbedderParameters(scheme=[Dense(64), Dense(32)],
                            activation_function='relu',
                            dropout_rate=0.3)
}

agent_params.network_wrappers['main'].middleware_parameters = \
     FCMiddlewareParameters(
         scheme=[
             Dense(256)
         ],
         activation_function='relu', dropout_rate=0.3
     )
Example No. 26
#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()

agent_params.algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.gae_lambda = 0.96
agent_params.algorithm.beta_entropy = 0

agent_params.network_wrappers['main'].clip_gradients = 10.0
agent_params.network_wrappers['main'].learning_rate = 0.00001
# agent_params.network_wrappers['main'].batch_size = 20
agent_params.network_wrappers['main'].input_embedders_parameters = {
    "screen": InputEmbedderParameters(input_rescaling={'image': 3.0})
}

agent_params.exploration = AdditiveNoiseParameters()
agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0.05)
# agent_params.exploration.noise_percentage_schedule = LinearSchedule(0.4, 0.05, 100000)
agent_params.exploration.evaluation_noise_percentage = 0.05

agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

###############
# Environment #
###############
Example No. 27
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()
# agent_params.network_wrappers['main'].input_embedders_parameters = {
#         'left_camera': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3),
#         'stereo': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3)
#         }
agent_params.network_wrappers['main'].input_embedders_parameters = {
        'left_camera': InputEmbedderParameters(activation_function='relu'),
        'stereo': InputEmbedderParameters(activation_function='relu')
        }

agent_params.network_wrappers['main'].learning_rate = 0.0003
agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

# agent_params.network_wrappers['main'].learning_rate_decay_steps = 60000
# agent_params.network_wrappers['main'].learning_rate_decay_rate = 0.95
# agent_params.network_wrappers['main'].input_embedders_parameters['observation'].batchnorm = True
# agent_params.network_wrappers['main'].input_embedders_parameters['observation'].dropout_rate = 0.3
# agent_params.network_wrappers['main'].l2_regularization = 2e-5
agent_params.algorithm.beta_entropy = 0.001
Example No. 28
camera_obs_scheme = [
    Conv2d(32, 8, 4),
    BatchnormActivationDropout(activation_function='relu'),
    Conv2d(64, 4, 2),
    BatchnormActivationDropout(activation_function='relu'),
    Conv2d(64, 3, 1),
    BatchnormActivationDropout(activation_function='relu'),
    Flatten(),
    Dense(256),
    BatchnormActivationDropout(activation_function='relu')
]

# Actor
actor_network = agent_params.network_wrappers['actor']
actor_network.input_embedders_parameters = {
    'measurements':
    InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    agent_params.algorithm.agent_obs_key:
    InputEmbedderParameters(scheme=camera_obs_scheme,
                            activation_function='none')
}

actor_network.middleware_parameters.scheme = [Dense(300), Dense(200)]
actor_network.learning_rate = 1e-4

# Critic
critic_network = agent_params.network_wrappers['critic']
critic_network.input_embedders_parameters = {
    'action':
    InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'measurements':
    InputEmbedderParameters(scheme=EmbedderScheme.Empty),
Example No. 29
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = EnvironmentEpisodes(16 * 50 * 200)  # 200 epochs
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(16 * 50)  # 50 cycles
schedule_params.evaluation_steps = EnvironmentEpisodes(10)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = DQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.001
agent_params.network_wrappers['main'].batch_size = 128
agent_params.network_wrappers['main'].middleware_parameters.scheme = [Dense(256)]
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'state': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.2)
agent_params.exploration.evaluation_epsilon = 0

agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Final
agent_params.memory.hindsight_transitions_per_regular_transition = 1
agent_params.memory.goals_space = GoalsSpace(goal_name='state',
                                                    reward_type=ReachingGoal(distance_from_goal_threshold=0,
Example No. 30
#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

# Agent params
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(
    100)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(4096)
agent_params.algorithm.act_for_full_episodes = False

# NN configuration
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=[])
}
agent_params.network_wrappers['main'].learning_rate = 0.001

################
#  Environment #
################
env_params = GymVectorEnvironment(
    level='gym_jiminy.envs.acrobot:JiminyAcrobotEnv')
env_params.additional_simulator_parameters = {
    'continuous': True,
    'enableGoalEnv': False
}

################
#   Learning   #