Example #1
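Network parameters in the style of Coach's DFP agent: three leaky-ReLU input embedders ('observation', 'measurements', 'goal'), an empty fully-connected middleware, and a measurements-prediction head.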
class DFPNetworkParameters(NetworkParameters):  # class header restored from context (Coach's DFP agent)
    def __init__(self):
        super().__init__()
        self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='leaky_relu'),
                                           'measurements': InputEmbedderParameters(activation_function='leaky_relu'),
                                           'goal': InputEmbedderParameters(activation_function='leaky_relu')}

        self.input_embedders_parameters['observation'].scheme = [
            Conv2d(32, 8, 4),
            Conv2d(64, 4, 2),
            Conv2d(64, 3, 1),
            Dense(512),
        ]

        self.input_embedders_parameters['measurements'].scheme = [
            Dense(128),
            Dense(128),
            Dense(128),
        ]

        self.input_embedders_parameters['goal'].scheme = [
            Dense(128),
            Dense(128),
            Dense(128),
        ]

        self.middleware_parameters = FCMiddlewareParameters(activation_function='leaky_relu',
                                                            scheme=MiddlewareScheme.Empty)
        self.heads_parameters = [MeasurementsPredictionHeadParameters(activation_function='leaky_relu')]
        self.async_training = False
        self.batch_size = 64
        self.adam_optimizer_beta1 = 0.95
Example #2
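A custom _build_module for an image embedder: a conv trunk with orthogonal initialization and batchnorm/activation blocks, flattened and capped by a 512-unit output layer; when the network is the predictor, two extra 512-unit dense layers are inserted first, which matches the predictor/target pattern of RND-style exploration setups.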
    def _build_module(self, input_layer):
        weight_init = Orthogonal(gain=np.sqrt(2))
        input_layer = Conv2d(num_filters=32, kernel_size=8, strides=4)(input_layer, kernel_initializer=weight_init)
        input_layer = BatchnormActivationDropout(activation_function=self.activation_function)(input_layer)[-1]
        input_layer = Conv2d(num_filters=64, kernel_size=4, strides=2)(input_layer, kernel_initializer=weight_init)
        input_layer = BatchnormActivationDropout(activation_function=self.activation_function)(input_layer)[-1]
        input_layer = Conv2d(num_filters=64, kernel_size=3, strides=1)(input_layer, kernel_initializer=weight_init)
        input_layer = BatchnormActivationDropout(activation_function=self.activation_function)(input_layer)[-1]
        input_layer = tf.contrib.layers.flatten(input_layer)

        if self.is_predictor:
            input_layer = self.dense_layer(512)(input_layer, kernel_initializer=weight_init)
            input_layer = BatchnormActivationDropout(activation_function=tf.nn.relu)(input_layer)[-1]
            input_layer = self.dense_layer(512)(input_layer, kernel_initializer=weight_init)
            input_layer = BatchnormActivationDropout(activation_function=tf.nn.relu)(input_layer)[-1]

        self.output = self.dense_layer(512)(input_layer, name='output', kernel_initializer=weight_init)
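Note that BatchnormActivationDropout returns the list of tensors it creates, so the trailing [-1] selects the final one; the orthogonal gain of sqrt(2) is the usual choice for ReLU-family activations.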
Example #3

schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ClippedPPOAgentParameters()

agent_params.network_wrappers['main'].learning_rate = 0.0003
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [
    Conv2d(32, 8, 4),
    Conv2d(64, 4, 2),
    Conv2dWithAttention(64, 3, 1, 256)
]

agent_params.network_wrappers['main'].input_embedders_parameters[
    'observation'].activation_function = 'relu'
agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
#agent_params.network_wrappers['main'].middleware_parameters.scheme = [
#    Conv2dWithAttention(64, 3, 1, 1000)
#]

agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
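This Clipped PPO preset replaces the last conv layer of the standard Atari stack with Conv2dWithAttention and uses ReLU activations throughout; the commented-out block preserves an attention-based middleware variant that was evidently tried as well.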
Example #4
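A minimal N-step Q preset for Atari: a two-layer conv embedder, a 256-unit dense middleware, and the usual scheduling and validation blocks.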
####################
# Graph Scheduling #
####################
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)
schedule_params.evaluation_steps = EnvironmentEpisodes(3)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = NStepQAgentParameters()

agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = [
    Conv2d(16, 8, 4),
    Conv2d(32, 4, 2)
]
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense(256)
]

###############
# Environment #
###############
env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.trace_test_levels = ['breakout', 'pong', 'space_invaders']  # truncated in the source; filled with the levels Coach's Atari presets typically use
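A preset like this one normally ends by wiring the parts into a graph manager. The closing step is sketched below, assuming Coach's BasicRLGraphManager and default visualization parameters:

from rl_coach.base_parameters import VisualizationParameters
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager

# assemble agent, environment, schedule, and test parameters into a runnable graph
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)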
Example #5
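Fragments from CARLA presets: first a conditional imitation learning (CIL) setup whose forward-camera embedder interleaves conv layers with batchnorm + tanh blocks, followed by two related network-definition snippets.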
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = TrainingSteps(500)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

################
# Agent Params #
################
agent_params = CILAgentParameters()

# forward camera and measurements input
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'CameraRGB': InputEmbedderParameters(
        scheme=[
            Conv2d(32, 5, 2),
            BatchnormActivationDropout(batchnorm=True,
                                       activation_function=tf.tanh),
            Conv2d(32, 3, 1),
            BatchnormActivationDropout(batchnorm=True,
                                       activation_function=tf.tanh),
            Conv2d(64, 3, 2),
            BatchnormActivationDropout(batchnorm=True,
                                       activation_function=tf.tanh),
            Conv2d(64, 3, 1),
            BatchnormActivationDropout(batchnorm=True,
                                       activation_function=tf.tanh),
            Conv2d(128, 3, 2),
            BatchnormActivationDropout(batchnorm=True,
                                       activation_function=tf.tanh),
            Conv2d(128, 3, 1),
            # ... (the remaining layers of this scheme are truncated in the source snippet)

    # a separate fragment: the built-in scheme table of a Coach image input embedder
    def schemes(self):
        return {
            EmbedderScheme.Empty:
                [],

            EmbedderScheme.Shallow:
                [
                    Conv2d(32, 3, 1)
                ],

            # atari dqn
            EmbedderScheme.Medium:
                [
                    Conv2d(32, 8, 4),
                    Conv2d(64, 4, 2),
                    Conv2d(64, 3, 1)
                ],

            # carla
            EmbedderScheme.Deep:
                [
                    Conv2d(32, 5, 2),
                    Conv2d(32, 3, 1),
                    Conv2d(64, 3, 2),
                    Conv2d(64, 3, 1),
                    Conv2d(128, 3, 2),
                    Conv2d(128, 3, 1),
                    Conv2d(256, 3, 2),
                    Conv2d(256, 3, 1)
                ]
        }
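These are the built-in EmbedderScheme presets that InputEmbedderParameters falls back on (the default is EmbedderScheme.Medium, the Atari DQN stack); passing an explicit layer list to scheme=..., as the surrounding fragments do, overrides them. The next fragment is a Clipped PPO preset that does exactly that for a two-camera CARLA setup.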
agent_params = ClippedPPOAgentParameters()
# agent_params.network_wrappers['main'].input_embedders_parameters = {
#         'left_camera': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3),
#         'stereo': InputEmbedderParameters(activation_function='relu', dropout_rate=0.3)
#         }
# agent_params.network_wrappers['main'].input_embedders_parameters = {
#         'left_camera': InputEmbedderParameters(activation_function='relu'),
#         'stereo': InputEmbedderParameters(activation_function='relu')
#         }

agent_params.network_wrappers['main'].input_embedders_parameters = {
    'left_camera': InputEmbedderParameters(activation_function='relu'),
    'stereo': InputEmbedderParameters(
        scheme=[
            Conv2d(32, 3, 1),
            Conv2d(64, 3, 2),
            Conv2d(64, 3, 1),
            Conv2d(128, 3, 2),
            Conv2d(128, 3, 1),
            Dense(256)
        ],
        activation_function='relu')
}

agent_params.network_wrappers['main'].learning_rate = 0.0003
agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
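A preset assembled this way is typically launched through Coach's CLI (coach -p <preset_name>) or by calling improve() on the resulting graph manager.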